From 39d607e74e95a1e8268ba32fbcc3d05d7ab761d8 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 8 Jan 2021 09:21:38 +0100 Subject: [PATCH 01/83] Minimal voc used in examples, part 1. --- .../core/vocabulary/Namespaces.scala | 13 +++- .../framework/core/vocabulary/RMLVoc.scala | 1 + .../framework/core/vocabulary/WoTVoc.scala | 74 +++++++++++++++++++ 3 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala diff --git a/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala b/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala index 7fbec062..b67923bf 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala @@ -34,6 +34,7 @@ object Namespaces { "rdf" -> "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdfs" -> "http://www.w3.org/2000/01/rdf-schema#", "rr" -> "http://www.w3.org/ns/r2rml#", + "formats" -> "https://www.w3.org/ns/formats/", "ql" -> "http://semweb.mmlab.be/ns/ql#", "rml" -> "http://semweb.mmlab.be/ns/rml#", "rmls" -> "http://semweb.mmlab.be/ns/rmls#", @@ -47,7 +48,17 @@ object Namespaces { "lib" -> "http://example.com/library#", "xsd" -> "http://www.w3.org/2001/XMLSchema#", - "doap" -> "http://usefulinc.com/ns/doap#" + "doap" -> "http://usefulinc.com/ns/doap#", + + // Web of Things + "td" -> "https://www.w3.org/2019/wot/td#", + "hctl" -> "https://www.w3.org/2019/wot/hypermedia#", + "mqv" -> "http://www.example.org/mqtt-binding#", // TODO: change once an officlial vocabulary is published + "wotsec" -> "https://www.w3.org/2019/wot/security#", + + // HTTP + "http" -> "http://www.w3.org/2011/http#", + "htv" -> "http://www.w3.org/2011/http#" // typically used in WoT documents. 
) /** diff --git a/src/main/scala/io/rml/framework/core/vocabulary/RMLVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/RMLVoc.scala index 7aba736c..ed6aff05 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/RMLVoc.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/RMLVoc.scala @@ -81,6 +81,7 @@ object RMLVoc { val ITERATOR = Namespaces("rml", "iterator") val REFERENCEFORMULATION = Namespaces("rml", "referenceFormulation") val SOURCE = Namespaces("rml", "source") + val LOGICALTARGET = Namespaces("rml", "logicalTarget") /////////////////////////////////////////////////////////////////////////// // RMLS: TCP Source diff --git a/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala new file mode 100644 index 00000000..efec3d5d --- /dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala @@ -0,0 +1,74 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ + +/** + * Contains the constants of the Web of Things vocabulary. + */ +object WoTVoc { + + object Propertry { + /////////////////////////////////////////////////////////////////////////// + // TD (https://www.w3.org/2019/wot/td) + /////////////////////////////////////////////////////////////////////////// + val PROPERTYAFFORDANCE = Namespaces("td", "hasPropertyAffordance") + val FORM = Namespaces("td", "hasForm") + val TARGET = Namespaces("td", "hasTarget") + val CONTENTTYPE = Namespaces("td", "forContentType") + val SECURITYCONFIGURATION = Namespaces("td", "hasSecurityConfiguration") + + /////////////////////////////////////////////////////////////////////////// + // HCTL -- Hypermedia Controls Vocabulary (https://www.w3.org/2019/wot/hypermedia) + /////////////////////////////////////////////////////////////////////////// + val OPERATIONTYPE = Namespaces("hctl", "hasOperationType") + + /////////////////////////////////////////////////////////////////////////// + // MQV -- MQTT vocabulary. 
+ // in the making so unofficial (https://www.w3.org/TR/2020/NOTE-wot-binding-templates-20200130/#mqtt-vocabulary) + /////////////////////////////////////////////////////////////////////////// + val CONTROLPACKETVALUE = Namespaces("mqv", "controlPacketValue") + val OPTIONS = Namespaces("mqv", "options") + val OPTIONNAME = Namespaces("mqv", "optionName") + val OPTIONVALUE = Namespaces("mqv", "optionValue") + + /////////////////////////////////////////////////////////////////////////// + // WOTSEC (https://www.w3.org/2019/wot/security) + /////////////////////////////////////////////////////////////////////////// + val IN = Namespaces("wotsec", "in") + } + + object Class { + /////////////////////////////////////////////////////////////////////////// + // TD + /////////////////////////////////////////////////////////////////////////// + val THING = Namespaces("td", "Thing") + + /////////////////////////////////////////////////////////////////////////// + // WOTSEC + /////////////////////////////////////////////////////////////////////////// + val BASICSECURITYSCHEME = Namespaces("wotsec", "BasicSecurityScheme") + } +} From f37998a32d8c44b6d08cb7a0d6269ab4f96efe60 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 8 Jan 2021 09:52:28 +0100 Subject: [PATCH 02/83] Refactoring, define namespace at vocabulary level. 
--- .../core/vocabulary/Namespaces.scala | 12 +-- .../framework/core/vocabulary/WoTVoc.scala | 86 +++++++++++-------- 2 files changed, 58 insertions(+), 40 deletions(-) diff --git a/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala b/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala index b67923bf..9a9baf0d 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala @@ -51,14 +51,14 @@ object Namespaces { "doap" -> "http://usefulinc.com/ns/doap#", // Web of Things - "td" -> "https://www.w3.org/2019/wot/td#", - "hctl" -> "https://www.w3.org/2019/wot/hypermedia#", - "mqv" -> "http://www.example.org/mqtt-binding#", // TODO: change once an officlial vocabulary is published - "wotsec" -> "https://www.w3.org/2019/wot/security#", + WoTVoc.ThingDescription.namespace._1 -> WoTVoc.ThingDescription.namespace._2, + WoTVoc.Hypermedia.namespace._1 -> WoTVoc.Hypermedia.namespace._2, + WoTVoc.WoTMQTT.namespace._1 -> WoTVoc.WoTMQTT.namespace._2, + WoTVoc.WotSecurity.namespace._1 -> WoTVoc.WotSecurity.namespace._2 // HTTP - "http" -> "http://www.w3.org/2011/http#", - "htv" -> "http://www.w3.org/2011/http#" // typically used in WoT documents. + //"http" -> "http://www.w3.org/2011/http#", + // TODO "htv" -> "http://www.w3.org/2011/http#" // typically used in WoT documents. 
) /** diff --git a/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala index efec3d5d..60ac84c0 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala @@ -30,45 +30,63 @@ package io.rml.framework.core.vocabulary */ object WoTVoc { - object Propertry { - /////////////////////////////////////////////////////////////////////////// - // TD (https://www.w3.org/2019/wot/td) - /////////////////////////////////////////////////////////////////////////// - val PROPERTYAFFORDANCE = Namespaces("td", "hasPropertyAffordance") - val FORM = Namespaces("td", "hasForm") - val TARGET = Namespaces("td", "hasTarget") - val CONTENTTYPE = Namespaces("td", "forContentType") - val SECURITYCONFIGURATION = Namespaces("td", "hasSecurityConfiguration") + /////////////////////////////////////////////////////////////////////////// + // TD (https://www.w3.org/2019/wot/td) + /////////////////////////////////////////////////////////////////////////// + object ThingDescription { + val namespace = ("td", "https://www.w3.org/2019/wot/td#") - /////////////////////////////////////////////////////////////////////////// - // HCTL -- Hypermedia Controls Vocabulary (https://www.w3.org/2019/wot/hypermedia) - /////////////////////////////////////////////////////////////////////////// - val OPERATIONTYPE = Namespaces("hctl", "hasOperationType") + object Property { + val HASPROPERTYAFFORDANCE = Namespaces("td", "hasPropertyAffordance") + val HASFORM = Namespaces("td", "hasForm") + val HASTARGET = Namespaces("td", "hasTarget") + val HASCONTENTTYPE = Namespaces("td", "forContentType") + val HASSECURITYCONFIGURATION = Namespaces("td", "hasSecurityConfiguration") + } - /////////////////////////////////////////////////////////////////////////// - // MQV -- MQTT vocabulary. 
- // in the making so unofficial (https://www.w3.org/TR/2020/NOTE-wot-binding-templates-20200130/#mqtt-vocabulary) - /////////////////////////////////////////////////////////////////////////// - val CONTROLPACKETVALUE = Namespaces("mqv", "controlPacketValue") - val OPTIONS = Namespaces("mqv", "options") - val OPTIONNAME = Namespaces("mqv", "optionName") - val OPTIONVALUE = Namespaces("mqv", "optionValue") + object Class { + val THING = Namespaces("td", "Thing") + } + } + + /////////////////////////////////////////////////////////////////////////// + // MQV -- MQTT vocabulary. + // in the making so unofficial (https://www.w3.org/TR/2020/NOTE-wot-binding-templates-20200130/#mqtt-vocabulary) + /////////////////////////////////////////////////////////////////////////// + object WoTMQTT { + val namespace = ("mqv", "http://www.example.org/mqtt-binding#") // TODO: change once an officlial vocabulary is published + + object Property { + val CONTROLPACKETVALUE = Namespaces("mqv", "controlPacketValue") + val OPTIONS = Namespaces("mqv", "options") + val OPTIONNAME = Namespaces("mqv", "optionName") + val OPTIONVALUE = Namespaces("mqv", "optionValue") + } + } + + /////////////////////////////////////////////////////////////////////////// + // WOTSEC (https://www.w3.org/2019/wot/security) + /////////////////////////////////////////////////////////////////////////// + object WotSecurity { + val namespace = ("wotsec", "https://www.w3.org/2019/wot/security#") + + object Property { + val IN = Namespaces("wotsec", "in") + } - /////////////////////////////////////////////////////////////////////////// - // WOTSEC (https://www.w3.org/2019/wot/security) - /////////////////////////////////////////////////////////////////////////// - val IN = Namespaces("wotsec", "in") + object Class { + val BASICSECURITYSCHEME = Namespaces("wotsec", "BasicSecurityScheme") + } } - object Class { - /////////////////////////////////////////////////////////////////////////// - // TD - 
/////////////////////////////////////////////////////////////////////////// - val THING = Namespaces("td", "Thing") + /////////////////////////////////////////////////////////////////////////// + // HCTL -- Hypermedia Controls Vocabulary (https://www.w3.org/2019/wot/hypermedia) + /////////////////////////////////////////////////////////////////////////// + object Hypermedia { + val namespace = ("hctl", "https://www.w3.org/2019/wot/hypermedia#") - /////////////////////////////////////////////////////////////////////////// - // WOTSEC - /////////////////////////////////////////////////////////////////////////// - val BASICSECURITYSCHEME = Namespaces("wotsec", "BasicSecurityScheme") + object Property { + val OPERATIONTYPE = Namespaces("hctl", "hasOperationType") + } } } From 282c298db4f76bbe9e516dc01a6bc0d4944076e7 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 8 Jan 2021 10:37:21 +0100 Subject: [PATCH 03/83] Wrongly assigned hypermedia voc stuff to td prefix. --- .../core/vocabulary/Hypermedia.scala | 39 +++++++++++++++++++ .../core/vocabulary/Namespaces.scala | 6 ++- .../framework/core/vocabulary/WoTVoc.scala | 31 +++++---------- 3 files changed, 52 insertions(+), 24 deletions(-) create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/Hypermedia.scala diff --git a/src/main/scala/io/rml/framework/core/vocabulary/Hypermedia.scala b/src/main/scala/io/rml/framework/core/vocabulary/Hypermedia.scala new file mode 100644 index 00000000..4a238ace --- /dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/Hypermedia.scala @@ -0,0 +1,39 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * + * HCTL -- Hypermedia Controls Vocabulary (https://www.w3.org/2019/wot/hypermedia) + * + **/ +object Hypermedia { + val namespace = ("hctl", "https://www.w3.org/2019/wot/hypermedia#") + + object Property { + val FORCONTENTTYPE = Namespaces(namespace._1, "forContentType") + val HASTARGET = Namespaces(namespace._1, "hasTarget") + val HASOPERATIONTYPE = Namespaces(namespace._1, "hasOperationType") + } + +} diff --git a/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala b/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala index 9a9baf0d..70a30805 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala @@ -52,9 +52,11 @@ object Namespaces { // Web of Things WoTVoc.ThingDescription.namespace._1 -> WoTVoc.ThingDescription.namespace._2, - WoTVoc.Hypermedia.namespace._1 -> WoTVoc.Hypermedia.namespace._2, WoTVoc.WoTMQTT.namespace._1 -> WoTVoc.WoTMQTT.namespace._2, - WoTVoc.WotSecurity.namespace._1 -> WoTVoc.WotSecurity.namespace._2 + WoTVoc.WotSecurity.namespace._1 -> WoTVoc.WotSecurity.namespace._2, + + // Hypermedia + Hypermedia.namespace._1 -> 
Hypermedia.namespace._2 // HTTP //"http" -> "http://www.w3.org/2011/http#", diff --git a/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala index 60ac84c0..b99a10e2 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala @@ -37,11 +37,9 @@ object WoTVoc { val namespace = ("td", "https://www.w3.org/2019/wot/td#") object Property { - val HASPROPERTYAFFORDANCE = Namespaces("td", "hasPropertyAffordance") - val HASFORM = Namespaces("td", "hasForm") - val HASTARGET = Namespaces("td", "hasTarget") - val HASCONTENTTYPE = Namespaces("td", "forContentType") - val HASSECURITYCONFIGURATION = Namespaces("td", "hasSecurityConfiguration") + val HASPROPERTYAFFORDANCE = Namespaces(namespace._1, "hasPropertyAffordance") + val HASFORM = Namespaces(namespace._1, "hasForm") + val HASSECURITYCONFIGURATION = Namespaces(namespace._1, "hasSecurityConfiguration") } object Class { @@ -57,10 +55,10 @@ object WoTVoc { val namespace = ("mqv", "http://www.example.org/mqtt-binding#") // TODO: change once an officlial vocabulary is published object Property { - val CONTROLPACKETVALUE = Namespaces("mqv", "controlPacketValue") - val OPTIONS = Namespaces("mqv", "options") - val OPTIONNAME = Namespaces("mqv", "optionName") - val OPTIONVALUE = Namespaces("mqv", "optionValue") + val CONTROLPACKETVALUE = Namespaces(namespace._1, "controlPacketValue") + val OPTIONS = Namespaces(namespace._1, "options") + val OPTIONNAME = Namespaces(namespace._1, "optionName") + val OPTIONVALUE = Namespaces(namespace._1, "optionValue") } } @@ -71,22 +69,11 @@ object WoTVoc { val namespace = ("wotsec", "https://www.w3.org/2019/wot/security#") object Property { - val IN = Namespaces("wotsec", "in") + val IN = Namespaces(namespace._1, "in") } object Class { - val BASICSECURITYSCHEME = Namespaces("wotsec", "BasicSecurityScheme") - } - } - - 
/////////////////////////////////////////////////////////////////////////// - // HCTL -- Hypermedia Controls Vocabulary (https://www.w3.org/2019/wot/hypermedia) - /////////////////////////////////////////////////////////////////////////// - object Hypermedia { - val namespace = ("hctl", "https://www.w3.org/2019/wot/hypermedia#") - - object Property { - val OPERATIONTYPE = Namespaces("hctl", "hasOperationType") + val BASICSECURITYSCHEME = Namespaces(namespace._1, "BasicSecurityScheme") } } } From 8313bd69cfd60e15fb2efc5196da3a039013c47b Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 8 Jan 2021 21:43:33 +0100 Subject: [PATCH 04/83] WIP: read WoT logical source --- .../core/extractors/DataSourceExtractor.scala | 3 +- .../core/extractors/ExtractorUtil.scala | 19 +++-- .../std/StdDataSourceExtractor.scala | 69 +++++++++---------- .../{Hypermedia.scala => HypermediaVoc.scala} | 2 +- .../core/vocabulary/Namespaces.scala | 2 +- .../engine/StatementEngineTest.scala | 9 +++ 6 files changed, 58 insertions(+), 46 deletions(-) rename src/main/scala/io/rml/framework/core/vocabulary/{Hypermedia.scala => HypermediaVoc.scala} (98%) diff --git a/src/main/scala/io/rml/framework/core/extractors/DataSourceExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/DataSourceExtractor.scala index 8dec9d78..d5235750 100644 --- a/src/main/scala/io/rml/framework/core/extractors/DataSourceExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/DataSourceExtractor.scala @@ -40,8 +40,7 @@ object DataSourceExtractor { * @return */ def apply(): DataSourceExtractor = { - lazy val extractor = new StdDataSourceExtractor() - extractor + new StdDataSourceExtractor() } } diff --git a/src/main/scala/io/rml/framework/core/extractors/ExtractorUtil.scala b/src/main/scala/io/rml/framework/core/extractors/ExtractorUtil.scala index b9cd3a10..b9e6d2cd 100644 --- a/src/main/scala/io/rml/framework/core/extractors/ExtractorUtil.scala +++ 
b/src/main/scala/io/rml/framework/core/extractors/ExtractorUtil.scala @@ -25,16 +25,27 @@ package io.rml.framework.core.extractors import io.rml.framework.core.model.Literal -import io.rml.framework.core.model.rdf.{RDFNode, RDFResource} +import io.rml.framework.core.model.rdf.RDFResource import io.rml.framework.shared.RMLException object ExtractorUtil { - def matchLiteral(node: RDFNode): Literal = { - node match { - case literal: Literal => literal + def extractSingleLiteralFromProperty(resource: RDFResource, property: String): String = { + val properties = resource.listProperties(property); + require(properties.length == 1, resource.uri.toString + ": exactly 1 " + property + " needed."); + properties.head match { + case literal: Literal => literal.value case res: RDFResource => throw new RMLException(res.uri + ": must be a literal.") } } + def extractSingleResourceFromProperty(resource: RDFResource, property: String): RDFResource = { + val properties = resource.listProperties(property); + require(properties.length == 1, resource.uri.toString + ": exactly 1 " + property + " needed."); + properties.head match { + case literal: Literal => throw new RMLException(resource.uri + ": " + property + " must be a resource."); + case resource: RDFResource => resource + } + } + } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala index 0e635bd7..6ab38121 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala @@ -25,10 +25,11 @@ package io.rml.framework.core.extractors.std -import io.rml.framework.core.extractors.{DataSourceExtractor, ExtractorUtil} +import io.rml.framework.core.extractors.DataSourceExtractor +import io.rml.framework.core.extractors.ExtractorUtil.{extractSingleLiteralFromProperty, 
extractSingleResourceFromProperty} import io.rml.framework.core.model._ import io.rml.framework.core.model.rdf.RDFResource -import io.rml.framework.core.vocabulary.{RDFVoc, RMLVoc} +import io.rml.framework.core.vocabulary.{HypermediaVoc, RDFVoc, RMLVoc, WoTVoc} import io.rml.framework.shared.RMLException class StdDataSourceExtractor extends DataSourceExtractor { @@ -68,58 +69,50 @@ class StdDataSourceExtractor extends DataSourceExtractor { case Uri(RMLVoc.Class.TCPSOCKETSTREAM) => extractTCPSocketStream(resource) case Uri(RMLVoc.Class.FILESTREAM) => extractFileStream(resource) case Uri(RMLVoc.Class.KAFKASTREAM) => extractKafkaStream(resource) + case Uri(WoTVoc.ThingDescription.Class.THING) => extractWoTSource(resource) } case literal: Literal => throw new RMLException(literal.value + ": type must be a resource.") } } private def extractFileStream(resource: RDFResource): StreamDataSource = { - val pathProperties = resource.listProperties(RMLVoc.Property.PATH) - require(pathProperties.length == 1, "exactly 1 path needed.") - val path = ExtractorUtil.matchLiteral(pathProperties.head) - FileStream(path.value) + val path = extractSingleLiteralFromProperty(resource, RMLVoc.Property.PATH) + FileStream(path) } private def extractKafkaStream(resource: RDFResource): StreamDataSource = { - val brokerProperties = resource.listProperties(RMLVoc.Property.BROKER) - require(brokerProperties.length == 1, "exactly 1 broker needed") - val groupIdProperties = resource.listProperties(RMLVoc.Property.GROUPID) - require(groupIdProperties.length == 1, "exactly 1 groupID needed") - val topicProperties = resource.listProperties(RMLVoc.Property.TOPIC) - require(topicProperties.length == 1, "exactly 1 topic needed") - val versionProperties = resource.listProperties(RMLVoc.Property.KAFKAVERSION) - require(versionProperties.length <= 1, "at most 1 kafka version needed") + val broker = extractSingleLiteralFromProperty(resource, RMLVoc.Property.BROKER) + val groupId = 
extractSingleLiteralFromProperty(resource, RMLVoc.Property.GROUPID) + val topic = extractSingleLiteralFromProperty(resource, RMLVoc.Property.TOPIC) + KafkaStream(List(broker), groupId, topic) + } - val broker = ExtractorUtil.matchLiteral(brokerProperties.head) - val groupId = ExtractorUtil.matchLiteral(groupIdProperties.head) - val topic = ExtractorUtil.matchLiteral(topicProperties.head) + private def extractTCPSocketStream(resource: RDFResource): StreamDataSource = { + val hostName = extractSingleLiteralFromProperty(resource, RMLVoc.Property.HOSTNAME) + val port = extractSingleLiteralFromProperty(resource, RMLVoc.Property.PORT) + val _type = extractSingleLiteralFromProperty(resource, RMLVoc.Property.TYPE) + TCPSocketStream(hostName, port.toInt, _type) + } - //val kafkaVersion = Kafka010 + private def extractWoTSource(resource: RDFResource): DataSource = { + // A WoT Thing contains (in our case) a PropertyAffordance, which contains a form describing how to access the real source + val propertyAffordance = extractSingleResourceFromProperty(resource, WoTVoc.ThingDescription.Property.HASPROPERTYAFFORDANCE); + val form = extractSingleResourceFromProperty(propertyAffordance, WoTVoc.ThingDescription.Property.HASFORM); - KafkaStream(List(broker.value), groupId.value, topic.value/*, kafkaVersion*/) - } + // extract info from form - private def extractTCPSocketStream(resource: RDFResource): StreamDataSource = { - val hostNameProperties = resource.listProperties(RMLVoc.Property.HOSTNAME) - require(hostNameProperties.length == 1, resource.uri.toString + ": exactly 1 hostname needed.") - val portProperties = resource.listProperties(RMLVoc.Property.PORT) - require(portProperties.length == 1, resource.uri.toString + ": exactly 1 port needed.") - val typeProperties = resource.listProperties(RMLVoc.Property.TYPE) - require(typeProperties.length == 1, resource.uri.toString + ": needs type.") - - val hostName = hostNameProperties.head match { - case resource: RDFResource => throw 
new RMLException(resource.uri + ": hostname must be a literal.") - case literal: Literal => literal.value - } - val port = portProperties.head match { - case resource: RDFResource => throw new RMLException(resource.uri + ": port must be a literal.") - case literal: Literal => literal.value - } + // extract the hypermedia target (~uri) + val hypermediaTarget = extractSingleLiteralFromProperty(form, HypermediaVoc.Property.HASTARGET); + + // extract the desired content type + val contentType = extractSingleLiteralFromProperty(form, HypermediaVoc.Property.FORCONTENTTYPE); - val _type = ExtractorUtil.matchLiteral(typeProperties.head) - TCPSocketStream(hostName, port.toInt, _type.value) + + // TODO replace with real source + FileDataSource(Literal("/tmp/test")) } + } diff --git a/src/main/scala/io/rml/framework/core/vocabulary/Hypermedia.scala b/src/main/scala/io/rml/framework/core/vocabulary/HypermediaVoc.scala similarity index 98% rename from src/main/scala/io/rml/framework/core/vocabulary/Hypermedia.scala rename to src/main/scala/io/rml/framework/core/vocabulary/HypermediaVoc.scala index 4a238ace..fa97026a 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/Hypermedia.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/HypermediaVoc.scala @@ -27,7 +27,7 @@ package io.rml.framework.core.vocabulary * HCTL -- Hypermedia Controls Vocabulary (https://www.w3.org/2019/wot/hypermedia) * **/ -object Hypermedia { +object HypermediaVoc { val namespace = ("hctl", "https://www.w3.org/2019/wot/hypermedia#") object Property { diff --git a/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala b/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala index 70a30805..6f8e6d8a 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala @@ -56,7 +56,7 @@ object Namespaces { WoTVoc.WotSecurity.namespace._1 -> WoTVoc.WotSecurity.namespace._2, // Hypermedia - 
Hypermedia.namespace._1 -> Hypermedia.namespace._2 + HypermediaVoc.namespace._1 -> HypermediaVoc.namespace._2 // HTTP //"http" -> "http://www.w3.org/2011/http#", diff --git a/src/test/scala/io/rml/framework/engine/StatementEngineTest.scala b/src/test/scala/io/rml/framework/engine/StatementEngineTest.scala index c005cad8..f1199c03 100644 --- a/src/test/scala/io/rml/framework/engine/StatementEngineTest.scala +++ b/src/test/scala/io/rml/framework/engine/StatementEngineTest.scala @@ -131,4 +131,13 @@ class StatementEngineTest extends FunSuite with Matchers { executeTest("csv-extensive-1/complete.rml.ttl") } + /////////////////////////// + // Web of Things tests + /////////////////////////// + + test("wot-mqtt-logical-source") { + pending + executeTest("WoT/mqtt-logical-source/wot-mqtt.rml.ttl") + } + } From 368ba804b6662992ce8303d1dc094c52f9f9ff60 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Mon, 11 Jan 2021 14:04:21 +0100 Subject: [PATCH 05/83] Read WoT-MQTT logical source. Example ready, now actual source needs to be coupled. 
--- .../core/extractors/ExtractorUtil.scala | 24 +++++++ .../std/StdDataSourceExtractor.scala | 33 +++++++++- .../core/model/rdf/RDFResource.scala | 9 ++- .../core/model/rdf/jena/JenaResource.scala | 9 ++- .../WoT/mqtt-logical-source/wot-mqtt.rml.ttl | 66 +++++++++++++++++++ 5 files changed, 138 insertions(+), 3 deletions(-) create mode 100644 src/test/resources/WoT/mqtt-logical-source/wot-mqtt.rml.ttl diff --git a/src/main/scala/io/rml/framework/core/extractors/ExtractorUtil.scala b/src/main/scala/io/rml/framework/core/extractors/ExtractorUtil.scala index b9e6d2cd..28588039 100644 --- a/src/main/scala/io/rml/framework/core/extractors/ExtractorUtil.scala +++ b/src/main/scala/io/rml/framework/core/extractors/ExtractorUtil.scala @@ -30,6 +30,18 @@ import io.rml.framework.shared.RMLException object ExtractorUtil { + def extractLiteralFromProperty(resource: RDFResource, property: String, defaultValue: String) = { + val properties = resource.listProperties(property); + if (properties.isEmpty) { + defaultValue; + } else { + properties.head match { + case literal: Literal => literal.value + case _ => defaultValue + } + } + } + def extractSingleLiteralFromProperty(resource: RDFResource, property: String): String = { val properties = resource.listProperties(property); require(properties.length == 1, resource.uri.toString + ": exactly 1 " + property + " needed."); @@ -48,4 +60,16 @@ object ExtractorUtil { } } + def extractResourceFromProperty(resource: RDFResource, property: String): Option[RDFResource] = { + val properties = resource.listProperties(property); + if (properties.isEmpty) { + None + } else { + properties.head match { + case literal: Literal => throw new RMLException(resource.uri + ": " + property + " must be a resource."); + case resource: RDFResource => Some(resource) + } + } + } + } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala 
index 6ab38121..caf26bb9 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala @@ -26,7 +26,7 @@ package io.rml.framework.core.extractors.std import io.rml.framework.core.extractors.DataSourceExtractor -import io.rml.framework.core.extractors.ExtractorUtil.{extractSingleLiteralFromProperty, extractSingleResourceFromProperty} +import io.rml.framework.core.extractors.ExtractorUtil.{extractLiteralFromProperty, extractResourceFromProperty, extractSingleLiteralFromProperty, extractSingleResourceFromProperty} import io.rml.framework.core.model._ import io.rml.framework.core.model.rdf.RDFResource import io.rml.framework.core.vocabulary.{HypermediaVoc, RDFVoc, RMLVoc, WoTVoc} @@ -110,9 +110,40 @@ class StdDataSourceExtractor extends DataSourceExtractor { // extract the desired content type val contentType = extractSingleLiteralFromProperty(form, HypermediaVoc.Property.FORCONTENTTYPE); + // now check for soure type (MQTT, HTTP, ...) 
+ val isMQTT = form.hasPredicateWith(WoTVoc.WoTMQTT.namespace._2); + if (isMQTT) { + return extractWoTMQTTSource(form, hypermediaTarget, contentType); + } // TODO replace with real source FileDataSource(Literal("/tmp/test")) } + private def extractWoTMQTTSource(form: RDFResource, hypermediaTarget: String, contentType: String): DataSource = { + val controlPacketValue = extractLiteralFromProperty(form, WoTVoc.WoTMQTT.Property.CONTROLPACKETVALUE, "SUBSCRIBE"); + + var qosOpt: Option[String] = None; + var dup: Boolean = false; + val mqttOptions = extractResourceFromProperty(form, WoTVoc.WoTMQTT.Property.OPTIONS); + if (mqttOptions.isDefined) { + // extract the actual values + val mqttOptionsResource = mqttOptions.get; + mqttOptionsResource.getList + .map(rdfNode => rdfNode.asInstanceOf[RDFResource]) + .foreach(mqttOptionsResource => { + val optionName = extractSingleLiteralFromProperty(mqttOptionsResource, WoTVoc.WoTMQTT.Property.OPTIONNAME); + optionName match { + case "qos" => qosOpt = Some(extractSingleLiteralFromProperty(mqttOptionsResource, WoTVoc.WoTMQTT.Property.OPTIONVALUE)); + case "dup" => dup = true; + }; + }); + } + + // TODO make actual data source + logWarning("Here a MQTT data source will be created. hypermediaTarget: " + hypermediaTarget + + ", contentType: " + contentType + ", dup: " + dup + ", qusOpt: " + qosOpt) + FileDataSource(Literal("/tmp/test")) + } + } diff --git a/src/main/scala/io/rml/framework/core/model/rdf/RDFResource.scala b/src/main/scala/io/rml/framework/core/model/rdf/RDFResource.scala index efcff62e..b3a15ea4 100644 --- a/src/main/scala/io/rml/framework/core/model/rdf/RDFResource.scala +++ b/src/main/scala/io/rml/framework/core/model/rdf/RDFResource.scala @@ -25,8 +25,8 @@ package io.rml.framework.core.model.rdf -import io.rml.framework.core.model.Uri import io.rml.framework.core.internal.Logging +import io.rml.framework.core.model.Uri /** * This trait represents a resource that is able to query an underlying RDF model. 
@@ -47,6 +47,13 @@ trait RDFResource extends RDFNode with Logging { def getList: List[RDFNode] + /** + * Checks if there exists a predicate matching the prefix string. + * @param prefix The predicate to look for + * @return true if found, false if not found. + */ + def hasPredicateWith(prefix: String): Boolean + /** * * @param property diff --git a/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaResource.scala b/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaResource.scala index 3ee864c9..a4adc40a 100644 --- a/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaResource.scala +++ b/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaResource.scala @@ -27,7 +27,7 @@ package io.rml.framework.core.model.rdf.jena import io.rml.framework.core.model.Uri import io.rml.framework.core.model.rdf.{RDFLiteral, RDFNode, RDFResource} -import io.rml.framework.core.vocabulary.{RDFVoc, RMLVoc} +import io.rml.framework.core.vocabulary.RDFVoc import org.apache.commons.lang3.builder.HashCodeBuilder import org.apache.jena.rdf.model.{RDFList, Resource} @@ -43,6 +43,13 @@ class JenaResource(val resource: Resource) extends RDFResource { else Uri(_uri) } + override def hasPredicateWith(prefix: String): Boolean = { + resource + .listProperties().asScala + .map(_.getPredicate) + .exists(_.getNameSpace.equals(prefix)) + } + override def listProperties(propertyUri: String): List[RDFNode] = { diff --git a/src/test/resources/WoT/mqtt-logical-source/wot-mqtt.rml.ttl b/src/test/resources/WoT/mqtt-logical-source/wot-mqtt.rml.ttl new file mode 100644 index 00000000..37c7f532 --- /dev/null +++ b/src/test/resources/WoT/mqtt-logical-source/wot-mqtt.rml.ttl @@ -0,0 +1,66 @@ +@prefix rml: . +@prefix rr: . +@prefix ql: . +@prefix rdf: . +@prefix ex: . +@prefix schema: . +@prefix dbo: . +@prefix td: . +@prefix htv: . +@prefix hctl: . +@prefix time: . +@prefix xsd: . +@prefix wotsec: . +@prefix mqv: . +@base . 
+ +# API key in HTTP header +<#WotMQTTSecurity> a wotsec:BasicSecurityScheme; + wotsec:in "body"; +. + +<#WoTWebAPI> a td:Thing; + td:hasPropertyAffordance [ + td:hasForm [ + # URL and content type + hctl:hasTarget "mqtt://localhost/topic"; + hctl:forContentType "application/json"; + # Read only + hctl:hasOperationType "readproperty" ; + # Set MQTT stuff + mqv:controlPacketValue "SUBSCRIBE"; + mqv:options ([ mqv:optionName "qos"; mqv:optionValue "1" ] [ mqv:optionName "dup" ]); + ]; + ]; + td:hasSecurityConfiguration <#WotMQTTSecurity> ; +. + +<#TriplesMap> a rr:TriplesMap; + rml:logicalSource [ + rml:source <#WoTWebAPI>; + rml:referenceFormulation ql:JSONPath; + rml:iterator "$.[*]"; + rml:frequency [ a time:GeneralDateTimeDescription; + time:minute "5"^^xsd:integer; + ]; + ]; + + rr:subjectMap [ + rr:template "http://example.org/bigbelly/{id}" + ]; + + rr:predicateObjectMap [ + rr:predicate ex:fillingLevel; + rr:objectMap [ + rml:reference "fillingLevel.value"; + ]; + ]; + + rr:predicateObjectMap [ + rr:predicate rdf:type; + rr:objectMap [ + rr:constant ex:Trashcan; + ]; + ]; +. + From cf51a798c874f741ec2ae09f2e0bc22d470f0933 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 12 Jan 2021 12:05:02 +0100 Subject: [PATCH 06/83] Refactoring of vocabularies: put every (class of) vocabulary in its own object; that way it's easier to add or remove vocabularies. 
--- .../std/StdDataSourceExtractor.scala | 22 +- .../std/StdFunctionMapExtractor.scala | 4 +- .../extractors/std/StdGraphMapExtractor.scala | 10 +- .../std/StdJoinConditionExtractor.scala | 6 +- .../std/StdObjectMapExtractor.scala | 32 +-- .../std/StdPredicateMapExtractor.scala | 12 +- .../std/StdPredicateObjectMapExtractor.scala | 4 +- .../std/StdSubjectMapExtractor.scala | 10 +- .../std/StdTriplesMapExtractor.scala | 8 +- .../extractors/std/TermMapExtractor.scala | 8 +- .../core/function/FunctionUtils.scala | 31 ++- .../model/std/StdRandomFunction.scala | 7 +- .../model/std/StdUpperCaseFunction.scala | 10 +- .../core/function/std/StdFunctionLoader.scala | 26 +-- .../framework/core/model/PredicateMap.scala | 4 +- .../core/model/StreamDataSource.scala | 8 +- .../core/model/std/StdGraphMap.scala | 4 +- .../framework/core/vocabulary/DOAPVoc.scala | 38 ++++ .../framework/core/vocabulary/FormatVoc.scala | 38 ++++ .../framework/core/vocabulary/FunVoc.scala | 129 +++++++++++ .../core/vocabulary/HypermediaVoc.scala | 6 +- .../framework/core/vocabulary/LibVoc.scala | 38 ++++ .../core/vocabulary/Namespaces.scala | 30 +-- .../framework/core/vocabulary/QueryVoc.scala | 37 ++++ .../framework/core/vocabulary/R2RMLVoc.scala | 67 ++++++ .../framework/core/vocabulary/RDFSVoc.scala | 31 +++ .../framework/core/vocabulary/RDFVoc.scala | 10 +- .../framework/core/vocabulary/RMLSVoc.scala | 62 ++++++ .../framework/core/vocabulary/RMLVoc.scala | 200 +----------------- .../framework/core/vocabulary/WoTVoc.scala | 20 +- .../framework/core/vocabulary/XsdVoc.scala | 44 ++++ .../FunctionMapGeneratorAssembler.scala | 17 +- .../statement/ObjectGeneratorAssembler.scala | 4 +- .../statement/SubjectGeneratorAssembler.scala | 4 +- .../statement/TermMapGeneratorAssembler.scala | 18 +- .../framework/flink/source/FileDataSet.scala | 12 +- .../framework/flink/source/JSONStream.scala | 4 +- .../rml/framework/flink/source/Source.scala | 8 +- .../framework/flink/source/XMLStream.scala | 4 +- 
.../std/StdObjectMapExtractorTest.scala | 8 +- .../StdPredicateObjectMapExtractorTest.scala | 6 +- 41 files changed, 669 insertions(+), 372 deletions(-) create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/DOAPVoc.scala create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/FormatVoc.scala create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/FunVoc.scala create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/LibVoc.scala create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/QueryVoc.scala create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/R2RMLVoc.scala create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/RDFSVoc.scala create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/RMLSVoc.scala create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/XsdVoc.scala diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala index caf26bb9..3c93ecf9 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala @@ -29,7 +29,7 @@ import io.rml.framework.core.extractors.DataSourceExtractor import io.rml.framework.core.extractors.ExtractorUtil.{extractLiteralFromProperty, extractResourceFromProperty, extractSingleLiteralFromProperty, extractSingleResourceFromProperty} import io.rml.framework.core.model._ import io.rml.framework.core.model.rdf.RDFResource -import io.rml.framework.core.vocabulary.{HypermediaVoc, RDFVoc, RMLVoc, WoTVoc} +import io.rml.framework.core.vocabulary._ import io.rml.framework.shared.RMLException class StdDataSourceExtractor extends DataSourceExtractor { @@ -66,9 +66,9 @@ class StdDataSourceExtractor extends DataSourceExtractor { if (properties.size != 1) throw new RMLException(resource.uri + ": type must be 
given.") properties.head match { case classResource: RDFResource => classResource.uri match { - case Uri(RMLVoc.Class.TCPSOCKETSTREAM) => extractTCPSocketStream(resource) - case Uri(RMLVoc.Class.FILESTREAM) => extractFileStream(resource) - case Uri(RMLVoc.Class.KAFKASTREAM) => extractKafkaStream(resource) + case Uri(RMLSVoc.Class.TCPSOCKETSTREAM) => extractTCPSocketStream(resource) + case Uri(RMLSVoc.Class.FILESTREAM) => extractFileStream(resource) + case Uri(RMLSVoc.Class.KAFKASTREAM) => extractKafkaStream(resource) case Uri(WoTVoc.ThingDescription.Class.THING) => extractWoTSource(resource) } case literal: Literal => throw new RMLException(literal.value + ": type must be a resource.") @@ -76,23 +76,23 @@ class StdDataSourceExtractor extends DataSourceExtractor { } private def extractFileStream(resource: RDFResource): StreamDataSource = { - val path = extractSingleLiteralFromProperty(resource, RMLVoc.Property.PATH) + val path = extractSingleLiteralFromProperty(resource, RMLSVoc.Property.PATH) FileStream(path) } private def extractKafkaStream(resource: RDFResource): StreamDataSource = { - val broker = extractSingleLiteralFromProperty(resource, RMLVoc.Property.BROKER) - val groupId = extractSingleLiteralFromProperty(resource, RMLVoc.Property.GROUPID) - val topic = extractSingleLiteralFromProperty(resource, RMLVoc.Property.TOPIC) + val broker = extractSingleLiteralFromProperty(resource, RMLSVoc.Property.BROKER) + val groupId = extractSingleLiteralFromProperty(resource, RMLSVoc.Property.GROUPID) + val topic = extractSingleLiteralFromProperty(resource, RMLSVoc.Property.TOPIC) KafkaStream(List(broker), groupId, topic) } private def extractTCPSocketStream(resource: RDFResource): StreamDataSource = { - val hostName = extractSingleLiteralFromProperty(resource, RMLVoc.Property.HOSTNAME) - val port = extractSingleLiteralFromProperty(resource, RMLVoc.Property.PORT) + val hostName = extractSingleLiteralFromProperty(resource, RMLSVoc.Property.HOSTNAME) + val port = 
extractSingleLiteralFromProperty(resource, RMLSVoc.Property.PORT) - val _type = extractSingleLiteralFromProperty(resource, RMLVoc.Property.TYPE) + val _type = extractSingleLiteralFromProperty(resource, RMLSVoc.Property.TYPE) TCPSocketStream(hostName, port.toInt, _type) } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdFunctionMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdFunctionMapExtractor.scala index 4288ed9c..15cd6242 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdFunctionMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdFunctionMapExtractor.scala @@ -3,7 +3,7 @@ package io.rml.framework.core.extractors.std import io.rml.framework.core.extractors.{FunctionMapExtractor, PredicateObjectMapExtractor} import io.rml.framework.core.model.FunctionMap import io.rml.framework.core.model.rdf.RDFResource -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.FunVoc class StdFunctionMapExtractor extends FunctionMapExtractor { @@ -33,7 +33,7 @@ class StdFunctionMapExtractor extends FunctionMapExtractor { * @return */ private def extractFunctionMap(fnParentMap: String, resource: RDFResource): List[FunctionMap] = { - val functionValues = resource.listProperties(RMLVoc.Property.FUNCTIONVALUE) + val functionValues = resource.listProperties(FunVoc.Fnml.Property.FUNCTIONVALUE) require(functionValues.size <= 1, "At most only 1 function value allowed.") val result = functionValues.map(node => { diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdGraphMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdGraphMapExtractor.scala index b082888f..1d625961 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdGraphMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdGraphMapExtractor.scala @@ -27,7 +27,7 @@ package io.rml.framework.core.extractors.std import 
io.rml.framework.core.extractors.{FunctionMapExtractor, GraphMapExtractor} import io.rml.framework.core.model.rdf.{RDFNode, RDFResource} import io.rml.framework.core.model.{GraphMap, Literal, Uri} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.R2RMLVoc import io.rml.framework.shared.RMLException class StdGraphMapExtractor extends GraphMapExtractor { @@ -40,8 +40,8 @@ class StdGraphMapExtractor extends GraphMapExtractor { * */ override def extract(resource: RDFResource): Option[GraphMap] = { - val mapProperties = resource.listProperties(RMLVoc.Property.GRAPHMAP) - val shortcutProperties = resource.listProperties(RMLVoc.Property.GRAPH) + val mapProperties = resource.listProperties(R2RMLVoc.Property.GRAPHMAP) + val shortcutProperties = resource.listProperties(R2RMLVoc.Property.GRAPH) val amount = mapProperties.size + shortcutProperties.size amount match { @@ -59,7 +59,7 @@ class StdGraphMapExtractor extends GraphMapExtractor { override def extractTermType(resource: RDFResource): Option[Uri] = { val result = super.extractTermType(resource) - if (result.isDefined) result else Some(Uri(RMLVoc.Class.IRI)) + if (result.isDefined) result else Some(Uri(R2RMLVoc.Class.IRI)) } def generalExtractGraph(node: RDFNode, extractFunc: RDFResource => Option[GraphMap]): Option[GraphMap] = { @@ -69,7 +69,7 @@ class StdGraphMapExtractor extends GraphMapExtractor { } resource.uri match { - case Uri(RMLVoc.Property.DEFAULTGRAPH) => None + case Uri(R2RMLVoc.Property.DEFAULTGRAPH) => None case _ => extractFunc(resource) } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdJoinConditionExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdJoinConditionExtractor.scala index 46902a07..16005e73 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdJoinConditionExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdJoinConditionExtractor.scala @@ -29,7 +29,7 @@ import 
io.rml.framework.core.extractors.JoinConditionExtractor import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.rdf.RDFResource import io.rml.framework.core.model.{JoinCondition, Literal} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.R2RMLVoc import io.rml.framework.shared.RMLException class StdJoinConditionExtractor extends JoinConditionExtractor with Logging { @@ -54,7 +54,7 @@ class StdJoinConditionExtractor extends JoinConditionExtractor with Logging { private def extractParent(resource: RDFResource): Option[Literal] = { - val property = RMLVoc.Property.PARENT + val property = R2RMLVoc.Property.PARENT val properties = resource.listProperties(property) if (properties.size != 1) @@ -68,7 +68,7 @@ class StdJoinConditionExtractor extends JoinConditionExtractor with Logging { } private def extractChild(resource: RDFResource): Option[Literal] = { - val property = RMLVoc.Property.CHILD + val property = R2RMLVoc.Property.CHILD val properties = resource.listProperties(property) if (properties.size != 1) diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala index d5416075..0b241d95 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala @@ -29,7 +29,7 @@ import io.rml.framework.core.extractors.{FunctionMapExtractor, JoinConditionExtr import io.rml.framework.core.model._ import io.rml.framework.core.model.rdf.{RDFLiteral, RDFResource} import io.rml.framework.core.util.Util -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.{FunVoc, R2RMLVoc, RMLVoc} import io.rml.framework.shared.RMLException class StdObjectMapExtractor extends ObjectMapExtractor { @@ -50,15 +50,15 @@ class StdObjectMapExtractor extends ObjectMapExtractor { * 
@return */ private def extractObjects(resource: RDFResource): List[ObjectMap] = { - val property = RMLVoc.Property.OBJECT + val property = R2RMLVoc.Property.OBJECT val properties = resource.listProperties(property) // iterates over predicates, converts these to predicate maps as blanks properties.map { case literal: RDFLiteral => - ObjectMap("", constant = Some(Literal(literal.value)), termType = Some(Uri(RMLVoc.Class.LITERAL))) + ObjectMap("", constant = Some(Literal(literal.value)), termType = Some(Uri(R2RMLVoc.Class.LITERAL))) case resource: RDFResource => - ObjectMap("", constant = Some(resource.uri), termType = Some(Uri(RMLVoc.Class.IRI))) + ObjectMap("", constant = Some(resource.uri), termType = Some(Uri(R2RMLVoc.Class.IRI))) } } @@ -70,7 +70,7 @@ class StdObjectMapExtractor extends ObjectMapExtractor { */ private def extractObjectMaps(resource: RDFResource): List[ObjectMap] = { this.logDebug("extractObjectMaps(resource)") - val property = RMLVoc.Property.OBJECTMAP + val property = R2RMLVoc.Property.OBJECTMAP val properties = resource.listProperties(property) // iterates over predicatesMaps @@ -106,7 +106,7 @@ class StdObjectMapExtractor extends ObjectMapExtractor { } def extractDatatype(resource: RDFResource): Option[Uri] = { - val property = RMLVoc.Property.DATATYPE + val property = R2RMLVoc.Property.DATATYPE val properties = resource.listProperties(property) if (properties.size > 1) @@ -124,13 +124,13 @@ class StdObjectMapExtractor extends ObjectMapExtractor { if (result.isDefined) result else { //if the resource has rr:constant, return the type of the node referred by rr:constant. 
- val constantValue = resource.listProperties(RMLVoc.Property.CONSTANT) + val constantValue = resource.listProperties(R2RMLVoc.Property.CONSTANT) if (constantValue.nonEmpty) { constantValue.head match { - case literal: Literal => Some(Uri(RMLVoc.Class.LITERAL)) - case _ => Some(Uri(RMLVoc.Class.IRI)) + case literal: Literal => Some(Uri(R2RMLVoc.Class.LITERAL)) + case _ => Some(Uri(R2RMLVoc.Class.IRI)) } } else { @@ -142,17 +142,17 @@ class StdObjectMapExtractor extends ObjectMapExtractor { val elements = resource.listProperties(RMLVoc.Property.REFERENCE) ++ resource.listProperties(RMLVoc.Property.REFERENCEFORMULATION) ++ - resource.listProperties(RMLVoc.Property.DATATYPE) ++ - resource.listProperties(RMLVoc.Property.FUNCTIONVALUE) + resource.listProperties(R2RMLVoc.Property.DATATYPE) ++ + resource.listProperties(FunVoc.Fnml.Property.FUNCTIONVALUE) - if (elements.nonEmpty) Some(Uri(RMLVoc.Class.LITERAL)) - else Some(Uri(RMLVoc.Class.IRI)) + if (elements.nonEmpty) Some(Uri(R2RMLVoc.Class.LITERAL)) + else Some(Uri(R2RMLVoc.Class.IRI)) } } } def extractLanguage(resource: RDFResource): Option[Literal] = { - val property = RMLVoc.Property.LANGUAGE + val property = R2RMLVoc.Property.LANGUAGE val properties = resource.listProperties(property) if (properties.size > 1) @@ -174,7 +174,7 @@ class StdObjectMapExtractor extends ObjectMapExtractor { private def extractParentTriplesMap(resource: RDFResource): Option[String] = { - val property = RMLVoc.Property.PARENTTRIPLESMAP + val property = R2RMLVoc.Property.PARENTTRIPLESMAP val properties = resource.listProperties(property) if (properties.size > 1) @@ -190,7 +190,7 @@ class StdObjectMapExtractor extends ObjectMapExtractor { } private def extractJoinCondition(resource: RDFResource): Option[JoinCondition] = { - val property = RMLVoc.Property.JOINCONDITION + val property = R2RMLVoc.Property.JOINCONDITION val properties = resource.listProperties(property) if (properties.size > 1) diff --git 
a/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala index d0bff3b5..1d5c7933 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala @@ -28,7 +28,7 @@ package io.rml.framework.core.extractors.std import io.rml.framework.core.extractors.{FunctionMapExtractor, PredicateMapExtractor} import io.rml.framework.core.model.rdf.{RDFLiteral, RDFResource} import io.rml.framework.core.model.{PredicateMap, Uri} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.R2RMLVoc import io.rml.framework.shared.RMLException @@ -50,15 +50,15 @@ class StdPredicateMapExtractor() extends PredicateMapExtractor { * @return */ private def extractPredicates(resource: RDFResource): List[PredicateMap] = { - val property = RMLVoc.Property.PREDICATE + val property = R2RMLVoc.Property.PREDICATE val properties = resource.listProperties(property) // iterates over predicates, converts these to predicate maps as blanks properties.map { case literal: RDFLiteral => - PredicateMap("", constant = Some(Uri(literal.value)), termType = Some(Uri(RMLVoc.Class.IRI))) + PredicateMap("", constant = Some(Uri(literal.value)), termType = Some(Uri(R2RMLVoc.Class.IRI))) case resource: RDFResource => - PredicateMap("", constant = Some(resource.uri), termType = Some(Uri(RMLVoc.Class.IRI))) + PredicateMap("", constant = Some(resource.uri), termType = Some(Uri(R2RMLVoc.Class.IRI))) } } @@ -69,7 +69,7 @@ class StdPredicateMapExtractor() extends PredicateMapExtractor { * @return */ private def extractPredicateMaps(resource: RDFResource): List[PredicateMap] = { - val property = RMLVoc.Property.PREDICATEMAP + val property = R2RMLVoc.Property.PREDICATEMAP val properties = resource.listProperties(property) // iterates over predicatesMaps @@ -88,7 +88,7 
@@ class StdPredicateMapExtractor() extends PredicateMapExtractor { * @return */ private def extractPredicateMap(resource: RDFResource): PredicateMap = { - val termType = Some(Uri(RMLVoc.Class.IRI)) // this is always the case as defined by the spec + val termType = Some(Uri(R2RMLVoc.Class.IRI)) // this is always the case as defined by the spec val template = extractTemplate(resource) val constant = extractConstant(resource) val reference = extractReference(resource) diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateObjectMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateObjectMapExtractor.scala index 6be34471..31286a04 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateObjectMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateObjectMapExtractor.scala @@ -29,7 +29,7 @@ import io.rml.framework.core.extractors._ import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.PredicateObjectMap import io.rml.framework.core.model.rdf.{RDFLiteral, RDFResource} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.R2RMLVoc import io.rml.framework.shared.RMLException /** @@ -48,7 +48,7 @@ class StdPredicateObjectMapExtractor extends PredicateObjectMapExtractor with Lo logDebug(node.uri + ": Extracting predicate object maps.") // filter all predicate object map resources - val properties = node.listProperties(RMLVoc.Property.PREDICATEOBJECTMAP) + val properties = node.listProperties(R2RMLVoc.Property.PREDICATEOBJECTMAP) // iterate over all found predicate object map resources properties.map { diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala index e10810ca..6c9abcf8 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala +++ 
b/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala @@ -29,7 +29,7 @@ import io.rml.framework.core.extractors.{FunctionMapExtractor, GraphMapExtractor import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.rdf.{RDFLiteral, RDFResource} import io.rml.framework.core.model.{Literal, SubjectMap, Uri} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.R2RMLVoc import io.rml.framework.shared.RMLException /** @@ -50,7 +50,7 @@ class StdSubjectMapExtractor extends SubjectMapExtractor with Logging { logDebug(node.uri + ": Extracting subject map.") - val property = RMLVoc.Property.SUBJECTMAP + val property = R2RMLVoc.Property.SUBJECTMAP val subjectMapResources = node.listProperties(property) if (subjectMapResources.size != 1) @@ -100,9 +100,9 @@ class StdSubjectMapExtractor extends SubjectMapExtractor with Logging { val result = super.extractTermType(resource) result match{ - case Some(Uri(RMLVoc.Class.LITERAL)) => throw new RMLException("Subject cannot be literal type") + case Some(Uri(R2RMLVoc.Class.LITERAL)) => throw new RMLException("Subject cannot be literal type") case Some(e) => Some(e) - case _ => Some(Uri(RMLVoc.Class.IRI)) + case _ => Some(Uri(R2RMLVoc.Class.IRI)) } } @@ -115,7 +115,7 @@ class StdSubjectMapExtractor extends SubjectMapExtractor with Logging { */ @throws(classOf[RMLException]) private def extractClass(resource: RDFResource): List[Uri] = { - val property = RMLVoc.Property.CLASS + val property = R2RMLVoc.Property.CLASS val classResources = resource.listProperties(property) classResources.map { case resource: RDFResource => resource.uri diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala index 298fce3c..e8e97b82 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala +++ 
b/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala @@ -29,7 +29,7 @@ import io.rml.framework.core.extractors.{TriplesMapsCache, _} import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.rdf.{RDFGraph, RDFResource} import io.rml.framework.core.model.{TriplesMap, Uri} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.{R2RMLVoc, RMLVoc} import io.rml.framework.shared.RMLException /** @@ -52,11 +52,11 @@ object StdTriplesMapExtractor extends TriplesMapExtractor with Logging { */ private def isTriplesMap(resource : RDFResource) : Boolean = { val logicalSourceProperty = RMLVoc.Property.LOGICALSOURCE - val subjectMapProperty = RMLVoc.Property.SUBJECTMAP - val subjectConstantProperty = RMLVoc.Property.SUBJECT + val subjectMapProperty = R2RMLVoc.Property.SUBJECTMAP + val subjectConstantProperty = R2RMLVoc.Property.SUBJECT // trivial case - val isTriplesMap = resource.getType.equals(Some(Uri(RMLVoc.Class.TRIPLESMAP))) + val isTriplesMap = resource.getType.equals(Some(Uri(R2RMLVoc.Class.TRIPLESMAP))) // property requirements for a triplesmap val hasExactlyOneLogicalSource = resource.listProperties(logicalSourceProperty).length==1 diff --git a/src/main/scala/io/rml/framework/core/extractors/std/TermMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/TermMapExtractor.scala index 39b7ccad..587760cd 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/TermMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/TermMapExtractor.scala @@ -28,7 +28,7 @@ package io.rml.framework.core.extractors.std import io.rml.framework.core.extractors.ResourceExtractor import io.rml.framework.core.model.rdf.RDFResource import io.rml.framework.core.model.{Entity, Literal, Uri} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.{R2RMLVoc, RMLVoc} import io.rml.framework.shared.RMLException import 
scala.util.matching.Regex @@ -49,7 +49,7 @@ abstract class TermMapExtractor[T] extends ResourceExtractor[T] { */ @throws(classOf[RMLException]) protected def extractTemplate(resource: RDFResource): Option[Literal] = { - val property = RMLVoc.Property.TEMPLATE + val property = R2RMLVoc.Property.TEMPLATE val properties = resource.listProperties(property) if (properties.size > 1) @@ -99,7 +99,7 @@ abstract class TermMapExtractor[T] extends ResourceExtractor[T] { */ @throws(classOf[RMLException]) protected def extractConstant(resource: RDFResource): Option[Entity] = { - val property = RMLVoc.Property.CONSTANT + val property = R2RMLVoc.Property.CONSTANT val properties = resource.listProperties(property) if (properties.size > 1) @@ -121,7 +121,7 @@ abstract class TermMapExtractor[T] extends ResourceExtractor[T] { */ @throws(classOf[RMLException]) protected def extractTermType(resource: RDFResource): Option[Uri] = { - val property = RMLVoc.Property.TERMTYPE + val property = R2RMLVoc.Property.TERMTYPE val properties = resource.listProperties(property) if (properties.size > 1) diff --git a/src/main/scala/io/rml/framework/core/function/FunctionUtils.scala b/src/main/scala/io/rml/framework/core/function/FunctionUtils.scala index 7a1fbff2..d368ffd3 100644 --- a/src/main/scala/io/rml/framework/core/function/FunctionUtils.scala +++ b/src/main/scala/io/rml/framework/core/function/FunctionUtils.scala @@ -1,16 +1,15 @@ package io.rml.framework.core.function -import java.io.File -import java.net.MalformedURLException -import java.time.Instant - import io.rml.framework.api.RMLEnvironment import io.rml.framework.core.function.model.Parameter import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.{Entity, Literal, Uri} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.{RDFVoc, XsdVoc} import io.rml.framework.shared.RMLException +import java.io.File +import java.net.MalformedURLException +import java.time.Instant import 
scala.reflect.internal.util.ScalaClassLoader.URLClassLoader object FunctionUtils extends Logging { @@ -61,17 +60,17 @@ object FunctionUtils extends Logging { def getTypeClass(uri: Uri): Class[_] = { uri match { - case Uri(RMLVoc.Type.XSD_POSITIVE_INTEGER) => classOf[Int] - case Uri(RMLVoc.Type.XSD_INTEGER) => classOf[Int] - case Uri(RMLVoc.Type.XSD_INT) => classOf[Int] - case Uri(RMLVoc.Type.XSD_STRING) => classOf[String] - case Uri(RMLVoc.Type.XSD_DOUBLE) => classOf[Double] - case Uri(RMLVoc.Type.XSD_LONG) => classOf[Long] - case Uri(RMLVoc.Type.XSD_DATETIME) => classOf[Instant] - case Uri(RMLVoc.Type.XSD_BOOLEAN) => classOf[Boolean] - case Uri(RMLVoc.Type.RDF_LIST) => classOf[List[_]] - case Uri(RMLVoc.Type.XSD_ANY) => classOf[Any] - case Uri(RMLVoc.Type.RDF_OBJECT) => classOf[Any] + case Uri(XsdVoc.Type.XSD_POSITIVE_INTEGER) => classOf[Int] + case Uri(XsdVoc.Type.XSD_INTEGER) => classOf[Int] + case Uri(XsdVoc.Type.XSD_INT) => classOf[Int] + case Uri(XsdVoc.Type.XSD_STRING) => classOf[String] + case Uri(XsdVoc.Type.XSD_DOUBLE) => classOf[Double] + case Uri(XsdVoc.Type.XSD_LONG) => classOf[Long] + case Uri(XsdVoc.Type.XSD_DATETIME) => classOf[Instant] + case Uri(XsdVoc.Type.XSD_BOOLEAN) => classOf[Boolean] + case Uri(RDFVoc.Type.RDF_LIST) => classOf[List[_]] + case Uri(XsdVoc.Type.XSD_ANY) => classOf[Any] + case Uri(RDFVoc.Type.RDF_OBJECT) => classOf[Any] case _ => throw new RMLException(s"Type $uri not supported for parameter") } } diff --git a/src/main/scala/io/rml/framework/core/function/model/std/StdRandomFunction.scala b/src/main/scala/io/rml/framework/core/function/model/std/StdRandomFunction.scala index 0e3b5fe3..8af02d16 100644 --- a/src/main/scala/io/rml/framework/core/function/model/std/StdRandomFunction.scala +++ b/src/main/scala/io/rml/framework/core/function/model/std/StdRandomFunction.scala @@ -1,15 +1,14 @@ package io.rml.framework.core.function.model.std -import java.lang.reflect.Method - import io.rml.framework.core.function.model.Function import 
io.rml.framework.core.model.{Entity, Literal, Uri} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.FunVoc import io.rml.framework.flink.sink.FlinkRDFQuad +import java.lang.reflect.Method import scala.util.Random -case class StdRandomFunction(identifier:String = RMLVoc.Property.GREL_RANDOM) extends Function{ +case class StdRandomFunction(identifier:String = FunVoc.GREL.Property.GREL_RANDOM) extends Function{ private val random = new Random() override def execute(arguments: Map[Uri, String]): Option[Iterable[Entity]] = { diff --git a/src/main/scala/io/rml/framework/core/function/model/std/StdUpperCaseFunction.scala b/src/main/scala/io/rml/framework/core/function/model/std/StdUpperCaseFunction.scala index dbf6bb9c..f93cf5f0 100644 --- a/src/main/scala/io/rml/framework/core/function/model/std/StdUpperCaseFunction.scala +++ b/src/main/scala/io/rml/framework/core/function/model/std/StdUpperCaseFunction.scala @@ -1,15 +1,15 @@ package io.rml.framework.core.function.model.std -import java.lang.reflect.Method - import io.rml.framework.core.function.model.Function import io.rml.framework.core.model.{Entity, Literal, Uri} -import io.rml.framework.core.vocabulary.{Namespaces, RMLVoc} +import io.rml.framework.core.vocabulary.FunVoc import io.rml.framework.flink.sink.FlinkRDFQuad -case class StdUpperCaseFunction(identifier: String = RMLVoc.Property.GREL_UPPERCASE) extends Function { +import java.lang.reflect.Method + +case class StdUpperCaseFunction(identifier: String = FunVoc.GREL.Property.GREL_UPPERCASE) extends Function { override def execute(arguments: Map[Uri, String]): Option[Iterable[Entity]] = { - val parameter = arguments.get(Uri(Namespaces("grel", "valueParameter"))) + val parameter = arguments.get(Uri(FunVoc.GREL.Property.GREL_VALUEPARAMETER)); parameter.map(string => List(Literal(string))) } diff --git a/src/main/scala/io/rml/framework/core/function/std/StdFunctionLoader.scala 
b/src/main/scala/io/rml/framework/core/function/std/StdFunctionLoader.scala index 941e24b9..7cdecbeb 100644 --- a/src/main/scala/io/rml/framework/core/function/std/StdFunctionLoader.scala +++ b/src/main/scala/io/rml/framework/core/function/std/StdFunctionLoader.scala @@ -5,7 +5,7 @@ import io.rml.framework.core.function.{FunctionLoader, FunctionUtils} import io.rml.framework.core.model.Uri import io.rml.framework.core.model.rdf.{RDFGraph, RDFNode, RDFResource} import io.rml.framework.core.util.Util -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.{DOAPVoc, FunVoc} import io.rml.framework.shared.{FnOException, RMLException} @@ -14,7 +14,7 @@ case class StdFunctionLoader private (functionDescriptionTriplesGraph : RDFGraph override def parseFunctionMapping(graph: RDFGraph): FunctionLoader = { logDebug("parsing functions the new way (i.e. using StdFunctionLoader)") - val fnoFunctionProperty = Uri(RMLVoc.Property.FNO_FUNCTION) + val fnoFunctionProperty = Uri(FunVoc.FnO.Property.FNO_FUNCTION) // subject resources with fno:function property // these resources have type fnoi:Mapping @@ -22,20 +22,20 @@ case class StdFunctionLoader private (functionDescriptionTriplesGraph : RDFGraph if(mappings.isEmpty) throw new RMLException("No function mappings found...") - val functionDescriptionResources = this.functionDescriptionTriplesGraph.filterResources(Uri(RMLVoc.Class.FNO_FUNCTION)) + val functionDescriptionResources = this.functionDescriptionTriplesGraph.filterResources(Uri(FunVoc.FnO.Class.FNO_FUNCTION)) logDebug(s"${functionDescriptionResources.length} functionDescriptionResources present") logDebug(s"The current function description graph contains ${mappings.length} mappings") for (map <- mappings) { logDebug(s"Processing mapping: ${map.uri}") try { - val functionUri = map.listProperties(RMLVoc.Property.FNO_FUNCTION).head.asInstanceOf[RDFResource].uri + val functionUri = 
map.listProperties(FunVoc.FnO.Property.FNO_FUNCTION).head.asInstanceOf[RDFResource].uri - val methodMappingResource = map.listProperties(RMLVoc.Property.FNO_METHOD_MAPPING).head.asInstanceOf[RDFResource] - val methodName = methodMappingResource.listProperties(RMLVoc.Property.FNOM_METHOD_NAME).head.toString - val implementationResource = map.listProperties(RMLVoc.Property.FNO_IMPLEMENTATION).head.asInstanceOf[RDFResource] + val methodMappingResource = map.listProperties(FunVoc.FnO.Property.FNO_METHOD_MAPPING).head.asInstanceOf[RDFResource] + val methodName = methodMappingResource.listProperties(FunVoc.FnOMapping.Property.FNOM_METHOD_NAME).head.toString + val implementationResource = map.listProperties(FunVoc.FnO.Property.FNO_IMPLEMENTATION).head.asInstanceOf[RDFResource] - val className = Util.getLiteral(implementationResource.listProperties(RMLVoc.Property.FNOI_CLASS_NAME).head) - val downloadPage = Util.getLiteral(implementationResource.listProperties(RMLVoc.Property.DOAP_DOWNLOAD_PAGE).head) + val className = Util.getLiteral(implementationResource.listProperties(FunVoc.FnoImplementation.Property.FNOI_CLASS_NAME).head) + val downloadPage = Util.getLiteral(implementationResource.listProperties(DOAPVoc.Property.DOAP_DOWNLOAD_PAGE).head) logDebug(s"Found map with methodname: ${methodName}, className: ${className}, downloadPage: ${downloadPage}") // Get function description resource that corresponds with the current functionUri @@ -46,12 +46,12 @@ case class StdFunctionLoader private (functionDescriptionTriplesGraph : RDFGraph // extraction of input parameters - val expectsResource = functionDescriptionResourceOption.get.listProperties(RMLVoc.Property.FNO_EXPECTS).headOption + val expectsResource = functionDescriptionResourceOption.get.listProperties(FunVoc.FnO.Property.FNO_EXPECTS).headOption val inputParameterResources = expectsResource.get.asInstanceOf[RDFResource].getList.asInstanceOf[List[RDFResource]] val inputParamList = 
parseParameterResources(inputParameterResources) // extraction of output parameters - val returnsResource = functionDescriptionResourceOption.get.listProperties(RMLVoc.Property.FNO_RETURNS).headOption + val returnsResource = functionDescriptionResourceOption.get.listProperties(FunVoc.FnO.Property.FNO_RETURNS).headOption val outputParameterResources = returnsResource.get.asInstanceOf[RDFResource].getList.asInstanceOf[List[RDFResource]] val outputParamList = parseParameterResources(outputParameterResources) @@ -77,8 +77,8 @@ case class StdFunctionLoader private (functionDescriptionTriplesGraph : RDFGraph override def parseParameter(inputNode: RDFNode, pos: Int): Parameter = { val inputResource = inputNode.asInstanceOf[RDFResource] - val paramType = inputResource.listProperties(RMLVoc.Property.FNO_TYPE).headOption - val paramUri = inputResource.listProperties(RMLVoc.Property.FNO_PREDICATE).headOption + val paramType = inputResource.listProperties(FunVoc.FnO.Property.FNO_TYPE).headOption + val paramUri = inputResource.listProperties(FunVoc.FnO.Property.FNO_PREDICATE).headOption if(paramType.isEmpty) diff --git a/src/main/scala/io/rml/framework/core/model/PredicateMap.scala b/src/main/scala/io/rml/framework/core/model/PredicateMap.scala index 6fd3e323..73e4637b 100644 --- a/src/main/scala/io/rml/framework/core/model/PredicateMap.scala +++ b/src/main/scala/io/rml/framework/core/model/PredicateMap.scala @@ -26,7 +26,7 @@ package io.rml.framework.core.model import io.rml.framework.core.model.std.StdPredicateMap -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.R2RMLVoc /** * This trait represents a predicate-map. 
@@ -50,5 +50,5 @@ object PredicateMap { constant, reference, template, - Some(Uri(RMLVoc.Class.IRI))) + Some(Uri(R2RMLVoc.Class.IRI))) } diff --git a/src/main/scala/io/rml/framework/core/model/StreamDataSource.scala b/src/main/scala/io/rml/framework/core/model/StreamDataSource.scala index bc2f3812..426187bc 100644 --- a/src/main/scala/io/rml/framework/core/model/StreamDataSource.scala +++ b/src/main/scala/io/rml/framework/core/model/StreamDataSource.scala @@ -25,7 +25,7 @@ package io.rml.framework.core.model -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.QueryVoc import io.rml.framework.flink.source.{CSVStream, JSONStream, Stream, XMLStream} import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment @@ -40,9 +40,9 @@ object StreamDataSource { logicalSource.source match { case source: StreamDataSource => logicalSource.referenceFormulation match { - case Uri(RMLVoc.Class.CSV) => CSVStream(source) - case Uri(RMLVoc.Class.XPATH) => XMLStream(source, logicalSource.iterators.distinct) - case Uri(RMLVoc.Class.JSONPATH) => JSONStream(source, logicalSource.iterators.distinct) + case Uri(QueryVoc.Class.CSV) => CSVStream(source) + case Uri(QueryVoc.Class.XPATH) => XMLStream(source, logicalSource.iterators.distinct) + case Uri(QueryVoc.Class.JSONPATH) => JSONStream(source, logicalSource.iterators.distinct) } } } diff --git a/src/main/scala/io/rml/framework/core/model/std/StdGraphMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdGraphMap.scala index b323e6fa..5782f547 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdGraphMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdGraphMap.scala @@ -25,7 +25,7 @@ package io.rml.framework.core.model.std import io.rml.framework.core.model._ -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.R2RMLVoc case class StdGraphMap(identifier: String, override val functionMap:List[FunctionMap], @@ -33,6 +33,6 @@ case 
class StdGraphMap(identifier: String, reference: Option[Literal], template: Option[Literal]) extends GraphMap { - override def termType: Option[Uri] = Some(Uri(RMLVoc.Class.IRI)) + override def termType: Option[Uri] = Some(Uri(R2RMLVoc.Class.IRI)) } diff --git a/src/main/scala/io/rml/framework/core/vocabulary/DOAPVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/DOAPVoc.scala new file mode 100644 index 00000000..b15f9924 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/DOAPVoc.scala @@ -0,0 +1,38 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * Description of a Project (DOAP) vocabulary + * http://usefulinc.com/ns/doap# + * + * */ +object DOAPVoc { + + val namespace = ("doap", "http://usefulinc.com/ns/doap#"); + + object Property { + val DOAP_DOWNLOAD_PAGE = namespace._2 + "download-page"; + } + +} diff --git a/src/main/scala/io/rml/framework/core/vocabulary/FormatVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/FormatVoc.scala new file mode 100644 index 00000000..e84eba6a --- /dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/FormatVoc.scala @@ -0,0 +1,38 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * + * See https://www.w3.org/ns/formats/ + * */ +object FormatVoc { + val namespace = ("formats", "http://www.w3.org/ns/formats/"); + + object Class { + val NTRIPLES = namespace._2 + "N-Triples"; + val NQUADS = namespace._2 + "N-Quads"; + val JSONLD = namespace._2 + "JSON-LD"; + } + +} diff --git a/src/main/scala/io/rml/framework/core/vocabulary/FunVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/FunVoc.scala new file mode 100644 index 00000000..47eef366 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/FunVoc.scala @@ -0,0 +1,129 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * Function Ontology + * https://fno.io + * https://fno.io/ontology/index-en.html + * + * */ +object FunVoc { + + ///////////////////// + // FNO + // Function Ontology + ///////////////////// + object FnO { + val namespace = ("fno", "https://w3id.org/function/ontology#"); + + object Property { + val EXECUTES = namespace._2 + "executes"; + val FNO_SOLVES = namespace._2 + "solves"; + val FNO_IMPLEMENTS = namespace._2 + "implements"; + val FNO_PREDICATE = namespace._2 + "predicate"; + val FNO_EXPECTS = namespace._2 + "expects"; + val FNO_RETURNS = namespace._2 + "returns"; + val FNO_TYPE = namespace._2 + "type"; + + val FNO_IMPLEMENTATION = namespace._2 + "implementation"; + + val FNO_FUNCTION = namespace._2 + "function"; + + val FNO_METHOD_MAPPING = namespace._2 + "methodMapping"; + } + + object Class { + val FNO_FUNCTION = namespace._2 + "Function"; + val FNO_PARAMETER = namespace._2 + "Parameter"; + val FNO_EXECUTION = namespace._2 + "Execution"; + val FNO_OUTPUT = namespace._2 + "Output"; + val FNO_ALGORITHM = namespace._2 + "Algorithm"; + val FNO_PROBLEM = namespace._2 + "Problem"; + + val FNO_MAPPING = namespace._2 + "Mapping"; + val FNO_METHOD_MAPPING = namespace._2 + "MethodMapping"; + } + } + + ////////////////////////////////////// + // FNOM + // Function Ontology Method Mappings + ////////////////////////////////////// + object FnOMapping { + val namespace = ("fnom", "https://w3id.org/function/vocabulary/mapping#"); + + object Property { + val FNOM_METHOD_NAME = namespace._2 + "method-name"; + } + + object Class { + val FNOM_STRING_METHOD_MAPPING = namespace._2 + "StringMethodMapping"; + } + } + + object FnoImplementation { + val namespace = ("fnoi", "https://w3id.org/function/vocabulary/implementation#"); + + object Property { + val FNOI_CLASS_NAME = namespace._2 + "class-name"; + } + + object Class { + val FNOI_JAVA_CLASS = namespace._2 + "JavaClass"; + } + } + + ////////////////////////////////////////////// + // FNML + // Function RML Extension + 
// http://semweb.mmlab.be/ns/fnml/fnml.html + ////////////////////////////////////////////// + object Fnml { + val namespace = ("fnml", "http://semweb.mmlab.be/ns/fnml#"); + + object Property { + val FUNCTIONVALUE = namespace._2 + "functionValue"; + } + + object Class { + val FUNCTIONTERMMAP = namespace._2 + "FunctionTermMap"; + } + } + + ///// + // GREL functions + // https://fno.io + // https://github.com/OpenRefine/OpenRefine/wiki/General-Refine-Expression-Language + // + object GREL { + val namespace = ("grel", "http://users.ugent.be/~bjdmeest/function/grel.ttl#"); + + object Property { + val GREL_RANDOM = namespace._2 + "random"; + val GREL_UPPERCASE = namespace._2 + "toUpperCase"; + val GREL_VALUEPARAMETER = namespace._2 + "valueParameter"; + } + } + +} diff --git a/src/main/scala/io/rml/framework/core/vocabulary/HypermediaVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/HypermediaVoc.scala index fa97026a..715858ae 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/HypermediaVoc.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/HypermediaVoc.scala @@ -31,9 +31,9 @@ object HypermediaVoc { val namespace = ("hctl", "https://www.w3.org/2019/wot/hypermedia#") object Property { - val FORCONTENTTYPE = Namespaces(namespace._1, "forContentType") - val HASTARGET = Namespaces(namespace._1, "hasTarget") - val HASOPERATIONTYPE = Namespaces(namespace._1, "hasOperationType") + val FORCONTENTTYPE = namespace._2 + "forContentType"; + val HASTARGET = namespace._2 + "hasTarget"; + val HASOPERATIONTYPE = namespace._2 + "hasOperationType"; } } diff --git a/src/main/scala/io/rml/framework/core/vocabulary/LibVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/LibVoc.scala new file mode 100644 index 00000000..513c8aea --- /dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/LibVoc.scala @@ -0,0 +1,38 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + 
 * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * Custom library functions + * TODO: verify whether this vocabulary is still used anywhere; remove if not. 
+ * */ +object LibVoc { + val namespace = ("lib", "http://example.com/library#"); + + object Property { + val LIB_PROVIDED_BY = namespace._2 + "providedBy"; + val LIB_CLASS = namespace._2 + "class"; + val LIB_METHOD = namespace._2 + "method"; + val LIB_LOCAL_LIBRARY = namespace._2 + "localLibrary"; + } +} diff --git a/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala b/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala index 6f8e6d8a..cb8ff93c 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala @@ -31,24 +31,24 @@ package io.rml.framework.core.vocabulary object Namespaces { private val _namespaces: Map[String, String] = Map( - "rdf" -> "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "rdfs" -> "http://www.w3.org/2000/01/rdf-schema#", - "rr" -> "http://www.w3.org/ns/r2rml#", - "formats" -> "https://www.w3.org/ns/formats/", - "ql" -> "http://semweb.mmlab.be/ns/ql#", - "rml" -> "http://semweb.mmlab.be/ns/rml#", - "rmls" -> "http://semweb.mmlab.be/ns/rmls#", - "fnml" -> "http://semweb.mmlab.be/ns/fnml#", + RDFVoc.namespace._1 -> RDFVoc.namespace._2, + RDFSVoc.namespace._1 -> RDFSVoc.namespace._2, + R2RMLVoc.namespace._1 -> R2RMLVoc.namespace._2, + FormatVoc.namespace._1 -> FormatVoc.namespace._2, + QueryVoc.namespace._1 -> QueryVoc.namespace._2, + RMLVoc.namespace._1 -> RMLVoc.namespace._2, + RMLSVoc.namespace._1 -> RMLSVoc.namespace._2, + FunVoc.Fnml.namespace._1 -> FunVoc.Fnml.namespace._2, - "fno" -> "https://w3id.org/function/ontology#", - "fnom" -> "https://w3id.org/function/vocabulary/mapping#", - "fnoi" -> "https://w3id.org/function/vocabulary/implementation#", + FunVoc.FnO.namespace._1 -> FunVoc.FnO.namespace._2, + FunVoc.FnOMapping.namespace._1 -> FunVoc.FnOMapping.namespace._2, + FunVoc.FnoImplementation.namespace._1 -> FunVoc.FnoImplementation.namespace._2, - "grel" -> "http://users.ugent.be/~bjdmeest/function/grel.ttl#", - "lib" -> 
"http://example.com/library#", - "xsd" -> "http://www.w3.org/2001/XMLSchema#", + FunVoc.GREL.namespace._1 -> FunVoc.GREL.namespace._2, + LibVoc.namespace._1 -> LibVoc.namespace._2, + XsdVoc.namespace._1 -> XsdVoc.namespace._2, - "doap" -> "http://usefulinc.com/ns/doap#", + DOAPVoc.namespace._1 -> DOAPVoc.namespace._2, // Web of Things WoTVoc.ThingDescription.namespace._1 -> WoTVoc.ThingDescription.namespace._2, diff --git a/src/main/scala/io/rml/framework/core/vocabulary/QueryVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/QueryVoc.scala new file mode 100644 index 00000000..7c73a68a --- /dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/QueryVoc.scala @@ -0,0 +1,37 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * Query language vocabulary + * http://semweb.mmlab.be/ns/ql + * */ +object QueryVoc { + val namespace = ("ql", "http://semweb.mmlab.be/ns/ql#"); + + object Class { + val JSONPATH = namespace._2 + "JSONPath"; + val CSV = namespace._2 + "CSV"; + val XPATH = namespace._2 + "XPath"; + } +} diff --git a/src/main/scala/io/rml/framework/core/vocabulary/R2RMLVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/R2RMLVoc.scala new file mode 100644 index 00000000..a862ad49 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/R2RMLVoc.scala @@ -0,0 +1,67 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * The R2RML vocabulary + * https://www.w3.org/TR/r2rml/ + * + * */ +object R2RMLVoc { + + val namespace = ("rr", "http://www.w3.org/ns/r2rml#"); + + object Property { + val PREDICATEOBJECTMAP = namespace._2 + "predicateObjectMap"; + val PREDICATE = namespace._2 + "predicate"; + val PREDICATEMAP = namespace._2 + "predicateMap"; + val OBJECT = namespace._2 + "object"; + val OBJECTMAP = namespace._2 + "objectMap"; + val TRIPLESMAP = namespace._2 + "triplesMap"; + val SUBJECTMAP = namespace._2 + "subjectMap"; + val SUBJECT = namespace._2 + "subject"; + val CONSTANT = namespace._2 + "constant"; + val TEMPLATE = namespace._2 + "template"; + val TERMTYPE = namespace._2 + "termType"; + val CLASS = namespace._2 + "class"; + val PARENTTRIPLESMAP = namespace._2 + "parentTriplesMap"; + val JOINCONDITION = namespace._2 + "joinCondition"; + val PARENT = namespace._2 + "parent"; + val CHILD = namespace._2 + "child"; + val GRAPH = namespace._2 + "graph"; + val GRAPHMAP = namespace._2 + "graphMap"; + val DATATYPE = namespace._2 + "datatype"; + val LANGUAGE = namespace._2 + "language"; + val DEFAULTGRAPH = namespace._2 + "defaultGraph"; + } + + object Class { + val PREDICATEOBJECTMAP = namespace._2 + "PredicateObjectMap"; + val OBJECTMAP = namespace._2 + "ObjectMap"; + val TRIPLESMAP = namespace._2 + "TriplesMap"; + val IRI = namespace._2 + "IRI"; + val BLANKNODE = namespace._2 + "BlankNode"; + val LITERAL = namespace._2 + "Literal"; + } + +} diff --git a/src/main/scala/io/rml/framework/core/vocabulary/RDFSVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/RDFSVoc.scala new file mode 100644 index 00000000..5c49b853 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/RDFSVoc.scala @@ -0,0 +1,31 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation 
files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * RDF Schema vocabulary + * http://www.w3.org/2000/01/rdf-schema + * */ +object RDFSVoc { + val namespace = ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"); +} diff --git a/src/main/scala/io/rml/framework/core/vocabulary/RDFVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/RDFVoc.scala index 8c11d97a..49d57948 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/RDFVoc.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/RDFVoc.scala @@ -26,15 +26,15 @@ package io.rml.framework.core.vocabulary object RDFVoc { + val namespace = ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); object Property { - - val TYPE = Namespaces("rdf", "type") - + val TYPE = namespace._2 + "type"; } - object Class { - + object Type { + val RDF_OBJECT = namespace._2 + "object"; + val RDF_LIST = namespace._2 + "List"; } } diff --git a/src/main/scala/io/rml/framework/core/vocabulary/RMLSVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/RMLSVoc.scala new file mode 100644 index 00000000..56b95db8 --- 
/dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/RMLSVoc.scala @@ -0,0 +1,62 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * unofficial vocabulary for RML Streaming + * + * */ +object RMLSVoc { + val namespace = ("rmls", "http://semweb.mmlab.be/ns/rmls#"); + + object Property { + /////////////////////////////////////////////////////////////////////////// + // RMLS: TCP Source + /////////////////////////////////////////////////////////////////////////// + + val HOSTNAME = namespace._2 + "hostName"; + val PORT = namespace._2 + "port"; + val PATH = namespace._2 + "path"; + val TYPE = namespace._2 + "type"; + + /////////////////////////////////////////////////////////////////////////// + // RMLS: Kafka Source + /////////////////////////////////////////////////////////////////////////// + + val BROKER = namespace._2 + "broker"; + val GROUPID = namespace._2 + "groupId"; + val TOPIC = namespace._2 + "topic"; + val KAFKAVERSION= namespace._2 + "kafkaVersion"; + } + + object Class { + + /////////////////////////////////////////////////////////////////////////// + // RMLS + /////////////////////////////////////////////////////////////////////////// + + val TCPSOCKETSTREAM = namespace._2 + "TCPSocketStream"; + val FILESTREAM = namespace._2 + "FileStream"; + val KAFKASTREAM = namespace._2 + "KafkaStream"; + } +} diff --git a/src/main/scala/io/rml/framework/core/vocabulary/RMLVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/RMLVoc.scala index ed6aff05..3ec2f67d 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/RMLVoc.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/RMLVoc.scala @@ -27,203 +27,21 @@ package io.rml.framework.core.vocabulary /** * Contains the constants of the RML vocabulary. 
+ * https://rml.io/specs/rml/ */ object RMLVoc { - - object Type { - val XSD_STRING = Namespaces("xsd", "string") - val XSD_INT = Namespaces("xsd", "int") // signed 32-bit integer - val XSD_INTEGER = Namespaces("xsd", "integer") // integer value - val XSD_DOUBLE = Namespaces("xsd", "double") - val XSD_LONG = Namespaces("xsd", "long") - val RDF_LIST = Namespaces("rdf", "List") - val XSD_POSITIVE_INTEGER = Namespaces("xsd", "positiveInteger") - val XSD_BOOLEAN = Namespaces("xsd", "boolean") - val XSD_DATETIME = Namespaces("xsd", "dateTime") - - val XSD_ANY = Namespaces("xsd", "any") - val RDF_OBJECT = Namespaces("rdf", "object") - - } - + val namespace = ("rml", "http://semweb.mmlab.be/ns/rml#"); + object Property { - /////////////////////////////////////////////////////////////////////////// - // RR - /////////////////////////////////////////////////////////////////////////// - val PREDICATEOBJECTMAP = Namespaces("rr", "predicateObjectMap") - val PREDICATE = Namespaces("rr", "predicate") - val PREDICATEMAP = Namespaces("rr", "predicateMap") - val OBJECT = Namespaces("rr", "object") - val OBJECTMAP = Namespaces("rr", "objectMap") - val TRIPLESMAP = Namespaces("rr", "triplesMap") - val SUBJECTMAP = Namespaces("rr", "subjectMap") - val SUBJECT = Namespaces("rr", "subject") - val CONSTANT = Namespaces("rr", "constant") - val TEMPLATE = Namespaces("rr", "template") - val TERMTYPE = Namespaces("rr", "termType") - val CLASS = Namespaces("rr", "class") - val PARENTTRIPLESMAP = Namespaces("rr", "parentTriplesMap") - val JOINCONDITION = Namespaces("rr", "joinCondition") - val PARENT = Namespaces("rr", "parent") - val CHILD = Namespaces("rr", "child") - val GRAPH = Namespaces("rr", "graph") - val GRAPHMAP = Namespaces("rr", "graphMap") - val DATATYPE = Namespaces("rr", "datatype") - val LANGUAGE = Namespaces("rr", "language") - val DEFAULTGRAPH = Namespaces("rr", "defaultGraph") - /////////////////////////////////////////////////////////////////////////// // RML 
/////////////////////////////////////////////////////////////////////////// - val REFERENCE = Namespaces("rml", "reference") - val LOGICALSOURCE = Namespaces("rml", "logicalSource") - val ITERATOR = Namespaces("rml", "iterator") - val REFERENCEFORMULATION = Namespaces("rml", "referenceFormulation") - val SOURCE = Namespaces("rml", "source") - val LOGICALTARGET = Namespaces("rml", "logicalTarget") - - /////////////////////////////////////////////////////////////////////////// - // RMLS: TCP Source - /////////////////////////////////////////////////////////////////////////// - - val HOSTNAME = Namespaces("rmls", "hostName") - val PORT = Namespaces("rmls", "port") - val PATH = Namespaces("rmls", "path") - val TYPE = Namespaces("rmls", "type") - - /////////////////////////////////////////////////////////////////////////// - // RMLS: Kafka Source - /////////////////////////////////////////////////////////////////////////// - - val BROKER = Namespaces("rmls", "broker") - val GROUPID = Namespaces("rmls", "groupId") - val TOPIC = Namespaces("rmls", "topic") - val KAFKAVERSION= Namespaces("rmls", "kafkaVersion") - - - /////////////////////////////////////////////////////////////////////////// - // GREL - /////////////////////////////////////////////////////////////////////////// - val GREL_RANDOM = Namespaces("grel", "random") - val GREL_UPPERCASE = Namespaces("grel", "toUpperCase") - - - /////////////////////////////////////////////////////////////////////////// - // FNML - /////////////////////////////////////////////////////////////////////////// - - val FUNCTIONVALUE = Namespaces("fnml", "functionValue") - - /////////////////////////////////////////////////////////////////////////// - // FNO - /////////////////////////////////////////////////////////////////////////// - - val EXECUTES = Namespaces("fno", "executes") - val FNO_SOLVES = Namespaces("fno", "solves") - val FNO_IMPLEMENTS = Namespaces("fno", "implements") - val FNO_PREDICATE = Namespaces("fno", "predicate") - 
val FNO_EXPECTS = Namespaces("fno", "expects") - val FNO_RETURNS = Namespaces("fno", "returns") - val FNO_TYPE = Namespaces("fno", "type") - - val FNO_IMPLEMENTATION = Namespaces("fno", "implementation") - - val FNO_FUNCTION = Namespaces("fno", "function") - - val FNO_METHOD_MAPPING = Namespaces("fno", "methodMapping") - - /////////////////////////////////////////////////////////////////////////// - // LIB - /////////////////////////////////////////////////////////////////////////// - - val LIB_PROVIDED_BY = Namespaces("lib", "providedBy") - val LIB_CLASS = Namespaces("lib", "class") - val LIB_METHOD = Namespaces("lib", "method") - val LIB_LOCAL_LIBRARY = Namespaces("lib", "localLibrary") - - /////////////////////////////////////////////////////////////////////////// - // FNOM(apping) - /////////////////////////////////////////////////////////////////////////// - - val FNOM_METHOD_NAME = Namespaces("fnom", "method-name") - - - - /////////////////////////////////////////////////////////////////////////// - // FNOI - /////////////////////////////////////////////////////////////////////////// - val FNOI_CLASS_NAME = Namespaces("fnoi", "class-name") - - /////////////////////////////////////////////////////////////////////////// - // DOAP - /////////////////////////////////////////////////////////////////////////// - - val DOAP_DOWNLOAD_PAGE = Namespaces("doap", "download-page") - - - + val REFERENCE = namespace._2 + "reference"; + val LOGICALSOURCE = namespace._2 + "logicalSource"; + val ITERATOR = namespace._2 + "iterator"; + val REFERENCEFORMULATION = namespace._2 + "referenceFormulation"; + val SOURCE = namespace._2 + "source"; + val LOGICALTARGET = namespace._2 + "logicalTarget"; } - - object Class { - - /////////////////////////////////////////////////////////////////////////// - // RR - /////////////////////////////////////////////////////////////////////////// - val PREDICATEOBJECTMAP = Namespaces("rr", "PredicateObjectMap") - val OBJECTMAP = Namespaces("rr", 
"ObjectMap") - val TRIPLESMAP = Namespaces("rr", "TriplesMap") - val IRI = Namespaces("rr", "IRI") - val BLANKNODE = Namespaces("rr", "BlankNode") - val LITERAL = Namespaces("rr", "Literal") - - /////////////////////////////////////////////////////////////////////////// - // QL - /////////////////////////////////////////////////////////////////////////// - - val JSONPATH = Namespaces("ql", "JSONPath") - val CSV = Namespaces("ql", "CSV") - val XPATH = Namespaces("ql", "XPath") - - /////////////////////////////////////////////////////////////////////////// - // RMLS - /////////////////////////////////////////////////////////////////////////// - - val TCPSOCKETSTREAM = Namespaces("rmls", "TCPSocketStream") - val FILESTREAM = Namespaces("rmls", "FileStream") - val KAFKASTREAM = Namespaces("rmls", "KafkaStream") - - /////////////////////////////////////////////////////////////////////////// - // FNML - /////////////////////////////////////////////////////////////////////////// - - val FUNCTIONTERMMAP = Namespaces("fnml", "FunctionTermMap") - - /////////////////////////////////////////////////////////////////////////// - // FNO - /////////////////////////////////////////////////////////////////////////// - val FNO_FUNCTION = Namespaces("fno", "Function") - val FNO_PARAMETER = Namespaces("fno", "Parameter") - val FNO_EXECUTION = Namespaces("fno", "Execution") - val FNO_OUTPUT = Namespaces("fno", "Output") - val FNO_ALGORITHM = Namespaces("fno", "Algorithm") - val FNO_PROBLEM = Namespaces("fno", "Problem") - - val FNO_MAPPING = Namespaces("fno", "Mapping") - val FNO_METHOD_MAPPING = Namespaces("fno", "MethodMapping") - - /////////////////////////////////////////////////////////////////////////// - // FNOI - /////////////////////////////////////////////////////////////////////////// - - val FNOI_JAVA_CLASS = Namespaces("fnoi", "JavaClass") - - /////////////////////////////////////////////////////////////////////////// - // FNOM(apping) - 
/////////////////////////////////////////////////////////////////////////// - - val FNOM_STRING_METHOD_MAPPING = Namespaces("fnom", "StringMethodMapping") - - } - } diff --git a/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala index b99a10e2..8a87a878 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/WoTVoc.scala @@ -37,13 +37,13 @@ object WoTVoc { val namespace = ("td", "https://www.w3.org/2019/wot/td#") object Property { - val HASPROPERTYAFFORDANCE = Namespaces(namespace._1, "hasPropertyAffordance") - val HASFORM = Namespaces(namespace._1, "hasForm") - val HASSECURITYCONFIGURATION = Namespaces(namespace._1, "hasSecurityConfiguration") + val HASPROPERTYAFFORDANCE = namespace._2 + "hasPropertyAffordance"; + val HASFORM = namespace._2 + "hasForm"; + val HASSECURITYCONFIGURATION = namespace._2 + "hasSecurityConfiguration"; } object Class { - val THING = Namespaces("td", "Thing") + val THING = namespace._2 + "Thing"; } } @@ -55,10 +55,10 @@ object WoTVoc { val namespace = ("mqv", "http://www.example.org/mqtt-binding#") // TODO: change once an officlial vocabulary is published object Property { - val CONTROLPACKETVALUE = Namespaces(namespace._1, "controlPacketValue") - val OPTIONS = Namespaces(namespace._1, "options") - val OPTIONNAME = Namespaces(namespace._1, "optionName") - val OPTIONVALUE = Namespaces(namespace._1, "optionValue") + val CONTROLPACKETVALUE = namespace._2 + "controlPacketValue"; + val OPTIONS = namespace._2 + "options"; + val OPTIONNAME = namespace._2 + "optionName"; + val OPTIONVALUE = namespace._2 + "optionValue"; } } @@ -69,11 +69,11 @@ object WoTVoc { val namespace = ("wotsec", "https://www.w3.org/2019/wot/security#") object Property { - val IN = Namespaces(namespace._1, "in") + val IN = namespace._2 + "in"; } object Class { - val BASICSECURITYSCHEME = Namespaces(namespace._1, 
"BasicSecurityScheme") + val BASICSECURITYSCHEME = namespace._2 + "BasicSecurityScheme"; } } } diff --git a/src/main/scala/io/rml/framework/core/vocabulary/XsdVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/XsdVoc.scala new file mode 100644 index 00000000..8218a066 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/XsdVoc.scala @@ -0,0 +1,44 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * https://www.w3.org/2001/XMLSchema + * */ +object XsdVoc { + val namespace = ("xsd", "http://www.w3.org/2001/XMLSchema#"); + + object Type { + val XSD_STRING = namespace._2 + "string"; + val XSD_INT = namespace._2 + "int"; // signed 32-bit integer + val XSD_INTEGER = namespace._2 + "integer"; // integer value + val XSD_DOUBLE = namespace._2 + "double"; + val XSD_LONG = namespace._2 + "long"; + val XSD_POSITIVE_INTEGER = namespace._2 + "positiveInteger"; + val XSD_BOOLEAN = namespace._2 + "boolean"; + val XSD_DATETIME = namespace._2 + "dateTime"; + + val XSD_ANY = namespace._2 + "any"; + } + +} diff --git a/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala index cd19579f..29e185cc 100644 --- a/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala @@ -24,15 +24,12 @@ **/ package io.rml.framework.engine.statement -import java.io.File - -import io.rml.framework.api.RMLEnvironment -import io.rml.framework.core.function.{FunctionLoader, ReflectionUtils} -import io.rml.framework.core.function.model.{DynamicFunction, Function} +import io.rml.framework.core.function.FunctionLoader +import io.rml.framework.core.function.model.Function import io.rml.framework.core.model._ -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.FunVoc import io.rml.framework.flink.item.Item -import io.rml.framework.flink.sink.{FlinkRDFNode, FlinkRDFQuad} +import io.rml.framework.flink.sink.FlinkRDFQuad import io.rml.framework.flink.source.EmptyItem import io.rml.framework.shared.RMLException @@ -62,9 +59,9 @@ case class FunctionMapGeneratorAssembler() extends TermMapGeneratorAssembler { this.logDebug("parseFunction (assembledPom)") val placeHolder: List[FlinkRDFQuad] = generateFunctionTriples(new EmptyItem(), 
assembledPom) - val executeProperties = placeHolder.filter( quad => quad.predicate.value == Uri(RMLVoc.Property.EXECUTES)) + val executeProperties = placeHolder.filter( quad => quad.predicate.value == Uri(FunVoc.FnO.Property.EXECUTES)) if(executeProperties.isEmpty) - throw new RMLException(s"Couldn't find ${RMLVoc.Property.EXECUTES} property." + + throw new RMLException(s"Couldn't find ${FunVoc.FnO.Property.EXECUTES} property." + s"Is the namespace correct? (e.g. HTTP vs. HTTPS)") val functionName = Uri( @@ -91,7 +88,7 @@ case class FunctionMapGeneratorAssembler() extends TermMapGeneratorAssembler { private def createAssemblerFunction(function: Function, assembledPom: List[(Item => Option[Iterable[Uri]], Item => Option[Iterable[Entity]])]): Item => Option[Iterable[Entity]] = { (item: Item) => { val triples: List[FlinkRDFQuad] = generateFunctionTriples(item, assembledPom) - val paramTriples = triples.filter(triple => triple.predicate.uri != Uri(RMLVoc.Property.EXECUTES)) + val paramTriples = triples.filter(triple => triple.predicate.uri != Uri(FunVoc.FnO.Property.EXECUTES)) diff --git a/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala index e5751940..21850750 100644 --- a/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala @@ -27,7 +27,7 @@ package io.rml.framework.engine.statement import io.rml.framework.core.extractors.TriplesMapsCache import io.rml.framework.core.model.{Entity, Literal, ObjectMap, Uri} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.R2RMLVoc import io.rml.framework.flink.item.Item class ObjectGeneratorAssembler extends TermMapGeneratorAssembler { @@ -43,7 +43,7 @@ class ObjectGeneratorAssembler extends TermMapGeneratorAssembler { assembledFunction.andThen(item => { if 
(item.isDefined) { termTypeString match { - case RMLVoc.Class.IRI => item.map(iter => iter.map(elem => Uri(elem.toString))) + case R2RMLVoc.Class.IRI => item.map(iter => iter.map(elem => Uri(elem.toString))) case _ => item.map(iter => iter.flatMap(elem => { Some(Literal(elem.identifier, objectMap.datatype, objectMap.language)) })) diff --git a/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala index 25053311..34540d65 100644 --- a/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala @@ -27,7 +27,7 @@ package io.rml.framework.engine.statement import io.rml.framework.core.function.FunctionUtils import io.rml.framework.core.model.{TermMap, TermNode, Uri} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.R2RMLVoc import io.rml.framework.flink.item.Item import io.rml.framework.shared.TermTypeException @@ -58,7 +58,7 @@ class SubjectGeneratorAssembler extends TermMapGeneratorAssembler { * Maybe move this check to subject map extractor for early checking during the reading process? 
*/ termMap.termType.get.toString match { - case RMLVoc.Class.LITERAL => throw new TermTypeException("Subject cannot be of type Literal!") + case R2RMLVoc.Class.LITERAL => throw new TermTypeException("Subject cannot be of type Literal!") case _ => } diff --git a/src/main/scala/io/rml/framework/engine/statement/TermMapGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/TermMapGeneratorAssembler.scala index 5afa4c93..5b8ec651 100644 --- a/src/main/scala/io/rml/framework/engine/statement/TermMapGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/TermMapGeneratorAssembler.scala @@ -27,7 +27,7 @@ package io.rml.framework.engine.statement import io.rml.framework.core.internal.Logging import io.rml.framework.core.model._ -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.R2RMLVoc import io.rml.framework.flink.item.Item /** @@ -47,7 +47,7 @@ abstract class TermMapGeneratorAssembler extends Logging { templateGenerator(termMap) } else if (termMap.hasReference) { referenceGenerator(termMap) - } else if (termMap.hasTermType && termMap.termType.get == Uri(RMLVoc.Class.BLANKNODE)) { + } else if (termMap.hasTermType && termMap.termType.get == Uri(R2RMLVoc.Class.BLANKNODE)) { blankNodeGenerator() } else { if (isWarnEnabled) logWarning(termMap.toString + ": no constant, template or reference present.") @@ -68,8 +68,8 @@ abstract class TermMapGeneratorAssembler extends Logging { */ private def constantGenerator(termMap: TermMap): Item => Option[Iterable[Entity]] = { termMap.termType.get.toString match { - case RMLVoc.Class.IRI => TermMapGenerators.constantUriGenerator(termMap.constant.get) - case RMLVoc.Class.LITERAL => TermMapGenerators.constantLiteralGenerator(termMap.constant.get, termMap.datatype, termMap.language) + case R2RMLVoc.Class.IRI => TermMapGenerators.constantUriGenerator(termMap.constant.get) + case R2RMLVoc.Class.LITERAL => 
TermMapGenerators.constantLiteralGenerator(termMap.constant.get, termMap.datatype, termMap.language) } } @@ -80,9 +80,9 @@ abstract class TermMapGeneratorAssembler extends Logging { */ private def templateGenerator(termMap: TermMap): Item => Option[Iterable[Entity]] = { termMap.termType.get.toString match { - case RMLVoc.Class.IRI => TermMapGenerators.templateUriGenerator(termMap) - case RMLVoc.Class.LITERAL => TermMapGenerators.templateLiteralGenerator(termMap) - case RMLVoc.Class.BLANKNODE => TermMapGenerators.templateBlankNodeGenerator(termMap) + case R2RMLVoc.Class.IRI => TermMapGenerators.templateUriGenerator(termMap) + case R2RMLVoc.Class.LITERAL => TermMapGenerators.templateLiteralGenerator(termMap) + case R2RMLVoc.Class.BLANKNODE => TermMapGenerators.templateBlankNodeGenerator(termMap) } } @@ -93,8 +93,8 @@ abstract class TermMapGeneratorAssembler extends Logging { */ private def referenceGenerator(termMap: TermMap): Item =>Option[Iterable[Entity]] = { termMap.termType.get.toString match { - case RMLVoc.Class.IRI => TermMapGenerators.referenceUriGenerator(termMap) - case RMLVoc.Class.LITERAL => TermMapGenerators.referenceLiteralGenerator(termMap) + case R2RMLVoc.Class.IRI => TermMapGenerators.referenceUriGenerator(termMap) + case R2RMLVoc.Class.LITERAL => TermMapGenerators.referenceLiteralGenerator(termMap) } } diff --git a/src/main/scala/io/rml/framework/flink/source/FileDataSet.scala b/src/main/scala/io/rml/framework/flink/source/FileDataSet.scala index 36d5c010..30668919 100644 --- a/src/main/scala/io/rml/framework/flink/source/FileDataSet.scala +++ b/src/main/scala/io/rml/framework/flink/source/FileDataSet.scala @@ -24,17 +24,17 @@ **/ package io.rml.framework.flink.source -import java.nio.file.Paths - import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.{LogicalSource, Uri} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.QueryVoc import io.rml.framework.flink.item.Item import 
io.rml.framework.flink.item.csv.CSVHeader import io.rml.framework.flink.util.DefaultCSVConfig import org.apache.commons.csv.CSVFormat import org.apache.flink.api.scala._ +import java.nio.file.Paths + sealed abstract class FileDataSet extends Source { def dataset: DataSet[Item] } @@ -51,9 +51,9 @@ object FileDataSet extends Logging { def apply(logicalSource: LogicalSource)(implicit env: ExecutionEnvironment): FileDataSet = { logicalSource.referenceFormulation match { - case Uri(RMLVoc.Class.CSV) => createCSVDataSet(logicalSource.source.uri.toString) - case Uri(RMLVoc.Class.XPATH) => createXMLWithXPathDataSet(logicalSource.source.uri.toString, logicalSource.iterators.head) - case Uri(RMLVoc.Class.JSONPATH) => createJSONWithJSONPathDataSet(logicalSource.source.uri.toString, logicalSource.iterators.head) + case Uri(QueryVoc.Class.CSV) => createCSVDataSet(logicalSource.source.uri.toString) + case Uri(QueryVoc.Class.XPATH) => createXMLWithXPathDataSet(logicalSource.source.uri.toString, logicalSource.iterators.head) + case Uri(QueryVoc.Class.JSONPATH) => createJSONWithJSONPathDataSet(logicalSource.source.uri.toString, logicalSource.iterators.head) } } diff --git a/src/main/scala/io/rml/framework/flink/source/JSONStream.scala b/src/main/scala/io/rml/framework/flink/source/JSONStream.scala index f9013455..c8bbd834 100644 --- a/src/main/scala/io/rml/framework/flink/source/JSONStream.scala +++ b/src/main/scala/io/rml/framework/flink/source/JSONStream.scala @@ -26,7 +26,7 @@ package io.rml.framework.flink.source import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.{FileStream, KafkaStream, StreamDataSource, TCPSocketStream} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.QueryVoc import io.rml.framework.flink.item.Item import io.rml.framework.flink.item.json.JSONItem import org.apache.flink.api.common.serialization.SimpleStringSchema @@ -36,7 +36,7 @@ import org.apache.flink.streaming.api.scala.{DataStream, 
StreamExecutionEnvironm case class JSONStream(val stream: DataStream[Iterable[Item]]) extends Stream object JSONStream extends Logging { - val DEFAULT_PATH_OPTION: String = Source.DEFAULT_ITERATOR_MAP(RMLVoc.Class.JSONPATH) + val DEFAULT_PATH_OPTION: String = Source.DEFAULT_ITERATOR_MAP(QueryVoc.Class.JSONPATH) def apply(source: StreamDataSource, jsonPaths: List[String])(implicit env: StreamExecutionEnvironment): Stream = { diff --git a/src/main/scala/io/rml/framework/flink/source/Source.scala b/src/main/scala/io/rml/framework/flink/source/Source.scala index f44ecee2..a759ee0c 100644 --- a/src/main/scala/io/rml/framework/flink/source/Source.scala +++ b/src/main/scala/io/rml/framework/flink/source/Source.scala @@ -25,7 +25,7 @@ package io.rml.framework.flink.source import io.rml.framework.core.model.{FileDataSource, LogicalSource, StreamDataSource} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.QueryVoc import org.apache.flink.api.scala.ExecutionEnvironment import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment @@ -37,9 +37,9 @@ trait Source object Source { val DEFAULT_ITERATOR_MAP: Map[String, String] = Map( - RMLVoc.Class.JSONPATH -> "$", - RMLVoc.Class.CSV -> "", - RMLVoc.Class.XPATH -> "/*" + QueryVoc.Class.JSONPATH -> "$", + QueryVoc.Class.CSV -> "", + QueryVoc.Class.XPATH -> "/*" ) val DEFAULT_ITERATOR_SET: Set[String] = DEFAULT_ITERATOR_MAP.values.toSet diff --git a/src/main/scala/io/rml/framework/flink/source/XMLStream.scala b/src/main/scala/io/rml/framework/flink/source/XMLStream.scala index 577ce504..8a11b05f 100644 --- a/src/main/scala/io/rml/framework/flink/source/XMLStream.scala +++ b/src/main/scala/io/rml/framework/flink/source/XMLStream.scala @@ -25,7 +25,7 @@ package io.rml.framework.flink.source import io.rml.framework.core.model.{FileStream, KafkaStream, StreamDataSource, TCPSocketStream} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.QueryVoc import 
io.rml.framework.flink.item.Item import io.rml.framework.flink.item.xml.XMLItem import org.apache.flink.api.common.serialization.SimpleStringSchema @@ -37,7 +37,7 @@ import org.slf4j.LoggerFactory case class XMLStream(stream: DataStream[Iterable[Item]]) extends Stream object XMLStream { - val DEFAULT_PATH_OPTION: String = Source.DEFAULT_ITERATOR_MAP(RMLVoc.Class.XPATH) + val DEFAULT_PATH_OPTION: String = Source.DEFAULT_ITERATOR_MAP(QueryVoc.Class.XPATH) def apply(source: StreamDataSource, xpaths: List[String])(implicit env: StreamExecutionEnvironment): Stream = { diff --git a/src/test/scala/io/rml/framework/std/StdObjectMapExtractorTest.scala b/src/test/scala/io/rml/framework/std/StdObjectMapExtractorTest.scala index e7f70a20..1c7384bd 100644 --- a/src/test/scala/io/rml/framework/std/StdObjectMapExtractorTest.scala +++ b/src/test/scala/io/rml/framework/std/StdObjectMapExtractorTest.scala @@ -28,7 +28,7 @@ package io.rml.framework.std import io.rml.framework.core.extractors.ObjectMapExtractor import io.rml.framework.core.model.Uri import io.rml.framework.core.model.rdf.{RDFGraph, RDFResource} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.R2RMLVoc import org.scalatest.{FunSuite, Matchers} class StdObjectMapExtractorTest extends FunSuite with Matchers { @@ -52,12 +52,12 @@ class StdObjectMapExtractorTest extends FunSuite with Matchers { val resource = RDFResource("#PredicateObjectMap") - .addProperty(RMLVoc.Property.OBJECTMAP, + .addProperty(R2RMLVoc.Property.OBJECTMAP, RDFResource("#ObjectMap") - .addProperty(RMLVoc.Property.CONSTANT, + .addProperty(R2RMLVoc.Property.CONSTANT, RDFResource("#Object"))) - .addProperty(RMLVoc.Property.OBJECT, + .addProperty(R2RMLVoc.Property.OBJECT, RDFResource("#Object")) // ============================================================================================ diff --git a/src/test/scala/io/rml/framework/std/StdPredicateObjectMapExtractorTest.scala 
b/src/test/scala/io/rml/framework/std/StdPredicateObjectMapExtractorTest.scala index 104c5dbc..d94d0c96 100644 --- a/src/test/scala/io/rml/framework/std/StdPredicateObjectMapExtractorTest.scala +++ b/src/test/scala/io/rml/framework/std/StdPredicateObjectMapExtractorTest.scala @@ -28,7 +28,7 @@ package io.rml.framework.std import io.rml.framework.core.extractors.PredicateObjectMapExtractor import io.rml.framework.core.model.Uri import io.rml.framework.core.model.rdf.{RDFGraph, RDFResource} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.R2RMLVoc import org.scalatest.{FunSuite, Matchers} class StdPredicateObjectMapExtractorTest extends FunSuite with Matchers { @@ -53,9 +53,9 @@ class StdPredicateObjectMapExtractorTest extends FunSuite with Matchers { val resource = RDFResource(Uri("#TripleMap")) - .addProperty(RMLVoc.Property.PREDICATEOBJECTMAP, + .addProperty(R2RMLVoc.Property.PREDICATEOBJECTMAP, RDFResource(Uri("#PredicateObjectMap"))) - .addProperty(RMLVoc.Property.PREDICATEOBJECTMAP, + .addProperty(R2RMLVoc.Property.PREDICATEOBJECTMAP, RDFResource(Uri("#PredicateObjectMap_2"))) // ============================================================================================ From 48c46c04c2f3b0bd1cc4f89dc8d8a682ed0fd4ce Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 12 Jan 2021 12:13:09 +0100 Subject: [PATCH 07/83] disable maven build cache --- .gitlab-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1bae6359..7ba87031 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -26,9 +26,9 @@ variables: # Cache downloaded dependencies and plugins between builds. # To keep cache across branches add 'key: "$CI_JOB_NAME"' -cache: - paths: - - .m2/repository +#cache: +# paths: +# - .m2/repository # This will only the project. 
.build: &build From 118f79a940e5c8d5cb3c723ff2ebdbee8d13a1ce Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 12 Jan 2021 12:26:50 +0100 Subject: [PATCH 08/83] undo disable maven build cache --- .gitlab-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7ba87031..1bae6359 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -26,9 +26,9 @@ variables: # Cache downloaded dependencies and plugins between builds. # To keep cache across branches add 'key: "$CI_JOB_NAME"' -#cache: -# paths: -# - .m2/repository +cache: + paths: + - .m2/repository # This will only the project. .build: &build From c6c36d6ec517caedefb14b40ce63a030048cef77 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 12 Jan 2021 15:15:25 +0100 Subject: [PATCH 09/83] Load known prefixes used in mapping file. --- .../framework/core/model/rdf/jena/JenaGraph.scala | 15 ++++++++++----- .../framework/core/vocabulary/Namespaces.scala | 4 ++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaGraph.scala b/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaGraph.scala index de79f97e..0614ef45 100644 --- a/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaGraph.scala +++ b/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaGraph.scala @@ -25,18 +25,17 @@ package io.rml.framework.core.model.rdf.jena -import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File, InputStream} -import java.nio.charset.StandardCharsets - import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.rdf.{RDFGraph, RDFLiteral, RDFResource, RDFTriple} import io.rml.framework.core.model.{Literal, Uri} import io.rml.framework.core.util.{Format, JenaUtil, Util} -import io.rml.framework.core.vocabulary.RDFVoc +import io.rml.framework.core.vocabulary.{Namespaces, RDFVoc} import io.rml.framework.shared.{RMLException, ReadException} import 
org.apache.jena.rdf.model.{Model, ModelFactory, Statement} import org.apache.jena.riot.RDFDataMgr +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File, InputStream} +import java.nio.charset.StandardCharsets import scala.collection.JavaConverters._ class JenaGraph(model: Model) extends RDFGraph with Logging { @@ -117,7 +116,13 @@ class JenaGraph(model: Model) extends RDFGraph with Logging { case Some(iri) => iri case None => null } - model.removeAll() + model.removeAll(); + + // load known prefixes + Namespaces.iterator().toIterable.foreach(prefix2Uri => { + model.setNsPrefix(prefix2Uri._1, prefix2Uri._2); + }); + Util.tryWith(in) { in => model.read(in, bIri, JenaUtil.format(format)) } diff --git a/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala b/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala index cb8ff93c..d99b233d 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/Namespaces.scala @@ -81,4 +81,8 @@ object Namespaces { */ def apply(prefix: String, suffix: String): String = _namespaces(prefix) + suffix + def iterator(): Iterator[(String, String)] = { + _namespaces.seq.iterator + } + } From 9f0ce5f25c2ec3c9d7eee4ea85a283b3dfb2461d Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 12 Jan 2021 21:03:38 +0100 Subject: [PATCH 10/83] Added `Added` section --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66cba4f5..f3ed81db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
## [2.0.1] - +### Added +* Web of Things source (MQTT) + ### Changed * Updated Flink from version 1.10.0 to 1.11.2 * Updated Kafka version from version 2.2.2 to 2.4.1 (more versions supported using the universal connector) From 44488891c1604c1e3bed2823c955ea2de92fed4c Mon Sep 17 00:00:00 2001 From: Gertjan De Mulder Date: Tue, 19 Jan 2021 14:44:23 +0100 Subject: [PATCH 11/83] Icwe2021 mqtt tests --- .gitlab-ci.yml | 3 + pom.xml | 14 ++- .../std/StdDataSourceExtractor.scala | 26 +++-- .../rml/framework/core/model/MQTTStream.scala | 37 +++++++ .../framework/flink/source/JSONStream.scala | 15 ++- .../flink/source/MQTTPropertyKeys.scala | 8 ++ .../flink/source/RichMQTTSource.scala | 96 +++++++++++++++++++ .../datasource.json | 3 + .../RMLTC0000-JSON-STREAM-MQTT/mapping.ttl | 63 ++++++++++++ .../RMLTC0000-JSON-STREAM-MQTT/output.ttl | 1 + .../datasource.json | 5 + .../RMLTC0001a-JSON-STREAM-MQTT/mapping.ttl | 66 +++++++++++++ .../RMLTC0001a-JSON-STREAM-MQTT/output.ttl | 2 + .../datasource.json | 5 + .../RMLTC0001b-JSON-STREAM-MQTT/mapping.ttl | 68 +++++++++++++ .../RMLTC0001b-JSON-STREAM-MQTT/output.ttl | 2 + .../datasource.json | 6 ++ .../RMLTC0002a-JSON-STREAM-MQTT/mapping.ttl | 68 +++++++++++++ .../RMLTC0002a-JSON-STREAM-MQTT/output.ttl | 4 + .../io/rml/framework/MQTTStreamTests.scala | 95 ++++++++++++++++++ 20 files changed, 575 insertions(+), 12 deletions(-) create mode 100644 src/main/scala/io/rml/framework/core/model/MQTTStream.scala create mode 100644 src/main/scala/io/rml/framework/flink/source/MQTTPropertyKeys.scala create mode 100644 src/main/scala/io/rml/framework/flink/source/RichMQTTSource.scala create mode 100644 src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/datasource.json create mode 100644 src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/mapping.ttl create mode 100644 src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/output.ttl create mode 100644 
src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/datasource.json create mode 100644 src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/mapping.ttl create mode 100644 src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/output.ttl create mode 100644 src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/datasource.json create mode 100644 src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/mapping.ttl create mode 100644 src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/output.ttl create mode 100644 src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/datasource.json create mode 100644 src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/mapping.ttl create mode 100644 src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/output.ttl create mode 100644 src/test/scala/io/rml/framework/MQTTStreamTests.scala diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f07556b3..1a50dab1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -39,6 +39,9 @@ cache: # This will also run tests. .test: &test stage: test + services: + - name: eclipse-mosquitto:latest + alias: mosquittobroker script: - 'mvn $MAVEN_CLI_OPTS test' diff --git a/pom.xml b/pom.xml index 58ffd4f0..b79dac94 100644 --- a/pom.xml +++ b/pom.xml @@ -140,7 +140,12 @@ SOFTWARE. - + + + com.rabbitmq + amqp-client + 5.10.0 + com.ximpleware vtd-xml @@ -289,7 +294,12 @@ SOFTWARE. 
pom runtime - + + org.eclipse.paho + org.eclipse.paho.client.mqttv3 + 1.2.5 + + diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala index 3c93ecf9..4a6e88db 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala @@ -32,6 +32,8 @@ import io.rml.framework.core.model.rdf.RDFResource import io.rml.framework.core.vocabulary._ import io.rml.framework.shared.RMLException +import java.util.Properties + class StdDataSourceExtractor extends DataSourceExtractor { /** @@ -114,17 +116,16 @@ class StdDataSourceExtractor extends DataSourceExtractor { val isMQTT = form.hasPredicateWith(WoTVoc.WoTMQTT.namespace._2); if (isMQTT) { return extractWoTMQTTSource(form, hypermediaTarget, contentType); + } else { + throw new RMLException("Unknown Web of Things source defined.") } - - // TODO replace with real source - FileDataSource(Literal("/tmp/test")) } private def extractWoTMQTTSource(form: RDFResource, hypermediaTarget: String, contentType: String): DataSource = { val controlPacketValue = extractLiteralFromProperty(form, WoTVoc.WoTMQTT.Property.CONTROLPACKETVALUE, "SUBSCRIBE"); var qosOpt: Option[String] = None; - var dup: Boolean = false; + var dup: String = "false"; val mqttOptions = extractResourceFromProperty(form, WoTVoc.WoTMQTT.Property.OPTIONS); if (mqttOptions.isDefined) { // extract the actual values @@ -135,15 +136,22 @@ class StdDataSourceExtractor extends DataSourceExtractor { val optionName = extractSingleLiteralFromProperty(mqttOptionsResource, WoTVoc.WoTMQTT.Property.OPTIONNAME); optionName match { case "qos" => qosOpt = Some(extractSingleLiteralFromProperty(mqttOptionsResource, WoTVoc.WoTMQTT.Property.OPTIONVALUE)); - case "dup" => dup = true; + case "dup" => dup = "true"; }; }); } - // TODO make actual data source - 
logWarning("Here a MQTT data source will be created. hypermediaTarget: " + hypermediaTarget - + ", contentType: " + contentType + ", dup: " + dup + ", qusOpt: " + qosOpt) - FileDataSource(Literal("/tmp/test")) + logDebug("MQTT data source defined in mapping file. hypermediaTarget: " + hypermediaTarget + + ", contentType: " + contentType + ", dup: " + dup + ", qosOpt: " + qosOpt); + val mqttProperties = new Properties; + mqttProperties.put("hypermediaTarget", hypermediaTarget); + mqttProperties.put("contentType", contentType); + mqttProperties.put("controlPacketValue", controlPacketValue); + if (qosOpt.isDefined) { + mqttProperties.put("qos", qosOpt.get); + } + mqttProperties.put("dup", dup); // Java 8 can't handle Scala Boolean objects in a Properties object. + MQTTStream(mqttProperties) } } diff --git a/src/main/scala/io/rml/framework/core/model/MQTTStream.scala b/src/main/scala/io/rml/framework/core/model/MQTTStream.scala new file mode 100644 index 00000000..e15b9320 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/model/MQTTStream.scala @@ -0,0 +1,37 @@ +/** + * MIT License + * + * Copyright (C) 2017 - 2020 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + **/ +package io.rml.framework.core.model + + +import java.util.{Objects, Properties} +import scala.collection.JavaConversions._ + +case class MQTTStream(properties : Properties) extends StreamDataSource { + + override def uri: ExplicitNode = { + val totalHash = Objects.hash(properties.entrySet().map(e=>s"${e.getKey}=${e.getValue}")) + Uri(totalHash.toHexString) + } +} diff --git a/src/main/scala/io/rml/framework/flink/source/JSONStream.scala b/src/main/scala/io/rml/framework/flink/source/JSONStream.scala index c8bbd834..c526db6b 100644 --- a/src/main/scala/io/rml/framework/flink/source/JSONStream.scala +++ b/src/main/scala/io/rml/framework/flink/source/JSONStream.scala @@ -25,7 +25,7 @@ package io.rml.framework.flink.source import io.rml.framework.core.internal.Logging -import io.rml.framework.core.model.{FileStream, KafkaStream, StreamDataSource, TCPSocketStream} +import io.rml.framework.core.model.{FileStream, KafkaStream, MQTTStream, StreamDataSource, TCPSocketStream} import io.rml.framework.core.vocabulary.QueryVoc import io.rml.framework.flink.item.Item import io.rml.framework.flink.item.json.JSONItem @@ -44,6 +44,7 @@ object JSONStream extends Logging { case tcpStream: TCPSocketStream => fromTCPSocketStream(tcpStream, jsonPaths) case fileStream: FileStream => fromFileStream(fileStream.path, jsonPaths) case kafkaStream: KafkaStream => fromKafkaStream(kafkaStream, jsonPaths) + case mqttStream : MQTTStream => fromMQTTStream(mqttStream, jsonPaths) } } @@ -76,4 +77,16 @@ object JSONStream extends Logging { } JSONStream(stream) } + + def fromMQTTStream(mqttStream : MQTTStream, jsonPaths : List[String])(implicit env: StreamExecutionEnvironment):JSONStream = { + + val source = new 
RichMQTTSource(mqttStream.properties) + val parallelStream = StreamUtil.paralleliseOverSlots(env.addSource(source)) + val stream: DataStream[Iterable[Item]] = parallelStream + .map { item => + JSONItem.fromStringOptionableList(item, jsonPaths) + } + + JSONStream(stream) + } } diff --git a/src/main/scala/io/rml/framework/flink/source/MQTTPropertyKeys.scala b/src/main/scala/io/rml/framework/flink/source/MQTTPropertyKeys.scala new file mode 100644 index 00000000..495f5bcb --- /dev/null +++ b/src/main/scala/io/rml/framework/flink/source/MQTTPropertyKeys.scala @@ -0,0 +1,8 @@ +package io.rml.framework.flink.source +object MQTTPropertyKeys { + val SERVER_URI = "serverUri" + val CLIENT_ID = "clientId" + val TOPIC = "mqtt.topic" + val USERNAME = "username" + val PASSWORD = "password" +} \ No newline at end of file diff --git a/src/main/scala/io/rml/framework/flink/source/RichMQTTSource.scala b/src/main/scala/io/rml/framework/flink/source/RichMQTTSource.scala new file mode 100644 index 00000000..b49f78c1 --- /dev/null +++ b/src/main/scala/io/rml/framework/flink/source/RichMQTTSource.scala @@ -0,0 +1,96 @@ +package io.rml.framework.flink.source + +import io.rml.framework.core.internal.Logging +import org.apache.flink.configuration.Configuration +import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction} +import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence +import org.eclipse.paho.client.mqttv3.{IMqttMessageListener, MqttClient, MqttConnectOptions, MqttMessage} + +import java.net.URI +import java.util.Properties +import scala.collection.JavaConversions._ + +/** + * + * @param properties + * @tparam T + */ +case class RichMQTTSource(properties: Properties) extends RichSourceFunction[String] with Logging { + + private var client: MqttClient = _ + + override def open(parameters: Configuration): Unit = { + super.open(parameters) + + val hmTargetUri = new URI(properties.getProperty("hypermediaTarget")) + val serverUri = 
hmTargetUri.toString + // remove path (solves: URI path must be empty "tcp://mosquittobroker:1883/topic") + .replace(hmTargetUri.getPath, "") + // replace mqtt scheme with tcp (solves: no NetworkModule installed for scheme "mqtt" of URI "mqtt://mosquittobroker:1883") + .replace("mqtt", "tcp") // TODO: find better solution for "no NetworkModule installed for scheme "mqtt" of URI "mqtt://mosquittobroker:1883"" + + // strips the first forward-slash from the path (e.g. /topicname becomes topicname) + val topic = hmTargetUri.getPath.substring(1) + + properties.setProperty(MQTTPropertyKeys.SERVER_URI, serverUri) + properties.setProperty(MQTTPropertyKeys.TOPIC, topic) + + logInfo("RichMQTTSource properties:") + for (x <- properties.entrySet()) { + logInfo(s"${x.getKey}: ${x.getValue}") + } + + client = new MqttClient(properties.getProperty(MQTTPropertyKeys.SERVER_URI), + properties.getProperty("clientId", MqttClient.generateClientId()), + new MemoryPersistence()) + + val connectOptions = createMqttConnectOptions(properties) + client.connect(connectOptions) + } + + protected def createMqttConnectOptions(properties: Properties) = { + val connectOptions = new MqttConnectOptions() + + if (properties.containsKey(MQTTPropertyKeys.USERNAME)) connectOptions.setUserName(properties.getProperty(MQTTPropertyKeys.USERNAME)) + if (properties.containsKey(MQTTPropertyKeys.PASSWORD)) connectOptions.setPassword(properties.getProperty(MQTTPropertyKeys.PASSWORD).toCharArray) + + connectOptions.setAutomaticReconnect(true) + connectOptions.setCleanSession(false) + + connectOptions + } + + override def run(sourceContext: SourceFunction.SourceContext[String]): Unit = { + val lock = sourceContext.getCheckpointLock + val topicName = properties.getProperty(MQTTPropertyKeys.TOPIC) + client.subscribe(topicName, new IMqttMessageListener { + override def messageArrived(topic: String, message: MqttMessage): Unit = { + lock.synchronized { + + logDebug(s"Collecting MQTT Message [Thread: 
${Thread.currentThread().getId}]" + + s"\nmessage.isDuplicate: ${message.isDuplicate}" + + s"\nmessage.isRetained: ${message.isRetained}") + + val payloadString = message.toString + sourceContext.collect(payloadString) + } + } + }) + + while (true) + Thread.sleep(1) + } + + override def cancel(): Unit = { + teardownClient() + } + + override def close(): Unit = { + super.close() + teardownClient() + } + + protected def teardownClient() = { + // TODO: PROPERLY TEARDOWN CLIENT + } +} diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/datasource.json b/src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/datasource.json new file mode 100644 index 00000000..81bf2f6a --- /dev/null +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/datasource.json @@ -0,0 +1,3 @@ +{ + "students": [] +} diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/mapping.ttl b/src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/mapping.ttl new file mode 100644 index 00000000..2720c125 --- /dev/null +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/mapping.ttl @@ -0,0 +1,63 @@ +@prefix rr: . +@prefix foaf: . +@prefix rml: . +@prefix ql: . +@prefix mail: . +@prefix xsd: . +@prefix ex: . +@prefix rmls: . +@prefix rdf: . +@prefix td: . +@prefix htv: . +@prefix hctl: . +@prefix time: . +@prefix xsd: . +@prefix wotsec: . +@prefix mqv: . +@base . + + +# API key in HTTP header +<#WotMQTTSecurity> a wotsec:BasicSecurityScheme; + wotsec:in "body"; +. 
+ +<#WoTWebAPI> a td:Thing; + td:hasPropertyAffordance [ + td:hasForm [ + # URL and content type + hctl:hasTarget "mqtt://mosquittobroker:1883/topic"; + hctl:forContentType "application/json"; + # Read only + hctl:hasOperationType "readproperty" ; + # Set MQTT stuff + mqv:controlPacketValue "SUBSCRIBE"; + mqv:options ([ mqv:optionName "qos"; mqv:optionValue "1" ] [ mqv:optionName "dup" ]); + ]; + ]; + td:hasSecurityConfiguration <#WotMQTTSecurity> ; +. + + + + a rr:TriplesMap; + + rml:logicalSource [ + rml:source <#WoTWebAPI>; + rml:referenceFormulation ql:JSONPath; + rml:iterator "$.students[*]"; + rml:frequency [ a time:GeneralDateTimeDescription; + time:minute "5"^^xsd:integer; + ]; + ]; + + rr:subjectMap [ + rr:template "http://example.com/{Name}" + ]; + + rr:predicateObjectMap [ + rr:predicate foaf:name ; + rr:objectMap [ + rml:reference "Name" + ] + ]. diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/output.ttl b/src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/output.ttl new file mode 100644 index 00000000..ca0b9916 --- /dev/null +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/output.ttl @@ -0,0 +1 @@ +# empty database diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/datasource.json b/src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/datasource.json new file mode 100644 index 00000000..b3ae4c4e --- /dev/null +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/datasource.json @@ -0,0 +1,5 @@ +{ + "students": [{ + "Name":"Venus" + }] +} diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/mapping.ttl b/src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/mapping.ttl new file mode 100644 index 00000000..14a47850 --- /dev/null +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/mapping.ttl @@ -0,0 +1,66 @@ +@prefix rr: . +@prefix foaf: . +@prefix rml: . 
+@prefix ql: . +@prefix mail: . +@prefix xsd: . +@prefix ex: . +@prefix rmls: . +@prefix rdf: . +@prefix td: . +@prefix htv: . +@prefix hctl: . +@prefix time: . +@prefix xsd: . +@prefix wotsec: . +@prefix mqv: . +@base . + + +# API key in HTTP header +<#WotMQTTSecurity> a wotsec:BasicSecurityScheme; + wotsec:in "body"; +. + +<#WoTWebAPI> a td:Thing; + td:hasPropertyAffordance [ + td:hasForm [ + # URL and content type + hctl:hasTarget "mqtt://mosquittobroker:1883/topic"; + hctl:forContentType "application/json"; + # Read only + hctl:hasOperationType "readproperty" ; + # Set MQTT stuff + mqv:controlPacketValue "SUBSCRIBE"; + mqv:options ([ mqv:optionName "qos"; mqv:optionValue "1" ] [ mqv:optionName "dup" ]); + ]; + ]; + td:hasSecurityConfiguration <#WotMQTTSecurity> ; +. + + + + a rr:TriplesMap; + + rml:logicalSource [ + rml:source <#WoTWebAPI>; + rml:referenceFormulation ql:JSONPath; + rml:iterator "$.students[*]"; + rml:frequency [ a time:GeneralDateTimeDescription; + time:minute "5"^^xsd:integer; + ]; + ]; + + + rr:subjectMap [ + rr:template "http://example.com/{Name}" + ]; + + rr:predicateObjectMap [ + rr:predicate foaf:name; + rr:objectMap [ + rml:reference "Name" + ] + ]. + + diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/output.ttl b/src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/output.ttl new file mode 100644 index 00000000..73f40ddf --- /dev/null +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/output.ttl @@ -0,0 +1,2 @@ + "Venus" . 
+ diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/datasource.json b/src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/datasource.json new file mode 100644 index 00000000..b3ae4c4e --- /dev/null +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/datasource.json @@ -0,0 +1,5 @@ +{ + "students": [{ + "Name":"Venus" + }] +} diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/mapping.ttl b/src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/mapping.ttl new file mode 100644 index 00000000..87c7f333 --- /dev/null +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/mapping.ttl @@ -0,0 +1,68 @@ +@prefix rr: . +@prefix foaf: . +@prefix rml: . +@prefix ql: . +@prefix mail: . +@prefix xsd: . +@prefix ex: . +@prefix rmls: . +@prefix rdf: . +@prefix td: . +@prefix htv: . +@prefix hctl: . +@prefix time: . +@prefix xsd: . +@prefix wotsec: . +@prefix mqv: . +@base . + + +# API key in HTTP header +<#WotMQTTSecurity> a wotsec:BasicSecurityScheme; + wotsec:in "body"; +. + +<#WoTWebAPI> a td:Thing; + td:hasPropertyAffordance [ + td:hasForm [ + # URL and content type + hctl:hasTarget "mqtt://mosquittobroker:1883/topic"; + hctl:forContentType "application/json"; + # Read only + hctl:hasOperationType "readproperty" ; + # Set MQTT stuff + mqv:controlPacketValue "SUBSCRIBE"; + mqv:options ([ mqv:optionName "qos"; mqv:optionValue "1" ] [ mqv:optionName "dup" ]); + ]; + ]; + td:hasSecurityConfiguration <#WotMQTTSecurity> ; +. + + + a rr:TriplesMap; + + + + rml:logicalSource [ + rml:source <#WoTWebAPI>; + rml:referenceFormulation ql:JSONPath; + rml:iterator "$.students[*]"; + rml:frequency [ a time:GeneralDateTimeDescription; + time:minute "5"^^xsd:integer; + ]; + ]; + + + + rr:subjectMap [ + rr:template "{Name}"; + rr:termType rr:BlankNode + ]; + + rr:predicateObjectMap [ + rr:predicate foaf:name; + rr:objectMap [ + rml:reference "Name" + ] + ]. 
+ diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/output.ttl b/src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/output.ttl new file mode 100644 index 00000000..082a16c0 --- /dev/null +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/output.ttl @@ -0,0 +1,2 @@ +_:Venus "Venus" . + diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/datasource.json b/src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/datasource.json new file mode 100644 index 00000000..0f6f797f --- /dev/null +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/datasource.json @@ -0,0 +1,6 @@ +{ + "students": [{ + "ID": 10, + "Name":"Venus" + }] +} diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/mapping.ttl b/src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/mapping.ttl new file mode 100644 index 00000000..1dbd97c3 --- /dev/null +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/mapping.ttl @@ -0,0 +1,68 @@ +@prefix rr: . +@prefix foaf: . +@prefix rml: . +@prefix ql: . +@prefix mail: . +@prefix xsd: . +@prefix ex: . +@prefix rmls: . +@prefix rdf: . +@prefix td: . +@prefix htv: . +@prefix hctl: . +@prefix time: . +@prefix xsd: . +@prefix wotsec: . +@prefix mqv: . +@base . + + + + +# API key in HTTP header +<#WotMQTTSecurity> a wotsec:BasicSecurityScheme; + wotsec:in "body"; +. + +<#WoTWebAPI> a td:Thing; + td:hasPropertyAffordance [ + td:hasForm [ + # URL and content type + hctl:hasTarget "mqtt://mosquittobroker:1883/topic"; + hctl:forContentType "application/json"; + # Read only + hctl:hasOperationType "readproperty" ; + # Set MQTT stuff + mqv:controlPacketValue "SUBSCRIBE"; + mqv:options ([ mqv:optionName "qos"; mqv:optionValue "1" ] [ mqv:optionName "dup" ]); + ]; + ]; + td:hasSecurityConfiguration <#WotMQTTSecurity> ; +. 
+ + + a rr:TriplesMap; + + rml:logicalSource [ + rml:source <#WoTWebAPI>; + rml:referenceFormulation ql:JSONPath; + rml:iterator "$.students[*]"; + rml:frequency [ a time:GeneralDateTimeDescription; + time:minute "5"^^xsd:integer; + ]; + ]; + + rr:subjectMap [ + rr:template "http://example.com/{ID}/{Name}"; + rr:class foaf:Person + ]; + + rr:predicateObjectMap [ + rr:predicate ex:id ; + rr:objectMap [ rml:reference "ID" ] + ]; + + rr:predicateObjectMap [ + rr:predicate foaf:name ; + rr:objectMap [ rml:reference "Name" ] + ]. diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/output.ttl b/src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/output.ttl new file mode 100644 index 00000000..174309a6 --- /dev/null +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/output.ttl @@ -0,0 +1,4 @@ + "Venus" . + "10" . + . + diff --git a/src/test/scala/io/rml/framework/MQTTStreamTests.scala b/src/test/scala/io/rml/framework/MQTTStreamTests.scala new file mode 100644 index 00000000..c513a855 --- /dev/null +++ b/src/test/scala/io/rml/framework/MQTTStreamTests.scala @@ -0,0 +1,95 @@ +/** + * MIT License + * + * Copyright (C) 2017 - 2020 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ +package io.rml.framework + +import io.rml.framework.util.server.TestData +import org.eclipse.paho.client.mqttv3.MqttAsyncClient +import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence + +import scala.concurrent.ExecutionContextExecutor + +object MQTTStreamTestsConfiguration { + val VIRTUAL_HOST = "/" + val HOSTNAME_BROKER = "mosquittobroker" + val PORT = 1883 + val PROTOCOL = "tcp" + val TOPIC_NAME = "topic" + val QOS = 1 + val RETAINED = false + val COMPLETION_TIMEOUT = 1000 +} + +class MQTTStreamTests extends StreamTestSync { + + var producer: MqttAsyncClient = _ + + override def setup(): Unit = { + setupProducer() + } + + private def setupProducer() = { + val protocol = MQTTStreamTestsConfiguration.PROTOCOL + val host = MQTTStreamTestsConfiguration.HOSTNAME_BROKER + val port = MQTTStreamTestsConfiguration.PORT + val serverUri = s"${protocol}://${host}:${port}" + this.producer = new MqttAsyncClient(serverUri, MqttAsyncClient.generateClientId(), new MemoryPersistence()) + logInfo("created MQTT Producer") + this.producer.connect().waitForCompletion() + logInfo("MQTT Producer connected") + } + + override protected def passingTests: Array[(String, String)] = Array( + (testFolder, "noopt") + ) + + override protected def testFolder: String = "sandbox/stream/mqtt" + + override protected def beforeTestCase(): Unit = { + logInfo("before test case") + } + + override protected def afterTestCase(): Unit = { + logInfo("after test case") + } + + override protected def 
teardown(): Unit = { + logInfo("Tearingdown MQTT Stream tests") + } + + override protected def writeData(input: List[TestData])(implicit executor: ExecutionContextExecutor): Unit = { + logInfo("writeData") + for (batch <- input) { + val topic = MQTTStreamTestsConfiguration.TOPIC_NAME + val qos = MQTTStreamTestsConfiguration.QOS + val retained = MQTTStreamTestsConfiguration.RETAINED + val completionTimeout = MQTTStreamTestsConfiguration.COMPLETION_TIMEOUT + for (in <- batch.data) { + val deliveryToken = this.producer.publish(topic, in.getBytes, qos, retained) + deliveryToken.waitForCompletion(completionTimeout) + logInfo("messaged delivered...") + } + } + } +} From 8bf14d8b07bdca472ce384e7202a64aa21632b6b Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 2 Mar 2021 09:20:52 +0000 Subject: [PATCH 12/83] Feature/118 embedded test mqtt broker. Closes #118. --- .gitlab-ci.yml | 3 --- pom.xml | 27 ++++++++++++++++--- .../RMLTC0000-JSON-STREAM-MQTT/mapping.ttl | 2 +- .../RMLTC0001a-JSON-STREAM-MQTT/mapping.ttl | 2 +- .../RMLTC0001b-JSON-STREAM-MQTT/mapping.ttl | 2 +- .../RMLTC0002a-JSON-STREAM-MQTT/mapping.ttl | 2 +- .../io/rml/framework/MQTTStreamTests.scala | 18 ++++++++++--- 7 files changed, 42 insertions(+), 14 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1a50dab1..f07556b3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -39,9 +39,6 @@ cache: # This will also run tests. .test: &test stage: test - services: - - name: eclipse-mosquitto:latest - alias: mosquittobroker script: - 'mvn $MAVEN_CLI_OPTS test' diff --git a/pom.xml b/pom.xml index b79dac94..648c28a0 100644 --- a/pom.xml +++ b/pom.xml @@ -52,12 +52,22 @@ SOFTWARE. 
mit - + + + + + io.moquette + moquette-broker + 0.13 + test + + diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/mapping.ttl b/src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/mapping.ttl index 2720c125..211c9542 100644 --- a/src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/mapping.ttl +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0000-JSON-STREAM-MQTT/mapping.ttl @@ -26,7 +26,7 @@ td:hasPropertyAffordance [ td:hasForm [ # URL and content type - hctl:hasTarget "mqtt://mosquittobroker:1883/topic"; + hctl:hasTarget "mqtt://localhost:1883/topic"; hctl:forContentType "application/json"; # Read only hctl:hasOperationType "readproperty" ; diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/mapping.ttl b/src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/mapping.ttl index 14a47850..0af5d426 100644 --- a/src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/mapping.ttl +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0001a-JSON-STREAM-MQTT/mapping.ttl @@ -26,7 +26,7 @@ td:hasPropertyAffordance [ td:hasForm [ # URL and content type - hctl:hasTarget "mqtt://mosquittobroker:1883/topic"; + hctl:hasTarget "mqtt://localhost:1883/topic"; hctl:forContentType "application/json"; # Read only hctl:hasOperationType "readproperty" ; diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/mapping.ttl b/src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/mapping.ttl index 87c7f333..89af4eed 100644 --- a/src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/mapping.ttl +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0001b-JSON-STREAM-MQTT/mapping.ttl @@ -26,7 +26,7 @@ td:hasPropertyAffordance [ td:hasForm [ # URL and content type - hctl:hasTarget "mqtt://mosquittobroker:1883/topic"; + hctl:hasTarget "mqtt://localhost:1883/topic"; hctl:forContentType "application/json"; # Read only hctl:hasOperationType 
"readproperty" ; diff --git a/src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/mapping.ttl b/src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/mapping.ttl index 1dbd97c3..dc9597e6 100644 --- a/src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/mapping.ttl +++ b/src/test/resources/sandbox/stream/mqtt/RMLTC0002a-JSON-STREAM-MQTT/mapping.ttl @@ -28,7 +28,7 @@ td:hasPropertyAffordance [ td:hasForm [ # URL and content type - hctl:hasTarget "mqtt://mosquittobroker:1883/topic"; + hctl:hasTarget "mqtt://localhost:1883/topic"; hctl:forContentType "application/json"; # Read only hctl:hasOperationType "readproperty" ; diff --git a/src/test/scala/io/rml/framework/MQTTStreamTests.scala b/src/test/scala/io/rml/framework/MQTTStreamTests.scala index c513a855..cc27d0b6 100644 --- a/src/test/scala/io/rml/framework/MQTTStreamTests.scala +++ b/src/test/scala/io/rml/framework/MQTTStreamTests.scala @@ -24,15 +24,17 @@ * */ package io.rml.framework +import io.moquette.broker.Server import io.rml.framework.util.server.TestData import org.eclipse.paho.client.mqttv3.MqttAsyncClient import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence +import java.util.Properties import scala.concurrent.ExecutionContextExecutor object MQTTStreamTestsConfiguration { val VIRTUAL_HOST = "/" - val HOSTNAME_BROKER = "mosquittobroker" + val HOSTNAME_BROKER = "localhost" val PORT = 1883 val PROTOCOL = "tcp" val TOPIC_NAME = "topic" @@ -42,13 +44,22 @@ object MQTTStreamTestsConfiguration { } class MQTTStreamTests extends StreamTestSync { - var producer: MqttAsyncClient = _ + var broker: Server = _ override def setup(): Unit = { + setupBroker() setupProducer() } + private def setupBroker() = { + val mqttProps = new Properties + mqttProps.put("port", MQTTStreamTestsConfiguration.PORT.toString); + mqttProps.put("host", MQTTStreamTestsConfiguration.HOSTNAME_BROKER); + broker = new Server(); + broker.startServer(mqttProps); + } + private def setupProducer() = 
{ val protocol = MQTTStreamTestsConfiguration.PROTOCOL val host = MQTTStreamTestsConfiguration.HOSTNAME_BROKER @@ -75,7 +86,8 @@ class MQTTStreamTests extends StreamTestSync { } override protected def teardown(): Unit = { - logInfo("Tearingdown MQTT Stream tests") + logInfo("Tearing down MQTT Stream tests") + broker.stopServer(); } override protected def writeData(input: List[TestData])(implicit executor: ExecutionContextExecutor): Unit = { From 409ee63174d5971ff747d2045e845583c5ba724d Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Thu, 4 Mar 2021 14:37:26 +0100 Subject: [PATCH 13/83] RabbitMQ is not used. --- pom.xml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pom.xml b/pom.xml index 7d474742..05af3cf9 100644 --- a/pom.xml +++ b/pom.xml @@ -150,12 +150,7 @@ SOFTWARE. - - - com.rabbitmq - amqp-client - 5.10.0 - + com.ximpleware vtd-xml From 63853d4c53ef0d94543e4a34f17930fd7595cf8b Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 9 Mar 2021 14:25:34 +0100 Subject: [PATCH 14/83] * Added DataStore as a supertrait of DataSource and future DataTarget --- .../rml/framework/core/model/DataSource.scala | 7 +--- .../rml/framework/core/model/DataStore.scala | 32 +++++++++++++++++++ .../core/model/StreamDataSource.scala | 23 +------------ .../rml/framework/flink/source/Source.scala | 13 ++++++-- 4 files changed, 45 insertions(+), 30 deletions(-) create mode 100644 src/main/scala/io/rml/framework/core/model/DataStore.scala diff --git a/src/main/scala/io/rml/framework/core/model/DataSource.scala b/src/main/scala/io/rml/framework/core/model/DataSource.scala index b1508ead..0cfcce0c 100644 --- a/src/main/scala/io/rml/framework/core/model/DataSource.scala +++ b/src/main/scala/io/rml/framework/core/model/DataSource.scala @@ -35,9 +35,4 @@ package io.rml.framework.core.model * * */ -trait DataSource extends Node{ - - def uri:ExplicitNode - - override def identifier: String = this.uri.toString -} +trait DataSource extends DataStore 
diff --git a/src/main/scala/io/rml/framework/core/model/DataStore.scala b/src/main/scala/io/rml/framework/core/model/DataStore.scala new file mode 100644 index 00000000..adcb1cf5 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/model/DataStore.scala @@ -0,0 +1,32 @@ +package io.rml.framework.core.model + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * */ +trait DataStore extends Node { + def uri:ExplicitNode + + override def identifier: String = this.uri.toString + +} diff --git a/src/main/scala/io/rml/framework/core/model/StreamDataSource.scala b/src/main/scala/io/rml/framework/core/model/StreamDataSource.scala index 426187bc..9ac8fbca 100644 --- a/src/main/scala/io/rml/framework/core/model/StreamDataSource.scala +++ b/src/main/scala/io/rml/framework/core/model/StreamDataSource.scala @@ -25,25 +25,4 @@ package io.rml.framework.core.model -import io.rml.framework.core.vocabulary.QueryVoc -import io.rml.framework.flink.source.{CSVStream, JSONStream, Stream, XMLStream} -import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment - - -trait StreamDataSource extends DataSource - -object StreamDataSource { - - - def fromLogicalSource(logicalSource: LogicalSource)(implicit env: StreamExecutionEnvironment): Stream = { - - logicalSource.source match { - case source: StreamDataSource => - logicalSource.referenceFormulation match { - case Uri(QueryVoc.Class.CSV) => CSVStream(source) - case Uri(QueryVoc.Class.XPATH) => XMLStream(source, logicalSource.iterators.distinct) - case Uri(QueryVoc.Class.JSONPATH) => JSONStream(source, logicalSource.iterators.distinct) - } - } - } -} +trait StreamDataSource extends DataSource \ No newline at end of file diff --git a/src/main/scala/io/rml/framework/flink/source/Source.scala b/src/main/scala/io/rml/framework/flink/source/Source.scala index a759ee0c..ca79d6f3 100644 --- a/src/main/scala/io/rml/framework/flink/source/Source.scala +++ b/src/main/scala/io/rml/framework/flink/source/Source.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework.flink.source -import io.rml.framework.core.model.{FileDataSource, LogicalSource, StreamDataSource} +import io.rml.framework.core.model.{FileDataSource, LogicalSource, StreamDataSource, Uri} import io.rml.framework.core.vocabulary.QueryVoc import org.apache.flink.api.scala.ExecutionEnvironment import 
org.apache.flink.streaming.api.scala.StreamExecutionEnvironment @@ -47,7 +47,16 @@ object Source { def apply(logicalSource: LogicalSource)(implicit env: ExecutionEnvironment, senv: StreamExecutionEnvironment): Source = { logicalSource.source match { case fs: FileDataSource => FileDataSet(logicalSource) - case ss: StreamDataSource => StreamDataSource.fromLogicalSource(logicalSource) + case ss: StreamDataSource => { + logicalSource.source match { + case source: StreamDataSource => + logicalSource.referenceFormulation match { + case Uri(QueryVoc.Class.CSV) => CSVStream(source) + case Uri(QueryVoc.Class.XPATH) => XMLStream(source, logicalSource.iterators.distinct) + case Uri(QueryVoc.Class.JSONPATH) => JSONStream(source, logicalSource.iterators.distinct) + } + } + } } } From 8c19776f6707a40b74d5ee2f9870533a274ed728 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Thu, 18 Mar 2021 13:46:34 +0100 Subject: [PATCH 15/83] Prepare next development cycle --- pom.xml | 2 +- src/main/scala/io/rml/framework/flink/util/ParameterUtil.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index f224437e..4954e499 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ SOFTWARE. 
io.rml RMLStreamer - 2.1.0-RELEASE + 2.1.1-SNAPSHOT jar RMLStreamer diff --git a/src/main/scala/io/rml/framework/flink/util/ParameterUtil.scala b/src/main/scala/io/rml/framework/flink/util/ParameterUtil.scala index 453a0b70..e074b360 100644 --- a/src/main/scala/io/rml/framework/flink/util/ParameterUtil.scala +++ b/src/main/scala/io/rml/framework/flink/util/ParameterUtil.scala @@ -78,7 +78,7 @@ object ParameterUtil { val parser = new scopt.OptionParser[ParameterConfig]("RMLStreamer") { override def showUsageOnError = true - head("RMLStreamer", "2.0.1-SNAPSHOT") + head("RMLStreamer", "2.1.0-SNAPSHOT") opt[String]('j', "job-name").valueName("") .optional() From 3cb2ace4e70fd2cd46420835ae32431c6c1e8e47 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Mon, 22 Mar 2021 12:59:33 +0000 Subject: [PATCH 16/83] * Reduce dependencies on io.rml.framework.flink package to decouple the "business logic" from the Flink framework. * Fixes https://gitlab.ilabt.imec.be/rml/proc/rml-streamer/-/issues/132 * Fixes example10 in StatementEngineTest --- src/main/scala/io/rml/framework/Main.scala | 23 +++++++------ .../extractors/std/StdGraphMapExtractor.scala | 4 +-- .../std/StdLogicalSourceExtractor.scala | 4 +-- .../extractors/std/StdMappingExtractor.scala | 2 +- .../std/StdObjectMapExtractor.scala | 6 ++-- .../std/StdPredicateMapExtractor.scala | 2 +- .../std/StdPredicateObjectMapExtractor.scala | 2 +- .../std/StdSubjectMapExtractor.scala | 2 +- .../std/StdTriplesMapExtractor.scala | 4 +-- .../core/function/FunctionLoader.scala | 8 +++-- .../core/function/FunctionUtils.scala | 9 +++-- .../core/function/model/DynamicFunction.scala | 28 +++++++++------- .../core/function/model/Function.scala | 8 ++--- .../core/function/model/Parameter.scala | 5 ++- .../model/std/StdRandomFunction.scala | 25 -------------- .../model/std/StdUpperCaseFunction.scala | 22 ------------- .../core/function/std/StdFunctionLoader.scala | 11 ++++--- .../RDFQuad.scala => item/EmptyItem.scala} | 16 ++++-----
.../framework/{flink => core}/item/Item.scala | 2 +- .../{flink => core}/item/JoinedItem.scala | 2 +- .../{flink => core}/item/JoinedItems.scala | 2 +- .../{flink => core}/item/csv/CSVHeader.scala | 7 ++-- .../{flink => core}/item/csv/CSVItem.scala | 8 ++--- .../{flink => core}/item/json/JSONItem.scala | 15 ++++----- .../{flink => core}/item/xml/XMLItem.scala | 29 ++++++++-------- .../item/xml}/XMLIterator.scala | 7 ++-- .../rml/framework/core/model/DataSource.scala | 2 +- .../io/rml/framework/core/model/Entity.scala | 4 ++- .../framework/core/model/FileDataSource.scala | 9 +++-- .../framework/core/model/KafkaStream.scala | 6 ---- .../io/rml/framework/core/model/Literal.scala | 13 +++++++- .../io/rml/framework/core/model/Node.scala | 2 +- .../core/model/StreamDataSource.scala | 23 +------------ .../io/rml/framework/core/model/Uri.scala | 10 +++--- .../framework/core/model/rdf/RDFNode.scala | 2 +- .../core/model/rdf/RDFResource.scala | 9 +++-- .../model/rdf/SerializableRDF.scala} | 33 +++++++------------ .../core/model/rdf/jena/JenaGraph.scala | 12 +++---- .../core/model/rdf/jena/JenaLiteral.scala | 6 +++- .../framework/core/model/std/StdBlank.scala | 7 +++- .../rml/framework/core/model/std/StdUri.scala | 1 + .../{flink => core}/util/CSVConfig.scala | 2 +- .../{flink => core}/util/ParameterUtil.scala | 6 ++-- .../io/rml/framework/core/util/Util.scala | 11 ++++++- .../{flink => core}/util/XMLNamespace.scala | 2 +- .../io/rml/framework/engine/Engine.scala | 11 +++---- .../rml/framework/engine/PostProcessor.scala | 16 ++++----- .../io/rml/framework/engine/Processor.scala | 2 +- .../framework/engine/StaticProcessor.scala | 2 +- .../framework/engine/StreamProcessor.scala | 2 +- .../FunctionMapGeneratorAssembler.scala | 13 ++++---- .../statement/GraphGeneratorAssembler.scala | 5 ++- .../statement/ObjectGeneratorAssembler.scala | 2 +- .../PredicateGeneratorAssembler.scala | 6 ++-- .../PredicateObjectGeneratorAssembler.scala | 2 +- .../engine/statement/Statement.scala | 30 
++++++++--------- .../engine/statement/StatementEngine.scala | 9 ++--- .../statement/StatementsAssembler.scala | 2 +- .../statement/SubjectGeneratorAssembler.scala | 4 +-- .../statement/TermMapGeneratorAssembler.scala | 2 +- .../engine/statement/TermMapGenerators.scala | 6 ++-- .../function}/FnOEnvironmentLoader.scala | 7 ++-- .../flink/source/CSVInputFormat.scala | 4 +-- .../framework/flink/source/CSVStream.scala | 13 ++++---- .../framework/flink/source/FileDataSet.scala | 16 ++++----- .../flink/source/JSONInputFormat.scala | 15 +++++---- .../framework/flink/source/JSONStream.scala | 9 +++-- .../rml/framework/flink/source/Source.scala | 22 ++++++------- .../rml/framework/flink/source/Stream.scala | 2 +- .../flink/source/XMLInputFormat.scala | 10 +++--- .../framework/flink/source/XMLStream.scala | 9 +++-- .../RMLFNOTC0004-CSV/mapping.ttl | 2 +- .../rml/framework/OutputGenerationTest.scala | 2 +- .../io/rml/framework/engine/CSVItemTest.scala | 2 +- .../io/rml/framework/engine/EngineTest.scala | 2 +- .../engine/StatementEngineTest.scala | 2 -- .../io/rml/framework/util/TestUtil.scala | 1 + 77 files changed, 295 insertions(+), 350 deletions(-) delete mode 100644 src/main/scala/io/rml/framework/core/function/model/std/StdRandomFunction.scala delete mode 100644 src/main/scala/io/rml/framework/core/function/model/std/StdUpperCaseFunction.scala rename src/main/scala/io/rml/framework/core/{model/rdf/RDFQuad.scala => item/EmptyItem.scala} (83%) rename src/main/scala/io/rml/framework/{flink => core}/item/Item.scala (97%) rename src/main/scala/io/rml/framework/{flink => core}/item/JoinedItem.scala (97%) rename src/main/scala/io/rml/framework/{flink => core}/item/JoinedItems.scala (97%) rename src/main/scala/io/rml/framework/{flink => core}/item/csv/CSVHeader.scala (98%) rename src/main/scala/io/rml/framework/{flink => core}/item/csv/CSVItem.scala (95%) rename src/main/scala/io/rml/framework/{flink => core}/item/json/JSONItem.scala (91%) rename 
src/main/scala/io/rml/framework/{flink => core}/item/xml/XMLItem.scala (93%) rename src/main/scala/io/rml/framework/{flink/source => core/item/xml}/XMLIterator.scala (97%) rename src/main/scala/io/rml/framework/{flink/sink/FlinkTriple.scala => core/model/rdf/SerializableRDF.scala} (60%) rename src/main/scala/io/rml/framework/{flink => core}/util/CSVConfig.scala (97%) rename src/main/scala/io/rml/framework/{flink => core}/util/ParameterUtil.scala (97%) rename src/main/scala/io/rml/framework/{flink => core}/util/XMLNamespace.scala (98%) rename src/main/scala/io/rml/framework/{core/function/flink => flink/function}/FnOEnvironmentLoader.scala (98%) diff --git a/src/main/scala/io/rml/framework/Main.scala b/src/main/scala/io/rml/framework/Main.scala index 600c9435..8db7d949 100644 --- a/src/main/scala/io/rml/framework/Main.scala +++ b/src/main/scala/io/rml/framework/Main.scala @@ -28,26 +28,25 @@ package io.rml.framework import io.rml.framework.api.{FnOEnvironment, RMLEnvironment} import io.rml.framework.core.extractors.TriplesMapsCache -import io.rml.framework.core.function.flink.{FnOEnvironmentLoader, FnOEnvironmentStreamLoader, RichItemIdentityFunction, RichStreamItemIdentityFunction} import io.rml.framework.core.internal.Logging +import io.rml.framework.core.item.{EmptyItem, Item, JoinedItem} import io.rml.framework.core.model._ -import io.rml.framework.core.util.{StreamerConfig, Util} +import io.rml.framework.core.util.ParameterUtil.{OutputSinkOption, PostProcessorOption} +import io.rml.framework.core.util.{ParameterUtil, StreamerConfig, Util} import io.rml.framework.engine._ import io.rml.framework.engine.statement.StatementEngine import io.rml.framework.flink.connector.kafka.{RMLPartitioner, UniversalKafkaConnectorFactory} -import io.rml.framework.flink.item.{Item, JoinedItem} -import io.rml.framework.flink.source.{EmptyItem, FileDataSet, Source} -import io.rml.framework.flink.util.ParameterUtil -import io.rml.framework.flink.util.ParameterUtil.{OutputSinkOption, 
PostProcessorOption} +import io.rml.framework.flink.function.{FnOEnvironmentLoader, FnOEnvironmentStreamLoader, RichItemIdentityFunction, RichStreamItemIdentityFunction} +import io.rml.framework.flink.source.{FileDataSet, Source} import org.apache.flink.api.common.serialization.{SimpleStringEncoder, SimpleStringSchema} import org.apache.flink.api.scala._ import org.apache.flink.core.fs.FileSystem.WriteMode import org.apache.flink.core.fs.Path import org.apache.flink.streaming.api.CheckpointingMode import org.apache.flink.streaming.api.functions.ProcessFunction -import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner -import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.{OnCheckpointRollingPolicy} +import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy +import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} import org.apache.flink.streaming.api.scala.{DataStream, OutputTag, StreamExecutionEnvironment} import org.apache.flink.util.Collector @@ -537,7 +536,7 @@ object Main extends Logging { // filter out all items that do not contain the childs join condition .filter(item => { if (tm.joinCondition.isDefined) { - item.refer(tm.joinCondition.get.child.toString).isDefined + item.refer(tm.joinCondition.get.child.value).isDefined } else true // if there are no join conditions all items can pass // filter out all empty items (some iterators can emit empty items) @@ -555,7 +554,7 @@ object Main extends Logging { // filter out all items that do not contain the parents join condition .filter(item => { if (tm.joinCondition.isDefined) { - item.refer(tm.joinCondition.get.parent.toString).isDefined + item.refer(tm.joinCondition.get.parent.value).isDefined } else true // if there are no join conditions all items 
can pass // filter out all empty items @@ -570,10 +569,10 @@ object Main extends Logging { val joined: JoinDataSet[Item, Item] = childDataset.join(parentDataset) .where(item => { - item.refer(tm.joinCondition.get.child.toString).get.head + item.refer(tm.joinCondition.get.child.value).get.head }) // empty fields are already filtered .equalTo(item => { - item.refer(tm.joinCondition.get.parent.toString).get.head + item.refer(tm.joinCondition.get.parent.value).get.head }) // empty fields are already filtered joined.name("Join child and parent.") diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdGraphMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdGraphMapExtractor.scala index b082888f..94698375 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdGraphMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdGraphMapExtractor.scala @@ -77,7 +77,7 @@ class StdGraphMapExtractor extends GraphMapExtractor { def extractGraph(resource: RDFResource): Option[GraphMap] = { - Some(GraphMap(resource.uri.toString, List(), Some(resource.uri), None, None, extractTermType(resource))) + Some(GraphMap(resource.uri.value, List(), Some(resource.uri), None, None, extractTermType(resource))) } @@ -87,7 +87,7 @@ class StdGraphMapExtractor extends GraphMapExtractor { val constant = extractConstant(resource) val reference = extractReference(resource) val functionMap = FunctionMapExtractor().extract(resource) - Some(GraphMap(constant.getOrElse(resource.uri).toString, functionMap, constant, reference, template, termType)) + Some(GraphMap(constant.getOrElse(resource.uri).value, functionMap, constant, reference, template, termType)) } } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalSourceExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalSourceExtractor.scala index aa22bf04..84c8773e 100644 --- 
a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalSourceExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalSourceExtractor.scala @@ -29,8 +29,8 @@ import io.rml.framework.core.extractors.{DataSourceExtractor, LogicalSourceExtra import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.rdf.RDFResource import io.rml.framework.core.model.{DataSource, Literal, LogicalSource, Uri} +import io.rml.framework.core.util.Util.DEFAULT_ITERATOR_MAP import io.rml.framework.core.vocabulary.RMLVoc -import io.rml.framework.flink.source.Source.DEFAULT_ITERATOR_MAP import io.rml.framework.shared.RMLException /** @@ -103,7 +103,7 @@ class StdLogicalSourceExtractor(dataSourceExtractor: DataSourceExtractor) if (properties.size > 1) throw new RMLException(resource.uri + ": invalid amount of iterators.") if (properties.isEmpty) { - return DEFAULT_ITERATOR_MAP(referenceFormulation.uri) + return DEFAULT_ITERATOR_MAP(referenceFormulation.value) } properties.head match { diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdMappingExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdMappingExtractor.scala index de7db01f..f1087010 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdMappingExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdMappingExtractor.scala @@ -44,7 +44,7 @@ class StdMappingExtractor extends MappingExtractor[RMLMapping] { override def extract(graph: RDFGraph): RMLMapping = { val triplesMapsExtractor = TriplesMapExtractor() val triplesMaps = triplesMapsExtractor.extract(graph) - RMLMapping(triplesMaps, graph.uri.toString) + RMLMapping(triplesMaps, graph.uri.value) } } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala index d5416075..5480ab50 100644 --- 
a/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala @@ -102,7 +102,7 @@ class StdObjectMapExtractor extends ObjectMapExtractor { val language = extractLanguage(resource) val datatype = extractDatatype(resource) val functionMap = FunctionMapExtractor().extract(resource) - ObjectMap(resource.uri.toString, functionMap, constant, reference, template, termType, datatype, language, parentTriplesMap, joinCondition) + ObjectMap(resource.uri.identifier, functionMap, constant, reference, template, termType, datatype, language, parentTriplesMap, joinCondition) } def extractDatatype(resource: RDFResource): Option[Uri] = { @@ -163,7 +163,7 @@ class StdObjectMapExtractor extends ObjectMapExtractor { case literal: Literal => Some(literal) case resource: RDFResource => throw new RMLException(resource.uri + ": invalid language type.") } - val tag = languageLiteral.get.toString + val tag = languageLiteral.get.value if (!Util.isValidrrLanguage(tag)) throw new RMLException(s"Language tag '$tag' does not conform to BCP 47 standards") @@ -182,7 +182,7 @@ class StdObjectMapExtractor extends ObjectMapExtractor { if (properties.isEmpty) return None properties.head match { - case resource: RDFResource => Some(resource.toString) + case resource: RDFResource => Some(resource.value) case literal: Literal => throw new RMLException(literal.toString + ": invalid parent triple map.") } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala index d0bff3b5..8aca228b 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala @@ -94,7 +94,7 @@ class StdPredicateMapExtractor() extends PredicateMapExtractor { val reference = 
extractReference(resource) val functionMap = FunctionMapExtractor().extract(resource) - PredicateMap(resource.uri.toString, functionMap, constant, reference, template, termType) + PredicateMap(resource.uri.value, functionMap, constant, reference, template, termType) } } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateObjectMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateObjectMapExtractor.scala index 6be34471..57836bb7 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateObjectMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateObjectMapExtractor.scala @@ -78,7 +78,7 @@ class StdPredicateObjectMapExtractor extends PredicateObjectMapExtractor with Lo val graphMap = GraphMapExtractor().extract(resource) this.logDebug("extractPredicateObjectMap : returning resulting PredicateObjectMap") - PredicateObjectMap(resource.uri.toString, objectMaps, predicateMaps, graphMap) + PredicateObjectMap(resource.uri.value, objectMaps, predicateMaps, graphMap) } } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala index e10810ca..7cf37941 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala @@ -93,7 +93,7 @@ class StdSubjectMapExtractor extends SubjectMapExtractor with Logging { ", graphMap -> " + graphMap + ", class -> " + _class) - SubjectMap(resource.uri.toString, _class, functionMap, constant, reference, template, termType, graphMap) + SubjectMap(resource.uri.value, _class, functionMap, constant, reference, template, termType, graphMap) } override def extractTermType(resource: RDFResource): Option[Uri] = { diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala 
b/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala index 298fce3c..c49bc120 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala @@ -110,7 +110,7 @@ object StdTriplesMapExtractor extends TriplesMapExtractor with Logging { * @return */ def extractTriplesMapProperties(resource: RDFResource): Option[TriplesMap] = { - val resourceStr = resource.toString; + val resourceStr = resource.value; // errors can occur during extraction of sub structures if (TriplesMapsCache.contains(resourceStr)) { TriplesMapsCache.get(resourceStr) @@ -121,7 +121,7 @@ object StdTriplesMapExtractor extends TriplesMapExtractor with Logging { val triplesMap = TriplesMap(PredicateObjectMapExtractor().extract(resource), LogicalSourceExtractor().extract(resource), SubjectMapExtractor().extract(resource), - resource.uri.toString, + resource.uri.value, GraphMapExtractor().extract(resource) ) val t = TriplesMapsCache.put(resourceStr, triplesMap); diff --git a/src/main/scala/io/rml/framework/core/function/FunctionLoader.scala b/src/main/scala/io/rml/framework/core/function/FunctionLoader.scala index af4f4eaf..affa8df8 100644 --- a/src/main/scala/io/rml/framework/core/function/FunctionLoader.scala +++ b/src/main/scala/io/rml/framework/core/function/FunctionLoader.scala @@ -31,7 +31,7 @@ abstract class FunctionLoader extends Logging { * @return function (if successful) */ def createFunction(uri: Uri): Option[Function] = { - logDebug(s"createFunction: ${uri.uri}") + logDebug(s"createFunction: ${uri.value}") val optFunctionMetaData = functionMap.get(uri) @@ -42,7 +42,7 @@ abstract class FunctionLoader extends Logging { } else { // when the function uri is not present in the function map, complain. 
val availableFunctionURIs = functionMap.keys.map(u=>u.toString) - throw new IOException(s"The function with URI ${uri.toString} can not be found.\n" + + throw new IOException(s"The function with URI ${uri.value} can not be found.\n" + s"The available function URIs are: " + availableFunctionURIs) } } @@ -124,7 +124,9 @@ object FunctionLoader extends Logging{ if(functionDescriptionsGraph.isDefined) { singletonFunctionLoader = Some(StdFunctionLoader(functionDescriptionsGraph.get)) // now parse the mappings - functionMappingPaths.foreach(fmp=>singletonFunctionLoader.get.parseFunctionMapping(new File(fmp.path))) + functionMappingPaths.foreach(fmp => { + singletonFunctionLoader.get.parseFunctionMapping(new File(fmp.path)) + }) } else { logWarning("No function graph found. Continuing without loading functions.") diff --git a/src/main/scala/io/rml/framework/core/function/FunctionUtils.scala b/src/main/scala/io/rml/framework/core/function/FunctionUtils.scala index 7a1fbff2..7e400ee9 100644 --- a/src/main/scala/io/rml/framework/core/function/FunctionUtils.scala +++ b/src/main/scala/io/rml/framework/core/function/FunctionUtils.scala @@ -1,9 +1,5 @@ package io.rml.framework.core.function -import java.io.File -import java.net.MalformedURLException -import java.time.Instant - import io.rml.framework.api.RMLEnvironment import io.rml.framework.core.function.model.Parameter import io.rml.framework.core.internal.Logging @@ -11,6 +7,9 @@ import io.rml.framework.core.model.{Entity, Literal, Uri} import io.rml.framework.core.vocabulary.RMLVoc import io.rml.framework.shared.RMLException +import java.io.File +import java.net.MalformedURLException +import java.time.Instant import scala.reflect.internal.util.ScalaClassLoader.URLClassLoader object FunctionUtils extends Logging { @@ -72,7 +71,7 @@ object FunctionUtils extends Logging { case Uri(RMLVoc.Type.RDF_LIST) => classOf[List[_]] case Uri(RMLVoc.Type.XSD_ANY) => classOf[Any] case Uri(RMLVoc.Type.RDF_OBJECT) => classOf[Any] - case _ => 
throw new RMLException(s"Type $uri not supported for parameter") + case _ => throw new RMLException(s"Type ${uri.value} not supported for parameter") } } diff --git a/src/main/scala/io/rml/framework/core/function/model/DynamicFunction.scala b/src/main/scala/io/rml/framework/core/function/model/DynamicFunction.scala index 7b8aedfb..10f35e8c 100644 --- a/src/main/scala/io/rml/framework/core/function/model/DynamicFunction.scala +++ b/src/main/scala/io/rml/framework/core/function/model/DynamicFunction.scala @@ -1,12 +1,12 @@ package io.rml.framework.core.function.model -import java.io.{File, IOException, ObjectInputStream, ObjectOutputStream} -import java.lang.reflect.Method - -import io.rml.framework.api.{FnOEnvironment, RMLEnvironment} +import io.rml.framework.api.FnOEnvironment import io.rml.framework.core.function.{FunctionUtils, ReflectionUtils} +import io.rml.framework.core.model.rdf.SerializableRDFQuad import io.rml.framework.core.model.{Entity, Literal, Uri} -import io.rml.framework.flink.sink.FlinkRDFQuad + +import java.io.{File, IOException, ObjectInputStream, ObjectOutputStream} +import java.lang.reflect.Method /** @@ -50,7 +50,7 @@ case class DynamicFunction(identifier: String, metaData: FunctionMetaData) exten optMethod } - override def execute(paramTriples: List[FlinkRDFQuad]): Option[Iterable[Entity]] = { + override def execute(paramTriples: List[SerializableRDFQuad]): Option[Iterable[Entity]] = { // if a group (key: uri) results in a list with 1 element, extract that single element // otherwise, when a group has a list with more than 1 element, keep it as a list val argResourcesGroupedByUri = paramTriples.groupBy(_.predicate).map { @@ -65,10 +65,10 @@ case class DynamicFunction(identifier: String, metaData: FunctionMetaData) exten val argObjectsGroupedByUri = argResourcesGroupedByUri.map { pair => { - if (pair._2.isInstanceOf[Iterable[FlinkRDFQuad]]) { - pair._1 -> pair._2.asInstanceOf[Iterable[FlinkRDFQuad]].map(x => x.`object`.value.toString) + if 
(pair._2.isInstanceOf[Iterable[SerializableRDFQuad]]) { + pair._1 -> pair._2.asInstanceOf[Iterable[SerializableRDFQuad]].map(x => x.`object`.value.value) } else { - pair._1 -> pair._2.asInstanceOf[FlinkRDFQuad].`object`.value.toString + pair._1 -> pair._2.asInstanceOf[SerializableRDFQuad].`object`.value.value } } @@ -92,10 +92,14 @@ case class DynamicFunction(identifier: String, metaData: FunctionMetaData) exten try { val output = method.invoke(null, castParameterValues: _*) - val result = metaData.outputParam.flatMap(elem => elem.getValue(output)) map (elem => Literal(elem.toString)) + val result = metaData.outputParam + .flatMap(elem => elem.getValue(output)) + .map (elem => { + Literal(elem.toString) + }) Some(result) } catch { - case e: Exception => { + case e: Throwable => { logError(s"The following exception occurred when invoking the method ${method.getName}: ${e.getMessage}." + s"\nThe result will be set to None.") None @@ -121,7 +125,7 @@ case class DynamicFunction(identifier: String, metaData: FunctionMetaData) exten override def execute(arguments: Map[Uri, String]): Option[Iterable[Entity]] = { val inputParams = metaData.inputParam // casted to List[AnyRef] since method.invoke(...) 
only accepts reference type but not primitive type of Scala - val paramsOrdered = arguments.groupBy(_._1.uri).map(_._2.asInstanceOf[AnyRef]).toList + val paramsOrdered = arguments.groupBy(_._1.value).map(_._2.asInstanceOf[AnyRef]).toList val outputParams = metaData.outputParam diff --git a/src/main/scala/io/rml/framework/core/function/model/Function.scala b/src/main/scala/io/rml/framework/core/function/model/Function.scala index bbfb5461..d86de34a 100644 --- a/src/main/scala/io/rml/framework/core/function/model/Function.scala +++ b/src/main/scala/io/rml/framework/core/function/model/Function.scala @@ -1,10 +1,10 @@ package io.rml.framework.core.function.model -import java.lang.reflect.Method - import io.rml.framework.core.internal.Logging +import io.rml.framework.core.model.rdf.SerializableRDFQuad import io.rml.framework.core.model.{Entity, Node, Uri} -import io.rml.framework.flink.sink.FlinkRDFQuad + +import java.lang.reflect.Method abstract class Function extends Node with Logging{ @@ -13,7 +13,7 @@ abstract class Function extends Node with Logging{ def getMethod: Option[Method] - def execute(paramTriples: List[FlinkRDFQuad]): Option[Iterable[Entity]] + def execute(paramTriples: List[SerializableRDFQuad]): Option[Iterable[Entity]] @deprecated("Please use execute(paramTriples: List[FlinkRDFQuad]) instead") def execute(argumentsMap: Map[Uri, String]): Option[Iterable[Entity]] diff --git a/src/main/scala/io/rml/framework/core/function/model/Parameter.scala b/src/main/scala/io/rml/framework/core/function/model/Parameter.scala index 7136d926..cc199fbd 100644 --- a/src/main/scala/io/rml/framework/core/function/model/Parameter.scala +++ b/src/main/scala/io/rml/framework/core/function/model/Parameter.scala @@ -1,9 +1,8 @@ package io.rml.framework.core.function.model -import java.time.Instant - import io.rml.framework.core.model.{Node, Uri} +import java.time.Instant import scala.util.parsing.json.JSON /** @@ -33,7 +32,7 @@ abstract class Parameter extends Node { val 
paraValue: Option[String] val position: Int - override def identifier: String = paramUri.toString + " " + paraValue.getOrElse("None") + override def identifier: String = paramUri.value + " " + paraValue.getOrElse("None") def getValue: Option[Any] = { getValue(paraValue.getOrElse(throw new IllegalStateException(s"${this}'s value option is empty."))) diff --git a/src/main/scala/io/rml/framework/core/function/model/std/StdRandomFunction.scala b/src/main/scala/io/rml/framework/core/function/model/std/StdRandomFunction.scala deleted file mode 100644 index 0e3b5fe3..00000000 --- a/src/main/scala/io/rml/framework/core/function/model/std/StdRandomFunction.scala +++ /dev/null @@ -1,25 +0,0 @@ -package io.rml.framework.core.function.model.std - -import java.lang.reflect.Method - -import io.rml.framework.core.function.model.Function -import io.rml.framework.core.model.{Entity, Literal, Uri} -import io.rml.framework.core.vocabulary.RMLVoc -import io.rml.framework.flink.sink.FlinkRDFQuad - -import scala.util.Random - -case class StdRandomFunction(identifier:String = RMLVoc.Property.GREL_RANDOM) extends Function{ - private val random = new Random() - - override def execute(arguments: Map[Uri, String]): Option[Iterable[Entity]] = { - Some(List(Literal(random.nextString(10)))) - } - - - override def execute(paramTriples: List[FlinkRDFQuad]): Option[Iterable[Entity]] = ??? 
- - override def getMethod: Option[Method] = { - None - } -} diff --git a/src/main/scala/io/rml/framework/core/function/model/std/StdUpperCaseFunction.scala b/src/main/scala/io/rml/framework/core/function/model/std/StdUpperCaseFunction.scala deleted file mode 100644 index dbf6bb9c..00000000 --- a/src/main/scala/io/rml/framework/core/function/model/std/StdUpperCaseFunction.scala +++ /dev/null @@ -1,22 +0,0 @@ -package io.rml.framework.core.function.model.std - -import java.lang.reflect.Method - -import io.rml.framework.core.function.model.Function -import io.rml.framework.core.model.{Entity, Literal, Uri} -import io.rml.framework.core.vocabulary.{Namespaces, RMLVoc} -import io.rml.framework.flink.sink.FlinkRDFQuad - -case class StdUpperCaseFunction(identifier: String = RMLVoc.Property.GREL_UPPERCASE) extends Function { - override def execute(arguments: Map[Uri, String]): Option[Iterable[Entity]] = { - val parameter = arguments.get(Uri(Namespaces("grel", "valueParameter"))) - - parameter.map(string => List(Literal(string))) - } - - override def execute(paramTriples: List[FlinkRDFQuad]): Option[Iterable[Entity]] = ??? 
- - override def getMethod: Option[Method] = { - None - } -} diff --git a/src/main/scala/io/rml/framework/core/function/std/StdFunctionLoader.scala b/src/main/scala/io/rml/framework/core/function/std/StdFunctionLoader.scala index 941e24b9..d0ce15d9 100644 --- a/src/main/scala/io/rml/framework/core/function/std/StdFunctionLoader.scala +++ b/src/main/scala/io/rml/framework/core/function/std/StdFunctionLoader.scala @@ -2,8 +2,8 @@ package io.rml.framework.core.function.std import io.rml.framework.core.function.model.{FunctionMetaData, Parameter} import io.rml.framework.core.function.{FunctionLoader, FunctionUtils} -import io.rml.framework.core.model.Uri import io.rml.framework.core.model.rdf.{RDFGraph, RDFNode, RDFResource} +import io.rml.framework.core.model.{Literal, Uri} import io.rml.framework.core.util.Util import io.rml.framework.core.vocabulary.RMLVoc import io.rml.framework.shared.{FnOException, RMLException} @@ -31,7 +31,8 @@ case class StdFunctionLoader private (functionDescriptionTriplesGraph : RDFGraph val functionUri = map.listProperties(RMLVoc.Property.FNO_FUNCTION).head.asInstanceOf[RDFResource].uri val methodMappingResource = map.listProperties(RMLVoc.Property.FNO_METHOD_MAPPING).head.asInstanceOf[RDFResource] - val methodName = methodMappingResource.listProperties(RMLVoc.Property.FNOM_METHOD_NAME).head.toString + val methodNode = methodMappingResource.listProperties(RMLVoc.Property.FNOM_METHOD_NAME).head.asInstanceOf[Literal] + val methodName = methodNode.value val implementationResource = map.listProperties(RMLVoc.Property.FNO_IMPLEMENTATION).head.asInstanceOf[RDFResource] val className = Util.getLiteral(implementationResource.listProperties(RMLVoc.Property.FNOI_CLASS_NAME).head) @@ -87,8 +88,8 @@ case class StdFunctionLoader private (functionDescriptionTriplesGraph : RDFGraph if(paramUri.isEmpty) throw new FnOException(s"Parameter Uri not defined for parameter resource: ${inputResource.uri}") - - val typeClass = 
FunctionUtils.getTypeClass(Uri(paramType.get.toString)) - Parameter(typeClass, Uri(paramUri.get.toString), pos) + val paramTypeResource = paramType.get.asInstanceOf[RDFResource] + val typeClass = FunctionUtils.getTypeClass(paramTypeResource.uri) + Parameter(typeClass, Uri(paramUri.get.identifier), pos) } } diff --git a/src/main/scala/io/rml/framework/core/model/rdf/RDFQuad.scala b/src/main/scala/io/rml/framework/core/item/EmptyItem.scala similarity index 83% rename from src/main/scala/io/rml/framework/core/model/rdf/RDFQuad.scala rename to src/main/scala/io/rml/framework/core/item/EmptyItem.scala index 633eda45..c0ec7da8 100644 --- a/src/main/scala/io/rml/framework/core/model/rdf/RDFQuad.scala +++ b/src/main/scala/io/rml/framework/core/item/EmptyItem.scala @@ -1,7 +1,9 @@ +package io.rml.framework.core.item + /** * MIT License * - * Copyright (C) 2017 - 2020 RDF Mapping Language (RML) + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -21,13 +23,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * - **/ - -package io.rml.framework.core.model.rdf - -/** - * Created by wmaroy on 22.08.17. - */ -trait RDFQuad extends RDFTriple { + * */ +class EmptyItem() extends Item { + override def refer(reference: String): Option[List[String]] = None + override def tag = "" } diff --git a/src/main/scala/io/rml/framework/flink/item/Item.scala b/src/main/scala/io/rml/framework/core/item/Item.scala similarity index 97% rename from src/main/scala/io/rml/framework/flink/item/Item.scala rename to src/main/scala/io/rml/framework/core/item/Item.scala index ae383cc9..828cd34f 100644 --- a/src/main/scala/io/rml/framework/flink/item/Item.scala +++ b/src/main/scala/io/rml/framework/core/item/Item.scala @@ -22,7 +22,7 @@ * THE SOFTWARE. 
* **/ -package io.rml.framework.flink.item +package io.rml.framework.core.item import org.apache.jena.graph.BlankNodeId diff --git a/src/main/scala/io/rml/framework/flink/item/JoinedItem.scala b/src/main/scala/io/rml/framework/core/item/JoinedItem.scala similarity index 97% rename from src/main/scala/io/rml/framework/flink/item/JoinedItem.scala rename to src/main/scala/io/rml/framework/core/item/JoinedItem.scala index 6d08f43f..1ca0a029 100644 --- a/src/main/scala/io/rml/framework/flink/item/JoinedItem.scala +++ b/src/main/scala/io/rml/framework/core/item/JoinedItem.scala @@ -22,7 +22,7 @@ * THE SOFTWARE. * **/ -package io.rml.framework.flink.item +package io.rml.framework.core.item case class JoinedItem(child: Item, parent: Item) extends Item { override def refer(reference: String): Option[List[String]] = { diff --git a/src/main/scala/io/rml/framework/flink/item/JoinedItems.scala b/src/main/scala/io/rml/framework/core/item/JoinedItems.scala similarity index 97% rename from src/main/scala/io/rml/framework/flink/item/JoinedItems.scala rename to src/main/scala/io/rml/framework/core/item/JoinedItems.scala index 95284956..66ce40b0 100644 --- a/src/main/scala/io/rml/framework/flink/item/JoinedItems.scala +++ b/src/main/scala/io/rml/framework/core/item/JoinedItems.scala @@ -22,7 +22,7 @@ * THE SOFTWARE. 
* **/ -package io.rml.framework.flink.item +package io.rml.framework.core.item case class JoinedItems(items: List[Item]) { def apply(index: Int): Item = items(index) diff --git a/src/main/scala/io/rml/framework/flink/item/csv/CSVHeader.scala b/src/main/scala/io/rml/framework/core/item/csv/CSVHeader.scala similarity index 98% rename from src/main/scala/io/rml/framework/flink/item/csv/CSVHeader.scala rename to src/main/scala/io/rml/framework/core/item/csv/CSVHeader.scala index 199cf86b..2eff227e 100644 --- a/src/main/scala/io/rml/framework/flink/item/csv/CSVHeader.scala +++ b/src/main/scala/io/rml/framework/core/item/csv/CSVHeader.scala @@ -23,13 +23,12 @@ * **/ -package io.rml.framework.flink.item.csv - -import java.io.{IOException, StringReader} -import java.nio.file.Path +package io.rml.framework.core.item.csv import org.apache.commons.csv.CSVFormat +import java.io.{IOException, StringReader} +import java.nio.file.Path import scala.collection.JavaConverters._ import scala.io.Source diff --git a/src/main/scala/io/rml/framework/flink/item/csv/CSVItem.scala b/src/main/scala/io/rml/framework/core/item/csv/CSVItem.scala similarity index 95% rename from src/main/scala/io/rml/framework/flink/item/csv/CSVItem.scala rename to src/main/scala/io/rml/framework/core/item/csv/CSVItem.scala index 497198dd..00c511b9 100644 --- a/src/main/scala/io/rml/framework/flink/item/csv/CSVItem.scala +++ b/src/main/scala/io/rml/framework/core/item/csv/CSVItem.scala @@ -23,15 +23,13 @@ * **/ -package io.rml.framework.flink.item.csv - -import java.io.{IOException, StringReader} +package io.rml.framework.core.item.csv import io.rml.framework.core.internal.Logging -import io.rml.framework.flink.item.Item -import io.rml.framework.flink.source.EmptyItem +import io.rml.framework.core.item.{EmptyItem, Item} import org.apache.commons.csv.{CSVFormat, CSVRecord} +import java.io.{IOException, StringReader} import scala.collection.JavaConverters._ /** diff --git 
a/src/main/scala/io/rml/framework/flink/item/json/JSONItem.scala b/src/main/scala/io/rml/framework/core/item/json/JSONItem.scala similarity index 91% rename from src/main/scala/io/rml/framework/flink/item/json/JSONItem.scala rename to src/main/scala/io/rml/framework/core/item/json/JSONItem.scala index f757bd14..6ec034b8 100644 --- a/src/main/scala/io/rml/framework/flink/item/json/JSONItem.scala +++ b/src/main/scala/io/rml/framework/core/item/json/JSONItem.scala @@ -23,18 +23,18 @@ * **/ -package io.rml.framework.flink.item.json - -import java.util +package io.rml.framework.core.item.json import com.fasterxml.jackson.databind.ObjectMapper import com.jayway.jsonpath.JsonPath import io.rml.framework.core.internal.Logging -import io.rml.framework.flink.item.Item -import io.rml.framework.flink.source.JSONStream +import io.rml.framework.core.item.Item +import io.rml.framework.core.util.Util.DEFAULT_ITERATOR_MAP +import io.rml.framework.core.vocabulary.RMLVoc import org.jsfr.json.provider.JacksonProvider import org.jsfr.json.{JacksonParser, JsonSurfer} +import java.util import scala.collection.JavaConversions._ import scala.util.control.NonFatal @@ -71,8 +71,7 @@ class JSONItem(map: java.util.Map[String, Object], val tag: String) extends Item object JSONItem extends Logging { private val surfer = new JsonSurfer(JacksonParser.INSTANCE, JacksonProvider.INSTANCE) - - + private val DEFAULT_PATH_OPTION: String = DEFAULT_ITERATOR_MAP(RMLVoc.Class.JSONPATH) def fromStringOptionableList(json: String, jsonPaths: List[String]): List[Item] = { val result: List[Item] = jsonPaths @@ -81,7 +80,7 @@ object JSONItem extends Logging { jsonPath => { try { val tag = jsonPath match { - case JSONStream.DEFAULT_PATH_OPTION => "" + case DEFAULT_PATH_OPTION => "" case _ => jsonPath } val collection = surfer.collectAll(json, jsonPath) diff --git a/src/main/scala/io/rml/framework/flink/item/xml/XMLItem.scala b/src/main/scala/io/rml/framework/core/item/xml/XMLItem.scala similarity index 93% rename 
from src/main/scala/io/rml/framework/flink/item/xml/XMLItem.scala rename to src/main/scala/io/rml/framework/core/item/xml/XMLItem.scala index f6371837..abda9fef 100644 --- a/src/main/scala/io/rml/framework/flink/item/xml/XMLItem.scala +++ b/src/main/scala/io/rml/framework/core/item/xml/XMLItem.scala @@ -23,25 +23,23 @@ * **/ -package io.rml.framework.flink.item.xml - -import java.io.{ByteArrayInputStream, InputStreamReader} -import java.nio.charset.StandardCharsets +package io.rml.framework.core.item.xml import com.ximpleware.extended.{AutoPilotHuge, VTDGenHuge, XMLBuffer} import io.rml.framework.core.internal.Logging -import io.rml.framework.flink.item.Item -import io.rml.framework.flink.item.xml.XMLItem.documentToString -import io.rml.framework.flink.source.{XMLIterator, XMLStream} -import io.rml.framework.flink.util.XMLNamespace -import javax.xml.namespace.NamespaceContext -import javax.xml.parsers.DocumentBuilderFactory -import javax.xml.xpath.{XPathConstants, XPathFactory} +import io.rml.framework.core.item.Item +import io.rml.framework.core.util.Util.DEFAULT_ITERATOR_MAP +import io.rml.framework.core.util.XMLNamespace +import io.rml.framework.core.vocabulary.RMLVoc import org.apache.commons.io.IOUtils import org.w3c.dom.{Document, NodeList} +import java.io.{ByteArrayInputStream, InputStreamReader} +import java.nio.charset.StandardCharsets +import javax.xml.namespace.NamespaceContext +import javax.xml.parsers.DocumentBuilderFactory +import javax.xml.xpath.{XPathConstants, XPathFactory} import scala.util.control.NonFatal -//import scala.xml.{PrettyPrinter, XML} class XMLItem(xml: Document, namespaces: Map[String, String], val tag: String) extends Item { @@ -80,12 +78,12 @@ class XMLItem(xml: Document, namespaces: Map[String, String], val tag: String) e } else None } - override def toString: String = documentToString(xml) + override def toString: String = XMLItem.documentToString(xml) } object XMLItem extends Logging { - + private val DEFAULT_PATH_OPTION: String 
= DEFAULT_ITERATOR_MAP(RMLVoc.Class.XPATH) def getNSpacesFromString(xml: String): Map[String, String] = { val inputStream = new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)) @@ -118,7 +116,7 @@ object XMLItem extends Logging { document = documentBuilder.parse(IOUtils.toInputStream(documentToString(document), StandardCharsets.UTF_8)) val tag = xpath match { - case XMLStream.DEFAULT_PATH_OPTION => "" + case DEFAULT_PATH_OPTION => "" case _ => xpath } @@ -170,7 +168,6 @@ object XMLItem extends Logging { def documentToString(document: Document) : String = { import java.io.StringWriter - import javax.xml.transform.dom.DOMSource import javax.xml.transform.stream.StreamResult import javax.xml.transform.{OutputKeys, TransformerFactory} diff --git a/src/main/scala/io/rml/framework/flink/source/XMLIterator.scala b/src/main/scala/io/rml/framework/core/item/xml/XMLIterator.scala similarity index 97% rename from src/main/scala/io/rml/framework/flink/source/XMLIterator.scala rename to src/main/scala/io/rml/framework/core/item/xml/XMLIterator.scala index 7746b73a..335dccd3 100644 --- a/src/main/scala/io/rml/framework/flink/source/XMLIterator.scala +++ b/src/main/scala/io/rml/framework/core/item/xml/XMLIterator.scala @@ -22,12 +22,11 @@ * THE SOFTWARE. 
* **/ -package io.rml.framework.flink.source +package io.rml.framework.core.item.xml import com.ximpleware.extended.{AutoPilotHuge, VTDNavHuge} import io.rml.framework.core.internal.Logging -import io.rml.framework.flink.item.Item -import io.rml.framework.flink.item.xml.XMLItem +import io.rml.framework.core.item.Item import org.slf4j.LoggerFactory import java.nio.charset.{Charset, StandardCharsets} @@ -39,6 +38,8 @@ import java.nio.charset.{Charset, StandardCharsets} * * @param ap * @param vn + * + * TODO: move to other package */ class XMLIterator(val ap: AutoPilotHuge, vn: VTDNavHuge, namespaces: Map[String, String], xPath: String) extends Iterator[Option[Item]] with Logging { diff --git a/src/main/scala/io/rml/framework/core/model/DataSource.scala b/src/main/scala/io/rml/framework/core/model/DataSource.scala index b1508ead..15f0cdb6 100644 --- a/src/main/scala/io/rml/framework/core/model/DataSource.scala +++ b/src/main/scala/io/rml/framework/core/model/DataSource.scala @@ -39,5 +39,5 @@ trait DataSource extends Node{ def uri:ExplicitNode - override def identifier: String = this.uri.toString + override def identifier: String = this.uri.identifier } diff --git a/src/main/scala/io/rml/framework/core/model/Entity.scala b/src/main/scala/io/rml/framework/core/model/Entity.scala index 9a82040f..1ed1435c 100644 --- a/src/main/scala/io/rml/framework/core/model/Entity.scala +++ b/src/main/scala/io/rml/framework/core/model/Entity.scala @@ -28,4 +28,6 @@ package io.rml.framework.core.model /** * RML entity representing the uri, blank or literal nodes. 
*/ -trait Entity extends Node +trait Entity extends Node { + def value: String +} diff --git a/src/main/scala/io/rml/framework/core/model/FileDataSource.scala b/src/main/scala/io/rml/framework/core/model/FileDataSource.scala index 6d496ff8..9179dc2e 100644 --- a/src/main/scala/io/rml/framework/core/model/FileDataSource.scala +++ b/src/main/scala/io/rml/framework/core/model/FileDataSource.scala @@ -25,14 +25,13 @@ package io.rml.framework.core.model -import java.io.File - import io.rml.framework.api.RMLEnvironment import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.std.StdFileDataSource import io.rml.framework.core.util.Util import io.rml.framework.shared.RMLException +import java.io.File import scala.util.{Failure, Success, Try} /** @@ -52,17 +51,17 @@ object FileDataSource extends Logging { * @return An instance of DataSource. */ def apply(uri: ExplicitNode): DataSource = { - val file = new File(uri.toString) + val file = new File(uri.value) Try(Util.resolveFileRelativeToSourceFileParent(RMLEnvironment.getMappingFileBaseIRI().get, file.getPath)) match { case Success(resolvedFile) => { StdFileDataSource(Uri(resolvedFile.getAbsolutePath)) } case Failure(exception) => { if (file.isAbsolute) { - logDebug(Uri(file.getAbsolutePath).uri) + logDebug(Uri(file.getAbsolutePath).value) StdFileDataSource(Uri(file.getAbsolutePath)) } else { - val url = ClassLoader.getSystemResource(uri.toString) + val url = ClassLoader.getSystemResource(uri.value) if (url == null) throw new RMLException(uri.toString + " can't be found.") val file_2 = new File(url.toURI) StdFileDataSource(Uri(file_2.getAbsolutePath)) diff --git a/src/main/scala/io/rml/framework/core/model/KafkaStream.scala b/src/main/scala/io/rml/framework/core/model/KafkaStream.scala index 6463b518..dbfb184b 100644 --- a/src/main/scala/io/rml/framework/core/model/KafkaStream.scala +++ b/src/main/scala/io/rml/framework/core/model/KafkaStream.scala @@ -26,16 +26,10 @@ package 
io.rml.framework.core.model import java.util.{Objects, Properties} -import io.rml.framework.flink.connector.kafka.{KafkaConnectorFactory, UniversalKafkaConnectorFactory} - case class KafkaStream( brokers: List[String], groupId: String, topic: String) extends StreamDataSource { - def getConnectorFactory: KafkaConnectorFactory = { - UniversalKafkaConnectorFactory - } - def getProperties: Properties = { val properties = new Properties() val brokersCommaSeparated = brokers.reduce((a, b) => a + ", " + b) diff --git a/src/main/scala/io/rml/framework/core/model/Literal.scala b/src/main/scala/io/rml/framework/core/model/Literal.scala index 0ebc5196..62e9638a 100644 --- a/src/main/scala/io/rml/framework/core/model/Literal.scala +++ b/src/main/scala/io/rml/framework/core/model/Literal.scala @@ -54,7 +54,18 @@ trait Literal extends Entity with ExplicitNode{ * * @return */ - override def toString: String = value + override def toString: String = { + val base = '"' + value + '"' + if (`type`.isDefined) { + if (language.isDefined) base + "^^<" + `type`.get.toString + ">@" + language.get.toString + else base + "^^<" + `type`.get.toString + ">" + } else { + if (language.isDefined) { + base + "@" + language.get.value + } + else base + } + } override def identifier: String = value diff --git a/src/main/scala/io/rml/framework/core/model/Node.scala b/src/main/scala/io/rml/framework/core/model/Node.scala index 665f1b41..0de9431f 100644 --- a/src/main/scala/io/rml/framework/core/model/Node.scala +++ b/src/main/scala/io/rml/framework/core/model/Node.scala @@ -39,5 +39,5 @@ trait Node extends Serializable{ def identifier: String - override def toString: String = identifier.toString + override def toString: String = identifier } diff --git a/src/main/scala/io/rml/framework/core/model/StreamDataSource.scala b/src/main/scala/io/rml/framework/core/model/StreamDataSource.scala index bc2f3812..9ac8fbca 100644 --- a/src/main/scala/io/rml/framework/core/model/StreamDataSource.scala +++ 
b/src/main/scala/io/rml/framework/core/model/StreamDataSource.scala @@ -25,25 +25,4 @@ package io.rml.framework.core.model -import io.rml.framework.core.vocabulary.RMLVoc -import io.rml.framework.flink.source.{CSVStream, JSONStream, Stream, XMLStream} -import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment - - -trait StreamDataSource extends DataSource - -object StreamDataSource { - - - def fromLogicalSource(logicalSource: LogicalSource)(implicit env: StreamExecutionEnvironment): Stream = { - - logicalSource.source match { - case source: StreamDataSource => - logicalSource.referenceFormulation match { - case Uri(RMLVoc.Class.CSV) => CSVStream(source) - case Uri(RMLVoc.Class.XPATH) => XMLStream(source, logicalSource.iterators.distinct) - case Uri(RMLVoc.Class.JSONPATH) => JSONStream(source, logicalSource.iterators.distinct) - } - } - } -} +trait StreamDataSource extends DataSource \ No newline at end of file diff --git a/src/main/scala/io/rml/framework/core/model/Uri.scala b/src/main/scala/io/rml/framework/core/model/Uri.scala index 1d6ccb87..91dceee3 100644 --- a/src/main/scala/io/rml/framework/core/model/Uri.scala +++ b/src/main/scala/io/rml/framework/core/model/Uri.scala @@ -25,18 +25,18 @@ package io.rml.framework.core.model -import java.net.URLEncoder - import io.rml.framework.core.model.std.StdUri +import java.net.URLEncoder + /** * This trait represents a Uri. 
*/ trait Uri extends TermNode with ExplicitNode{ - def uri : String + //def uri : String - override def identifier: String = this.uri + override def identifier: String = this.value } @@ -48,7 +48,7 @@ object Uri { * @return */ def apply(uri: String): Uri = { - if (uri != null) StdUri(uri) else Uri("") + if (uri != null) StdUri(uri) else StdUri("") } def unapply(arg: Uri): Option[String] = Some(arg.toString) diff --git a/src/main/scala/io/rml/framework/core/model/rdf/RDFNode.scala b/src/main/scala/io/rml/framework/core/model/rdf/RDFNode.scala index 918fa0bf..588c834c 100644 --- a/src/main/scala/io/rml/framework/core/model/rdf/RDFNode.scala +++ b/src/main/scala/io/rml/framework/core/model/rdf/RDFNode.scala @@ -32,5 +32,5 @@ import io.rml.framework.core.model.Node * of querying an underlying RDF model. */ trait RDFNode extends Node { - + def value: String } diff --git a/src/main/scala/io/rml/framework/core/model/rdf/RDFResource.scala b/src/main/scala/io/rml/framework/core/model/rdf/RDFResource.scala index efcff62e..41638d9a 100644 --- a/src/main/scala/io/rml/framework/core/model/rdf/RDFResource.scala +++ b/src/main/scala/io/rml/framework/core/model/rdf/RDFResource.scala @@ -25,8 +25,8 @@ package io.rml.framework.core.model.rdf -import io.rml.framework.core.model.Uri import io.rml.framework.core.internal.Logging +import io.rml.framework.core.model.Uri /** * This trait represents a resource that is able to query an underlying RDF model. 
@@ -37,7 +37,12 @@ trait RDFResource extends RDFNode with Logging { def uri: Uri - override def toString: String = uri.toString + def value = uri.value + + override def toString: String = { + val base = "<" + uri.toString + ">" + base + } override def identifier: String = uri.toString diff --git a/src/main/scala/io/rml/framework/flink/sink/FlinkTriple.scala b/src/main/scala/io/rml/framework/core/model/rdf/SerializableRDF.scala similarity index 60% rename from src/main/scala/io/rml/framework/flink/sink/FlinkTriple.scala rename to src/main/scala/io/rml/framework/core/model/rdf/SerializableRDF.scala index 3c65cdd9..0fc6413e 100644 --- a/src/main/scala/io/rml/framework/flink/sink/FlinkTriple.scala +++ b/src/main/scala/io/rml/framework/core/model/rdf/SerializableRDF.scala @@ -22,7 +22,7 @@ * THE SOFTWARE. * **/ -package io.rml.framework.flink.sink +package io.rml.framework.core.model.rdf import io.rml.framework.core.model._ @@ -31,17 +31,17 @@ import io.rml.framework.core.model._ * * @param value */ -abstract class FlinkRDFNode(val value: Entity) extends Serializable +abstract class SerializableRDFNode(val value: Entity) extends Serializable -abstract class FlinkRDFTermNode(val termNode: TermNode) extends FlinkRDFNode(termNode) +abstract class SerializableRDFTermNode(val termNode: TermNode) extends SerializableRDFNode(termNode) -case class FlinkRDFBlank(blank: Blank) extends FlinkRDFTermNode(blank) { +case class SerializableRDFBlank(blank: Blank) extends SerializableRDFTermNode(blank) { override def toString: String = { - "_:" + blank.toString + blank.toString } } -case class FlinkRDFResource(uri: Uri) extends FlinkRDFTermNode(uri) { +case class SerializableRDFResource(uri: Uri) extends SerializableRDFTermNode(uri) { override def toString: String = { val base = "<" + uri.toString + ">" @@ -49,25 +49,16 @@ case class FlinkRDFResource(uri: Uri) extends FlinkRDFTermNode(uri) { } } -case class FlinkRDFLiteral(literal: Literal) extends FlinkRDFNode(literal) { - override def 
toString: String = { - val base = '"' + literal.toString + '"' - if (literal.`type`.isDefined) { - if (literal.language.isDefined) base + "^^<" + literal.`type`.get.toString + ">@" + literal.language.get.toString - else base + "^^<" + literal.`type`.get.toString + ">" - } else { - if (literal.language.isDefined) base + "@" + literal.language.get.toString - else base - } - } +case class FlinkRDFLiteral(literal: Literal) extends SerializableRDFNode(literal) { + override def toString: String = literal.toString } -case class FlinkRDFQuad(subject: FlinkRDFTermNode, - predicate: FlinkRDFResource, - `object`: FlinkRDFNode, - graph: Option[FlinkRDFResource] = None) +case class SerializableRDFQuad(subject: SerializableRDFTermNode, + predicate: SerializableRDFResource, + `object`: SerializableRDFNode, + graph: Option[SerializableRDFResource] = None) extends Serializable { override def toString: String = { diff --git a/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaGraph.scala b/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaGraph.scala index de79f97e..93648d6d 100644 --- a/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaGraph.scala +++ b/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaGraph.scala @@ -25,9 +25,6 @@ package io.rml.framework.core.model.rdf.jena -import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File, InputStream} -import java.nio.charset.StandardCharsets - import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.rdf.{RDFGraph, RDFLiteral, RDFResource, RDFTriple} import io.rml.framework.core.model.{Literal, Uri} @@ -37,6 +34,8 @@ import io.rml.framework.shared.{RMLException, ReadException} import org.apache.jena.rdf.model.{Model, ModelFactory, Statement} import org.apache.jena.riot.RDFDataMgr +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File, InputStream} +import java.nio.charset.StandardCharsets import scala.collection.JavaConverters._ class JenaGraph(model: Model) extends 
RDFGraph with Logging { @@ -184,9 +183,9 @@ class JenaGraph(model: Model) extends RDFGraph with Logging { private var _uri: Uri = _ private def extractStatementFromTriple(triple: RDFTriple): Statement = { - val subject = model.createResource(triple.subject.uri.toString) - val predicate = model.createProperty(triple.predicate.uri.toString) - val _object = model.createResource(triple.`object`.toString) + val subject = model.createResource(triple.subject.uri.value) + val predicate = model.createProperty(triple.predicate.uri.value) + val _object = model.createResource(triple.`object`.value) model.createStatement(subject, predicate, _object) } @@ -199,6 +198,7 @@ class JenaGraph(model: Model) extends RDFGraph with Logging { } } + override def value: String = ??? // this doesn't make much sense. } object JenaGraph { diff --git a/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaLiteral.scala b/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaLiteral.scala index ee61dddd..1bd735d2 100644 --- a/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaLiteral.scala +++ b/src/main/scala/io/rml/framework/core/model/rdf/jena/JenaLiteral.scala @@ -34,7 +34,11 @@ case class JenaLiteral(value: String, `type`: Option[Uri], override val language object JenaLiteral { def apply(literal: JLiteral): JenaLiteral = { - JenaLiteral(literal.toString, Option(Uri(literal.getDatatypeURI)), Option(Literal(literal.getLanguage))) + val language = literal.getLanguage match { + case "" => None + case someValue: String => Some(Literal(someValue)) + } + JenaLiteral(literal.toString, Option(Uri(literal.getDatatypeURI)), language) } } diff --git a/src/main/scala/io/rml/framework/core/model/std/StdBlank.scala b/src/main/scala/io/rml/framework/core/model/std/StdBlank.scala index b777c08c..986800ed 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdBlank.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdBlank.scala @@ -29,5 +29,10 @@ import 
io.rml.framework.core.model.Blank case class StdBlank(id: String = "_") extends Blank { override def identifier: String = id - override def toString: String = id + + override def value: String = id + + override def toString: String = { + "_:" + id + } } diff --git a/src/main/scala/io/rml/framework/core/model/std/StdUri.scala b/src/main/scala/io/rml/framework/core/model/std/StdUri.scala index c410b81a..90ee6240 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdUri.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdUri.scala @@ -31,5 +31,6 @@ case class StdUri(uri: String) extends Uri { override def toString: String = uri + override def value: String = uri } diff --git a/src/main/scala/io/rml/framework/flink/util/CSVConfig.scala b/src/main/scala/io/rml/framework/core/util/CSVConfig.scala similarity index 97% rename from src/main/scala/io/rml/framework/flink/util/CSVConfig.scala rename to src/main/scala/io/rml/framework/core/util/CSVConfig.scala index 9fd21da3..e59fedbb 100644 --- a/src/main/scala/io/rml/framework/flink/util/CSVConfig.scala +++ b/src/main/scala/io/rml/framework/core/util/CSVConfig.scala @@ -22,7 +22,7 @@ * THE SOFTWARE. 
* **/ -package io.rml.framework.flink.util +package io.rml.framework.core.util sealed abstract class CSVConfig(val delimiter: Char = ',', val quoteCharacter: Char = '"', val recordDelimiter: String = "\n") diff --git a/src/main/scala/io/rml/framework/flink/util/ParameterUtil.scala b/src/main/scala/io/rml/framework/core/util/ParameterUtil.scala similarity index 97% rename from src/main/scala/io/rml/framework/flink/util/ParameterUtil.scala rename to src/main/scala/io/rml/framework/core/util/ParameterUtil.scala index e074b360..466368a4 100644 --- a/src/main/scala/io/rml/framework/flink/util/ParameterUtil.scala +++ b/src/main/scala/io/rml/framework/core/util/ParameterUtil.scala @@ -1,7 +1,7 @@ -package io.rml.framework.flink.util +package io.rml.framework.core.util -import io.rml.framework.flink.util.ParameterUtil.OutputSinkOption.OutputSinkOption -import io.rml.framework.flink.util.ParameterUtil.PostProcessorOption.PostProcessorOption +import io.rml.framework.core.util.ParameterUtil.OutputSinkOption.OutputSinkOption +import io.rml.framework.core.util.ParameterUtil.PostProcessorOption.PostProcessorOption /** * MIT License diff --git a/src/main/scala/io/rml/framework/core/util/Util.scala b/src/main/scala/io/rml/framework/core/util/Util.scala index 7fda10e7..8c30bd95 100644 --- a/src/main/scala/io/rml/framework/core/util/Util.scala +++ b/src/main/scala/io/rml/framework/core/util/Util.scala @@ -28,6 +28,7 @@ import io.rml.framework.api.RMLEnvironment import io.rml.framework.core.extractors.MappingReader import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.{FormattedRMLMapping, Literal, Node, RMLMapping} +import io.rml.framework.core.vocabulary.RMLVoc import io.rml.framework.shared.ReadException import java.io._ @@ -47,6 +48,14 @@ object Util extends Logging{ private val baseDirectiveCapture = "@base <([^<>]*)>.*".r + val DEFAULT_ITERATOR_MAP: Map[String, String] = Map( + RMLVoc.Class.JSONPATH -> "$", + RMLVoc.Class.CSV -> "", + 
RMLVoc.Class.XPATH -> "/*" + ) + + val DEFAULT_ITERATOR_SET: Set[String] = DEFAULT_ITERATOR_MAP.values.toSet + def getLiteral(node: Node):Option[Literal]= { node match{ case lit:Literal => Some(lit) @@ -150,7 +159,7 @@ object Util extends Logging{ def isRootIteratorTag(tag: String): Boolean = { - io.rml.framework.flink.source.Source.DEFAULT_ITERATOR_SET.contains(tag) + DEFAULT_ITERATOR_SET.contains(tag) } // auto-close resources, seems to be missing in Scala diff --git a/src/main/scala/io/rml/framework/flink/util/XMLNamespace.scala b/src/main/scala/io/rml/framework/core/util/XMLNamespace.scala similarity index 98% rename from src/main/scala/io/rml/framework/flink/util/XMLNamespace.scala rename to src/main/scala/io/rml/framework/core/util/XMLNamespace.scala index ba839acd..20af167a 100644 --- a/src/main/scala/io/rml/framework/flink/util/XMLNamespace.scala +++ b/src/main/scala/io/rml/framework/core/util/XMLNamespace.scala @@ -22,7 +22,7 @@ * THE SOFTWARE. * **/ -package io.rml.framework.flink.util +package io.rml.framework.core.util import java.io.{File, FileInputStream, InputStreamReader} diff --git a/src/main/scala/io/rml/framework/engine/Engine.scala b/src/main/scala/io/rml/framework/engine/Engine.scala index 12df37d5..535d0ee7 100644 --- a/src/main/scala/io/rml/framework/engine/Engine.scala +++ b/src/main/scala/io/rml/framework/engine/Engine.scala @@ -25,13 +25,12 @@ package io.rml.framework.engine -import java.util.regex.Pattern - import io.rml.framework.core.internal.Logging +import io.rml.framework.core.item.Item +import io.rml.framework.core.model.rdf.SerializableRDFQuad import io.rml.framework.core.model.{Literal, Uri} -import io.rml.framework.flink.item.Item -import io.rml.framework.flink.sink.FlinkRDFQuad +import java.util.regex.Pattern import scala.collection.mutable /** @@ -39,7 +38,7 @@ import scala.collection.mutable */ trait Engine[T] extends Serializable { - def process(item: T): List[FlinkRDFQuad] + def process(item: T): List[SerializableRDFQuad] } 
@@ -113,7 +112,7 @@ object Engine extends Logging { * @return */ def processReference(reference: Literal, item: Item): Option[List[String]] = { - item.refer(reference.toString) + item.refer(reference.value) } /** diff --git a/src/main/scala/io/rml/framework/engine/PostProcessor.scala b/src/main/scala/io/rml/framework/engine/PostProcessor.scala index ba0dd022..96146d68 100644 --- a/src/main/scala/io/rml/framework/engine/PostProcessor.scala +++ b/src/main/scala/io/rml/framework/engine/PostProcessor.scala @@ -24,14 +24,14 @@ **/ package io.rml.framework.engine -import java.io.ByteArrayOutputStream -import java.nio.charset.StandardCharsets - import io.rml.framework.api.RMLEnvironment +import io.rml.framework.core.model.rdf.SerializableRDFQuad import io.rml.framework.core.util._ -import io.rml.framework.flink.sink.FlinkRDFQuad import org.apache.jena.riot.{Lang, RDFDataMgr} +import java.io.ByteArrayOutputStream +import java.nio.charset.StandardCharsets + /** @@ -40,7 +40,7 @@ import org.apache.jena.riot.{Lang, RDFDataMgr} */ trait PostProcessor extends Serializable{ - def process(quadStrings: Iterable[FlinkRDFQuad]): List[String] + def process(quadStrings: Iterable[SerializableRDFQuad]): List[String] def outputFormat: Format } @@ -52,7 +52,7 @@ trait AtMostOneProcessor extends PostProcessor // TODO: define exact semantics * Does nothing, returns the input list of strings */ class NopPostProcessor extends PostProcessor { - override def process(quadStrings: Iterable[FlinkRDFQuad]): List[String] = { + override def process(quadStrings: Iterable[SerializableRDFQuad]): List[String] = { quadStrings.map(_.toString).toList } @@ -65,7 +65,7 @@ class NopPostProcessor extends PostProcessor { * string. 
*/ class BulkPostProcessor extends AtMostOneProcessor { - override def process(quadStrings: Iterable[FlinkRDFQuad]): List[String] = { + override def process(quadStrings: Iterable[SerializableRDFQuad]): List[String] = { List(quadStrings.mkString("\n")) } @@ -81,7 +81,7 @@ class JsonLDProcessor() extends AtMostOneProcessor { override def outputFormat: Format = JSON_LD - override def process(quadStrings: Iterable[FlinkRDFQuad]): List[String] = { + override def process(quadStrings: Iterable[SerializableRDFQuad]): List[String] = { if (quadStrings.isEmpty || quadStrings.mkString.isEmpty) { return List() } diff --git a/src/main/scala/io/rml/framework/engine/Processor.scala b/src/main/scala/io/rml/framework/engine/Processor.scala index d8ef7d2f..e150c71c 100644 --- a/src/main/scala/io/rml/framework/engine/Processor.scala +++ b/src/main/scala/io/rml/framework/engine/Processor.scala @@ -24,8 +24,8 @@ **/ package io.rml.framework.engine +import io.rml.framework.core.item.Item import io.rml.framework.engine.statement.StatementEngine -import io.rml.framework.flink.item.Item import org.apache.flink.api.common.functions.RichMapFunction /** diff --git a/src/main/scala/io/rml/framework/engine/StaticProcessor.scala b/src/main/scala/io/rml/framework/engine/StaticProcessor.scala index 5e4d717f..1c5dc226 100644 --- a/src/main/scala/io/rml/framework/engine/StaticProcessor.scala +++ b/src/main/scala/io/rml/framework/engine/StaticProcessor.scala @@ -24,8 +24,8 @@ **/ package io.rml.framework.engine +import io.rml.framework.core.item.{Item, JoinedItem} import io.rml.framework.engine.statement.StatementEngine -import io.rml.framework.flink.item.{Item, JoinedItem} abstract class StaticProcessor[T<:Item](engine: StatementEngine[T]) (implicit postProcessor: PostProcessor) extends Processor[T, T](engine) { diff --git a/src/main/scala/io/rml/framework/engine/StreamProcessor.scala b/src/main/scala/io/rml/framework/engine/StreamProcessor.scala index c1a4f4e8..bc857c73 100644 --- 
a/src/main/scala/io/rml/framework/engine/StreamProcessor.scala +++ b/src/main/scala/io/rml/framework/engine/StreamProcessor.scala @@ -24,8 +24,8 @@ **/ package io.rml.framework.engine +import io.rml.framework.core.item.{Item, JoinedItem} import io.rml.framework.engine.statement.StatementEngine -import io.rml.framework.flink.item.{Item, JoinedItem} abstract class StreamProcessor[T <: Item](engine: StatementEngine[T])(implicit postProcessor: PostProcessor) extends Processor[T, Iterable[T]](engine) { override def map(in: Iterable[T]): List[String] = { diff --git a/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala index af021dc5..73e08259 100644 --- a/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala @@ -26,11 +26,10 @@ package io.rml.framework.engine.statement import io.rml.framework.core.function.FunctionLoader import io.rml.framework.core.function.model.Function +import io.rml.framework.core.item.{EmptyItem, Item} import io.rml.framework.core.model._ +import io.rml.framework.core.model.rdf.SerializableRDFQuad import io.rml.framework.core.vocabulary.RMLVoc -import io.rml.framework.flink.item.Item -import io.rml.framework.flink.sink.FlinkRDFQuad -import io.rml.framework.flink.source.EmptyItem import io.rml.framework.shared.RMLException case class FunctionMapGeneratorAssembler() extends TermMapGeneratorAssembler { @@ -57,7 +56,7 @@ case class FunctionMapGeneratorAssembler() extends TermMapGeneratorAssembler { List[(Item => Option[Iterable[Uri]], Item => Option[Iterable[Entity]])]): Option[Function] = { this.logDebug("parseFunction (assembledPom)") - val placeHolder: List[FlinkRDFQuad] = generateFunctionTriples(new EmptyItem(), assembledPom) + val placeHolder: List[SerializableRDFQuad] = generateFunctionTriples(new 
EmptyItem(), assembledPom) val executeProperties = placeHolder.filter( quad => quad.predicate.value == Uri(RMLVoc.Property.EXECUTES)) if(executeProperties.isEmpty) @@ -69,7 +68,7 @@ case class FunctionMapGeneratorAssembler() extends TermMapGeneratorAssembler { .head .`object` .value - .toString) + .value) val functionLoaderOption = FunctionLoader(); @@ -90,7 +89,7 @@ case class FunctionMapGeneratorAssembler() extends TermMapGeneratorAssembler { */ private def createAssemblerFunction(function: Option[Function], assembledPom: List[(Item => Option[Iterable[Uri]], Item => Option[Iterable[Entity]])]): Item => Option[Iterable[Entity]] = { (item: Item) => { - val triples: List[FlinkRDFQuad] = generateFunctionTriples(item, assembledPom) + val triples: List[SerializableRDFQuad] = generateFunctionTriples(item, assembledPom) val paramTriples = triples.filter(triple => triple.predicate.uri != Uri(RMLVoc.Property.EXECUTES)) @@ -109,7 +108,7 @@ case class FunctionMapGeneratorAssembler() extends TermMapGeneratorAssembler { * @param item * @return */ - private def generateFunctionTriples(item: Item, assembledPom: List[(Item => Option[Iterable[Uri]], Item => Option[Iterable[Entity]])]): List[FlinkRDFQuad] = { + private def generateFunctionTriples(item: Item, assembledPom: List[(Item => Option[Iterable[Uri]], Item => Option[Iterable[Entity]])]): List[SerializableRDFQuad] = { val result = for{ (predicateGen, objGen) <- assembledPom diff --git a/src/main/scala/io/rml/framework/engine/statement/GraphGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/GraphGeneratorAssembler.scala index ca720a5f..8bb2aaea 100644 --- a/src/main/scala/io/rml/framework/engine/statement/GraphGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/GraphGeneratorAssembler.scala @@ -24,9 +24,8 @@ **/ package io.rml.framework.engine.statement +import io.rml.framework.core.item.Item import io.rml.framework.core.model.{GraphMap, TermMap, Uri} -import 
io.rml.framework.core.util.Util -import io.rml.framework.flink.item.Item class GraphGeneratorAssembler extends TermMapGeneratorAssembler { @@ -44,7 +43,7 @@ class GraphGeneratorAssembler extends TermMapGeneratorAssembler { assembled.andThen(result => { - result.map(iter => iter.map(elem => Uri(elem.toString))) + result.map(iter => iter.map(elem => Uri(elem.value))) }) } else { super.assemble(termMap).asInstanceOf[(Item) => Option[Iterable[Uri]]] diff --git a/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala index e5751940..38322e57 100644 --- a/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala @@ -26,9 +26,9 @@ package io.rml.framework.engine.statement import io.rml.framework.core.extractors.TriplesMapsCache +import io.rml.framework.core.item.Item import io.rml.framework.core.model.{Entity, Literal, ObjectMap, Uri} import io.rml.framework.core.vocabulary.RMLVoc -import io.rml.framework.flink.item.Item class ObjectGeneratorAssembler extends TermMapGeneratorAssembler { diff --git a/src/main/scala/io/rml/framework/engine/statement/PredicateGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/PredicateGeneratorAssembler.scala index e20f27e9..5aa1fe7b 100644 --- a/src/main/scala/io/rml/framework/engine/statement/PredicateGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/PredicateGeneratorAssembler.scala @@ -26,8 +26,8 @@ package io.rml.framework.engine.statement import io.rml.framework.core.function.FunctionUtils -import io.rml.framework.core.model.{Literal, TermMap, Uri} -import io.rml.framework.flink.item.Item +import io.rml.framework.core.item.Item +import io.rml.framework.core.model.{TermMap, Uri} class PredicateGeneratorAssembler extends TermMapGeneratorAssembler { @@ -41,7 +41,7 @@ class 
PredicateGeneratorAssembler extends TermMapGeneratorAssembler { if(item.isDefined) { item.map(iter => iter.flatMap(elem => { val castedResult = FunctionUtils.typeCastDataType(elem, termMap.datatype) - castedResult.map(v => Uri(v.toString)) + castedResult.map(v => Uri(v.value)) })) }else { None diff --git a/src/main/scala/io/rml/framework/engine/statement/PredicateObjectGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/PredicateObjectGeneratorAssembler.scala index f71131a3..e39d7a68 100644 --- a/src/main/scala/io/rml/framework/engine/statement/PredicateObjectGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/PredicateObjectGeneratorAssembler.scala @@ -26,8 +26,8 @@ package io.rml.framework.engine.statement import io.rml.framework.core.internal.Logging +import io.rml.framework.core.item.Item import io.rml.framework.core.model.{Entity, PredicateObjectMap, Uri} -import io.rml.framework.flink.item.Item class PredicateObjectGeneratorAssembler(predicateGeneratorAssembler: PredicateGeneratorAssembler, objectGeneratorAssembler: ObjectGeneratorAssembler, graphGeneratorAssembler: GraphGeneratorAssembler) extends Logging{ diff --git a/src/main/scala/io/rml/framework/engine/statement/Statement.scala b/src/main/scala/io/rml/framework/engine/statement/Statement.scala index 2358e989..f2776bc3 100644 --- a/src/main/scala/io/rml/framework/engine/statement/Statement.scala +++ b/src/main/scala/io/rml/framework/engine/statement/Statement.scala @@ -26,9 +26,9 @@ package io.rml.framework.engine.statement import io.rml.framework.core.internal.Logging +import io.rml.framework.core.item.{Item, JoinedItem} import io.rml.framework.core.model._ -import io.rml.framework.flink.item.{Item, JoinedItem} -import io.rml.framework.flink.sink._ +import io.rml.framework.core.model.rdf.{FlinkRDFLiteral, SerializableRDFBlank, SerializableRDFQuad, SerializableRDFResource} /** * Represents a potential triple. 
A statement potentially generates a triple @@ -46,10 +46,10 @@ abstract class Statement[T] { - def process(item: T): Option[Iterable[FlinkRDFQuad]] + def process(item: T): Option[Iterable[SerializableRDFQuad]] - def subProcess[S <: Item] (graphItem:S, subjItem:S, predItem: S, objectItem: S): Option[Iterable[FlinkRDFQuad]] = { + def subProcess[S <: Item] (graphItem:S, subjItem:S, predItem: S, objectItem: S): Option[Iterable[SerializableRDFQuad]] = { val graphOption = graphGenerator(graphItem) val result = for { @@ -73,7 +73,7 @@ case class ChildStatement(subjectGenerator: Item => Option[Iterable[TermNode]], objectGenerator: Item => Option[Iterable[Entity]], graphGenerator: Item => Option[Iterable[Uri]]) extends Statement[JoinedItem] with Serializable { - def process(item: JoinedItem): Option[Iterable[FlinkRDFQuad]] = { + def process(item: JoinedItem): Option[Iterable[SerializableRDFQuad]] = { subProcess(item.child, item.child, item.child, item.parent) } } @@ -83,7 +83,7 @@ case class ParentStatement(subjectGenerator: Item => Option[Iterable[TermNode]], objectGenerator: Item => Option[Iterable[Entity]], graphGenerator: Item => Option[Iterable[Uri]]) extends Statement[JoinedItem] with Serializable { - def process(item: JoinedItem): Option[Iterable[FlinkRDFQuad]] = { + def process(item: JoinedItem): Option[Iterable[SerializableRDFQuad]] = { subProcess(item.parent, item.parent, item.parent, item.parent) } } @@ -99,7 +99,7 @@ case class StdStatement(subjectGenerator: Item => Option[Iterable[TermNode]], * @param item * @return */ - def process(item: Item): Option[Iterable[FlinkRDFQuad]] = { + def process(item: Item): Option[Iterable[SerializableRDFQuad]] = { subProcess(item,item,item,item) } @@ -131,21 +131,21 @@ object Statement extends Logging { } - def generateQuad(subject: TermNode, predicate: Uri, _object: Entity, graphOpt: Option[Uri] = None): Option[FlinkRDFQuad] = { + def generateQuad(subject: TermNode, predicate: Uri, _object: Entity, graphOpt: Option[Uri] = 
None): Option[SerializableRDFQuad] = { val subjectResource = subject match { - case blank: Blank => FlinkRDFBlank(blank) - case resource: Uri => FlinkRDFResource(resource) + case blank: Blank => SerializableRDFBlank(blank) + case resource: Uri => SerializableRDFResource(resource) } - val predicateResource = FlinkRDFResource(predicate) + val predicateResource = SerializableRDFResource(predicate) val objectNode = _object match { case literal: Literal => FlinkRDFLiteral(literal) - case resource: Uri => FlinkRDFResource(resource) - case blank: Blank => FlinkRDFBlank(blank) + case resource: Uri => SerializableRDFResource(resource) + case blank: Blank => SerializableRDFBlank(blank) } - val graphUri = graphOpt.map(FlinkRDFResource) + val graphUri = graphOpt.map(SerializableRDFResource) - val result = Some(FlinkRDFQuad(subjectResource, predicateResource, objectNode, graphUri)) + val result = Some(SerializableRDFQuad(subjectResource, predicateResource, objectNode, graphUri)) logDebug(result.get.toString) result } diff --git a/src/main/scala/io/rml/framework/engine/statement/StatementEngine.scala b/src/main/scala/io/rml/framework/engine/statement/StatementEngine.scala index 9f613ac7..b13e5893 100644 --- a/src/main/scala/io/rml/framework/engine/statement/StatementEngine.scala +++ b/src/main/scala/io/rml/framework/engine/statement/StatementEngine.scala @@ -26,10 +26,11 @@ package io.rml.framework.engine.statement import io.rml.framework.core.internal.Logging +import io.rml.framework.core.item.{Item, JoinedItem} +import io.rml.framework.core.model.rdf.SerializableRDFQuad import io.rml.framework.core.model.{JoinedTriplesMap, TriplesMap} +import io.rml.framework.core.util.Util.DEFAULT_ITERATOR_SET import io.rml.framework.engine.Engine -import io.rml.framework.flink.item.{Item, JoinedItem} -import io.rml.framework.flink.sink.FlinkRDFQuad /** * A statement engine is an engine implementation that makes use of a transformed @@ -52,7 +53,7 @@ class StatementEngine[T <: Item](val 
statementMap: Map[String, List[Statement[T] * @param item * @return */ - override def process(item: T): List[FlinkRDFQuad] = { + override def process(item: T): List[SerializableRDFQuad] = { val statements = statementMap.getOrElse(item.tag, List()) @@ -81,7 +82,7 @@ object StatementEngine extends Logging { val iteratorGroup = triplesMaps.groupBy(tm => { // here we require the triples map to have only one iterator val iteratorQuery = tm.logicalSource.iterators.head - if (io.rml.framework.flink.source.Source.DEFAULT_ITERATOR_SET.contains(iteratorQuery)) { + if (DEFAULT_ITERATOR_SET.contains(iteratorQuery)) { "" } else { iteratorQuery diff --git a/src/main/scala/io/rml/framework/engine/statement/StatementsAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/StatementsAssembler.scala index 33568837..c4e0c8a4 100644 --- a/src/main/scala/io/rml/framework/engine/statement/StatementsAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/StatementsAssembler.scala @@ -27,9 +27,9 @@ package io.rml.framework.engine.statement import io.rml.framework.core.extractors.TriplesMapsCache import io.rml.framework.core.internal.Logging +import io.rml.framework.core.item.{Item, JoinedItem} import io.rml.framework.core.model._ import io.rml.framework.core.vocabulary.RDFVoc -import io.rml.framework.flink.item.{Item, JoinedItem} /** * Creates statements from triple maps. 
*/ diff --git a/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala index 25053311..b6839031 100644 --- a/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala @@ -26,9 +26,9 @@ package io.rml.framework.engine.statement import io.rml.framework.core.function.FunctionUtils +import io.rml.framework.core.item.Item import io.rml.framework.core.model.{TermMap, TermNode, Uri} import io.rml.framework.core.vocabulary.RMLVoc -import io.rml.framework.flink.item.Item import io.rml.framework.shared.TermTypeException @@ -43,7 +43,7 @@ class SubjectGeneratorAssembler extends TermMapGeneratorAssembler { if(item.isDefined) { item.map(iter => iter.flatMap(elem => { val castedResult = FunctionUtils.typeCastDataType(elem, termMap.datatype) - castedResult.map(v => Uri(v.toString)) + castedResult.map(v => Uri(v.value)) })) }else { None diff --git a/src/main/scala/io/rml/framework/engine/statement/TermMapGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/TermMapGeneratorAssembler.scala index 5afa4c93..6f898945 100644 --- a/src/main/scala/io/rml/framework/engine/statement/TermMapGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/TermMapGeneratorAssembler.scala @@ -26,9 +26,9 @@ package io.rml.framework.engine.statement import io.rml.framework.core.internal.Logging +import io.rml.framework.core.item.Item import io.rml.framework.core.model._ import io.rml.framework.core.vocabulary.RMLVoc -import io.rml.framework.flink.item.Item /** * diff --git a/src/main/scala/io/rml/framework/engine/statement/TermMapGenerators.scala b/src/main/scala/io/rml/framework/engine/statement/TermMapGenerators.scala index 3947b893..1de8fc58 100644 --- a/src/main/scala/io/rml/framework/engine/statement/TermMapGenerators.scala +++ 
b/src/main/scala/io/rml/framework/engine/statement/TermMapGenerators.scala @@ -27,10 +27,10 @@ package io.rml.framework.engine.statement import io.rml.framework.api.RMLEnvironment +import io.rml.framework.core.item.Item import io.rml.framework.core.model._ import io.rml.framework.core.util.Util import io.rml.framework.engine.Engine -import io.rml.framework.flink.item.Item /** * @@ -40,14 +40,14 @@ object TermMapGenerators { def constantUriGenerator(constant: Entity): Item => Option[Iterable[Uri]] = { // return a function that just returns the constant (item: Item) => { - Some(List(Uri(constant.toString))) + Some(List(Uri(constant.value))) } } def constantLiteralGenerator(constant: Entity, datatype: Option[Uri] = None, language: Option[Literal]): Item => Option[Iterable[Literal]] = { // return a function that just returns the constant item: Item => { - Some(List(Literal(constant.toString, datatype, language))) + Some(List(Literal(constant.value, datatype, language))) } } diff --git a/src/main/scala/io/rml/framework/core/function/flink/FnOEnvironmentLoader.scala b/src/main/scala/io/rml/framework/flink/function/FnOEnvironmentLoader.scala similarity index 98% rename from src/main/scala/io/rml/framework/core/function/flink/FnOEnvironmentLoader.scala rename to src/main/scala/io/rml/framework/flink/function/FnOEnvironmentLoader.scala index 9c714baa..525d3d30 100644 --- a/src/main/scala/io/rml/framework/core/function/flink/FnOEnvironmentLoader.scala +++ b/src/main/scala/io/rml/framework/flink/function/FnOEnvironmentLoader.scala @@ -1,13 +1,12 @@ -package io.rml.framework.core.function.flink - -import java.net.URLClassLoader +package io.rml.framework.flink.function import io.rml.framework.Main.logError import io.rml.framework.api.FnOEnvironment -import io.rml.framework.flink.item.Item +import io.rml.framework.core.item.Item import org.apache.flink.api.common.functions.RichMapFunction import org.apache.flink.configuration.Configuration +import java.net.URLClassLoader import 
scala.util.{Failure, Success, Try} /** diff --git a/src/main/scala/io/rml/framework/flink/source/CSVInputFormat.scala b/src/main/scala/io/rml/framework/flink/source/CSVInputFormat.scala index 46510ef1..092008a0 100644 --- a/src/main/scala/io/rml/framework/flink/source/CSVInputFormat.scala +++ b/src/main/scala/io/rml/framework/flink/source/CSVInputFormat.scala @@ -24,8 +24,8 @@ **/ package io.rml.framework.flink.source -import io.rml.framework.flink.item.Item -import io.rml.framework.flink.item.csv.CSVItem +import io.rml.framework.core.item.csv.CSVItem +import io.rml.framework.core.item.{EmptyItem, Item} import org.apache.commons.csv.{CSVFormat, CSVParser, CSVRecord} import org.apache.flink.api.common.io.GenericCsvInputFormat import org.apache.flink.core.fs.Path diff --git a/src/main/scala/io/rml/framework/flink/source/CSVStream.scala b/src/main/scala/io/rml/framework/flink/source/CSVStream.scala index c10df2fd..7187a16f 100644 --- a/src/main/scala/io/rml/framework/flink/source/CSVStream.scala +++ b/src/main/scala/io/rml/framework/flink/source/CSVStream.scala @@ -25,12 +25,11 @@ package io.rml.framework.flink.source -import java.nio.file.Paths - +import io.rml.framework.core.item.Item +import io.rml.framework.core.item.csv.{CSVHeader, CSVItem} import io.rml.framework.core.model.{FileStream, KafkaStream, StreamDataSource, TCPSocketStream} -import io.rml.framework.flink.item.Item -import io.rml.framework.flink.item.csv.{CSVHeader, CSVItem} -import io.rml.framework.flink.util.{CustomCSVConfig, DefaultCSVConfig} +import io.rml.framework.core.util.{CustomCSVConfig, DefaultCSVConfig} +import io.rml.framework.flink.connector.kafka.UniversalKafkaConnectorFactory import org.apache.commons.csv.CSVFormat import org.apache.flink.api.common.serialization.SimpleStringSchema import org.apache.flink.api.common.typeinfo.TypeInformation @@ -41,6 +40,8 @@ import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment import org.apache.flink.table.descriptors.FileSystem import 
org.apache.flink.types.Row +import java.nio.file.Paths + case class CSVStream(stream: DataStream[Iterable[Item]] ) extends Stream object CSVStream { @@ -89,7 +90,7 @@ object CSVStream { .withFirstRecordAsHeader() val properties = kafkaStream.getProperties - val consumer = kafkaStream.getConnectorFactory.getSource(kafkaStream.topic, new SimpleStringSchema(), properties) + val consumer = UniversalKafkaConnectorFactory.getSource(kafkaStream.topic, new SimpleStringSchema(), properties) val stream: DataStream[Iterable[Item]] = StreamUtil.paralleliseOverSlots(env.addSource(consumer)) .map(batchString => { CSVItem.fromDataBatch(batchString, format) diff --git a/src/main/scala/io/rml/framework/flink/source/FileDataSet.scala b/src/main/scala/io/rml/framework/flink/source/FileDataSet.scala index 36d5c010..df80052d 100644 --- a/src/main/scala/io/rml/framework/flink/source/FileDataSet.scala +++ b/src/main/scala/io/rml/framework/flink/source/FileDataSet.scala @@ -24,17 +24,17 @@ **/ package io.rml.framework.flink.source -import java.nio.file.Paths - import io.rml.framework.core.internal.Logging +import io.rml.framework.core.item.Item +import io.rml.framework.core.item.csv.CSVHeader import io.rml.framework.core.model.{LogicalSource, Uri} +import io.rml.framework.core.util.DefaultCSVConfig import io.rml.framework.core.vocabulary.RMLVoc -import io.rml.framework.flink.item.Item -import io.rml.framework.flink.item.csv.CSVHeader -import io.rml.framework.flink.util.DefaultCSVConfig import org.apache.commons.csv.CSVFormat import org.apache.flink.api.scala._ +import java.nio.file.Paths + sealed abstract class FileDataSet extends Source { def dataset: DataSet[Item] } @@ -51,9 +51,9 @@ object FileDataSet extends Logging { def apply(logicalSource: LogicalSource)(implicit env: ExecutionEnvironment): FileDataSet = { logicalSource.referenceFormulation match { - case Uri(RMLVoc.Class.CSV) => createCSVDataSet(logicalSource.source.uri.toString) - case Uri(RMLVoc.Class.XPATH) => 
createXMLWithXPathDataSet(logicalSource.source.uri.toString, logicalSource.iterators.head) - case Uri(RMLVoc.Class.JSONPATH) => createJSONWithJSONPathDataSet(logicalSource.source.uri.toString, logicalSource.iterators.head) + case Uri(RMLVoc.Class.CSV) => createCSVDataSet(logicalSource.source.uri.value) + case Uri(RMLVoc.Class.XPATH) => createXMLWithXPathDataSet(logicalSource.source.uri.value, logicalSource.iterators.head) + case Uri(RMLVoc.Class.JSONPATH) => createJSONWithJSONPathDataSet(logicalSource.source.uri.value, logicalSource.iterators.head) } } diff --git a/src/main/scala/io/rml/framework/flink/source/JSONInputFormat.scala b/src/main/scala/io/rml/framework/flink/source/JSONInputFormat.scala index 4f782e82..34cf103d 100644 --- a/src/main/scala/io/rml/framework/flink/source/JSONInputFormat.scala +++ b/src/main/scala/io/rml/framework/flink/source/JSONInputFormat.scala @@ -24,23 +24,26 @@ **/ package io.rml.framework.flink.source -import java.io.{FileInputStream, InputStream} -import java.util - import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.node.ObjectNode -import io.rml.framework.flink.item.Item -import io.rml.framework.flink.item.json.JSONItem +import io.rml.framework.core.item.Item +import io.rml.framework.core.item.json.JSONItem +import io.rml.framework.core.util.Util.DEFAULT_ITERATOR_MAP +import io.rml.framework.core.vocabulary.RMLVoc import org.apache.flink.api.common.io.{GenericInputFormat, NonParallelInput} import org.apache.flink.core.io.GenericInputSplit import org.jsfr.json.compiler.JsonPathCompiler import org.jsfr.json.provider.JacksonProvider import org.jsfr.json.{JacksonParser, JsonSurfer} +import java.io.{FileInputStream, InputStream} +import java.util + class JSONInputFormat(path: String, jsonPath: String) extends GenericInputFormat[Item] with NonParallelInput { private var iterator: util.Iterator[Object] = _ private var inputStream: InputStream = _ + private val DEFAULT_PATH_OPTION: String = 
DEFAULT_ITERATOR_MAP(RMLVoc.Class.JSONPATH) override def open(inputSplit: GenericInputSplit): Unit = { super.open(inputSplit) @@ -58,7 +61,7 @@ class JSONInputFormat(path: String, jsonPath: String) extends GenericInputFormat val mapper = new ObjectMapper() val map = mapper.convertValue(asInstanceOf, classOf[java.util.Map[String, Object]]) val tag = jsonPath match { - case JSONStream.DEFAULT_PATH_OPTION => "" + case DEFAULT_PATH_OPTION => "" case _ => jsonPath } new JSONItem(map, tag) diff --git a/src/main/scala/io/rml/framework/flink/source/JSONStream.scala b/src/main/scala/io/rml/framework/flink/source/JSONStream.scala index f9013455..36a00f6d 100644 --- a/src/main/scala/io/rml/framework/flink/source/JSONStream.scala +++ b/src/main/scala/io/rml/framework/flink/source/JSONStream.scala @@ -25,10 +25,10 @@ package io.rml.framework.flink.source import io.rml.framework.core.internal.Logging +import io.rml.framework.core.item.Item +import io.rml.framework.core.item.json.JSONItem import io.rml.framework.core.model.{FileStream, KafkaStream, StreamDataSource, TCPSocketStream} -import io.rml.framework.core.vocabulary.RMLVoc -import io.rml.framework.flink.item.Item -import io.rml.framework.flink.item.json.JSONItem +import io.rml.framework.flink.connector.kafka.UniversalKafkaConnectorFactory import org.apache.flink.api.common.serialization.SimpleStringSchema import org.apache.flink.api.scala._ import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} @@ -36,7 +36,6 @@ import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironm case class JSONStream(val stream: DataStream[Iterable[Item]]) extends Stream object JSONStream extends Logging { - val DEFAULT_PATH_OPTION: String = Source.DEFAULT_ITERATOR_MAP(RMLVoc.Class.JSONPATH) def apply(source: StreamDataSource, jsonPaths: List[String])(implicit env: StreamExecutionEnvironment): Stream = { @@ -66,7 +65,7 @@ object JSONStream extends Logging { def fromKafkaStream(kafkaStream: 
KafkaStream, jsonPaths: List[String])(implicit env: StreamExecutionEnvironment): JSONStream = { val properties = kafkaStream.getProperties - val consumer = kafkaStream.getConnectorFactory.getSource(kafkaStream.topic, new SimpleStringSchema(), properties) + val consumer = UniversalKafkaConnectorFactory.getSource(kafkaStream.topic, new SimpleStringSchema(), properties) logDebug(consumer.getProducedType.toString) val parallelStream = StreamUtil.paralleliseOverSlots(env.addSource(consumer)) diff --git a/src/main/scala/io/rml/framework/flink/source/Source.scala b/src/main/scala/io/rml/framework/flink/source/Source.scala index f44ecee2..6581795a 100644 --- a/src/main/scala/io/rml/framework/flink/source/Source.scala +++ b/src/main/scala/io/rml/framework/flink/source/Source.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework.flink.source -import io.rml.framework.core.model.{FileDataSource, LogicalSource, StreamDataSource} +import io.rml.framework.core.model.{FileDataSource, LogicalSource, StreamDataSource, Uri} import io.rml.framework.core.vocabulary.RMLVoc import org.apache.flink.api.scala.ExecutionEnvironment import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment @@ -36,19 +36,19 @@ trait Source */ object Source { - val DEFAULT_ITERATOR_MAP: Map[String, String] = Map( - RMLVoc.Class.JSONPATH -> "$", - RMLVoc.Class.CSV -> "", - RMLVoc.Class.XPATH -> "/*" - ) - - val DEFAULT_ITERATOR_SET: Set[String] = DEFAULT_ITERATOR_MAP.values.toSet - def apply(logicalSource: LogicalSource)(implicit env: ExecutionEnvironment, senv: StreamExecutionEnvironment): Source = { logicalSource.source match { case fs: FileDataSource => FileDataSet(logicalSource) - case ss: StreamDataSource => StreamDataSource.fromLogicalSource(logicalSource) + case ss: StreamDataSource => { + logicalSource.source match { + case source: StreamDataSource => + logicalSource.referenceFormulation match { + case Uri(RMLVoc.Class.CSV) => CSVStream(source) + case Uri(RMLVoc.Class.XPATH) => 
XMLStream(source, logicalSource.iterators.distinct) + case Uri(RMLVoc.Class.JSONPATH) => JSONStream(source, logicalSource.iterators.distinct) + } + } + } } } - } diff --git a/src/main/scala/io/rml/framework/flink/source/Stream.scala b/src/main/scala/io/rml/framework/flink/source/Stream.scala index 69d33732..6bd188ab 100644 --- a/src/main/scala/io/rml/framework/flink/source/Stream.scala +++ b/src/main/scala/io/rml/framework/flink/source/Stream.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework.flink.source -import io.rml.framework.flink.item.Item +import io.rml.framework.core.item.Item import org.apache.flink.streaming.api.scala.DataStream abstract class Stream extends Source { diff --git a/src/main/scala/io/rml/framework/flink/source/XMLInputFormat.scala b/src/main/scala/io/rml/framework/flink/source/XMLInputFormat.scala index 28d8eb14..04ed642e 100644 --- a/src/main/scala/io/rml/framework/flink/source/XMLInputFormat.scala +++ b/src/main/scala/io/rml/framework/flink/source/XMLInputFormat.scala @@ -25,8 +25,9 @@ package io.rml.framework.flink.source import com.ximpleware.extended.{AutoPilotHuge, VTDGenHuge} -import io.rml.framework.flink.item.Item -import io.rml.framework.flink.util.XMLNamespace +import io.rml.framework.core.item.xml.XMLIterator +import io.rml.framework.core.item.{EmptyItem, Item} +import io.rml.framework.core.util.XMLNamespace import io.rml.framework.shared.RMLException import org.apache.flink.api.common.io.{GenericInputFormat, NonParallelInput} import org.apache.flink.core.io.GenericInputSplit @@ -73,7 +74,4 @@ class XMLInputFormat(path: String, xpath: String) extends GenericInputFormat[Ite } -class EmptyItem() extends Item { - override def refer(reference: String): Option[List[String]] = None - override def tag = "" -} + diff --git a/src/main/scala/io/rml/framework/flink/source/XMLStream.scala b/src/main/scala/io/rml/framework/flink/source/XMLStream.scala index 577ce504..85e69c10 100644 --- 
a/src/main/scala/io/rml/framework/flink/source/XMLStream.scala +++ b/src/main/scala/io/rml/framework/flink/source/XMLStream.scala @@ -24,10 +24,10 @@ **/ package io.rml.framework.flink.source +import io.rml.framework.core.item.Item +import io.rml.framework.core.item.xml.XMLItem import io.rml.framework.core.model.{FileStream, KafkaStream, StreamDataSource, TCPSocketStream} -import io.rml.framework.core.vocabulary.RMLVoc -import io.rml.framework.flink.item.Item -import io.rml.framework.flink.item.xml.XMLItem +import io.rml.framework.flink.connector.kafka.UniversalKafkaConnectorFactory import org.apache.flink.api.common.serialization.SimpleStringSchema import org.apache.flink.api.scala._ import org.apache.flink.streaming.api.functions.source.SourceFunction @@ -37,7 +37,6 @@ import org.slf4j.LoggerFactory case class XMLStream(stream: DataStream[Iterable[Item]]) extends Stream object XMLStream { - val DEFAULT_PATH_OPTION: String = Source.DEFAULT_ITERATOR_MAP(RMLVoc.Class.XPATH) def apply(source: StreamDataSource, xpaths: List[String])(implicit env: StreamExecutionEnvironment): Stream = { @@ -65,7 +64,7 @@ object XMLStream { def fromKafkaStream(kafkaStream: KafkaStream, xpaths: List[String])(implicit env: StreamExecutionEnvironment): XMLStream = { val properties = kafkaStream.getProperties - val consumer = kafkaStream.getConnectorFactory.getSource(kafkaStream.topic, new SimpleStringSchema(), properties) + val consumer = UniversalKafkaConnectorFactory.getSource(kafkaStream.topic, new SimpleStringSchema(), properties) val stream: DataStream[Iterable[Item]] = StreamUtil.paralleliseOverSlots(env.addSource(consumer)) .map(item => { XMLItem.fromStringOptionable(item, xpaths) diff --git a/src/test/resources/fno-testcases/RMLFNOTC0004-CSV/mapping.ttl b/src/test/resources/fno-testcases/RMLFNOTC0004-CSV/mapping.ttl index fd98a596..b28b411e 100644 --- a/src/test/resources/fno-testcases/RMLFNOTC0004-CSV/mapping.ttl +++ b/src/test/resources/fno-testcases/RMLFNOTC0004-CSV/mapping.ttl 
@@ -16,7 +16,7 @@ a rr:TriplesMap; rml:logicalSource [ - rml:source "../student.csv"; + rml:source "../student_url.csv"; rml:referenceFormulation ql:CSV ]; diff --git a/src/test/scala/io/rml/framework/OutputGenerationTest.scala b/src/test/scala/io/rml/framework/OutputGenerationTest.scala index 292bc2ee..1122c82d 100644 --- a/src/test/scala/io/rml/framework/OutputGenerationTest.scala +++ b/src/test/scala/io/rml/framework/OutputGenerationTest.scala @@ -150,7 +150,7 @@ class OutputGenerationTest extends StaticTestSpec with ReadMappingBehaviour with } } } catch { - case e => { + case e: Throwable => { if (shouldPass) { fail(e); System.exit(1) diff --git a/src/test/scala/io/rml/framework/engine/CSVItemTest.scala b/src/test/scala/io/rml/framework/engine/CSVItemTest.scala index a467432c..7df6b83e 100644 --- a/src/test/scala/io/rml/framework/engine/CSVItemTest.scala +++ b/src/test/scala/io/rml/framework/engine/CSVItemTest.scala @@ -25,7 +25,7 @@ package io.rml.framework.engine -import io.rml.framework.flink.item.csv.CSVItem +import io.rml.framework.core.item.csv.CSVItem import org.scalatest.{FunSuite, Matchers} class CSVItemTest extends FunSuite with Matchers { diff --git a/src/test/scala/io/rml/framework/engine/EngineTest.scala b/src/test/scala/io/rml/framework/engine/EngineTest.scala index 4756d620..4ef3775b 100644 --- a/src/test/scala/io/rml/framework/engine/EngineTest.scala +++ b/src/test/scala/io/rml/framework/engine/EngineTest.scala @@ -25,8 +25,8 @@ package io.rml.framework.engine +import io.rml.framework.core.item.Item import io.rml.framework.core.model.Literal -import io.rml.framework.flink.item.Item import org.scalatest.{FunSuite, Matchers} class EngineTest extends FunSuite with Matchers { diff --git a/src/test/scala/io/rml/framework/engine/StatementEngineTest.scala b/src/test/scala/io/rml/framework/engine/StatementEngineTest.scala index 0a6c7431..f936c9ed 100644 --- a/src/test/scala/io/rml/framework/engine/StatementEngineTest.scala +++ 
b/src/test/scala/io/rml/framework/engine/StatementEngineTest.scala @@ -62,7 +62,6 @@ class StatementEngineTest extends FunSuite with Matchers { testOutcome match { case Left(e) => { Logger.logError(e) - System.exit(1) fail(e) } case Right(e) => { @@ -77,7 +76,6 @@ class StatementEngineTest extends FunSuite with Matchers { * 13:24:26.959 [main] ERROR be.ugent.rml.cli.Main .main(315) - Expected ':', found '/' [line 1] */ test("example10") { - pending executeTest("example10/mapping.rml.ttl") } diff --git a/src/test/scala/io/rml/framework/util/TestUtil.scala b/src/test/scala/io/rml/framework/util/TestUtil.scala index e69819ed..d8c40a18 100644 --- a/src/test/scala/io/rml/framework/util/TestUtil.scala +++ b/src/test/scala/io/rml/framework/util/TestUtil.scala @@ -88,6 +88,7 @@ object TestUtil extends Logging { case anyException: Throwable => { val message = s"Testcase ${testCase} FAILED: ${anyException.getMessage}" logError(message) + anyException.printStackTrace() Left(message) } } From 1c38e3380fd06da3be1157e5fb8ad7c65974861f Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Mon, 22 Mar 2021 14:30:02 +0100 Subject: [PATCH 17/83] Manually merged some conflicting changes --- .../scala/io/rml/framework/core/item/json/JSONItem.scala | 4 ++-- .../scala/io/rml/framework/core/item/xml/XMLItem.scala | 4 ++-- src/main/scala/io/rml/framework/core/util/Util.scala | 8 ++++---- .../engine/statement/FunctionMapGeneratorAssembler.scala | 1 - .../engine/statement/ObjectGeneratorAssembler.scala | 1 - .../scala/io/rml/framework/flink/source/FileDataSet.scala | 1 + .../io/rml/framework/flink/source/JSONInputFormat.scala | 4 ++-- src/main/scala/io/rml/framework/flink/source/Source.scala | 8 ++++---- 8 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/main/scala/io/rml/framework/core/item/json/JSONItem.scala b/src/main/scala/io/rml/framework/core/item/json/JSONItem.scala index 6ec034b8..7f36a9bd 100644 --- a/src/main/scala/io/rml/framework/core/item/json/JSONItem.scala 
+++ b/src/main/scala/io/rml/framework/core/item/json/JSONItem.scala @@ -30,7 +30,7 @@ import com.jayway.jsonpath.JsonPath import io.rml.framework.core.internal.Logging import io.rml.framework.core.item.Item import io.rml.framework.core.util.Util.DEFAULT_ITERATOR_MAP -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.QueryVoc import org.jsfr.json.provider.JacksonProvider import org.jsfr.json.{JacksonParser, JsonSurfer} @@ -71,7 +71,7 @@ class JSONItem(map: java.util.Map[String, Object], val tag: String) extends Item object JSONItem extends Logging { private val surfer = new JsonSurfer(JacksonParser.INSTANCE, JacksonProvider.INSTANCE) - private val DEFAULT_PATH_OPTION: String = DEFAULT_ITERATOR_MAP(RMLVoc.Class.JSONPATH) + private val DEFAULT_PATH_OPTION: String = DEFAULT_ITERATOR_MAP(QueryVoc.Class.JSONPATH) def fromStringOptionableList(json: String, jsonPaths: List[String]): List[Item] = { val result: List[Item] = jsonPaths diff --git a/src/main/scala/io/rml/framework/core/item/xml/XMLItem.scala b/src/main/scala/io/rml/framework/core/item/xml/XMLItem.scala index abda9fef..25f25edf 100644 --- a/src/main/scala/io/rml/framework/core/item/xml/XMLItem.scala +++ b/src/main/scala/io/rml/framework/core/item/xml/XMLItem.scala @@ -30,7 +30,7 @@ import io.rml.framework.core.internal.Logging import io.rml.framework.core.item.Item import io.rml.framework.core.util.Util.DEFAULT_ITERATOR_MAP import io.rml.framework.core.util.XMLNamespace -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.QueryVoc import org.apache.commons.io.IOUtils import org.w3c.dom.{Document, NodeList} @@ -83,7 +83,7 @@ class XMLItem(xml: Document, namespaces: Map[String, String], val tag: String) e } object XMLItem extends Logging { - private val DEFAULT_PATH_OPTION: String = DEFAULT_ITERATOR_MAP(RMLVoc.Class.XPATH) + private val DEFAULT_PATH_OPTION: String = DEFAULT_ITERATOR_MAP(QueryVoc.Class.XPATH) def getNSpacesFromString(xml: 
String): Map[String, String] = { val inputStream = new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)) diff --git a/src/main/scala/io/rml/framework/core/util/Util.scala b/src/main/scala/io/rml/framework/core/util/Util.scala index 8c30bd95..f7dff4d3 100644 --- a/src/main/scala/io/rml/framework/core/util/Util.scala +++ b/src/main/scala/io/rml/framework/core/util/Util.scala @@ -28,7 +28,7 @@ import io.rml.framework.api.RMLEnvironment import io.rml.framework.core.extractors.MappingReader import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.{FormattedRMLMapping, Literal, Node, RMLMapping} -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.vocabulary.QueryVoc import io.rml.framework.shared.ReadException import java.io._ @@ -49,9 +49,9 @@ object Util extends Logging{ private val baseDirectiveCapture = "@base <([^<>]*)>.*".r val DEFAULT_ITERATOR_MAP: Map[String, String] = Map( - RMLVoc.Class.JSONPATH -> "$", - RMLVoc.Class.CSV -> "", - RMLVoc.Class.XPATH -> "/*" + QueryVoc.Class.JSONPATH -> "$", + QueryVoc.Class.CSV -> "", + QueryVoc.Class.XPATH -> "/*" ) val DEFAULT_ITERATOR_SET: Set[String] = DEFAULT_ITERATOR_MAP.values.toSet diff --git a/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala index 5f827302..ba6a0ca9 100644 --- a/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala @@ -30,7 +30,6 @@ import io.rml.framework.core.item.{EmptyItem, Item} import io.rml.framework.core.model._ import io.rml.framework.core.model.rdf.SerializableRDFQuad import io.rml.framework.core.vocabulary.FunVoc -import io.rml.framework.flink.source.EmptyItem import io.rml.framework.shared.RMLException case class FunctionMapGeneratorAssembler() extends TermMapGeneratorAssembler { diff --git 
a/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala index c2f669b4..5104af24 100644 --- a/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala @@ -29,7 +29,6 @@ import io.rml.framework.core.extractors.TriplesMapsCache import io.rml.framework.core.item.Item import io.rml.framework.core.model.{Entity, Literal, ObjectMap, Uri} import io.rml.framework.core.vocabulary.R2RMLVoc -import io.rml.framework.flink.item.Item class ObjectGeneratorAssembler extends TermMapGeneratorAssembler { diff --git a/src/main/scala/io/rml/framework/flink/source/FileDataSet.scala b/src/main/scala/io/rml/framework/flink/source/FileDataSet.scala index 081a0e20..b713d124 100644 --- a/src/main/scala/io/rml/framework/flink/source/FileDataSet.scala +++ b/src/main/scala/io/rml/framework/flink/source/FileDataSet.scala @@ -28,6 +28,7 @@ import io.rml.framework.core.internal.Logging import io.rml.framework.core.item.Item import io.rml.framework.core.item.csv.CSVHeader import io.rml.framework.core.model.{LogicalSource, Uri} +import io.rml.framework.core.util.DefaultCSVConfig import io.rml.framework.core.vocabulary.QueryVoc import org.apache.commons.csv.CSVFormat import org.apache.flink.api.scala._ diff --git a/src/main/scala/io/rml/framework/flink/source/JSONInputFormat.scala b/src/main/scala/io/rml/framework/flink/source/JSONInputFormat.scala index 34cf103d..94f9ce74 100644 --- a/src/main/scala/io/rml/framework/flink/source/JSONInputFormat.scala +++ b/src/main/scala/io/rml/framework/flink/source/JSONInputFormat.scala @@ -29,7 +29,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode import io.rml.framework.core.item.Item import io.rml.framework.core.item.json.JSONItem import io.rml.framework.core.util.Util.DEFAULT_ITERATOR_MAP -import io.rml.framework.core.vocabulary.RMLVoc 
+import io.rml.framework.core.vocabulary.QueryVoc import org.apache.flink.api.common.io.{GenericInputFormat, NonParallelInput} import org.apache.flink.core.io.GenericInputSplit import org.jsfr.json.compiler.JsonPathCompiler @@ -43,7 +43,7 @@ class JSONInputFormat(path: String, jsonPath: String) extends GenericInputFormat private var iterator: util.Iterator[Object] = _ private var inputStream: InputStream = _ - private val DEFAULT_PATH_OPTION: String = DEFAULT_ITERATOR_MAP(RMLVoc.Class.JSONPATH) + private val DEFAULT_PATH_OPTION: String = DEFAULT_ITERATOR_MAP(QueryVoc.Class.JSONPATH) override def open(inputSplit: GenericInputSplit): Unit = { super.open(inputSplit) diff --git a/src/main/scala/io/rml/framework/flink/source/Source.scala b/src/main/scala/io/rml/framework/flink/source/Source.scala index 604e23d7..ea79bad4 100644 --- a/src/main/scala/io/rml/framework/flink/source/Source.scala +++ b/src/main/scala/io/rml/framework/flink/source/Source.scala @@ -24,8 +24,8 @@ **/ package io.rml.framework.flink.source -import io.rml.framework.core.vocabulary.QueryVoc import io.rml.framework.core.model.{FileDataSource, LogicalSource, StreamDataSource, Uri} +import io.rml.framework.core.vocabulary.QueryVoc import org.apache.flink.api.scala.ExecutionEnvironment import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment @@ -51,9 +51,9 @@ object Source { logicalSource.source match { case source: StreamDataSource => logicalSource.referenceFormulation match { - case Uri(RMLVoc.Class.CSV) => CSVStream(source) - case Uri(RMLVoc.Class.XPATH) => XMLStream(source, logicalSource.iterators.distinct) - case Uri(RMLVoc.Class.JSONPATH) => JSONStream(source, logicalSource.iterators.distinct) + case Uri(QueryVoc.Class.CSV) => CSVStream(source) + case Uri(QueryVoc.Class.XPATH) => XMLStream(source, logicalSource.iterators.distinct) + case Uri(QueryVoc.Class.JSONPATH) => JSONStream(source, logicalSource.iterators.distinct) } } } From bb126071ba68e7905297ce7a8ee92efb98784c69 Mon Sep 17 
00:00:00 2001 From: Gerald Haesendonck Date: Tue, 23 Mar 2021 10:44:11 +0100 Subject: [PATCH 18/83] Manually merged some conflicting changes --- src/main/scala/io/rml/framework/core/model/DataStore.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/io/rml/framework/core/model/DataStore.scala b/src/main/scala/io/rml/framework/core/model/DataStore.scala index adcb1cf5..69c96584 100644 --- a/src/main/scala/io/rml/framework/core/model/DataStore.scala +++ b/src/main/scala/io/rml/framework/core/model/DataStore.scala @@ -27,6 +27,6 @@ package io.rml.framework.core.model trait DataStore extends Node { def uri:ExplicitNode - override def identifier: String = this.uri.toString + override def identifier: String = this.uri.identifier } From 5e07f453b55e0bf67e228293a794249274987ec0 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 23 Mar 2021 12:16:45 +0100 Subject: [PATCH 19/83] Added some classes used for logical target --- .../rml/framework/core/model/DataSink.scala | 27 ++++++++++++ .../framework/core/model/LogicalTarget.scala | 31 ++++++++++++++ .../framework/core/vocabulary/RMLTVoc.scala | 42 +++++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 src/main/scala/io/rml/framework/core/model/DataSink.scala create mode 100644 src/main/scala/io/rml/framework/core/model/LogicalTarget.scala create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/RMLTVoc.scala diff --git a/src/main/scala/io/rml/framework/core/model/DataSink.scala b/src/main/scala/io/rml/framework/core/model/DataSink.scala new file mode 100644 index 00000000..5e92f309 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/model/DataSink.scala @@ -0,0 +1,27 @@ +package io.rml.framework.core.model + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to 
deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ +trait DataSink extends DataStore diff --git a/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala b/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala new file mode 100644 index 00000000..77fba697 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala @@ -0,0 +1,31 @@ +package io.rml.framework.core.model + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ +trait LogicalTarget extends Node { + + def target: DataSink + +} diff --git a/src/main/scala/io/rml/framework/core/vocabulary/RMLTVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/RMLTVoc.scala new file mode 100644 index 00000000..84f6ef56 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/RMLTVoc.scala @@ -0,0 +1,42 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * + * Logical Target vocabulary. TODO: add a link to the Logical Target specification once it is published. + * + * */ +object RMLTVoc { + val namespace = ("rmlt", "http://semweb.mmlab.be/ns/rml-target#") + + object Property { + val TARGET = namespace._2 + "target" + val SERIALIZATION = namespace._2 + "serialization" + val COMPRESSION = namespace._2 + "compression" + } + + object Class { + val LOGICALTARGET = namespace._2 + "LogicalTarget" + } +} From dda57460ce4adbdf4666f937fa5b836a2ed39b23 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 23 Mar 2021 13:23:28 +0100 Subject: [PATCH 20/83] Added part of compression vocabulary --- .../core/vocabulary/RMLCompVoc.scala | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/RMLCompVoc.scala diff --git a/src/main/scala/io/rml/framework/core/vocabulary/RMLCompVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/RMLCompVoc.scala new file mode 100644 index 00000000..8d7864bd --- /dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/RMLCompVoc.scala @@ -0,0 +1,35 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be 
included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ +object RMLCompVoc { + val namespace = ("comp", "http://semweb.mmlab.be/ns/rml-compression#") + + object Class { + val GZIP = namespace._2 + "GZip" + val ZIP = namespace._2 + "Zip" + } + +} From 02e1394706f217f94fb7978c9a559a887cb57295 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Wed, 24 Mar 2021 15:39:06 +0100 Subject: [PATCH 21/83] WIP: boiler plate code for extracting a logical target from triples map. To do: add extraction for term maps, implement actual logic. 
--- .../extractors/LogicalTargetExtractor.scala | 39 +++++++++++++++++ .../std/StdLogicalTargetExtractor.scala | 43 +++++++++++++++++++ .../std/StdTriplesMapExtractor.scala | 3 +- .../core/model/FormattedRMLMapping.scala | 4 +- .../framework/core/model/LogicalTarget.scala | 24 +++++++++++ .../core/model/StreamTriplesMap.scala | 2 + .../rml/framework/core/model/TriplesMap.scala | 6 ++- .../core/model/std/StdJoinedTriplesMap.scala | 6 ++- .../core/model/std/StdLogicalTarget.scala | 31 +++++++++++++ .../core/model/std/StdParentTriplesMap.scala | 4 +- .../core/model/std/StdTriplesMap.scala | 1 + 11 files changed, 156 insertions(+), 7 deletions(-) create mode 100644 src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala create mode 100644 src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala create mode 100644 src/main/scala/io/rml/framework/core/model/std/StdLogicalTarget.scala diff --git a/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala new file mode 100644 index 00000000..95424498 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala @@ -0,0 +1,39 @@ +package io.rml.framework.core.extractors + +import io.rml.framework.core.extractors.std.StdLogicalTargetExtractor +import io.rml.framework.core.model.LogicalTarget + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ +trait LogicalTargetExtractor extends ResourceExtractor[Option[LogicalTarget]] + +object LogicalTargetExtractor { + + def apply(): LogicalTargetExtractor = { + lazy val extractor = new StdLogicalTargetExtractor + extractor + } + +} diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala new file mode 100644 index 00000000..947ce80a --- /dev/null +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala @@ -0,0 +1,43 @@ +package io.rml.framework.core.extractors.std + +import io.rml.framework.core.extractors.LogicalTargetExtractor +import io.rml.framework.core.internal.Logging +import io.rml.framework.core.model.LogicalTarget +import io.rml.framework.core.model.rdf.RDFResource + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ +class StdLogicalTargetExtractor extends LogicalTargetExtractor with Logging { + /** + * Extract. + * + * @param node Node to extract from. + * @return + */ + override def extract(node: RDFResource): Option[LogicalTarget] = { + None + // TODO here comes the real code + } +} diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala index 16466eed..f8de7bc5 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala @@ -122,7 +122,8 @@ object StdTriplesMapExtractor extends TriplesMapExtractor with Logging { LogicalSourceExtractor().extract(resource), SubjectMapExtractor().extract(resource), resource.uri.value, - GraphMapExtractor().extract(resource) + GraphMapExtractor().extract(resource), + LogicalTargetExtractor().extract(resource) ) val t = TriplesMapsCache.put(resourceStr, triplesMap); Some(triplesMap) diff --git a/src/main/scala/io/rml/framework/core/model/FormattedRMLMapping.scala b/src/main/scala/io/rml/framework/core/model/FormattedRMLMapping.scala index 03f0c6a5..453dcfd8 100644 --- a/src/main/scala/io/rml/framework/core/model/FormattedRMLMapping.scala +++ 
b/src/main/scala/io/rml/framework/core/model/FormattedRMLMapping.scala @@ -139,7 +139,7 @@ object FormattedRMLMapping { }) // every new pom will have exactly one parent triple map, create a JoinedTriplesMap from these poms newPoms.map(pom => { - JoinedTriplesMap(TriplesMap(List(pom), triplesMap.logicalSource, triplesMap.subjectMap, triplesMap.identifier)) + JoinedTriplesMap(TriplesMap(List(pom), triplesMap.logicalSource, triplesMap.subjectMap, triplesMap.identifier, triplesMap.graphMap, triplesMap.logicalTarget)) }).toList } @@ -160,7 +160,7 @@ object FormattedRMLMapping { PredicateObjectMap(item._1.identifier, List(item._2), item._1.predicateMaps,item._1.graphMap) }) }) - TriplesMap(newPoms.toList, triplesMap.logicalSource, triplesMap.subjectMap, triplesMap.identifier) + TriplesMap(newPoms.toList, triplesMap.logicalSource, triplesMap.subjectMap, triplesMap.identifier, triplesMap.graphMap, triplesMap.logicalTarget) } } diff --git a/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala b/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala index 77fba697..9d4bbb2a 100644 --- a/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala +++ b/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala @@ -1,5 +1,9 @@ package io.rml.framework.core.model +import io.rml.framework.core.model.std.StdLogicalTarget + +import java.util.Objects + /** * MIT License * @@ -28,4 +32,24 @@ trait LogicalTarget extends Node { def target: DataSink + def serialization: Option[Uri] + + def compression: Option[Uri] + + override def identifier: String = { + val serHash = if (serialization.isDefined) serialization.get.identifier else "" + val compHash = if (compression.isDefined) compression.get.identifier else "" + Objects.hash(target.identifier, serHash, compHash).toHexString + } + +} + +object LogicalTarget { + def apply( + target: DataSink, + serialization: Option[Uri], + compression: Option[Uri] + ): LogicalTarget = { + StdLogicalTarget(target, serialization, 
compression) + } } diff --git a/src/main/scala/io/rml/framework/core/model/StreamTriplesMap.scala b/src/main/scala/io/rml/framework/core/model/StreamTriplesMap.scala index accd081e..1fea560c 100644 --- a/src/main/scala/io/rml/framework/core/model/StreamTriplesMap.scala +++ b/src/main/scala/io/rml/framework/core/model/StreamTriplesMap.scala @@ -63,6 +63,8 @@ abstract class StreamTriplesMap(triplesMap: TriplesMap) extends TriplesMap { */ override def graphMap = triplesMap.graphMap + override def logicalTarget: Option[LogicalTarget] = triplesMap.logicalTarget + } object StreamTriplesMap { diff --git a/src/main/scala/io/rml/framework/core/model/TriplesMap.scala b/src/main/scala/io/rml/framework/core/model/TriplesMap.scala index 112e0394..85eb5109 100644 --- a/src/main/scala/io/rml/framework/core/model/TriplesMap.scala +++ b/src/main/scala/io/rml/framework/core/model/TriplesMap.scala @@ -69,6 +69,8 @@ trait TriplesMap extends Node { */ def graphMap: Option[GraphMap] + def logicalTarget: Option[LogicalTarget] + /** * * @return @@ -91,13 +93,15 @@ object TriplesMap { logicalSource: LogicalSource, subjectMap: SubjectMap, identifier: String, - graphMap: Option[GraphMap] = None + graphMap: Option[GraphMap], + logicalTarget: Option[LogicalTarget] ): TriplesMap = { val triplesMap = StdTriplesMap(predicateObjectMaps, logicalSource, subjectMap, graphMap, + logicalTarget, identifier) if (logicalSource.source.isInstanceOf[StreamDataSource]) { diff --git a/src/main/scala/io/rml/framework/core/model/std/StdJoinedTriplesMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdJoinedTriplesMap.scala index 4ecf7507..31b420e7 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdJoinedTriplesMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdJoinedTriplesMap.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework.core.model.std -import io.rml.framework.core.model.{JoinedTriplesMap, TriplesMap} +import io.rml.framework.core.model.{JoinedTriplesMap, 
LogicalTarget, TriplesMap} case class StdJoinedTriplesMap(triplesMap: TriplesMap) extends JoinedTriplesMap(triplesMap) { /** @@ -51,11 +51,13 @@ case class StdJoinedTriplesMap(triplesMap: TriplesMap) extends JoinedTriplesMap( */ override def containsParentTriplesMap = triplesMap.containsParentTriplesMap - override def identifier(): String = triplesMap.identifier + override def identifier: String = triplesMap.identifier /** * * @return */ override def graphMap = ??? + + override def logicalTarget: Option[LogicalTarget] = triplesMap.logicalTarget } diff --git a/src/main/scala/io/rml/framework/core/model/std/StdLogicalTarget.scala b/src/main/scala/io/rml/framework/core/model/std/StdLogicalTarget.scala new file mode 100644 index 00000000..d6fa8854 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/model/std/StdLogicalTarget.scala @@ -0,0 +1,31 @@ +package io.rml.framework.core.model.std + +import io.rml.framework.core.model.{DataSink, LogicalTarget, Uri} + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ +case class StdLogicalTarget(target: DataSink, + serialization: Option[Uri], + compression: Option[Uri]) extends LogicalTarget diff --git a/src/main/scala/io/rml/framework/core/model/std/StdParentTriplesMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdParentTriplesMap.scala index 45f1f1ed..424a9f39 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdParentTriplesMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdParentTriplesMap.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework.core.model.std -import io.rml.framework.core.model.{ParentTriplesMap, TriplesMap} +import io.rml.framework.core.model.{LogicalTarget, ParentTriplesMap, TriplesMap} case class StdParentTriplesMap(triplesMap: TriplesMap) extends ParentTriplesMap { /** @@ -59,4 +59,6 @@ case class StdParentTriplesMap(triplesMap: TriplesMap) extends ParentTriplesMap * @return */ override def graphMap = ??? 
+ + override def logicalTarget: Option[LogicalTarget] = triplesMap.logicalTarget } diff --git a/src/main/scala/io/rml/framework/core/model/std/StdTriplesMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdTriplesMap.scala index 8b6766b3..80de2010 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdTriplesMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdTriplesMap.scala @@ -31,6 +31,7 @@ case class StdTriplesMap(predicateObjectMaps: List[PredicateObjectMap], logicalSource: LogicalSource, subjectMap: SubjectMap, graphMap: Option[GraphMap], + logicalTarget: Option[LogicalTarget], identifier: String) extends TriplesMap { /** * From 4043cddc766c36c9f579a948c188bac9d7f0fa83 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Wed, 24 Mar 2021 11:16:30 +0100 Subject: [PATCH 22/83] Update flink to 1.12.2 --- pom.xml | 2 +- src/test/scala/io/rml/framework/StreamTestSync.scala | 1 + src/test/scala/io/rml/framework/util/server/TestSink2.scala | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index e7f4798a..207b4ae5 100644 --- a/pom.xml +++ b/pom.xml @@ -36,7 +36,7 @@ SOFTWARE. 
UTF-8 - 1.11.3 + 1.12.2 1.7.26 2.13.3 3.11.0 diff --git a/src/test/scala/io/rml/framework/StreamTestSync.scala b/src/test/scala/io/rml/framework/StreamTestSync.scala index 23bc5e19..107b245f 100644 --- a/src/test/scala/io/rml/framework/StreamTestSync.scala +++ b/src/test/scala/io/rml/framework/StreamTestSync.scala @@ -202,6 +202,7 @@ abstract class StreamTestSync extends StaticTestSpec with ReadMappingBehaviour w val customConfig = new Configuration() customConfig.setString("io.tmp.dirs", getTempDir.getAbsolutePath) customConfig.setString("rest.bind-port", "50000-51000") // see https://github.com/apache/flink/commit/730eed71ef3f718d61f85d5e94b1060844ca56db + customConfig.setString("classloader.check-leaked-classloader", "false") // this option is to required to fix strange issue related to class loading, see discussion: https://gitlab.ilabt.imec.be/rml/proc/rml-streamer/-/issues/121 val configuration = new MiniClusterConfiguration.Builder() .setConfiguration(customConfig) diff --git a/src/test/scala/io/rml/framework/util/server/TestSink2.scala b/src/test/scala/io/rml/framework/util/server/TestSink2.scala index 191f4165..caae76c1 100644 --- a/src/test/scala/io/rml/framework/util/server/TestSink2.scala +++ b/src/test/scala/io/rml/framework/util/server/TestSink2.scala @@ -47,7 +47,7 @@ object TestSink2 extends SinkFunction[String] { class TestSink2 extends SinkFunction[String] { import TestSink2._ - override def invoke(value: String, context: SinkFunction.Context[_]): Unit = { + override def invoke(value: String, context: SinkFunction.Context): Unit = { Logger.logInfo(s"TestSink2: got value [${value}]") if (value.trim.nonEmpty) { triples = value :: triples From 9f20234317f62cf45aa569d77a0704d63b0e5163 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Wed, 24 Mar 2021 11:47:51 +0100 Subject: [PATCH 23/83] Update docker and documentation flink version to 1.12.2 --- README.md | 6 +++--- docker/docker-compose.yml | 4 ++-- documentation/README_Functions.md | 10 
+++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 2f7cde90..c4692351 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,9 @@ If you want to deploy it yourself, read on. ### Installing Flink RMLStreamer runs its jobs on Flink clusters. -More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.11/try-flink/local_installation.html). +More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.12/try-flink/local_installation.html). At least a local cluster must be running in order to start executing RML Mappings with RMLStreamer. -Please note that this version works with Flink 1.11.3 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.11.3/flink-1.11.3-bin-scala_2.11.tgz). +Please note that this version works with Flink 1.12.2 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.12.2/flink-1.12.2-bin-scala_2.11.tgz). ### Building RMLStreamer @@ -46,7 +46,7 @@ The resulting `RMLStreamer-.jar`, found in the `target` folder, can be ### Executing RML Mappings Here we give examples for running RMLStreamer from the command line. We use `FLINK_BIN` to denote the Flink CLI tool, -usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.11.3/bin/flink`. +usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.12.2/bin/flink`. For Windows a `flink.bat` script is provided. 
The general usage is: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index fb61a26c..5e0d9d3c 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -2,7 +2,7 @@ version: '3' services: jobmanager: - image: flink:1.11.3-scala_2.11 + image: flink:1.12.2-scala_2.11 expose: - "6123" ports: @@ -14,7 +14,7 @@ services: - data:/mnt/data taskmanager: - image: flink:1.11.3-scala_2.11 + image: flink:1.12.2-scala_2.11 expose: - "6121" - "6122" diff --git a/documentation/README_Functions.md b/documentation/README_Functions.md index d53ac3bf..7a2e75d3 100644 --- a/documentation/README_Functions.md +++ b/documentation/README_Functions.md @@ -15,13 +15,13 @@ These files can be obtained from `src/main/resources`: ## Example: RML Streamer + Flink Flink's `lib` directory should contain the jar-files with the custom functions. In this example, these are marked with `*` ``` -flink-1.11.2-scala_2.11 +flink-1.12.2-scala_2.11 └── lib ├── GrelFunctions.jar * ├── IDLabFunctions.jar * - ├── flink-dist_2.11-1.11.2.jar - ├── flink-table-blink_2.11-1.11.2.jar - ├── flink-table_2.11-1.11.2.jar + ├── flink-dist_2.11-1.12.2.jar + ├── flink-table-blink_2.11-1.12.2.jar + ├── flink-table_2.11-1.12.2.jar ├── log4j-1.2.17.jar └── slf4j-log4j12-1.7.15.jar ``` @@ -40,7 +40,7 @@ Note that the function descriptions and function mappings are present. 
The command for running the RML Streamer on Flink should look like ``` -~/flink/flink-1.11.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl +~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl ``` ## Test Cases From f1741cef5a5a3c7e9eecbad8a3872fe0ad590776 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 26 Mar 2021 06:18:40 +0100 Subject: [PATCH 24/83] Revert "Update docker and documentation flink version to 1.12.2" This reverts commit 9f202343 --- README.md | 6 +++--- docker/docker-compose.yml | 4 ++-- documentation/README_Functions.md | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index c4692351..2f7cde90 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,9 @@ If you want to deploy it yourself, read on. ### Installing Flink RMLStreamer runs its jobs on Flink clusters. -More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.12/try-flink/local_installation.html). +More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.11/try-flink/local_installation.html). At least a local cluster must be running in order to start executing RML Mappings with RMLStreamer. -Please note that this version works with Flink 1.12.2 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.12.2/flink-1.12.2-bin-scala_2.11.tgz). +Please note that this version works with Flink 1.11.3 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.11.3/flink-1.11.3-bin-scala_2.11.tgz). 
### Building RMLStreamer @@ -46,7 +46,7 @@ The resulting `RMLStreamer-.jar`, found in the `target` folder, can be ### Executing RML Mappings Here we give examples for running RMLStreamer from the command line. We use `FLINK_BIN` to denote the Flink CLI tool, -usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.12.2/bin/flink`. +usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.11.3/bin/flink`. For Windows a `flink.bat` script is provided. The general usage is: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 5e0d9d3c..fb61a26c 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -2,7 +2,7 @@ version: '3' services: jobmanager: - image: flink:1.12.2-scala_2.11 + image: flink:1.11.3-scala_2.11 expose: - "6123" ports: @@ -14,7 +14,7 @@ services: - data:/mnt/data taskmanager: - image: flink:1.12.2-scala_2.11 + image: flink:1.11.3-scala_2.11 expose: - "6121" - "6122" diff --git a/documentation/README_Functions.md b/documentation/README_Functions.md index 7a2e75d3..d53ac3bf 100644 --- a/documentation/README_Functions.md +++ b/documentation/README_Functions.md @@ -15,13 +15,13 @@ These files can be obtained from `src/main/resources`: ## Example: RML Streamer + Flink Flink's `lib` directory should contain the jar-files with the custom functions. In this example, these are marked with `*` ``` -flink-1.12.2-scala_2.11 +flink-1.11.2-scala_2.11 └── lib ├── GrelFunctions.jar * ├── IDLabFunctions.jar * - ├── flink-dist_2.11-1.12.2.jar - ├── flink-table-blink_2.11-1.12.2.jar - ├── flink-table_2.11-1.12.2.jar + ├── flink-dist_2.11-1.11.2.jar + ├── flink-table-blink_2.11-1.11.2.jar + ├── flink-table_2.11-1.11.2.jar ├── log4j-1.2.17.jar └── slf4j-log4j12-1.7.15.jar ``` @@ -40,7 +40,7 @@ Note that the function descriptions and function mappings are present. 
The command for running the RML Streamer on Flink should look like ``` -~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl +~/flink/flink-1.11.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl ``` ## Test Cases From 73f4a549ef46af820ac44783afe442809dc7f11b Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 26 Mar 2021 06:19:49 +0100 Subject: [PATCH 25/83] Revert "Update flink to 1.12.2" This reverts commit 4043cddc --- pom.xml | 2 +- src/test/scala/io/rml/framework/StreamTestSync.scala | 1 - src/test/scala/io/rml/framework/util/server/TestSink2.scala | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 207b4ae5..e7f4798a 100644 --- a/pom.xml +++ b/pom.xml @@ -36,7 +36,7 @@ SOFTWARE. UTF-8 - 1.12.2 + 1.11.3 1.7.26 2.13.3 3.11.0 diff --git a/src/test/scala/io/rml/framework/StreamTestSync.scala b/src/test/scala/io/rml/framework/StreamTestSync.scala index 107b245f..23bc5e19 100644 --- a/src/test/scala/io/rml/framework/StreamTestSync.scala +++ b/src/test/scala/io/rml/framework/StreamTestSync.scala @@ -202,7 +202,6 @@ abstract class StreamTestSync extends StaticTestSpec with ReadMappingBehaviour w val customConfig = new Configuration() customConfig.setString("io.tmp.dirs", getTempDir.getAbsolutePath) customConfig.setString("rest.bind-port", "50000-51000") // see https://github.com/apache/flink/commit/730eed71ef3f718d61f85d5e94b1060844ca56db - customConfig.setString("classloader.check-leaked-classloader", "false") // this option is to required to fix strange issue related to class loading, see discussion: https://gitlab.ilabt.imec.be/rml/proc/rml-streamer/-/issues/121 val configuration = new MiniClusterConfiguration.Builder() .setConfiguration(customConfig) diff --git a/src/test/scala/io/rml/framework/util/server/TestSink2.scala 
b/src/test/scala/io/rml/framework/util/server/TestSink2.scala index caae76c1..191f4165 100644 --- a/src/test/scala/io/rml/framework/util/server/TestSink2.scala +++ b/src/test/scala/io/rml/framework/util/server/TestSink2.scala @@ -47,7 +47,7 @@ object TestSink2 extends SinkFunction[String] { class TestSink2 extends SinkFunction[String] { import TestSink2._ - override def invoke(value: String, context: SinkFunction.Context): Unit = { + override def invoke(value: String, context: SinkFunction.Context[_]): Unit = { Logger.logInfo(s"TestSink2: got value [${value}]") if (value.trim.nonEmpty) { triples = value :: triples From c5633746a12609f33a77dbb714a715715b2a1ad6 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 26 Mar 2021 06:01:42 +0000 Subject: [PATCH 26/83] Revert "Update flink version to 1.12.2" This reverts commit 9f202343 --- README.md | 6 +++--- docker/docker-compose.yml | 4 ++-- documentation/README_Functions.md | 10 +++++----- pom.xml | 2 +- src/test/scala/io/rml/framework/StreamTestSync.scala | 1 - .../scala/io/rml/framework/util/server/TestSink2.scala | 2 +- 6 files changed, 12 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index c4692351..2f7cde90 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,9 @@ If you want to deploy it yourself, read on. ### Installing Flink RMLStreamer runs its jobs on Flink clusters. -More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.12/try-flink/local_installation.html). +More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.11/try-flink/local_installation.html). At least a local cluster must be running in order to start executing RML Mappings with RMLStreamer. 
-Please note that this version works with Flink 1.12.2 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.12.2/flink-1.12.2-bin-scala_2.11.tgz). +Please note that this version works with Flink 1.11.3 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.11.3/flink-1.11.3-bin-scala_2.11.tgz). ### Building RMLStreamer @@ -46,7 +46,7 @@ The resulting `RMLStreamer-.jar`, found in the `target` folder, can be ### Executing RML Mappings Here we give examples for running RMLStreamer from the command line. We use `FLINK_BIN` to denote the Flink CLI tool, -usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.12.2/bin/flink`. +usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.11.3/bin/flink`. For Windows a `flink.bat` script is provided. The general usage is: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 5e0d9d3c..fb61a26c 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -2,7 +2,7 @@ version: '3' services: jobmanager: - image: flink:1.12.2-scala_2.11 + image: flink:1.11.3-scala_2.11 expose: - "6123" ports: @@ -14,7 +14,7 @@ services: - data:/mnt/data taskmanager: - image: flink:1.12.2-scala_2.11 + image: flink:1.11.3-scala_2.11 expose: - "6121" - "6122" diff --git a/documentation/README_Functions.md b/documentation/README_Functions.md index 7a2e75d3..d53ac3bf 100644 --- a/documentation/README_Functions.md +++ b/documentation/README_Functions.md @@ -15,13 +15,13 @@ These files can be obtained from `src/main/resources`: ## Example: RML Streamer + Flink Flink's `lib` directory should contain the jar-files with the custom functions. 
In this example, these are marked with `*` ``` -flink-1.12.2-scala_2.11 +flink-1.11.2-scala_2.11 └── lib ├── GrelFunctions.jar * ├── IDLabFunctions.jar * - ├── flink-dist_2.11-1.12.2.jar - ├── flink-table-blink_2.11-1.12.2.jar - ├── flink-table_2.11-1.12.2.jar + ├── flink-dist_2.11-1.11.2.jar + ├── flink-table-blink_2.11-1.11.2.jar + ├── flink-table_2.11-1.11.2.jar ├── log4j-1.2.17.jar └── slf4j-log4j12-1.7.15.jar ``` @@ -40,7 +40,7 @@ Note that the function descriptions and function mappings are present. The command for running the RML Streamer on Flink should look like ``` -~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl +~/flink/flink-1.11.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl ``` ## Test Cases diff --git a/pom.xml b/pom.xml index 207b4ae5..e7f4798a 100644 --- a/pom.xml +++ b/pom.xml @@ -36,7 +36,7 @@ SOFTWARE. 
UTF-8 - 1.12.2 + 1.11.3 1.7.26 2.13.3 3.11.0 diff --git a/src/test/scala/io/rml/framework/StreamTestSync.scala b/src/test/scala/io/rml/framework/StreamTestSync.scala index 107b245f..23bc5e19 100644 --- a/src/test/scala/io/rml/framework/StreamTestSync.scala +++ b/src/test/scala/io/rml/framework/StreamTestSync.scala @@ -202,7 +202,6 @@ abstract class StreamTestSync extends StaticTestSpec with ReadMappingBehaviour w val customConfig = new Configuration() customConfig.setString("io.tmp.dirs", getTempDir.getAbsolutePath) customConfig.setString("rest.bind-port", "50000-51000") // see https://github.com/apache/flink/commit/730eed71ef3f718d61f85d5e94b1060844ca56db - customConfig.setString("classloader.check-leaked-classloader", "false") // this option is to required to fix strange issue related to class loading, see discussion: https://gitlab.ilabt.imec.be/rml/proc/rml-streamer/-/issues/121 val configuration = new MiniClusterConfiguration.Builder() .setConfiguration(customConfig) diff --git a/src/test/scala/io/rml/framework/util/server/TestSink2.scala b/src/test/scala/io/rml/framework/util/server/TestSink2.scala index caae76c1..191f4165 100644 --- a/src/test/scala/io/rml/framework/util/server/TestSink2.scala +++ b/src/test/scala/io/rml/framework/util/server/TestSink2.scala @@ -47,7 +47,7 @@ object TestSink2 extends SinkFunction[String] { class TestSink2 extends SinkFunction[String] { import TestSink2._ - override def invoke(value: String, context: SinkFunction.Context): Unit = { + override def invoke(value: String, context: SinkFunction.Context[_]): Unit = { Logger.logInfo(s"TestSink2: got value [${value}]") if (value.trim.nonEmpty) { triples = value :: triples From 3a14dd60bbfb4a868226a7f799b75896c2c68424 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Wed, 31 Mar 2021 16:53:07 +0200 Subject: [PATCH 27/83] Update and fix class loading issue --- pom.xml | 2 +- .../scala/io/rml/framework/KafkaStreamTestSyncFnO.scala | 4 ++-- 
src/test/scala/io/rml/framework/SandboxTests.scala | 3 ++- src/test/scala/io/rml/framework/StreamTestSync.scala | 6 ++++-- src/test/scala/io/rml/framework/TCPStreamTestSync.scala | 5 ++++- src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala | 3 --- .../rml/framework/util/fileprocessing/TestFilesUtil.scala | 3 ++- src/test/scala/io/rml/framework/util/server/TestSink2.scala | 2 +- 8 files changed, 16 insertions(+), 12 deletions(-) diff --git a/pom.xml b/pom.xml index e7f4798a..207b4ae5 100644 --- a/pom.xml +++ b/pom.xml @@ -36,7 +36,7 @@ SOFTWARE. UTF-8 - 1.11.3 + 1.12.2 1.7.26 2.13.3 3.11.0 diff --git a/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala b/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala index 93303a3d..dddb9d84 100644 --- a/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala +++ b/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala @@ -77,7 +77,7 @@ class KafkaStreamTestSyncFnO extends StreamTestSync{ } - override def beforeTestCase(): Unit = { +// override def beforeTestCase(): Unit = { // topics seem to be created automatically... 
/*logInfo("Creating Kafka input topic...") @@ -89,7 +89,7 @@ class KafkaStreamTestSyncFnO extends StreamTestSync{ createTopicsResult.all().get() // wait for completion of creating topics Thread.sleep(2000) logInfo("Creating Kafka input topic done.") */ - } +// } override def afterTestCase(): Unit = { logInfo("Deleting Kafka input topic(s)...") diff --git a/src/test/scala/io/rml/framework/SandboxTests.scala b/src/test/scala/io/rml/framework/SandboxTests.scala index 1628b94a..c869ea3f 100644 --- a/src/test/scala/io/rml/framework/SandboxTests.scala +++ b/src/test/scala/io/rml/framework/SandboxTests.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework -import io.rml.framework.api.RMLEnvironment +import io.rml.framework.api.{FnOEnvironment, RMLEnvironment} import io.rml.framework.core.extractors.TriplesMapsCache import io.rml.framework.core.util.Util import io.rml.framework.engine.NopPostProcessor @@ -42,6 +42,7 @@ class SandboxTests extends FunSuite with Matchers with FunctionMappingTest { private def executeTest(mappingFile: String): Unit = { TriplesMapsCache.clear(); + FnOEnvironment.loadedClassesMap.clear() RMLEnvironment.setGeneratorBaseIRI(Some("http://example.org/base/")) implicit val env = ExecutionEnvironment.getExecutionEnvironment implicit val senv = StreamExecutionEnvironment.getExecutionEnvironment diff --git a/src/test/scala/io/rml/framework/StreamTestSync.scala b/src/test/scala/io/rml/framework/StreamTestSync.scala index 23bc5e19..8bb5b2bf 100644 --- a/src/test/scala/io/rml/framework/StreamTestSync.scala +++ b/src/test/scala/io/rml/framework/StreamTestSync.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework -import io.rml.framework.api.RMLEnvironment +import io.rml.framework.api.{FnOEnvironment, RMLEnvironment} import io.rml.framework.core.extractors.TriplesMapsCache import io.rml.framework.core.internal.Logging import io.rml.framework.core.util.{StreamerConfig, Util} @@ -73,7 +73,9 @@ abstract class StreamTestSync extends StaticTestSpec with 
ReadMappingBehaviour w } // Things to do before running one test case - protected def beforeTestCase(): Unit + protected def beforeTestCase(): Unit = { + FnOEnvironment.loadedClassesMap.clear() + } // Things to do after running one test case protected def afterTestCase(): Unit diff --git a/src/test/scala/io/rml/framework/TCPStreamTestSync.scala b/src/test/scala/io/rml/framework/TCPStreamTestSync.scala index eb36977f..bcf1b3c6 100644 --- a/src/test/scala/io/rml/framework/TCPStreamTestSync.scala +++ b/src/test/scala/io/rml/framework/TCPStreamTestSync.scala @@ -34,6 +34,7 @@ import io.netty.channel.socket.SocketChannel import io.netty.channel.socket.nio.NioServerSocketChannel import io.netty.channel.{ChannelFuture, ChannelHandlerContext, ChannelInboundHandlerAdapter, ChannelInitializer} import io.netty.util.{CharsetUtil, ReferenceCountUtil} +import io.rml.framework.api.FnOEnvironment import io.rml.framework.util.logging.Logger import io.rml.framework.util.server.TestData @@ -78,7 +79,9 @@ class TCPStreamTestSync extends StreamTestSync { } - override def beforeTestCase(): Unit = {} + override def beforeTestCase(): Unit = { + FnOEnvironment.loadedClassesMap.clear() + } override def afterTestCase(): Unit = {} diff --git a/src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala b/src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala index 32a1a8f7..9b0d7e95 100644 --- a/src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala +++ b/src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala @@ -27,7 +27,6 @@ package io.rml.framework import java.net.InetSocketAddress import java.nio.charset.StandardCharsets import java.util.concurrent.TimeUnit - import io.netty.bootstrap.ServerBootstrap import io.netty.buffer.ByteBuf import io.netty.channel.nio.NioEventLoopGroup @@ -77,8 +76,6 @@ class TCPStreamTestSyncFnO extends StreamTestSync { } - override def beforeTestCase(): Unit = {} - override def afterTestCase(): Unit = {} override def teardown(): Unit = { diff --git 
a/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala b/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala index 12224f70..9f2c6509 100644 --- a/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala +++ b/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala @@ -24,6 +24,7 @@ **/ package io.rml.framework.util.fileprocessing +import io.rml.framework.api.FnOEnvironment import io.rml.framework.core.util.Util import io.rml.framework.util.logging.Logger @@ -74,7 +75,7 @@ trait TestFilesUtil[R] { def test(rootDir: String, shouldPass: Boolean, checkFunc: (String, Boolean) => Unit): Unit = { var checkedTestCases = Array("") for (pathString <- getTestCaseFolders(rootDir).map(_.toString).sorted) { - + FnOEnvironment.loadedClassesMap.clear() checkFunc(pathString, shouldPass) val testCase = new File(pathString).getName Logger.logSuccess("Passed processing: " + testCase) diff --git a/src/test/scala/io/rml/framework/util/server/TestSink2.scala b/src/test/scala/io/rml/framework/util/server/TestSink2.scala index 191f4165..caae76c1 100644 --- a/src/test/scala/io/rml/framework/util/server/TestSink2.scala +++ b/src/test/scala/io/rml/framework/util/server/TestSink2.scala @@ -47,7 +47,7 @@ object TestSink2 extends SinkFunction[String] { class TestSink2 extends SinkFunction[String] { import TestSink2._ - override def invoke(value: String, context: SinkFunction.Context[_]): Unit = { + override def invoke(value: String, context: SinkFunction.Context): Unit = { Logger.logInfo(s"TestSink2: got value [${value}]") if (value.trim.nonEmpty) { triples = value :: triples From 68511542adc289233708ff5e5a6c627116adecf7 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 6 Apr 2021 11:33:16 +0200 Subject: [PATCH 28/83] WIP: a logical target may occur zero or more times --- .../extractors/LogicalTargetExtractor.scala | 2 +- .../std/StdLogicalTargetExtractor.scala | 18 ++++++++++++++++-- 2 files changed, 17 
insertions(+), 3 deletions(-) diff --git a/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala index 95424498..f2591b15 100644 --- a/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala @@ -27,7 +27,7 @@ import io.rml.framework.core.model.LogicalTarget * THE SOFTWARE. * * */ -trait LogicalTargetExtractor extends ResourceExtractor[Option[LogicalTarget]] +trait LogicalTargetExtractor extends ResourceExtractor[List[LogicalTarget]] object LogicalTargetExtractor { diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala index 947ce80a..142388d7 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala @@ -4,6 +4,9 @@ import io.rml.framework.core.extractors.LogicalTargetExtractor import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.LogicalTarget import io.rml.framework.core.model.rdf.RDFResource +import io.rml.framework.core.vocabulary.RMLVoc + +import scala.collection.mutable.ListBuffer /** * MIT License @@ -36,8 +39,19 @@ class StdLogicalTargetExtractor extends LogicalTargetExtractor with Logging { * @param node Node to extract from. 
* @return */ - override def extract(node: RDFResource): Option[LogicalTarget] = { - None + override def extract(node: RDFResource): List[LogicalTarget] = { + logDebug(s"Extracting logical target: ${node.uri}") + + var result = new ListBuffer[LogicalTarget] + + val properties = node.listProperties(RMLVoc.Property.LOGICALTARGET) + properties.foreach(logicalTargetResource => { + /*logicalTargetResource match { + case resource: RDFResource + }*/ + }) + + result.toList // TODO here comes the real code } } From c5d2389a64110ef7fbb25b16b7fef1576ad9277d Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 6 Apr 2021 11:34:19 +0200 Subject: [PATCH 29/83] Revert "WIP: boiler plate code for extracting a logical target from triples map. To do: add extraction for term maps, implement actual logic." This reverts commit 02e13947 --- .../std/StdTriplesMapExtractor.scala | 3 +- .../core/model/FormattedRMLMapping.scala | 4 +-- .../framework/core/model/LogicalTarget.scala | 24 -------------- .../core/model/StreamTriplesMap.scala | 2 -- .../rml/framework/core/model/TriplesMap.scala | 6 +--- .../core/model/std/StdJoinedTriplesMap.scala | 6 ++-- .../core/model/std/StdLogicalTarget.scala | 31 ------------------- .../core/model/std/StdParentTriplesMap.scala | 4 +-- .../core/model/std/StdTriplesMap.scala | 1 - 9 files changed, 7 insertions(+), 74 deletions(-) delete mode 100644 src/main/scala/io/rml/framework/core/model/std/StdLogicalTarget.scala diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala index f8de7bc5..16466eed 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala @@ -122,8 +122,7 @@ object StdTriplesMapExtractor extends TriplesMapExtractor with Logging { LogicalSourceExtractor().extract(resource), 
SubjectMapExtractor().extract(resource), resource.uri.value, - GraphMapExtractor().extract(resource), - LogicalTargetExtractor().extract(resource) + GraphMapExtractor().extract(resource) ) val t = TriplesMapsCache.put(resourceStr, triplesMap); Some(triplesMap) diff --git a/src/main/scala/io/rml/framework/core/model/FormattedRMLMapping.scala b/src/main/scala/io/rml/framework/core/model/FormattedRMLMapping.scala index 453dcfd8..03f0c6a5 100644 --- a/src/main/scala/io/rml/framework/core/model/FormattedRMLMapping.scala +++ b/src/main/scala/io/rml/framework/core/model/FormattedRMLMapping.scala @@ -139,7 +139,7 @@ object FormattedRMLMapping { }) // every new pom will have exactly one parent triple map, create a JoinedTriplesMap from these poms newPoms.map(pom => { - JoinedTriplesMap(TriplesMap(List(pom), triplesMap.logicalSource, triplesMap.subjectMap, triplesMap.identifier, triplesMap.graphMap, triplesMap.logicalTarget)) + JoinedTriplesMap(TriplesMap(List(pom), triplesMap.logicalSource, triplesMap.subjectMap, triplesMap.identifier)) }).toList } @@ -160,7 +160,7 @@ object FormattedRMLMapping { PredicateObjectMap(item._1.identifier, List(item._2), item._1.predicateMaps,item._1.graphMap) }) }) - TriplesMap(newPoms.toList, triplesMap.logicalSource, triplesMap.subjectMap, triplesMap.identifier, triplesMap.graphMap, triplesMap.logicalTarget) + TriplesMap(newPoms.toList, triplesMap.logicalSource, triplesMap.subjectMap, triplesMap.identifier) } } diff --git a/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala b/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala index 9d4bbb2a..77fba697 100644 --- a/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala +++ b/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala @@ -1,9 +1,5 @@ package io.rml.framework.core.model -import io.rml.framework.core.model.std.StdLogicalTarget - -import java.util.Objects - /** * MIT License * @@ -32,24 +28,4 @@ trait LogicalTarget extends Node { def target: 
DataSink - def serialization: Option[Uri] - - def compression: Option[Uri] - - override def identifier: String = { - val serHash = if (serialization.isDefined) serialization.get.identifier else "" - val compHash = if (compression.isDefined) compression.get.identifier else "" - Objects.hash(target.identifier, serHash, compHash).toHexString - } - -} - -object LogicalTarget { - def apply( - target: DataSink, - serialization: Option[Uri], - compression: Option[Uri] - ): LogicalTarget = { - StdLogicalTarget(target, serialization, compression) - } } diff --git a/src/main/scala/io/rml/framework/core/model/StreamTriplesMap.scala b/src/main/scala/io/rml/framework/core/model/StreamTriplesMap.scala index 1fea560c..accd081e 100644 --- a/src/main/scala/io/rml/framework/core/model/StreamTriplesMap.scala +++ b/src/main/scala/io/rml/framework/core/model/StreamTriplesMap.scala @@ -63,8 +63,6 @@ abstract class StreamTriplesMap(triplesMap: TriplesMap) extends TriplesMap { */ override def graphMap = triplesMap.graphMap - override def logicalTarget: Option[LogicalTarget] = triplesMap.logicalTarget - } object StreamTriplesMap { diff --git a/src/main/scala/io/rml/framework/core/model/TriplesMap.scala b/src/main/scala/io/rml/framework/core/model/TriplesMap.scala index 85eb5109..112e0394 100644 --- a/src/main/scala/io/rml/framework/core/model/TriplesMap.scala +++ b/src/main/scala/io/rml/framework/core/model/TriplesMap.scala @@ -69,8 +69,6 @@ trait TriplesMap extends Node { */ def graphMap: Option[GraphMap] - def logicalTarget: Option[LogicalTarget] - /** * * @return @@ -93,15 +91,13 @@ object TriplesMap { logicalSource: LogicalSource, subjectMap: SubjectMap, identifier: String, - graphMap: Option[GraphMap], - logicalTarget: Option[LogicalTarget] + graphMap: Option[GraphMap] = None ): TriplesMap = { val triplesMap = StdTriplesMap(predicateObjectMaps, logicalSource, subjectMap, graphMap, - logicalTarget, identifier) if (logicalSource.source.isInstanceOf[StreamDataSource]) { diff --git 
a/src/main/scala/io/rml/framework/core/model/std/StdJoinedTriplesMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdJoinedTriplesMap.scala index 31b420e7..4ecf7507 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdJoinedTriplesMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdJoinedTriplesMap.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework.core.model.std -import io.rml.framework.core.model.{JoinedTriplesMap, LogicalTarget, TriplesMap} +import io.rml.framework.core.model.{JoinedTriplesMap, TriplesMap} case class StdJoinedTriplesMap(triplesMap: TriplesMap) extends JoinedTriplesMap(triplesMap) { /** @@ -51,13 +51,11 @@ case class StdJoinedTriplesMap(triplesMap: TriplesMap) extends JoinedTriplesMap( */ override def containsParentTriplesMap = triplesMap.containsParentTriplesMap - override def identifier: String = triplesMap.identifier + override def identifier(): String = triplesMap.identifier /** * * @return */ override def graphMap = ??? - - override def logicalTarget: Option[LogicalTarget] = triplesMap.logicalTarget } diff --git a/src/main/scala/io/rml/framework/core/model/std/StdLogicalTarget.scala b/src/main/scala/io/rml/framework/core/model/std/StdLogicalTarget.scala deleted file mode 100644 index d6fa8854..00000000 --- a/src/main/scala/io/rml/framework/core/model/std/StdLogicalTarget.scala +++ /dev/null @@ -1,31 +0,0 @@ -package io.rml.framework.core.model.std - -import io.rml.framework.core.model.{DataSink, LogicalTarget, Uri} - -/** - * MIT License - * - * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to 
do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * */ -case class StdLogicalTarget(target: DataSink, - serialization: Option[Uri], - compression: Option[Uri]) extends LogicalTarget diff --git a/src/main/scala/io/rml/framework/core/model/std/StdParentTriplesMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdParentTriplesMap.scala index 424a9f39..45f1f1ed 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdParentTriplesMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdParentTriplesMap.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework.core.model.std -import io.rml.framework.core.model.{LogicalTarget, ParentTriplesMap, TriplesMap} +import io.rml.framework.core.model.{ParentTriplesMap, TriplesMap} case class StdParentTriplesMap(triplesMap: TriplesMap) extends ParentTriplesMap { /** @@ -59,6 +59,4 @@ case class StdParentTriplesMap(triplesMap: TriplesMap) extends ParentTriplesMap * @return */ override def graphMap = ??? 
- - override def logicalTarget: Option[LogicalTarget] = triplesMap.logicalTarget } diff --git a/src/main/scala/io/rml/framework/core/model/std/StdTriplesMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdTriplesMap.scala index 80de2010..8b6766b3 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdTriplesMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdTriplesMap.scala @@ -31,7 +31,6 @@ case class StdTriplesMap(predicateObjectMaps: List[PredicateObjectMap], logicalSource: LogicalSource, subjectMap: SubjectMap, graphMap: Option[GraphMap], - logicalTarget: Option[LogicalTarget], identifier: String) extends TriplesMap { /** * From 19d94efb5fcddaea7cffc7473969228631c3cd6f Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 6 Apr 2021 14:57:53 +0200 Subject: [PATCH 30/83] WIP: put logical target extraction in term map --- .../core/extractors/std/StdFunctionMapExtractor.scala | 2 +- .../core/extractors/std/StdGraphMapExtractor.scala | 4 ++-- .../core/extractors/std/StdObjectMapExtractor.scala | 7 ++++--- .../core/extractors/std/StdPredicateMapExtractor.scala | 7 ++++--- .../core/extractors/std/StdSubjectMapExtractor.scala | 4 +++- .../framework/core/extractors/std/TermMapExtractor.scala | 9 +++++++-- .../scala/io/rml/framework/core/model/FunctionMap.scala | 2 +- .../scala/io/rml/framework/core/model/GraphMap.scala | 4 +++- .../scala/io/rml/framework/core/model/ObjectMap.scala | 6 ++++-- .../scala/io/rml/framework/core/model/PredicateMap.scala | 6 ++++-- .../scala/io/rml/framework/core/model/SubjectMap.scala | 5 +++-- src/main/scala/io/rml/framework/core/model/TermMap.scala | 2 ++ .../io/rml/framework/core/model/std/StdGraphMap.scala | 3 ++- .../io/rml/framework/core/model/std/StdObjectMap.scala | 3 ++- .../rml/framework/core/model/std/StdPredicateMap.scala | 3 ++- .../io/rml/framework/core/model/std/StdSubjectMap.scala | 3 ++- 16 files changed, 46 insertions(+), 24 deletions(-) diff --git 
a/src/main/scala/io/rml/framework/core/extractors/std/StdFunctionMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdFunctionMapExtractor.scala index 15cd6242..4a244ed9 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdFunctionMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdFunctionMapExtractor.scala @@ -41,7 +41,7 @@ class StdFunctionMapExtractor extends FunctionMapExtractor { val poms = pomExtractor.extract(functionValue) - FunctionMap(fnParentMap, functionValue.uri.toString, poms) + FunctionMap(fnParentMap, functionValue.uri.toString, poms, extractLogicalTargets(resource)) }) diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdGraphMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdGraphMapExtractor.scala index f06959a5..31545f0b 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdGraphMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdGraphMapExtractor.scala @@ -77,7 +77,7 @@ class StdGraphMapExtractor extends GraphMapExtractor { def extractGraph(resource: RDFResource): Option[GraphMap] = { - Some(GraphMap(resource.uri.value, List(), Some(resource.uri), None, None, extractTermType(resource))) + Some(GraphMap(resource.uri.value, List(), Some(resource.uri), None, None, extractTermType(resource), extractLogicalTargets(resource))) } @@ -87,7 +87,7 @@ class StdGraphMapExtractor extends GraphMapExtractor { val constant = extractConstant(resource) val reference = extractReference(resource) val functionMap = FunctionMapExtractor().extract(resource) - Some(GraphMap(constant.getOrElse(resource.uri).value, functionMap, constant, reference, template, termType)) + Some(GraphMap(constant.getOrElse(resource.uri).value, functionMap, constant, reference, template, termType, extractLogicalTargets(resource))) } } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala 
b/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala index 195658f4..6f87f291 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala @@ -56,9 +56,9 @@ class StdObjectMapExtractor extends ObjectMapExtractor { // iterates over predicates, converts these to predicate maps as blanks properties.map { case literal: RDFLiteral => - ObjectMap("", constant = Some(Literal(literal.value)), termType = Some(Uri(R2RMLVoc.Class.LITERAL))) + ObjectMap("", constant = Some(Literal(literal.value)), termType = Some(Uri(R2RMLVoc.Class.LITERAL)), logicalTargets = List()) case resource: RDFResource => - ObjectMap("", constant = Some(resource.uri), termType = Some(Uri(R2RMLVoc.Class.IRI))) + ObjectMap("", constant = Some(resource.uri), termType = Some(Uri(R2RMLVoc.Class.IRI)), logicalTargets = List()) } } @@ -102,7 +102,8 @@ class StdObjectMapExtractor extends ObjectMapExtractor { val language = extractLanguage(resource) val datatype = extractDatatype(resource) val functionMap = FunctionMapExtractor().extract(resource) - ObjectMap(resource.uri.identifier, functionMap, constant, reference, template, termType, datatype, language, parentTriplesMap, joinCondition) + val logicalTargets = extractLogicalTargets(resource) + ObjectMap(resource.uri.identifier, functionMap, constant, reference, template, termType, datatype, language, parentTriplesMap, joinCondition, logicalTargets) } def extractDatatype(resource: RDFResource): Option[Uri] = { diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala index a335f594..085d4713 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala @@ -56,9 +56,9 @@ class 
StdPredicateMapExtractor() extends PredicateMapExtractor { // iterates over predicates, converts these to predicate maps as blanks properties.map { case literal: RDFLiteral => - PredicateMap("", constant = Some(Uri(literal.value)), termType = Some(Uri(R2RMLVoc.Class.IRI))) + PredicateMap("", constant = Some(Uri(literal.value)), termType = Some(Uri(R2RMLVoc.Class.IRI)), logicalTargets = List()) case resource: RDFResource => - PredicateMap("", constant = Some(resource.uri), termType = Some(Uri(R2RMLVoc.Class.IRI))) + PredicateMap("", constant = Some(resource.uri), termType = Some(Uri(R2RMLVoc.Class.IRI)), logicalTargets = List()) } } @@ -93,8 +93,9 @@ class StdPredicateMapExtractor() extends PredicateMapExtractor { val constant = extractConstant(resource) val reference = extractReference(resource) val functionMap = FunctionMapExtractor().extract(resource) + val logicalTargets = extractLogicalTargets(resource) - PredicateMap(resource.uri.value, functionMap, constant, reference, template, termType) + PredicateMap(resource.uri.value, functionMap, constant, reference, template, termType, logicalTargets) } } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala index e60380d0..c278ddd1 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala @@ -85,6 +85,8 @@ class StdSubjectMapExtractor extends SubjectMapExtractor with Logging { val functionMap = FunctionMapExtractor().extract(resource) + val logicalTargets = extractLogicalTargets(resource) + logDebug(resource.uri + ": Extracted from subject map" + ": reference -> " + reference + ", constant -> " + constant + @@ -93,7 +95,7 @@ class StdSubjectMapExtractor extends SubjectMapExtractor with Logging { ", graphMap -> " + graphMap + ", class -> " + _class) - 
SubjectMap(resource.uri.value, _class, functionMap, constant, reference, template, termType, graphMap) + SubjectMap(resource.uri.value, _class, functionMap, constant, reference, template, termType, graphMap, logicalTargets) } override def extractTermType(resource: RDFResource): Option[Uri] = { diff --git a/src/main/scala/io/rml/framework/core/extractors/std/TermMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/TermMapExtractor.scala index 587760cd..d6812bef 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/TermMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/TermMapExtractor.scala @@ -25,9 +25,9 @@ package io.rml.framework.core.extractors.std -import io.rml.framework.core.extractors.ResourceExtractor +import io.rml.framework.core.extractors.{LogicalTargetExtractor, ResourceExtractor} import io.rml.framework.core.model.rdf.RDFResource -import io.rml.framework.core.model.{Entity, Literal, Uri} +import io.rml.framework.core.model.{Entity, Literal, LogicalTarget, Uri} import io.rml.framework.core.vocabulary.{R2RMLVoc, RMLVoc} import io.rml.framework.shared.RMLException @@ -39,6 +39,11 @@ import scala.util.matching.Regex * @tparam T */ abstract class TermMapExtractor[T] extends ResourceExtractor[T] { + lazy private val logicalTargetExtractor: LogicalTargetExtractor = LogicalTargetExtractor() + + protected def extractLogicalTargets(node: RDFResource): List[LogicalTarget] = { + logicalTargetExtractor.extract(node) + } /** * Extracts template property from a resource. 
diff --git a/src/main/scala/io/rml/framework/core/model/FunctionMap.scala b/src/main/scala/io/rml/framework/core/model/FunctionMap.scala index e2fa8550..b49eedad 100644 --- a/src/main/scala/io/rml/framework/core/model/FunctionMap.scala +++ b/src/main/scala/io/rml/framework/core/model/FunctionMap.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework.core.model -case class FunctionMap(parentMap : String, identifier: String, functionValue: List[PredicateObjectMap]) extends TermMap { +case class FunctionMap(parentMap : String, identifier: String, functionValue: List[PredicateObjectMap], logicalTargets: List[LogicalTarget]) extends TermMap { /** diff --git a/src/main/scala/io/rml/framework/core/model/GraphMap.scala b/src/main/scala/io/rml/framework/core/model/GraphMap.scala index a992a342..3df3e56b 100644 --- a/src/main/scala/io/rml/framework/core/model/GraphMap.scala +++ b/src/main/scala/io/rml/framework/core/model/GraphMap.scala @@ -45,5 +45,7 @@ object GraphMap { constant: Option[Entity], reference: Option[Literal], template: Option[Literal], - termType: Option[Uri]): GraphMap = StdGraphMap(identifier, functionMap, constant, reference, template) + termType: Option[Uri], + logicalTargets: List[LogicalTarget]): GraphMap = StdGraphMap( + identifier, functionMap, constant, reference, template, logicalTargets) } diff --git a/src/main/scala/io/rml/framework/core/model/ObjectMap.scala b/src/main/scala/io/rml/framework/core/model/ObjectMap.scala index a98b9d2b..6e553e67 100644 --- a/src/main/scala/io/rml/framework/core/model/ObjectMap.scala +++ b/src/main/scala/io/rml/framework/core/model/ObjectMap.scala @@ -54,7 +54,8 @@ object ObjectMap { datatype: Option[Uri] = None, language: Option[Literal] = None, parentTriplesMap: Option[String] = None, - joinCondition: Option[JoinCondition] = None): ObjectMap = + joinCondition: Option[JoinCondition] = None, + logicalTargets: List[LogicalTarget]): ObjectMap = StdObjectMap(identifier, functionMap, @@ -65,5 +66,6 @@ object ObjectMap { 
datatype, language, parentTriplesMap, - joinCondition) + joinCondition, + logicalTargets) } diff --git a/src/main/scala/io/rml/framework/core/model/PredicateMap.scala b/src/main/scala/io/rml/framework/core/model/PredicateMap.scala index 73e4637b..d3329557 100644 --- a/src/main/scala/io/rml/framework/core/model/PredicateMap.scala +++ b/src/main/scala/io/rml/framework/core/model/PredicateMap.scala @@ -43,12 +43,14 @@ object PredicateMap { constant: Option[Entity] = None, reference: Option[Literal] = None, template: Option[Literal] = None, - termType: Option[Uri] = None): PredicateMap = + termType: Option[Uri] = None, + logicalTargets: List[LogicalTarget]): PredicateMap = StdPredicateMap(identifier, functionMap, constant, reference, template, - Some(Uri(R2RMLVoc.Class.IRI))) + Some(Uri(R2RMLVoc.Class.IRI)), + logicalTargets) } diff --git a/src/main/scala/io/rml/framework/core/model/SubjectMap.scala b/src/main/scala/io/rml/framework/core/model/SubjectMap.scala index 14e66360..092fef70 100644 --- a/src/main/scala/io/rml/framework/core/model/SubjectMap.scala +++ b/src/main/scala/io/rml/framework/core/model/SubjectMap.scala @@ -64,9 +64,10 @@ object SubjectMap { reference: Option[Literal], template: Option[Literal], termType: Option[Uri], - graphMap: Option[GraphMap]): SubjectMap = { + graphMap: Option[GraphMap], + logicalTargets: List[LogicalTarget]): SubjectMap = { - StdSubjectMap(identifier, `class`, functionMap, constant, reference, template, termType, graphMap) + StdSubjectMap(identifier, `class`, functionMap, constant, reference, template, termType, graphMap, logicalTargets) } } diff --git a/src/main/scala/io/rml/framework/core/model/TermMap.scala b/src/main/scala/io/rml/framework/core/model/TermMap.scala index 1d0f6785..cf09f92b 100644 --- a/src/main/scala/io/rml/framework/core/model/TermMap.scala +++ b/src/main/scala/io/rml/framework/core/model/TermMap.scala @@ -82,6 +82,8 @@ trait TermMap extends Node { */ def termType: Option[Uri] + def logicalTargets: 
List[LogicalTarget] + /** * * @return diff --git a/src/main/scala/io/rml/framework/core/model/std/StdGraphMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdGraphMap.scala index 5782f547..8867b3e0 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdGraphMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdGraphMap.scala @@ -31,7 +31,8 @@ case class StdGraphMap(identifier: String, override val functionMap:List[FunctionMap], constant: Option[Entity], reference: Option[Literal], - template: Option[Literal]) extends GraphMap { + template: Option[Literal], + logicalTargets: List[LogicalTarget]) extends GraphMap { override def termType: Option[Uri] = Some(Uri(R2RMLVoc.Class.IRI)) diff --git a/src/main/scala/io/rml/framework/core/model/std/StdObjectMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdObjectMap.scala index 7ca6fd4a..0a241eb0 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdObjectMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdObjectMap.scala @@ -35,4 +35,5 @@ case class StdObjectMap(identifier: String, override val datatype: Option[Uri], override val language: Option[Literal], parentTriplesMap: Option[String], - joinCondition: Option[JoinCondition]) extends ObjectMap + joinCondition: Option[JoinCondition], + logicalTargets: List[LogicalTarget]) extends ObjectMap diff --git a/src/main/scala/io/rml/framework/core/model/std/StdPredicateMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdPredicateMap.scala index 7a858b08..89c671cc 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdPredicateMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdPredicateMap.scala @@ -32,4 +32,5 @@ case class StdPredicateMap(identifier: String, constant: Option[Entity], reference: Option[Literal], template: Option[Literal], - termType: Option[Uri]) extends PredicateMap + termType: Option[Uri], + logicalTargets: List[LogicalTarget]) extends PredicateMap diff --git 
a/src/main/scala/io/rml/framework/core/model/std/StdSubjectMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdSubjectMap.scala index 9583aba0..52941df7 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdSubjectMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdSubjectMap.scala @@ -34,5 +34,6 @@ case class StdSubjectMap(identifier: String, reference: Option[Literal], template: Option[Literal], termType: Option[Uri], - graphMap: Option[GraphMap]) extends SubjectMap + graphMap: Option[GraphMap], + logicalTargets: List[LogicalTarget]) extends SubjectMap From 7c530f871c5ae982a292307fe37fd61e29ceb647 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Wed, 7 Apr 2021 11:35:51 +0200 Subject: [PATCH 31/83] Added first test --- .../example-2-subjectMap/mapping.ttl | 42 +++++++++++++++++++ .../example-2-subjectMap/output.nq | 6 +++ .../sandbox/logical-target/people.json | 17 ++++++++ .../scala/io/rml/framework/SandboxTests.scala | 5 ++- 4 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 src/test/resources/sandbox/logical-target/example-2-subjectMap/mapping.ttl create mode 100644 src/test/resources/sandbox/logical-target/example-2-subjectMap/output.nq create mode 100644 src/test/resources/sandbox/logical-target/people.json diff --git a/src/test/resources/sandbox/logical-target/example-2-subjectMap/mapping.ttl b/src/test/resources/sandbox/logical-target/example-2-subjectMap/mapping.ttl new file mode 100644 index 00000000..39e53169 --- /dev/null +++ b/src/test/resources/sandbox/logical-target/example-2-subjectMap/mapping.ttl @@ -0,0 +1,42 @@ +@prefix comp: . +@prefix rr: . +@prefix foaf: . +@prefix formats: . +@prefix ql: . +@prefix rml: . +@prefix rmlt: . +@prefix void: . + +@base . + +<#LogicalSource1> a rml:LogicalSource; + rml:source "../people.json"; + rml:referenceFormulation ql:JSONPath; + rml:iterator "$.*" +. + +<#VoIDDump> a void:Dataset ; + void:dataDump ; +. 
+ +<#LogicalTarget1> a rmlt:LogicalTarget; + rmlt:target <#VoIDDump>; + rmlt:serialization formats:N-Quads ; + rmlt:compression comp:GZip; +. + +<#TriplesMap> a rr:TriplesMap; + rml:logicalSource <#LogicalSource1>; + rr:subjectMap [ + rr:template "http://example.org/{id}"; + rml:logicalTarget <#LogicalTarget1>; + ]; + rr:predicateObjectMap [ + rr:predicateMap [ rr:constant foaf:name ]; + rr:objectMap [ rml:reference "name"; ]; + ]; + rr:predicateObjectMap [ + rr:predicateMap [ rr:constant foaf:age ]; + rr:objectMap [ rml:reference "age" ]; + ]; +. \ No newline at end of file diff --git a/src/test/resources/sandbox/logical-target/example-2-subjectMap/output.nq b/src/test/resources/sandbox/logical-target/example-2-subjectMap/output.nq new file mode 100644 index 00000000..a9e608b1 --- /dev/null +++ b/src/test/resources/sandbox/logical-target/example-2-subjectMap/output.nq @@ -0,0 +1,6 @@ + "Alice" . + "42" . + "Bob" . + "27" . + "Jesus" . + "2021" . \ No newline at end of file diff --git a/src/test/resources/sandbox/logical-target/people.json b/src/test/resources/sandbox/logical-target/people.json new file mode 100644 index 00000000..80a5814f --- /dev/null +++ b/src/test/resources/sandbox/logical-target/people.json @@ -0,0 +1,17 @@ +[ + { + "id": "0001", + "name": "Alice", + "age": 42 + }, + { + "id": "0002", + "name": "Bob", + "age": 27 + }, + { + "id": "0003", + "name": "Jesus", + "age": 2021 + } +] \ No newline at end of file diff --git a/src/test/scala/io/rml/framework/SandboxTests.scala b/src/test/scala/io/rml/framework/SandboxTests.scala index 1628b94a..a3812fee 100644 --- a/src/test/scala/io/rml/framework/SandboxTests.scala +++ b/src/test/scala/io/rml/framework/SandboxTests.scala @@ -126,13 +126,14 @@ class SandboxTests extends FunSuite with Matchers with FunctionMappingTest { executeTest("sandbox/function_related/condition-on-mapping-subject-function/mapping.ttl") } - test("failing/fno-testcases/RMLFNOTC0023-CSV") { pending 
executeTest("failing/fno-testcases/RMLFNOTC0023-CSV/mapping.ttl") } - + test("sandbox/logical-target/example-2-subjectMap") { + executeTest("sandbox/logical-target/example-2-subjectMap/mapping.ttl") + } From 35b6fadc39e19575ad52d1b82a25ca64bc303987 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Wed, 7 Apr 2021 13:11:52 +0200 Subject: [PATCH 32/83] WIP: extract compression and serialization --- .../core/extractors/ExtractorUtil.scala | 1 + .../std/StdLogicalSourceExtractor.scala | 29 ++--------- .../std/StdLogicalTargetExtractor.scala | 50 ++++++++++++++++--- 3 files changed, 48 insertions(+), 32 deletions(-) diff --git a/src/main/scala/io/rml/framework/core/extractors/ExtractorUtil.scala b/src/main/scala/io/rml/framework/core/extractors/ExtractorUtil.scala index 28588039..0d740036 100644 --- a/src/main/scala/io/rml/framework/core/extractors/ExtractorUtil.scala +++ b/src/main/scala/io/rml/framework/core/extractors/ExtractorUtil.scala @@ -62,6 +62,7 @@ object ExtractorUtil { def extractResourceFromProperty(resource: RDFResource, property: String): Option[RDFResource] = { val properties = resource.listProperties(property); + require(properties.length <= 1, resource.uri.toString + ": at most 1 " + property + " needed."); if (properties.isEmpty) { None } else { diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalSourceExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalSourceExtractor.scala index 84c8773e..e66c19e2 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalSourceExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalSourceExtractor.scala @@ -25,7 +25,7 @@ package io.rml.framework.core.extractors.std -import io.rml.framework.core.extractors.{DataSourceExtractor, LogicalSourceExtractor} +import io.rml.framework.core.extractors.{DataSourceExtractor, ExtractorUtil, LogicalSourceExtractor} import io.rml.framework.core.internal.Logging import 
io.rml.framework.core.model.rdf.RDFResource import io.rml.framework.core.model.{DataSource, Literal, LogicalSource, Uri} @@ -97,20 +97,7 @@ class StdLogicalSourceExtractor(dataSourceExtractor: DataSourceExtractor) */ @throws(classOf[RMLException]) private def extractIterator(resource: RDFResource, referenceFormulation: Uri): String = { - - val property = RMLVoc.Property.ITERATOR - val properties = resource.listProperties(property) - - if (properties.size > 1) throw new RMLException(resource.uri + ": invalid amount of iterators.") - if (properties.isEmpty) { - return DEFAULT_ITERATOR_MAP(referenceFormulation.value) - } - - properties.head match { - case uri: Uri => throw new RMLException(uri + ": iterator must be a literal.") - case literal: Literal => literal.value - } - + ExtractorUtil.extractLiteralFromProperty(resource, RMLVoc.Property.ITERATOR, DEFAULT_ITERATOR_MAP(referenceFormulation.value)) } /** @@ -131,15 +118,7 @@ class StdLogicalSourceExtractor(dataSourceExtractor: DataSourceExtractor) */ @throws(classOf[RMLException]) private def extractReferenceFormulation(resource: RDFResource): Uri = { - - val property = RMLVoc.Property.REFERENCEFORMULATION - val properties = resource.listProperties(property) - - if (properties.size != 1) throw new RMLException(resource.uri + ": exactly one reference formulation allowed.") - - properties.head match { - case resource: RDFResource => resource.uri - case literal: Literal => throw new RMLException(literal.toString + ": iterator must be a uri.") - } + val referenceFormulationResource = ExtractorUtil.extractSingleResourceFromProperty(resource, RMLVoc.Property.REFERENCEFORMULATION) + referenceFormulationResource.uri } } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala index 142388d7..317c0c77 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala +++ 
b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala @@ -1,10 +1,11 @@ package io.rml.framework.core.extractors.std -import io.rml.framework.core.extractors.LogicalTargetExtractor +import io.rml.framework.core.extractors.{ExtractorUtil, LogicalTargetExtractor} import io.rml.framework.core.internal.Logging -import io.rml.framework.core.model.LogicalTarget import io.rml.framework.core.model.rdf.RDFResource -import io.rml.framework.core.vocabulary.RMLVoc +import io.rml.framework.core.model.{LogicalTarget, Uri} +import io.rml.framework.core.vocabulary.{RMLTVoc, RMLVoc} +import io.rml.framework.shared.RMLException import scala.collection.mutable.ListBuffer @@ -46,12 +47,47 @@ class StdLogicalTargetExtractor extends LogicalTargetExtractor with Logging { val properties = node.listProperties(RMLVoc.Property.LOGICALTARGET) properties.foreach(logicalTargetResource => { - /*logicalTargetResource match { - case resource: RDFResource - }*/ + logicalTargetResource match { + case resource: RDFResource => { + val extractResult = extractLogicalTargetProperties(resource) + if (extractResult.isDefined) { + result = result += extractResult.get + } + } + case _ => throw new RMLException("Only logical target from resource allowed.") + } }) result.toList - // TODO here comes the real code + } + + private def extractLogicalTargetProperties(resource: RDFResource): Option[LogicalTarget] = { + val compression: Option[Uri] = extractCompression(resource) + val serialization: Option[Uri] = extractSerialization(resource) + // TODO extract actual target + None + } + + /** + * Extracts the compression specification. + * @param resource The Logical Target resource + * @return A Uri representing the compression, or None if no compression. 
+ */ + private def extractCompression(resource: RDFResource): Option[Uri] = { + val compressionResource = ExtractorUtil.extractResourceFromProperty(resource, RMLTVoc.Property.COMPRESSION) + if (compressionResource.isDefined) { + Some(compressionResource.get.uri) + } else { + None + } + } + + private def extractSerialization(resource: RDFResource): Option[Uri] = { + val serializationResource = ExtractorUtil.extractResourceFromProperty(resource, RMLTVoc.Property.SERIALIZATION) + if (serializationResource.isDefined) { + Some(serializationResource.get.uri) + } else { + None + } } } From 7499ce881a99eb995b68e298fcd4c87629f985f5 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 9 Apr 2021 09:37:26 +0200 Subject: [PATCH 33/83] Logical target in Subject map --- .../core/extractors/DataTargetExtractor.scala | 37 +++++++++++++ .../extractors/LogicalTargetExtractor.scala | 2 +- .../std/StdDataSourceExtractor.scala | 1 + .../std/StdDataTargetExtractor.scala | 54 +++++++++++++++++++ .../std/StdLogicalTargetExtractor.scala | 32 +++++------ .../std/StdSubjectMapExtractor.scala | 1 + .../{DataSink.scala => DataTarget.scala} | 2 +- .../framework/core/model/FileDataSource.scala | 10 ++-- .../framework/core/model/FileDataTarget.scala | 54 +++++++++++++++++++ .../framework/core/model/LogicalTarget.scala | 20 ++++++- ...ataSource.scala => StdFileDataStore.scala} | 4 +- .../core/model/std/StdLogicalTarget.scala | 32 +++++++++++ .../io/rml/framework/core/util/Util.scala | 2 +- .../framework/core/vocabulary/VoIDVoc.scala | 38 +++++++++++++ .../example-2-subjectMap/mapping.ttl | 2 +- 15 files changed, 264 insertions(+), 27 deletions(-) create mode 100644 src/main/scala/io/rml/framework/core/extractors/DataTargetExtractor.scala create mode 100644 src/main/scala/io/rml/framework/core/extractors/std/StdDataTargetExtractor.scala rename src/main/scala/io/rml/framework/core/model/{DataSink.scala => DataTarget.scala} (97%) create mode 100644 
src/main/scala/io/rml/framework/core/model/FileDataTarget.scala rename src/main/scala/io/rml/framework/core/model/std/{StdFileDataSource.scala => StdFileDataStore.scala} (88%) create mode 100644 src/main/scala/io/rml/framework/core/model/std/StdLogicalTarget.scala create mode 100644 src/main/scala/io/rml/framework/core/vocabulary/VoIDVoc.scala diff --git a/src/main/scala/io/rml/framework/core/extractors/DataTargetExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/DataTargetExtractor.scala new file mode 100644 index 00000000..430ebf7a --- /dev/null +++ b/src/main/scala/io/rml/framework/core/extractors/DataTargetExtractor.scala @@ -0,0 +1,37 @@ +package io.rml.framework.core.extractors + +import io.rml.framework.core.extractors.std.StdDataTargetExtractor +import io.rml.framework.core.model.DataTarget + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ +trait DataTargetExtractor extends ResourceExtractor[DataTarget] + +object DataTargetExtractor { + def apply(): DataTargetExtractor = { + new StdDataTargetExtractor() + } +} + diff --git a/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala index f2591b15..66dca620 100644 --- a/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala @@ -32,7 +32,7 @@ trait LogicalTargetExtractor extends ResourceExtractor[List[LogicalTarget]] object LogicalTargetExtractor { def apply(): LogicalTargetExtractor = { - lazy val extractor = new StdLogicalTargetExtractor + lazy val extractor = new StdLogicalTargetExtractor(DataTargetExtractor()) extractor } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala index 640230da..30de41a6 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala @@ -72,6 +72,7 @@ class StdDataSourceExtractor extends DataSourceExtractor { case Uri(RMLSVoc.Class.FILESTREAM) => extractFileStream(resource) case Uri(RMLSVoc.Class.KAFKASTREAM) => extractKafkaStream(resource) case Uri(WoTVoc.ThingDescription.Class.THING) => extractWoTSource(resource) + case _ => throw new RMLException(s"${classResource.uri} not supported as data source.") } case literal: Literal => throw new RMLException(literal.value + ": type must be a resource.") } diff 
--git a/src/main/scala/io/rml/framework/core/extractors/std/StdDataTargetExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdDataTargetExtractor.scala new file mode 100644 index 00000000..70ef645c --- /dev/null +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdDataTargetExtractor.scala @@ -0,0 +1,54 @@ +package io.rml.framework.core.extractors.std + +import io.rml.framework.core.extractors.{DataTargetExtractor, ExtractorUtil} +import io.rml.framework.core.model.rdf.RDFResource +import io.rml.framework.core.model.{DataTarget, FileDataTarget, Uri} +import io.rml.framework.core.vocabulary.{RDFVoc, RMLTVoc, VoIDVoc} +import io.rml.framework.shared.RMLException + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ +class StdDataTargetExtractor extends DataTargetExtractor { + /** + * Extract. 
+ * + * @param node Node to extract from. + * @return + */ + override def extract(node: RDFResource): DataTarget = { + val targetResource = ExtractorUtil.extractSingleResourceFromProperty(node, RMLTVoc.Property.TARGET) + val targetType = ExtractorUtil.extractSingleResourceFromProperty(targetResource, RDFVoc.Property.TYPE) + targetType.uri match { + case Uri(VoIDVoc.Class.DATASET) => extractFileDataTarget(targetResource) + case _ => throw new RMLException(s"${targetType} not supported as data target.") + } + } + + private def extractFileDataTarget(resource: RDFResource): DataTarget = { + val path = ExtractorUtil.extractSingleResourceFromProperty(resource, VoIDVoc.Property.DATADUMP) + FileDataTarget(path.uri) + } + +} diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala index 317c0c77..142cccd5 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala @@ -1,10 +1,10 @@ package io.rml.framework.core.extractors.std -import io.rml.framework.core.extractors.{ExtractorUtil, LogicalTargetExtractor} +import io.rml.framework.core.extractors.{DataTargetExtractor, ExtractorUtil, LogicalTargetExtractor} import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.rdf.RDFResource -import io.rml.framework.core.model.{LogicalTarget, Uri} -import io.rml.framework.core.vocabulary.{RMLTVoc, RMLVoc} +import io.rml.framework.core.model.{DataTarget, LogicalTarget, Uri} +import io.rml.framework.core.vocabulary.{FormatVoc, RMLTVoc, RMLVoc} import io.rml.framework.shared.RMLException import scala.collection.mutable.ListBuffer @@ -33,7 +33,7 @@ import scala.collection.mutable.ListBuffer * THE SOFTWARE. 
* * */ -class StdLogicalTargetExtractor extends LogicalTargetExtractor with Logging { +class StdLogicalTargetExtractor(dataTargetExtractor: DataTargetExtractor) extends LogicalTargetExtractor with Logging { /** * Extract. * @@ -49,10 +49,7 @@ class StdLogicalTargetExtractor extends LogicalTargetExtractor with Logging { properties.foreach(logicalTargetResource => { logicalTargetResource match { case resource: RDFResource => { - val extractResult = extractLogicalTargetProperties(resource) - if (extractResult.isDefined) { - result = result += extractResult.get - } + result += extractLogicalTargetProperties(resource) } case _ => throw new RMLException("Only logical target from resource allowed.") } @@ -61,11 +58,11 @@ class StdLogicalTargetExtractor extends LogicalTargetExtractor with Logging { result.toList } - private def extractLogicalTargetProperties(resource: RDFResource): Option[LogicalTarget] = { + private def extractLogicalTargetProperties(resource: RDFResource): LogicalTarget = { val compression: Option[Uri] = extractCompression(resource) - val serialization: Option[Uri] = extractSerialization(resource) - // TODO extract actual target - None + val serialization: Uri = extractSerialization(resource) + val target: DataTarget = dataTargetExtractor.extract(resource) + LogicalTarget(target, serialization, compression) } /** @@ -82,12 +79,17 @@ class StdLogicalTargetExtractor extends LogicalTargetExtractor with Logging { } } - private def extractSerialization(resource: RDFResource): Option[Uri] = { + /** + * Extracts the serialization from the logical target. + * @param resource The resource representing the logical target + * @return The URI of the resource representing the serialization. 
The default is http://www.w3.org/ns/formats/N-Quads + */ + private def extractSerialization(resource: RDFResource): Uri = { val serializationResource = ExtractorUtil.extractResourceFromProperty(resource, RMLTVoc.Property.SERIALIZATION) if (serializationResource.isDefined) { - Some(serializationResource.get.uri) + serializationResource.get.uri } else { - None + Uri(FormatVoc.Class.NQUADS) } } } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala index c278ddd1..9bc38b3a 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdSubjectMapExtractor.scala @@ -93,6 +93,7 @@ class StdSubjectMapExtractor extends SubjectMapExtractor with Logging { ", template -> " + template + ", termType -> " + termType + ", graphMap -> " + graphMap + + ", logicalTargets -> " + logicalTargets + ", class -> " + _class) SubjectMap(resource.uri.value, _class, functionMap, constant, reference, template, termType, graphMap, logicalTargets) diff --git a/src/main/scala/io/rml/framework/core/model/DataSink.scala b/src/main/scala/io/rml/framework/core/model/DataTarget.scala similarity index 97% rename from src/main/scala/io/rml/framework/core/model/DataSink.scala rename to src/main/scala/io/rml/framework/core/model/DataTarget.scala index 5e92f309..fd23e6d5 100644 --- a/src/main/scala/io/rml/framework/core/model/DataSink.scala +++ b/src/main/scala/io/rml/framework/core/model/DataTarget.scala @@ -24,4 +24,4 @@ package io.rml.framework.core.model * THE SOFTWARE. 
* * */ -trait DataSink extends DataStore +trait DataTarget extends DataStore diff --git a/src/main/scala/io/rml/framework/core/model/FileDataSource.scala b/src/main/scala/io/rml/framework/core/model/FileDataSource.scala index 9179dc2e..15d796b6 100644 --- a/src/main/scala/io/rml/framework/core/model/FileDataSource.scala +++ b/src/main/scala/io/rml/framework/core/model/FileDataSource.scala @@ -27,7 +27,7 @@ package io.rml.framework.core.model import io.rml.framework.api.RMLEnvironment import io.rml.framework.core.internal.Logging -import io.rml.framework.core.model.std.StdFileDataSource +import io.rml.framework.core.model.std.StdFileDataStore import io.rml.framework.core.util.Util import io.rml.framework.shared.RMLException @@ -54,17 +54,17 @@ object FileDataSource extends Logging { val file = new File(uri.value) Try(Util.resolveFileRelativeToSourceFileParent(RMLEnvironment.getMappingFileBaseIRI().get, file.getPath)) match { case Success(resolvedFile) => { - StdFileDataSource(Uri(resolvedFile.getAbsolutePath)) + StdFileDataStore(Uri(resolvedFile.getCanonicalPath)) } case Failure(exception) => { if (file.isAbsolute) { - logDebug(Uri(file.getAbsolutePath).value) - StdFileDataSource(Uri(file.getAbsolutePath)) + logDebug(Uri(file.getCanonicalPath).value) + StdFileDataStore(Uri(file.getCanonicalPath)) } else { val url = ClassLoader.getSystemResource(uri.value) if (url == null) throw new RMLException(uri.toString + " can't be found.") val file_2 = new File(url.toURI) - StdFileDataSource(Uri(file_2.getAbsolutePath)) + StdFileDataStore(Uri(file_2.getCanonicalPath)) } } } diff --git a/src/main/scala/io/rml/framework/core/model/FileDataTarget.scala b/src/main/scala/io/rml/framework/core/model/FileDataTarget.scala new file mode 100644 index 00000000..53541567 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/model/FileDataTarget.scala @@ -0,0 +1,54 @@ +package io.rml.framework.core.model + +import io.rml.framework.api.RMLEnvironment +import 
io.rml.framework.core.internal.Logging +import io.rml.framework.core.model.std.StdFileDataStore + +import java.io.File +import java.net.URI +import java.nio.file.Paths + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * */ +trait FileDataTarget extends DataTarget + +object FileDataTarget extends Logging { + def apply(uri: ExplicitNode): DataTarget = { + val fileUri = URI.create(uri.value); + if (fileUri.getScheme.equals("file")) { + val file = new File(fileUri.getPath) + if (file.isAbsolute) { + StdFileDataStore(Uri(file.getCanonicalPath)) + } else { + // create path relative to mapping file + val mappingFileParentPath = Paths.get(RMLEnvironment.getMappingFileBaseIRI().get).getParent + val filePath = mappingFileParentPath.resolve(file.toPath) + StdFileDataStore(Uri(filePath.toFile.getCanonicalPath)) + } + } else { + StdFileDataStore(Uri(uri.value)) + } + } +} \ No newline at end of file diff --git a/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala b/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala index 77fba697..63bd44f3 100644 --- a/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala +++ b/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala @@ -1,5 +1,9 @@ package io.rml.framework.core.model +import io.rml.framework.core.model.std.StdLogicalTarget + +import java.util.Objects + /** * MIT License * @@ -26,6 +30,20 @@ package io.rml.framework.core.model * */ trait LogicalTarget extends Node { - def target: DataSink + def target: DataTarget + + def compression: Option[Uri] + + def serialization: Uri + override def identifier: String = { + Objects.hash(target.identifier, compression, serialization.identifier).toHexString + } } + +object LogicalTarget { + def apply(target: DataTarget, serialization: Uri, compression: Option[Uri]) = { + StdLogicalTarget(target, serialization, compression) + } +} + diff --git a/src/main/scala/io/rml/framework/core/model/std/StdFileDataSource.scala b/src/main/scala/io/rml/framework/core/model/std/StdFileDataStore.scala similarity index 88% rename from src/main/scala/io/rml/framework/core/model/std/StdFileDataSource.scala rename to 
src/main/scala/io/rml/framework/core/model/std/StdFileDataStore.scala index c518f12a..9d8b74da 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdFileDataSource.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdFileDataStore.scala @@ -25,6 +25,6 @@ package io.rml.framework.core.model.std -import io.rml.framework.core.model.{FileDataSource, Uri} +import io.rml.framework.core.model.{FileDataSource, FileDataTarget, Uri} -case class StdFileDataSource(uri: Uri) extends FileDataSource +case class StdFileDataStore(uri: Uri) extends FileDataSource with FileDataTarget diff --git a/src/main/scala/io/rml/framework/core/model/std/StdLogicalTarget.scala b/src/main/scala/io/rml/framework/core/model/std/StdLogicalTarget.scala new file mode 100644 index 00000000..1add2798 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/model/std/StdLogicalTarget.scala @@ -0,0 +1,32 @@ +package io.rml.framework.core.model.std + +import io.rml.framework.core.model.{DataTarget, LogicalTarget, Uri} + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ +case class StdLogicalTarget ( + target: DataTarget, + serialization: Uri, + compression: Option[Uri]) extends LogicalTarget diff --git a/src/main/scala/io/rml/framework/core/util/Util.scala b/src/main/scala/io/rml/framework/core/util/Util.scala index f7dff4d3..7f1f3947 100644 --- a/src/main/scala/io/rml/framework/core/util/Util.scala +++ b/src/main/scala/io/rml/framework/core/util/Util.scala @@ -91,7 +91,7 @@ object Util extends Logging{ var done = false while (lineIter.hasNext && !done) { val line = lineIter.next().trim - if (line.length > 0) { + if (line.nonEmpty) { if (line.head != '@') { done = true } else if (line.contains("@base")) { diff --git a/src/main/scala/io/rml/framework/core/vocabulary/VoIDVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/VoIDVoc.scala new file mode 100644 index 00000000..87b8b957 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/vocabulary/VoIDVoc.scala @@ -0,0 +1,38 @@ +package io.rml.framework.core.vocabulary + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ +object VoIDVoc { + val namespace = ("void", "http://rdfs.org/ns/void#") + + object Property { + val DATADUMP = namespace._2 + "dataDump" + } + + object Class { + val DATASET = namespace._2 + "Dataset" + } + +} diff --git a/src/test/resources/sandbox/logical-target/example-2-subjectMap/mapping.ttl b/src/test/resources/sandbox/logical-target/example-2-subjectMap/mapping.ttl index 39e53169..67fbb55f 100644 --- a/src/test/resources/sandbox/logical-target/example-2-subjectMap/mapping.ttl +++ b/src/test/resources/sandbox/logical-target/example-2-subjectMap/mapping.ttl @@ -16,7 +16,7 @@ . <#VoIDDump> a void:Dataset ; - void:dataDump ; + void:dataDump ; . <#LogicalTarget1> a rmlt:LogicalTarget; From f1b8c087bfdb639aa88515e8c53ac161edb37f51 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 9 Apr 2021 11:45:26 +0200 Subject: [PATCH 34/83] Check on subject happens earlier. 
--- .../statement/SubjectGeneratorAssembler.scala | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala index 9ccb0296..f4bfaaca 100644 --- a/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala @@ -28,8 +28,6 @@ package io.rml.framework.engine.statement import io.rml.framework.core.function.FunctionUtils import io.rml.framework.core.item.Item import io.rml.framework.core.model.{TermMap, TermNode, Uri} -import io.rml.framework.core.vocabulary.R2RMLVoc -import io.rml.framework.shared.TermTypeException class SubjectGeneratorAssembler extends TermMapGeneratorAssembler { @@ -51,17 +49,6 @@ class SubjectGeneratorAssembler extends TermMapGeneratorAssembler { }) }else { - /** - * Tried implementing literal check in subject map extractor but it was assumed - * that the extractor would just extract subject maps even if it is typed to be literal. - * - * Maybe move this check to subject map extractor for early checking during the reading process? 
- */ - termMap.termType.get.toString match { - case R2RMLVoc.Class.LITERAL => throw new TermTypeException("Subject cannot be of type Literal!") - case _ => - - } super.assemble(termMap).asInstanceOf[(Item) => Option[Iterable[TermNode]]] } } From a38bfe6992e188a7f0c55fa878a194e82cfb0427 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 14:30:52 +0200 Subject: [PATCH 35/83] Update documentation and docker flink version --- README.md | 6 +++--- docker/docker-compose.yml | 4 ++-- documentation/README_Functions.md | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 2f7cde90..c4692351 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,9 @@ If you want to deploy it yourself, read on. ### Installing Flink RMLStreamer runs its jobs on Flink clusters. -More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.11/try-flink/local_installation.html). +More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.12/try-flink/local_installation.html). At least a local cluster must be running in order to start executing RML Mappings with RMLStreamer. -Please note that this version works with Flink 1.11.3 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.11.3/flink-1.11.3-bin-scala_2.11.tgz). +Please note that this version works with Flink 1.12.2 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.12.2/flink-1.12.2-bin-scala_2.11.tgz). ### Building RMLStreamer @@ -46,7 +46,7 @@ The resulting `RMLStreamer-.jar`, found in the `target` folder, can be ### Executing RML Mappings Here we give examples for running RMLStreamer from the command line. 
We use `FLINK_BIN` to denote the Flink CLI tool, -usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.11.3/bin/flink`. +usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.12.2/bin/flink`. For Windows a `flink.bat` script is provided. The general usage is: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index fb61a26c..5e0d9d3c 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -2,7 +2,7 @@ version: '3' services: jobmanager: - image: flink:1.11.3-scala_2.11 + image: flink:1.12.2-scala_2.11 expose: - "6123" ports: @@ -14,7 +14,7 @@ services: - data:/mnt/data taskmanager: - image: flink:1.11.3-scala_2.11 + image: flink:1.12.2-scala_2.11 expose: - "6121" - "6122" diff --git a/documentation/README_Functions.md b/documentation/README_Functions.md index d53ac3bf..7a2e75d3 100644 --- a/documentation/README_Functions.md +++ b/documentation/README_Functions.md @@ -15,13 +15,13 @@ These files can be obtained from `src/main/resources`: ## Example: RML Streamer + Flink Flink's `lib` directory should contain the jar-files with the custom functions. In this example, these are marked with `*` ``` -flink-1.11.2-scala_2.11 +flink-1.12.2-scala_2.11 └── lib ├── GrelFunctions.jar * ├── IDLabFunctions.jar * - ├── flink-dist_2.11-1.11.2.jar - ├── flink-table-blink_2.11-1.11.2.jar - ├── flink-table_2.11-1.11.2.jar + ├── flink-dist_2.11-1.12.2.jar + ├── flink-table-blink_2.11-1.12.2.jar + ├── flink-table_2.11-1.12.2.jar ├── log4j-1.2.17.jar └── slf4j-log4j12-1.7.15.jar ``` @@ -40,7 +40,7 @@ Note that the function descriptions and function mappings are present. 
The command for running the RML Streamer on Flink should look like ``` -~/flink/flink-1.11.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl +~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl ``` ## Test Cases From 46ebe51c2dca42421b54b878575c58937998c927 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 12:38:44 +0000 Subject: [PATCH 36/83] Add comment documenting classloader fix --- .../io/rml/framework/util/fileprocessing/TestFilesUtil.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala b/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala index 9f2c6509..c093f809 100644 --- a/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala +++ b/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala @@ -75,6 +75,8 @@ trait TestFilesUtil[R] { def test(rootDir: String, shouldPass: Boolean, checkFunc: (String, Boolean) => Unit): Unit = { var checkedTestCases = Array("") for (pathString <- getTestCaseFolders(rootDir).map(_.toString).sorted) { + // clear the loaded classes, this prevents an Exception that would occur when using classes + // from an unloaded class loader FnOEnvironment.loadedClassesMap.clear() checkFunc(pathString, shouldPass) val testCase = new File(pathString).getName From 93c59979566f0a13c8c31bd234e1d3852c944740 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 12:40:36 +0000 Subject: [PATCH 37/83] Add comment documenting classloader fix --- src/test/scala/io/rml/framework/TCPStreamTestSync.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/scala/io/rml/framework/TCPStreamTestSync.scala b/src/test/scala/io/rml/framework/TCPStreamTestSync.scala index bcf1b3c6..8f1cb01f 100644 --- 
a/src/test/scala/io/rml/framework/TCPStreamTestSync.scala +++ b/src/test/scala/io/rml/framework/TCPStreamTestSync.scala @@ -80,6 +80,8 @@ class TCPStreamTestSync extends StreamTestSync { } override def beforeTestCase(): Unit = { + // clear the loaded classes, this prevents an Exception that would occur when using classes + // from an unloaded class loader FnOEnvironment.loadedClassesMap.clear() } From a85d34cfb29963493b66a66547bf85fdb6748a24 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 12:42:39 +0000 Subject: [PATCH 38/83] Add comment documenting classloader fix --- src/test/scala/io/rml/framework/StreamTestSync.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/scala/io/rml/framework/StreamTestSync.scala b/src/test/scala/io/rml/framework/StreamTestSync.scala index 8bb5b2bf..32e5f863 100644 --- a/src/test/scala/io/rml/framework/StreamTestSync.scala +++ b/src/test/scala/io/rml/framework/StreamTestSync.scala @@ -74,6 +74,8 @@ abstract class StreamTestSync extends StaticTestSpec with ReadMappingBehaviour w // Things to do before running one test case protected def beforeTestCase(): Unit = { + // clear the loaded classes, this prevents an Exception that would occur when using classes + // from an unloaded class loader FnOEnvironment.loadedClassesMap.clear() } From edc18a30412780b992663ff1a6edc0b63dea5fa6 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 12:43:57 +0000 Subject: [PATCH 39/83] Add comment documenting classloader fix --- src/test/scala/io/rml/framework/SandboxTests.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/scala/io/rml/framework/SandboxTests.scala b/src/test/scala/io/rml/framework/SandboxTests.scala index c869ea3f..19bb889b 100644 --- a/src/test/scala/io/rml/framework/SandboxTests.scala +++ b/src/test/scala/io/rml/framework/SandboxTests.scala @@ -42,6 +42,8 @@ class SandboxTests extends FunSuite with Matchers with FunctionMappingTest { private def 
executeTest(mappingFile: String): Unit = { TriplesMapsCache.clear(); + // clear the loaded classes, this prevents an Exception that would occur when using classes + // from an unloaded class loader FnOEnvironment.loadedClassesMap.clear() RMLEnvironment.setGeneratorBaseIRI(Some("http://example.org/base/")) implicit val env = ExecutionEnvironment.getExecutionEnvironment From f29aaa1cce91da088adecc88a4f5bcec4aec9f3e Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Wed, 24 Mar 2021 16:04:58 +0100 Subject: [PATCH 40/83] Create basic MQTTSink --- .../framework/flink/sink/RichMQTTSink.scala | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala diff --git a/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala new file mode 100644 index 00000000..baf5931e --- /dev/null +++ b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala @@ -0,0 +1,31 @@ +package io.rml.framework.flink.sink + +import org.apache.flink.configuration.Configuration +import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} +import org.eclipse.paho.client.mqttv3.{MqttClient, MqttConnectOptions} +import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence + +class RichMQTTSink(val url: String, val topic: String) extends RichSinkFunction[String] { + + private var client: MqttClient = _ + + override def open(parameters: Configuration): Unit = { + super.open(parameters) + + client = new MqttClient(url, MqttClient.generateClientId(), new MemoryPersistence) + val connectionOptions = new MqttConnectOptions + connectionOptions.setAutomaticReconnect(true) + connectionOptions.setCleanSession(false) + + client.connect(connectionOptions) + } + + override def invoke(value: String, context: SinkFunction.Context[_]): Unit = { + val payload = value.getBytes() + client.publish(topic, payload, 2, false) + } + + override def 
close(): Unit = { + client.disconnect() + } +} From 214ae51d28371ca0d642484bafe7a5a4b1c752ac Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 29 Mar 2021 15:13:13 +0200 Subject: [PATCH 41/83] Add MQTT sink to parameters and main --- src/main/scala/io/rml/framework/Main.scala | 5 +++++ .../rml/framework/core/util/ParameterUtil.scala | 15 ++++++++++++++- .../rml/framework/flink/sink/RichMQTTSink.scala | 11 +++++++++-- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/main/scala/io/rml/framework/Main.scala b/src/main/scala/io/rml/framework/Main.scala index 8db7d949..da00117d 100644 --- a/src/main/scala/io/rml/framework/Main.scala +++ b/src/main/scala/io/rml/framework/Main.scala @@ -52,6 +52,7 @@ import org.apache.flink.util.Collector import java.util.Properties import scala.collection.{immutable, mutable} +import io.rml.framework.flink.sink.RichMQTTSink /** * @@ -176,6 +177,10 @@ object Main extends Logging { .build() stream.addSink(sink).name("Streaming file sink") } + else if (config.outputSink.equals(OutputSinkOption.MQTT)) { + val sink = new RichMQTTSink(config.broker.get, config.topic.get) + stream.addSink(sink) + } // discard output if the parameter is given else if (config.outputSink.equals(OutputSinkOption.None)) { stream.addSink(output => {}).name("No output sink") diff --git a/src/main/scala/io/rml/framework/core/util/ParameterUtil.scala b/src/main/scala/io/rml/framework/core/util/ParameterUtil.scala index 466368a4..93713225 100644 --- a/src/main/scala/io/rml/framework/core/util/ParameterUtil.scala +++ b/src/main/scala/io/rml/framework/core/util/ParameterUtil.scala @@ -38,6 +38,7 @@ object ParameterUtil { checkpointInterval: Option[Long] = None, outputPath: Option[String] = None, brokerList: Option[String] = None, + broker: Option[String] = None, topic: Option[String] = None, partitionId: Option[Int] = None, socket: Option[String] = None, @@ -65,7 +66,7 @@ object ParameterUtil { // possible output sink options object 
OutputSinkOption extends Enumeration { type OutputSinkOption = Value - val File, Socket, Kafka, None = Value + val File, Socket, Kafka, MQTT, None = Value } // possible post processor options @@ -159,6 +160,18 @@ object ParameterUtil { .text("The TCP socket to write to.") ) + cmd("toMQTT") + .text("Write output to an MQTT topic") + .action((_, config) => config.copy(outputSink = OutputSinkOption.MQTT)) + .children( + opt[String]('b', "broker").valueName("").required() + .action((value, config) => config.copy(broker = Some(value))) + .text("The MQTT broker."), + opt[String]('t', "topic").valueName("").required() + .action((value, config) => config.copy(topic = Some(value))) + .text("The name of the MQTT topic to write output to.") + ) + cmd("noOutput") .text("Do everything, but discard output") .action((_, config) => config.copy(outputSink = OutputSinkOption.None)) diff --git a/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala index baf5931e..3012eead 100644 --- a/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala +++ b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala @@ -2,17 +2,24 @@ package io.rml.framework.flink.sink import org.apache.flink.configuration.Configuration import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} +import org.apache.http.client.utils.URIBuilder import org.eclipse.paho.client.mqttv3.{MqttClient, MqttConnectOptions} import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence -class RichMQTTSink(val url: String, val topic: String) extends RichSinkFunction[String] { +import java.net.URI + +class RichMQTTSink(val broker: String, val topic: String) extends RichSinkFunction[String] { private var client: MqttClient = _ override def open(parameters: Configuration): Unit = { super.open(parameters) - client = new MqttClient(url, MqttClient.generateClientId(), new MemoryPersistence) + val uriBuilder = new 
URIBuilder() + uriBuilder.setHost(broker) + uriBuilder.setScheme("tcp") + + client = new MqttClient(uriBuilder.build.toString, MqttClient.generateClientId(), new MemoryPersistence) val connectionOptions = new MqttConnectOptions connectionOptions.setAutomaticReconnect(true) connectionOptions.setCleanSession(false) From a47cc4830fef18c21f3a26e3c1af261b077b2cde Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 14:48:13 +0200 Subject: [PATCH 42/83] Use UTF8 charset --- src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala index 3012eead..90b2349f 100644 --- a/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala +++ b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala @@ -7,6 +7,7 @@ import org.eclipse.paho.client.mqttv3.{MqttClient, MqttConnectOptions} import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence import java.net.URI +import java.nio.charset.StandardCharsets class RichMQTTSink(val broker: String, val topic: String) extends RichSinkFunction[String] { @@ -28,7 +29,7 @@ class RichMQTTSink(val broker: String, val topic: String) extends RichSinkFuncti } override def invoke(value: String, context: SinkFunction.Context[_]): Unit = { - val payload = value.getBytes() + val payload = value.getBytes(StandardCharsets.UTF_8) client.publish(topic, payload, 2, false) } From 31b1be637f1e7dfccb8e8fed791d32d3940dad55 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 16:09:26 +0200 Subject: [PATCH 43/83] Remove beforeTestCase method --- .../io/rml/framework/KafkaStreamTestSyncFnO.scala | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala b/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala index dddb9d84..2ec7124a 100644 --- 
a/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala +++ b/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala @@ -76,21 +76,6 @@ class KafkaStreamTestSyncFnO extends StreamTestSync{ logInfo("Creating Kafka client created.") } - -// override def beforeTestCase(): Unit = { - // topics seem to be created automatically... - - /*logInfo("Creating Kafka input topic...") - val topicName = "demo" - val numPartitions = 1 - val replicationFactor = 1 - val topic = new NewTopic(topicName, numPartitions, replicationFactor.toShort) - val createTopicsResult = admin.createTopics(ArrayBuffer(topic).asJava) - createTopicsResult.all().get() // wait for completion of creating topics - Thread.sleep(2000) - logInfo("Creating Kafka input topic done.") */ -// } - override def afterTestCase(): Unit = { logInfo("Deleting Kafka input topic(s)...") val topics = admin.listTopics().names().get() From b156536300bc50504d0764b48cf0e44799d1e430 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Thu, 22 Apr 2021 13:34:38 +0200 Subject: [PATCH 44/83] Added logical targets up to SerializableRDFQuad level. 
Next: implement actual routing of output to targets --- .../extractors/LogicalTargetExtractor.scala | 2 +- .../std/StdLogicalTargetExtractor.scala | 4 +- .../std/StdObjectMapExtractor.scala | 4 +- .../std/StdPredicateMapExtractor.scala | 4 +- .../extractors/std/TermMapExtractor.scala | 2 +- .../framework/core/model/FunctionMap.scala | 2 +- .../rml/framework/core/model/GraphMap.scala | 2 +- .../framework/core/model/LogicalTarget.scala | 2 +- .../rml/framework/core/model/ObjectMap.scala | 2 +- .../framework/core/model/PredicateMap.scala | 2 +- .../core/model/PredicateObjectMap.scala | 8 ++++ .../rml/framework/core/model/SubjectMap.scala | 12 +++++- .../io/rml/framework/core/model/TermMap.scala | 7 ++- .../core/model/rdf/SerializableRDF.scala | 3 +- .../core/model/std/StdGraphMap.scala | 2 +- .../core/model/std/StdObjectMap.scala | 2 +- .../core/model/std/StdPredicateMap.scala | 2 +- .../core/model/std/StdSubjectMap.scala | 2 +- .../FunctionMapGeneratorAssembler.scala | 16 +++---- .../statement/GraphGeneratorAssembler.scala | 14 +++--- .../statement/ObjectGeneratorAssembler.scala | 10 ++--- .../PredicateGeneratorAssembler.scala | 7 +-- .../PredicateObjectGeneratorAssembler.scala | 21 ++++++--- .../engine/statement/Statement.scala | 25 ++++++----- .../statement/StatementsAssembler.scala | 43 ++++++++++++------- .../statement/SubjectGeneratorAssembler.scala | 7 +-- .../statement/TermMapGeneratorAssembler.scala | 2 +- 27 files changed, 129 insertions(+), 80 deletions(-) diff --git a/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala index 66dca620..5148f26b 100644 --- a/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/LogicalTargetExtractor.scala @@ -27,7 +27,7 @@ import io.rml.framework.core.model.LogicalTarget * THE SOFTWARE. 
* * */ -trait LogicalTargetExtractor extends ResourceExtractor[List[LogicalTarget]] +trait LogicalTargetExtractor extends ResourceExtractor[Set[LogicalTarget]] object LogicalTargetExtractor { diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala index 142cccd5..cd921920 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala @@ -40,7 +40,7 @@ class StdLogicalTargetExtractor(dataTargetExtractor: DataTargetExtractor) extend * @param node Node to extract from. * @return */ - override def extract(node: RDFResource): List[LogicalTarget] = { + override def extract(node: RDFResource): Set[LogicalTarget] = { logDebug(s"Extracting logical target: ${node.uri}") var result = new ListBuffer[LogicalTarget] @@ -55,7 +55,7 @@ class StdLogicalTargetExtractor(dataTargetExtractor: DataTargetExtractor) extend } }) - result.toList + result.toSet } private def extractLogicalTargetProperties(resource: RDFResource): LogicalTarget = { diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala index 6f87f291..bac0b8bf 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdObjectMapExtractor.scala @@ -56,9 +56,9 @@ class StdObjectMapExtractor extends ObjectMapExtractor { // iterates over predicates, converts these to predicate maps as blanks properties.map { case literal: RDFLiteral => - ObjectMap("", constant = Some(Literal(literal.value)), termType = Some(Uri(R2RMLVoc.Class.LITERAL)), logicalTargets = List()) + ObjectMap("", constant = Some(Literal(literal.value)), termType = Some(Uri(R2RMLVoc.Class.LITERAL)), 
logicalTargets = Set()) case resource: RDFResource => - ObjectMap("", constant = Some(resource.uri), termType = Some(Uri(R2RMLVoc.Class.IRI)), logicalTargets = List()) + ObjectMap("", constant = Some(resource.uri), termType = Some(Uri(R2RMLVoc.Class.IRI)), logicalTargets = Set()) } } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala index 085d4713..910d7964 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdPredicateMapExtractor.scala @@ -56,9 +56,9 @@ class StdPredicateMapExtractor() extends PredicateMapExtractor { // iterates over predicates, converts these to predicate maps as blanks properties.map { case literal: RDFLiteral => - PredicateMap("", constant = Some(Uri(literal.value)), termType = Some(Uri(R2RMLVoc.Class.IRI)), logicalTargets = List()) + PredicateMap("", constant = Some(Uri(literal.value)), termType = Some(Uri(R2RMLVoc.Class.IRI)), logicalTargets = Set()) case resource: RDFResource => - PredicateMap("", constant = Some(resource.uri), termType = Some(Uri(R2RMLVoc.Class.IRI)), logicalTargets = List()) + PredicateMap("", constant = Some(resource.uri), termType = Some(Uri(R2RMLVoc.Class.IRI)), logicalTargets = Set()) } } diff --git a/src/main/scala/io/rml/framework/core/extractors/std/TermMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/TermMapExtractor.scala index d6812bef..f0467356 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/TermMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/TermMapExtractor.scala @@ -41,7 +41,7 @@ import scala.util.matching.Regex abstract class TermMapExtractor[T] extends ResourceExtractor[T] { lazy private val logicalTargetExtractor: LogicalTargetExtractor = LogicalTargetExtractor() - protected def extractLogicalTargets(node: 
RDFResource): List[LogicalTarget] = { + protected def extractLogicalTargets(node: RDFResource): Set[LogicalTarget] = { logicalTargetExtractor.extract(node) } diff --git a/src/main/scala/io/rml/framework/core/model/FunctionMap.scala b/src/main/scala/io/rml/framework/core/model/FunctionMap.scala index b49eedad..13c841c1 100644 --- a/src/main/scala/io/rml/framework/core/model/FunctionMap.scala +++ b/src/main/scala/io/rml/framework/core/model/FunctionMap.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework.core.model -case class FunctionMap(parentMap : String, identifier: String, functionValue: List[PredicateObjectMap], logicalTargets: List[LogicalTarget]) extends TermMap { +case class FunctionMap(parentMap : String, identifier: String, functionValue: List[PredicateObjectMap], logicalTargets: Set[LogicalTarget]) extends TermMap { /** diff --git a/src/main/scala/io/rml/framework/core/model/GraphMap.scala b/src/main/scala/io/rml/framework/core/model/GraphMap.scala index 3df3e56b..9226d982 100644 --- a/src/main/scala/io/rml/framework/core/model/GraphMap.scala +++ b/src/main/scala/io/rml/framework/core/model/GraphMap.scala @@ -46,6 +46,6 @@ object GraphMap { reference: Option[Literal], template: Option[Literal], termType: Option[Uri], - logicalTargets: List[LogicalTarget]): GraphMap = StdGraphMap( + logicalTargets: Set[LogicalTarget]): GraphMap = StdGraphMap( identifier, functionMap, constant, reference, template, logicalTargets) } diff --git a/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala b/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala index 63bd44f3..6b4976f0 100644 --- a/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala +++ b/src/main/scala/io/rml/framework/core/model/LogicalTarget.scala @@ -37,7 +37,7 @@ trait LogicalTarget extends Node { def serialization: Uri override def identifier: String = { - Objects.hash(target.identifier, compression, serialization.identifier).toHexString + Objects.hash(target.identifier, 
compression.map(uri => uri.identifier).getOrElse(""), serialization.identifier).toHexString } } diff --git a/src/main/scala/io/rml/framework/core/model/ObjectMap.scala b/src/main/scala/io/rml/framework/core/model/ObjectMap.scala index 6e553e67..b6532e23 100644 --- a/src/main/scala/io/rml/framework/core/model/ObjectMap.scala +++ b/src/main/scala/io/rml/framework/core/model/ObjectMap.scala @@ -55,7 +55,7 @@ object ObjectMap { language: Option[Literal] = None, parentTriplesMap: Option[String] = None, joinCondition: Option[JoinCondition] = None, - logicalTargets: List[LogicalTarget]): ObjectMap = + logicalTargets: Set[LogicalTarget]): ObjectMap = StdObjectMap(identifier, functionMap, diff --git a/src/main/scala/io/rml/framework/core/model/PredicateMap.scala b/src/main/scala/io/rml/framework/core/model/PredicateMap.scala index d3329557..36247881 100644 --- a/src/main/scala/io/rml/framework/core/model/PredicateMap.scala +++ b/src/main/scala/io/rml/framework/core/model/PredicateMap.scala @@ -44,7 +44,7 @@ object PredicateMap { reference: Option[Literal] = None, template: Option[Literal] = None, termType: Option[Uri] = None, - logicalTargets: List[LogicalTarget]): PredicateMap = + logicalTargets: Set[LogicalTarget]): PredicateMap = StdPredicateMap(identifier, functionMap, diff --git a/src/main/scala/io/rml/framework/core/model/PredicateObjectMap.scala b/src/main/scala/io/rml/framework/core/model/PredicateObjectMap.scala index f4dc257e..7adf0398 100644 --- a/src/main/scala/io/rml/framework/core/model/PredicateObjectMap.scala +++ b/src/main/scala/io/rml/framework/core/model/PredicateObjectMap.scala @@ -46,6 +46,14 @@ trait PredicateObjectMap extends Node { def predicateMaps: List[PredicateMap] def graphMap: Option[GraphMap] + + def getGraphMapLogicalTargetIds: Set[String] = { + if (graphMap.isDefined) { + graphMap.get.getAllLogicalTargetIds + } else { + Set() + } + } } object PredicateObjectMap { diff --git a/src/main/scala/io/rml/framework/core/model/SubjectMap.scala 
b/src/main/scala/io/rml/framework/core/model/SubjectMap.scala index 092fef70..e9e7ff18 100644 --- a/src/main/scala/io/rml/framework/core/model/SubjectMap.scala +++ b/src/main/scala/io/rml/framework/core/model/SubjectMap.scala @@ -43,6 +43,16 @@ trait SubjectMap extends TermMap { def `class`: List[Uri] def graphMap: Option[GraphMap] + + override def getAllLogicalTargetIds: Set[String] = { + val graphMaps = if (graphMap.isDefined) { + graphMap.get.getAllLogicalTargetIds + } else { + Set() + } + super.getAllLogicalTargetIds ++ graphMaps + } + } object SubjectMap { @@ -65,7 +75,7 @@ object SubjectMap { template: Option[Literal], termType: Option[Uri], graphMap: Option[GraphMap], - logicalTargets: List[LogicalTarget]): SubjectMap = { + logicalTargets: Set[LogicalTarget]): SubjectMap = { StdSubjectMap(identifier, `class`, functionMap, constant, reference, template, termType, graphMap, logicalTargets) } diff --git a/src/main/scala/io/rml/framework/core/model/TermMap.scala b/src/main/scala/io/rml/framework/core/model/TermMap.scala index cf09f92b..7617c897 100644 --- a/src/main/scala/io/rml/framework/core/model/TermMap.scala +++ b/src/main/scala/io/rml/framework/core/model/TermMap.scala @@ -82,7 +82,12 @@ trait TermMap extends Node { */ def termType: Option[Uri] - def logicalTargets: List[LogicalTarget] + def logicalTargets: Set[LogicalTarget] + + def getAllLogicalTargetIds: Set[String] = { + logicalTargets + .map(logicalTarget => logicalTarget.identifier) + } /** * diff --git a/src/main/scala/io/rml/framework/core/model/rdf/SerializableRDF.scala b/src/main/scala/io/rml/framework/core/model/rdf/SerializableRDF.scala index 0fc6413e..dc4880a8 100644 --- a/src/main/scala/io/rml/framework/core/model/rdf/SerializableRDF.scala +++ b/src/main/scala/io/rml/framework/core/model/rdf/SerializableRDF.scala @@ -58,7 +58,8 @@ case class FlinkRDFLiteral(literal: Literal) extends SerializableRDFNode(literal case class SerializableRDFQuad(subject: SerializableRDFTermNode, predicate: 
SerializableRDFResource, `object`: SerializableRDFNode, - graph: Option[SerializableRDFResource] = None) + graph: Option[SerializableRDFResource] = None, + logicalTargetIDs: Set[String]) extends Serializable { override def toString: String = { diff --git a/src/main/scala/io/rml/framework/core/model/std/StdGraphMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdGraphMap.scala index 8867b3e0..3c1959ef 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdGraphMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdGraphMap.scala @@ -32,7 +32,7 @@ case class StdGraphMap(identifier: String, constant: Option[Entity], reference: Option[Literal], template: Option[Literal], - logicalTargets: List[LogicalTarget]) extends GraphMap { + logicalTargets: Set[LogicalTarget]) extends GraphMap { override def termType: Option[Uri] = Some(Uri(R2RMLVoc.Class.IRI)) diff --git a/src/main/scala/io/rml/framework/core/model/std/StdObjectMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdObjectMap.scala index 0a241eb0..31229b5d 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdObjectMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdObjectMap.scala @@ -36,4 +36,4 @@ case class StdObjectMap(identifier: String, override val language: Option[Literal], parentTriplesMap: Option[String], joinCondition: Option[JoinCondition], - logicalTargets: List[LogicalTarget]) extends ObjectMap + logicalTargets: Set[LogicalTarget]) extends ObjectMap diff --git a/src/main/scala/io/rml/framework/core/model/std/StdPredicateMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdPredicateMap.scala index 89c671cc..fad94add 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdPredicateMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdPredicateMap.scala @@ -33,4 +33,4 @@ case class StdPredicateMap(identifier: String, reference: Option[Literal], template: Option[Literal], termType: Option[Uri], - logicalTargets: 
List[LogicalTarget]) extends PredicateMap + logicalTargets: Set[LogicalTarget]) extends PredicateMap diff --git a/src/main/scala/io/rml/framework/core/model/std/StdSubjectMap.scala b/src/main/scala/io/rml/framework/core/model/std/StdSubjectMap.scala index 52941df7..d682dddf 100644 --- a/src/main/scala/io/rml/framework/core/model/std/StdSubjectMap.scala +++ b/src/main/scala/io/rml/framework/core/model/std/StdSubjectMap.scala @@ -35,5 +35,5 @@ case class StdSubjectMap(identifier: String, template: Option[Literal], termType: Option[Uri], graphMap: Option[GraphMap], - logicalTargets: List[LogicalTarget]) extends SubjectMap + logicalTargets: Set[LogicalTarget]) extends SubjectMap diff --git a/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala index ba6a0ca9..c78040f0 100644 --- a/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/FunctionMapGeneratorAssembler.scala @@ -35,16 +35,16 @@ import io.rml.framework.shared.RMLException case class FunctionMapGeneratorAssembler() extends TermMapGeneratorAssembler { - override def assemble(termMap: TermMap): (Item) => Option[Iterable[Entity]] = { + override def assemble(termMap: TermMap, higherLevelLogicalTargetIDs: Set[String]): (Item) => Option[Iterable[Entity]] = { require(termMap.isInstanceOf[FunctionMap], "Wrong TermMap instance.") val functionMap = termMap.asInstanceOf[FunctionMap] val pomAssembler = PredicateObjectGeneratorAssembler() val assembledPom = functionMap.functionValue.sortBy(_.identifier) // sortBy required for retaining correct parameter ordering - .flatMap(pomAssembler.assemble) + .flatMap(predicateObjectMap => pomAssembler.assemble(predicateObjectMap, higherLevelLogicalTargetIDs)) .map { - case (predicateGen, objGen, _) => (predicateGen, objGen) + case (predicateGen, objGen, _, logicalTargetIDs) => 
(predicateGen, objGen, logicalTargetIDs) } val function = parseFunction(assembledPom) @@ -53,7 +53,7 @@ case class FunctionMapGeneratorAssembler() extends TermMapGeneratorAssembler { } private def parseFunction(assembledPom: - List[(Item => Option[Iterable[Uri]], Item => Option[Iterable[Entity]])]): Option[Function] = { + List[(Item => Option[Iterable[Uri]], Item => Option[Iterable[Entity]], Set[String])]): Option[Function] = { this.logDebug("parseFunction (assembledPom)") val placeHolder: List[SerializableRDFQuad] = generateFunctionTriples(new EmptyItem(), assembledPom) @@ -87,7 +87,7 @@ case class FunctionMapGeneratorAssembler() extends TermMapGeneratorAssembler { * @param assembledPom List of predicate object generator functions * @return anon function taking in [[Item]] and returns entities using the function */ - private def createAssemblerFunction(function: Option[Function], assembledPom: List[(Item => Option[Iterable[Uri]], Item => Option[Iterable[Entity]])]): Item => Option[Iterable[Entity]] = { + private def createAssemblerFunction(function: Option[Function], assembledPom: List[(Item => Option[Iterable[Uri]], Item => Option[Iterable[Entity]], Set[String])]): Item => Option[Iterable[Entity]] = { (item: Item) => { val triples: List[SerializableRDFQuad] = generateFunctionTriples(item, assembledPom) val paramTriples = triples.filter(triple => triple.predicate.uri != Uri(FunVoc.FnO.Property.EXECUTES)) @@ -108,16 +108,16 @@ case class FunctionMapGeneratorAssembler() extends TermMapGeneratorAssembler { * @param item * @return */ - private def generateFunctionTriples(item: Item, assembledPom: List[(Item => Option[Iterable[Uri]], Item => Option[Iterable[Entity]])]): List[SerializableRDFQuad] = { + private def generateFunctionTriples(item: Item, assembledPom: List[(Item => Option[Iterable[Uri]], Item => Option[Iterable[Entity]], Set[String])]): List[SerializableRDFQuad] = { val result = for{ - (predicateGen, objGen) <- assembledPom + (predicateGen, objGen, 
logicalTargetIDs) <- assembledPom predicateIter <- predicateGen(item) objIter <- objGen(item) } yield for { predicate <- predicateIter obj <- objIter - quad <- Statement.generateQuad(Blank(), predicate, obj) + quad <- Statement.generateQuad(Blank(), predicate, obj, logicalTargetIDs) } yield quad diff --git a/src/main/scala/io/rml/framework/engine/statement/GraphGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/GraphGeneratorAssembler.scala index 8bb2aaea..fbd5c420 100644 --- a/src/main/scala/io/rml/framework/engine/statement/GraphGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/GraphGeneratorAssembler.scala @@ -29,27 +29,27 @@ import io.rml.framework.core.model.{GraphMap, TermMap, Uri} class GraphGeneratorAssembler extends TermMapGeneratorAssembler { - def assemble(graphMapOpt: Option[GraphMap]): Item => Option[Iterable[Uri]] = { + def assemble(graphMapOpt: Option[GraphMap], higherLevelLogicalTargetIDs: Set[String]): Item => Option[Iterable[Uri]] = { graphMapOpt match { case None => Item => None - case Some(map) => assemble(map) + case Some(map) => assemble(map, higherLevelLogicalTargetIDs) } } - override def assemble(termMap: TermMap): Item => Option[Iterable[Uri]] = + override def assemble(termMap: TermMap, higherLevelLogicalTargetIDs: Set[String]): Item => Option[Iterable[Uri]] = { if (termMap.hasFunctionMap) { - val assembled = FunctionMapGeneratorAssembler().assemble(termMap.functionMap.head) + val logicalTargetIDs = termMap.getAllLogicalTargetIds + val assembled = FunctionMapGeneratorAssembler().assemble(termMap.functionMap.head, higherLevelLogicalTargetIDs ++ logicalTargetIDs) assembled.andThen(result => { result.map(iter => iter.map(elem => Uri(elem.value))) }) } else { - super.assemble(termMap).asInstanceOf[(Item) => Option[Iterable[Uri]]] + super.assemble(termMap, Set()).asInstanceOf[(Item) => Option[Iterable[Uri]]] } - - + } } object GraphGeneratorAssembler { diff --git 
a/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala index 5104af24..7dea21e4 100644 --- a/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala @@ -32,13 +32,13 @@ import io.rml.framework.core.vocabulary.R2RMLVoc class ObjectGeneratorAssembler extends TermMapGeneratorAssembler { - def assemble(objectMap: ObjectMap): (Item) => Option[Iterable[Entity]] = { - + def assemble(objectMap: ObjectMap, higherLevelLogicalTargetIDs: Set[String]): (Item) => Option[Iterable[Entity]] = { + val logicalTargetIDs = higherLevelLogicalTargetIDs ++ objectMap.getAllLogicalTargetIds // check if it has a parent triple map if (objectMap.parentTriplesMap.isDefined) { - super.assemble(TriplesMapsCache.get(objectMap.parentTriplesMap.get).get.subjectMap) + super.assemble(TriplesMapsCache.get(objectMap.parentTriplesMap.get).get.subjectMap, logicalTargetIDs) } else if (objectMap.hasFunctionMap) { - val assembledFunction = FunctionMapGeneratorAssembler().assemble(objectMap.functionMap.head) + val assembledFunction = FunctionMapGeneratorAssembler().assemble(objectMap.functionMap.head, logicalTargetIDs) val termTypeString = objectMap.termType.map(_.toString).getOrElse("") assembledFunction.andThen(item => { if (item.isDefined) { @@ -55,7 +55,7 @@ class ObjectGeneratorAssembler extends TermMapGeneratorAssembler { }) } else { - super.assemble(objectMap) + super.assemble(objectMap, Set()) } } diff --git a/src/main/scala/io/rml/framework/engine/statement/PredicateGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/PredicateGeneratorAssembler.scala index 5aa1fe7b..c6f2dcd4 100644 --- a/src/main/scala/io/rml/framework/engine/statement/PredicateGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/PredicateGeneratorAssembler.scala @@ 
-31,12 +31,13 @@ import io.rml.framework.core.model.{TermMap, Uri} class PredicateGeneratorAssembler extends TermMapGeneratorAssembler { - override def assemble(termMap: TermMap): (Item) => Option[Iterable[Uri]] = { + override def assemble(termMap: TermMap, higherLogicalTargetIDs: Set[String]): (Item) => Option[Iterable[Uri]] = { // Note: this code is very redundant to ObjectGeneratorAssembler. TODO: generalize? if(termMap.hasFunctionMap){ + val logicalTargetIDs = termMap.getAllLogicalTargetIds ++ higherLogicalTargetIDs val fmap = termMap.functionMap.head - val assembledFunction = FunctionMapGeneratorAssembler().assemble(fmap) + val assembledFunction = FunctionMapGeneratorAssembler().assemble(fmap, logicalTargetIDs) assembledFunction.andThen(item => { if(item.isDefined) { item.map(iter => iter.flatMap(elem => { @@ -49,7 +50,7 @@ class PredicateGeneratorAssembler extends TermMapGeneratorAssembler { }) } else { - super.assemble(termMap).asInstanceOf[(Item) => Option[Iterable[Uri]]] + super.assemble(termMap, Set()).asInstanceOf[(Item) => Option[Iterable[Uri]]] } } diff --git a/src/main/scala/io/rml/framework/engine/statement/PredicateObjectGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/PredicateObjectGeneratorAssembler.scala index e39d7a68..36f0f00f 100644 --- a/src/main/scala/io/rml/framework/engine/statement/PredicateObjectGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/PredicateObjectGeneratorAssembler.scala @@ -32,17 +32,26 @@ class PredicateObjectGeneratorAssembler(predicateGeneratorAssembler: PredicateGe objectGeneratorAssembler: ObjectGeneratorAssembler, graphGeneratorAssembler: GraphGeneratorAssembler) extends Logging{ - def assemble(predicateObjectMap: PredicateObjectMap) - : List[(Item => Option[Iterable[Uri]], Item => Option[Iterable[Entity]], Item => Option[Iterable[Uri]])] = { + def assemble(predicateObjectMap: PredicateObjectMap, higherLevelLogicalTargetIDs: Set[String]) + : List[(Item => 
Option[Iterable[Uri]], Item => Option[Iterable[Entity]], Item => Option[Iterable[Uri]], Set[String])] = { this.logDebug("assemble (predicateObjectMap)") + val graphLogicalTargetIDs = if (predicateObjectMap.graphMap.isDefined) { + predicateObjectMap.graphMap.get.getAllLogicalTargetIds + } else { + Set() + } - val graphStatement = graphGeneratorAssembler.assemble(predicateObjectMap.graphMap) + val graphStatement = graphGeneratorAssembler.assemble(predicateObjectMap.graphMap, higherLevelLogicalTargetIDs ++ graphLogicalTargetIDs) predicateObjectMap.predicateMaps.flatMap(predicateMap => { + val predicateLogicalTargetIDs = predicateMap.getAllLogicalTargetIds predicateObjectMap.objectMaps.map(objectMap => { - (predicateGeneratorAssembler.assemble(predicateMap), - objectGeneratorAssembler.assemble(objectMap), - graphStatement) + val objectLogicalTargetIDs = objectMap.getAllLogicalTargetIds + val allLogicalTargetIDs = higherLevelLogicalTargetIDs ++ predicateLogicalTargetIDs ++ objectLogicalTargetIDs ++ graphLogicalTargetIDs + (predicateGeneratorAssembler.assemble(predicateMap, allLogicalTargetIDs), + objectGeneratorAssembler.assemble(objectMap, allLogicalTargetIDs), + graphStatement, + allLogicalTargetIDs) }) }) diff --git a/src/main/scala/io/rml/framework/engine/statement/Statement.scala b/src/main/scala/io/rml/framework/engine/statement/Statement.scala index f2776bc3..f6552e57 100644 --- a/src/main/scala/io/rml/framework/engine/statement/Statement.scala +++ b/src/main/scala/io/rml/framework/engine/statement/Statement.scala @@ -49,7 +49,7 @@ abstract class Statement[T] { def process(item: T): Option[Iterable[SerializableRDFQuad]] - def subProcess[S <: Item] (graphItem:S, subjItem:S, predItem: S, objectItem: S): Option[Iterable[SerializableRDFQuad]] = { + def subProcess[S <: Item] (graphItem:S, subjItem:S, predItem: S, objectItem: S, logicalTargetIDs: Set[String]): Option[Iterable[SerializableRDFQuad]] = { val graphOption = graphGenerator(graphItem) val result = for { @@ 
-58,9 +58,9 @@ abstract class Statement[T] { _object <- objectGenerator(objectItem ) // try to generate the object } yield for { (subj, pred, obj, graph) <- Statement.quadCombination(subject, predicate, _object, graphOption) - triple <- Statement.generateQuad(subj, pred, obj, graph) + quad <- Statement.generateQuad(subj, pred, obj, logicalTargetIDs, graph) - } yield triple + } yield quad if (result.isEmpty) None else result @@ -71,27 +71,30 @@ abstract class Statement[T] { case class ChildStatement(subjectGenerator: Item => Option[Iterable[TermNode]], predicateGenerator: Item => Option[Iterable[Uri]], objectGenerator: Item => Option[Iterable[Entity]], - graphGenerator: Item => Option[Iterable[Uri]]) extends Statement[JoinedItem] with Serializable { + graphGenerator: Item => Option[Iterable[Uri]], + logicalTargetIDs: Set[String]) extends Statement[JoinedItem] with Serializable { def process(item: JoinedItem): Option[Iterable[SerializableRDFQuad]] = { - subProcess(item.child, item.child, item.child, item.parent) + subProcess(item.child, item.child, item.child, item.parent, logicalTargetIDs) } } case class ParentStatement(subjectGenerator: Item => Option[Iterable[TermNode]], predicateGenerator: Item => Option[Iterable[Uri]], objectGenerator: Item => Option[Iterable[Entity]], - graphGenerator: Item => Option[Iterable[Uri]]) extends Statement[JoinedItem] with Serializable { + graphGenerator: Item => Option[Iterable[Uri]], + logicalTargetIDs: Set[String]) extends Statement[JoinedItem] with Serializable { def process(item: JoinedItem): Option[Iterable[SerializableRDFQuad]] = { - subProcess(item.parent, item.parent, item.parent, item.parent) + subProcess(item.parent, item.parent, item.parent, item.parent, logicalTargetIDs) } } case class StdStatement(subjectGenerator: Item => Option[Iterable[TermNode]], predicateGenerator: Item => Option[Iterable[Uri]], objectGenerator: Item => Option[Iterable[Entity]], - graphGenerator: Item => Option[Iterable[Uri]]) extends 
Statement[Item] with Serializable { + graphGenerator: Item => Option[Iterable[Uri]], + logicalTargetIDs: Set[String]) extends Statement[Item] with Serializable { /** * Tries to refer a triple from the given item. @@ -101,7 +104,7 @@ case class StdStatement(subjectGenerator: Item => Option[Iterable[TermNode]], */ def process(item: Item): Option[Iterable[SerializableRDFQuad]] = { - subProcess(item,item,item,item) + subProcess(item,item,item,item, logicalTargetIDs) } } @@ -131,7 +134,7 @@ object Statement extends Logging { } - def generateQuad(subject: TermNode, predicate: Uri, _object: Entity, graphOpt: Option[Uri] = None): Option[SerializableRDFQuad] = { + def generateQuad(subject: TermNode, predicate: Uri, _object: Entity, logicalTargetIDs: Set[String], graphOpt: Option[Uri] = None): Option[SerializableRDFQuad] = { val subjectResource = subject match { case blank: Blank => SerializableRDFBlank(blank) @@ -145,7 +148,7 @@ object Statement extends Logging { } val graphUri = graphOpt.map(SerializableRDFResource) - val result = Some(SerializableRDFQuad(subjectResource, predicateResource, objectNode, graphUri)) + val result = Some(SerializableRDFQuad(subjectResource, predicateResource, objectNode, graphUri, logicalTargetIDs)) logDebug(result.get.toString) result } diff --git a/src/main/scala/io/rml/framework/engine/statement/StatementsAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/StatementsAssembler.scala index c4e0c8a4..af88d320 100644 --- a/src/main/scala/io/rml/framework/engine/statement/StatementsAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/StatementsAssembler.scala @@ -44,35 +44,46 @@ extends Logging{ * @param triplesMap * @return */ - def assembleStatements(triplesMap: TriplesMap): List[(Item => Option[Iterable[TermNode]], Item => Option[Iterable[Uri]], Item => Option[Iterable[Entity]], Item => Option[Iterable[Uri]])] = { + def assembleStatements(triplesMap: TriplesMap): List[(Item => Option[Iterable[TermNode]], Item 
=> Option[Iterable[Uri]], Item => Option[Iterable[Entity]], Item => Option[Iterable[Uri]], Set[String])] = { this.logDebug("assembleStatements(triplesmaps)") - val subjectGraphGenerator = graphAssembler.assemble(triplesMap.subjectMap.graphMap) + val subjectLogicalTargetIds = triplesMap.subjectMap.getAllLogicalTargetIds // including GraphMap logical target IDs + + val subjectGraphGenerator = graphAssembler.assemble(triplesMap.subjectMap.graphMap, subjectLogicalTargetIds) // assemble subject - val subjectGenerator = subjectAssembler.assemble(triplesMap.subjectMap) + val subjectGenerator = subjectAssembler.assemble(triplesMap.subjectMap, Set()) // there are no higher level logical targets since it's a "top" SubjectMap // check for class mappings (rr:class) - val classMappings = getClassMappingStatements(subjectGenerator, triplesMap.subjectMap.`class`, subjectGraphGenerator) + val classMappingStatements = getClassMappingStatements(subjectGenerator, triplesMap.subjectMap.`class`, subjectGraphGenerator, subjectLogicalTargetIds) // assemble predicate and object - val predicateObjects = triplesMap.predicateObjectMaps.flatMap(predicateObjectMap => { - predicateObjectAssembler.assemble(predicateObjectMap) + val allPredicateObjectsAndLogicalTargetIDs = triplesMap.predicateObjectMaps.map(predicateObjectMap => { + predicateObjectAssembler.assemble(predicateObjectMap, subjectLogicalTargetIds) }) // create the statements - predicateObjects.map(predicateObject => { - val graphGenerator = if(triplesMap.subjectMap.graphMap.isDefined) subjectGraphGenerator else predicateObject._3 - (subjectGenerator, predicateObject._1, predicateObject._2, graphGenerator) - }) ++ classMappings // add class mappings - + val predicateObjectStatements = allPredicateObjectsAndLogicalTargetIDs.flatMap(predicateObjectAndLogicalTargetIDs => { + val statementsPerPredicateObjectMap = predicateObjectAndLogicalTargetIDs.map(predicateObjects => { + val graphGenerator = 
if(triplesMap.subjectMap.graphMap.isDefined) subjectGraphGenerator else predicateObjects._3 + (subjectGenerator, predicateObjects._1, predicateObjects._2, graphGenerator, predicateObjects._4) + }) + statementsPerPredicateObjectMap + + /*val graphGenerator = if(triplesMap.subjectMap.graphMap.isDefined) subjectGraphGenerator else predicateObjects._3 + (subjectGenerator, predicateObjectAndLogicalTargetIDs._1, predicateObjectAndLogicalTargetIDs._2, graphGenerator, predicateObjectAndLogicalTargetIDs._4)*/ + }) // add class mappings + predicateObjectStatements ++ classMappingStatements } private def getClassMappingStatements(subjectGenerator: (Item) => Option[Iterable[TermNode]], - classes: List[Uri], graphGenerator: Item => Option[Iterable[Uri]]): Seq[(Item => Option[Iterable[TermNode]], Item => Some[List[Uri]], Item => Some[List[Uri]], Item => Option[Iterable[Uri]])] = { + classes: List[Uri], + graphGenerator: Item => Option[Iterable[Uri]], + subjectLogicalTargetIds: Set[String]) + : List[(Item => Option[Iterable[TermNode]], Item => Some[List[Uri]], Item => Some[List[Uri]], Item => Option[Iterable[Uri]], Set[String])] = { classes.map(_class => { val predicateGenerator = (item: Item) => Some(List(Uri(RDFVoc.Property.TYPE))) val objectGenerator = (item: Item) => Some(List(_class)) - (subjectGenerator, predicateGenerator, objectGenerator, graphGenerator) + (subjectGenerator, predicateGenerator, objectGenerator, graphGenerator, subjectLogicalTargetIds) }) } @@ -86,19 +97,19 @@ object StatementsAssembler { def assembleStatements(triplesMap: TriplesMap): List[Statement[Item]] = { val quads = new StatementsAssembler() .assembleStatements(triplesMap) - quads.map(quad => StdStatement(quad._1, quad._2, quad._3,quad._4)) + quads.map(quad => StdStatement(quad._1, quad._2, quad._3,quad._4, quad._5)) } def assembleChildStatements(joinedTriplesMap: JoinedTriplesMap): List[Statement[JoinedItem]] = { val triples = new StatementsAssembler() .assembleStatements(joinedTriplesMap) - 
triples.map(triple => ChildStatement(triple._1, triple._2, triple._3, triple._4)) + triples.map(triple => ChildStatement(triple._1, triple._2, triple._3, triple._4, triple._5)) } def assembleParentStatements(joinedTriplesMap: JoinedTriplesMap): List[Statement[JoinedItem]] = { val triples = new StatementsAssembler() .assembleStatements(TriplesMapsCache.get(joinedTriplesMap.parentTriplesMap).get) - triples.map(triple => ParentStatement(triple._1, triple._2, triple._3, triple._4)) + triples.map(triple => ParentStatement(triple._1, triple._2, triple._3, triple._4, triple._5)) } diff --git a/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala index f4bfaaca..806e6584 100644 --- a/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/SubjectGeneratorAssembler.scala @@ -31,11 +31,12 @@ import io.rml.framework.core.model.{TermMap, TermNode, Uri} class SubjectGeneratorAssembler extends TermMapGeneratorAssembler { - override def assemble(termMap: TermMap): (Item) => Option[Iterable[TermNode]] = { + override def assemble(termMap: TermMap, higherLevelLogicalTargetIDs: Set[String]): (Item) => Option[Iterable[TermNode]] = { if(termMap.hasFunctionMap){ + val logicalTargetIDs = higherLevelLogicalTargetIDs ++ termMap.getAllLogicalTargetIds val fmap = termMap.functionMap.head - val assembledFunction = FunctionMapGeneratorAssembler().assemble(fmap) + val assembledFunction = FunctionMapGeneratorAssembler().assemble(fmap, logicalTargetIDs) assembledFunction.andThen(item => { if(item.isDefined) { @@ -49,7 +50,7 @@ class SubjectGeneratorAssembler extends TermMapGeneratorAssembler { }) }else { - super.assemble(termMap).asInstanceOf[(Item) => Option[Iterable[TermNode]]] + super.assemble(termMap, Set()).asInstanceOf[(Item) => Option[Iterable[TermNode]]] } } diff --git 
a/src/main/scala/io/rml/framework/engine/statement/TermMapGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/TermMapGeneratorAssembler.scala index a51f06c6..86fef103 100644 --- a/src/main/scala/io/rml/framework/engine/statement/TermMapGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/TermMapGeneratorAssembler.scala @@ -40,7 +40,7 @@ abstract class TermMapGeneratorAssembler extends Logging { * @param termMap * @return */ - def assemble(termMap: TermMap): (Item) => Option[Iterable[Entity]] = { + def assemble(termMap: TermMap, higherLevelLogicalTargetIDs: Set[String]): (Item) => Option[Iterable[Entity]] = { if (termMap.hasConstant) { constantGenerator(termMap) } else if (termMap.hasTemplate) { From 24ca049202b0468a97fb1f2a5b1e775bcd97aa77 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Thu, 22 Apr 2021 14:41:03 +0200 Subject: [PATCH 45/83] Updated compression vocabulary --- .../scala/io/rml/framework/core/vocabulary/RMLCompVoc.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/scala/io/rml/framework/core/vocabulary/RMLCompVoc.scala b/src/main/scala/io/rml/framework/core/vocabulary/RMLCompVoc.scala index 8d7864bd..99950306 100644 --- a/src/main/scala/io/rml/framework/core/vocabulary/RMLCompVoc.scala +++ b/src/main/scala/io/rml/framework/core/vocabulary/RMLCompVoc.scala @@ -28,8 +28,10 @@ object RMLCompVoc { val namespace = ("comp", "http://semweb.mmlab.be/ns/rml-compression#") object Class { - val GZIP = namespace._2 + "GZip" - val ZIP = namespace._2 + "Zip" + val GZIP = namespace._2 + "gzip" + val ZIP = namespace._2 + "zip" + val TARGZIP = namespace._2 + "targzip" + val TARXZ = namespace._2 + "tarxz" } } From da0a9ec4ee34e0d61f0496305aa7bfed95c5126c Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Thu, 22 Apr 2021 14:44:02 +0200 Subject: [PATCH 46/83] Generalise TriplesMapsCache to NodeCache --- src/main/scala/io/rml/framework/Main.scala | 6 ++--- 
...TriplesMapsCache.scala => NodeCache.scala} | 23 +++++++++++++++++-- .../std/StdTriplesMapExtractor.scala | 10 ++++---- .../statement/ObjectGeneratorAssembler.scala | 5 ++-- .../statement/StatementsAssembler.scala | 4 ++-- .../scala/io/rml/framework/SandboxTests.scala | 4 ++-- .../io/rml/framework/StreamTestSync.scala | 4 ++-- .../engine/StatementEngineTest.scala | 4 ++-- .../framework/std/StdMappingReaderTest.scala | 8 +++---- .../util/fileprocessing/MappingTestUtil.scala | 4 ++-- .../TripleGeneratorTestUtil.scala | 4 ++-- 11 files changed, 48 insertions(+), 28 deletions(-) rename src/main/scala/io/rml/framework/core/extractors/{TriplesMapsCache.scala => NodeCache.scala} (62%) diff --git a/src/main/scala/io/rml/framework/Main.scala b/src/main/scala/io/rml/framework/Main.scala index 8db7d949..0bf30cef 100644 --- a/src/main/scala/io/rml/framework/Main.scala +++ b/src/main/scala/io/rml/framework/Main.scala @@ -27,7 +27,7 @@ package io.rml.framework import io.rml.framework.api.{FnOEnvironment, RMLEnvironment} -import io.rml.framework.core.extractors.TriplesMapsCache +import io.rml.framework.core.extractors.NodeCache import io.rml.framework.core.internal.Logging import io.rml.framework.core.item.{EmptyItem, Item, JoinedItem} import io.rml.framework.core.model._ @@ -374,7 +374,7 @@ object Main extends Logging { formattedMapping.joinedSteamTriplesMaps.foreach(joinedTm => { // identify the parent triples map - val parentTm = TriplesMapsCache.get(joinedTm.parentTriplesMap).get; + val parentTm = NodeCache.getTriplesMap(joinedTm.parentTriplesMap).get; // find the parent source of the join condition val joinParentSource = joinedTm.joinCondition.get.parent.identifier @@ -545,7 +545,7 @@ object Main extends Logging { }) - val parentTriplesMap = TriplesMapsCache.get(tm.parentTriplesMap).get; + val parentTriplesMap = NodeCache.getTriplesMap(tm.parentTriplesMap).get; val parentDataset = // Create a Source from the parents logical source 
Source(parentTriplesMap.logicalSource).asInstanceOf[FileDataSet] diff --git a/src/main/scala/io/rml/framework/core/extractors/TriplesMapsCache.scala b/src/main/scala/io/rml/framework/core/extractors/NodeCache.scala similarity index 62% rename from src/main/scala/io/rml/framework/core/extractors/TriplesMapsCache.scala rename to src/main/scala/io/rml/framework/core/extractors/NodeCache.scala index 362cb292..80231508 100644 --- a/src/main/scala/io/rml/framework/core/extractors/TriplesMapsCache.scala +++ b/src/main/scala/io/rml/framework/core/extractors/NodeCache.scala @@ -1,6 +1,6 @@ package io.rml.framework.core.extractors -import io.rml.framework.core.model.TriplesMap +import io.rml.framework.core.model.{LogicalTarget, Node, TriplesMap} /** * MIT License @@ -26,4 +26,23 @@ import io.rml.framework.core.model.TriplesMap * THE SOFTWARE. * * */ -object TriplesMapsCache extends scala.collection.mutable.HashMap[String, TriplesMap] +object NodeCache extends scala.collection.mutable.HashMap[String, Node] { + + def getTriplesMap(resource: String): Option[TriplesMap] = { + val node = NodeCache.get(resource) + node match { + case Some(tm: TriplesMap) => Some(tm) + case None => None + case _ => throw new InternalError(s"Expected TriplesMap in node cache for key ${resource}") + } + } + + def getLogicalTarget(identifier: String): Option[LogicalTarget] = { + val node = NodeCache.get(identifier) + node match { + case Some(tm: LogicalTarget) => Some(tm) + case None => None + case _ => throw new InternalError(s"Expected LogicalTarget in node cache for key ${identifier}") + } + } +} diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala index 16466eed..5d85e0e7 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdTriplesMapExtractor.scala @@ -25,7 +25,7 @@ package
io.rml.framework.core.extractors.std -import io.rml.framework.core.extractors.{TriplesMapsCache, _} +import io.rml.framework.core.extractors.{NodeCache, _} import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.rdf.{RDFGraph, RDFResource} import io.rml.framework.core.model.{TriplesMap, Uri} @@ -112,8 +112,8 @@ object StdTriplesMapExtractor extends TriplesMapExtractor with Logging { def extractTriplesMapProperties(resource: RDFResource): Option[TriplesMap] = { val resourceStr = resource.value; // errors can occur during extraction of sub structures - if (TriplesMapsCache.contains(resourceStr)) { - TriplesMapsCache.get(resourceStr) + if (NodeCache.contains(resourceStr)) { + NodeCache.getTriplesMap(resourceStr) } else { try { @@ -124,7 +124,7 @@ object StdTriplesMapExtractor extends TriplesMapExtractor with Logging { resource.uri.value, GraphMapExtractor().extract(resource) ) - val t = TriplesMapsCache.put(resourceStr, triplesMap); + val t = NodeCache.put(resourceStr, triplesMap); Some(triplesMap) } catch { @@ -132,7 +132,7 @@ object StdTriplesMapExtractor extends TriplesMapExtractor with Logging { case e: RMLException => e.printStackTrace() logWarning(e.getMessage) - logWarning(resource.uri + ": Skipping triple map.") + logWarning(resource.uri + ": Skipping triples map.") throw e } } diff --git a/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala index 7dea21e4..3ed9a716 100644 --- a/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/ObjectGeneratorAssembler.scala @@ -25,7 +25,7 @@ package io.rml.framework.engine.statement -import io.rml.framework.core.extractors.TriplesMapsCache +import io.rml.framework.core.extractors.NodeCache import io.rml.framework.core.item.Item import io.rml.framework.core.model.{Entity, Literal, ObjectMap, Uri} import 
io.rml.framework.core.vocabulary.R2RMLVoc @@ -36,7 +36,8 @@ class ObjectGeneratorAssembler extends TermMapGeneratorAssembler { val logicalTargetIDs = higherLevelLogicalTargetIDs ++ objectMap.getAllLogicalTargetIds // check if it has a parent triple map if (objectMap.parentTriplesMap.isDefined) { - super.assemble(TriplesMapsCache.get(objectMap.parentTriplesMap.get).get.subjectMap, logicalTargetIDs) + + super.assemble(NodeCache.getTriplesMap(objectMap.parentTriplesMap.get).get.subjectMap, logicalTargetIDs) } else if (objectMap.hasFunctionMap) { val assembledFunction = FunctionMapGeneratorAssembler().assemble(objectMap.functionMap.head, logicalTargetIDs) val termTypeString = objectMap.termType.map(_.toString).getOrElse("") diff --git a/src/main/scala/io/rml/framework/engine/statement/StatementsAssembler.scala b/src/main/scala/io/rml/framework/engine/statement/StatementsAssembler.scala index af88d320..8635616b 100644 --- a/src/main/scala/io/rml/framework/engine/statement/StatementsAssembler.scala +++ b/src/main/scala/io/rml/framework/engine/statement/StatementsAssembler.scala @@ -25,7 +25,7 @@ package io.rml.framework.engine.statement -import io.rml.framework.core.extractors.TriplesMapsCache +import io.rml.framework.core.extractors.NodeCache import io.rml.framework.core.internal.Logging import io.rml.framework.core.item.{Item, JoinedItem} import io.rml.framework.core.model._ @@ -108,7 +108,7 @@ object StatementsAssembler { def assembleParentStatements(joinedTriplesMap: JoinedTriplesMap): List[Statement[JoinedItem]] = { val triples = new StatementsAssembler() - .assembleStatements(TriplesMapsCache.get(joinedTriplesMap.parentTriplesMap).get) + .assembleStatements(NodeCache.getTriplesMap(joinedTriplesMap.parentTriplesMap).get) triples.map(triple => ParentStatement(triple._1, triple._2, triple._3, triple._4, triple._5)) } diff --git a/src/test/scala/io/rml/framework/SandboxTests.scala b/src/test/scala/io/rml/framework/SandboxTests.scala index a3812fee..753d2c26 100644 --- 
a/src/test/scala/io/rml/framework/SandboxTests.scala +++ b/src/test/scala/io/rml/framework/SandboxTests.scala @@ -25,7 +25,7 @@ package io.rml.framework import io.rml.framework.api.RMLEnvironment -import io.rml.framework.core.extractors.TriplesMapsCache +import io.rml.framework.core.extractors.NodeCache import io.rml.framework.core.util.Util import io.rml.framework.engine.NopPostProcessor import io.rml.framework.util.TestUtil @@ -41,7 +41,7 @@ class SandboxTests extends FunSuite with Matchers with FunctionMappingTest { private def executeTest(mappingFile: String): Unit = { - TriplesMapsCache.clear(); + NodeCache.clear(); RMLEnvironment.setGeneratorBaseIRI(Some("http://example.org/base/")) implicit val env = ExecutionEnvironment.getExecutionEnvironment implicit val senv = StreamExecutionEnvironment.getExecutionEnvironment diff --git a/src/test/scala/io/rml/framework/StreamTestSync.scala b/src/test/scala/io/rml/framework/StreamTestSync.scala index 23bc5e19..7ba4e602 100644 --- a/src/test/scala/io/rml/framework/StreamTestSync.scala +++ b/src/test/scala/io/rml/framework/StreamTestSync.scala @@ -25,7 +25,7 @@ package io.rml.framework import io.rml.framework.api.RMLEnvironment -import io.rml.framework.core.extractors.TriplesMapsCache +import io.rml.framework.core.extractors.NodeCache import io.rml.framework.core.internal.Logging import io.rml.framework.core.util.{StreamerConfig, Util} import io.rml.framework.engine.PostProcessor @@ -104,7 +104,7 @@ abstract class StreamTestSync extends StaticTestSpec with ReadMappingBehaviour w // run the test cases for ((folderPath, postProcessorName) <- testCases) { - TriplesMapsCache.clear(); + NodeCache.clear(); //it should s"produce triples equal to the expected triples for ${folderPath.getFileName}" in { Logger.lineBreak(50) diff --git a/src/test/scala/io/rml/framework/engine/StatementEngineTest.scala b/src/test/scala/io/rml/framework/engine/StatementEngineTest.scala index ab5607e5..68764f5b 100644 --- 
a/src/test/scala/io/rml/framework/engine/StatementEngineTest.scala +++ b/src/test/scala/io/rml/framework/engine/StatementEngineTest.scala @@ -26,7 +26,7 @@ package io.rml.framework.engine import io.rml.framework.Main import io.rml.framework.api.RMLEnvironment -import io.rml.framework.core.extractors.TriplesMapsCache +import io.rml.framework.core.extractors.NodeCache import io.rml.framework.core.util.Util import io.rml.framework.util.TestUtil import io.rml.framework.util.logging.Logger @@ -39,7 +39,7 @@ import java.io.File class StatementEngineTest extends FunSuite with Matchers { private def executeTest(mappingFile: String): Unit = { - TriplesMapsCache.clear(); + NodeCache.clear(); RMLEnvironment.setGeneratorBaseIRI(Some("http://example.org/base/")) implicit val env = ExecutionEnvironment.getExecutionEnvironment implicit val senv = StreamExecutionEnvironment.getExecutionEnvironment diff --git a/src/test/scala/io/rml/framework/std/StdMappingReaderTest.scala b/src/test/scala/io/rml/framework/std/StdMappingReaderTest.scala index 15d98f5c..acdf9f51 100644 --- a/src/test/scala/io/rml/framework/std/StdMappingReaderTest.scala +++ b/src/test/scala/io/rml/framework/std/StdMappingReaderTest.scala @@ -26,7 +26,7 @@ package io.rml.framework.std import io.rml.framework.api.RMLEnvironment -import io.rml.framework.core.extractors.TriplesMapsCache +import io.rml.framework.core.extractors.NodeCache import io.rml.framework.core.internal.Logging import io.rml.framework.core.util.Util import org.scalatest.{BeforeAndAfter, FunSuite, Matchers} @@ -51,7 +51,7 @@ class StdMappingReaderTest extends FunSuite with Matchers * │   └── student.csv */ test("relative paths: scenario01"){ - TriplesMapsCache.clear(); + NodeCache.clear(); val mappingFile = "relative_paths/scenario01/RMLTC0001a-CSV/mapping.ttl" // determine the base IRI of the mapping file RMLEnvironment.setMappingFileBaseIRI(Some((mappingFile))) @@ -67,7 +67,7 @@ class StdMappingReaderTest extends FunSuite with Matchers * │   └── 
student.csv */ test("relative paths: scenario02"){ - TriplesMapsCache.clear(); + NodeCache.clear(); val mappingFile = "relative_paths/scenario02/RMLTC0001a-CSV/mapping.ttl" // determine the base IRI of the mapping file RMLEnvironment.setMappingFileBaseIRI(Some((mappingFile))) @@ -83,7 +83,7 @@ class StdMappingReaderTest extends FunSuite with Matchers * └── output.ttl */ test("relative paths: scenario03"){ - TriplesMapsCache.clear(); + NodeCache.clear(); val mappingFile = "relative_paths/scenario03/RMLTC0001a-CSV/mapping.ttl" // determine the base IRI of the mapping file RMLEnvironment.setMappingFileBaseIRI(Some((mappingFile))) diff --git a/src/test/scala/io/rml/framework/util/fileprocessing/MappingTestUtil.scala b/src/test/scala/io/rml/framework/util/fileprocessing/MappingTestUtil.scala index 97ae304b..b998e26a 100644 --- a/src/test/scala/io/rml/framework/util/fileprocessing/MappingTestUtil.scala +++ b/src/test/scala/io/rml/framework/util/fileprocessing/MappingTestUtil.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework.util.fileprocessing -import io.rml.framework.core.extractors.TriplesMapsCache +import io.rml.framework.core.extractors.NodeCache import io.rml.framework.core.model.FormattedRMLMapping import io.rml.framework.core.util.Util @@ -38,7 +38,7 @@ object MappingTestUtil extends FileProcessingUtil[FormattedRMLMapping] { override def processFile(file: File): FormattedRMLMapping = { - TriplesMapsCache.clear(); + NodeCache.clear(); Util.readMappingFile(file.getCanonicalPath) //val mapping = MappingReader().read(file) //FormattedRMLMapping.fromRMLMapping(mapping.asInstanceOf[RMLMapping]) diff --git a/src/test/scala/io/rml/framework/util/fileprocessing/TripleGeneratorTestUtil.scala b/src/test/scala/io/rml/framework/util/fileprocessing/TripleGeneratorTestUtil.scala index 49265772..1440bf9a 100644 --- a/src/test/scala/io/rml/framework/util/fileprocessing/TripleGeneratorTestUtil.scala +++ 
b/src/test/scala/io/rml/framework/util/fileprocessing/TripleGeneratorTestUtil.scala @@ -25,7 +25,7 @@ package io.rml.framework.util.fileprocessing import io.rml.framework.Main -import io.rml.framework.core.extractors.TriplesMapsCache +import io.rml.framework.core.extractors.NodeCache import io.rml.framework.core.util.{Format, NQuads, Util} import io.rml.framework.engine.{NopPostProcessor, PostProcessor} import io.rml.framework.util.logging.Logger @@ -65,7 +65,7 @@ object TripleGeneratorTestUtil extends TestFilesUtil[(List[String], Format)] { */ def processFile(file: File): (List[String], Format) = { try { - TriplesMapsCache.clear(); + NodeCache.clear(); val formattedMapping = Util.readMappingFile(file.getCanonicalPath) val dataSet = Main.createDataSetFromFormattedMapping(formattedMapping).collect From c6be9d5b3b199a862b87886669ad4f56bc04f56e Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Thu, 22 Apr 2021 15:01:10 +0200 Subject: [PATCH 47/83] Update example to new compression ontology (lower case) --- .../sandbox/logical-target/example-2-subjectMap/mapping.ttl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/resources/sandbox/logical-target/example-2-subjectMap/mapping.ttl b/src/test/resources/sandbox/logical-target/example-2-subjectMap/mapping.ttl index 67fbb55f..9edcce2d 100644 --- a/src/test/resources/sandbox/logical-target/example-2-subjectMap/mapping.ttl +++ b/src/test/resources/sandbox/logical-target/example-2-subjectMap/mapping.ttl @@ -22,7 +22,7 @@ <#LogicalTarget1> a rmlt:LogicalTarget; rmlt:target <#VoIDDump>; rmlt:serialization formats:N-Quads ; - rmlt:compression comp:GZip; + rmlt:compression comp:gzip; . 
<#TriplesMap> a rr:TriplesMap; From d735d26e9b598c8ac7d8e25114247b72ea0c492f Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Thu, 22 Apr 2021 17:41:25 +0000 Subject: [PATCH 48/83] Update RMLStreamer version in doc --- documentation/README_Functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/documentation/README_Functions.md b/documentation/README_Functions.md index 7a2e75d3..c62f7f5f 100644 --- a/documentation/README_Functions.md +++ b/documentation/README_Functions.md @@ -40,7 +40,7 @@ Note that the function descriptions and function mappings are present. The command for running the RML Streamer on Flink should look like ``` -~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl +~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.1.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl ``` ## Test Cases @@ -188,4 +188,4 @@ to load and bind every function as specified in the testcase's `mapping.ttl`. # Remarks - When the RMLStreamer is unable to find a function description or function mapping, bind method parameters to values, it will be logged as an error to the console and the function will not be applied. 
- \ No newline at end of file + From 68485f9bac951e9ef8ad1f783b9afb8f0bf8595e Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Thu, 22 Apr 2021 19:48:44 +0200 Subject: [PATCH 49/83] Add LogicalTargets to NodeCache --- .../core/extractors/std/StdLogicalTargetExtractor.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala index cd921920..95411e5f 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdLogicalTargetExtractor.scala @@ -1,6 +1,6 @@ package io.rml.framework.core.extractors.std -import io.rml.framework.core.extractors.{DataTargetExtractor, ExtractorUtil, LogicalTargetExtractor} +import io.rml.framework.core.extractors.{DataTargetExtractor, ExtractorUtil, LogicalTargetExtractor, NodeCache} import io.rml.framework.core.internal.Logging import io.rml.framework.core.model.rdf.RDFResource import io.rml.framework.core.model.{DataTarget, LogicalTarget, Uri} @@ -49,7 +49,9 @@ class StdLogicalTargetExtractor(dataTargetExtractor: DataTargetExtractor) extend properties.foreach(logicalTargetResource => { logicalTargetResource match { case resource: RDFResource => { - result += extractLogicalTargetProperties(resource) + val logicalTarget = extractLogicalTargetProperties(resource) + NodeCache.put(logicalTarget.identifier, logicalTarget) + result += logicalTarget } case _ => throw new RMLException("Only logical target from resource allowed.") } From e039d3790c57573ca8217687cdc676d0a9b46eec Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Wed, 31 Mar 2021 16:53:07 +0200 Subject: [PATCH 50/83] Update and fix class loading issue --- pom.xml | 2 +- .../scala/io/rml/framework/KafkaStreamTestSyncFnO.scala | 4 ++-- src/test/scala/io/rml/framework/SandboxTests.scala | 3 ++- 
src/test/scala/io/rml/framework/StreamTestSync.scala | 6 ++++-- src/test/scala/io/rml/framework/TCPStreamTestSync.scala | 5 ++++- src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala | 3 --- .../rml/framework/util/fileprocessing/TestFilesUtil.scala | 3 ++- src/test/scala/io/rml/framework/util/server/TestSink2.scala | 2 +- 8 files changed, 16 insertions(+), 12 deletions(-) diff --git a/pom.xml b/pom.xml index e7f4798a..207b4ae5 100644 --- a/pom.xml +++ b/pom.xml @@ -36,7 +36,7 @@ SOFTWARE. UTF-8 - 1.11.3 + 1.12.2 1.7.26 2.13.3 3.11.0 diff --git a/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala b/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala index 93303a3d..dddb9d84 100644 --- a/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala +++ b/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala @@ -77,7 +77,7 @@ class KafkaStreamTestSyncFnO extends StreamTestSync{ } - override def beforeTestCase(): Unit = { +// override def beforeTestCase(): Unit = { // topics seem to be created automatically... 
/*logInfo("Creating Kafka input topic...") @@ -89,7 +89,7 @@ class KafkaStreamTestSyncFnO extends StreamTestSync{ createTopicsResult.all().get() // wait for completion of creating topics Thread.sleep(2000) logInfo("Creating Kafka input topic done.") */ - } +// } override def afterTestCase(): Unit = { logInfo("Deleting Kafka input topic(s)...") diff --git a/src/test/scala/io/rml/framework/SandboxTests.scala b/src/test/scala/io/rml/framework/SandboxTests.scala index 1628b94a..c869ea3f 100644 --- a/src/test/scala/io/rml/framework/SandboxTests.scala +++ b/src/test/scala/io/rml/framework/SandboxTests.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework -import io.rml.framework.api.RMLEnvironment +import io.rml.framework.api.{FnOEnvironment, RMLEnvironment} import io.rml.framework.core.extractors.TriplesMapsCache import io.rml.framework.core.util.Util import io.rml.framework.engine.NopPostProcessor @@ -42,6 +42,7 @@ class SandboxTests extends FunSuite with Matchers with FunctionMappingTest { private def executeTest(mappingFile: String): Unit = { TriplesMapsCache.clear(); + FnOEnvironment.loadedClassesMap.clear() RMLEnvironment.setGeneratorBaseIRI(Some("http://example.org/base/")) implicit val env = ExecutionEnvironment.getExecutionEnvironment implicit val senv = StreamExecutionEnvironment.getExecutionEnvironment diff --git a/src/test/scala/io/rml/framework/StreamTestSync.scala b/src/test/scala/io/rml/framework/StreamTestSync.scala index 23bc5e19..8bb5b2bf 100644 --- a/src/test/scala/io/rml/framework/StreamTestSync.scala +++ b/src/test/scala/io/rml/framework/StreamTestSync.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework -import io.rml.framework.api.RMLEnvironment +import io.rml.framework.api.{FnOEnvironment, RMLEnvironment} import io.rml.framework.core.extractors.TriplesMapsCache import io.rml.framework.core.internal.Logging import io.rml.framework.core.util.{StreamerConfig, Util} @@ -73,7 +73,9 @@ abstract class StreamTestSync extends StaticTestSpec with 
ReadMappingBehaviour w } // Things to do before running one test case - protected def beforeTestCase(): Unit + protected def beforeTestCase(): Unit = { + FnOEnvironment.loadedClassesMap.clear() + } // Things to do after running one test case protected def afterTestCase(): Unit diff --git a/src/test/scala/io/rml/framework/TCPStreamTestSync.scala b/src/test/scala/io/rml/framework/TCPStreamTestSync.scala index eb36977f..bcf1b3c6 100644 --- a/src/test/scala/io/rml/framework/TCPStreamTestSync.scala +++ b/src/test/scala/io/rml/framework/TCPStreamTestSync.scala @@ -34,6 +34,7 @@ import io.netty.channel.socket.SocketChannel import io.netty.channel.socket.nio.NioServerSocketChannel import io.netty.channel.{ChannelFuture, ChannelHandlerContext, ChannelInboundHandlerAdapter, ChannelInitializer} import io.netty.util.{CharsetUtil, ReferenceCountUtil} +import io.rml.framework.api.FnOEnvironment import io.rml.framework.util.logging.Logger import io.rml.framework.util.server.TestData @@ -78,7 +79,9 @@ class TCPStreamTestSync extends StreamTestSync { } - override def beforeTestCase(): Unit = {} + override def beforeTestCase(): Unit = { + FnOEnvironment.loadedClassesMap.clear() + } override def afterTestCase(): Unit = {} diff --git a/src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala b/src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala index 32a1a8f7..9b0d7e95 100644 --- a/src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala +++ b/src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala @@ -27,7 +27,6 @@ package io.rml.framework import java.net.InetSocketAddress import java.nio.charset.StandardCharsets import java.util.concurrent.TimeUnit - import io.netty.bootstrap.ServerBootstrap import io.netty.buffer.ByteBuf import io.netty.channel.nio.NioEventLoopGroup @@ -77,8 +76,6 @@ class TCPStreamTestSyncFnO extends StreamTestSync { } - override def beforeTestCase(): Unit = {} - override def afterTestCase(): Unit = {} override def teardown(): Unit = { diff --git 
a/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala b/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala index 12224f70..9f2c6509 100644 --- a/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala +++ b/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala @@ -24,6 +24,7 @@ **/ package io.rml.framework.util.fileprocessing +import io.rml.framework.api.FnOEnvironment import io.rml.framework.core.util.Util import io.rml.framework.util.logging.Logger @@ -74,7 +75,7 @@ trait TestFilesUtil[R] { def test(rootDir: String, shouldPass: Boolean, checkFunc: (String, Boolean) => Unit): Unit = { var checkedTestCases = Array("") for (pathString <- getTestCaseFolders(rootDir).map(_.toString).sorted) { - + FnOEnvironment.loadedClassesMap.clear() checkFunc(pathString, shouldPass) val testCase = new File(pathString).getName Logger.logSuccess("Passed processing: " + testCase) diff --git a/src/test/scala/io/rml/framework/util/server/TestSink2.scala b/src/test/scala/io/rml/framework/util/server/TestSink2.scala index 191f4165..caae76c1 100644 --- a/src/test/scala/io/rml/framework/util/server/TestSink2.scala +++ b/src/test/scala/io/rml/framework/util/server/TestSink2.scala @@ -47,7 +47,7 @@ object TestSink2 extends SinkFunction[String] { class TestSink2 extends SinkFunction[String] { import TestSink2._ - override def invoke(value: String, context: SinkFunction.Context[_]): Unit = { + override def invoke(value: String, context: SinkFunction.Context): Unit = { Logger.logInfo(s"TestSink2: got value [${value}]") if (value.trim.nonEmpty) { triples = value :: triples From 84b41932ef52a616811b3a895d8626cb465ab37e Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 14:30:52 +0200 Subject: [PATCH 51/83] Update documentation and docker flink version --- README.md | 6 +++--- docker/docker-compose.yml | 4 ++-- documentation/README_Functions.md | 10 +++++----- 3 files changed, 10 insertions(+), 10 
deletions(-) diff --git a/README.md b/README.md index 2f7cde90..c4692351 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,9 @@ If you want to deploy it yourself, read on. ### Installing Flink RMLStreamer runs its jobs on Flink clusters. -More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.11/try-flink/local_installation.html). +More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.12/try-flink/local_installation.html). At least a local cluster must be running in order to start executing RML Mappings with RMLStreamer. -Please note that this version works with Flink 1.11.3 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.11.3/flink-1.11.3-bin-scala_2.11.tgz). +Please note that this version works with Flink 1.12.2 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.12.2/flink-1.12.2-bin-scala_2.11.tgz). ### Building RMLStreamer @@ -46,7 +46,7 @@ The resulting `RMLStreamer-.jar`, found in the `target` folder, can be ### Executing RML Mappings Here we give examples for running RMLStreamer from the command line. We use `FLINK_BIN` to denote the Flink CLI tool, -usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.11.3/bin/flink`. +usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.12.2/bin/flink`. For Windows a `flink.bat` script is provided. 
The general usage is: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index fb61a26c..5e0d9d3c 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -2,7 +2,7 @@ version: '3' services: jobmanager: - image: flink:1.11.3-scala_2.11 + image: flink:1.12.2-scala_2.11 expose: - "6123" ports: @@ -14,7 +14,7 @@ services: - data:/mnt/data taskmanager: - image: flink:1.11.3-scala_2.11 + image: flink:1.12.2-scala_2.11 expose: - "6121" - "6122" diff --git a/documentation/README_Functions.md b/documentation/README_Functions.md index d53ac3bf..7a2e75d3 100644 --- a/documentation/README_Functions.md +++ b/documentation/README_Functions.md @@ -15,13 +15,13 @@ These files can be obtained from `src/main/resources`: ## Example: RML Streamer + Flink Flink's `lib` directory should contain the jar-files with the custom functions. In this example, these are marked with `*` ``` -flink-1.11.2-scala_2.11 +flink-1.12.2-scala_2.11 └── lib ├── GrelFunctions.jar * ├── IDLabFunctions.jar * - ├── flink-dist_2.11-1.11.2.jar - ├── flink-table-blink_2.11-1.11.2.jar - ├── flink-table_2.11-1.11.2.jar + ├── flink-dist_2.11-1.12.2.jar + ├── flink-table-blink_2.11-1.12.2.jar + ├── flink-table_2.11-1.12.2.jar ├── log4j-1.2.17.jar └── slf4j-log4j12-1.7.15.jar ``` @@ -40,7 +40,7 @@ Note that the function descriptions and function mappings are present. 
The command for running the RML Streamer on Flink should look like ``` -~/flink/flink-1.11.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl +~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl ``` ## Test Cases From 3a70b55f934778ea4b08f36fe46a61d956bab74f Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 12:38:44 +0000 Subject: [PATCH 52/83] Add comment documenting classloader fix --- .../io/rml/framework/util/fileprocessing/TestFilesUtil.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala b/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala index 9f2c6509..c093f809 100644 --- a/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala +++ b/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala @@ -75,6 +75,8 @@ trait TestFilesUtil[R] { def test(rootDir: String, shouldPass: Boolean, checkFunc: (String, Boolean) => Unit): Unit = { var checkedTestCases = Array("") for (pathString <- getTestCaseFolders(rootDir).map(_.toString).sorted) { + // clear the loaded classes, this prevents an Exception that would occur when using classes + // from an unloaded class loader FnOEnvironment.loadedClassesMap.clear() checkFunc(pathString, shouldPass) val testCase = new File(pathString).getName From 817d277f28271a4d770a3dd834f8a231c00df1a2 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 12:40:36 +0000 Subject: [PATCH 53/83] Add comment documenting classloader fix --- src/test/scala/io/rml/framework/TCPStreamTestSync.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/scala/io/rml/framework/TCPStreamTestSync.scala b/src/test/scala/io/rml/framework/TCPStreamTestSync.scala index bcf1b3c6..8f1cb01f 100644 --- 
a/src/test/scala/io/rml/framework/TCPStreamTestSync.scala +++ b/src/test/scala/io/rml/framework/TCPStreamTestSync.scala @@ -80,6 +80,8 @@ class TCPStreamTestSync extends StreamTestSync { } override def beforeTestCase(): Unit = { + // clear the loaded classes, this prevents an Exception that would occur when using classes + // from an unloaded class loader FnOEnvironment.loadedClassesMap.clear() } From d8cca23814733ca3b35fa555134fccd3e7257487 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 12:42:39 +0000 Subject: [PATCH 54/83] Add comment documenting classloader fix --- src/test/scala/io/rml/framework/StreamTestSync.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/scala/io/rml/framework/StreamTestSync.scala b/src/test/scala/io/rml/framework/StreamTestSync.scala index 8bb5b2bf..32e5f863 100644 --- a/src/test/scala/io/rml/framework/StreamTestSync.scala +++ b/src/test/scala/io/rml/framework/StreamTestSync.scala @@ -74,6 +74,8 @@ abstract class StreamTestSync extends StaticTestSpec with ReadMappingBehaviour w // Things to do before running one test case protected def beforeTestCase(): Unit = { + // clear the loaded classes, this prevents an Exception that would occur when using classes + // from an unloaded class loader FnOEnvironment.loadedClassesMap.clear() } From b39912adc7bf0800b4ab4a4badf31beb200bfa3f Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 12:43:57 +0000 Subject: [PATCH 55/83] Add comment documenting classloader fix --- src/test/scala/io/rml/framework/SandboxTests.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/scala/io/rml/framework/SandboxTests.scala b/src/test/scala/io/rml/framework/SandboxTests.scala index c869ea3f..19bb889b 100644 --- a/src/test/scala/io/rml/framework/SandboxTests.scala +++ b/src/test/scala/io/rml/framework/SandboxTests.scala @@ -42,6 +42,8 @@ class SandboxTests extends FunSuite with Matchers with FunctionMappingTest { private def 
executeTest(mappingFile: String): Unit = { TriplesMapsCache.clear(); + // clear the loaded classes, this prevents an Exception that would occur when using classes + // from an unloaded class loader FnOEnvironment.loadedClassesMap.clear() RMLEnvironment.setGeneratorBaseIRI(Some("http://example.org/base/")) implicit val env = ExecutionEnvironment.getExecutionEnvironment From 4c27bf5e1076630c5b90d6d1770123bfbf563093 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 16:09:26 +0200 Subject: [PATCH 56/83] Remove beforeTestCase method --- .../io/rml/framework/KafkaStreamTestSyncFnO.scala | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala b/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala index dddb9d84..2ec7124a 100644 --- a/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala +++ b/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala @@ -76,21 +76,6 @@ class KafkaStreamTestSyncFnO extends StreamTestSync{ logInfo("Creating Kafka client created.") } - -// override def beforeTestCase(): Unit = { - // topics seem to be created automatically... 
- - /*logInfo("Creating Kafka input topic...") - val topicName = "demo" - val numPartitions = 1 - val replicationFactor = 1 - val topic = new NewTopic(topicName, numPartitions, replicationFactor.toShort) - val createTopicsResult = admin.createTopics(ArrayBuffer(topic).asJava) - createTopicsResult.all().get() // wait for completion of creating topics - Thread.sleep(2000) - logInfo("Creating Kafka input topic done.") */ -// } - override def afterTestCase(): Unit = { logInfo("Deleting Kafka input topic(s)...") val topics = admin.listTopics().names().get() From fae44c0e4101e6b6443b0326ef7dcd0a209a4735 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Thu, 22 Apr 2021 17:41:25 +0000 Subject: [PATCH 57/83] Update RMLStreamer version in doc --- documentation/README_Functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/documentation/README_Functions.md b/documentation/README_Functions.md index 7a2e75d3..c62f7f5f 100644 --- a/documentation/README_Functions.md +++ b/documentation/README_Functions.md @@ -40,7 +40,7 @@ Note that the function descriptions and function mappings are present. The command for running the RML Streamer on Flink should look like ``` -~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl +~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.1.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl ``` ## Test Cases @@ -188,4 +188,4 @@ to load and bind every function as specified in the testcase's `mapping.ttl`. # Remarks - When the RMLStreamer is unable to find a function description or function mapping, bind method parameters to values, it will be logged as an error to the console and the function will not be applied. 
- \ No newline at end of file + From 6bc96f862f7bbf344c237e65c2503cbbe8eb1141 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Wed, 24 Mar 2021 16:04:58 +0100 Subject: [PATCH 58/83] Create basic MQTTSink --- .../framework/flink/sink/RichMQTTSink.scala | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala diff --git a/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala new file mode 100644 index 00000000..baf5931e --- /dev/null +++ b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala @@ -0,0 +1,31 @@ +package io.rml.framework.flink.sink + +import org.apache.flink.configuration.Configuration +import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} +import org.eclipse.paho.client.mqttv3.{MqttClient, MqttConnectOptions} +import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence + +class RichMQTTSink(val url: String, val topic: String) extends RichSinkFunction[String] { + + private var client: MqttClient = _ + + override def open(parameters: Configuration): Unit = { + super.open(parameters) + + client = new MqttClient(url, MqttClient.generateClientId(), new MemoryPersistence) + val connectionOptions = new MqttConnectOptions + connectionOptions.setAutomaticReconnect(true) + connectionOptions.setCleanSession(false) + + client.connect(connectionOptions) + } + + override def invoke(value: String, context: SinkFunction.Context[_]): Unit = { + val payload = value.getBytes() + client.publish(topic, payload, 2, false) + } + + override def close(): Unit = { + client.disconnect() + } +} From c5d749fa02a68fadd96bdb011d35c5d223256ccd Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 29 Mar 2021 15:13:13 +0200 Subject: [PATCH 59/83] Add MQTT sink to parameters and main --- src/main/scala/io/rml/framework/Main.scala | 5 +++++ 
.../rml/framework/core/util/ParameterUtil.scala | 15 ++++++++++++++- .../rml/framework/flink/sink/RichMQTTSink.scala | 11 +++++++++-- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/main/scala/io/rml/framework/Main.scala b/src/main/scala/io/rml/framework/Main.scala index 0bf30cef..151e786b 100644 --- a/src/main/scala/io/rml/framework/Main.scala +++ b/src/main/scala/io/rml/framework/Main.scala @@ -52,6 +52,7 @@ import org.apache.flink.util.Collector import java.util.Properties import scala.collection.{immutable, mutable} +import io.rml.framework.flink.sink.RichMQTTSink /** * @@ -176,6 +177,10 @@ object Main extends Logging { .build() stream.addSink(sink).name("Streaming file sink") } + else if (config.outputSink.equals(OutputSinkOption.MQTT)) { + val sink = new RichMQTTSink(config.broker.get, config.topic.get) + stream.addSink(sink) + } // discard output if the parameter is given else if (config.outputSink.equals(OutputSinkOption.None)) { stream.addSink(output => {}).name("No output sink") diff --git a/src/main/scala/io/rml/framework/core/util/ParameterUtil.scala b/src/main/scala/io/rml/framework/core/util/ParameterUtil.scala index 466368a4..93713225 100644 --- a/src/main/scala/io/rml/framework/core/util/ParameterUtil.scala +++ b/src/main/scala/io/rml/framework/core/util/ParameterUtil.scala @@ -38,6 +38,7 @@ object ParameterUtil { checkpointInterval: Option[Long] = None, outputPath: Option[String] = None, brokerList: Option[String] = None, + broker: Option[String] = None, topic: Option[String] = None, partitionId: Option[Int] = None, socket: Option[String] = None, @@ -65,7 +66,7 @@ object ParameterUtil { // possible output sink options object OutputSinkOption extends Enumeration { type OutputSinkOption = Value - val File, Socket, Kafka, None = Value + val File, Socket, Kafka, MQTT, None = Value } // possible post processor options @@ -159,6 +160,18 @@ object ParameterUtil { .text("The TCP socket to write to.") ) + cmd("toMQTT") + .text("Write 
output to an MQTT topic") + .action((_, config) => config.copy(outputSink = OutputSinkOption.MQTT)) + .children( + opt[String]('b', "broker").valueName("").required() + .action((value, config) => config.copy(broker = Some(value))) + .text("The MQTT broker."), + opt[String]('t', "topic").valueName("").required() + .action((value, config) => config.copy(topic = Some(value))) + .text("The name of the MQTT topic to write output to.") + ) + cmd("noOutput") .text("Do everything, but discard output") .action((_, config) => config.copy(outputSink = OutputSinkOption.None)) diff --git a/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala index baf5931e..3012eead 100644 --- a/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala +++ b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala @@ -2,17 +2,24 @@ package io.rml.framework.flink.sink import org.apache.flink.configuration.Configuration import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} +import org.apache.http.client.utils.URIBuilder import org.eclipse.paho.client.mqttv3.{MqttClient, MqttConnectOptions} import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence -class RichMQTTSink(val url: String, val topic: String) extends RichSinkFunction[String] { +import java.net.URI + +class RichMQTTSink(val broker: String, val topic: String) extends RichSinkFunction[String] { private var client: MqttClient = _ override def open(parameters: Configuration): Unit = { super.open(parameters) - client = new MqttClient(url, MqttClient.generateClientId(), new MemoryPersistence) + val uriBuilder = new URIBuilder() + uriBuilder.setHost(broker) + uriBuilder.setScheme("tcp") + + client = new MqttClient(uriBuilder.build.toString, MqttClient.generateClientId(), new MemoryPersistence) val connectionOptions = new MqttConnectOptions connectionOptions.setAutomaticReconnect(true) connectionOptions.setCleanSession(false) 
From da42f793887856e299fa3eb2606d162cd17135dd Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 14:48:13 +0200 Subject: [PATCH 60/83] Use UTF8 charset --- src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala index 3012eead..90b2349f 100644 --- a/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala +++ b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala @@ -7,6 +7,7 @@ import org.eclipse.paho.client.mqttv3.{MqttClient, MqttConnectOptions} import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence import java.net.URI +import java.nio.charset.StandardCharsets class RichMQTTSink(val broker: String, val topic: String) extends RichSinkFunction[String] { @@ -28,7 +29,7 @@ class RichMQTTSink(val broker: String, val topic: String) extends RichSinkFuncti } override def invoke(value: String, context: SinkFunction.Context[_]): Unit = { - val payload = value.getBytes() + val payload = value.getBytes(StandardCharsets.UTF_8) client.publish(topic, payload, 2, false) } From d3c37f1ef18c30e3264aecaf5bed8d2ead84b099 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Wed, 31 Mar 2021 16:53:07 +0200 Subject: [PATCH 61/83] Update and fix class loading issue --- pom.xml | 2 +- .../scala/io/rml/framework/KafkaStreamTestSyncFnO.scala | 4 ++-- src/test/scala/io/rml/framework/SandboxTests.scala | 2 ++ src/test/scala/io/rml/framework/StreamTestSync.scala | 6 ++++-- src/test/scala/io/rml/framework/TCPStreamTestSync.scala | 5 ++++- src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala | 3 --- .../rml/framework/util/fileprocessing/TestFilesUtil.scala | 3 ++- src/test/scala/io/rml/framework/util/server/TestSink2.scala | 2 +- 8 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pom.xml b/pom.xml index e7f4798a..207b4ae5 100644 --- 
a/pom.xml +++ b/pom.xml @@ -36,7 +36,7 @@ SOFTWARE. UTF-8 - 1.11.3 + 1.12.2 1.7.26 2.13.3 3.11.0 diff --git a/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala b/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala index 93303a3d..dddb9d84 100644 --- a/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala +++ b/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala @@ -77,7 +77,7 @@ class KafkaStreamTestSyncFnO extends StreamTestSync{ } - override def beforeTestCase(): Unit = { +// override def beforeTestCase(): Unit = { // topics seem to be created automatically... /*logInfo("Creating Kafka input topic...") @@ -89,7 +89,7 @@ class KafkaStreamTestSyncFnO extends StreamTestSync{ createTopicsResult.all().get() // wait for completion of creating topics Thread.sleep(2000) logInfo("Creating Kafka input topic done.") */ - } +// } override def afterTestCase(): Unit = { logInfo("Deleting Kafka input topic(s)...") diff --git a/src/test/scala/io/rml/framework/SandboxTests.scala b/src/test/scala/io/rml/framework/SandboxTests.scala index 753d2c26..acc6659a 100644 --- a/src/test/scala/io/rml/framework/SandboxTests.scala +++ b/src/test/scala/io/rml/framework/SandboxTests.scala @@ -24,6 +24,7 @@ **/ package io.rml.framework +import io.rml.framework.api.{FnOEnvironment, RMLEnvironment} import io.rml.framework.api.RMLEnvironment import io.rml.framework.core.extractors.NodeCache import io.rml.framework.core.util.Util @@ -42,6 +43,7 @@ class SandboxTests extends FunSuite with Matchers with FunctionMappingTest { private def executeTest(mappingFile: String): Unit = { NodeCache.clear(); + FnOEnvironment.loadedClassesMap.clear() RMLEnvironment.setGeneratorBaseIRI(Some("http://example.org/base/")) implicit val env = ExecutionEnvironment.getExecutionEnvironment implicit val senv = StreamExecutionEnvironment.getExecutionEnvironment diff --git a/src/test/scala/io/rml/framework/StreamTestSync.scala b/src/test/scala/io/rml/framework/StreamTestSync.scala index 
7ba4e602..8183669e 100644 --- a/src/test/scala/io/rml/framework/StreamTestSync.scala +++ b/src/test/scala/io/rml/framework/StreamTestSync.scala @@ -24,7 +24,7 @@ **/ package io.rml.framework -import io.rml.framework.api.RMLEnvironment +import io.rml.framework.api.{FnOEnvironment, RMLEnvironment} import io.rml.framework.core.extractors.NodeCache import io.rml.framework.core.internal.Logging import io.rml.framework.core.util.{StreamerConfig, Util} @@ -73,7 +73,9 @@ abstract class StreamTestSync extends StaticTestSpec with ReadMappingBehaviour w } // Things to do before running one test case - protected def beforeTestCase(): Unit + protected def beforeTestCase(): Unit = { + FnOEnvironment.loadedClassesMap.clear() + } // Things to do after running one test case protected def afterTestCase(): Unit diff --git a/src/test/scala/io/rml/framework/TCPStreamTestSync.scala b/src/test/scala/io/rml/framework/TCPStreamTestSync.scala index eb36977f..bcf1b3c6 100644 --- a/src/test/scala/io/rml/framework/TCPStreamTestSync.scala +++ b/src/test/scala/io/rml/framework/TCPStreamTestSync.scala @@ -34,6 +34,7 @@ import io.netty.channel.socket.SocketChannel import io.netty.channel.socket.nio.NioServerSocketChannel import io.netty.channel.{ChannelFuture, ChannelHandlerContext, ChannelInboundHandlerAdapter, ChannelInitializer} import io.netty.util.{CharsetUtil, ReferenceCountUtil} +import io.rml.framework.api.FnOEnvironment import io.rml.framework.util.logging.Logger import io.rml.framework.util.server.TestData @@ -78,7 +79,9 @@ class TCPStreamTestSync extends StreamTestSync { } - override def beforeTestCase(): Unit = {} + override def beforeTestCase(): Unit = { + FnOEnvironment.loadedClassesMap.clear() + } override def afterTestCase(): Unit = {} diff --git a/src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala b/src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala index 32a1a8f7..9b0d7e95 100644 --- a/src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala +++ 
b/src/test/scala/io/rml/framework/TCPStreamTestSyncFnO.scala @@ -27,7 +27,6 @@ package io.rml.framework import java.net.InetSocketAddress import java.nio.charset.StandardCharsets import java.util.concurrent.TimeUnit - import io.netty.bootstrap.ServerBootstrap import io.netty.buffer.ByteBuf import io.netty.channel.nio.NioEventLoopGroup @@ -77,8 +76,6 @@ class TCPStreamTestSyncFnO extends StreamTestSync { } - override def beforeTestCase(): Unit = {} - override def afterTestCase(): Unit = {} override def teardown(): Unit = { diff --git a/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala b/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala index 12224f70..9f2c6509 100644 --- a/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala +++ b/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala @@ -24,6 +24,7 @@ **/ package io.rml.framework.util.fileprocessing +import io.rml.framework.api.FnOEnvironment import io.rml.framework.core.util.Util import io.rml.framework.util.logging.Logger @@ -74,7 +75,7 @@ trait TestFilesUtil[R] { def test(rootDir: String, shouldPass: Boolean, checkFunc: (String, Boolean) => Unit): Unit = { var checkedTestCases = Array("") for (pathString <- getTestCaseFolders(rootDir).map(_.toString).sorted) { - + FnOEnvironment.loadedClassesMap.clear() checkFunc(pathString, shouldPass) val testCase = new File(pathString).getName Logger.logSuccess("Passed processing: " + testCase) diff --git a/src/test/scala/io/rml/framework/util/server/TestSink2.scala b/src/test/scala/io/rml/framework/util/server/TestSink2.scala index 191f4165..caae76c1 100644 --- a/src/test/scala/io/rml/framework/util/server/TestSink2.scala +++ b/src/test/scala/io/rml/framework/util/server/TestSink2.scala @@ -47,7 +47,7 @@ object TestSink2 extends SinkFunction[String] { class TestSink2 extends SinkFunction[String] { import TestSink2._ - override def invoke(value: String, context: SinkFunction.Context[_]): Unit = { 
+ override def invoke(value: String, context: SinkFunction.Context): Unit = { Logger.logInfo(s"TestSink2: got value [${value}]") if (value.trim.nonEmpty) { triples = value :: triples From d084dbcdc76522d1b1c4301c18d5bf916a8e9caa Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 14:30:52 +0200 Subject: [PATCH 62/83] Update documentation and docker flink version --- README.md | 6 +++--- docker/docker-compose.yml | 4 ++-- documentation/README_Functions.md | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 2f7cde90..c4692351 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,9 @@ If you want to deploy it yourself, read on. ### Installing Flink RMLStreamer runs its jobs on Flink clusters. -More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.11/try-flink/local_installation.html). +More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.12/try-flink/local_installation.html). At least a local cluster must be running in order to start executing RML Mappings with RMLStreamer. -Please note that this version works with Flink 1.11.3 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.11.3/flink-1.11.3-bin-scala_2.11.tgz). +Please note that this version works with Flink 1.12.2 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.12.2/flink-1.12.2-bin-scala_2.11.tgz). ### Building RMLStreamer @@ -46,7 +46,7 @@ The resulting `RMLStreamer-.jar`, found in the `target` folder, can be ### Executing RML Mappings Here we give examples for running RMLStreamer from the command line. We use `FLINK_BIN` to denote the Flink CLI tool, -usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.11.3/bin/flink`. 
+usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.12.2/bin/flink`. For Windows a `flink.bat` script is provided. The general usage is: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index fb61a26c..5e0d9d3c 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -2,7 +2,7 @@ version: '3' services: jobmanager: - image: flink:1.11.3-scala_2.11 + image: flink:1.12.2-scala_2.11 expose: - "6123" ports: @@ -14,7 +14,7 @@ services: - data:/mnt/data taskmanager: - image: flink:1.11.3-scala_2.11 + image: flink:1.12.2-scala_2.11 expose: - "6121" - "6122" diff --git a/documentation/README_Functions.md b/documentation/README_Functions.md index d53ac3bf..7a2e75d3 100644 --- a/documentation/README_Functions.md +++ b/documentation/README_Functions.md @@ -15,13 +15,13 @@ These files can be obtained from `src/main/resources`: ## Example: RML Streamer + Flink Flink's `lib` directory should contain the jar-files with the custom functions. In this example, these are marked with `*` ``` -flink-1.11.2-scala_2.11 +flink-1.12.2-scala_2.11 └── lib ├── GrelFunctions.jar * ├── IDLabFunctions.jar * - ├── flink-dist_2.11-1.11.2.jar - ├── flink-table-blink_2.11-1.11.2.jar - ├── flink-table_2.11-1.11.2.jar + ├── flink-dist_2.11-1.12.2.jar + ├── flink-table-blink_2.11-1.12.2.jar + ├── flink-table_2.11-1.12.2.jar ├── log4j-1.2.17.jar └── slf4j-log4j12-1.7.15.jar ``` @@ -40,7 +40,7 @@ Note that the function descriptions and function mappings are present. 
The command for running the RML Streamer on Flink should look like ``` -~/flink/flink-1.11.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl +~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl ``` ## Test Cases From c1def9822d8c3fa1e58d45d4ea3ed9d04aab1f18 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 12:38:44 +0000 Subject: [PATCH 63/83] Add comment documenting classloader fix --- .../io/rml/framework/util/fileprocessing/TestFilesUtil.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala b/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala index 9f2c6509..c093f809 100644 --- a/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala +++ b/src/test/scala/io/rml/framework/util/fileprocessing/TestFilesUtil.scala @@ -75,6 +75,8 @@ trait TestFilesUtil[R] { def test(rootDir: String, shouldPass: Boolean, checkFunc: (String, Boolean) => Unit): Unit = { var checkedTestCases = Array("") for (pathString <- getTestCaseFolders(rootDir).map(_.toString).sorted) { + // clear the loaded classes, this prevents an Exception that would occur when using classes + // from an unloaded class loader FnOEnvironment.loadedClassesMap.clear() checkFunc(pathString, shouldPass) val testCase = new File(pathString).getName From c95d68e39548f36f7513e4571dfd6d652fe8c3d8 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 12:40:36 +0000 Subject: [PATCH 64/83] Add comment documenting classloader fix --- src/test/scala/io/rml/framework/TCPStreamTestSync.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/scala/io/rml/framework/TCPStreamTestSync.scala b/src/test/scala/io/rml/framework/TCPStreamTestSync.scala index bcf1b3c6..8f1cb01f 100644 --- 
a/src/test/scala/io/rml/framework/TCPStreamTestSync.scala +++ b/src/test/scala/io/rml/framework/TCPStreamTestSync.scala @@ -80,6 +80,8 @@ class TCPStreamTestSync extends StreamTestSync { } override def beforeTestCase(): Unit = { + // clear the loaded classes, this prevents an Exception that would occur when using classes + // from an unloaded class loader FnOEnvironment.loadedClassesMap.clear() } From 87f777f3c5339b75ad1f2ddd840cd357da79e5cf Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 12:42:39 +0000 Subject: [PATCH 65/83] Add comment documenting classloader fix --- src/test/scala/io/rml/framework/StreamTestSync.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/scala/io/rml/framework/StreamTestSync.scala b/src/test/scala/io/rml/framework/StreamTestSync.scala index 8183669e..f78465ea 100644 --- a/src/test/scala/io/rml/framework/StreamTestSync.scala +++ b/src/test/scala/io/rml/framework/StreamTestSync.scala @@ -74,6 +74,8 @@ abstract class StreamTestSync extends StaticTestSpec with ReadMappingBehaviour w // Things to do before running one test case protected def beforeTestCase(): Unit = { + // clear the loaded classes, this prevents an Exception that would occur when using classes + // from an unloaded class loader FnOEnvironment.loadedClassesMap.clear() } From 7f005cc493420834c2453fad7372cdc99c505e07 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 12:43:57 +0000 Subject: [PATCH 66/83] Add comment documenting classloader fix --- src/test/scala/io/rml/framework/SandboxTests.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/scala/io/rml/framework/SandboxTests.scala b/src/test/scala/io/rml/framework/SandboxTests.scala index acc6659a..e85cf129 100644 --- a/src/test/scala/io/rml/framework/SandboxTests.scala +++ b/src/test/scala/io/rml/framework/SandboxTests.scala @@ -43,6 +43,8 @@ class SandboxTests extends FunSuite with Matchers with FunctionMappingTest { private def 
executeTest(mappingFile: String): Unit = { NodeCache.clear(); + // clear the loaded classes, this prevents an Exception that would occur when using classes + // from an unloaded class loader FnOEnvironment.loadedClassesMap.clear() RMLEnvironment.setGeneratorBaseIRI(Some("http://example.org/base/")) implicit val env = ExecutionEnvironment.getExecutionEnvironment From 847d1602d7b28b3fe9b328415b412f92b26ae3e6 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 19 Apr 2021 16:09:26 +0200 Subject: [PATCH 67/83] Remove beforeTestCase method --- .../io/rml/framework/KafkaStreamTestSyncFnO.scala | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala b/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala index dddb9d84..2ec7124a 100644 --- a/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala +++ b/src/test/scala/io/rml/framework/KafkaStreamTestSyncFnO.scala @@ -76,21 +76,6 @@ class KafkaStreamTestSyncFnO extends StreamTestSync{ logInfo("Creating Kafka client created.") } - -// override def beforeTestCase(): Unit = { - // topics seem to be created automatically... 
- - /*logInfo("Creating Kafka input topic...") - val topicName = "demo" - val numPartitions = 1 - val replicationFactor = 1 - val topic = new NewTopic(topicName, numPartitions, replicationFactor.toShort) - val createTopicsResult = admin.createTopics(ArrayBuffer(topic).asJava) - createTopicsResult.all().get() // wait for completion of creating topics - Thread.sleep(2000) - logInfo("Creating Kafka input topic done.") */ -// } - override def afterTestCase(): Unit = { logInfo("Deleting Kafka input topic(s)...") val topics = admin.listTopics().names().get() From bea0c4eb95b1584738d7e92db75c70b3f7c4bb42 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Thu, 22 Apr 2021 17:41:25 +0000 Subject: [PATCH 68/83] Update RMLStreamer version in doc --- documentation/README_Functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/documentation/README_Functions.md b/documentation/README_Functions.md index 7a2e75d3..c62f7f5f 100644 --- a/documentation/README_Functions.md +++ b/documentation/README_Functions.md @@ -40,7 +40,7 @@ Note that the function descriptions and function mappings are present. The command for running the RML Streamer on Flink should look like ``` -~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl +~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.1.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl ``` ## Test Cases @@ -188,4 +188,4 @@ to load and bind every function as specified in the testcase's `mapping.ttl`. # Remarks - When the RMLStreamer is unable to find a function description or function mapping, bind method parameters to values, it will be logged as an error to the console and the function will not be applied. 
- \ No newline at end of file + From 578c569c6bfc4c4dea38425a6363985541dd7164 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 26 Mar 2021 06:18:40 +0100 Subject: [PATCH 69/83] Revert "Update docker and documentation flink version to 1.12.2" This reverts commit 9f202343 --- README.md | 6 +++--- docker/docker-compose.yml | 4 ++-- documentation/README_Functions.md | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index c4692351..2f7cde90 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,9 @@ If you want to deploy it yourself, read on. ### Installing Flink RMLStreamer runs its jobs on Flink clusters. -More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.12/try-flink/local_installation.html). +More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.11/try-flink/local_installation.html). At least a local cluster must be running in order to start executing RML Mappings with RMLStreamer. -Please note that this version works with Flink 1.12.2 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.12.2/flink-1.12.2-bin-scala_2.11.tgz). +Please note that this version works with Flink 1.11.3 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.11.3/flink-1.11.3-bin-scala_2.11.tgz). ### Building RMLStreamer @@ -46,7 +46,7 @@ The resulting `RMLStreamer-.jar`, found in the `target` folder, can be ### Executing RML Mappings Here we give examples for running RMLStreamer from the command line. We use `FLINK_BIN` to denote the Flink CLI tool, -usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.12.2/bin/flink`. +usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.11.3/bin/flink`. 
For Windows a `flink.bat` script is provided. The general usage is: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 5e0d9d3c..fb61a26c 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -2,7 +2,7 @@ version: '3' services: jobmanager: - image: flink:1.12.2-scala_2.11 + image: flink:1.11.3-scala_2.11 expose: - "6123" ports: @@ -14,7 +14,7 @@ services: - data:/mnt/data taskmanager: - image: flink:1.12.2-scala_2.11 + image: flink:1.11.3-scala_2.11 expose: - "6121" - "6122" diff --git a/documentation/README_Functions.md b/documentation/README_Functions.md index c62f7f5f..b7bb2f13 100644 --- a/documentation/README_Functions.md +++ b/documentation/README_Functions.md @@ -188,4 +188,4 @@ to load and bind every function as specified in the testcase's `mapping.ttl`. # Remarks - When the RMLStreamer is unable to find a function description or function mapping, bind method parameters to values, it will be logged as an error to the console and the function will not be applied. - + \ No newline at end of file From 40faf4406319876e98a845b7a79a469674c495bf Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 26 Mar 2021 06:19:49 +0100 Subject: [PATCH 70/83] Revert "Update flink to 1.12.2" This reverts commit 4043cddc --- pom.xml | 2 +- src/test/scala/io/rml/framework/util/server/TestSink2.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 207b4ae5..e7f4798a 100644 --- a/pom.xml +++ b/pom.xml @@ -36,7 +36,7 @@ SOFTWARE. 
UTF-8 - 1.12.2 + 1.11.3 1.7.26 2.13.3 3.11.0 diff --git a/src/test/scala/io/rml/framework/util/server/TestSink2.scala b/src/test/scala/io/rml/framework/util/server/TestSink2.scala index caae76c1..191f4165 100644 --- a/src/test/scala/io/rml/framework/util/server/TestSink2.scala +++ b/src/test/scala/io/rml/framework/util/server/TestSink2.scala @@ -47,7 +47,7 @@ object TestSink2 extends SinkFunction[String] { class TestSink2 extends SinkFunction[String] { import TestSink2._ - override def invoke(value: String, context: SinkFunction.Context): Unit = { + override def invoke(value: String, context: SinkFunction.Context[_]): Unit = { Logger.logInfo(s"TestSink2: got value [${value}]") if (value.trim.nonEmpty) { triples = value :: triples From 1cf1f855dd0f25aaff4ba23599e612a6522d2314 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Thu, 22 Apr 2021 14:44:02 +0200 Subject: [PATCH 71/83] Generalise TriplesMapsCache to NodeCache --- src/test/scala/io/rml/framework/SandboxTests.scala | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/test/scala/io/rml/framework/SandboxTests.scala b/src/test/scala/io/rml/framework/SandboxTests.scala index e85cf129..b2021a98 100644 --- a/src/test/scala/io/rml/framework/SandboxTests.scala +++ b/src/test/scala/io/rml/framework/SandboxTests.scala @@ -25,7 +25,6 @@ package io.rml.framework import io.rml.framework.api.{FnOEnvironment, RMLEnvironment} -import io.rml.framework.api.RMLEnvironment import io.rml.framework.core.extractors.NodeCache import io.rml.framework.core.util.Util import io.rml.framework.engine.NopPostProcessor @@ -138,12 +137,4 @@ class SandboxTests extends FunSuite with Matchers with FunctionMappingTest { test("sandbox/logical-target/example-2-subjectMap") { executeTest("sandbox/logical-target/example-2-subjectMap/mapping.ttl") } - - - - - - - - } From 8c299613624ef78af2c9797ae596cf4128d54193 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 23 Apr 2021 10:25:50 +0200 Subject: [PATCH 72/83] Updated MQTT 
sink to adhere to new RichSinkFunction interface --- src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala index 90b2349f..60344c7a 100644 --- a/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala +++ b/src/main/scala/io/rml/framework/flink/sink/RichMQTTSink.scala @@ -28,7 +28,7 @@ class RichMQTTSink(val broker: String, val topic: String) extends RichSinkFuncti client.connect(connectionOptions) } - override def invoke(value: String, context: SinkFunction.Context[_]): Unit = { + override def invoke(value: String, context: SinkFunction.Context): Unit = { val payload = value.getBytes(StandardCharsets.UTF_8) client.publish(topic, payload, 2, false) } From a378bbb707ff6783c4057e0daa801a545774f2bb Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 23 Apr 2021 10:36:01 +0200 Subject: [PATCH 73/83] Revert "Revert "Update flink version to 1.12.2"" This reverts commit c5633746 --- README.md | 6 +++--- docker/docker-compose.yml | 4 ++-- documentation/README_Functions.md | 4 ++-- pom.xml | 2 +- src/test/scala/io/rml/framework/StreamTestSync.scala | 1 + src/test/scala/io/rml/framework/util/server/TestSink2.scala | 2 +- 6 files changed, 10 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 2f7cde90..c4692351 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,9 @@ If you want to deploy it yourself, read on. ### Installing Flink RMLStreamer runs its jobs on Flink clusters. -More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.11/try-flink/local_installation.html). +More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.12/try-flink/local_installation.html). 
At least a local cluster must be running in order to start executing RML Mappings with RMLStreamer. -Please note that this version works with Flink 1.11.3 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.11.3/flink-1.11.3-bin-scala_2.11.tgz). +Please note that this version works with Flink 1.12.2 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.12.2/flink-1.12.2-bin-scala_2.11.tgz). ### Building RMLStreamer @@ -46,7 +46,7 @@ The resulting `RMLStreamer-.jar`, found in the `target` folder, can be ### Executing RML Mappings Here we give examples for running RMLStreamer from the command line. We use `FLINK_BIN` to denote the Flink CLI tool, -usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.11.3/bin/flink`. +usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.12.2/bin/flink`. For Windows a `flink.bat` script is provided. The general usage is: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index fb61a26c..5e0d9d3c 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -2,7 +2,7 @@ version: '3' services: jobmanager: - image: flink:1.11.3-scala_2.11 + image: flink:1.12.2-scala_2.11 expose: - "6123" ports: @@ -14,7 +14,7 @@ services: - data:/mnt/data taskmanager: - image: flink:1.11.3-scala_2.11 + image: flink:1.12.2-scala_2.11 expose: - "6121" - "6122" diff --git a/documentation/README_Functions.md b/documentation/README_Functions.md index c62f7f5f..3cf5eb0c 100644 --- a/documentation/README_Functions.md +++ b/documentation/README_Functions.md @@ -40,7 +40,7 @@ Note that the function descriptions and function mappings are present. 
The command for running the RML Streamer on Flink should look like ``` -~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.1.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl +~/flink/flink-1.11.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl ``` ## Test Cases @@ -188,4 +188,4 @@ to load and bind every function as specified in the testcase's `mapping.ttl`. # Remarks - When the RMLStreamer is unable to find a function description or function mapping, bind method parameters to values, it will be logged as an error to the console and the function will not be applied. - + \ No newline at end of file diff --git a/pom.xml b/pom.xml index e7f4798a..207b4ae5 100644 --- a/pom.xml +++ b/pom.xml @@ -36,7 +36,7 @@ SOFTWARE. UTF-8 - 1.11.3 + 1.12.2 1.7.26 2.13.3 3.11.0 diff --git a/src/test/scala/io/rml/framework/StreamTestSync.scala b/src/test/scala/io/rml/framework/StreamTestSync.scala index f78465ea..c4101c53 100644 --- a/src/test/scala/io/rml/framework/StreamTestSync.scala +++ b/src/test/scala/io/rml/framework/StreamTestSync.scala @@ -206,6 +206,7 @@ abstract class StreamTestSync extends StaticTestSpec with ReadMappingBehaviour w val customConfig = new Configuration() customConfig.setString("io.tmp.dirs", getTempDir.getAbsolutePath) customConfig.setString("rest.bind-port", "50000-51000") // see https://github.com/apache/flink/commit/730eed71ef3f718d61f85d5e94b1060844ca56db + customConfig.setString("classloader.check-leaked-classloader", "false") // this option is required to fix a strange issue related to class loading, see discussion: https://gitlab.ilabt.imec.be/rml/proc/rml-streamer/-/issues/121 val configuration = new MiniClusterConfiguration.Builder() .setConfiguration(customConfig) diff --git a/src/test/scala/io/rml/framework/util/server/TestSink2.scala b/src/test/scala/io/rml/framework/util/server/TestSink2.scala index 
191f4165..caae76c1 100644 --- a/src/test/scala/io/rml/framework/util/server/TestSink2.scala +++ b/src/test/scala/io/rml/framework/util/server/TestSink2.scala @@ -47,7 +47,7 @@ object TestSink2 extends SinkFunction[String] { class TestSink2 extends SinkFunction[String] { import TestSink2._ - override def invoke(value: String, context: SinkFunction.Context[_]): Unit = { + override def invoke(value: String, context: SinkFunction.Context): Unit = { Logger.logInfo(s"TestSink2: got value [${value}]") if (value.trim.nonEmpty) { triples = value :: triples From 985055b67c4f915c296814da4910992025ab6a14 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 23 Apr 2021 10:36:29 +0200 Subject: [PATCH 74/83] Revert "Revert "Update docker and documentation flink version to 1.12.2"" This reverts commit f1741cef --- documentation/README_Functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/README_Functions.md b/documentation/README_Functions.md index 3cf5eb0c..7a2e75d3 100644 --- a/documentation/README_Functions.md +++ b/documentation/README_Functions.md @@ -40,7 +40,7 @@ Note that the function descriptions and function mappings are present. 
The command for running the RML Streamer on Flink should look like ``` -~/flink/flink-1.11.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl +~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.0.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl ``` ## Test Cases From 447152cf68c8567567f1b4695e6cb8fc6a77c29c Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 23 Apr 2021 10:54:24 +0200 Subject: [PATCH 75/83] Updated JsonSurfer to solve dependency conflict --- CHANGELOG.md | 6 ++++++ pom.xml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49e7f688..b4e7ef64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Support for stream partitioning in windows * Joins of data streams +### Added +* Target in RML TODO: link to spec + +### Changed +* Updated JsonSurfer from version 1.5.1 to 1.6.0 + ## [2.1.0] - 2020-03-18 ### Added diff --git a/pom.xml b/pom.xml index 207b4ae5..998d8019 100644 --- a/pom.xml +++ b/pom.xml @@ -113,7 +113,7 @@ SOFTWARE. com.github.jsurfer jsurfer-jackson - 1.5.1 + 1.6.0 From 58b87ee98509eb9a5381172d0cb76674f8bcb010 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Fri, 23 Apr 2021 11:32:21 +0200 Subject: [PATCH 76/83] Refactor RichMQTTSource to accept explicit parameters instead of properties. 
--- .../extractors/std/StdDataSourceExtractor.scala | 12 +----------- .../io/rml/framework/core/model/MQTTStream.scala | 13 +++++++++---- .../rml/framework/flink/source/JSONStream.scala | 12 ++++++++---- .../framework/flink/source/RichMQTTSource.scala | 15 ++++++++++++++- 4 files changed, 32 insertions(+), 20 deletions(-) diff --git a/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala b/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala index 30de41a6..73753597 100644 --- a/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala +++ b/src/main/scala/io/rml/framework/core/extractors/std/StdDataSourceExtractor.scala @@ -32,8 +32,6 @@ import io.rml.framework.core.model.rdf.RDFResource import io.rml.framework.core.vocabulary._ import io.rml.framework.shared.RMLException -import java.util.Properties - class StdDataSourceExtractor extends DataSourceExtractor { /** @@ -141,14 +139,6 @@ class StdDataSourceExtractor extends DataSourceExtractor { } logDebug("MQTT data source defined in mapping file. hypermediaTarget: " + hypermediaTarget + ", contentType: " + contentType + ", dup: " + dup + ", qosOpt: " + qosOpt); - val mqttProperties = new Properties; - mqttProperties.put("hypermediaTarget", hypermediaTarget); - mqttProperties.put("contentType", contentType); - mqttProperties.put("controlPacketValue", controlPacketValue); - if (qosOpt.isDefined) { - mqttProperties.put("qos", qosOpt.get); - } - mqttProperties.put("dup", dup); // Java 8 can't handle Scala Boolean objects in a Properties object. 
- MQTTStream(mqttProperties) + MQTTStream(hypermediaTarget, contentType, controlPacketValue, dup, qosOpt) } } diff --git a/src/main/scala/io/rml/framework/core/model/MQTTStream.scala b/src/main/scala/io/rml/framework/core/model/MQTTStream.scala index e15b9320..9f6286fa 100644 --- a/src/main/scala/io/rml/framework/core/model/MQTTStream.scala +++ b/src/main/scala/io/rml/framework/core/model/MQTTStream.scala @@ -25,13 +25,18 @@ package io.rml.framework.core.model -import java.util.{Objects, Properties} -import scala.collection.JavaConversions._ +import java.util.Objects -case class MQTTStream(properties : Properties) extends StreamDataSource { +case class MQTTStream( + hypermediaTarget: String, + contentType: String, + controlPacketValue: String, + dup: String, + qos: Option[String] + ) extends StreamDataSource { override def uri: ExplicitNode = { - val totalHash = Objects.hash(properties.entrySet().map(e=>s"${e.getKey}=${e.getValue}")) + val totalHash = Objects.hash(hypermediaTarget, contentType, controlPacketValue, dup, qos.getOrElse("")) Uri(totalHash.toHexString) } } diff --git a/src/main/scala/io/rml/framework/flink/source/JSONStream.scala b/src/main/scala/io/rml/framework/flink/source/JSONStream.scala index 2b76d7d7..9af0c7db 100644 --- a/src/main/scala/io/rml/framework/flink/source/JSONStream.scala +++ b/src/main/scala/io/rml/framework/flink/source/JSONStream.scala @@ -25,11 +25,9 @@ package io.rml.framework.flink.source import io.rml.framework.core.internal.Logging -import io.rml.framework.core.model.{FileStream, KafkaStream, MQTTStream, StreamDataSource, TCPSocketStream} -import io.rml.framework.core.vocabulary.QueryVoc import io.rml.framework.core.item.Item import io.rml.framework.core.item.json.JSONItem -import io.rml.framework.core.model.{FileStream, KafkaStream, StreamDataSource, TCPSocketStream} +import io.rml.framework.core.model._ import io.rml.framework.flink.connector.kafka.UniversalKafkaConnectorFactory import 
org.apache.flink.api.common.serialization.SimpleStringSchema import org.apache.flink.api.scala._ @@ -81,7 +79,13 @@ object JSONStream extends Logging { def fromMQTTStream(mqttStream : MQTTStream, jsonPaths : List[String])(implicit env: StreamExecutionEnvironment):JSONStream = { - val source = new RichMQTTSource(mqttStream.properties) + val source = RichMQTTSource( + mqttStream.hypermediaTarget, + mqttStream.contentType, + mqttStream.controlPacketValue, + mqttStream.dup, + mqttStream.qos + ) val parallelStream = StreamUtil.paralleliseOverSlots(env.addSource(source)) val stream: DataStream[Iterable[Item]] = parallelStream .map { item => diff --git a/src/main/scala/io/rml/framework/flink/source/RichMQTTSource.scala b/src/main/scala/io/rml/framework/flink/source/RichMQTTSource.scala index b49f78c1..71fb791a 100644 --- a/src/main/scala/io/rml/framework/flink/source/RichMQTTSource.scala +++ b/src/main/scala/io/rml/framework/flink/source/RichMQTTSource.scala @@ -15,7 +15,20 @@ import scala.collection.JavaConversions._ * @param properties * @tparam T */ -case class RichMQTTSource(properties: Properties) extends RichSourceFunction[String] with Logging { +case class RichMQTTSource(hypermediaTarget: String, + contentType: String, + controlPacketValue: String, + dup: String, + qos: Option[String]) extends RichSourceFunction[String] with Logging { + + private val properties = new Properties; + properties.put("hypermediaTarget", hypermediaTarget); + properties.put("contentType", contentType); + properties.put("controlPacketValue", controlPacketValue); + if (qos.isDefined) { + properties.put("qos", qos.get); + } + properties.put("dup", dup); private var client: MqttClient = _ From 0042f424df73aeb26c76cf114053fcc759d4c678 Mon Sep 17 00:00:00 2001 From: Michiel Derveeuw Date: Mon, 3 May 2021 17:11:31 +0200 Subject: [PATCH 77/83] Update flink to version 1.12.3 --- README.md | 4 ++-- docker/docker-compose.yml | 4 ++-- documentation/README_Functions.md | 10 +++++----- pom.xml | 2 +- 4 
files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index c4692351..68463371 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ If you want to deploy it yourself, read on. RMLStreamer runs its jobs on Flink clusters. More information on how to install Flink and getting started can be found [here](https://ci.apache.org/projects/flink/flink-docs-release-1.12/try-flink/local_installation.html). At least a local cluster must be running in order to start executing RML Mappings with RMLStreamer. -Please note that this version works with Flink 1.12.2 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.12.2/flink-1.12.2-bin-scala_2.11.tgz). +Please note that this version works with Flink 1.12.3 with Scala 2.11 support, which can be downloaded [here](https://archive.apache.org/dist/flink/flink-1.12.3/flink-1.12.3-bin-scala_2.11.tgz). ### Building RMLStreamer @@ -46,7 +46,7 @@ The resulting `RMLStreamer-.jar`, found in the `target` folder, can be ### Executing RML Mappings Here we give examples for running RMLStreamer from the command line. We use `FLINK_BIN` to denote the Flink CLI tool, -usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.12.2/bin/flink`. +usually found in the `bin` directory of the Flink installation. E.g. `/home/myuser/flink-1.12.3/bin/flink`. For Windows a `flink.bat` script is provided. 
The general usage is: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 5e0d9d3c..0f89ce75 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -2,7 +2,7 @@ version: '3' services: jobmanager: - image: flink:1.12.2-scala_2.11 + image: flink:1.12.3-scala_2.11 expose: - "6123" ports: @@ -14,7 +14,7 @@ services: - data:/mnt/data taskmanager: - image: flink:1.12.2-scala_2.11 + image: flink:1.12.3-scala_2.11 expose: - "6121" - "6122" diff --git a/documentation/README_Functions.md b/documentation/README_Functions.md index c62f7f5f..e19820ac 100644 --- a/documentation/README_Functions.md +++ b/documentation/README_Functions.md @@ -15,13 +15,13 @@ These files can be obtained from `src/main/resources`: ## Example: RML Streamer + Flink Flink's `lib` directory should contain the jar-files with the custom functions. In this example, these are marked with `*` ``` -flink-1.12.2-scala_2.11 +flink-1.12.3-scala_2.11 └── lib ├── GrelFunctions.jar * ├── IDLabFunctions.jar * - ├── flink-dist_2.11-1.12.2.jar - ├── flink-table-blink_2.11-1.12.2.jar - ├── flink-table_2.11-1.12.2.jar + ├── flink-dist_2.11-1.12.3.jar + ├── flink-table-blink_2.11-1.12.3.jar + ├── flink-table_2.11-1.12.3.jar ├── log4j-1.2.17.jar └── slf4j-log4j12-1.7.15.jar ``` @@ -40,7 +40,7 @@ Note that the function descriptions and function mappings are present. The command for running the RML Streamer on Flink should look like ``` -~/flink/flink-1.12.2-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.1.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl +~/flink/flink-1.12.3-scala_2.11/bin/flink run -c io.rml.framework.Main RMLStreamer-2.1.1-SNAPSHOT.jar toFile --output-path $(pwd)'/out.ttl' -m mapping.ttl ``` ## Test Cases diff --git a/pom.xml b/pom.xml index 207b4ae5..290b25a8 100644 --- a/pom.xml +++ b/pom.xml @@ -36,7 +36,7 @@ SOFTWARE. 
UTF-8 - 1.12.2 + 1.12.3 1.7.26 2.13.3 3.11.0 From b2697b5e79e4a783b8e82bc16beca1eb58166079 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Mon, 10 May 2021 09:08:31 +0000 Subject: [PATCH 78/83] Update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49e7f688..20945aa2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Support for stream partitioning in windows * Joins of data streams +## 2.1.1 - ? + +### Changed +* Update Flink from version 1.11.3 to 1.12.2 + ## [2.1.0] - 2020-03-18 ### Added From 715779dbd1c3739fdcdaf479484910fca96cf405 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Mon, 17 May 2021 15:36:59 +0200 Subject: [PATCH 79/83] * Logical target for streams mechanism ready; * Streaming file sink implemented as logical target; * Default logical target introduced for backward compatibility with CLI output parameter. 
--- src/main/scala/io/rml/framework/Main.scala | 71 ++++++---- .../framework/core/extractors/NodeCache.scala | 6 + .../model/DefaultLogicalTargetFactory.scala | 40 ++++++ .../model/std/StdDefaultLogicalTarget.scala | 35 +++++ .../io/rml/framework/core/util/Util.scala | 22 ++- .../rml/framework/engine/PostProcessor.scala | 62 ++++++--- .../io/rml/framework/engine/Processor.scala | 2 +- .../framework/engine/StaticProcessor.scala | 7 +- .../framework/engine/StreamProcessor.scala | 12 +- .../engine/statement/Statement.scala | 7 +- .../flink/sink/TargetSinkFactory.scala | 127 ++++++++++++++++++ 11 files changed, 332 insertions(+), 59 deletions(-) create mode 100644 src/main/scala/io/rml/framework/core/model/DefaultLogicalTargetFactory.scala create mode 100644 src/main/scala/io/rml/framework/core/model/std/StdDefaultLogicalTarget.scala create mode 100644 src/main/scala/io/rml/framework/flink/sink/TargetSinkFactory.scala diff --git a/src/main/scala/io/rml/framework/Main.scala b/src/main/scala/io/rml/framework/Main.scala index 151e786b..99b5467f 100644 --- a/src/main/scala/io/rml/framework/Main.scala +++ b/src/main/scala/io/rml/framework/Main.scala @@ -37,6 +37,7 @@ import io.rml.framework.engine._ import io.rml.framework.engine.statement.StatementEngine import io.rml.framework.flink.connector.kafka.{RMLPartitioner, UniversalKafkaConnectorFactory} import io.rml.framework.flink.function.{FnOEnvironmentLoader, FnOEnvironmentStreamLoader, RichItemIdentityFunction, RichStreamItemIdentityFunction} +import io.rml.framework.flink.sink.{RichMQTTSink, TargetSinkFactory} import io.rml.framework.flink.source.{FileDataSet, Source} import org.apache.flink.api.common.serialization.{SimpleStringEncoder, SimpleStringSchema} import org.apache.flink.api.scala._ @@ -52,7 +53,6 @@ import org.apache.flink.util.Collector import java.util.Properties import scala.collection.{immutable, mutable} -import io.rml.framework.flink.sink.RichMQTTSink /** * @@ -240,31 +240,26 @@ object Main extends 
Logging { new RichStreamItemIdentityFunction() } + // Create sinks for every logical target + val logicalTargetId2Sinks = TargetSinkFactory.createStreamSinksFromLogicalTargetCache() + // This is the collection of all data streams that are created by the current mapping - val processedStreams: immutable.Iterable[DataStream[String]] = + val processedStreams: immutable.Iterable[DataStream[String]] = { sourceEngineMap.map(entry => { val source = entry._1.asInstanceOf[io.rml.framework.flink.source.Stream] val engine = entry._2 // link the different steps in each pipeline - source.stream // this will generate a stream of items + val dataStream = source.stream // this will generate a stream of items // process every item by a processor with a loaded engine .map(preProcessingFunction) .map(new StdStreamProcessor(engine)) .name("Execute mapping statements on items") - // format every list of triples (as strings) - .flatMap( - list => { - if (list.nonEmpty) { - Some(list.reduce((a, b) => a + "\n" + b) + "\n\n") - } else { - None - } - } - ) - .name("Convert triples to strings") + // add sinks to the data stream + TargetSinkFactory.appendSinksToStream(logicalTargetId2Sinks, dataStream) }) + } // union all streams to one final stream unionStreams(processedStreams) @@ -354,17 +349,13 @@ object Main extends Logging { // the "normal" scenario. 
val engine = StatementEngine.fromTriplesMaps(List(triplesMap)) - stream + val dataStream = stream .map(new StdStreamProcessor(engine)) .name("Execute mapping statements on items") - // format every list of triples (as strings) - .flatMap(list => - if (list.nonEmpty) { - Some(list.reduce((a, b) => a + "\n" + b) + "\n\n") - } else None - ) - .name("Convert triples to strings") + val logicalTargetId2Sinks = TargetSinkFactory.createStreamSinksFromLogicalTargetCache() + + TargetSinkFactory.appendSinksToStream(logicalTargetId2Sinks, dataStream) } }) @@ -508,9 +499,16 @@ object Main extends Logging { .map(preProcessingFunction) .map(new StdStaticProcessor(engine)) .name("Execute mapping statements on items") - + .map(outputStringToLogicalTargetIDs => { + outputStringToLogicalTargetIDs.map(outputStringToLogicalTargetID => outputStringToLogicalTargetID._2) + // TODO: integrate logical target for data set. + }) + .name("Ignoring the logical target for now.") + .flatMap(list => { + list.seq + }) // format every list of triples (as strings) - .flatMap(list => if (list.nonEmpty) Some(list.reduce((a, b) => a + "\n" + b) + "\n\n") else None) + .reduce((a, b) => a + "\n" + b + "\n\n") .name("Convert triples to strings") }) @@ -592,7 +590,16 @@ object Main extends Logging { .map(new JoinedStaticProcessor(engine)).name("Execute mapping statements on joined items") // format the list of triples as strings - .flatMap(list => if (list.nonEmpty) Some(list.reduce((a, b) => a + "\n" + b)) else None) + .map(outputStringToLogicalTargetIDs => { + outputStringToLogicalTargetIDs.map(outputStringToLogicalTargetID => outputStringToLogicalTargetID._2) + // TODO: integrate logical target for data set. 
+ }) + .name("Ignoring the logical target for now.") + .flatMap(list => { + list.seq + }) + // format every list of triples (as strings) + .reduce((a, b) => a + "\n" + b + "\n\n") .name("Convert triples to strings") } else { // if there are no join conditions a cross join will be executed @@ -604,7 +611,16 @@ object Main extends Logging { JoinedItem(items._1, items._2) ) // create a JoinedItem from the crossed items .map(new JoinedStaticProcessor(engine)).name("Execute mapping statements on joined items") // process the joined items - .flatMap(list => if (list.nonEmpty) Some(list.reduce((a, b) => a + "\n" + b)) else None) // format the triples + .map(outputStringToLogicalTargetIDs => { + outputStringToLogicalTargetIDs.map(outputStringToLogicalTargetID => outputStringToLogicalTargetID._2) + // TODO: integrate logical target for data set. + }) + .name("Ignoring the logical target for now.") + .flatMap(list => { + list.seq + }) + // format every list of triples (as strings) + .reduce((a, b) => a + "\n" + b + "\n\n") .name("Convert joined triples to strings") } @@ -646,7 +662,4 @@ object Main extends Logging { } else head } - - - } diff --git a/src/main/scala/io/rml/framework/core/extractors/NodeCache.scala b/src/main/scala/io/rml/framework/core/extractors/NodeCache.scala index 80231508..ebe5ffb4 100644 --- a/src/main/scala/io/rml/framework/core/extractors/NodeCache.scala +++ b/src/main/scala/io/rml/framework/core/extractors/NodeCache.scala @@ -45,4 +45,10 @@ object NodeCache extends scala.collection.mutable.HashMap[String, Node] { case _ => throw new InternalError(s"Expected TriplesMap in node cache for key ${identifier}") } } + + def logicalTargetIterator: Iterator[(String, LogicalTarget)] = { + this.iterator + .filter(entry => entry._2.isInstanceOf[LogicalTarget]) + .map(entry => (entry._1, entry._2.asInstanceOf[LogicalTarget])) + } } diff --git a/src/main/scala/io/rml/framework/core/model/DefaultLogicalTargetFactory.scala 
b/src/main/scala/io/rml/framework/core/model/DefaultLogicalTargetFactory.scala new file mode 100644 index 00000000..b863cfa0 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/model/DefaultLogicalTargetFactory.scala @@ -0,0 +1,40 @@ +package io.rml.framework.core.model + +import io.rml.framework.core.model.std.StdDefaultLogicalTarget + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ +object DefaultLogicalTargetFactory { + + /** + * + * @param path The path to the file. This can be absolute, relative to the path + * of the mapping file, or a URI in the form of file:///... 
+ */ + def createFileLogicalTarget(path: String, serialization: String, compression: Option[Uri]): LogicalTarget = { + val dataTarget = FileDataTarget(Uri(path)) + StdDefaultLogicalTarget(dataTarget, Uri(serialization), compression) + } +} diff --git a/src/main/scala/io/rml/framework/core/model/std/StdDefaultLogicalTarget.scala b/src/main/scala/io/rml/framework/core/model/std/StdDefaultLogicalTarget.scala new file mode 100644 index 00000000..265a5331 --- /dev/null +++ b/src/main/scala/io/rml/framework/core/model/std/StdDefaultLogicalTarget.scala @@ -0,0 +1,35 @@ +package io.rml.framework.core.model.std + +import io.rml.framework.core.model.{DataTarget, LogicalTarget, Uri} + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * This logical target is made by the + * + * */ +case class StdDefaultLogicalTarget (override val target: DataTarget, + override val serialization: Uri, + override val compression: Option[Uri]) extends LogicalTarget { + override def identifier: String = "default" +} diff --git a/src/main/scala/io/rml/framework/core/util/Util.scala b/src/main/scala/io/rml/framework/core/util/Util.scala index 7f1f3947..1dab8f2a 100644 --- a/src/main/scala/io/rml/framework/core/util/Util.scala +++ b/src/main/scala/io/rml/framework/core/util/Util.scala @@ -27,6 +27,7 @@ package io.rml.framework.core.util import io.rml.framework.api.RMLEnvironment import io.rml.framework.core.extractors.MappingReader import io.rml.framework.core.internal.Logging +import io.rml.framework.core.model.rdf.SerializableRDFQuad import io.rml.framework.core.model.{FormattedRMLMapping, Literal, Node, RMLMapping} import io.rml.framework.core.vocabulary.QueryVoc import io.rml.framework.shared.ReadException @@ -36,7 +37,7 @@ import java.net.URI import java.nio.charset.StandardCharsets import java.nio.file.{Files, Paths} import java.util.regex.Pattern -import scala.collection.mutable.ListBuffer +import scala.collection.mutable.{ListBuffer, Map} import scala.io.Source import scala.util.{Failure, Success, Try} @@ -265,6 +266,23 @@ object Util extends Logging{ .getCanonicalFile } - + /** + * Renders RDF statements as N-Quads and groups them by logical target ID + * @param quadStrings The generated RDF statements + * @return A map (logical target ID -> Set[rendered statement as N-Quad]) + */ + def groupQuadStringsPerLogicalTargetID(quadStrings: Iterable[SerializableRDFQuad]): Map[String, Set[String]] = { + val logicalTargetIDs2outputStrings: Map[String, Set[String]] = Map.empty[String, Set[String]].withDefaultValue(Set.empty[String]) + quadStrings.foreach(quad => { + val logicalTargetIDs: Set[String] = quad.logicalTargetIDs + val outputString = quad.toString + logicalTargetIDs.foreach(logicalTargetID => { + var 
outputStrings: Set[String] = logicalTargetIDs2outputStrings(logicalTargetID) + outputStrings += outputString + logicalTargetIDs2outputStrings += logicalTargetID -> outputStrings + }) + }) + logicalTargetIDs2outputStrings + } } diff --git a/src/main/scala/io/rml/framework/engine/PostProcessor.scala b/src/main/scala/io/rml/framework/engine/PostProcessor.scala index 96146d68..acdfc659 100644 --- a/src/main/scala/io/rml/framework/engine/PostProcessor.scala +++ b/src/main/scala/io/rml/framework/engine/PostProcessor.scala @@ -31,6 +31,7 @@ import org.apache.jena.riot.{Lang, RDFDataMgr} import java.io.ByteArrayOutputStream import java.nio.charset.StandardCharsets +import scala.collection.immutable.List @@ -40,7 +41,12 @@ import java.nio.charset.StandardCharsets */ trait PostProcessor extends Serializable{ - def process(quadStrings: Iterable[SerializableRDFQuad]): List[String] + /** + * Serializes quads according to PostProcessor + * @param serializableRDFQuads + * @return A sequence of pairs (logical target ID, output string) + */ + def process(serializableRDFQuads: Iterable[SerializableRDFQuad]): Iterable[(String, String)] def outputFormat: Format } @@ -49,24 +55,33 @@ trait AtMostOneProcessor extends PostProcessor // TODO: define exact semantics /** - * Does nothing, returns the input list of strings + * Renders the RDF statements as a sequence of (logical target ID, N-Quad) pairs. */ class NopPostProcessor extends PostProcessor { - override def process(quadStrings: Iterable[SerializableRDFQuad]): List[String] = { - quadStrings.map(_.toString).toList + override def process(serializableRDFQuads: Iterable[SerializableRDFQuad]): Iterable[(String, String)] = { + serializableRDFQuads.flatMap(quad => { + val quadStr = quad.toString + for (logicalTargetID <- quad.logicalTargetIDs) yield (logicalTargetID, quadStr) + }) } override def outputFormat: Format = NQuads } /** - * - * Groups the list of generated triples from one record into one big - * string.
+ * Groups the list of generated statements from one record into a pair + * (logical target ID, one big string of N-Quads separated by '\n'). */ class BulkPostProcessor extends AtMostOneProcessor { - override def process(quadStrings: Iterable[SerializableRDFQuad]): List[String] = { - List(quadStrings.mkString("\n")) + override def process(serializableRDFQuads: Iterable[SerializableRDFQuad]): Iterable[(String, String)] = { + // first group quads per logical target ID + val logicalTargetIDs2outputStrings: Map[String, Set[String]] = Util.groupQuadStringsPerLogicalTargetID(serializableRDFQuads).toMap + + logicalTargetIDs2outputStrings.flatMap(logicalTargetID2outputStrings => { + val logicalTargetID = logicalTargetID2outputStrings._1 + val outputStrings: String = logicalTargetID2outputStrings._2.mkString("\n") + List((logicalTargetID, outputStrings)) + }) } override def outputFormat: Format = NQuads @@ -74,26 +89,31 @@ class BulkPostProcessor extends AtMostOneProcessor { /** * - * Format the generated triples into json-ld format + * Formats the generated statements into (logical target ID, json-ld string) pairs */ class JsonLDProcessor() extends AtMostOneProcessor { override def outputFormat: Format = JSON_LD - override def process(quadStrings: Iterable[SerializableRDFQuad]): List[String] = { - if (quadStrings.isEmpty || quadStrings.mkString.isEmpty) { + override def process(serializableRDFQuads: Iterable[SerializableRDFQuad]): Iterable[(String, String)] = { + if (serializableRDFQuads.isEmpty || serializableRDFQuads.mkString.isEmpty) { return List() } - - val quads = quadStrings.mkString("\n") - val dataset = JenaUtil.readDataset(quads, RMLEnvironment.getGeneratorBaseIRI().getOrElse(""), NQuads) - val bos = new ByteArrayOutputStream - Util.tryWith(bos: ByteArrayOutputStream) { - bos => { - RDFDataMgr.write(bos, dataset, Lang.JSONLD) - List(bos.toString(StandardCharsets.UTF_8.name())) + + val logicalTargetIDs2outputStrings: Map[String, Set[String]] = 
Util.groupQuadStringsPerLogicalTargetID(serializableRDFQuads).toMap + logicalTargetIDs2outputStrings.flatMap(logicalTargetID2outputStrings => { + val logicalTargetID = logicalTargetID2outputStrings._1 + val outputStrings: String = logicalTargetID2outputStrings._2.mkString("\n") + val dataset = JenaUtil.readDataset(outputStrings, RMLEnvironment.getGeneratorBaseIRI().getOrElse(""), NQuads) + val bos: ByteArrayOutputStream = new ByteArrayOutputStream + val jsonLDOutput = Util.tryWith(bos: ByteArrayOutputStream) { + bos => { + RDFDataMgr.write(bos, dataset, Lang.JSONLD) + bos.toString(StandardCharsets.UTF_8.name()) + } } - } + List((logicalTargetID, jsonLDOutput)) + }) } } diff --git a/src/main/scala/io/rml/framework/engine/Processor.scala b/src/main/scala/io/rml/framework/engine/Processor.scala index e150c71c..565e906f 100644 --- a/src/main/scala/io/rml/framework/engine/Processor.scala +++ b/src/main/scala/io/rml/framework/engine/Processor.scala @@ -38,5 +38,5 @@ import org.apache.flink.api.common.functions.RichMapFunction * @tparam T has upper bound of [[Item]] * @tparam IN specifies the type of the input for the map(..) 
function */ -abstract class Processor[T<:Item, IN](engine: StatementEngine[T])(implicit postProcessor: PostProcessor) extends RichMapFunction[IN, List[String]] +abstract class Processor[T<:Item, IN](engine: StatementEngine[T])(implicit postProcessor: PostProcessor) extends RichMapFunction[IN, Iterable[(String, String)]] diff --git a/src/main/scala/io/rml/framework/engine/StaticProcessor.scala b/src/main/scala/io/rml/framework/engine/StaticProcessor.scala index 1c5dc226..99ac853e 100644 --- a/src/main/scala/io/rml/framework/engine/StaticProcessor.scala +++ b/src/main/scala/io/rml/framework/engine/StaticProcessor.scala @@ -29,7 +29,12 @@ import io.rml.framework.engine.statement.StatementEngine abstract class StaticProcessor[T<:Item](engine: StatementEngine[T]) (implicit postProcessor: PostProcessor) extends Processor[T, T](engine) { - override def map(in: T): List[String] = { + /** + * Maps items to an iterable of (logical target ID, rendered RDF statement(s)) pairs + * @param in Input items, in RMLStreamer of [[Item]] type + * @return + */ + override def map(in: T): Iterable[(String, String)] = { val triples = engine.process(in) postProcessor.process(triples) diff --git a/src/main/scala/io/rml/framework/engine/StreamProcessor.scala b/src/main/scala/io/rml/framework/engine/StreamProcessor.scala index bc857c73..90767dc4 100644 --- a/src/main/scala/io/rml/framework/engine/StreamProcessor.scala +++ b/src/main/scala/io/rml/framework/engine/StreamProcessor.scala @@ -28,11 +28,17 @@ import io.rml.framework.core.item.{Item, JoinedItem} import io.rml.framework.engine.statement.StatementEngine abstract class StreamProcessor[T <: Item](engine: StatementEngine[T])(implicit postProcessor: PostProcessor) extends Processor[T, Iterable[T]](engine) { - override def map(in: Iterable[T]): List[String] = { + + /** + * Maps items to serialized RDF according to the mapping rules encompassed by the statement engine + * @param in A sequence of input items + * @return A sequence of (logical
target ID, rendered RDF statement(s)) pairs + */ + override def map(in: Iterable[T]): Iterable[(String, String)] = { if (in.isEmpty) return List() - val triples = in flatMap engine.process - postProcessor.process(triples) + val quads = in flatMap engine.process + postProcessor.process(quads) } } diff --git a/src/main/scala/io/rml/framework/engine/statement/Statement.scala b/src/main/scala/io/rml/framework/engine/statement/Statement.scala index f6552e57..66103417 100644 --- a/src/main/scala/io/rml/framework/engine/statement/Statement.scala +++ b/src/main/scala/io/rml/framework/engine/statement/Statement.scala @@ -111,7 +111,7 @@ case class StdStatement(subjectGenerator: Item => Option[Iterable[TermNode]], object Statement extends Logging { - def quadCombination(subjectIter: Iterable[TermNode], predicateIter: Iterable[Uri], objIter: Iterable[Entity], graphIterOpt: Option[Iterable[Uri]] = None): Iterable[(TermNode, Uri, Entity, Option[Uri])] = { + private def quadCombination(subjectIter: Iterable[TermNode], predicateIter: Iterable[Uri], objIter: Iterable[Entity], graphIterOpt: Option[Iterable[Uri]] = None): Iterable[(TermNode, Uri, Entity, Option[Uri])] = { val graphIter: Iterable[Uri] = graphIterOpt getOrElse List() @@ -148,7 +148,10 @@ object Statement extends Logging { } val graphUri = graphOpt.map(SerializableRDFResource) - val result = Some(SerializableRDFQuad(subjectResource, predicateResource, objectNode, graphUri, logicalTargetIDs)) + // if there are no logical target IDs, then the "default" logical target ID has to be set. 
+ val newLogicalTargetIDs: Set[String] = if (logicalTargetIDs.isEmpty) Set("default") else logicalTargetIDs + + val result = Some(SerializableRDFQuad(subjectResource, predicateResource, objectNode, graphUri, newLogicalTargetIDs)) logDebug(result.get.toString) result } diff --git a/src/main/scala/io/rml/framework/flink/sink/TargetSinkFactory.scala b/src/main/scala/io/rml/framework/flink/sink/TargetSinkFactory.scala new file mode 100644 index 00000000..28aee8ec --- /dev/null +++ b/src/main/scala/io/rml/framework/flink/sink/TargetSinkFactory.scala @@ -0,0 +1,127 @@ +package io.rml.framework.flink.sink + +import io.rml.framework.core.extractors.NodeCache +import io.rml.framework.core.model.{DataTarget, FileDataTarget, LogicalTarget} +import io.rml.framework.shared.RMLException +import org.apache.flink.api.common.serialization.SimpleStringEncoder +import org.apache.flink.api.scala.createTypeInformation +import org.apache.flink.core.fs.Path +import org.apache.flink.streaming.api.functions.ProcessFunction +import org.apache.flink.streaming.api.functions.sink.SinkFunction +import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner +import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy +import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} +import org.apache.flink.streaming.api.scala.{DataStream, OutputTag} +import org.apache.flink.util.Collector + +import scala.collection.mutable.Map + +/** + * MIT License + * + * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit 
persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * */ +object TargetSinkFactory { + + /** + * Creates an output sink for every logical target found in the [[NodeCache]]. + * @return A map (logical target ID -> output sink function) + */ + def createStreamSinksFromLogicalTargetCache(): Map[String, SinkFunction[String]] = { + val logicalTargetId2Sink: Map[String, SinkFunction[String]] = Map.empty + NodeCache.logicalTargetIterator.foreach(identifier2target => { + val identifier = identifier2target._1 + val logicalTarget: LogicalTarget = identifier2target._2 + val dataTarget: DataTarget = logicalTarget.target + val sink: SinkFunction[String] = dataTarget match { + case fileDataTarget: FileDataTarget => createFileStreamSink(fileDataTarget) + case _ => throw new RMLException(s"${dataTarget.getClass.toString} not supported as data target.") + } + logicalTargetId2Sink += identifier -> sink + }) + logicalTargetId2Sink + } + + /** + * Appends all logical target sinks to the given stream by writing records of the stream to side outputs. + * Each side output has a tag that corresponds to a logical target ID.
+ * @param logicalTargetId2Sinks Maps a logical target ID to a sink + * @param dataStream The given data stream + * @return A stream with records that are *not* written to any logical target, + * i.e., they must be written to the output given as the command line argument. + */ + def appendSinksToStream(logicalTargetId2Sinks: Map[String, SinkFunction[String]], dataStream: DataStream[Iterable[(String, String)]]): DataStream[String] = { + + // "split" the stream according to logicalTargetId (which becomes the output tag) + val mainDataStream = dataStream.process(new ProcessFunction[Iterable[(String, String)], String] { + override def processElement(logicalTargetId2OutputStrings: Iterable[(String, String)], ctx: ProcessFunction[Iterable[(String, String)], String]#Context, out: Collector[String]): Unit = { + logicalTargetId2OutputStrings.foreach(logicalTargetId2OutputString => { + val logicalTargetId = logicalTargetId2OutputString._1 + val outputString = logicalTargetId2OutputString._2 + if (logicalTargetId.equals("default")) { + out.collect(outputString) + } else { + ctx.output(OutputTag[String](logicalTargetId), outputString) + } + }) + } + }) + + // create side output stream for every logicalTargetId and append the sink. + logicalTargetId2Sinks.foreach(logicalTargetId2Sink => { + val logicalTargetId = logicalTargetId2Sink._1 + val sink = logicalTargetId2Sink._2 + val sideOutputStream = mainDataStream.getSideOutput(OutputTag[String](logicalTargetId)) + sideOutputStream.addSink(sink) + }) + + mainDataStream + } + + private def createFileStreamSink(fileDataTarget: FileDataTarget): SinkFunction[String] = { + createFileStreamSink(fileDataTarget.uri.value) + } + + private def createFileStreamSink(outputPath: String): SinkFunction[String] = { + val parts = outputPath.split('.') + val path = parts.slice(0, parts.length - 1).mkString(".") + val suffix = if (parts.length > 1) { + "." 
++ parts.slice(1, parts.length).mkString(".") + } else { + "" + } + + // remark: does not support compression + // TODO: One can override SimpleStringEncoder (or implement Encoder) to support compression. + StreamingFileSink + .forRowFormat(new Path(path), + new SimpleStringEncoder[String]("UTF-8") + ) + .withBucketAssigner(new BasePathBucketAssigner[String]) + .withRollingPolicy(OnCheckpointRollingPolicy.build()) + .withOutputFileConfig(OutputFileConfig + .builder() + .withPartSuffix(suffix) + .build()) + .build() + } +} From f8804200ea053f74337141417863780699c98653 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 18 May 2021 11:47:06 +0200 Subject: [PATCH 80/83] Updated CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4e7ef64..00a3c7b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,10 +11,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Joins of data streams ### Added -* Target in RML TODO: link to spec +* [Target in RML](https://rml.io/specs/rml-target) for streams: define an output sink at term map level. 
### Changed * Updated JsonSurfer from version 1.5.1 to 1.6.0 +* Updated Flink from version 1.11.3 to 1.12.2 ## [2.1.0] - 2020-03-18 From 73524b812e965f00f93db089733017e8255b0f66 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 18 May 2021 16:20:31 +0200 Subject: [PATCH 81/83] Removed unused code --- .../model/DefaultLogicalTargetFactory.scala | 40 ------------------- 1 file changed, 40 deletions(-) delete mode 100644 src/main/scala/io/rml/framework/core/model/DefaultLogicalTargetFactory.scala diff --git a/src/main/scala/io/rml/framework/core/model/DefaultLogicalTargetFactory.scala b/src/main/scala/io/rml/framework/core/model/DefaultLogicalTargetFactory.scala deleted file mode 100644 index b863cfa0..00000000 --- a/src/main/scala/io/rml/framework/core/model/DefaultLogicalTargetFactory.scala +++ /dev/null @@ -1,40 +0,0 @@ -package io.rml.framework.core.model - -import io.rml.framework.core.model.std.StdDefaultLogicalTarget - -/** - * MIT License - * - * Copyright (C) 2017 - 2021 RDF Mapping Language (RML) - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * */ -object DefaultLogicalTargetFactory { - - /** - * - * @param path The path to the file. This can be absolute, relative to the path - * of the mapping file, or a URI in the form of file:///... - */ - def createFileLogicalTarget(path: String, serialization: String, compression: Option[Uri]): LogicalTarget = { - val dataTarget = FileDataTarget(Uri(path)) - StdDefaultLogicalTarget(dataTarget, Uri(serialization), compression) - } -} From 550b2018cf9d0aa1dbd992f9bb3b80c37e0fa123 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Wed, 19 May 2021 14:55:52 +0200 Subject: [PATCH 82/83] Updated CHANGELOG.md --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00a3c7b3..9d127ad3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Joins of data streams ### Added -* [Target in RML](https://rml.io/specs/rml-target) for streams: define an output sink at term map level. +* Support for using Web of Things descriptions in logical source and logical target, as described in [Van Assche et al](https://link.springer.com/chapter/10.1007/978-3-030-74296-6_26) + and [Target in RML specification](https://rml.io/specs/rml-target). + The current implementation is a proof-of-concept. As WoT data source RMLStreamer supports MQTT streams; + as logical target a file dump is supported.
### Changed * Updated JsonSurfer from version 1.5.1 to 1.6.0 From c2bc9c1eda6b4182f115c1234c6317f906ad29f0 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Wed, 19 May 2021 15:19:02 +0200 Subject: [PATCH 83/83] Updated CHANGELOG.md; prepare for 2.1.1 release --- CHANGELOG.md | 9 +++++++-- pom.xml | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 416eaa49..0d227232 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Support for stream partitioning in windows * Joins of data streams -## 2.1.1 - ? +## [2.1.1] - 2021-05-19 ### Added * Support for using Web of Things descriptions in logical source and logical target, as described in [Van Assche et al](https://link.springer.com/chapter/10.1007/978-3-030-74296-6_26) @@ -20,7 +20,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed * Updated JsonSurfer from version 1.5.1 to 1.6.0 -* Updated Flink from version 1.11.3 to 1.12.2 +* Updated Flink from version 1.11.3 to 1.12.3 + +### Fixed +* Function loading didn't always work because the `toString` method was called on the `Uri` to a function instead of +the `value` method. (Internal [issue #132](https://gitlab.ilabt.imec.be/rml/proc/rml-streamer/-/issues/132)). ## [2.1.0] - 2020-03-18 @@ -137,3 +141,4 @@ can be set with the program argument `--baseIRI`. [1.2.3]: https://github.com/RMLio/RMLStreamer/compare/v1.2.2...v1.2.3 [2.0.0]: https://github.com/RMLio/RMLStreamer/compare/v1.2.3...v2.0.0 [2.1.0]: https://github.com/RMLio/RMLStreamer/compare/v2.0.0...v2.1.0 +[2.1.1]: https://github.com/RMLio/RMLStreamer/compare/v2.1.0...v2.1.1 diff --git a/pom.xml b/pom.xml index 966c4ed5..6f52a9c9 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ SOFTWARE. io.rml RMLStreamer - 2.1.1-SNAPSHOT + 2.1.1 jar RMLStreamer