diff --git a/core/model-vocabulary/src/main/java/org/eclipse/rdf4j/model/vocabulary/CSVW.java b/core/model-vocabulary/src/main/java/org/eclipse/rdf4j/model/vocabulary/CSVW.java
index 603812446b..9502cc5ec2 100644
--- a/core/model-vocabulary/src/main/java/org/eclipse/rdf4j/model/vocabulary/CSVW.java
+++ b/core/model-vocabulary/src/main/java/org/eclipse/rdf4j/model/vocabulary/CSVW.java
@@ -75,6 +75,9 @@ public class CSVW {
/** csvw:header */
public static final IRI HEADER;
+ /** csvw:headerRowCount */
+ public static final IRI HEADER_ROW_COUNT;
+
/** csvw:lang */
public static final IRI LANG;
@@ -90,6 +93,9 @@ public class CSVW {
/** csvw:required */
public static final IRI REQUIRED;
+ /** csvw:skipRows */
+ public static final IRI SKIP_ROWS;
+
/** csvw:tableSchema */
public static final IRI TABLE_SCHEMA;
@@ -121,11 +127,13 @@ public class CSVW {
FORMAT = Vocabularies.createIRI(NAMESPACE, "format");
GROUP_CHAR = Vocabularies.createIRI(NAMESPACE, "groupChar");
HEADER = Vocabularies.createIRI(NAMESPACE, "header");
+ HEADER_ROW_COUNT = Vocabularies.createIRI(NAMESPACE, "headerRowCount");
LANG = Vocabularies.createIRI(NAMESPACE, "lang");
NAME = Vocabularies.createIRI(NAMESPACE, "name");
PROPERTY_URL = Vocabularies.createIRI(NAMESPACE, "propertyUrl");
QUOTE_CHAR = Vocabularies.createIRI(NAMESPACE, "quoteChar");
REQUIRED = Vocabularies.createIRI(NAMESPACE, "required");
+ SKIP_ROWS = Vocabularies.createIRI(NAMESPACE, "skipRows");
TABLE_SCHEMA = Vocabularies.createIRI(NAMESPACE, "tableSchema");
TABLES = Vocabularies.createIRI(NAMESPACE, "tables");
TITLE = Vocabularies.createIRI(NAMESPACE, "title");
diff --git a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java
index 190c1249d6..b31515c58e 100644
--- a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java
+++ b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java
@@ -60,8 +60,9 @@
* Currently only "minimal mode" is supported
*
* @author Bart Hanssens
- * @see CSV on the Web Primer
*
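+ * @see <a href="https://www.w3.org/TR/tabular-data-primer/">CSV on the Web Primer</a>
+ * @see <a href="https://www.w3.org/TR/tabular-metadata/">Metadata Vocabulary for Tabular Data</a>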
* @since 5.1.0
*/
public class CSVWParser extends AbstractRDFParser {
@@ -225,6 +226,8 @@ private CellParser getCellParser(Model metadata, Resource column) {
Models.getPropertyString(metadata, column, CSVW.DEFAULT).ifPresent(v -> parser.setDefaultValue(v));
Models.getPropertyString(metadata, column, CSVW.REQUIRED)
.ifPresent(v -> parser.setIsRequired(Boolean.parseBoolean(v)));
+ Models.getPropertyString(metadata, column, CSVW.VIRTUAL)
+ .ifPresent(v -> parser.setVirtual(Boolean.parseBoolean(v)));
// only useful for strings
Models.getPropertyString(metadata, column, CSVW.LANG).ifPresent(v -> parser.setLang(v));
@@ -297,8 +300,10 @@ private Optional getFormat(Model metadata, Resource column) {
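- * @param subject
+ * @param table table resource; its table schema is consulted when the table itself has no aboutUrl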
* @return aboutURL or null
*/
- private String getAboutURL(Model metadata, Resource subject) {
- return Models.getPropertyString(metadata, subject, CSVW.ABOUT_URL).orElse(null);
+ private String getAboutURL(Model metadata, Resource table) {
+ // an aboutUrl set on the table wins; the table schema is only looked up (lazily) when it is absent
+ return Models.getPropertyString(metadata, table, CSVW.ABOUT_URL).orElseGet(() -> Models
+ .getPropertyString(metadata, getTableSchema(metadata, table), CSVW.ABOUT_URL).orElse(null));
}
/**
@@ -336,7 +341,7 @@ private void parseCSV(Model metadata, RDFHandler handler, URI csvFile, CellParse
// check for placeholder / column name that's being used to create subject IRI
int aboutIndex = getAboutIndex(aboutURL, cellParsers);
- String placeholder = (aboutIndex > -1) ? cellParsers[aboutIndex].getName() : null;
+ String placeholder = (aboutIndex > -1) ? "{" + cellParsers[aboutIndex].getName() + "}" : null;
LOGGER.info("Parsing {}", csvFile);
@@ -382,14 +387,19 @@ private CSVReader getCSVReader(Model metadata, Resource table, Reader reader) {
Optional val = Models.getProperty(metadata, table, CSVW.DIALECT);
if (val.isPresent()) {
Resource dialect = (Resource) val.get();
+
+ // skip header (and possibly other) rows
+ String headerRows = Models.getPropertyString(metadata, dialect, CSVW.HEADER_ROW_COUNT).orElse("1");
+ String skipRows = Models.getPropertyString(metadata, dialect, CSVW.SKIP_ROWS).orElse("0");
+ int skip = Integer.valueOf(headerRows) + Integer.valueOf(skipRows);
+ Models.getPropertyString(metadata, dialect, CSVW.HEADER)
+ .ifPresent(v -> builder.withSkipLines(v.equalsIgnoreCase("false") ? 0 : skip));
+
Models.getPropertyString(metadata, dialect, CSVW.DELIMITER)
.ifPresent(v -> parserBuilder.withSeparator(v.charAt(0)));
- Models.getPropertyString(metadata, dialect, CSVW.HEADER)
- .ifPresent(v -> builder.withSkipLines(v.equalsIgnoreCase("false") ? 0 : 1));
Models.getPropertyString(metadata, dialect, CSVW.QUOTE_CHAR)
.ifPresent(v -> parserBuilder.withQuoteChar(v.charAt(0)));
}
-
return builder.withCSVParser(parserBuilder.build()).build();
}
@@ -423,7 +433,7 @@ private Resource getIRIorBnode(CellParser[] cellParsers, String[] cells, String
if (aboutIndex > -1) {
Value val = cellParsers[aboutIndex].parse(cells[aboutIndex]);
if (val != null) {
- return Values.iri(aboutURL.replace(placeholder, val.toString()));
+ return Values.iri(aboutURL.replace(placeholder, val.stringValue()));
} else {
throw new RDFParseException("NULL value in aboutURL");
}
diff --git a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParser.java b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParser.java
index 80389f8f30..daf90679b3 100644
--- a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParser.java
+++ b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParser.java
@@ -27,6 +27,7 @@ public abstract class CellParser {
protected String lang;
protected String defaultValue;
protected boolean isRequired;
+ protected boolean isVirtual;
protected IRI propertyIRI;
protected String valueUrl;
protected String format;
@@ -174,6 +175,20 @@ protected String getValueOrDefault(String s) {
return s;
}
+ /**
+ * @return {@code true} when this column is flagged as virtual, {@code false} otherwise
+ */
+ public boolean isVirtual() {
+ return isVirtual;
+ }
+
+ /**
+ * @param isVirtual {@code true} to flag this column as virtual
+ */
+ public void setVirtual(boolean isVirtual) {
+ this.isVirtual = isVirtual;
+ }
+
/**
* Get the value from a cell
*
diff --git a/core/rio/csvw/src/test/resources/painters-metadata.json b/core/rio/csvw/src/test/resources/painters-metadata.json
index a31781f74b..88fd61b0f4 100644
--- a/core/rio/csvw/src/test/resources/painters-metadata.json
+++ b/core/rio/csvw/src/test/resources/painters-metadata.json
@@ -2,21 +2,23 @@
"@context": "http://www.w3.org/ns/csvw",
"url": "painters.csv",
"tableSchema": {
+ "aboutUrl": "https://www.wikidata.org/wiki/{wikidata_id}",
"columns": [
{ "name": "wikidata_id",
- "datatype": "string",
- "valueUrl": "https://www.wikidata.org/wiki/{wikidata_id}"},
+ "datatype": "string"},
{ "name": "first_name",
"propertyUrl": "schema:givenName"},
{ "name": "last_name",
"propertyUrl": "schema:familyName"},
{ "name": "country_id",
+ "propertyUrl": "schema:nationality",
"valueUrl": "https://www.wikidata.org/wiki/{country_id}"},
{ "name": "country_name_nl",
"lang": "nl" },
{ "name": "country_name_en",
"lang": "en" },
{ "name": "date_of_birth",
+ "propertyUrl": "schema:birthDate",
"datatype": {
"base": "date",
"format": "d/M/yyyy"
@@ -27,7 +29,10 @@
"format": "Yes|No"
} },
{ "name": "languages",
- "separator": " " }
+ "separator": " " },
+ { "virtual": true,
+ "propertyUrl": "rdf:type",
+ "valueUrl": "schema:Person" }
],
"primaryKey": "wikidata_id"
}