diff --git a/core/model-vocabulary/src/main/java/org/eclipse/rdf4j/model/vocabulary/CSVW.java b/core/model-vocabulary/src/main/java/org/eclipse/rdf4j/model/vocabulary/CSVW.java
index 603812446b..9502cc5ec2 100644
--- a/core/model-vocabulary/src/main/java/org/eclipse/rdf4j/model/vocabulary/CSVW.java
+++ b/core/model-vocabulary/src/main/java/org/eclipse/rdf4j/model/vocabulary/CSVW.java
@@ -75,6 +75,9 @@ public class CSVW {
 	/** csvw:header */
 	public static final IRI HEADER;
 
+	/** csvw:headerRowCount */
+	public static final IRI HEADER_ROW_COUNT;
+
 	/** csvw:lang */
 	public static final IRI LANG;
 
@@ -90,6 +93,9 @@ public class CSVW {
 	/** csvw:required */
 	public static final IRI REQUIRED;
 
+	/** csvw:skipRows */
+	public static final IRI SKIP_ROWS;
+
 	/** csvw:tableSchema */
 	public static final IRI TABLE_SCHEMA;
 
@@ -121,11 +127,13 @@ public class CSVW {
 		FORMAT = Vocabularies.createIRI(NAMESPACE, "format");
 		GROUP_CHAR = Vocabularies.createIRI(NAMESPACE, "groupChar");
 		HEADER = Vocabularies.createIRI(NAMESPACE, "header");
+		HEADER_ROW_COUNT = Vocabularies.createIRI(NAMESPACE, "headerRowCount");
 		LANG = Vocabularies.createIRI(NAMESPACE, "lang");
 		NAME = Vocabularies.createIRI(NAMESPACE, "name");
 		PROPERTY_URL = Vocabularies.createIRI(NAMESPACE, "propertyUrl");
 		QUOTE_CHAR = Vocabularies.createIRI(NAMESPACE, "quoteChar");
 		REQUIRED = Vocabularies.createIRI(NAMESPACE, "required");
+		SKIP_ROWS = Vocabularies.createIRI(NAMESPACE, "skipRows");
 		TABLE_SCHEMA = Vocabularies.createIRI(NAMESPACE, "tableSchema");
 		TABLES = Vocabularies.createIRI(NAMESPACE, "tables");
 		TITLE = Vocabularies.createIRI(NAMESPACE, "title");
diff --git a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java
index 190c1249d6..b31515c58e 100644
--- a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java
+++ b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java
@@ -60,8 +60,9 @@
  * Currently only "minimal mode" is supported
  *
  * @author Bart Hanssens
- * @see CSV on the Web Primer
  *
+ * @see <a href="https://www.w3.org/TR/tabular-data-primer/">CSV on the Web Primer</a>
+ * @see <a href="https://www.w3.org/TR/tabular-metadata/">Metadata Vocabulary for Tabular Data</a>
  * @since 5.1.0
  */
 public class CSVWParser extends AbstractRDFParser {
@@ -225,6 +226,8 @@ private CellParser getCellParser(Model metadata, Resource column) {
 		Models.getPropertyString(metadata, column, CSVW.DEFAULT).ifPresent(v -> parser.setDefaultValue(v));
 		Models.getPropertyString(metadata, column, CSVW.REQUIRED)
 				.ifPresent(v -> parser.setIsRequired(Boolean.parseBoolean(v)));
+		Models.getPropertyString(metadata, column, CSVW.VIRTUAL)
+				.ifPresent(v -> parser.setVirtual(Boolean.parseBoolean(v)));
 		// only useful for strings
 		Models.getPropertyString(metadata, column, CSVW.LANG).ifPresent(v -> parser.setLang(v));
 
@@ -297,8 +300,10 @@ private Optional getFormat(Model metadata, Resource column) {
-	 * @param subject
+	 * @param table    table resource; its tableSchema is consulted when the table itself has no aboutUrl
 	 * @return aboutURL or null
 	 */
-	private String getAboutURL(Model metadata, Resource subject) {
-		return Models.getPropertyString(metadata, subject, CSVW.ABOUT_URL).orElse(null);
+	private String getAboutURL(Model metadata, Resource table) {
+		return Models.getPropertyString(metadata, table, CSVW.ABOUT_URL)
+				.orElseGet(() -> Models.getPropertyString(metadata, getTableSchema(metadata, table), CSVW.ABOUT_URL)
+						.orElse(null));
 	}
 
 	/**
@@ -336,7 +341,7 @@ private void parseCSV(Model metadata, RDFHandler handler, URI csvFile, CellParse
 
 		// check for placeholder / column name that's being used to create subject IRI
 		int aboutIndex = getAboutIndex(aboutURL, cellParsers);
-		String placeholder = (aboutIndex > -1) ? cellParsers[aboutIndex].getName() : null;
+		String placeholder = (aboutIndex > -1) ? "{" + cellParsers[aboutIndex].getName() + "}" : null;
 
 		LOGGER.info("Parsing {}", csvFile);
 
@@ -382,14 +387,23 @@ private CSVReader getCSVReader(Model metadata, Resource table, Reader reader) {
 		Optional val = Models.getProperty(metadata, table, CSVW.DIALECT);
 		if (val.isPresent()) {
 			Resource dialect = (Resource) val.get();
+
+			// rows to skip = skipRows + header rows; "header" only supplies the default header row count
+			// (1, or 0 when "false") and is ignored when headerRowCount is given explicitly
+			Optional<String> header = Models.getPropertyString(metadata, dialect, CSVW.HEADER);
+			Optional<String> headerRows = Models.getPropertyString(metadata, dialect, CSVW.HEADER_ROW_COUNT);
+			Optional<String> skipRows = Models.getPropertyString(metadata, dialect, CSVW.SKIP_ROWS);
+			// leave the builder's own setting untouched when the dialect says nothing about skipping
+			if (header.isPresent() || headerRows.isPresent() || skipRows.isPresent()) {
+				int defaultHeaderRows = header.filter(v -> v.equalsIgnoreCase("false")).isPresent() ? 0 : 1;
+				builder.withSkipLines(headerRows.map(Integer::parseInt).orElse(defaultHeaderRows)
+						+ skipRows.map(Integer::parseInt).orElse(0));
+			}
+
 			Models.getPropertyString(metadata, dialect, CSVW.DELIMITER)
 					.ifPresent(v -> parserBuilder.withSeparator(v.charAt(0)));
-			Models.getPropertyString(metadata, dialect, CSVW.HEADER)
-					.ifPresent(v -> builder.withSkipLines(v.equalsIgnoreCase("false") ? 0 : 1));
 			Models.getPropertyString(metadata, dialect, CSVW.QUOTE_CHAR)
 					.ifPresent(v -> parserBuilder.withQuoteChar(v.charAt(0)));
 		}
-
 		return builder.withCSVParser(parserBuilder.build()).build();
 	}
 
@@ -423,7 +437,7 @@ private Resource getIRIorBnode(CellParser[] cellParsers, String[] cells, String
 		if (aboutIndex > -1) {
 			Value val = cellParsers[aboutIndex].parse(cells[aboutIndex]);
 			if (val != null) {
-				return Values.iri(aboutURL.replace(placeholder, val.toString()));
+				return Values.iri(aboutURL.replace(placeholder, val.stringValue()));
 			} else {
 				throw new RDFParseException("NULL value in aboutURL");
 			}
diff --git a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParser.java b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParser.java
index 80389f8f30..daf90679b3 100644
--- a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParser.java
+++ b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParser.java
@@ -27,6 +27,7 @@ public abstract class CellParser {
 	protected String lang;
 	protected String defaultValue;
 	protected boolean isRequired;
+	protected boolean isVirtual;
 	protected IRI propertyIRI;
 	protected String valueUrl;
 	protected String format;
@@ -174,6 +175,20 @@ protected String getValueOrDefault(String s) {
 		return s;
 	}
 
+	/**
+	 * @return true if the column is flagged as virtual (csvw:virtual)
+	 */
+	public boolean isVirtual() {
+		return this.isVirtual;
+	}
+
+	/**
+	 * @param isVirtual true to flag the column as virtual (csvw:virtual)
+	 */
+	public void setVirtual(boolean isVirtual) {
+		this.isVirtual = isVirtual;
+	}
+
 	/**
 	 * Get the value from a cell
 	 *
diff --git a/core/rio/csvw/src/test/resources/painters-metadata.json b/core/rio/csvw/src/test/resources/painters-metadata.json
index a31781f74b..88fd61b0f4 100644
--- a/core/rio/csvw/src/test/resources/painters-metadata.json
+++ b/core/rio/csvw/src/test/resources/painters-metadata.json
@@ -2,21 +2,23 @@
   "@context": "http://www.w3.org/ns/csvw",
   "url": "painters.csv",
   "tableSchema": {
+    "aboutUrl": "https://www.wikidata.org/wiki/{wikidata_id}",
     "columns": [
       { "name": "wikidata_id",
-        "datatype": "string",
-        "valueUrl": "https://www.wikidata.org/wiki/{wikidata_id}"},
+        "datatype": "string"},
       { "name": "first_name",
         "propertyUrl": "schema:givenName"},
       { "name": "last_name",
         "propertyUrl": "schema:familyName"},
       { "name": "country_id",
+        "propertyUrl": "schema:nationality",
         "valueUrl": "https://www.wikidata.org/wiki/{country_id}"},
       { "name": "country_name_nl",
         "lang": "nl" },
       { "name": "country_name_en",
         "lang": "en" },
       { "name": "date_of_birth",
+        "propertyUrl": "schema:birthDate",
         "datatype": {
           "base": "date",
           "format": "d/M/yyyy"
@@ -27,7 +29,10 @@
           "format": "Yes|No"
         } },
       { "name": "languages",
-        "separator": " " }
+        "separator": " " },
+      { "virtual": true,
+        "propertyUrl": "rdf:type",
+        "valueUrl": "schema:Person" }
     ],
     "primaryKey": "wikidata_id"
   }