diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java index 928859360..76f871869 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java @@ -36,7 +36,7 @@ * @author Pascal Christoph (dr0i) dug it up again */ -@Description("Encodes a stream into MARCXML.") +@Description("Encodes a stream into MARCXML. If you can't ensure valid MARC21 (e.g. the leader isn't correct or not set as one literal) then set the parameter `ensureCorrectMarc21Xml` to `true`.") @In(StreamReceiver.class) @Out(String.class) @FluxCommand("encode-marcxml") @@ -47,6 +47,7 @@ public final class MarcXmlEncoder extends DefaultStreamPipe"; private static final String ROOT_CLOSE = ""; @@ -104,28 +105,23 @@ public String close(final Object[] args) { private static final int TAG_BEGIN = 0; private static final int TAG_END = 3; - private final StringBuilder builder = new StringBuilder(); - private final StringBuilder leaderBuilder = new StringBuilder(); + private final Encoder encoder = new Encoder(); + private final Marc21Decoder decoder = new Marc21Decoder(); + private final Marc21Encoder wrapper = new Marc21Encoder(); - private boolean atStreamStart = true; - - private boolean omitXmlDeclaration = OMIT_XML_DECLARATION; - private String xmlVersion = XML_VERSION; - private String xmlEncoding = XML_ENCODING; - - private String currentEntity = ""; - - private boolean emitNamespace = true; - private Object[] namespacePrefix = new Object[]{emitNamespace ? NAMESPACE_PREFIX : EMPTY}; - - private int indentationLevel; - private boolean formatted = PRETTY_PRINTED; - private int recordAttributeOffset; + private DefaultStreamPipe> pipe; /** * Creates an instance of {@link MarcXmlEncoder}. 
*/ public MarcXmlEncoder() { + decoder.setEmitLeaderAsWhole(true); + + wrapper + .setReceiver(decoder) + .setReceiver(encoder); + + setEnsureCorrectMarc21Xml(ENSURE_CORRECT_MARC21_XML); } /** @@ -135,8 +131,7 @@ public MarcXmlEncoder() { * @param emitNamespace true if the namespace is emitted, otherwise false */ public void setEmitNamespace(final boolean emitNamespace) { - this.emitNamespace = emitNamespace; - namespacePrefix = new Object[]{emitNamespace ? NAMESPACE_PREFIX : EMPTY}; + encoder.setEmitNamespace(emitNamespace); } /** @@ -148,7 +143,7 @@ public void setEmitNamespace(final boolean emitNamespace) { * false */ public void omitXmlDeclaration(final boolean currentOmitXmlDeclaration) { - omitXmlDeclaration = currentOmitXmlDeclaration; + encoder.omitXmlDeclaration(currentOmitXmlDeclaration); } /** @@ -159,7 +154,7 @@ public void omitXmlDeclaration(final boolean currentOmitXmlDeclaration) { * @param xmlVersion the XML version */ public void setXmlVersion(final String xmlVersion) { - this.xmlVersion = xmlVersion; + encoder.setXmlVersion(xmlVersion); } /** @@ -170,7 +165,21 @@ public void setXmlVersion(final String xmlVersion) { * @param xmlEncoding the XML encoding */ public void setXmlEncoding(final String xmlEncoding) { - this.xmlEncoding = xmlEncoding; + encoder.setXmlEncoding(xmlEncoding); + } + + /** + * Sets whether to ensure correct MARC21 XML. + * If true, the input data is validated to ensure correct MARC21. Also the leader may be generated. + * It acts as a wrapper: the input is piped to {@link org.metafacture.biblio.marc21.Marc21Encoder}, whose output is piped to {@link org.metafacture.biblio.marc21.Marc21Decoder}, whose output is piped to {@link org.metafacture.biblio.marc21.MarcXmlEncoder}. + * This validation and treatment of the leader is safer but comes with a performance impact. + * + * Default value: {@value #ENSURE_CORRECT_MARC21_XML} + * + * @param ensureCorrectMarc21Xml if true the input data is validated to ensure correct MARC21. 
Also the leader may be generated. + */ + public void setEnsureCorrectMarc21Xml(final boolean ensureCorrectMarc21Xml) { + pipe = ensureCorrectMarc21Xml ? wrapper : encoder; } /** @@ -181,196 +190,282 @@ public void setXmlEncoding(final String xmlEncoding) { * @param formatted true if formatting is activated, otherwise false */ public void setFormatted(final boolean formatted) { - this.formatted = formatted; + encoder.setFormatted(formatted); } @Override public void startRecord(final String identifier) { - if (atStreamStart) { - if (!omitXmlDeclaration) { - writeHeader(); - prettyPrintNewLine(); - } - writeTag(Tag.collection::open, emitNamespace ? NAMESPACE_SUFFIX : EMPTY, emitNamespace ? SCHEMA_ATTRIBUTES : EMPTY); - prettyPrintNewLine(); - incrementIndentationLevel(); - } - atStreamStart = false; - - prettyPrintIndentation(); - writeTag(Tag.record::open); - recordAttributeOffset = builder.length() - 1; - prettyPrintNewLine(); - - incrementIndentationLevel(); + pipe.startRecord(identifier); } @Override public void endRecord() { - writeLeader(); - decrementIndentationLevel(); - prettyPrintIndentation(); - writeTag(Tag.record::close); - prettyPrintNewLine(); - sendAndClearData(); + pipe.endRecord(); } @Override public void startEntity(final String name) { - currentEntity = name; - if (!name.equals(Marc21EventNames.LEADER_ENTITY)) { - if (name.length() != LEADER_ENTITY_LENGTH) { - final String message = String.format("Entity too short." + "Got a string ('%s') of length %d." 
+ - "Expected a length of " + LEADER_ENTITY_LENGTH + " (field + indicators).", name, name.length()); - throw new MetafactureException(message); + pipe.startEntity(name); + } + + @Override + public void endEntity() { + pipe.endEntity(); + } + + @Override + public void literal(final String name, final String value) { + pipe.literal(name, value); + } + + @Override + protected void onResetStream() { + pipe.resetStream(); + } + + @Override + protected void onCloseStream() { + pipe.closeStream(); + } + + @Override + protected void onSetReceiver() { + encoder.setReceiver(getReceiver()); + } + + private static class Encoder extends DefaultStreamPipe> { + + private final StringBuilder builder = new StringBuilder(); + private final StringBuilder leaderBuilder = new StringBuilder(); + + private boolean atStreamStart = true; + + private boolean omitXmlDeclaration = OMIT_XML_DECLARATION; + private String xmlVersion = XML_VERSION; + private String xmlEncoding = XML_ENCODING; + + private String currentEntity = ""; + + private boolean emitNamespace = true; + private Object[] namespacePrefix = new Object[]{emitNamespace ? NAMESPACE_PREFIX : EMPTY}; + + private int indentationLevel; + private boolean formatted = PRETTY_PRINTED; + private int recordAttributeOffset; + + private Encoder() { + } + + public void setEmitNamespace(final boolean emitNamespace) { + this.emitNamespace = emitNamespace; + namespacePrefix = new Object[]{emitNamespace ? 
NAMESPACE_PREFIX : EMPTY}; + } + + public void omitXmlDeclaration(final boolean currentOmitXmlDeclaration) { + omitXmlDeclaration = currentOmitXmlDeclaration; + } + + public void setXmlVersion(final String xmlVersion) { + this.xmlVersion = xmlVersion; + } + + public void setXmlEncoding(final String xmlEncoding) { + this.xmlEncoding = xmlEncoding; + } + + public void setFormatted(final boolean formatted) { + this.formatted = formatted; + } + + @Override + public void startRecord(final String identifier) { + if (atStreamStart) { + if (!omitXmlDeclaration) { + writeHeader(); + prettyPrintNewLine(); + } + writeTag(Tag.collection::open, emitNamespace ? NAMESPACE_SUFFIX : EMPTY, emitNamespace ? SCHEMA_ATTRIBUTES : EMPTY); + prettyPrintNewLine(); + incrementIndentationLevel(); } + atStreamStart = false; - final String tag = name.substring(TAG_BEGIN, TAG_END); - final String ind1 = name.substring(IND1_BEGIN, IND1_END); - final String ind2 = name.substring(IND2_BEGIN, IND2_END); prettyPrintIndentation(); - writeTag(Tag.datafield::open, tag, ind1, ind2); + writeTag(Tag.record::open); + recordAttributeOffset = builder.length() - 1; prettyPrintNewLine(); + incrementIndentationLevel(); } - } - @Override - public void endEntity() { - if (!currentEntity.equals(Marc21EventNames.LEADER_ENTITY)) { + @Override + public void endRecord() { + writeLeader(); decrementIndentationLevel(); prettyPrintIndentation(); - writeTag(Tag.datafield::close); + writeTag(Tag.record::close); prettyPrintNewLine(); + sendAndClearData(); } - currentEntity = ""; - } - @Override - public void literal(final String name, final String value) { - if ("".equals(currentEntity)) { - if (name.equals(Marc21EventNames.MARCXML_TYPE_LITERAL)) { - if (value != null) { - builder.insert(recordAttributeOffset, String.format(ATTRIBUTE_TEMPLATE, name, value)); + @Override + public void startEntity(final String name) { + currentEntity = name; + if (!name.equals(Marc21EventNames.LEADER_ENTITY)) { + if (name.length() != 
LEADER_ENTITY_LENGTH) { + final String message = String.format("Entity too short." + "Got a string ('%s') of length %d." + + "Expected a length of " + LEADER_ENTITY_LENGTH + " (field + indicators).", name, name.length()); + throw new MetafactureException(message); } + + final String tag = name.substring(TAG_BEGIN, TAG_END); + final String ind1 = name.substring(IND1_BEGIN, IND1_END); + final String ind2 = name.substring(IND2_BEGIN, IND2_END); + prettyPrintIndentation(); + writeTag(Tag.datafield::open, tag, ind1, ind2); + prettyPrintNewLine(); + incrementIndentationLevel(); } - else if (!appendLeader(name, value)) { + } + + @Override + public void endEntity() { + if (!currentEntity.equals(Marc21EventNames.LEADER_ENTITY)) { + decrementIndentationLevel(); prettyPrintIndentation(); - writeTag(Tag.controlfield::open, name); - if (value != null) { - writeEscaped(value.trim()); + writeTag(Tag.datafield::close); + prettyPrintNewLine(); + } + currentEntity = ""; + } + + @Override + public void literal(final String name, final String value) { + if ("".equals(currentEntity)) { + if (name.equals(Marc21EventNames.MARCXML_TYPE_LITERAL)) { + if (value != null) { + builder.insert(recordAttributeOffset, String.format(ATTRIBUTE_TEMPLATE, name, value)); + } } - writeTag(Tag.controlfield::close); + else if (!appendLeader(name, value)) { + prettyPrintIndentation(); + writeTag(Tag.controlfield::open, name); + if (value != null) { + writeEscaped(value.trim()); + } + writeTag(Tag.controlfield::close); + prettyPrintNewLine(); + } + } + else if (!appendLeader(currentEntity, value)) { + prettyPrintIndentation(); + writeTag(Tag.subfield::open, name); + writeEscaped(value.trim()); + writeTag(Tag.subfield::close); prettyPrintNewLine(); } } - else if (!appendLeader(currentEntity, value)) { - prettyPrintIndentation(); - writeTag(Tag.subfield::open, name); - writeEscaped(value.trim()); - writeTag(Tag.subfield::close); - prettyPrintNewLine(); + + @Override + protected void onResetStream() { + if 
(!atStreamStart) { + writeFooter(); + } + sendAndClearData(); + indentationLevel = 0; + atStreamStart = true; } - } - @Override - protected void onResetStream() { - if (!atStreamStart) { + @Override + protected void onCloseStream() { writeFooter(); + sendAndClearData(); } - sendAndClearData(); - indentationLevel = 0; - atStreamStart = true; - } - - @Override - protected void onCloseStream() { - writeFooter(); - sendAndClearData(); - } - /** Increments the indentation level by one */ - private void incrementIndentationLevel() { - indentationLevel += 1; - } + /** Increments the indentation level by one */ + private void incrementIndentationLevel() { + indentationLevel += 1; + } - /** Decrements the indentation level by one */ - private void decrementIndentationLevel() { - indentationLevel -= 1; - } + /** Decrements the indentation level by one */ + private void decrementIndentationLevel() { + indentationLevel -= 1; + } - /** Adds a XML Header */ - private void writeHeader() { - writeRaw(String.format(XML_DECLARATION_TEMPLATE, xmlVersion, xmlEncoding)); - } + /** Adds a XML Header */ + private void writeHeader() { + writeRaw(String.format(XML_DECLARATION_TEMPLATE, xmlVersion, xmlEncoding)); + } - /** Closes the root tag */ - private void writeFooter() { - writeTag(Tag.collection::close); - } + /** Closes the root tag */ + private void writeFooter() { + writeTag(Tag.collection::close); + } - /** - * Writes an unescaped sequence. - * - * @param str the unescaped sequence to be written - */ - private void writeRaw(final String str) { - builder.append(str); - } + /** + * Writes an unescaped sequence. 
+ * + * @param str the unescaped sequence to be written + */ + private void writeRaw(final String str) { + builder.append(str); + } - private boolean appendLeader(final String name, final String value) { - if (name.equals(Marc21EventNames.LEADER_ENTITY)) { - leaderBuilder.append(value); - return true; + private boolean appendLeader(final String name, final String value) { + if (name.equals(Marc21EventNames.LEADER_ENTITY)) { + leaderBuilder.append(value); + return true; + } + else { + return false; + } } - else { - return false; + + /** + * Writes an escaped sequence. + * + * @param str the unescaped sequence to be written + */ + private void writeEscaped(final String str) { + builder.append(XmlUtil.escape(str, false)); } - } - /** - * Writes an escaped sequence. - * - * @param str the unescaped sequence to be written - */ - private void writeEscaped(final String str) { - builder.append(XmlUtil.escape(str, false)); - } + private void writeLeader() { + final String leader = leaderBuilder.toString(); + if (!leader.isEmpty()) { + prettyPrintIndentation(); + writeTag(Tag.leader::open); + writeRaw(leader); + writeTag(Tag.leader::close); + prettyPrintNewLine(); + } + } - private void writeLeader() { - final String leader = leaderBuilder.toString(); - if (!leader.isEmpty()) { - prettyPrintIndentation(); - writeTag(Tag.leader::open); - writeRaw(leader); - writeTag(Tag.leader::close); - prettyPrintNewLine(); + private void writeTag(final Function function, final Object... args) { + final Object[] allArgs = Arrays.copyOf(namespacePrefix, namespacePrefix.length + args.length); + System.arraycopy(args, 0, allArgs, namespacePrefix.length, args.length); + writeRaw(function.apply(allArgs)); } - } - private void writeTag(final Function function, final Object... 
args) { - final Object[] allArgs = Arrays.copyOf(namespacePrefix, namespacePrefix.length + args.length); - System.arraycopy(args, 0, allArgs, namespacePrefix.length, args.length); - writeRaw(function.apply(allArgs)); - } + private void prettyPrintIndentation() { + if (formatted) { + final String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT)); + builder.append(prefix); + } + } - private void prettyPrintIndentation() { - if (formatted) { - final String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT)); - builder.append(prefix); + private void prettyPrintNewLine() { + if (formatted) { + builder.append(NEW_LINE); + } } - } - private void prettyPrintNewLine() { - if (formatted) { - builder.append(NEW_LINE); + private void sendAndClearData() { + getReceiver().process(builder.toString()); + builder.delete(0, builder.length()); + recordAttributeOffset = 0; } - } - private void sendAndClearData() { - getReceiver().process(builder.toString()); - builder.delete(0, builder.length()); - recordAttributeOffset = 0; } } diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java index 00f497b05..b167e5bba 100644 --- a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java +++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java @@ -16,7 +16,9 @@ package org.metafacture.biblio.marc21; +import org.metafacture.framework.FormatException; import org.metafacture.framework.MetafactureException; +import org.metafacture.framework.MissingIdException; import org.metafacture.framework.helpers.DefaultObjectReceiver; import org.junit.Before; @@ -183,7 +185,7 @@ public void emitExceptionWhenEntityLengthNot5() { } @Test - public void createAnRecordWithLeader() { + public void createRecordWithLeader() { encoder.startRecord("1"); 
encoder.startEntity(Marc21EventNames.LEADER_ENTITY); encoder.literal(Marc21EventNames.LEADER_ENTITY, "dummy"); @@ -196,6 +198,12 @@ public void createAnRecordWithLeader() { assertEquals(expected, actual); } + @Test(expected = FormatException.class) + public void createRecordWithLeader_ensureCorrectMarc21Xml() { + encoder.setEnsureCorrectMarc21Xml(true); + createRecordWithLeader(); + } + @Test public void issue336_createRecordWithTopLevelLeader_dummy() { encoder.startRecord("1"); @@ -213,6 +221,12 @@ public void issue336_createRecordWithTopLevelLeader_defaultMarc21Xml() { issue336_createRecordWithTopLevelLeader(encoder, "00000naa a2200000uc 4500"); } + @Test + public void issue336_createRecordWithTopLevelLeader_ensureCorrectMarc21Xml() { + encoder.setEnsureCorrectMarc21Xml(true); + issue336_createRecordWithTopLevelLeader(encoder, "00048naa a2200037uc 4500"); + } + private void issue336_createRecordWithTopLevelLeader(final MarcXmlEncoder encoder, final String expectedLeader) { encoder.startRecord("1"); encoder.literal("001", "8u3287432"); @@ -231,6 +245,12 @@ public void issue527_shouldEmitLeaderAlwaysAsWholeString() { issue527_shouldEmitLeaderAlwaysAsWholeString(encoder); } + @Test(expected = MissingIdException.class) + public void issue527_shouldEmitLeaderAlwaysAsWholeString_ensureCorrectMarc21Xml() { + encoder.setEnsureCorrectMarc21Xml(true); + issue527_shouldEmitLeaderAlwaysAsWholeString(encoder); + } + private void issue527_shouldEmitLeaderAlwaysAsWholeString(final MarcXmlEncoder encoder) { encoder.startRecord("1"); encoder.startEntity(Marc21EventNames.LEADER_ENTITY);