From bff54dafa95066ecdf10467206573236f25355d7 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 6 May 2024 10:02:07 +0200 Subject: [PATCH] Add Marc21XmlEncoder (#527) Marc21XmlEncoder acts as a wrapper. It makes use of Marc21Encoder, Marc21Decoder and MarcXmlEncoder to ensure a proper MarcXml, especially regarding the leader. Also - in contrast to MarcXmlEncoder - the record id (field 001) is mandatory. --- .../biblio/marc21/AbstractMarcXmlEncoder.java | 10 ++ .../biblio/marc21/Marc21XmlEncoder.java | 115 ++++++++++++++++++ .../biblio/marc21/MarcXmlEncoder.java | 34 ++++-- .../marc21/MarcXmlEncoderInterface.java | 49 ++++++++ .../biblio/marc21/Marc21XmlEncoderTest.java | 37 ++++++ .../biblio/marc21/MarcXmlEncoderTest.java | 56 ++++++++- 6 files changed, 287 insertions(+), 14 deletions(-) create mode 100644 metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/AbstractMarcXmlEncoder.java create mode 100644 metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21XmlEncoder.java create mode 100644 metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoderInterface.java create mode 100644 metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/Marc21XmlEncoderTest.java diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/AbstractMarcXmlEncoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/AbstractMarcXmlEncoder.java new file mode 100644 index 000000000..a54d5b455 --- /dev/null +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/AbstractMarcXmlEncoder.java @@ -0,0 +1,10 @@ +package org.metafacture.biblio.marc21; + +import org.metafacture.framework.ObjectReceiver; +import org.metafacture.framework.helpers.DefaultStreamPipe; + +public abstract class AbstractMarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> implements MarcXmlEncoderInterface { + + protected void onResetStream() { + } +} diff --git 
a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21XmlEncoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21XmlEncoder.java new file mode 100644 index 000000000..445bce46e --- /dev/null +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21XmlEncoder.java @@ -0,0 +1,115 @@ +/* + * Copyright 2024 hbz + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.metafacture.biblio.marc21; + +import org.metafacture.framework.FluxCommand; +import org.metafacture.framework.StreamReceiver; +import org.metafacture.framework.annotations.Description; +import org.metafacture.framework.annotations.In; +import org.metafacture.framework.annotations.Out; + +/** + * Acts as a wrapper: pipes input to Marc21Encoder, whose output is piped to Marc21Decoder, whose output is piped to MarcXmlEncoder. + * + * @author Pascal Christoph (dr0i) + * + */ +@In(StreamReceiver.class) +@Out(String.class) +@Description("Encodes MARC21 records as MARCXML. 
It wraps 'encode-marc21 | decode-marc21 | encode-marcxml ' to generate MARCXML more safely, especially when the building the 'leader'.") +@FluxCommand("encode-marc21xml") +public class Marc21XmlEncoder extends AbstractMarcXmlEncoder { + private final Marc21Decoder marc21Decoder = new Marc21Decoder(); + private final Marc21Encoder marc21Encoder = new Marc21Encoder(); + private final MarcXmlEncoder marcXmlEncoder = new MarcXmlEncoder(); + + /** + * Creates an instance of {@link Marc21XmlEncoder}. + */ + public Marc21XmlEncoder() { + marc21Decoder.setEmitLeaderAsWhole(true); + + marc21Encoder.setReceiver(marc21Decoder); + marc21Decoder.setReceiver(marcXmlEncoder); + } + + @Override + protected void onSetReceiver() { + marcXmlEncoder.setReceiver(getReceiver()); + } + + @Override + public void startRecord(final String identifier) { + marc21Encoder.startRecord(identifier); + } + + @Override + public void endRecord() { + marc21Encoder.endRecord(); + } + + @Override + public void startEntity(final String name) { + marc21Encoder.startEntity(name); + } + + @Override + public void endEntity() { + marc21Encoder.endEntity(); + } + + @Override + public void literal(final String name, final String value) { + marc21Encoder.literal(name, value); + } + + @Override + protected void onCloseStream() { + marc21Encoder.closeStream(); + } + + @Override + public void onResetStream() { + marc21Encoder.resetStream(); + } + + @Override + public void setEmitNamespace(final boolean emitNamespace) { + marcXmlEncoder.setEmitNamespace(emitNamespace); + } + + @Override + public void omitXmlDeclaration(final boolean currentOmitXmlDeclaration) { + marcXmlEncoder.omitXmlDeclaration(currentOmitXmlDeclaration); + } + + @Override + public void setXmlVersion(final String xmlVersion) { + marcXmlEncoder.setXmlVersion(xmlVersion); + } + + @Override + public void setXmlEncoding(final String xmlEncoding) { + marcXmlEncoder.setXmlEncoding(xmlEncoding); + } + + @Override + public void setFormatted(final boolean 
formatted) { + marcXmlEncoder.setFormatted(formatted); + } +} + diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java index 0b8202ec0..74279fcf0 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java @@ -18,12 +18,10 @@ import org.metafacture.commons.XmlUtil; import org.metafacture.framework.FluxCommand; import org.metafacture.framework.MetafactureException; -import org.metafacture.framework.ObjectReceiver; import org.metafacture.framework.StreamReceiver; import org.metafacture.framework.annotations.Description; import org.metafacture.framework.annotations.In; import org.metafacture.framework.annotations.Out; -import org.metafacture.framework.helpers.DefaultStreamPipe; import java.util.Arrays; import java.util.Collections; @@ -36,11 +34,11 @@ * @author Pascal Christoph (dr0i) dug it up again */ -@Description("Encodes a stream into MARCXML.") +@Description("Encodes a stream into MARCXML. Use this only if you can ensure valid MARC21. Also, the leader must be correct and set as one literal. You may want to use encode-marc21xml instead (which can cope with e.g. 
an irregular leader).") @In(StreamReceiver.class) @Out(String.class) @FluxCommand("encode-marcxml") -public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> { +public class MarcXmlEncoder extends AbstractMarcXmlEncoder { public static final String NAMESPACE_NAME = "marc"; public static final String XML_ENCODING = "UTF-8"; @@ -106,6 +104,7 @@ public String close(final Object[] args) { private final StringBuilder builder = new StringBuilder(); + private final StringBuilder builderLeader = new StringBuilder(); private boolean atStreamStart = true; private boolean omitXmlDeclaration = OMIT_XML_DECLARATION; @@ -206,6 +205,9 @@ public void startRecord(final String identifier) { @Override public void endRecord() { + if (builderLeader.length() > 0) { + writeLeader(); + } decrementIndentationLevel(); prettyPrintIndentation(); writeTag(Tag.record::close); @@ -315,6 +317,15 @@ private void writeRaw(final String str) { builder.append(str); } + /** + * Writes an unescaped sequence to the leader literal. + * + * @param str the unescaped sequence to be written + */ + private void writeRawLeader(final String str) { + builderLeader.append(str); + } + /** * Writes an escaped sequence. 
* @@ -324,14 +335,17 @@ private void writeEscaped(final String str) { builder.append(XmlUtil.escape(str, false)); } + private void writeLeader() { + prettyPrintIndentation(); + writeTag(Tag.leader::open); + writeRaw(builderLeader.toString()); + writeTag(Tag.leader::close); + prettyPrintNewLine(); + } + private boolean writeLeader(final String name, final String value) { if (name.equals(Marc21EventNames.LEADER_ENTITY)) { - prettyPrintIndentation(); - writeTag(Tag.leader::open); - writeRaw(value); - writeTag(Tag.leader::close); - prettyPrintNewLine(); - + writeRawLeader(value); return true; } else { diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoderInterface.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoderInterface.java new file mode 100644 index 000000000..b3e2f8840 --- /dev/null +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoderInterface.java @@ -0,0 +1,49 @@ +package org.metafacture.biblio.marc21; + +public interface MarcXmlEncoderInterface { + + /** + * Sets the flag to decide whether to emit the {@value MarcXmlEncoder#NAMESPACE_NAME} + * namespace + * + * @param emitNamespace true if the namespace is emitted, otherwise false + */ + void setEmitNamespace(boolean emitNamespace); + + /** + * Sets the flag to decide whether to omit the XML declaration. + * + * Default value: {@value MarcXmlEncoder#OMIT_XML_DECLARATION} + * + * @param currentOmitXmlDeclaration true if the XML declaration is omitted, otherwise + * false + */ + void omitXmlDeclaration(boolean currentOmitXmlDeclaration); + + /** + * Sets the XML version. + * + * Default value: {@value MarcXmlEncoder#XML_VERSION} + * + * @param xmlVersion the XML version + */ + void setXmlVersion(String xmlVersion); + + /** + * Sets the XML encoding. 
+ * + * Default value: {@value MarcXmlEncoder#XML_ENCODING} + * + * @param xmlEncoding the XML encoding + */ + void setXmlEncoding(String xmlEncoding); + + /** + * Formats the resulting xml by indentation. Aka "pretty printing". + * + * Default value: {@value MarcXmlEncoder#PRETTY_PRINTED} + * + * @param formatted true if formatting is activated, otherwise false + */ + void setFormatted(boolean formatted); +} diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/Marc21XmlEncoderTest.java b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/Marc21XmlEncoderTest.java new file mode 100644 index 000000000..9ef77f973 --- /dev/null +++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/Marc21XmlEncoderTest.java @@ -0,0 +1,37 @@ +package org.metafacture.biblio.marc21; + +import org.junit.Before; +import org.junit.Test; +import org.metafacture.framework.FormatException; +import org.metafacture.framework.MissingIdException; + +public class Marc21XmlEncoderTest { + MarcXmlEncoderTest marcXmlEncoderTest = new MarcXmlEncoderTest(); + + @Before + public void setUp() { + marcXmlEncoderTest.encoder=new Marc21XmlEncoder(); + marcXmlEncoderTest.initializeEncoder(); + } + + @Test(expected = FormatException.class) + public void createAnRecordWithLeader() { + marcXmlEncoderTest.createAnRecordWithLeader(); + } + + @Test(expected = FormatException.class) + public void issue336_createRecordWithTopLevelLeader() { + marcXmlEncoderTest.issue336_createRecordWithTopLevelLeader(); + } + + @Test + public void issue336_createRecordWithTopLevelLeader_Marc21Xml() { + marcXmlEncoderTest.issue336_createRecordWithTopLevelLeader_Marc21Xml(); + } + + @Test(expected = MissingIdException.class) + public void issue527ShouldEmitLeaderAlwaysAsWholeString() { + marcXmlEncoderTest.issue527ShouldEmitLeaderAlwaysAsWholeString(); + } + +} diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java 
b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java index bc6fb0d49..aa8ab45ea 100644 --- a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java +++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java @@ -16,12 +16,20 @@ package org.metafacture.biblio.marc21; +import org.junit.After; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; - -import org.junit.After; import org.junit.Before; +import org.junit.ComparisonFailure; import org.junit.Test; +import static org.metafacture.biblio.marc21.Marc21EventNames.BIBLIOGRAPHIC_LEVEL_LITERAL; +import static org.metafacture.biblio.marc21.Marc21EventNames.CATALOGING_FORM_LITERAL; +import static org.metafacture.biblio.marc21.Marc21EventNames.CHARACTER_CODING_LITERAL; +import static org.metafacture.biblio.marc21.Marc21EventNames.ENCODING_LEVEL_LITERAL; +import static org.metafacture.biblio.marc21.Marc21EventNames.MULTIPART_LEVEL_LITERAL; +import static org.metafacture.biblio.marc21.Marc21EventNames.RECORD_STATUS_LITERAL; +import static org.metafacture.biblio.marc21.Marc21EventNames.RECORD_TYPE_LITERAL; +import static org.metafacture.biblio.marc21.Marc21EventNames.TYPE_OF_CONTROL_LITERAL; import org.metafacture.framework.MetafactureException; import org.metafacture.framework.helpers.DefaultObjectReceiver; @@ -48,11 +56,15 @@ public class MarcXmlEncoderTest { private static final String RECORD_ID = "92005291"; private static StringBuilder resultCollector; - private static MarcXmlEncoder encoder; + AbstractMarcXmlEncoder encoder; @Before public void setUp() { encoder = new MarcXmlEncoder(); + initializeEncoder(); + } + + void initializeEncoder() { encoder.setFormatted(false); encoder.setReceiver(new DefaultObjectReceiver<String>() { @Override @@ -67,7 +79,7 @@ public void process(final String obj) { public void tearDown() { } - private void addOneRecord(MarcXmlEncoder encoder) { + private void 
addOneRecord(AbstractMarcXmlEncoder encoder) { encoder.startRecord(RECORD_ID); encoder.literal("001", RECORD_ID); encoder.startEntity("010 "); @@ -212,6 +224,42 @@ public void issue336_createRecordWithTopLevelLeader() { assertEquals(expected, actual); } + @Test(expected = ComparisonFailure.class) + public void issue336_createRecordWithTopLevelLeader_Marc21Xml() { + encoder.startRecord("1"); + encoder.literal("001", "8u3287432"); + encoder.literal(Marc21EventNames.LEADER_ENTITY, "00000naa a2200000uc 4500"); + encoder.endRecord(); + encoder.closeStream(); + String expected = XML_DECLARATION + XML_ROOT_OPEN + + "8u3287432" + + "00048naa a2200037uc 4500" + XML_MARC_COLLECTION_END_TAG; + String actual = resultCollector.toString(); + assertEquals(expected, actual); + } + + @Test + public void issue527ShouldEmitLeaderAlwaysAsWholeString() { + encoder.startRecord("1"); + encoder.startEntity(Marc21EventNames.LEADER_ENTITY); + encoder.literal(RECORD_STATUS_LITERAL, "a"); + + encoder.literal(RECORD_TYPE_LITERAL, "o"); + encoder.literal(BIBLIOGRAPHIC_LEVEL_LITERAL, "a"); + encoder.literal(TYPE_OF_CONTROL_LITERAL, " "); + encoder.literal(CHARACTER_CODING_LITERAL, "a"); + encoder.literal(ENCODING_LEVEL_LITERAL, "z"); + encoder.literal(CATALOGING_FORM_LITERAL, "u"); + encoder.literal(MULTIPART_LEVEL_LITERAL, " "); + encoder.endEntity(); + encoder.endRecord(); + encoder.closeStream(); + String expected = XML_DECLARATION + XML_ROOT_OPEN + + "aoa azu " + XML_MARC_COLLECTION_END_TAG; + String actual = resultCollector.toString(); + assertEquals(expected, actual); + } + @Test public void sendDataAndClearWhenRecordStartedAndStreamResets() { encoder.startRecord("1");