Skip to content

Commit

Permalink
Add Marc21XmlEncoder (#527)
Browse files Browse the repository at this point in the history
Marc21XmlEncoder acts as a wrapper. It makes use of Marc21Encoder, Marc21Decoder
and MarcXmlEncoder to ensure proper MARCXML output, especially regarding the leader.
Also - in contrast to MarcXmlEncoder - the record id (field 001) is mandatory.
  • Loading branch information
dr0i committed May 6, 2024
1 parent 67dbabd commit bff54da
Show file tree
Hide file tree
Showing 6 changed files with 287 additions and 14 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package org.metafacture.biblio.marc21;

import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.helpers.DefaultStreamPipe;

/**
 * Common base class of the MARCXML encoders in this package. It combines a
 * {@link DefaultStreamPipe} whose receiver is an {@code ObjectReceiver<String>}
 * with the configuration contract declared by {@link MarcXmlEncoderInterface}.
 */
public abstract class AbstractMarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> implements MarcXmlEncoderInterface {

/**
 * Reset hook with an empty body. Subclasses that hold state to be reset
 * override this method.
 */
protected void onResetStream() {
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Copyright 2024 hbz
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.metafacture.biblio.marc21;

import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;

/**
 * Acts as a wrapper: pipes its input to a {@link Marc21Encoder}, whose output
 * is piped to a {@link Marc21Decoder} (configured to emit the leader as a
 * whole), whose output is in turn piped to a {@link MarcXmlEncoder}.
 *
 * <p>All stream events received by this class are forwarded to the head of
 * that chain; all XML related options are forwarded to its tail.
 *
 * @author Pascal Christoph (dr0i)
 *
 */
@In(StreamReceiver.class)
@Out(String.class)
@Description("Encodes MARC21 records as MARCXML. It wraps 'encode-marc21 | decode-marc21 | encode-marcxml' to generate MARCXML more safely, especially when building the 'leader'.")
@FluxCommand("encode-marc21xml")
public class Marc21XmlEncoder extends AbstractMarcXmlEncoder {
    private final Marc21Decoder marc21Decoder = new Marc21Decoder();
    private final Marc21Encoder marc21Encoder = new Marc21Encoder();
    private final MarcXmlEncoder marcXmlEncoder = new MarcXmlEncoder();

    /**
     * Creates an instance of {@link Marc21XmlEncoder} and wires the internal
     * chain {@code Marc21Encoder -> Marc21Decoder -> MarcXmlEncoder}.
     */
    public Marc21XmlEncoder() {
        marc21Decoder.setEmitLeaderAsWhole(true);

        marc21Encoder.setReceiver(marc21Decoder);
        marc21Decoder.setReceiver(marcXmlEncoder);
    }

    /**
     * Hands the receiver of this module over to the last element of the
     * internal chain, so that the generated XML ends up downstream.
     */
    @Override
    protected void onSetReceiver() {
        marcXmlEncoder.setReceiver(getReceiver());
    }

    // --- stream events: forwarded to the head of the chain ---

    @Override
    public void startRecord(final String identifier) {
        marc21Encoder.startRecord(identifier);
    }

    @Override
    public void endRecord() {
        marc21Encoder.endRecord();
    }

    @Override
    public void startEntity(final String name) {
        marc21Encoder.startEntity(name);
    }

    @Override
    public void endEntity() {
        marc21Encoder.endEntity();
    }

    @Override
    public void literal(final String name, final String value) {
        marc21Encoder.literal(name, value);
    }

    /**
     * Closes the internal chain by closing its head.
     */
    @Override
    protected void onCloseStream() {
        marc21Encoder.closeStream();
    }

    /**
     * Resets the internal chain by resetting its head.
     */
    @Override
    public void onResetStream() {
        marc21Encoder.resetStream();
    }

    // --- XML options: forwarded to the wrapped MarcXmlEncoder ---

    @Override
    public void setEmitNamespace(final boolean emitNamespace) {
        marcXmlEncoder.setEmitNamespace(emitNamespace);
    }

    @Override
    public void omitXmlDeclaration(final boolean currentOmitXmlDeclaration) {
        marcXmlEncoder.omitXmlDeclaration(currentOmitXmlDeclaration);
    }

    @Override
    public void setXmlVersion(final String xmlVersion) {
        marcXmlEncoder.setXmlVersion(xmlVersion);
    }

    @Override
    public void setXmlEncoding(final String xmlEncoding) {
        marcXmlEncoder.setXmlEncoding(xmlEncoding);
    }

    @Override
    public void setFormatted(final boolean formatted) {
        marcXmlEncoder.setFormatted(formatted);
    }
}

Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,10 @@
import org.metafacture.commons.XmlUtil;
import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.MetafactureException;
import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultStreamPipe;

import java.util.Arrays;
import java.util.Collections;
Expand All @@ -36,11 +34,11 @@
* @author Pascal Christoph (dr0i) dug it up again
*/

@Description("Encodes a stream into MARCXML.")
@Description("Encodes a stream into MARCXML. Use this only if you can ensure valid MARC21. Also, the leader must be correct and set as one literal. You may want to use encode-marc21xml instead (which can cope with e.g. an irregular leader).")
@In(StreamReceiver.class)
@Out(String.class)
@FluxCommand("encode-marcxml")
public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
public class MarcXmlEncoder extends AbstractMarcXmlEncoder {

public static final String NAMESPACE_NAME = "marc";
public static final String XML_ENCODING = "UTF-8";
Expand Down Expand Up @@ -106,6 +104,7 @@ public String close(final Object[] args) {

private final StringBuilder builder = new StringBuilder();

private final StringBuilder builderLeader = new StringBuilder();
private boolean atStreamStart = true;

private boolean omitXmlDeclaration = OMIT_XML_DECLARATION;
Expand Down Expand Up @@ -206,6 +205,9 @@ public void startRecord(final String identifier) {

@Override
public void endRecord() {
if (builderLeader.length() > 0) {
writeLeader();
}
decrementIndentationLevel();
prettyPrintIndentation();
writeTag(Tag.record::close);
Expand Down Expand Up @@ -315,6 +317,15 @@ private void writeRaw(final String str) {
builder.append(str);
}

/**
 * Appends an unescaped sequence to the leader buffer ({@code builderLeader}).
 * Nothing is written to the output here; the buffered content is emitted
 * later by {@code writeLeader()}.
 *
 * @param str the unescaped sequence to be appended
 */
private void writeRawLeader(final String str) {
builderLeader.append(str);
}

/**
* Writes an escaped sequence.
*
Expand All @@ -324,14 +335,17 @@ private void writeEscaped(final String str) {
builder.append(XmlUtil.escape(str, false));
}

/**
 * Writes the buffered leader as a leader element and then empties the
 * leader buffer.
 */
private void writeLeader() {
    prettyPrintIndentation();
    writeTag(Tag.leader::open);
    writeRaw(builderLeader.toString());
    writeTag(Tag.leader::close);
    prettyPrintNewLine();
    // The buffer is an instance field that is only ever appended to; without
    // clearing it here the leader of this record would be prepended to the
    // leader of every following record.
    builderLeader.setLength(0);
}

private boolean writeLeader(final String name, final String value) {
if (name.equals(Marc21EventNames.LEADER_ENTITY)) {
prettyPrintIndentation();
writeTag(Tag.leader::open);
writeRaw(value);
writeTag(Tag.leader::close);
prettyPrintNewLine();

writeRawLeader(value);
return true;
}
else {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package org.metafacture.biblio.marc21;

/**
 * Configuration options shared by the MARCXML encoders of this package.
 * {@link AbstractMarcXmlEncoder} declares this interface for its subclasses.
 */
public interface MarcXmlEncoderInterface {

/**
* Sets the flag to decide whether to emit the {@value MarcXmlEncoder#NAMESPACE_NAME}
* namespace.
*
* @param emitNamespace true if the namespace is emitted, otherwise false
*/
void setEmitNamespace(boolean emitNamespace);

/**
* Sets the flag to decide whether to omit the XML declaration.
*
* <strong>Default value: {@value MarcXmlEncoder#OMIT_XML_DECLARATION}</strong>
*
* @param currentOmitXmlDeclaration true if the XML declaration is omitted, otherwise
* false
*/
void omitXmlDeclaration(boolean currentOmitXmlDeclaration);

/**
* Sets the XML version.
*
* <strong>Default value: {@value MarcXmlEncoder#XML_VERSION}</strong>
*
* @param xmlVersion the XML version
*/
void setXmlVersion(String xmlVersion);

/**
* Sets the XML encoding.
*
* <strong>Default value: {@value MarcXmlEncoder#XML_ENCODING}</strong>
*
* @param xmlEncoding the XML encoding
*/
void setXmlEncoding(String xmlEncoding);

/**
* Formats the resulting XML by indentation, also known as "pretty printing".
*
* <strong>Default value: {@value MarcXmlEncoder#PRETTY_PRINTED}</strong>
*
* @param formatted true if formatting is activated, otherwise false
*/
void setFormatted(boolean formatted);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package org.metafacture.biblio.marc21;

import org.junit.Before;
import org.junit.Test;
import org.metafacture.framework.FormatException;
import org.metafacture.framework.MissingIdException;

/**
 * Runs selected test methods of {@link MarcXmlEncoderTest} against a
 * {@link Marc21XmlEncoder}: the encoder under test of a
 * {@code MarcXmlEncoderTest} instance is replaced before each test and the
 * test bodies are invoked by delegation.
 */
public class Marc21XmlEncoderTest {
// Instance whose test bodies are reused; its encoder field is swapped in setUp().
MarcXmlEncoderTest marcXmlEncoderTest = new MarcXmlEncoderTest();

/**
 * Installs a fresh {@link Marc21XmlEncoder} as the encoder under test and
 * applies the shared encoder initialization.
 */
@Before
public void setUp() {
marcXmlEncoderTest.encoder=new Marc21XmlEncoder();
marcXmlEncoderTest.initializeEncoder();
}

// Expects the delegated test body to end with a FormatException for this encoder.
// NOTE(review): the delegate's body is not shown here; presumably the wrapped
// Marc21Encoder rejects its input - confirm.
@Test(expected = FormatException.class)
public void createAnRecordWithLeader() {
marcXmlEncoderTest.createAnRecordWithLeader();
}

// Expects the delegated test body to end with a FormatException for this encoder.
// NOTE(review): the delegate's body is not shown here - confirm the cause.
@Test(expected = FormatException.class)
public void issue336_createRecordWithTopLevelLeader() {
marcXmlEncoderTest.issue336_createRecordWithTopLevelLeader();
}

// No exception expected: with this encoder the delegate's assertion is meant to hold,
// whereas the delegate itself is annotated to expect a ComparisonFailure.
@Test
public void issue336_createRecordWithTopLevelLeader_Marc21Xml() {
marcXmlEncoderTest.issue336_createRecordWithTopLevelLeader_Marc21Xml();
}

// The delegate emits a leader entity but no "001" literal; a MissingIdException
// is expected for this encoder.
@Test(expected = MissingIdException.class)
public void issue527ShouldEmitLeaderAlwaysAsWholeString() {
marcXmlEncoderTest.issue527ShouldEmitLeaderAlwaysAsWholeString();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,20 @@

package org.metafacture.biblio.marc21;

import org.junit.After;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import org.junit.After;
import org.junit.Before;
import org.junit.ComparisonFailure;
import org.junit.Test;
import static org.metafacture.biblio.marc21.Marc21EventNames.BIBLIOGRAPHIC_LEVEL_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.CATALOGING_FORM_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.CHARACTER_CODING_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.ENCODING_LEVEL_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.MULTIPART_LEVEL_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.RECORD_STATUS_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.RECORD_TYPE_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.TYPE_OF_CONTROL_LITERAL;
import org.metafacture.framework.MetafactureException;
import org.metafacture.framework.helpers.DefaultObjectReceiver;

Expand All @@ -48,11 +56,15 @@ public class MarcXmlEncoderTest {
private static final String RECORD_ID = "92005291";

private static StringBuilder resultCollector;
private static MarcXmlEncoder encoder;
AbstractMarcXmlEncoder encoder;

/**
 * Creates a fresh {@link MarcXmlEncoder} as the encoder under test and
 * configures it via {@code initializeEncoder()}.
 */
@Before
public void setUp() {
encoder = new MarcXmlEncoder();
initializeEncoder();
}

void initializeEncoder() {
encoder.setFormatted(false);
encoder.setReceiver(new DefaultObjectReceiver<String>() {
@Override
Expand All @@ -67,7 +79,7 @@ public void process(final String obj) {
public void tearDown() {
}

private void addOneRecord(MarcXmlEncoder encoder) {
private void addOneRecord(AbstractMarcXmlEncoder encoder) {
encoder.startRecord(RECORD_ID);
encoder.literal("001", RECORD_ID);
encoder.startEntity("010 ");
Expand Down Expand Up @@ -212,6 +224,42 @@ public void issue336_createRecordWithTopLevelLeader() {
assertEquals(expected, actual);
}

/**
 * Sends a record consisting of a "001" literal and a top-level leader literal
 * and compares the result with a record whose leader differs from the one
 * sent. For the encoder configured by this class the comparison is expected
 * to fail, hence the expected {@link ComparisonFailure}.
 */
@Test(expected = ComparisonFailure.class)
public void issue336_createRecordWithTopLevelLeader_Marc21Xml() {
    encoder.startRecord("1");
    encoder.literal("001", "8u3287432");
    encoder.literal(Marc21EventNames.LEADER_ENTITY, "00000naa a2200000uc 4500");
    encoder.endRecord();
    encoder.closeStream();

    final String expectedRecord = "<marc:record>"
            + "<marc:controlfield tag=\"001\">8u3287432</marc:controlfield>"
            + "<marc:leader>00048naa a2200037uc 4500</marc:leader>"
            + "</marc:record>";
    assertEquals(XML_DECLARATION + XML_ROOT_OPEN + expectedRecord + XML_MARC_COLLECTION_END_TAG,
            resultCollector.toString());
}

/**
 * Sends the leader as an entity made of single literals and checks that the
 * output contains one leader element holding the concatenated values.
 */
@Test
public void issue527ShouldEmitLeaderAlwaysAsWholeString() {
    // Literal name / value pairs, in the order in which they are sent.
    final String[][] leaderParts = {
        {RECORD_STATUS_LITERAL, "a"},
        {RECORD_TYPE_LITERAL, "o"},
        {BIBLIOGRAPHIC_LEVEL_LITERAL, "a"},
        {TYPE_OF_CONTROL_LITERAL, " "},
        {CHARACTER_CODING_LITERAL, "a"},
        {ENCODING_LEVEL_LITERAL, "z"},
        {CATALOGING_FORM_LITERAL, "u"},
        {MULTIPART_LEVEL_LITERAL, " "},
    };

    encoder.startRecord("1");
    encoder.startEntity(Marc21EventNames.LEADER_ENTITY);
    for (final String[] part : leaderParts) {
        encoder.literal(part[0], part[1]);
    }
    encoder.endEntity();
    encoder.endRecord();
    encoder.closeStream();

    final String expectedRecord = "<marc:record><marc:leader>aoa azu </marc:leader></marc:record>";
    assertEquals(XML_DECLARATION + XML_ROOT_OPEN + expectedRecord + XML_MARC_COLLECTION_END_TAG,
            resultCollector.toString());
}

@Test
public void sendDataAndClearWhenRecordStartedAndStreamResets() {
encoder.startRecord("1");
Expand Down

0 comments on commit bff54da

Please sign in to comment.