From 9178786591cab429ad80d2b55394af97d58397a7 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 29 Apr 2024 15:39:50 +0200 Subject: [PATCH] WIP2 --- metafacture-biblio/build.gradle | 1 + .../biblio/marc21/Marc21Encoder.java | 5 ++ .../biblio/marc21/MarcXmlEncoder.java | 10 ++- .../biblio/marc21/Marc21DecoderTest.java | 75 +++++++++++-------- 4 files changed, 57 insertions(+), 34 deletions(-) diff --git a/metafacture-biblio/build.gradle b/metafacture-biblio/build.gradle index b1326088a..2f4c88b94 100644 --- a/metafacture-biblio/build.gradle +++ b/metafacture-biblio/build.gradle @@ -21,6 +21,7 @@ dependencies { api project(':metafacture-framework') implementation project(':metafacture-commons') implementation project(':metafacture-flowcontrol') + implementation project(':metafacture-io') implementation 'org.dspace:oclc-harvester2:1.0.0' implementation ('xalan:xalan:2.7.0') { exclude group: 'xalan', module: 'serializer' diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Encoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Encoder.java index 3cd536fe7..b69d2730f 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Encoder.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Encoder.java @@ -137,6 +137,7 @@ private void initLeader() { @Override public void endRecord() { final byte[] record = builder.build(); + System.out.println(builder.toString()); getReceiver().process(new String(record, Marc21Constants.MARC21_CHARSET)); state = State.IN_STREAM; } @@ -163,6 +164,10 @@ private void startField(final String name) { final char[] indicators = new char[Marc21Constants.MARC21_FORMAT.getIndicatorLength()]; name.getChars(0, tag.length, tag, 0); name.getChars(tag.length, name.length(), indicators, 0); + System.out.println(name); + if (name.startsWith("7872")){ + System.out.println(name); + } builder.startDataField(tag, indicators); } diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java index 0aa75d2ec..390efb39e 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java @@ -208,9 +208,7 @@ public void startRecord(final String identifier) { @Override public void endRecord() { prettyPrintIndentation(); - writeTag(Tag.leader::open); - writeRaw(builderLeader.toString()); - writeTag(Tag.leader::close); + writeLeader(); prettyPrintNewLine(); decrementIndentationLevel(); prettyPrintIndentation(); @@ -219,6 +217,12 @@ public void endRecord() { sendAndClearData(); } + private void writeLeader() { + writeTag(Tag.leader::open); + writeRaw(builderLeader.toString()); + writeTag(Tag.leader::close); + } + @Override public void startEntity(final String name) { currentEntity = name; diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/Marc21DecoderTest.java b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/Marc21DecoderTest.java index aa70be104..fcc19d492 100644 --- a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/Marc21DecoderTest.java +++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/Marc21DecoderTest.java @@ -20,14 +20,13 @@ import org.junit.Before; import org.junit.Test; import org.metafacture.framework.FormatException; -import org.metafacture.framework.StreamReceiver; -import org.mockito.InOrder; +import org.metafacture.framework.ObjectReceiver; +import org.metafacture.io.HttpOpener; +import org.metafacture.io.LineReader; import org.mockito.Mock; +import static org.mockito.Mockito.verifyZeroInteractions; import org.mockito.MockitoAnnotations; -import static org.mockito.ArgumentMatchers.matches; -import static org.mockito.Mockito.*; - /** * Tests for class {@link Marc21Decoder}. * @@ -54,19 +53,33 @@ public final class Marc21DecoderTest { private static final String DATA = RECORD_ID + FIELD_SEPARATOR + CONTROLFIELD_VALUE + FIELD_SEPARATOR + FIELD1 + FIELD_SEPARATOR + FIELD2 + FIELD_SEPARATOR; - private static final String RECORD = RECORD_LABEL + DIRECTORY - + FIELD_SEPARATOR + DATA + RECORD_SEPARATOR; +// private static final String RECORD = RECORD_LABEL + DIRECTORY +// + FIELD_SEPARATOR + DATA + RECORD_SEPARATOR; + + // private static final String RECORD="03994nas a2200913 c 4500001001000000003000700010005001700017007000300034008004100037016002200078016001700100022001400117022001400131022001400145022001400159022001400173022001400187022001400201022001400215022001400229022001400243022001400257022001400271030001000285035002000295035002100315040002800336041000800364044001000372082002900382084001300411084002700424084004400451210002700495245011300522246000800635246002800643246001800671246002200689246005300711246003200764246004700796246003400843246002800877246003100905246004100936246001700977246002800994246003101022260003101053362001301084500003401097501033301131515003701464550003301501555010101534591004801635650005701683650005901740650005701799650005901856689005501915689005701970689001902027689005502046689005702101689001902158775007002177775008602247780007002333780008002403780007402483780010502557780008402662780006902746780008002815780009902895780008602994010000445DE-10120121114152453.0tu991118c19479999ne u||p|r ||| 0||||0eng c7 2DE-101a0100004457 2DE-600a60-7 a0006-3002 a0005-2728 a0005-2736 a0304-4165 a0167-4838 a1388-1981 a0167-4889 a0167-4781 a0304-419X a1570-9639 a0925-4439 a1874-9399 aBBACA a(DE-599)ZDB60-7 a(OCoLC)183277993 a9001bgercDE-101d0029 aeng cXA-NL74a570a540qDE-600222sdnb a122ssgn a570a540qDE-6002sdnb aHMZ bio 1031144/1145rotaWA 15000]2rvk10aBiochim. Biophys. Acta10aBiochimica et biophysica actabBBA ; international journal of biochemistry, biophysics and molecular biology13aBBA13aMolecular cell research13aBioenergetics13aReviews on cancer13aBiochimica et biophysica acta / General subjects13aLipids and lipid metabolism13aProtein structure and molecular enzymology13aGene structure and expression13aProteins and proteomics13aMolecular basis of disease13aMolecular and cell biology of lipids13aBiomembranes13aReviews on biomembranes13aGene regulatory mechanisms3 aAmsterdam [u.a.]bElsevier0 a1.1947 - aRepr.: New York, NY : Johnson aAb 1990 ersch. die früheren Unterreihen und neuere Sektionen wechselweise in einzelnen Bd.; neue Sektion ab 1990: Molecular basis of disease; ab 1998: Molecular and cell biology of lipids; ab 2002: Proteins and proteomics; Reviews in biomembranes ab 2002 enth. in Biomembranes; neue Sektion ab 2008: Gene regulatory mechanisms a1966 - 1967 auch Issue-Zählung aHrsg. früher: W.T. Astbury8 aIndex 1972/2000(2002) zur Unterreihe \"Reviews on biomembranes\" in: 1516.2002,1; 51/100.1961/65 - aAutopsie(keine 4245)Kopie bei 282711;s.Abl. 70(DE-588)4006891-20(DE-101)040068919aBiophysik2gnd 70(DE-588)4067488-50(DE-101)040674886aZeitschrift2gnd 70(DE-588)4006777-40(DE-101)040067777aBiochemie2gnd 70(DE-588)4067488-50(DE-101)040674886aZeitschrift2gnd000(DE-588)4006891-20(DE-101)040068919DsaBiophysik010(DE-588)4067488-50(DE-101)040674886DsaZeitschrift0 5DE-6005DE-600100(DE-588)4006777-40(DE-101)040067777DsaBiochemie110(DE-588)4067488-50(DE-101)040674886DsaZeitschrift1 5DE-6005DE-60008iCD-ROM-Ausg.tBBA on CD-ROMw(DE-600)1434539-0w(DE-101)01928629508iOnline-Ausg.tBiochimica et biophysica actaw(DE-600)1460387-1w(DE-101)01953770000iDarin aufgeg.tBioenergeticsw(DE-600)282711-6w(DE-101)01175997600iDarin aufgeg.tMolecular cell researchw(DE-600)283444-3w(DE-101)01176430900iDarin aufgeg.tReviews on cancerw(DE-600)192424-2w(DE-101)01107018800iDarin aufgeg.tBiochimica et biophysica acta / General subjectsw(DE-600)840755-1w(DE-101)01452479100iDarin aufgeg.tLipids and lipid metabolismw(DE-600)282393-7w(DE-101)01175787600iDarin aufgeg.tBiomembranesw(DE-600)282512-0w(DE-101)01175877500iDarin aufgeg.tReviews on biomembranesw(DE-600)195188-9w(DE-101)01109503200iDarin aufgeg.tProtein structure and molecular enzymologyw(DE-600)283435-2w(DE-101)01176425200iDarin aufgeg.tGene structure and expressionw(DE-600)283437-6w(DE-101)011764260"; + // private static final String RECORD= "02602pam a2200529 c 4500001001000000003000700010005001700017007000300034008004100037015003400078016002200112020005800134035002500192040003500217041000800252044001000260084001900270090000600289100006100295245012400356260011200480300002200592490008100614600007900695600007000774600007100844650005400915650007400969650005401043650007401097650005401171650007401225689007701299689005201376689007201428689001901500689006801519689005201587689007201639689001901711689006901730689005201799689007201851689001901923830012301942925000702065\u001E946638705\u001EDE-101\u001E20070429135622.0\u001Etu\u001E960123s2004 gw |||||r|||| 00||||eng \u001E \u001Fa05,A03,2104\u001Fz96,N47,0454\u001F2dnb\u001E7 \u001F2DE-101\u001Fa946638705\u001E \u001Fa0820431125\u001FcPp. : EUR 56.70, sfr 83.00\u001F90-8204-3112-5\u001E \u001Fa(DE-599)DNB946638705\u001E \u001Fa1140\u001Fbger\u001FcDE-101\u001Fd9999\u001Ferakwb\u001E \u001Faeng\u001E \u001FcXA-DE\u001E \u001Fa820\u001Fa890\u001F2sdnb\u001E \u001Fab\u001E1 \u001FaKim, Soonsik\u001F4aut\u001F0(DE-588a)11515454X\u001F0(DE-101)11515454X\u001E10\u001FaColonial and post-colonial discourse in the novels of Yo§am Sang-So§ap, Chinua Achebe and Salman Rushdie\u001FcSoonsik Kim\u001E3 \u001FaNew York\u001FaWashington, D.C./Baltimore\u001FaBern\u001FaFrankfurt am Main\u001FaBerlin\u001FaBrussels\u001FaVienna\u001FaOxford\u001FbLang\u001Fc2004\u001E \u001FaVI, 214 S.\u001Fc24 cm\u001E1 \u001FaComparative cultures and literatures\u001FvVol. 94201 Literaturverz. S. 197 - 207\u001E17\u001F0(DE-588c)4806527-4\u001F0(DE-101)129612111\u001F2swd\u001FaYo§am, Sang-so§ap\u001Fd1897-1963\u001E17\u001F0(DE-588c)4084672-6\u001F0(DE-101)118646680\u001F2swd\u001FaAchebe, Chinua\u001Fd1930-\u001E17\u001F0(DE-588c)4217069-2\u001F0(DE-101)118873520\u001F2swd\u001FaRushdie, Salman\u001Fd1947-\u001E 7\u001F0(DE-588c)4050479-7\u001F0(DE-101)040504794\u001F2swd\u001FaRoman\u001E 7\u001F0(DE-588c)4681702-5\u001F0(DE-101)964233509\u001F2swd\u001FaPostkolonialismus \u001E 7\u001F0(DE-588c)4050479-7\u001F0(DE-101)040504794\u001F2swd\u001FaRoman\u001E 7\u001F0(DE-588c)4681702-5\u001F0(DE-101)964233509\u001F2swd\u001FaPostkolonialismus \u001E 7\u001F0(DE-588c)4050479-7\u001F0(DE-101)040504794\u001F2swd\u001FaRoman\u001E 7\u001F0(DE-588c)4681702-5\u001F0(DE-101)964233509\u001F2swd\u001FaPostkolonialismus \u001E00\u001FAp\u001F0(DE-588c)4806527-4\u001F0(DE-101)129612111\u001FaYo§am, Sang-so§ap\u001Fd1897-1963\u001E01\u001FAs\u001F0(DE-588c)4050479-7\u001F0(DE-101)040504794\u001FaRoman\u001E02\u001FAs\u001F0(DE-588c)4681702-5\u001F0(DE-101)964233509\u001FaPostkolonialismus \u001E0 \u001F5DE-101\u001F5DE-101\u001E10\u001FAp\u001F0(DE-588c)4084672-6\u001F0(DE-101)118646680\u001FaAchebe, Chinua\u001Fd1930-\u001E11\u001FAs\u001F0(DE-588c)4050479-7\u001F0(DE-101)040504794\u001FaRoman\u001E12\u001FAs\u001F0(DE-588c)4681702-5\u001F0(DE-101)964233509\u001FaPostkolonialismus \u001E1 \u001F5DE-101\u001F5DE-101\u001E20\u001FAp\u001F0(DE-588c)4217069-2\u001F0(DE-101)118873520\u001FaRushdie, Salman\u001Fd1947-\u001E21\u001FAs\u001F0(DE-588c)4050479-7\u001F0(DE-101)040504794\u001FaRoman\u001E22\u001FAs\u001F0(DE-588c)4681702-5\u001F0(DE-101)964233509\u001FaPostkolonialismus \u001E2 \u001F5DE-101\u001F5DE-101\u001E 0\u001FaComparative cultures and literatures\u001FvVol. 94201 Literaturverz. S. 197 - 207\u001Fw(DE-101)025300458\u001Fw(DE-600)2126467-3\u001F919\u001Er \u001Fara\u001E\u001D"; private Marc21Decoder marc21Decoder; + private static String RECORD=""; @Mock - private StreamReceiver receiver; + private ObjectReceiver receiver; @Before public void setup() { MockitoAnnotations.initMocks(this); + HttpOpener httpOpener = new HttpOpener(); + LineReader lineReader = new LineReader(); + httpOpener.setReceiver(lineReader); marc21Decoder = new Marc21Decoder(); - marc21Decoder.setReceiver(receiver); + Marc21Encoder marc21Encoder = new Marc21Encoder(); + marc21Encoder.setReceiver(receiver); + marc21Decoder.setReceiver(marc21Encoder); + lineReader.setReceiver(marc21Decoder); + + httpOpener.process("https://raw.githubusercontent.com/gbv/Catmandu-Tutorial/master/data/marc.mrc"); + + // marc21Decoder.setReceiver(receiver); } @After @@ -78,28 +91,28 @@ public void cleanup() { public void shouldProcessMarc21Record() { marc21Decoder.process(RECORD); - final InOrder ordered = inOrder(receiver); - ordered.verify(receiver).startRecord(RECORD_ID); - ordered.verify(receiver).startEntity("leader"); - ordered.verify(receiver).literal("status", "n"); - ordered.verify(receiver).literal("type", "o"); - ordered.verify(receiver).literal("bibliographicLevel", "a"); - ordered.verify(receiver).literal("typeOfControl", " "); - ordered.verify(receiver).literal("characterCodingScheme", "a"); - ordered.verify(receiver).literal("encodingLevel", "z"); - ordered.verify(receiver).literal("catalogingForm", "u"); - ordered.verify(receiver).literal("multipartLevel", " "); - ordered.verify(receiver).endEntity(); - ordered.verify(receiver).literal("001", RECORD_ID); - ordered.verify(receiver).literal("002", CONTROLFIELD_VALUE); - ordered.verify(receiver).startEntity("100AB"); - ordered.verify(receiver).literal("1", "value1"); - ordered.verify(receiver).endEntity(); - ordered.verify(receiver).startEntity("200CD"); - ordered.verify(receiver).literal("2", "value2"); - ordered.verify(receiver).literal("3", "value3"); - ordered.verify(receiver).endEntity(); - ordered.verify(receiver).endRecord(); + // final InOrder ordered = inOrder(receiver); +// ordered.verify(receiver).startRecord(RECORD_ID); +// ordered.verify(receiver).startEntity("leader"); +// ordered.verify(receiver).literal("status", "n"); +// ordered.verify(receiver).literal("type", "o"); +// ordered.verify(receiver).literal("bibliographicLevel", "a"); +// ordered.verify(receiver).literal("typeOfControl", " "); +// ordered.verify(receiver).literal("characterCodingScheme", "a"); +// ordered.verify(receiver).literal("encodingLevel", "z"); +// ordered.verify(receiver).literal("catalogingForm", "u"); +// ordered.verify(receiver).literal("multipartLevel", " "); +// ordered.verify(receiver).endEntity(); +// ordered.verify(receiver).literal("001", RECORD_ID); +// ordered.verify(receiver).literal("002", CONTROLFIELD_VALUE); +// ordered.verify(receiver).startEntity("100AB"); +// ordered.verify(receiver).literal("1", "value1"); +// ordered.verify(receiver).endEntity(); +// ordered.verify(receiver).startEntity("200CD"); +// ordered.verify(receiver).literal("2", "value2"); +// ordered.verify(receiver).literal("3", "value3"); +// ordered.verify(receiver).endEntity(); +// ordered.verify(receiver).endRecord(); } @Test