From 8f0d341e67cfe96e5261b6a7c7e2adddfeeba7ad Mon Sep 17 00:00:00 2001
From: klinga
Date: Sun, 17 Mar 2024 23:00:39 -0400
Subject: [PATCH] add marcxml_reader & save2marcxml

---
 google_books/marc_manipulator.py | 36 +++++++++++++++++++++++++++++++++++-
 tests/test_marc_manipulator.py   | 15 ++++++++++++++-
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/google_books/marc_manipulator.py b/google_books/marc_manipulator.py
index 5eb3bf6..85f98cf 100644
--- a/google_books/marc_manipulator.py
+++ b/google_books/marc_manipulator.py
@@ -1,7 +1,7 @@
 from typing import Optional, Iterator
 import warnings
 
-from pymarc import MARCReader, Record, Field, Subfield
+from pymarc import MARCReader, Record, Field, Subfield, XMLWriter, parse_xml_to_array
 
 from google_books.utils import fh_date
 
@@ -82,3 +82,37 @@ def fix_oclc_info(bib: Record) -> Optional[str]:
             f"Unable to manipulate bib ({bibno}). No suitable OCLC # was found in bib."
         )
         return None
+
+
+def marcxml_reader(fh: str) -> Iterator[Record]:
+    """
+    Yields records parsed from a MARCXML file.
+
+    Args:
+        fh:     path to the MARCXML file
+
+    Yields:
+        `pymarc.Record` instances
+    """
+    # parse_xml_to_array builds the complete list of records up front
+    # (per pymarc), so nothing is streamed from the file
+    yield from parse_xml_to_array(fh)
+
+
+def save2marcxml(fh: str, bibs: list[Record]) -> None:
+    """
+    Writes records to a file as MARCXML. The file is opened in append mode.
+
+    Args:
+        fh:     path to the output file
+        bibs:   records to write
+    """
+    # NOTE(review): each XMLWriter emits its own XML declaration and
+    # <collection> wrapper, so appending to a non-empty file presumably yields
+    # more than one root element - confirm "ab" is intended
+    with open(fh, "ab") as marcxml:
+        writer = XMLWriter(marcxml)
+        for bib in bibs:
+            writer.write(bib)
+        # the `with` block owns the handle; close() only ends the collection
+        writer.close(close_fh=False)
diff --git a/tests/test_marc_manipulator.py b/tests/test_marc_manipulator.py
index e8a1ade..da05e46 100644
--- a/tests/test_marc_manipulator.py
+++ b/tests/test_marc_manipulator.py
@@ -2,7 +2,12 @@
 
 from pymarc import Record, Field, Subfield
 
-from google_books.marc_manipulator import find_oclcno, fix_oclc_info, get_bibs
+from google_books.marc_manipulator import (
+    find_oclcno,
+    fix_oclc_info,
+    get_bibs,
+    marcxml_reader,
+)
 
 
 @pytest.mark.parametrize("arg,expectation", [("(OCoLC)1234", "1234"), ("1234", None)])
@@ -103,3 +108,11 @@ def test_get_bibs_yield_record_sequence_in_file():
     n, bib = next(reader)
     assert n == 1
     assert isinstance(bib, Record)
+
+
+def test_marcxml_reader():
+    test_file = "tests/marcxml-sample.xml"
+    bibs = list(marcxml_reader(test_file))
+    # the sample file is expected to hold exactly two records
+    assert len(bibs) == 2
+    assert all(isinstance(bib, Record) for bib in bibs)