Skip to content

Commit

Permalink
add marcxml_reader & save2marcxml
Browse files Browse the repository at this point in the history
  • Loading branch information
klinga committed Mar 18, 2024
1 parent 75a4e4b commit 8f0d341
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 2 deletions.
15 changes: 14 additions & 1 deletion google_books/marc_manipulator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Optional, Iterator
import warnings

from pymarc import MARCReader, Record, Field, Subfield
from pymarc import MARCReader, Record, Field, Subfield, XMLWriter, parse_xml_to_array

from google_books.utils import fh_date

Expand Down Expand Up @@ -82,3 +82,16 @@ def fix_oclc_info(bib: Record) -> Optional[str]:
f"Unable to manipulate bib ({bibno}). No suitable OCLC # was found in bib."
)
return None


def marcxml_reader(fh: str) -> Iterator[Record]:
    """Yield, one at a time, the records parsed from a MARCXML source.

    Args:
        fh: MARCXML source handed unchanged to pymarc's
            ``parse_xml_to_array`` (annotated as a path string).

    Yields:
        Each item produced by ``parse_xml_to_array``, in the order returned.
    """
    # This is a generator: nothing is parsed until the first item is requested.
    yield from parse_xml_to_array(fh)


def save2marcxml(fh: str, bibs: list[Record]) -> None:
    """Write the given records as MARCXML to the file at ``fh``.

    Args:
        fh: path of the output file; it is opened in binary append mode.
        bibs: records to serialize, written in the order given.

    Raises:
        OSError: if the output file cannot be opened.
    """
    # NOTE(review): "ab" appends to an existing file. XMLWriter presumably
    # emits its own XML prolog/root element per writer, so appending to a
    # non-empty file likely yields an invalid document -- confirm whether
    # "wb" was intended.
    writer = XMLWriter(open(fh, "ab"))
    try:
        for bib in bibs:
            writer.write(bib)
    finally:
        # Always close the writer, even when a record fails to serialize,
        # so the underlying file handle is not leaked.
        writer.close()
15 changes: 14 additions & 1 deletion tests/test_marc_manipulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@

from pymarc import Record, Field, Subfield

from google_books.marc_manipulator import find_oclcno, fix_oclc_info, get_bibs
from google_books.marc_manipulator import (
find_oclcno,
fix_oclc_info,
get_bibs,
marcxml_reader,
)


@pytest.mark.parametrize("arg,expectation", [("(OCoLC)1234", "1234"), ("1234", None)])
Expand Down Expand Up @@ -103,3 +108,11 @@ def test_get_bibs_yield_record_sequence_in_file():
n, bib = next(reader)
assert n == 1
assert isinstance(bib, Record)


def test_marcxml_reader():
    """marcxml_reader yields every record of the sample file as a Record."""
    test_file = "tests/marcxml-sample.xml"
    bibs = list(marcxml_reader(test_file))

    for bib in bibs:
        assert isinstance(bib, Record)

    # The original checked that the last zero-based enumerate index was 1,
    # i.e. two records. Counting explicitly avoids a NameError on an unbound
    # loop variable when the reader yields nothing.
    assert len(bibs) == 2

0 comments on commit 8f0d341

Please sign in to comment.