Skip to content

Commit

Permalink
fix number of rejected items in Google reconciliation report (dedup)
Browse files — browse the repository at this point in the history
  • Loading branch information
klinga committed Aug 26, 2024
1 parent 2e8732e commit 9daa4b5
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions google_books/hathi_processor.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -95,21 +95,21 @@ def parse_hathi_processing_report(fh: str) -> None:

def google_reconciliation_to_barcodes_lst(fh: str) -> list[str]:
"""
Parses Google's *FOreconciled.txt report and returns a list
Parses Google's *FOreconciled.txt report and returns a list. The list is deduped.
Args:
fh: path to google reconciliation report
Returns:
A list of rejected barcodes
"""
barcodes = []
barcodes = set()
with open(fh, "r") as report:
reader = csv.reader(report)
next(reader)
for row in reader:
barcodes.append(row[0])
return barcodes
barcodes.add(row[0])
return list(barcodes)


def clean_metadata_for_hathi_submission(
Expand All @@ -124,6 +124,7 @@ def clean_metadata_for_hathi_submission(
google_report: path to google reconciliation FO report
"""
rejected_barcodes = google_reconciliation_to_barcodes_lst(google_report)
print(f"Rejected {len(rejected_barcodes)} barcode(s).")
bibs = marcxml_reader(marcxml)

bibs2keep = []
Expand Down

0 comments on commit 9daa4b5

Please sign in to comment.