-
Notifications
You must be signed in to change notification settings - Fork 50
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
cbeb30b
commit 6c70246
Showing
4 changed files
with
107 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
A set of scripts to add isbn_13 values to editions with IA/ocaid references containing one. | ||
### How To Use | ||
```bash | ||
# Find Editions with IA ISBN, but no ISBN 13 | ||
./find_editions_with_isbnianot13.sh /path/to/ol_dump.txt.gz /path/to/filtered_dump.txt.gz | ||
# Add ISBN 13s converted from the ia ocaid source | ||
python isbn_ia_to_13.py --file=/path/to/filtered_dump.txt.gz --dry_run=<bool> --limit=<int> | ||
``` | ||
If `dry_run` is True, the script will run as normal, but no changes will be saved to OpenLibrary. | ||
This is for debugging purposes. By default, `dry_run` is `True`. | ||
`limit` is the maximum number of changes to OpenLibrary that will occur before the script quits. | ||
By default, `limit` is set to `1`. Setting `limit` to `0` allows unlimited edits. | ||
A log is automatically generated whenever `isbn_ia_to_13.py` executes. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#!/bin/bash
#
# Filter a gzipped Open Library dump down to the edition rows that carry an
# "ia:isbn_<13 digits>" source record but have neither an isbn_13 nor an
# isbn_10 field, and write the surviving rows (gzipped) to the output path.
#
# Usage: ./find_editions_with_isbnianot13.sh <ol_dump.txt.gz> <output.txt.gz>

if [[ -z $1 ]]
then
    echo "No dump file provided"
    exit 1
fi
if [[ -z $2 ]]
then
    echo "No output file provided"
    exit 1
fi

OL_DUMP=$1
OUTPUT=$2

# NOTE: the digit class is spelled [0-9] because POSIX extended regular
# expressions (grep -E) have no \d shorthand; with \d GNU grep matches nothing
# useful here. Paths are quoted so file names containing spaces still work.
zgrep '^/type/edition' "$OL_DUMP" |
    grep -E '"ia:isbn_[0-9]{13}"' |
    grep -v -E '"isbn_13":' |
    grep -v -E '"isbn_10"' |
    pv |
    gzip > "$OUTPUT"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
""" | ||
IA isbn ref to isbn 13 | ||
NOTE: This script ideally works on an Open Library Dump that only contains editions with an IA isbn ref and no isbn_13 | ||
""" | ||
import gzip | ||
import json | ||
import re | ||
|
||
import isbnlib | ||
import olclient | ||
|
||
|
||
class ConvertISBNiato13Job(olclient.AbstractBotJob):
    """Add ``isbn_13`` to editions whose source records contain ``ia:isbn_<13 digits>``.

    Relies on ``self.args``, ``self.ol``, ``self.logger``, ``self.save`` and
    ``self.write_changes_declaration`` being provided by
    ``olclient.AbstractBotJob``.
    """

    # Full-string pattern for a source record such as "ia:isbn_9780140328721";
    # group 1 captures the 13 digits that follow the "ia:isbn_" prefix.
    _IA_ISBN_13_RE = re.compile(r"ia:isbn_([0-9]{13})")

    @classmethod
    def _first_ia_isbn_13(cls, source_records):
        """Return ``(source_record, isbn_13)`` for the first matching record, else ``None``."""
        for source_record in source_records:
            match = cls._IA_ISBN_13_RE.fullmatch(source_record)
            if match:
                return source_record, match.group(1)
        return None

    def run(self) -> None:
        """Looks for any IA ISBN to convert to 13"""
        self.write_changes_declaration()
        # Column positions within one tab-separated dump row.
        header = {"type": 0, "key": 1, "revision": 2, "last_modified": 3, "JSON": 4}
        comment = "extract ISBN 13 from IA source_record"
        with gzip.open(self.args.file, "rb") as fin:
            for row in fin:
                columns = row.decode().split("\t")
                _json = json.loads(columns[header["JSON"]])
                if _json["type"]["key"] != "/type/edition":
                    continue

                # _json is a plain dict, so the field must be looked up as a
                # key; hasattr() on a dict never sees JSON fields.
                if "isbn_13" in _json:
                    # we only update editions with no existing isbn 13s (for now at least)
                    continue

                if "source_records" not in _json:
                    continue

                found = self._first_ia_isbn_13(_json["source_records"])
                if found is None:
                    continue
                source_record, isbn_13 = found

                # Only the first matching source record is considered; if its
                # digits are not a valid ISBN 13 the edition is skipped.
                if not isbnlib.is_isbn13(isbn_13):
                    continue

                olid = _json["key"].split("/")[-1]
                # Re-check against the fetched record, since the dump row may
                # no longer reflect the edition's current state.
                edition = self.ol.Edition.get(olid)
                if edition.type["key"] != "/type/edition":
                    continue

                if hasattr(edition, "isbn_13"):
                    # don't update editions that already have an isbn 13
                    continue

                isbns_13 = [isbn_13]

                edition.isbn_13 = isbns_13
                self.logger.info("\t".join([olid, source_record, str(isbns_13)]))
                self.save(lambda: edition.save(comment=comment))
|
||
|
||
if __name__ == "__main__":
    job = ConvertISBNiato13Job()

    try:
        job.run()
    except Exception as e:
        # Record the failure through the job's logger, then re-raise so the
        # process still terminates with the error. A bare ``raise`` re-raises
        # the active exception with its original traceback.
        job.logger.exception(e)
        raise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
openlibrary-client==0.0.30 | ||
isbnlib==3.10.14 |