Skip to content

Commit

Permalink
Merge pull request #1 from BookOps-CAT/monthly-run
Browse files Browse the repository at this point in the history
Monthly run
  • Loading branch information
charlottekostelic authored Apr 8, 2024
2 parents 7b8f75f + ccc0c89 commit cda0b00
Show file tree
Hide file tree
Showing 20 changed files with 343 additions and 227 deletions.
48 changes: 48 additions & 0 deletions .github/workflows/monthly-run.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Runs monthly_lcsh_check.py and pushes the refreshed data files and log
# to the lcsh_check branch.
name: monthly lcsh check
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
# Scheduled trigger, still disabled. The key is `cron` (not `chron`) and the
# expression must be quoted because a bare `*` is YAML alias syntax.
# "0 9 1 * *" fires once, at 09:00 UTC on the 1st of each month; a `*` in the
# minute field would fire every minute of that hour.
# on:
#   schedule:
#     - cron: "0 9 1 * *"
jobs:
  monthly-run:
    name: Run monthly LCSH check
    runs-on: ubuntu-latest
    permissions:
      # Declaring `permissions` sets every unlisted scope to none, so the
      # push below needs contents: write spelled out.
      contents: write
      issues: write
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install -r dev-requirements.txt
          python -m pip install -r requirements.txt

      - name: Create branch
        run: git checkout -b lcsh_check

      - name: Run script
        run: |
          python monthly_lcsh_check.py

      - name: Commit and push changed files
        # `|` keeps one command per line; a plain multi-line scalar would be
        # folded into a single command line.
        run: |
          git config user.name "Charlotte Kostelic"
          git config user.email "[email protected]"
          git add lcsh.log
          git diff data/
          git add data/*
          git commit -a -m "Monthly LCSH Check"
          # The branch was created locally in this job, so it has no upstream yet.
          git push --set-upstream origin lcsh_check
30 changes: 8 additions & 22 deletions .github/workflows/unit-tests.yaml
Original file line number Diff line number Diff line change
@@ -1,39 +1,25 @@
name: tests
on:
push:
branches:
- main
pull_request:
branches:
- main
on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.12"]
steps:
- uses: actions/checkout@v4
- name: Set up Python

- name: Set up Python 3.12
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version}}
python-version: "3.12"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install -r dev-requirements.txt
- name: Run tests
run: python -m pytest --cov=acc_lcsh_check/

- name: Send report to Coveralls
uses: AndreMiras/coveralls-python-action@develop
with:
parallel: true
github-token: ${{ secrets.GITHUB_TOKEN}}
finish:
needs: test
runs-on: ubuntu-latest
steps:
- name: Coveralls Finished
uses: AndreMiras/coveralls-python-action@develop
with:
parallel-finished: true
github-token: ${{ secrets.GITHUB_TOKEN}}
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ dmypy.json

# localfiles/
temp/
tests/data/*
!tests/data/*_in.csv
!lcsh.log


# OSX
.DS_Store
48 changes: 0 additions & 48 deletions acc_lcsh_check/checker.py

This file was deleted.

41 changes: 36 additions & 5 deletions acc_lcsh_check/lcsh.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,46 @@


class LCTerm:
def __init__(self, id: str, old_heading: str, id_type: str):
"""
A class that defines a LC subject heading.
"""

def __init__(self, id: str, old_heading: str) -> None:
self.id = id
self.old_heading = old_heading
self.id_type = id_type
self.format = ".skos.json"
self.url = "https://id.loc.gov/authorities/"

self._get_id_type()

self.query = f"{self.url + self.id_type + '/' + self.id}"

self._get_skos_json()
self._get_current_heading()
self._get_changes()
self._compare_headings()

def _get_id_type(self):
if self.id[:2] == "sh":
self.id_type = "subjects"
elif self.id[:2] == "dg":
self.id_type = "demographicTerms"
elif self.id[:1] == "n":
self.id_type = "names"

def _get_skos_json(self):
    """
    Send request to id.loc.gov and get the response in .skos.json format.

    The URL is self.query followed by self.format. The parsed JSON is stored
    on self.skos_json and also returned.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the response status is 4xx/5xx.
    """
    # Without a timeout requests waits indefinitely, which would stall an
    # unattended run.
    skos_json_response = requests.get(f"{self.query}{self.format}", timeout=30)
    # Fail on an HTTP error status instead of trying to parse an error page.
    skos_json_response.raise_for_status()
    self.skos_json = skos_json_response.json()
    return self.skos_json

def _get_current_heading(self):
"""
Parse response from id.loc.gov and get current heading.
"""
for item in self.skos_json:
if "id.loc.gov/authorities/" in item["@id"]:
if "http://www.w3.org/2004/02/skos/core#prefLabel" in item:
Expand All @@ -34,6 +55,10 @@ def _get_current_heading(self):
][0]["@value"]

def _get_changes(self):
"""
Parse response from id.loc.gov and determine if record has been changed
in last month or if it is deprecated.
"""
today = datetime.datetime.now()
self.changes = []
for item in self.skos_json:
Expand All @@ -60,17 +85,23 @@ def _get_changes(self):
change_date = datetime.datetime.strptime(
change["change_date"], "%Y-%m-%dT%H:%M:%S"
)
if change_date >= today - datetime.timedelta(days=30):
if change_date >= today - datetime.timedelta(days=31):
self.recent_change = True
else:
self.recent_change = False
if "deprecated" in change["change_reason"]:
self.is_deprecated = True
self.deprecated_date = change_date
else:
self.is_deprecated = False
self.deprecated_date = None

def _compare_headings(self):
"""
Sometimes headings are marked as revised in id.loc.gov without changing the
heading. This function checks if the heading is the same as the ACC term.
"""
if str(self.current_heading).lower() != str(self.old_heading).lower():
self.check_heading = True
self.revised_heading = True
else:
self.check_heading = False
self.revised_heading = False
80 changes: 80 additions & 0 deletions acc_lcsh_check/log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import csv
import os
import datetime
import logging
import logging.handlers
from acc_lcsh_check.lcsh import LCTerm


class LogSession:
    """
    Check a CSV file of headings against id.loc.gov and log the outcome.

    Each input row holds a heading and its identifier. On construction every
    row is looked up through LCTerm and sorted into deprecated, revised or
    current terms (see self.session_data). run_logger() writes the current
    terms to the output file and logs the rest; rename_files() archives the
    input file under a dated name and moves the output file into its place.
    """

    def __init__(self, logger_name: str, logfile: str, infile: str, outfile: str):
        """
        Configure the logger and classify every term in infile.

        Args:
            logger_name: name passed to logging.getLogger.
            logfile: path of the log file written by the logger.
            infile: path of the CSV file of terms to check.
            outfile: path the list of current terms is written to.
        """
        self.logger_name = logger_name
        self.logfile = logfile
        self.infile = infile
        self.outfile = outfile
        self.logger = logging.getLogger(logger_name)
        self.formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

        # logging.getLogger returns the same logger object for the same name,
        # so adding a handler unconditionally would write every message once
        # per LogSession created in the process. Reuse a handler that already
        # targets this log file.
        log_path = os.path.abspath(logfile)
        attached = [
            handler
            for handler in self.logger.handlers
            if isinstance(handler, logging.handlers.RotatingFileHandler)
            and handler.baseFilename == log_path
        ]
        if attached:
            self.logger_handler = attached[0]
        else:
            self.logger_handler = logging.handlers.RotatingFileHandler(
                logfile, encoding="utf-8", delay=True
            )
            self.logger.addHandler(self.logger_handler)

        self.logger_handler.setFormatter(self.formatter)
        self.logger.setLevel(logging.DEBUG)
        self._check_terms()

    def _check_terms(self):
        """
        Look up every row of infile and group the terms by status.

        Returns:
            dict with the keys "deprecated_terms" and "revised_terms" (lists
            of identifiers) and "current_terms" (lists of ready-to-write CSV
            lines). The same dict is stored on self.session_data.
        """
        deprecated_terms = []
        revised_terms = []
        current_terms = []

        with open(self.infile, "r", encoding="utf-8", newline="") as csvfile:
            for term in csv.reader(csvfile):
                if not term:
                    # csv.reader yields an empty list for a blank line.
                    continue
                # The input has a space after the comma, so the second field
                # keeps its surrounding quotes; strip them here.
                loc = LCTerm(
                    id=term[1].strip('" '),
                    old_heading=term[0].strip(' "'),
                )
                if loc.is_deprecated is True:
                    deprecated_terms.append(loc.id)
                elif loc.revised_heading is True:
                    revised_terms.append(loc.id)
                else:
                    current_terms.append(f'"{loc.old_heading}", "{loc.id}"')
        self.session_data = {
            "deprecated_terms": deprecated_terms,
            "revised_terms": revised_terms,
            "current_terms": current_terms,
        }
        return self.session_data

    def run_logger(self):
        """
        Write the current terms to outfile and log the terms that need review.

        Any existing outfile is overwritten. Deprecated and revised terms are
        each logged when present, so neither list hides the other.
        """
        with open(self.outfile, "w", encoding="utf-8") as out:
            out.writelines(
                f"{term}\n" for term in self.session_data["current_terms"]
            )

        deprecated = self.session_data["deprecated_terms"]
        revised = self.session_data["revised_terms"]

        self.logger.info("Checking id.loc.gov")
        if not deprecated and not revised:
            self.logger.info("No changes to ACC terms this month.")
        if deprecated:
            self.logger.debug(f"Deprecated terms to check: {deprecated}")
        if revised:
            self.logger.debug(f"Revised terms to check: {revised}")
        self.logger.info(f"Updated list of current terms is in {self.outfile}")

    def rename_files(self):
        """
        Archive infile as <infile stem>_<YYYYMMDD>.csv and move outfile to infile.

        An archive already written today is overwritten.
        """
        today = datetime.datetime.now().strftime("%Y%m%d")
        # splitext only removes the final extension, so dots elsewhere in the
        # path (for example "./data/x.csv") do not truncate the name.
        archived = f"{os.path.splitext(self.infile)[0]}_{today}.csv"
        os.replace(self.infile, archived)
        os.replace(self.outfile, self.infile)
69 changes: 69 additions & 0 deletions data/acc_in.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"Abnormalities, human", "sh85000182"
"Amputees", "sh85004669"
"Asperger's syndrome", "sh91001353"
"Autism", "sh85010038"
"Autistic youth", "sh92001053"
"Blind", "sh85014855"
"Cerebral palsied", "sh85022096"
"Chronically ill", "sh85025397"
"Deaf", "sh85036047"
"Down syndrome", "sh85039232"
"Drug addicts", "sh85089803"
"Ex-drug addicts", "sh2007000408"
"Hearing impaired", "sh85059626"
"Insanity (Law)", "sh85066558"
"Learning disabled", "sh91000027"
"Selective mutism", "sh85089143"
"People with mental disabilities", "sh85083667"
"Terminally ill", "sh85134022"
"Terminally ill children", "sh85134025"
"Traffic accident victims", "sh94000400"
"Indians of North America", "sh85065184"
"Shamans", "sh85121087"
"Shamanism", "sh85121085"
"Delaware Indians", "sh85036546"
"Algonquian Indians", "sh85003482"
"Indian women", "sh93007784"
"Indian children", "sh93008500"
"Indian art", "sh85065027"
"Eskimos", "sh85044824"
"Indians in motion pictures", "sh85065084"
"Indian youth", "sh93007384"
"Indians in literature", "sh85065082"
"Indians of North America--Ethnobotany", "sh85065259"
"Indian reservations", "sh85065369"
"Female impersonators", "sh85064634"
"Female-to-male transsexuals", "sh2002011161"
"Homosexuality", "sh85061780"
"Intersexuality", "sh85060401"
"Lesbianism", "sh85076157"
"Lesbian couples as parents", "sh2010012672"
"Lesbian teenagers", "sh93005205"
"Man-woman relationships", "sh92001504"
"Minority lesbians", "sh98007820"
"Parents of sexual minority youth", "sh2004011607"
"Sexual minorities", "sh2004003385"
"Transsexualism", "sh94005829"
"Transsexuals", "sh85137086"
"Multiple personality", "sh85088368"
"Deviant behavior", "sh85037372"
"Cutting (Self-mutilation)", "sh2008003537"
"Self-mutilation", "sh85119771"
"Self-mutilation in adolescence", "sh2006008443"
"Schizophrenics", "sh85118170"
"Paranoid Schizophrenia", "sh87001886"
"Depression, mental", "sh85037053"
"Race", "sh85110232"
"Racism", "sh85110266"
"Race Awareness", "sh85110234"
"Race Discrimination", "sh85110237"
"Race Relations", "sh85110249"
"Minorities", "sh85085792"
"Indians of North America", "sh85065184"
"Indigenous Peoples", "sh85090174"
"African Americans", "sh85001932"
"Hispanic Americans", "sh85061051"
"White People", "sh85146547"
"Intercultural Communication", "sh85067222"
"Immigrants", "sh85064517"
"Racially mixed People", "sh91005403"
Loading

0 comments on commit cda0b00

Please sign in to comment.