Skip to content

Commit

Permalink
Merge pull request #30 from mlibrary/remediated-sh-headings
Browse files Browse the repository at this point in the history
Remediated sh headings
  • Loading branch information
niquerio authored Aug 26, 2024
2 parents 0112f2b + 10841e9 commit ece432f
Show file tree
Hide file tree
Showing 28 changed files with 699 additions and 37 deletions.
39 changes: 39 additions & 0 deletions .github/workflows/update-sh-config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Workflow: regenerates the remediated subject headings config file and
# proposes the result as a pull request.
name: Update subject headings config file

# Triggers: manual dispatch, plus the cron schedule below.
on:
workflow_dispatch:
schedule:
- cron: '0 8 1 * *' #8AM first of the month


jobs:
update_subject_headings:
runs-on: ubuntu-latest
# Exposes the head SHA reported by the "cpr" (Create Pull Request) step.
outputs:
sha: ${{ steps.cpr.outputs.pull-request-head-sha }}
steps:
- uses: actions/checkout@v4
# Concatenate every env.* file in the working directory into a single .env
- name: Create .env file
run: cat env.* > .env
- name: Load .env file
uses: xom9ikk/dotenv@v2
- name: Set up Ruby 3.3
uses: ruby/setup-ruby@v1
with:
ruby-version: '3.3'
bundler-cache: true
# Put the repository's exe/ directory on PATH so later steps can call
# `browse` by name.
- name: set path
run: |
echo "$GITHUB_WORKSPACE/exe" >> $GITHUB_PATH
# Runs the generator; the API key comes from repository secrets and the
# set id from repository variables.
- name: get update
env:
ALMA_API_KEY: ${{ secrets.ALMA_API_KEY }}
SUBJECT_HEADING_REMEDIATION_SET_ID: ${{ vars.SUBJECT_HEADING_REMEDIATION_SET_ID }}
run: browse subjects generate_remediated_authorities_file
# Opens a pull request with the commit message, title and reviewer below.
- name: Create Pull Request
id: cpr
uses: peter-evans/create-pull-request@v6
with:
commit-message: "update remediated subject headings config file"
title: Update remediated subject headings config file
reviewers: niquerio
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ authority_browse.zip
reports/
*.db
.solargraph.yml
*.sql

tmp/*
!tmp/.keep
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ gem "canister"
gem "rubyzip"
gem "semantic_logger"
gem "thor"
gem "marc"
gem "solr_cloud-connection", ">= 0.4.0"
gem "alma_rest_client", github: "mlibrary/alma_rest_client", tag: "v2.0.0"

gem "sqlite3", "~> 1.4", platforms: :mri
gem "jdbc-sqlite3", "~> 3.28", platforms: :jruby
Expand Down
42 changes: 42 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,28 @@
GIT
remote: https://github.com/mlibrary/alma_rest_client.git
revision: 9606225d82480b6d1568902813ae9018dd8c1acc
tag: v2.0.0
specs:
alma_rest_client (2.0.0)
activesupport (~> 7.0, >= 4.2)
faraday
faraday-retry
httpx
rexml

GEM
remote: https://rubygems.org/
specs:
activesupport (7.1.3.4)
base64
bigdecimal
concurrent-ruby (~> 1.0, >= 1.0.2)
connection_pool (>= 2.2.5)
drb
i18n (>= 1.6, < 2)
minitest (>= 5.1)
mutex_m
tzinfo (~> 2.0)
addressable (2.8.6)
public_suffix (>= 2.0.2, < 6.0)
ast (2.4.2)
Expand All @@ -10,30 +32,42 @@ GEM
canister (0.9.2)
coderay (1.1.3)
concurrent-ruby (1.2.2)
connection_pool (2.4.1)
crack (0.4.5)
rexml
diff-lcs (1.5.0)
docile (1.4.0)
dotenv (2.8.1)
drb (2.2.1)
faraday (2.7.12)
base64
faraday-net_http (>= 2.0, < 3.1)
ruby2_keywords (>= 0.0.4)
faraday-follow_redirects (0.3.0)
faraday (>= 1, < 3)
faraday-net_http (3.0.2)
faraday-retry (2.2.1)
faraday (~> 2.0)
ffi (1.16.3)
ffi-icu (0.5.2)
ffi (~> 1.0, >= 1.0.9)
hashdiff (1.0.1)
http-2-next (1.0.1)
httpx (1.1.5)
http-2-next (>= 1.0.1)
i18n (1.14.5)
concurrent-ruby (~> 1.0)
json (2.7.1)
language_server-protocol (3.17.0.3)
lint_roller (1.1.0)
marc (1.2.0)
rexml
scrub_rb (>= 1.0.1, < 2)
unf
method_source (1.0.0)
milemarker (1.0.0)
minitest (5.24.1)
mutex_m (0.2.0)
mysql2 (0.5.5)
parallel (1.23.0)
parser (3.2.2.4)
Expand Down Expand Up @@ -80,6 +114,7 @@ GEM
ruby-progressbar (1.13.0)
ruby2_keywords (0.0.5)
rubyzip (2.3.2)
scrub_rb (1.0.1)
semantic_logger (4.15.0)
concurrent-ruby (~> 1.0)
sequel (5.75.0)
Expand Down Expand Up @@ -110,6 +145,11 @@ GEM
standardrb (1.0.1)
standard
thor (1.3.0)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
unf (0.1.4)
unf_ext
unf_ext (0.0.9.1)
unicode-display_width (2.5.0)
webmock (3.19.1)
addressable (>= 2.8.0)
Expand All @@ -121,6 +161,7 @@ PLATFORMS
x86_64-linux

DEPENDENCIES
alma_rest_client!
byebug
canister
concurrent-ruby (~> 1.1)
Expand All @@ -130,6 +171,7 @@ DEPENDENCIES
ffi-icu
httpx
jdbc-sqlite3 (~> 3.28)
marc
milemarker (~> 1.0)
mysql2
pry (~> 0.14)
Expand Down
2 changes: 1 addition & 1 deletion compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ services:
- .:/app
- gem_cache:/gems
env_file:
- .env
- env.development
- .env
command: "tail -f /dev/null"

database:
Expand Down
12 changes: 12 additions & 0 deletions conf/remediated_subjects.xml

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions docs/remediated_subject_headings_flowchart.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
```mermaid
flowchart TD
A[Set up Subject Authorities DB] --> B[Iterate over remediated authority records\nto add remediated headings to subjects table]
B --> C[Iterate over remediated authority records again.\n Add see_instead xrefs and broader/narrower xrefs]
```
2 changes: 2 additions & 0 deletions env.example
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
BIBLIO_SOLR="http://YOUR_SOLR_URL/solr/biblio"
ALMA_API_KEY="YOUR_API_KEY"
SUBJECT_HEADING_REMEDIATION_SET_ID="YOUR_SET_ID"
2 changes: 1 addition & 1 deletion exe/browse
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env ruby
$LOAD_PATH.unshift("/app/lib")
$LOAD_PATH.unshift("#{File.dirname(__FILE__)}/../lib")

require "bundler/setup"
require "browse"
Expand Down
3 changes: 3 additions & 0 deletions lib/authority_browse.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
require "byebug"
require "services"
require "concurrent"
require "alma_rest_client"
require "marc"

module AuthorityBrowse
end
Expand All @@ -19,3 +21,4 @@ module AuthorityBrowse
require "authority_browse/base"
require "authority_browse/names"
require "authority_browse/subjects"
require "authority_browse/remediated_subjects"
163 changes: 163 additions & 0 deletions lib/authority_browse/remediated_subjects.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
module AuthorityBrowse
class RemediatedSubjects
include Enumerable

# List of RemediatedSubjects::Entries
# @param file_path [String] Path to config file with remediated subjects
# info
def initialize(file_path = S.remediated_subjects_file)
  # Each line of the config file is handed to Entry as one MARCXML record
  # string. Blank or whitespace-only lines (for example a trailing newline
  # left by an editor) carry no record; building an Entry from one only
  # fails later, far from the cause, so they are skipped here.
  # File.foreach streams the file instead of loading every line up front.
  @entries = File.foreach(file_path).filter_map do |line|
    Entry.new(line) unless line.strip.empty?
  end
end

# Iterates over the loaded entries; this is the method Enumerable builds on.
#
# @yieldparam entry [Entry] one remediated authority record
# @return [Array<Entry>, Enumerator] the entries when a block is given,
#   otherwise an Enumerator over them
def each(&handler)
  @entries.each(&handler)
end

class Entry
  # Wraps a single authority record.
  #
  # @param xml [String] Authority Record MARCXML String; the first record
  #   the reader yields is kept
  def initialize(xml)
    reader = MARC::XMLReader.new(StringIO.new(xml))
    @record = reader.first
  end

  # @return [String] value of the record's 001 field
  def id
    control_field = @record["001"]
    control_field.value
  end

  # The heading from the 150 field, built on first use and then reused.
  #
  # @return [Term::Preferred]
  def preferred_term
    @preferred_term ||= Term::Preferred.new(@record["150"])
  end

  # Builds one Term per 450/550 field, using the first Term kind whose
  # match? accepts the field. Fields accepted by no kind are left out.
  #
  # @return [Array<Term>] An Array of xref terms
  def xrefs
    @record.fields(["450", "550"]).filter_map do |field|
      term_class = [Term::SeeInstead, Term::Broader, Term::Narrower].find do |candidate|
        candidate.match?(field)
      end
      term_class&.new(field)
    end
  end

  # Writes the preferred term, then every xref, to the database, passing
  # this record's id to each of them. The return value is not meaningful.
  def add_to_db
    record_id = id
    preferred_term.add_to_db(record_id)
    xrefs.each { |xref| xref.add_to_db(record_id) }
  end
end

class Term
  # Subfield codes whose values are joined to form a heading label.
  LABEL_SUBFIELD_CODES = %w[a v x y z].freeze

  # @param field [MARC::DataField] A subject term field
  def initialize(field)
    @field = field
  end

  # What kind of field it is. It's used for setting the xref_kind in the
  # subjects_xrefs table. Subclasses that write xrefs override this.
  #
  # @raise [NotImplementedError] when the subclass does not define a kind
  def kind
    raise NotImplementedError
  end

  # First step in adding an xref term to the database: make sure the term
  # has a row in the subjects table. It is overridden for a Preferred term.
  #
  # The row is inserted only when no subjects row has this term's
  # match_text. Checking for the row itself (rather than comparing id with
  # match_text) matters because a row created here has id == match_text; a
  # later record that references the same heading must not insert it again.
  #
  # @param preferred_term_id [String] id of the record the xref belongs to;
  #   not used here, but subclasses that call super rely on the same
  #   signature
  # @return [void]
  def add_to_db(preferred_term_id)
    subjects = AuthorityBrowse.db[:subjects]
    text = match_text
    return if subjects.first(match_text: text)

    # No existing row, so the match text doubles as the id (see #id).
    subjects.insert(id: text, label: label, match_text: text, deprecated: false)
  end

  # Values of the a, v, x, y and z subfields, in field order, joined by "--".
  #
  # @return [String]
  def label
    @field.subfields
      .filter_map { |subfield| subfield.value if LABEL_SUBFIELD_CODES.include?(subfield.code) }
      .join("--")
  end

  # @return [String] the label run through AuthorityBrowse::Normalize.match_text
  def match_text
    AuthorityBrowse::Normalize.match_text(label)
  end

  # The id of the subjects row with this term's match_text, or the
  # match_text itself when there is no such row. Queries the database on
  # every call.
  #
  # @return [String]
  def id
    AuthorityBrowse.db[:subjects]&.first(match_text: match_text)&.dig(:id) || match_text
  end

  class Preferred < Term
    # Adds the preferred term to the subjects table under the given id.
    #
    # @param id [String] id to store the term under
    # @return [void]
    def add_to_db(id)
      AuthorityBrowse.db[:subjects].insert(id: id, label: label, match_text: match_text, deprecated: false)
    end
  end

  class SeeInstead < Term
    # @param field [MARC::DataField]
    # @return [Boolean] true for 450 fields
    def self.match?(field)
      field.tag == "450"
    end

    # @return [String]
    def kind
      "see_instead"
    end

    # Ensures the term has a subjects row, then links it to the preferred
    # term with a "see_instead" xref.
    #
    # @param preferred_term_id [String]
    # @return [void]
    def add_to_db(preferred_term_id)
      super
      AuthorityBrowse.db[:subjects_xrefs]
        .insert(subject_id: id, xref_id: preferred_term_id, xref_kind: kind)
    end
  end

  class Broader < Term
    # @param field [MARC::DataField]
    # @return [Boolean] true for 550 fields whose w subfield is "g"
    def self.match?(field)
      field.tag == "550" && field["w"] == "g"
    end

    # @return [String]
    def kind
      "broader"
    end

    # Ensures the term has a subjects row, then writes both directions of
    # the relationship: preferred term -> this term as "broader", and this
    # term -> preferred term as "narrower".
    #
    # @param preferred_term_id [String]
    # @return [void]
    def add_to_db(preferred_term_id)
      super
      term_id = id # resolved once; the row exists after super
      xrefs = AuthorityBrowse.db[:subjects_xrefs]
      xrefs.insert(subject_id: preferred_term_id, xref_id: term_id, xref_kind: kind)
      xrefs.insert(subject_id: term_id, xref_id: preferred_term_id, xref_kind: "narrower")
    end
  end

  class Narrower < Term
    # @param field [MARC::DataField]
    # @return [Boolean] true for 550 fields whose w subfield is "h"
    def self.match?(field)
      field.tag == "550" && field["w"] == "h"
    end

    # @return [String]
    def kind
      "narrower"
    end

    # Ensures the term has a subjects row, then writes both directions of
    # the relationship: preferred term -> this term as "narrower", and this
    # term -> preferred term as "broader".
    #
    # @param preferred_term_id [String]
    # @return [void]
    def add_to_db(preferred_term_id)
      super
      term_id = id # resolved once; the row exists after super
      xrefs = AuthorityBrowse.db[:subjects_xrefs]
      xrefs.insert(subject_id: preferred_term_id, xref_id: term_id, xref_kind: kind)
      xrefs.insert(subject_id: term_id, xref_id: preferred_term_id, xref_kind: "broader")
    end
  end
end
end
end
8 changes: 4 additions & 4 deletions lib/authority_browse/solr_document/authority_graph.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ def term

# Library of Congress ID
#
# @return [String]
# @return [String] if the id is a valid one
def loc_id
first[:id]
first[:id] if first[:id]&.match?("loc.gov")
end

# @return [Integer]
Expand All @@ -52,10 +52,10 @@ def xrefs
@xrefs.map do |xref|
[
xref, @data.filter_map do |x|
xref_count = x[:xref_count]
xref_count = x[:xref_count].to_i
output = "#{x[:xref_label]}||#{xref_count}"
if @kind.to_s == "name"
output unless xref_count.nil? || xref_count == 0
output unless xref_count == 0
elsif x[:xref_kind] == xref.to_s
output
end
Expand Down
Loading

0 comments on commit ece432f

Please sign in to comment.