Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sorting and docs #23

Merged
merged 2 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 0 additions & 51 deletions database_strategy.txt

This file was deleted.

6 changes: 6 additions & 0 deletions lib/authority_browse/db/names.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
module AuthorityBrowse
class DB
class Names < AuthorityBrowse::DB
# Tables for names for AuthorityBrowse
#
# @return [Hash]
def self.database_definitions
{
names: proc do
Expand All @@ -23,6 +26,9 @@ def self.database_definitions
}
end

# Sets indexes on the :names and :names_see_also tables
#
# @return [Nil]
def self.set_names_indexes!
AuthorityBrowse.db.alter_table(:names) do
add_index :id
Expand Down
6 changes: 6 additions & 0 deletions lib/authority_browse/db/subjects.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
module AuthorityBrowse
class DB
class Subjects < AuthorityBrowse::DB
# Tables for subjects for AuthorityBrowse
#
# @return [Hash]
def self.database_definitions
{
subjects: proc do
Expand All @@ -24,6 +27,9 @@ def self.database_definitions
}
end

# Sets indexes on the :subjects and :subjects_xrefs tables
#
# @return [Nil]
def self.set_subjects_indexes!
AuthorityBrowse.db.alter_table(:subjects) do
add_index :id
Expand Down
17 changes: 17 additions & 0 deletions lib/authority_browse/db_mutator/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,20 @@ module AuthorityBrowse
class DBMutator
class Base
class << self
# Resets the count column to 0 for every row of the main table, doing the
# update inside a single database transaction.
#
# @return [Nil] NOTE(review): the value is whatever the transaction call
#   returns for this block — confirm it is really nil.
def zero_out_counts
  db = AuthorityBrowse.db
  db.transaction { db[main_table].update(count: 0) }
end

# Updates the main table with counts from the from_biblio table.
# The match between the tables happens on the `match_text` fields
# in both tables.
#
# @return [Nil]
def update_main_with_counts
statement = <<~SQL.strip
UPDATE #{main_table} AS m
Expand All @@ -20,6 +28,10 @@ def update_main_with_counts
AuthorityBrowse.db.run(statement)
end

# Removes deprecated terms in the main table when there is an
# undeprecated term with the same match text.
#
# @return [Nil]
def remove_deprecated_when_undeprecated_match_text_exists
statement = <<~SQL.strip
DELETE FROM #{main_table}
Expand All @@ -33,6 +45,11 @@ def remove_deprecated_when_undeprecated_match_text_exists
AuthorityBrowse.db.run(statement)
end

# Updates the from_biblio table with ids of matching entries in the
# main_table. This enables determining the list of unmatched entries in
# the from_biblio table.
#
# @return [Nil]
def add_ids_to_from_biblio
statement = <<~SQL.strip
UPDATE #{from_biblio_table} AS fb
Expand Down
9 changes: 9 additions & 0 deletions lib/authority_browse/db_mutator/names.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,31 @@ module AuthorityBrowse
class DBMutator
class Names < Base
class << self
# Names-specific entry point for updating counts. It is a thin wrapper
# that delegates to update_main_with_counts, inherited from Base.
#
# @return [Nil]
def update_names_with_counts
update_main_with_counts
end

# Names-specific entry point for adding ids to the from_biblio table. It is
# a thin wrapper that delegates to add_ids_to_from_biblio, inherited from
# Base.
#
# @return [Nil]
def add_ids_to_names_from_biblio
add_ids_to_from_biblio
end

# Identifier symbol for names.
# NOTE(review): presumably the id column that Base's SQL writes into the
# from_biblio table — confirm against DBMutator::Base.
#
# @return [Symbol]
def main_id
:name_id
end

# Name of the table that Base's statements treat as the main table.
#
# @return [Symbol]
def main_table
:names
end

# Name of the table that Base's statements treat as the from_biblio table.
#
# @return [Symbol]
def from_biblio_table
:names_from_biblio
end
Expand Down
9 changes: 9 additions & 0 deletions lib/authority_browse/db_mutator/subjects.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,31 @@ module AuthorityBrowse
class DBMutator
class Subjects < Base
class << self
# Subjects-specific entry point for updating counts. It is a thin wrapper
# that delegates to update_main_with_counts, inherited from Base.
#
# @return [Nil]
def update_subjects_with_counts
update_main_with_counts
end

# Subjects-specific entry point for adding ids to the from_biblio table. It
# is a thin wrapper that delegates to add_ids_to_from_biblio, inherited
# from Base.
#
# @return [Nil]
def add_ids_to_subjects_from_biblio
add_ids_to_from_biblio
end

# Identifier symbol for subjects.
# NOTE(review): presumably the id column that Base's SQL writes into the
# from_biblio table — confirm against DBMutator::Base.
#
# @return [Symbol]
def main_id
:subject_id
end

# Name of the table that Base's statements treat as the main table.
#
# @return [Symbol]
def main_table
:subjects
end

# Name of the table that Base's statements treat as the from_biblio table.
#
# @return [Symbol]
def from_biblio_table
:subjects_from_biblio
end
Expand Down
3 changes: 3 additions & 0 deletions lib/authority_browse/loc_authorities/entry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ def id
@id ||= "http://id.loc.gov#{@data["@id"]}"
end

# Finds the element of "@graph" whose "@id" equals this entry's id. A found
# element is memoized, so later calls do not search again.
#
# @return [Hash, nil] the matching graph element, or nil when none matches
def main_component
  return @main_component if @main_component

  @main_component = @data["@graph"].find do |component|
    component["@id"] == id
  end
end

# Preferred label for the entry. Entry does not implement this itself;
# subclasses (e.g. Subject) override it with a real implementation.
#
# @return [String] Preferred Label
# @raise [NotImplementedError] always, unless overridden by a subclass
def label
raise NotImplementedError
end
Expand All @@ -26,6 +28,7 @@ def match_text
AuthorityBrowse::Normalize.match_text(label)
end

# Checks whether any "@graph" element has a "cs:changeReason" of
# "deprecated".
#
# @return [Boolean] true when at least one graph element is marked deprecated
def deprecated?
  @data["@graph"].any? do |component|
    component["cs:changeReason"] == "deprecated"
  end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/authority_browse/loc_authorities/name.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def label
main_component["skos:prefLabel"] || main_component["skosxl:literalForm"]
end

# @return [Array] [Array of strings of see_also_ids]
# Looks up the "rdfs:seeAlso" xref ids through _get_xref_ids and memoizes
# the result.
#
# @return [Array<String>] ids of see also xrefs
def see_also_ids
  return @see_also_ids if @see_also_ids

  @see_also_ids = _get_xref_ids("rdfs:seeAlso")
end
Expand Down
4 changes: 4 additions & 0 deletions lib/authority_browse/loc_authorities/subject.rb
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
module AuthorityBrowse
module LocAuthorities
class Subject < Entry
# Reads the "@value" of "skos:prefLabel" from the main component, falling
# back to the "@value" of "skosxl:literalForm" when that is missing.
#
# @return [String, nil] Preferred Label; nil when there is no main
#   component or neither key has a value
def label
  preferred = main_component&.dig("skos:prefLabel", "@value")
  return preferred if preferred

  main_component&.dig("skosxl:literalForm", "@value")
end

# Looks up the "skos:broader" xref ids through _get_xref_ids and memoizes
# the result.
#
# @return [Array<String>] ids of broader xrefs
def broader_ids
  return @broader_ids if @broader_ids

  @broader_ids = _get_xref_ids("skos:broader")
end

# Looks up the "skos:narrower" xref ids through _get_xref_ids and memoizes
# the result.
#
# @return [Array<String>] ids of narrower xrefs
def narrower_ids
  return @narrower_ids if @narrower_ids

  @narrower_ids = _get_xref_ids("skos:narrower")
end

# Reports whether this subject has at least one narrower or broader xref id.
#
# @return [Boolean] false only when both narrower_ids and broader_ids are empty
def xref_ids?
  !narrower_ids.empty? || !broader_ids.empty?
end
Expand Down
29 changes: 23 additions & 6 deletions lib/authority_browse/names.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@
module AuthorityBrowse
class Names < Base
class << self
# Kind of authority this class handles.
# NOTE(review): how Base consumes this value is not visible here — confirm.
#
# @return [String] always "name"
def kind
"name"
end

# Loads the names and names_see_also table with data from loc
# Loads the names and names_see_also table with data from LOC
#
# @param loc_file_getter [Proc] when called needs to put a file with skos
# data into skos_file
# @return [Nil]
def reset_db(loc_file_getter = lambda { fetch_skos_file })
# get names file
loc_file_getter.call
Expand Down Expand Up @@ -57,9 +62,11 @@ def reset_db(loc_file_getter = lambda { fetch_skos_file })
end
end

# Loads solr with documents of names that match data from library of
# congress.
# Loads solr with documents of names that match data from Library of
# Congress.
#
# @param solr_uploader [Solr::Uploader]
# @return [Nil]
def load_solr_with_matched(solr_uploader = Solr::Uploader.new(collection: "authority_browse_reindex"))
write_docs do |out, milemarker|
AuthorityBrowse.db.fetch(get_matched_query).stream.chunk_while { |bef, aft| aft[:id] == bef[:id] }.each do |ary|
Expand All @@ -71,9 +78,11 @@ def load_solr_with_matched(solr_uploader = Solr::Uploader.new(collection: "autho
solr_uploader.send_file_to_solr(solr_docs_file)
end

# Loads solr with documents of names that don't match entries in library
# of congress
# Loads solr with documents of names that don't match entries in Library
# of Congress
#
# @param solr_uploader [Solr::Uploader]
# @return [Nil]
def load_solr_with_unmatched(solr_uploader = Solr::Uploader.new(collection: "authority_browse_reindex"))
write_docs do |out, milemarker|
AuthorityBrowse.db[:names_from_biblio].stream.filter(name_id: nil).where { count > 0 }.each do |name|
Expand All @@ -87,7 +96,7 @@ def load_solr_with_unmatched(solr_uploader = Solr::Uploader.new(collection: "aut
# Sequel query that gets names and see alsos with their counts
#
# Private method
# return [String]
# @return [String]
def get_matched_query
<<~SQL.strip
SELECT names.id,
Expand All @@ -104,20 +113,28 @@ def get_matched_query
SQL
end

# Name of the Biblio field/facet that counts should be fetched for.
#
# @return [String]
def field_name
"author_authoritative_browse"
end

# URL of the gzipped LOC names skos file (JSON-LD) on id.loc.gov.
#
# @return [String]
def remote_skos_file
"https://id.loc.gov/download/authorities/names.skosrdf.jsonld.gz"
end

# Relative path of the local file that holds the Library of Congress skos
# data for names.
#
# @return [String]
def local_skos_file
"tmp/names.skosrdf.jsonld.gz"
end

# Name of the table that holds the names coming from Biblio.
# NOTE(review): presumably consumed by Base when loading counts — confirm.
#
# @return [Symbol]
def from_biblio_table
:names_from_biblio
end
Expand Down
20 changes: 9 additions & 11 deletions lib/authority_browse/normalize.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,13 @@ module Normalize
# We want it to match solr because we need to generate a search string that will find all the stuff
# in the catalog we're claiming it should find.

# PUNCT_SPACE_COMBO = /(?:\p{P}+(?:\s+|\Z))|(?:(?:\A|\s+)\p{P}+)/
UNNECESSARY_ENDING_PUNCT = /[\/.;,]+\Z/

# not sure this is used anywhere
# For a sort key, we want to eliminate punctuation in general.
# However, things that act like a space between words should
# be turned into spaces.

# This should match as exactly as possible the fieldType authority_search

WHICH_PUNCT_TO_SPACIFY = /[:-]+/
EMPTY_STRING = ""
ONE_SPACE = " "
# this is used
# Return the appropriate match text for a given string
#
# @param str [String] String to be normalized
# @return [String] Normalized string
def match_text(str)
str = unicode_normalize(str)
str.gsub!(/\Athe\s+/, EMPTY_STRING)
Expand All @@ -41,6 +34,11 @@ def match_text(str)
cleanup_spaces(str)
end

# Collapses every run of whitespace into a single space, then trims
# whitespace from both ends of the result.
#
# @param str [String] String with spaces
# @return [String] new String with normalized spacing; str is not modified
def cleanup_spaces(str)
  collapsed = str.gsub(/\s+/, ONE_SPACE)
  collapsed.strip
end
Expand Down
Loading