Skip to content

Commit

Permalink
Merge pull request #11 from mlibrary/speed-up-skos-db-load
Browse files Browse the repository at this point in the history
adds indexes at the end
  • Loading branch information
billdueber authored Nov 1, 2023
2 parents 9c223c1 + f710b90 commit fb4ddc4
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 16 deletions.
2 changes: 1 addition & 1 deletion env.development
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SOLR_HOST="http://solr:8983"
SOLR_PASSWORD=SolrRocks
SOLR_USERNAME=solr
SOLR_USER=solr
SOLR_CONFIGURATION="authority_browse"
SOLR_COLLECTION="authority_browse"
MARIADB_ROOT_PASSWORD=password
Expand Down
1 change: 1 addition & 0 deletions lib/authority_browse.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
require "logger"
require "byebug"
require "services"
require "concurrent"

module AuthorityBrowse
IS_JRUBY = (RUBY_ENGINE == "jruby")
Expand Down
28 changes: 20 additions & 8 deletions lib/authority_browse/db/names.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,38 @@ class Names < AuthorityBrowse::DB
# Returns a Hash that maps each table name (:names, :names_see_also,
# :names_from_biblio) to a proc holding that table's column declarations.
# The procs are not invoked here; presumably they are evaluated elsewhere as
# a schema-definition block when a table is (re)created -- confirm against
# the code that consumes this hash.
#
# NOTE(review): :id, :match_text, :deprecated and :count in :names, and
# :name_id and :see_also_id in :names_see_also, are each declared twice below:
# once with index / primary_key options and once without. This looks like the
# old and new lines of a diff shown together -- confirm which declarations are
# current before relying on this listing.
def self.database_definitions
{
# One row per name: id, label, match text, deprecated flag, and a count.
names: proc do
String :id, primary_key: true
String :id
String :label, text: true
String :match_text, text: true, index: true
Boolean :deprecated, default: false, index: true
Integer :count, default: 0, index: true
String :match_text, text: true
Boolean :deprecated, default: false
Integer :count, default: 0
end,
# Pairs a name_id with a see_also_id.
names_see_also: proc do
primary_key :id
String :name_id, index: true
String :see_also_id, index: true
String :name_id
String :see_also_id
end,
# Terms with a count and an optional name_id (defaults to nil).
names_from_biblio: proc do
primary_key :id
String :term, text: true
String :match_text, text: true, index: true
Integer :count, default: 0
String :name_id, default: nil
end
}
end

# Adds single-column indexes to the :names and :names_see_also tables.
#
# Issues one alter_table call per table on AuthorityBrowse.db, adding the
# indexes in the order listed below. reset_db invokes this after it has
# finished inserting rows.
#
# @return whatever the final alter_table call returns
def self.set_names_indexes!
  names_columns = %i[id match_text deprecated count]
  see_also_columns = %i[name_id see_also_id]

  AuthorityBrowse.db.alter_table(:names) do
    names_columns.each { |column| add_index(column) }
  end

  AuthorityBrowse.db.alter_table(:names_see_also) do
    see_also_columns.each { |column| add_index(column) }
  end
end
end
end
end
30 changes: 23 additions & 7 deletions lib/authority_browse/names.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ def reset_db(loc_file_getter = lambda { fetch_skos_file })
DB::Names.recreate_table!(:names)
DB::Names.recreate_table!(:names_see_also)

milemarker = Milemarker.new(batch_size: 100_000, name: "adding to entries array", logger: Services.logger)
milemarker.log "Starting adding to entries array"
milemarker = Milemarker.new(batch_size: 100_000, name: "add names to db", logger: Services.logger)
milemarker.log "Start adding names to db"
Zinzout.zin(skos_file).each_slice(100_000) do |slice|
# Zinzout.zin("./data/smaller.jsonld.gz").each_slice(100_000) do |slice|
entries = slice.map do |line|
AuthorityBrowse::LocAuthorities::Entry.new(JSON.parse(line))
end
Expand All @@ -44,16 +43,33 @@ def reset_db(loc_file_getter = lambda { fetch_skos_file })

milemarker.log_final_line

DBMutator::Names.remove_deprecated_when_undeprecated_match_text_exists
S.logger.info "Start: set the indexes"
S.logger.measure_info("set the indexes") do
AuthorityBrowse::DB::Names.set_names_indexes!
end
S.logger.info "Start: remove deprecated when undeprecated match text exists"
S.logger.measure_info("removed deprecated terms with undprecated match text") do
DBMutator::Names.remove_deprecated_when_undeprecated_match_text_exists
end
end

# Fetches terms from Biblio, updates counts in :names, and adds loc ids to
# :names_from_biblio.
#
# Steps, in order:
#   1. run the TermFetcher
#   2. zero out the existing counts
#   3. update names with the new counts
#   4. add ids to names_from_biblio
#
# Each DBMutator step runs exactly once, inside S.logger.measure_info
# (presumably so the logger can report how long the step took). Earlier,
# untimed calls to the same three mutators were removed: they made every
# mutation execute twice per update.
#
# @return whatever the final measure_info call returns
def update
  S.logger.info "Start Term fetcher"
  TermFetcher.new.run

  S.logger.info "Start: zeroing out counts"
  S.logger.measure_info("Zeroed out counts") do
    DBMutator::Names.zero_out_counts
  end

  S.logger.info "Start: update names with counts"
  S.logger.measure_info("updated names with counts") do
    DBMutator::Names.update_names_with_counts
  end

  S.logger.info "Start: add ids to names_from_biblio"
  S.logger.measure_info("Updated ids in names_from_biblio") do
    DBMutator::Names.add_ids_to_names_from_biblio
  end
end

# Loads solr with documents of names that match data from library of
Expand Down
9 changes: 9 additions & 0 deletions spec/authority_browse/db/names_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,13 @@
expect(AuthorityBrowse.db[:names_from_biblio].count).to eq(0)
end
end
# .set_names_indexes! should take both tables from having no indexes
# to having at least one each.
context ".set_names_indexes!" do
  it "sets the indexes on names and names_see_also" do
    indexed_tables = %i[names names_see_also]

    # Before: neither table reports any index.
    indexed_tables.each do |table|
      expect(AuthorityBrowse.db.indexes(table)).to eq({})
    end

    subject.set_names_indexes!

    # After: both tables report some index.
    indexed_tables.each do |table|
      expect(AuthorityBrowse.db.indexes(table)).not_to eq({})
    end
  end
end
end

0 comments on commit fb4ddc4

Please sign in to comment.