From 36c6aab9623361e3750f7cebe2f51192d40be9ab Mon Sep 17 00:00:00 2001 From: Kiah Stroud <32469930+bkiahstroud@users.noreply.github.com> Date: Tue, 26 Nov 2024 07:56:07 -0800 Subject: [PATCH] Move Bulkrax field mappings to Hyku (#2389) * move Bulkrax field mappings to Hyku A recent Hyku feature introduced the ability to configure Bulkrax field mappings on a per-tenant basis. It also introduced a bug that impacted existing Hyku applications who had custom field mappings. Since the feature fell back on Bulkrax's default field mappings if an Account hadn't explicitly set their own, the existing field mapping customizations in the Bulkrax initializer were ignored. To solve that issue, as well as the question of "what if I want all my tenants to have the same field mappings, but don't want to customize them all by hand?", this commit introduces the idea of a set of default field mappings at the Hyku application level. This means that when Bulkrax looks for field mappings, it will discover them in this order (depending on presence): Account setting? -> Hyku defaults? -> Bulkrax defaults There are a couple reasons why I decided to put this in the Hyku module instead of just using Hyku's Bulkrax initializer: 1. Since they're Hyku's defaults, it makes sense to denote them as such semantically and conceptually 2. Since the ultimate fallback is Bulkrax's default field mappings, it makes sense not to alter the field mappings that Bulkrax "manages" with this feature 3. When adding hyku_knapsack into the mix, their often ends up being three separate Bulkrax config files (the knapsack's config/initializers/bulkrax.rb, Hyku's config/initializers/bulkrax.rb, and Bulkrax's lib/bulkrax.rb). This gets very muddy very quickly with how knapsack works technically Ref: - https://github.com/samvera/hyku/pull/2384 * add setter for Hyku.default_bulkrax_field_mappings This can be used by a hyku_knapsack app initializer to override the default Hyku application field mappings * add spec coverage --- app/models/concerns/account_settings.rb | 2 +- config/application.rb | 103 ++++++++++++++++++ config/initializers/bulkrax.rb | 100 ++--------------- lib/bulkrax/bulkrax_decorator.rb | 2 +- spec/lib/bulkrax/bulkrax_decorator_spec.rb | 30 ++++- spec/lib/hyku_spec.rb | 70 ++++++++++++ spec/models/concerns/account_settings_spec.rb | 6 +- 7 files changed, 210 insertions(+), 103 deletions(-) diff --git a/app/models/concerns/account_settings.rb b/app/models/concerns/account_settings.rb index 1fb4fc198..2e9dc2e91 100644 --- a/app/models/concerns/account_settings.rb +++ b/app/models/concerns/account_settings.rb @@ -23,7 +23,7 @@ module AccountSettings setting :allow_downloads, type: 'boolean', default: true setting :allow_signup, type: 'boolean', default: true setting :analytics_provider, type: 'string' - setting :bulkrax_field_mappings, type: 'json_editor', default: Bulkrax.field_mappings.to_json + setting :bulkrax_field_mappings, type: 'json_editor', default: Hyku.default_bulkrax_field_mappings.to_json setting :bulkrax_validations, type: 'boolean', disabled: true setting :cache_api, type: 'boolean', default: false setting :contact_email, type: 'string', default: 'change-me-in-settings@example.com' diff --git a/config/application.rb b/config/application.rb index 5ba253e80..254493035 100644 --- a/config/application.rb +++ b/config/application.rb @@ -39,6 +39,109 @@ def self.bulkrax_enabled? ActiveModel::Type::Boolean.new.cast(ENV.fetch('HYKU_BULKRAX_ENABLED', true)) end + def self.default_bulkrax_field_mappings=(value) + err_msg = 'Hyku.default_bulkrax_field_mappings must respond to #with_indifferent_access' + raise err_msg unless value.respond_to?(:with_indifferent_access) + + @default_bulkrax_field_mappings = value.with_indifferent_access + end + + # This represents the default Bulkrax field mappings that new Accounts will be initialized with. + # Bulkrax field mappings should not be configured within the Bulkrax initializer in Hyku. + # @see lib/bulkrax/bulkrax_decorator.rb + # @see https://github.com/samvera/bulkrax/wiki/Configuring-Bulkrax#field-mappings + def self.default_bulkrax_field_mappings + return @default_bulkrax_field_mappings if @default_bulkrax_field_mappings.present? + + default_bulkrax_fm = {} + defaults = { + 'abstract' => { from: ['abstract'], split: true }, + 'accessibility_feature' => { from: ['accessibility_feature'], split: '\|' }, + 'accessibility_hazard' => { from: ['accessibility_hazard'], split: '\|' }, + 'accessibility_summary' => { from: ['accessibility_summary'] }, + 'additional_information' => { from: ['additional_information'], split: '\|', generated: true }, + 'admin_note' => { from: ['admin_note'] }, + 'admin_set_id' => { from: ['admin_set_id'], generated: true }, + 'alternate_version' => { from: ['alternate_version'], split: '\|' }, + 'alternative_title' => { from: ['alternative_title'], split: '\|', generated: true }, + 'arkivo_checksum' => { from: ['arkivo_checksum'], split: '\|', generated: true }, + 'audience' => { from: ['audience'], split: '\|' }, + 'based_near' => { from: ['location'], split: '\|' }, + 'bibliographic_citation' => { from: ['bibliographic_citation'], split: true }, + 'contributor' => { from: ['contributor'], split: true }, + 'create_date' => { from: ['create_date'], split: true }, + 'children' => { from: ['children'], related_children_field_mapping: true }, + 'committee_member' => { from: ['committee_member'], split: '\|' }, + 'creator' => { from: ['creator'], split: true }, + 'date_created' => { from: ['date_created'], split: true }, + 'date_uploaded' => { from: ['date_uploaded'], generated: true }, + 'degree_discipline' => { from: ['discipline'], split: '\|' }, + 'degree_grantor' => { from: ['grantor'], split: '\|' }, + 'degree_level' => { from: ['level'], split: '\|' }, + 'degree_name' => { from: ['degree'], split: '\|' }, + 'depositor' => { from: ['depositor'], split: '\|', generated: true }, + 'description' => { from: ['description'], split: true }, + 'discipline' => { from: ['discipline'], split: '\|' }, + 'education_level' => { from: ['education_level'], split: '\|' }, + 'embargo_id' => { from: ['embargo_id'], generated: true }, + 'extent' => { from: ['extent'], split: true }, + 'file' => { from: ['file'], split: /\s*[|]\s*/ }, + 'identifier' => { from: ['identifier'], split: true }, + 'import_url' => { from: ['import_url'], split: '\|', generated: true }, + 'keyword' => { from: ['keyword'], split: true }, + 'label' => { from: ['label'], generated: true }, + 'language' => { from: ['language'], split: true }, + 'lease_id' => { from: ['lease_id'], generated: true }, + 'library_catalog_identifier' => { from: ['library_catalog_identifier'], split: '\|' }, + 'license' => { from: ['license'], split: /\s*[|]\s*/ }, + 'modified_date' => { from: ['modified_date'], split: true }, + 'newer_version' => { from: ['newer_version'], split: '\|' }, + 'oer_size' => { from: ['oer_size'], split: '\|' }, + 'on_behalf_of' => { from: ['on_behalf_of'], generated: true }, + 'owner' => { from: ['owner'], generated: true }, + 'parents' => { from: ['parents'], related_parents_field_mapping: true }, + 'previous_version' => { from: ['previous_version'], split: '\|' }, + 'publisher' => { from: ['publisher'], split: true }, + 'related_item' => { from: ['related_item'], split: '\|' }, + 'relative_path' => { from: ['relative_path'], split: '\|', generated: true }, + 'related_url' => { from: ['related_url', 'relation'], split: /\s* [|]\s*/ }, + 'remote_files' => { from: ['remote_files'], split: /\s*[|]\s*/ }, + 'rendering_ids' => { from: ['rendering_ids'], split: '\|', generated: true }, + 'resource_type' => { from: ['resource_type'], split: true }, + 'rights_holder' => { from: ['rights_holder'], split: '\|' }, + 'rights_notes' => { from: ['rights_notes'], split: true }, + 'rights_statement' => { from: ['rights', 'rights_statement'], split: '\|', generated: true }, + 'source' => { from: ['source'], split: true }, + 'state' => { from: ['state'], generated: true }, + 'subject' => { from: ['subject'], split: true }, + 'table_of_contents' => { from: ['table_of_contents'], split: '\|' }, + 'title' => { from: ['title'], split: /\s*[|]\s*/ }, + 'video_embed' => { from: ['video_embed'] } + } + + default_bulkrax_fm['Bulkrax::BagitParser'] = defaults.merge({ + # add or remove custom mappings for this parser here + }) + + default_bulkrax_fm['Bulkrax::CsvParser'] = defaults.merge({ + # add or remove custom mappings for this parser here + }) + + default_bulkrax_fm['Bulkrax::OaiDcParser'] = defaults.merge({ + # add or remove custom mappings for this parser here + }) + + default_bulkrax_fm['Bulkrax::OaiQualifiedDcParser'] = defaults.merge({ + # add or remove custom mappings for this parser here + }) + + default_bulkrax_fm['Bulkrax::XmlParser'] = defaults.merge({ + # add or remove custom mappings for this parser here + }) + + default_bulkrax_fm.with_indifferent_access + end + # rubocop:disable Metrics/ClassLength class Application < Rails::Application ## diff --git a/config/initializers/bulkrax.rb b/config/initializers/bulkrax.rb index 5184f11a8..edeeca7fe 100644 --- a/config/initializers/bulkrax.rb +++ b/config/initializers/bulkrax.rb @@ -45,99 +45,13 @@ # config.collection_field_mapping['Bulkrax::RdfEntry'] = 'http://opaquenamespace.org/ns/set' # Field mappings - # Create a completely new set of mappings by replacing the whole set as follows - # config.field_mappings = { - # "Bulkrax::OaiDcParser" => { **individual field mappings go here*** } - # } - - # Add to, or change existing mappings as follows - # e.g. to exclude date - # config.field_mappings["Bulkrax::OaiDcParser"]["date"] = { from: ["date"], excluded: true } - - default_field_mapping = { - 'abstract' => { from: ['abstract'], split: true }, - 'accessibility_feature' => { from: ['accessibility_feature'], split: '\|' }, - 'accessibility_hazard' => { from: ['accessibility_hazard'], split: '\|' }, - 'accessibility_summary' => { from: ['accessibility_summary'] }, - 'additional_information' => { from: ['additional_information'], split: '\|', generated: true }, - 'admin_note' => { from: ['admin_note'] }, - 'admin_set_id' => { from: ['admin_set_id'], generated: true }, - 'alternate_version' => { from: ['alternate_version'], split: '\|' }, - 'alternative_title' => { from: ['alternative_title'], split: '\|', generated: true }, - 'arkivo_checksum' => { from: ['arkivo_checksum'], split: '\|', generated: true }, - 'audience' => { from: ['audience'], split: '\|' }, - 'based_near' => { from: ['location'], split: '\|' }, - 'bibliographic_citation' => { from: ['bibliographic_citation'], split: true }, - 'contributor' => { from: ['contributor'], split: true }, - 'create_date' => { from: ['create_date'], split: true }, - 'children' => { from: ['children'], related_children_field_mapping: true }, - 'committee_member' => { from: ['committee_member'], split: '\|' }, - 'creator' => { from: ['creator'], split: true }, - 'date_created' => { from: ['date_created'], split: true }, - 'date_uploaded' => { from: ['date_uploaded'], generated: true }, - 'degree_discipline' => { from: ['discipline'], split: '\|' }, - 'degree_grantor' => { from: ['grantor'], split: '\|' }, - 'degree_level' => { from: ['level'], split: '\|' }, - 'degree_name' => { from: ['degree'], split: '\|' }, - 'depositor' => { from: ['depositor'], split: '\|', generated: true }, - 'description' => { from: ['description'], split: true }, - 'discipline' => { from: ['discipline'], split: '\|' }, - 'education_level' => { from: ['education_level'], split: '\|' }, - 'embargo_id' => { from: ['embargo_id'], generated: true }, - 'extent' => { from: ['extent'], split: true }, - 'file' => { from: ['file'], split: /\s*[|]\s*/ }, - 'identifier' => { from: ['identifier'], split: true }, - 'import_url' => { from: ['import_url'], split: '\|', generated: true }, - 'keyword' => { from: ['keyword'], split: true }, - 'label' => { from: ['label'], generated: true }, - 'language' => { from: ['language'], split: true }, - 'lease_id' => { from: ['lease_id'], generated: true }, - 'library_catalog_identifier' => { from: ['library_catalog_identifier'], split: '\|' }, - 'license' => { from: ['license'], split: /\s*[|]\s*/ }, - 'modified_date' => { from: ['modified_date'], split: true }, - 'newer_version' => { from: ['newer_version'], split: '\|' }, - 'oer_size' => { from: ['oer_size'], split: '\|' }, - 'on_behalf_of' => { from: ['on_behalf_of'], generated: true }, - 'owner' => { from: ['owner'], generated: true }, - 'parents' => { from: ['parents'], related_parents_field_mapping: true }, - 'previous_version' => { from: ['previous_version'], split: '\|' }, - 'publisher' => { from: ['publisher'], split: true }, - 'related_item' => { from: ['related_item'], split: '\|' }, - 'relative_path' => { from: ['relative_path'], split: '\|', generated: true }, - 'related_url' => { from: ['related_url', 'relation'], split: /\s* [|]\s*/ }, - 'remote_files' => { from: ['remote_files'], split: /\s*[|]\s*/ }, - 'rendering_ids' => { from: ['rendering_ids'], split: '\|', generated: true }, - 'resource_type' => { from: ['resource_type'], split: true }, - 'rights_holder' => { from: ['rights_holder'], split: '\|' }, - 'rights_notes' => { from: ['rights_notes'], split: true }, - 'rights_statement' => { from: ['rights', 'rights_statement'], split: '\|', generated: true }, - 'source' => { from: ['source'], split: true }, - 'state' => { from: ['state'], generated: true }, - 'subject' => { from: ['subject'], split: true }, - 'table_of_contents' => { from: ['table_of_contents'], split: '\|' }, - 'title' => { from: ['title'], split: /\s*[|]\s*/ }, - 'video_embed' => { from: ['video_embed'] } - } - - config.field_mappings["Bulkrax::BagitParser"] = default_field_mapping.merge({ - # add or remove custom mappings for this parser here - }) - - config.field_mappings["Bulkrax::CsvParser"] = default_field_mapping.merge({ - # add or remove custom mappings for this parser here - }) - - config.field_mappings["Bulkrax::OaiDcParser"] = default_field_mapping.merge({ - # add or remove custom mappings for this parser here - }) - - config.field_mappings["Bulkrax::OaiQualifiedDcParser"] = default_field_mapping.merge({ - # add or remove custom mappings for this parser here - }) - - config.field_mappings["Bulkrax::XmlParser"] = default_field_mapping.merge({ - # add or remove custom mappings for this parser here - }) + # NOTE: Bulkrax field mappings are configured on a per-tenant basis in the Account settings. + # The default set of field mappings that new tenants will be initialized with can be found + # and/or modified in config/application.rb (Hyku#default_bulkrax_field_mappings) + # @see config/application.rb + # @see app/models/concerns/account_settings.rb + # WARN: Modifying Bulkrax's field mappings in this file will not work as expected + # @see lib/bulkrax/bulkrax_decorator.rb # Because Hyku now uses and assumes Valkyrie to query the repository layer, we need to match the # object factory to use Valkyrie. diff --git a/lib/bulkrax/bulkrax_decorator.rb b/lib/bulkrax/bulkrax_decorator.rb index 5df4c161a..08536eefe 100644 --- a/lib/bulkrax/bulkrax_decorator.rb +++ b/lib/bulkrax/bulkrax_decorator.rb @@ -6,7 +6,7 @@ def field_mappings if Site.account.present? && Site.account.bulkrax_field_mappings.present? JSON.parse(Site.account.bulkrax_field_mappings).with_indifferent_access else - super + Hyku.default_bulkrax_field_mappings.presence || super end end end diff --git a/spec/lib/bulkrax/bulkrax_decorator_spec.rb b/spec/lib/bulkrax/bulkrax_decorator_spec.rb index 2d7246015..3aea219c4 100644 --- a/spec/lib/bulkrax/bulkrax_decorator_spec.rb +++ b/spec/lib/bulkrax/bulkrax_decorator_spec.rb @@ -8,12 +8,32 @@ context 'when the current Account does not have any tenant-specific field mappings' do let(:account) { build(:account) } - it "returns Bulkrax's default field mappings" do - default_bulkrax_mapping_keys = ['Bulkrax::OaiDcParser', 'Bulkrax::OaiQualifiedDcParser', 'Bulkrax::CsvParser', 'Bulkrax::BagitParser', 'Bulkrax::XmlParser'] + context 'when Hyku.default_bulkrax_mapping_keys is unset' do + before do + allow(Hyku).to receive(:default_bulkrax_field_mappings).and_return nil + end - expect(Site.account.settings['bulkrax_field_mappings']).to be_nil - expect(Bulkrax.field_mappings).to be_a(Hash) - expect(Bulkrax.field_mappings.keys.sort).to eq(default_bulkrax_mapping_keys.sort) + it "returns Bulkrax's default field mappings" do + default_bulkrax_mapping_keys = ['Bulkrax::OaiDcParser', 'Bulkrax::OaiQualifiedDcParser', 'Bulkrax::CsvParser', 'Bulkrax::BagitParser', 'Bulkrax::XmlParser'] + + expect(Site.account.settings['bulkrax_field_mappings']).to be_nil + expect(Bulkrax.field_mappings).to be_a(Hash) + expect(Bulkrax.field_mappings.keys.sort).to eq(default_bulkrax_mapping_keys.sort) + end + end + + context 'when Hyku.default_bulkrax_mapping_keys is set' do + before do + allow(Site.account).to receive(:bulkrax_field_mappings).and_return nil + end + + it "returns Hyku's default field mappings" do + Hyku.default_bulkrax_field_mappings = { this: 'is fine' } + + expect(Site.account.settings['bulkrax_field_mappings']).to be_nil + expect(Bulkrax.field_mappings).to be_a(Hash) + expect(Bulkrax.field_mappings).to eq({ this: 'is fine' }.with_indifferent_access) + end end end diff --git a/spec/lib/hyku_spec.rb b/spec/lib/hyku_spec.rb index 2ea1c8afc..8e241b808 100644 --- a/spec/lib/hyku_spec.rb +++ b/spec/lib/hyku_spec.rb @@ -4,4 +4,74 @@ it 'has a version' do expect(described_class).to have_constant(:VERSION) end + + # @see config/application.rb + describe '#default_bulkrax_field_mappings=' do + around do |example| + Hyku.instance_variable_set(:@default_bulkrax_field_mappings, nil) + example.run + Hyku.instance_variable_set(:@default_bulkrax_field_mappings, nil) + end + + context 'when value is a Hash' do + let(:value) { { hello: 'world' } } + + it 'sets @default_bulkrax_field_mappings' do + expect { described_class.default_bulkrax_field_mappings = value } + .to change { Hyku.instance_variable_get(:@default_bulkrax_field_mappings) } + .from(nil) + .to(value.with_indifferent_access) + end + end + + context 'when value is an ActiveSupport::HashWithIndifferentAccess' do + let(:value) { { hello: 'world' }.with_indifferent_access } + + it 'sets @default_bulkrax_field_mappings' do + expect { described_class.default_bulkrax_field_mappings = value } + .to change { Hyku.instance_variable_get(:@default_bulkrax_field_mappings) } + .from(nil) + .to(value) + end + end + + context 'when value does not respond to :with_indifferent_access' do + let(:value) { 'hello world' } + + it 'throws an error' do + expect { described_class.default_bulkrax_field_mappings = value } + .to raise_error(RuntimeError, 'Hyku.default_bulkrax_field_mappings must respond to #with_indifferent_access') + end + end + end + + # @see config/application.rb + describe '#default_bulkrax_field_mappings' do + context 'when @default_bulkrax_field_mappings is present' do + around do |example| + Hyku.instance_variable_set(:@default_bulkrax_field_mappings, 'greetings') + example.run + Hyku.instance_variable_set(:@default_bulkrax_field_mappings, nil) + end + + it 'returns @default_bulkrax_field_mappings' do + expect(described_class.default_bulkrax_field_mappings).to eq('greetings') + end + end + + context 'when @default_bulkrax_field_mappings is blank' do + around do |example| + Hyku.instance_variable_set(:@default_bulkrax_field_mappings, nil) + example.run + Hyku.instance_variable_set(:@default_bulkrax_field_mappings, nil) + end + + it 'returns the default field mappings' do + default_bulkrax_mapping_keys = ['Bulkrax::OaiDcParser', 'Bulkrax::OaiQualifiedDcParser', 'Bulkrax::CsvParser', 'Bulkrax::BagitParser', 'Bulkrax::XmlParser'] + + expect(described_class.default_bulkrax_field_mappings).to be_a(ActiveSupport::HashWithIndifferentAccess) + expect(described_class.default_bulkrax_field_mappings.keys.sort).to eq(default_bulkrax_mapping_keys.sort) + end + end + end end diff --git a/spec/models/concerns/account_settings_spec.rb b/spec/models/concerns/account_settings_spec.rb index a63b14858..4a57aea9a 100644 --- a/spec/models/concerns/account_settings_spec.rb +++ b/spec/models/concerns/account_settings_spec.rb @@ -53,11 +53,11 @@ describe '#bulkrax_field_mappings' do context 'when the setting is blank' do - it 'returns the default field mappings configured in Bulkrax' do + it 'returns the default field mappings configured in Hyku' do expect(account.settings['bulkrax_field_mappings']).to be_nil - # For parity, parse Bulkrax field mappings from JSON. #to_json will stringify keys as + # For parity, parse field mappings from JSON. #to_json will stringify keys as # well as turn a regex like /\|/ into (?-mix:\\|) - default_bulkrax_mappings = JSON.parse(Bulkrax.field_mappings.to_json) + default_bulkrax_mappings = JSON.parse(Hyku.default_bulkrax_field_mappings.to_json) default_tenant_mappings = JSON.parse(account.bulkrax_field_mappings) expect(default_tenant_mappings).to eq(default_bulkrax_mappings)