From da23315b52609a498338a32c4b90a69b86557134 Mon Sep 17 00:00:00 2001 From: Benjamin Kiah Stroud <32469930+bkiahstroud@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:39:36 -0800 Subject: [PATCH] move Bulkrax field mappings to Hyku A recent Hyku feature introduced the ability to configure Bulkrax field mappings on a per-tenant basis. It also introduced a bug that impacted existing Hyku applications that had custom field mappings. Since the feature fell back on Bulkrax's default field mappings if an Account hadn't explicitly set their own, the existing field mapping customizations in the Bulkrax initializer were ignored. To solve that issue, as well as the question of "what if I want all my tenants to have the same field mappings, but don't want to customize them all by hand?", this commit introduces the idea of a set of default field mappings at the Hyku application level. This means that when Bulkrax looks for field mappings, it will discover them in this order (depending on presence): Account setting? -> Hyku defaults? -> Bulkrax defaults There are a couple of reasons why I decided to put this in the Hyku module instead of just using Hyku's Bulkrax initializer: 1. Since they're Hyku's defaults, it makes sense to denote them as such semantically and conceptually 2. Since the ultimate fallback is Bulkrax's default field mappings, it makes sense not to alter the field mappings that Bulkrax "manages" with this feature 3. When adding hyku_knapsack into the mix, there often end up being three separate Bulkrax config files (the knapsack's config/initializers/bulkrax.rb, Hyku's config/initializers/bulkrax.rb, and Bulkrax's lib/bulkrax.rb). 
This gets very muddy very quickly with how knapsack works technically Ref: - https://github.com/samvera/hyku/pull/2384 --- app/models/concerns/account_settings.rb | 2 +- config/application.rb | 94 ++++++++++++++++++++++ config/initializers/bulkrax.rb | 100 ++---------------------- lib/bulkrax/bulkrax_decorator.rb | 2 +- 4 files changed, 103 insertions(+), 95 deletions(-) diff --git a/app/models/concerns/account_settings.rb b/app/models/concerns/account_settings.rb index 1fb4fc198..2e9dc2e91 100644 --- a/app/models/concerns/account_settings.rb +++ b/app/models/concerns/account_settings.rb @@ -23,7 +23,7 @@ module AccountSettings setting :allow_downloads, type: 'boolean', default: true setting :allow_signup, type: 'boolean', default: true setting :analytics_provider, type: 'string' - setting :bulkrax_field_mappings, type: 'json_editor', default: Bulkrax.field_mappings.to_json + setting :bulkrax_field_mappings, type: 'json_editor', default: Hyku.default_bulkrax_field_mappings.to_json setting :bulkrax_validations, type: 'boolean', disabled: true setting :cache_api, type: 'boolean', default: false setting :contact_email, type: 'string', default: 'change-me-in-settings@example.com' diff --git a/config/application.rb b/config/application.rb index 5ba253e80..e0845ee54 100644 --- a/config/application.rb +++ b/config/application.rb @@ -39,6 +39,100 @@ def self.bulkrax_enabled? ActiveModel::Type::Boolean.new.cast(ENV.fetch('HYKU_BULKRAX_ENABLED', true)) end + # This represents the default Bulkrax field mappings that new Accounts will be initialized with. + # Bulkrax field mappings should not be configured within the Bulkrax initializer in Hyku. 
+ # @see lib/bulkrax/bulkrax_decorator.rb + # @see https://github.com/samvera/bulkrax/wiki/Configuring-Bulkrax#field-mappings + def self.default_bulkrax_field_mappings + default_bulkrax_fm = {} + defaults = { + 'abstract' => { from: ['abstract'], split: true }, + 'accessibility_feature' => { from: ['accessibility_feature'], split: '\|' }, + 'accessibility_hazard' => { from: ['accessibility_hazard'], split: '\|' }, + 'accessibility_summary' => { from: ['accessibility_summary'] }, + 'additional_information' => { from: ['additional_information'], split: '\|', generated: true }, + 'admin_note' => { from: ['admin_note'] }, + 'admin_set_id' => { from: ['admin_set_id'], generated: true }, + 'alternate_version' => { from: ['alternate_version'], split: '\|' }, + 'alternative_title' => { from: ['alternative_title'], split: '\|', generated: true }, + 'arkivo_checksum' => { from: ['arkivo_checksum'], split: '\|', generated: true }, + 'audience' => { from: ['audience'], split: '\|' }, + 'based_near' => { from: ['location'], split: '\|' }, + 'bibliographic_citation' => { from: ['bibliographic_citation'], split: true }, + 'contributor' => { from: ['contributor'], split: true }, + 'create_date' => { from: ['create_date'], split: true }, + 'children' => { from: ['children'], related_children_field_mapping: true }, + 'committee_member' => { from: ['committee_member'], split: '\|' }, + 'creator' => { from: ['creator'], split: true }, + 'date_created' => { from: ['date_created'], split: true }, + 'date_uploaded' => { from: ['date_uploaded'], generated: true }, + 'degree_discipline' => { from: ['discipline'], split: '\|' }, + 'degree_grantor' => { from: ['grantor'], split: '\|' }, + 'degree_level' => { from: ['level'], split: '\|' }, + 'degree_name' => { from: ['degree'], split: '\|' }, + 'depositor' => { from: ['depositor'], split: '\|', generated: true }, + 'description' => { from: ['description'], split: true }, + 'discipline' => { from: ['discipline'], split: '\|' }, + 
'education_level' => { from: ['education_level'], split: '\|' }, + 'embargo_id' => { from: ['embargo_id'], generated: true }, + 'extent' => { from: ['extent'], split: true }, + 'file' => { from: ['file'], split: /\s*[|]\s*/ }, + 'identifier' => { from: ['identifier'], split: true }, + 'import_url' => { from: ['import_url'], split: '\|', generated: true }, + 'keyword' => { from: ['keyword'], split: true }, + 'label' => { from: ['label'], generated: true }, + 'language' => { from: ['language'], split: true }, + 'lease_id' => { from: ['lease_id'], generated: true }, + 'library_catalog_identifier' => { from: ['library_catalog_identifier'], split: '\|' }, + 'license' => { from: ['license'], split: /\s*[|]\s*/ }, + 'modified_date' => { from: ['modified_date'], split: true }, + 'newer_version' => { from: ['newer_version'], split: '\|' }, + 'oer_size' => { from: ['oer_size'], split: '\|' }, + 'on_behalf_of' => { from: ['on_behalf_of'], generated: true }, + 'owner' => { from: ['owner'], generated: true }, + 'parents' => { from: ['parents'], related_parents_field_mapping: true }, + 'previous_version' => { from: ['previous_version'], split: '\|' }, + 'publisher' => { from: ['publisher'], split: true }, + 'related_item' => { from: ['related_item'], split: '\|' }, + 'relative_path' => { from: ['relative_path'], split: '\|', generated: true }, + 'related_url' => { from: ['related_url', 'relation'], split: /\s* [|]\s*/ }, + 'remote_files' => { from: ['remote_files'], split: /\s*[|]\s*/ }, + 'rendering_ids' => { from: ['rendering_ids'], split: '\|', generated: true }, + 'resource_type' => { from: ['resource_type'], split: true }, + 'rights_holder' => { from: ['rights_holder'], split: '\|' }, + 'rights_notes' => { from: ['rights_notes'], split: true }, + 'rights_statement' => { from: ['rights', 'rights_statement'], split: '\|', generated: true }, + 'source' => { from: ['source'], split: true }, + 'state' => { from: ['state'], generated: true }, + 'subject' => { from: ['subject'], 
split: true }, + 'table_of_contents' => { from: ['table_of_contents'], split: '\|' }, + 'title' => { from: ['title'], split: /\s*[|]\s*/ }, + 'video_embed' => { from: ['video_embed'] } + } + + default_bulkrax_fm['Bulkrax::BagitParser'] = defaults.merge({ + # add or remove custom mappings for this parser here + }) + + default_bulkrax_fm['Bulkrax::CsvParser'] = defaults.merge({ + # add or remove custom mappings for this parser here + }) + + default_bulkrax_fm['Bulkrax::OaiDcParser'] = defaults.merge({ + # add or remove custom mappings for this parser here + }) + + default_bulkrax_fm['Bulkrax::OaiQualifiedDcParser'] = defaults.merge({ + # add or remove custom mappings for this parser here + }) + + default_bulkrax_fm['Bulkrax::XmlParser'] = defaults.merge({ + # add or remove custom mappings for this parser here + }) + + default_bulkrax_fm.with_indifferent_access + end + # rubocop:disable Metrics/ClassLength class Application < Rails::Application ## diff --git a/config/initializers/bulkrax.rb b/config/initializers/bulkrax.rb index 5184f11a8..edeeca7fe 100644 --- a/config/initializers/bulkrax.rb +++ b/config/initializers/bulkrax.rb @@ -45,99 +45,13 @@ # config.collection_field_mapping['Bulkrax::RdfEntry'] = 'http://opaquenamespace.org/ns/set' # Field mappings - # Create a completely new set of mappings by replacing the whole set as follows - # config.field_mappings = { - # "Bulkrax::OaiDcParser" => { **individual field mappings go here*** } - # } - - # Add to, or change existing mappings as follows - # e.g. 
to exclude date - # config.field_mappings["Bulkrax::OaiDcParser"]["date"] = { from: ["date"], excluded: true } - - default_field_mapping = { - 'abstract' => { from: ['abstract'], split: true }, - 'accessibility_feature' => { from: ['accessibility_feature'], split: '\|' }, - 'accessibility_hazard' => { from: ['accessibility_hazard'], split: '\|' }, - 'accessibility_summary' => { from: ['accessibility_summary'] }, - 'additional_information' => { from: ['additional_information'], split: '\|', generated: true }, - 'admin_note' => { from: ['admin_note'] }, - 'admin_set_id' => { from: ['admin_set_id'], generated: true }, - 'alternate_version' => { from: ['alternate_version'], split: '\|' }, - 'alternative_title' => { from: ['alternative_title'], split: '\|', generated: true }, - 'arkivo_checksum' => { from: ['arkivo_checksum'], split: '\|', generated: true }, - 'audience' => { from: ['audience'], split: '\|' }, - 'based_near' => { from: ['location'], split: '\|' }, - 'bibliographic_citation' => { from: ['bibliographic_citation'], split: true }, - 'contributor' => { from: ['contributor'], split: true }, - 'create_date' => { from: ['create_date'], split: true }, - 'children' => { from: ['children'], related_children_field_mapping: true }, - 'committee_member' => { from: ['committee_member'], split: '\|' }, - 'creator' => { from: ['creator'], split: true }, - 'date_created' => { from: ['date_created'], split: true }, - 'date_uploaded' => { from: ['date_uploaded'], generated: true }, - 'degree_discipline' => { from: ['discipline'], split: '\|' }, - 'degree_grantor' => { from: ['grantor'], split: '\|' }, - 'degree_level' => { from: ['level'], split: '\|' }, - 'degree_name' => { from: ['degree'], split: '\|' }, - 'depositor' => { from: ['depositor'], split: '\|', generated: true }, - 'description' => { from: ['description'], split: true }, - 'discipline' => { from: ['discipline'], split: '\|' }, - 'education_level' => { from: ['education_level'], split: '\|' }, - 'embargo_id' 
=> { from: ['embargo_id'], generated: true }, - 'extent' => { from: ['extent'], split: true }, - 'file' => { from: ['file'], split: /\s*[|]\s*/ }, - 'identifier' => { from: ['identifier'], split: true }, - 'import_url' => { from: ['import_url'], split: '\|', generated: true }, - 'keyword' => { from: ['keyword'], split: true }, - 'label' => { from: ['label'], generated: true }, - 'language' => { from: ['language'], split: true }, - 'lease_id' => { from: ['lease_id'], generated: true }, - 'library_catalog_identifier' => { from: ['library_catalog_identifier'], split: '\|' }, - 'license' => { from: ['license'], split: /\s*[|]\s*/ }, - 'modified_date' => { from: ['modified_date'], split: true }, - 'newer_version' => { from: ['newer_version'], split: '\|' }, - 'oer_size' => { from: ['oer_size'], split: '\|' }, - 'on_behalf_of' => { from: ['on_behalf_of'], generated: true }, - 'owner' => { from: ['owner'], generated: true }, - 'parents' => { from: ['parents'], related_parents_field_mapping: true }, - 'previous_version' => { from: ['previous_version'], split: '\|' }, - 'publisher' => { from: ['publisher'], split: true }, - 'related_item' => { from: ['related_item'], split: '\|' }, - 'relative_path' => { from: ['relative_path'], split: '\|', generated: true }, - 'related_url' => { from: ['related_url', 'relation'], split: /\s* [|]\s*/ }, - 'remote_files' => { from: ['remote_files'], split: /\s*[|]\s*/ }, - 'rendering_ids' => { from: ['rendering_ids'], split: '\|', generated: true }, - 'resource_type' => { from: ['resource_type'], split: true }, - 'rights_holder' => { from: ['rights_holder'], split: '\|' }, - 'rights_notes' => { from: ['rights_notes'], split: true }, - 'rights_statement' => { from: ['rights', 'rights_statement'], split: '\|', generated: true }, - 'source' => { from: ['source'], split: true }, - 'state' => { from: ['state'], generated: true }, - 'subject' => { from: ['subject'], split: true }, - 'table_of_contents' => { from: ['table_of_contents'], split: 
'\|' }, - 'title' => { from: ['title'], split: /\s*[|]\s*/ }, - 'video_embed' => { from: ['video_embed'] } - } - - config.field_mappings["Bulkrax::BagitParser"] = default_field_mapping.merge({ - # add or remove custom mappings for this parser here - }) - - config.field_mappings["Bulkrax::CsvParser"] = default_field_mapping.merge({ - # add or remove custom mappings for this parser here - }) - - config.field_mappings["Bulkrax::OaiDcParser"] = default_field_mapping.merge({ - # add or remove custom mappings for this parser here - }) - - config.field_mappings["Bulkrax::OaiQualifiedDcParser"] = default_field_mapping.merge({ - # add or remove custom mappings for this parser here - }) - - config.field_mappings["Bulkrax::XmlParser"] = default_field_mapping.merge({ - # add or remove custom mappings for this parser here - }) + # NOTE: Bulkrax field mappings are configured on a per-tenant basis in the Account settings. + # The default set of field mappings that new tenants will be initialized with can be found + # and/or modified in config/application.rb (Hyku#default_bulkrax_field_mappings) + # @see config/application.rb + # @see app/models/concerns/account_settings.rb + # WARN: Modifying Bulkrax's field mappings in this file will not work as expected + # @see lib/bulkrax/bulkrax_decorator.rb # Because Hyku now uses and assumes Valkyrie to query the repository layer, we need to match the # object factory to use Valkyrie. diff --git a/lib/bulkrax/bulkrax_decorator.rb b/lib/bulkrax/bulkrax_decorator.rb index 5df4c161a..08536eefe 100644 --- a/lib/bulkrax/bulkrax_decorator.rb +++ b/lib/bulkrax/bulkrax_decorator.rb @@ -6,7 +6,7 @@ def field_mappings if Site.account.present? && Site.account.bulkrax_field_mappings.present? JSON.parse(Site.account.bulkrax_field_mappings).with_indifferent_access else - super + Hyku.default_bulkrax_field_mappings.presence || super end end end