From 51886b91027bda901d1be09f7efbb13c1c594307 Mon Sep 17 00:00:00 2001 From: Ry Biesemeyer Date: Mon, 6 Nov 2023 09:22:23 -0800 Subject: [PATCH] geoip: extract database manager to stand-alone feature (#15348) * geoip: extract database manager to stand-alone feature Introduces an Elastic-licensed GeoipDatabaseManagement tool that can be used by ANY plugin running on Elastic-licensed Logstash to retrieve a subscription to a GeoIP database that ensures EULA-compliance and frequent updates, and migrates the previous Elastic-licensed code-in-Logstash-core extension to the Geoip Filter to use this new tool, requiring ZERO changes to in-the-wild versions of the plugin. The implementation of the new tool follows the previous implementation as closely as possible, but presents a new interface that ensures that a consumer can ATOMICALLY subscribe to a database path without risk that the subscriber will receive an update or expiry before it is finished applying the initial value: ~~~ ruby geoip_manager = LogStash::GeoipDatabaseManagement::Manager.instance subscription = geoip_manager.subscribe('City') subscription.observe(construct: ->(initial_dbinfo){ }, on_update: ->(updated_dbinfo){ }, on_expire: ->( _ ){ }) subscription.release! 
~~~ * docs: link in geoip database manager docs * docs: reorganize pending 'geoip database management' feature * docs: link to geoip pages from feature index * geoip: add SubscriptionObserver "interface" simplifies using Subscription#observe from Java * geoip: fixup SubscriptionObserver after rename * geoip: quacking like a SubscriptionObserver is enough * geoip: simplify constants of legacy geoip filter extension * geoip: bump logging level to debug for non-actionable log * geoip: refine log message to omit non-actionable info * re-enable invokedynamic (was disabled to avoid upstream bug) * geoip: resolve testing fall-out from filter extension's "private" constants removal * geoip: consistently use `DataPath#resolve` internally, too --- config/logstash.yml | 4 +- docs/index.asciidoc | 3 + .../static/geoip-database-management.asciidoc | 10 + .../configuring.asciidoc | 68 +++ .../geoip-database-management/index.asciidoc | 19 + .../metrics.asciidoc | 56 ++ ...eoip-database-management-settings.asciidoc | 26 + logstash-core/lib/logstash/agent.rb | 12 +- logstash-core/lib/logstash/util.rb | 13 + x-pack/lib/filters/geoip/database_manager.rb | 316 ++++------ x-pack/lib/filters/geoip/database_metadata.rb | 108 ---- x-pack/lib/filters/geoip/database_metric.rb | 73 --- x-pack/lib/filters/geoip/download_manager.rb | 153 ----- x-pack/lib/filters/geoip/util.rb | 62 -- .../geoip_database_management/constants.rb | 19 + .../geoip_database_management/data_path.rb | 28 + .../lib/geoip_database_management/db_info.rb | 36 ++ .../geoip_database_management/downloader.rb | 188 ++++++ .../extension.rb | 8 +- .../lib/geoip_database_management/manager.rb | 359 ++++++++++++ .../lib/geoip_database_management/metadata.rb | 124 ++++ .../lib/geoip_database_management/metric.rb | 67 +++ .../geoip_database_management/subscription.rb | 124 ++++ .../subscription_observer.rb | 105 ++++ x-pack/lib/geoip_database_management/util.rb | 37 ++ x-pack/lib/x-pack/logstash_registry.rb | 4 +- 
.../filters/geoip/database_manager_spec.rb | 539 +++++------------ .../filters/geoip/database_metadata_spec.rb | 223 ------- .../filters/geoip/download_manager_spec.rb | 244 -------- .../downloader_spec.rb | 287 +++++++++ .../fixtures/normal_resp.json | 0 .../fixtures/sample.tgz | Bin .../geoip_database_management/manager_spec.rb | 548 ++++++++++++++++++ .../metadata_spec.rb | 86 +++ .../geoip_database_management/spec_helper.rb | 14 + .../subscription_spec.rb | 223 +++++++ x-pack/spec/support/helpers.rb | 2 + 37 files changed, 2706 insertions(+), 1482 deletions(-) create mode 100644 docs/static/geoip-database-management.asciidoc create mode 100644 docs/static/geoip-database-management/configuring.asciidoc create mode 100644 docs/static/geoip-database-management/index.asciidoc create mode 100644 docs/static/geoip-database-management/metrics.asciidoc create mode 100644 docs/static/settings/geoip-database-management-settings.asciidoc delete mode 100644 x-pack/lib/filters/geoip/database_metadata.rb delete mode 100644 x-pack/lib/filters/geoip/database_metric.rb delete mode 100644 x-pack/lib/filters/geoip/download_manager.rb delete mode 100644 x-pack/lib/filters/geoip/util.rb create mode 100644 x-pack/lib/geoip_database_management/constants.rb create mode 100644 x-pack/lib/geoip_database_management/data_path.rb create mode 100644 x-pack/lib/geoip_database_management/db_info.rb create mode 100644 x-pack/lib/geoip_database_management/downloader.rb rename x-pack/lib/{filters/geoip => geoip_database_management}/extension.rb (64%) create mode 100644 x-pack/lib/geoip_database_management/manager.rb create mode 100644 x-pack/lib/geoip_database_management/metadata.rb create mode 100644 x-pack/lib/geoip_database_management/metric.rb create mode 100644 x-pack/lib/geoip_database_management/subscription.rb create mode 100644 x-pack/lib/geoip_database_management/subscription_observer.rb create mode 100644 x-pack/lib/geoip_database_management/util.rb delete mode 100644 
x-pack/spec/filters/geoip/database_metadata_spec.rb delete mode 100644 x-pack/spec/filters/geoip/download_manager_spec.rb create mode 100644 x-pack/spec/geoip_database_management/downloader_spec.rb rename x-pack/spec/{filters/geoip => geoip_database_management}/fixtures/normal_resp.json (100%) rename x-pack/spec/{filters/geoip => geoip_database_management}/fixtures/sample.tgz (100%) create mode 100644 x-pack/spec/geoip_database_management/manager_spec.rb create mode 100644 x-pack/spec/geoip_database_management/metadata_spec.rb create mode 100644 x-pack/spec/geoip_database_management/spec_helper.rb create mode 100644 x-pack/spec/geoip_database_management/subscription_spec.rb diff --git a/config/logstash.yml b/config/logstash.yml index 439830955ee..54045407d70 100644 --- a/config/logstash.yml +++ b/config/logstash.yml @@ -379,7 +379,7 @@ #xpack.management.elasticsearch.sniffing: false #xpack.management.logstash.poll_interval: 5s -# X-Pack GeoIP plugin +# X-Pack GeoIP Database Management # https://www.elastic.co/guide/en/logstash/current/plugins-filters-geoip.html#plugins-filters-geoip-manage_update -#xpack.geoip.download.endpoint: "https://geoip.elastic.co/v1/database" #xpack.geoip.downloader.enabled: true +#xpack.geoip.downloader.endpoint: "https://geoip.elastic.co/v1/database" diff --git a/docs/index.asciidoc b/docs/index.asciidoc index ef4f1f7b86a..61cb37aebcf 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -142,6 +142,9 @@ include::static/transforming-data.asciidoc[] // Deploying & Scaling include::static/deploying.asciidoc[] +// GeoIP Database Management +include::static/geoip-database-management.asciidoc[] + // Troubleshooting performance include::static/performance-checklist.asciidoc[] diff --git a/docs/static/geoip-database-management.asciidoc b/docs/static/geoip-database-management.asciidoc new file mode 100644 index 00000000000..7928bf41ad5 --- /dev/null +++ b/docs/static/geoip-database-management.asciidoc @@ -0,0 +1,10 @@ 
+[[geoip-database-management]] +== Managing GeoIP Databases + +Logstash provides GeoIP database management features to make it easier for you to +use plugins that require an up-to-date database to enrich events with geographic data. + +- <<logstash-geoip-database-management>> +- <<configuring-geoip-database-management>> + +include::geoip-database-management/index.asciidoc[] diff --git a/docs/static/geoip-database-management/configuring.asciidoc b/docs/static/geoip-database-management/configuring.asciidoc new file mode 100644 index 00000000000..74bce91a055 --- /dev/null +++ b/docs/static/geoip-database-management/configuring.asciidoc @@ -0,0 +1,68 @@ +[role="xpack"] +[[configuring-geoip-database-management]] +=== Configure GeoIP Database Management + +To configure +<<logstash-geoip-database-management,GeoIP database management>>: + +. Verify that you are using a license that includes the GeoIP database management +feature. ++ +-- +For more information, see https://www.elastic.co/subscriptions and +{kibana-ref}/managing-licenses.html[License management]. +-- + +. Specify +<<geoip-database-management-settings,GeoIP database management settings>> in the +`logstash.yml` file to tune the configuration as needed. + +include::../settings/geoip-database-management-settings.asciidoc[] + +[[configuring-geoip-database-management-offline]] +==== Offline and air-gapped environments + +If Logstash does not have access to the internet, or if you want to disable the database manager, set the `xpack.geoip.downloader.enabled` value to `false` in `logstash.yml`. +When the database manager is disabled, plugins that require GeoIP lookups must be configured with their own source of GeoIP databases. + +===== Using an HTTP proxy + +If you can't connect directly to the Elastic GeoIP endpoint, consider setting up an HTTP proxy server. +You can then specify the proxy with the `http_proxy` environment variable. + +[source,sh] +---- +export http_proxy="http://PROXY_IP:PROXY_PORT" +---- + +===== Using a custom endpoint + +If you work in an air-gapped environment and can't update your databases from the Elastic endpoint, +you can download databases from MaxMind and bootstrap the service. + +. 
Download both `GeoLite2-ASN.mmdb` and `GeoLite2-City.mmdb` database files from the +http://dev.maxmind.com/geoip/geoip2/geolite2[MaxMind site]. + +. Copy both database files to a single directory. + +. https://www.elastic.co/downloads/elasticsearch[Download {es}]. + +. From your {es} directory, run: ++ +[source,sh] +---- +./bin/elasticsearch-geoip -s my/database/dir +---- + +. Serve the static database files from your directory. For example, you can use +Docker to serve the files from an nginx server: ++ +[source,sh] +---- +docker run -p 8080:80 -v my/database/dir:/usr/share/nginx/html:ro nginx +---- + +. Specify the service's endpoint URL in Logstash using the +`xpack.geoip.downloader.endpoint=http://localhost:8080/overview.json` setting in `logstash.yml`. + +Logstash gets automatic updates from this service. diff --git a/docs/static/geoip-database-management/index.asciidoc b/docs/static/geoip-database-management/index.asciidoc new file mode 100644 index 00000000000..cb2ab60e695 --- /dev/null +++ b/docs/static/geoip-database-management/index.asciidoc @@ -0,0 +1,19 @@ +[role="xpack"] +[[logstash-geoip-database-management]] +=== GeoIP Database Management + +Logstash provides a mechanism for provisioning and maintaining GeoIP databases, which plugins can use to ensure that they have access to an always-up-to-date and EULA-compliant database for geo enrichment. +This mechanism requires internet access or a network route to an Elastic GeoIP database service. + +If the database manager is enabled in `logstash.yml` (as it is by default), a plugin may subscribe to a database, triggering a download if a valid database is not already available. +Logstash checks for updates every day. +When an updated database is discovered, it is downloaded in the background and made available to the plugins that rely on it. 
+ +The GeoIP databases are separately licensed from MaxMind under the terms of an End User License Agreement, which prohibits a database from being used after an update has been available for more than 30 days. +When Logstash cannot reach the database service for 30 days or more to validate that a managed database is up-to-date, that database is deleted and made unavailable to the plugins that subscribed to it. + +NOTE: GeoIP database management is a licensed feature of Logstash, and is only available in the Elastic-licensed complete distribution of Logstash. + +include::metrics.asciidoc[] + +include::configuring.asciidoc[] diff --git a/docs/static/geoip-database-management/metrics.asciidoc b/docs/static/geoip-database-management/metrics.asciidoc new file mode 100644 index 00000000000..2b4b5ef89b7 --- /dev/null +++ b/docs/static/geoip-database-management/metrics.asciidoc @@ -0,0 +1,56 @@ + +[[logstash-geoip-database-management-metrics]] +==== Database Metrics + +You can monitor the managed database's status through the <<node-stats-api,Node Stats API>>. 
+ +The following request returns a JSON document containing database manager stats, +including: + +* database status and freshness +** `geoip_download_manager.database.*.status` +*** `init` : using the initial Creative Commons (CC) licensed database +*** `up_to_date` : using an up-to-date EULA database +*** `to_be_expired` : 25 or more days without a successful call to the service +*** `expired` : 30 or more days without a successful call to the service +** `geoip_download_manager.database.*.fail_check_in_days` : number of days since Logstash last successfully called the service +* info about download successes and failures +** `geoip_download_manager.download_stats.successes` : number of successful checks and downloads +** `geoip_download_manager.download_stats.failures` : number of failed checks or downloads +** `geoip_download_manager.download_stats.status` +*** `updating` : a check and download is currently in progress +*** `succeeded` : the last download succeeded +*** `failed` : the last download failed + +[source,js] +-------------------------------------------------- +curl -XGET 'localhost:9600/_node/stats/geoip_download_manager?pretty' +-------------------------------------------------- + +Example response: + +[source,js] +-------------------------------------------------- +{ + "geoip_download_manager" : { + "database" : { + "ASN" : { + "status" : "up_to_date", + "fail_check_in_days" : 0, + "last_updated_at": "2021-06-21T16:06:54+02:00" + }, + "City" : { + "status" : "up_to_date", + "fail_check_in_days" : 0, + "last_updated_at": "2021-06-21T16:06:54+02:00" + } + }, + "download_stats" : { + "successes" : 15, + "failures" : 1, + "last_checked_at" : "2021-06-21T16:07:03+02:00", + "status" : "succeeded" + } + } +} +-------------------------------------------------- diff --git a/docs/static/settings/geoip-database-management-settings.asciidoc b/docs/static/settings/geoip-database-management-settings.asciidoc new file mode 100644 index 00000000000..a0d6730cfad --- /dev/null +++ b/docs/static/settings/geoip-database-management-settings.asciidoc @@ -0,0 +1,26 @@ +[role="xpack"] 
+[[geoip-database-management-settings]] +==== GeoIP Database Management settings in {ls} +++++ +<titleabbrev>GeoIP Database Management Settings</titleabbrev> +++++ + +You can set the following `xpack.geoip` settings in `logstash.yml` to configure the <<logstash-geoip-database-management,GeoIP database manager>>. +For more information about configuring Logstash, see <<logstash-settings-file>>. + +`xpack.geoip.downloader.enabled`:: + +(Boolean) If `true`, Logstash automatically downloads and manages updates for GeoIP2 databases from the `xpack.geoip.downloader.endpoint`. +If `false`, Logstash does not manage GeoIP2 databases and plugins that need a GeoIP2 database must be configured to provide their own. + +`xpack.geoip.downloader.endpoint`:: + +(String) Endpoint URL used to download updates for GeoIP2 databases. +For example, `https://mydomain.com/overview.json`. +Defaults to `https://geoip.elastic.co/v1/database`. +Note that Logstash will periodically make a GET request to `${xpack.geoip.downloader.endpoint}?elastic_geoip_service_tos=agree`, expecting the list of metadata about databases typically found in `overview.json`. + +`xpack.geoip.downloader.poll.interval`:: +(Time Value) How often Logstash checks for GeoIP2 database updates at the `xpack.geoip.downloader.endpoint`. +For example, `6h` to check every six hours. +Defaults to `24h` (24 hours). 
\ No newline at end of file diff --git a/logstash-core/lib/logstash/agent.rb b/logstash-core/lib/logstash/agent.rb index 6cdb2b0a694..98b16f371b1 100644 --- a/logstash-core/lib/logstash/agent.rb +++ b/logstash-core/lib/logstash/agent.rb @@ -609,9 +609,9 @@ def update_successful_reload_metrics(action, action_result) def initialize_geoip_database_metrics(metric) begin - relative_path = ::File.join(LogStash::Environment::LOGSTASH_HOME, "x-pack", "lib", "filters", "geoip") - require_relative ::File.join(relative_path, "database_manager") - require_relative ::File.join(relative_path, "database_metric") + relative_path = ::File.join(LogStash::Environment::LOGSTASH_HOME, "x-pack", "lib", "geoip_database_management") + require_relative ::File.join(relative_path, "manager") + require_relative ::File.join(relative_path, "metric") geoip_metric = metric.namespace([:geoip_download_manager]).tap do |n| db = n.namespace([:database]) @@ -629,11 +629,11 @@ def initialize_geoip_database_metrics(metric) dl.gauge(:status, nil) end - database_metric = LogStash::Filters::Geoip::DatabaseMetric.new(geoip_metric) - database_manager = LogStash::Filters::Geoip::DatabaseManager.instance + database_metric = LogStash::GeoipDatabaseManagement::Metric.new(geoip_metric) + database_manager = LogStash::GeoipDatabaseManagement::Manager.instance database_manager.database_metric = database_metric rescue LoadError => e - @logger.trace("DatabaseManager is not in classpath") + @logger.trace("DatabaseManager is not in classpath", exception: e.message, backtrace: e.backtrace) end end end # class LogStash::Agent diff --git a/logstash-core/lib/logstash/util.rb b/logstash-core/lib/logstash/util.rb index cae2c62a893..9eac95e1a27 100644 --- a/logstash-core/lib/logstash/util.rb +++ b/logstash-core/lib/logstash/util.rb @@ -51,6 +51,19 @@ def self.set_thread_plugin(plugin) Thread.current[:plugin] = plugin end + def self.with_logging_thread_context(override_context) + java_import 
org.apache.logging.log4j.ThreadContext + + backup = ThreadContext.getImmutableContext() + ThreadContext.putAll(override_context) + + yield + + ensure + ThreadContext.removeAll(override_context.keys) + ThreadContext.putAll(backup) + end + def self.thread_info(thread) # When the `thread` is dead, `Thread#backtrace` returns `nil`; fall back to an empty array. backtrace = (thread.backtrace || []).map do |line| diff --git a/x-pack/lib/filters/geoip/database_manager.rb b/x-pack/lib/filters/geoip/database_manager.rb index 7ef4751e2e9..9e8f7605f53 100644 --- a/x-pack/lib/filters/geoip/database_manager.rb +++ b/x-pack/lib/filters/geoip/database_manager.rb @@ -3,10 +3,6 @@ # you may not use this file except in compliance with the Elastic License. require "logstash/util/loggable" -require_relative "util" -require_relative "database_metadata" -require_relative "download_manager" -require_relative "database_metric" require "json" require "stud/try" require "singleton" @@ -14,6 +10,8 @@ require "concurrent/timer_task" require "thread" +require "geoip_database_management/manager" + # The mission of DatabaseManager is to ensure the plugin running an up-to-date MaxMind database and # thus users are compliant with EULA. # DatabaseManager does a daily checking by calling an endpoint to notice a version update. 
@@ -26,255 +24,100 @@ # while `offline` is for static database path provided by users module LogStash module Filters module Geoip class DatabaseManager - extend LogStash::Filters::Geoip::Util include LogStash::Util::Loggable - include LogStash::Filters::Geoip::Util include Singleton + CC_DB_TYPES = %w(City ASN).map(&:freeze).freeze + java_import org.apache.logging.log4j.ThreadContext private def initialize @triggered = false - @trigger_lock = Mutex.new - @download_interval = 24 * 60 * 60 # 24h - end - - def setup - prepare_cc_db - cc_city_database_path = get_db_path(CITY, CC) - cc_asn_database_path = get_db_path(ASN, CC) - - prepare_metadata - city_database_path = @metadata.database_path(CITY) - asn_database_path = @metadata.database_path(ASN) - - @states = { "#{CITY}" => DatabaseState.new(@metadata.is_eula(CITY), - Concurrent::Array.new, - city_database_path, - cc_city_database_path), - "#{ASN}" => DatabaseState.new(@metadata.is_eula(ASN), - Concurrent::Array.new, - asn_database_path, - cc_asn_database_path) } + @trigger_lock = Monitor.new - @download_manager = DownloadManager.new(@metadata) + @data_dir_path = ::File.join(LogStash::SETTINGS.get_value("path.data"), "plugins", "filters", "geoip") + end - database_metric.initialize_metrics(@metadata.get_all, @states) + def eula_manager + @eula_manager ||= LogStash::GeoipDatabaseManagement::Manager.instance end - protected - # create data dir, path.data, for geoip if it doesn't exist - # copy CC databases to data dir - def prepare_cc_db - FileUtils::mkdir_p(get_data_dir_path) - unless ::File.exist?(get_db_path(CITY, CC)) && ::File.exist?(get_db_path(ASN, CC)) - cc_database_paths = ::Dir.glob(::File.join(LogStash::Environment::LOGSTASH_HOME, "vendor", "**", "{GeoLite2-ASN,GeoLite2-City}.mmdb")) - cc_dir_path = get_dir_path(CC) - FileUtils.mkdir_p(cc_dir_path) - FileUtils.cp_r(cc_database_paths, cc_dir_path) + def setup + @eula_subscriptions = eula_manager.supported_database_types.each_with_object({}) do |database_type, 
memo| + memo[database_type] = eula_manager.subscribe_database_path(database_type).observe( + construct: -> (initial_db_info) { create!(database_type, initial_db_info) }, + on_update: -> (updated_db_info) { update!(database_type, updated_db_info) }, + on_expire: -> ( ) { expire!(database_type) } + ) end end - def prepare_metadata - @metadata = DatabaseMetadata.new - - unless @metadata.exist? - @metadata.save_metadata(CITY, CC, false) - @metadata.save_metadata(ASN, CC, false) + def trigger_download + return if @triggered + @trigger_lock.synchronize do + setup if @eula_subscriptions.nil? + @triggered = true end - - # reset md5 to allow re-download when the database directory is deleted manually - DB_TYPES.each { |type| @metadata.reset_md5(type) if @metadata.database_path(type).nil? } - - @metadata end - # notice plugins to use the new database path - # update metadata timestamp for those dbs that has no update or a valid update - # do daily check and clean up - def execute_download_job - success_cnt = 0 - - begin - pipeline_id = ThreadContext.get("pipeline.id") - ThreadContext.put("pipeline.id", nil) - - database_metric.set_download_status_updating - - updated_db = @download_manager.fetch_database - updated_db.each do |database_type, valid_download, dirname, new_database_path| - if valid_download - @metadata.save_metadata(database_type, dirname, true) - @states[database_type].is_eula = true - @states[database_type].is_expired = false - @states[database_type].database_path = new_database_path + protected + # resolve vendored databases... 
+ def prepare_cc_db + geoip_filter_plugin_path = Gem.loaded_specs['logstash-filter-geoip']&.full_gem_path or fail("geoip filter plugin library not found") + vendored_cc_licensed_dbs = ::File.expand_path('vendor', geoip_filter_plugin_path) - notify_plugins(database_type, :update, new_database_path) do |db_type, ids| - logger.info("geoip plugin will use database #{new_database_path}", - :database_type => db_type, :pipeline_ids => ids) unless ids.empty? - end + @cc_dbs = CC_DB_TYPES.each_with_object({}) do |database_type, memo| + database_filename = "GeoLite2-#{database_type}.mmdb" + vendored_database_path = ::File.expand_path(database_filename, vendored_cc_licensed_dbs) + fail("vendored #{database_type} database not present in #{vendored_cc_licensed_dbs}") unless ::File::exists?(vendored_database_path) - success_cnt += 1 - end - end + cc_dir_path = ::File.expand_path("CC", @data_dir_path) + FileUtils.mkdir_p(cc_dir_path) + FileUtils.cp_r(vendored_database_path, cc_dir_path) - updated_types = updated_db.map { |database_type, valid_download, dirname, new_database_path| database_type } - (DB_TYPES - updated_types).each do |unchange_type| - @metadata.update_timestamp(unchange_type) - success_cnt += 1 - end - rescue => e - logger.error(e.message, error_details(e, logger)) - ensure - check_age - clean_up_database(@metadata.dirnames) - database_metric.update_download_stats(success_cnt) - - ThreadContext.put("pipeline.id", pipeline_id) + memo[database_type] = ::File.expand_path(database_filename, cc_dir_path) end + logger.info("CC-licensed GeoIP databases are prepared for use by the GeoIP filter: #{@cc_dbs}") + rescue => e + fail "CC-licensed GeoIP databases could not be loaded: #{e}" end def notify_plugins(database_type, action, *args) plugins = @states[database_type].plugins.dup - ids = plugins.map { |plugin| plugin.execution_context.pipeline_id }.sort + ids = plugins.map { |plugin| plugin.execution_context.pipeline_id }.sort.uniq yield database_type, ids plugins.each { 
|plugin| plugin.update_filter(action, *args) if plugin } end - # call expiry action if Logstash use EULA database and fail to touch the endpoint for 30 days in a row - def check_age(database_types = DB_TYPES) - database_types.map do |database_type| - next unless @states[database_type].is_eula - - metadata = @metadata.get_metadata(database_type).last - check_at = metadata[DatabaseMetadata::Column::CHECK_AT].to_i - days_without_update = time_diff_in_days(check_at) - - case - when days_without_update >= 30 - was_expired = @states[database_type].is_expired - @states[database_type].is_expired = true - @states[database_type].database_path = nil - - notify_plugins(database_type, :expire) do |db_type, ids| - unless was_expired - logger.error("The MaxMind database hasn't been updated from last 30 days. Logstash is unable to get newer version from internet. "\ - "According to EULA, GeoIP plugin needs to stop using MaxMind database in order to be compliant. "\ - "Please check the network settings and allow Logstash accesses the internet to download the latest database, "\ - "or switch to offline mode (:database => PATH_TO_YOUR_DATABASE) to use a self-managed database "\ - "which you can download from https://dev.maxmind.com/geoip/geoip2/geolite2/ ") - - logger.warn("geoip plugin will stop filtering and will tag all events with the '_geoip_expired_database' tag.", - :database_type => db_type, :pipeline_ids => ids) - end - end - - database_status = DatabaseMetric::DATABASE_EXPIRED - when days_without_update >= 25 - logger.warn("The MaxMind database hasn't been updated for last #{days_without_update} days. "\ - "Logstash will fail the GeoIP plugin in #{30 - days_without_update} days. 
"\ - "Please check the network settings and allow Logstash accesses the internet to download the latest database ") - database_status = DatabaseMetric::DATABASE_TO_BE_EXPIRED - else - logger.trace("passed age check", :days_without_update => days_without_update) - database_status = DatabaseMetric::DATABASE_UP_TO_DATE - end - - database_metric.update_database_status(database_type, database_status, metadata, days_without_update) - end - end - - # Clean up directories which are not mentioned in the excluded_dirnames and not CC database - def clean_up_database(excluded_dirnames = []) - protected_dirnames = (excluded_dirnames + [CC]).uniq - existing_dirnames = ::Dir.children(get_data_dir_path) - .select { |f| ::File.directory? ::File.join(get_data_dir_path, f) } - - (existing_dirnames - protected_dirnames).each do |dirname| - dir_path = get_dir_path(dirname) - FileUtils.rm_r(dir_path) - logger.info("#{dir_path} is deleted") - end - end - - def trigger_download - return if @triggered - @trigger_lock.synchronize do - return if @triggered - setup - execute_download_job - # check database update periodically: - - @download_task = Concurrent::TimerTask.execute(execution_interval: @download_interval) do - LogStash::Util.set_thread_name 'geoip database download task' - database_update_check # every 24h - end - @triggered = true - end - end - def trigger_cc_database_fallback - return if @triggered - @trigger_lock.synchronize do - return if @triggered - - logger.info "The MaxMind EULA requires users to update the GeoIP databases within 30 days following the release of the update. " \ - "By setting `xpack.geoip.downloader.enabled` value in logstash.yml to `false`, any previously downloaded version of the database " \ - "are destroyed and replaced by the MaxMind Creative Commons license database." 
- + return if @cc_dbs setup_cc_database - @triggered = true end end def setup_cc_database prepare_cc_db - delete_eula_databases - DatabaseMetadata.new.delete - end - - def delete_eula_databases - begin - clean_up_database - rescue => e - details = error_details(e, logger) - details[:databases_path] = get_data_dir_path - logger.error "Failed to delete existing MaxMind EULA databases. To be compliant with the MaxMind EULA, you must "\ - "manually destroy any downloaded version of the EULA databases.", details - end - end - - def database_auto_update? - LogStash::SETTINGS.get("xpack.geoip.downloader.enabled") end public - # @note this method is expected to execute on a separate thread - def database_update_check - logger.debug "running database update check" - ThreadContext.put("pipeline.id", nil) - execute_download_job - end - private :database_update_check - def subscribe_database_path(database_type, database_path, geoip_plugin) if database_path.nil? - if database_auto_update? + if eula_manager.enabled? trigger_download logger.info "By not manually configuring a database path with `database =>`, you accepted and agreed MaxMind EULA. "\ - "For more details please visit https://www.maxmind.com/en/geolite2/eula" if @states[database_type].is_eula + "For more details please visit https://www.maxmind.com/en/geolite2/eula" - @states[database_type].plugins.push(geoip_plugin) unless @states[database_type].plugins.member?(geoip_plugin) + @states[database_type].plugins.add(geoip_plugin) @trigger_lock.synchronize do - @states[database_type].database_path + @states.fetch(database_type).database_path end else trigger_cc_database_fallback - get_db_path(database_type, CC) + @cc_dbs.fetch(database_type) end else logger.info "GeoIP database path is configured manually so the plugin will not check for update. 
"\ @@ -292,29 +135,70 @@ def database_path(database_type) @states[database_type].database_path end - def database_metric=(database_metric) - @database_metric = database_metric + def update!(database_type, updated_db_info) + new_database_path = updated_db_info.path + notify_plugins(database_type, :update, new_database_path) do |db_type, ids| + logger.info("geoip filter plugin will use database #{new_database_path}", + :database_type => db_type, :pipeline_ids => ids) unless ids.empty? + end + end + + def expire!(database_type) + notify_plugins(database_type, :expire) do |db_type, ids| + logger.warn("geoip filter plugin will stop filtering and will tag all events with the '_geoip_expired_database' tag.", + :database_type => db_type, :pipeline_ids => ids) + end + end + + def create!(database_type, initial_db_info) + @trigger_lock.synchronize do + @states ||= {} + + if initial_db_info.pending? + trigger_cc_database_fallback + effective_database_path, is_eula = @cc_dbs.fetch(database_type), false + else + effective_database_path, is_eula = initial_db_info.path, true + end + + @states[database_type] = DatabaseState.new(effective_database_path, is_eula) + end + end + + ## + # @api testing + def subscribed_plugins_count(database_type) + @states&.dig(database_type)&.plugins&.size || 0 + end + protected :subscribed_plugins_count + + ## + # @api testing + def eula_subscribed? + @eula_subscriptions&.any? end + protected :eula_subscribed? - def database_metric - logger.debug("DatabaseMetric is nil. No geoip metrics are available. Please report the bug") if @database_metric.nil? 
- @database_metric ||= LogStash::Filters::Geoip::DatabaseMetric.new(LogStash::Instrument::NamespacedNullMetric.new) + ## + # @api testing + def eula_subscription(database_type) + @eula_subscriptions&.dig(database_type) end + protected :eula_subscription + ## + # @api internal class DatabaseState - attr_reader :is_eula, :plugins, :database_path, :cc_database_path, :is_expired - attr_writer :is_eula, :database_path, :is_expired + attr_reader :plugins + attr_accessor :database_path + attr_reader :is_eula + # @param initial_database_path [String] # @param is_eula [Boolean] - # @param plugins [Concurrent::Array] - # @param database_path [String] - # @param cc_database_path [String] - def initialize(is_eula, plugins, database_path, cc_database_path) + def initialize(initial_database_path, is_eula) + @plugins = Concurrent::Set.new + @database_path = initial_database_path @is_eula = is_eula - @plugins = plugins - @database_path = database_path - @cc_database_path = cc_database_path - @is_expired = false end end end end end end diff --git a/x-pack/lib/filters/geoip/database_metadata.rb b/x-pack/lib/filters/geoip/database_metadata.rb deleted file mode 100644 index 5a6a5527005..00000000000 --- a/x-pack/lib/filters/geoip/database_metadata.rb +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License; -# you may not use this file except in compliance with the Elastic License. 
- -require "logstash/util/loggable" -require_relative "util" -require "csv" -require "date" - -module LogStash module Filters module Geoip class DatabaseMetadata - include LogStash::Util::Loggable - include LogStash::Filters::Geoip::Util - - def initialize - @metadata_path = ::File.join(get_data_dir_path, "metadata.csv") - end - - public - - # csv format: database_type, check_at, gz_md5, dirname, is_eula - def save_metadata(database_type, dirname, is_eula) - metadata = get_metadata(database_type, false) - metadata << [database_type, Time.now.to_i, md5(get_gz_path(database_type, dirname)), - dirname, is_eula] - update(metadata) - end - - def update_timestamp(database_type) - update_each_row do |row| - row[Column::CHECK_AT] = Time.now.to_i if row[Column::DATABASE_TYPE].eql?(database_type) - row - end - end - - def reset_md5(database_type) - update_each_row do |row| - row[Column::GZ_MD5] = "" if row[Column::DATABASE_TYPE].eql?(database_type) - row - end - end - - def update_each_row(&block) - metadata = get_all.map do |row| - yield row - end - update(metadata) - end - - def update(metadata) - metadata = metadata.sort_by { |row| row[Column::DATABASE_TYPE] } - ::CSV.open @metadata_path, 'w' do |csv| - metadata.each { |row| csv << row } - end - logger.trace("metadata updated", :metadata => metadata) - end - - def get_all - file_exist?(@metadata_path) ? 
::CSV.read(@metadata_path, headers: false) : Array.new - end - - # Give rows of metadata that match/exclude the type - def get_metadata(database_type, match = true) - get_all.select { |row| row[Column::DATABASE_TYPE].eql?(database_type) == match } - end - - # Return a valid database path - def database_path(database_type) - get_metadata(database_type).map { |metadata| get_db_path(database_type, metadata[Column::DIRNAME]) } - .select { |path| file_exist?(path) } - .last - end - - def gz_md5(database_type) - get_metadata(database_type).map { |metadata| metadata[Column::GZ_MD5] } - .last || '' - end - - def check_at(database_type) - (get_metadata(database_type).map { |metadata| metadata[Column::CHECK_AT] } - .last || 0).to_i - end - - def is_eula(database_type) - (get_metadata(database_type).map { |metadata| metadata[Column::IS_EULA] } - .last || 'false') == 'true' - end - - # Return all dirname - def dirnames - get_all.map { |metadata| metadata[Column::DIRNAME] } - end - - def exist? - file_exist?(@metadata_path) - end - - def delete - ::File.delete(@metadata_path) if exist? - end - - class Column - DATABASE_TYPE = 0 - CHECK_AT = 1 - GZ_MD5 = 2 - DIRNAME = 3 - IS_EULA = 4 - end -end end end end diff --git a/x-pack/lib/filters/geoip/database_metric.rb b/x-pack/lib/filters/geoip/database_metric.rb deleted file mode 100644 index 73f3f320a55..00000000000 --- a/x-pack/lib/filters/geoip/database_metric.rb +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License; -# you may not use this file except in compliance with the Elastic License. 
- -require "logstash/util/loggable" -require_relative "util" -require_relative "database_metadata" -require_relative "download_manager" -require "date" -require "time" - -module LogStash module Filters module Geoip class DatabaseMetric - include LogStash::Util::Loggable - include LogStash::Filters::Geoip::Util - - DATABASE_INIT = "init".freeze - DATABASE_UP_TO_DATE = "up_to_date".freeze - DATABASE_TO_BE_EXPIRED = "to_be_expired".freeze - DATABASE_EXPIRED = "expired".freeze - - DOWNLOAD_SUCCEEDED = "succeeded".freeze - DOWNLOAD_FAILED = "failed".freeze - DOWNLOAD_UPDATING = "updating".freeze - - def initialize(metric) - # Fallback when testing plugin and no metric collector are correctly configured. - @metric = metric || LogStash::Instrument::NamespacedNullMetric.new - end - - def initialize_metrics(metadatas, states) - metadatas.each do |row| - type = row[DatabaseMetadata::Column::DATABASE_TYPE] - @metric.namespace([:database, type.to_sym]).tap do |n| - n.gauge(:status, states[type].is_eula ? 
DATABASE_UP_TO_DATE : DATABASE_INIT) - if states[type].is_eula - n.gauge(:last_updated_at, unix_time_to_iso8601(row[DatabaseMetadata::Column::DIRNAME])) - n.gauge(:fail_check_in_days, time_diff_in_days(row[DatabaseMetadata::Column::CHECK_AT])) - end - end - end - - @metric.namespace([:download_stats]).tap do |n| - check_at = metadatas.map { |row| row[DatabaseMetadata::Column::CHECK_AT].to_i }.max - n.gauge(:last_checked_at, unix_time_to_iso8601(check_at)) - end - end - - def update_download_stats(success_cnt) - @metric.namespace([:download_stats]).tap do |n| - n.gauge(:last_checked_at, Time.now.iso8601) - - if success_cnt == DB_TYPES.size - n.increment(:successes, 1) - n.gauge(:status, DOWNLOAD_SUCCEEDED) - else - n.increment(:failures, 1) - n.gauge(:status, DOWNLOAD_FAILED) - end - end - end - - def set_download_status_updating - @metric.namespace([:download_stats]).gauge(:status, DOWNLOAD_UPDATING) - end - - def update_database_status(database_type, database_status, metadata, days_without_update) - @metric.namespace([:database, database_type.to_sym]).tap do |n| - n.gauge(:status, database_status) - n.gauge(:last_updated_at, unix_time_to_iso8601(metadata[DatabaseMetadata::Column::DIRNAME])) - n.gauge(:fail_check_in_days, days_without_update) - end - end -end end end end diff --git a/x-pack/lib/filters/geoip/download_manager.rb b/x-pack/lib/filters/geoip/download_manager.rb deleted file mode 100644 index 938888b476e..00000000000 --- a/x-pack/lib/filters/geoip/download_manager.rb +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License; -# you may not use this file except in compliance with the Elastic License. 
- -require_relative '../../../../lib/bootstrap/util/compress' -require "logstash/util/loggable" -require_relative "util" -require_relative "database_metadata" -require "logstash-filter-geoip_jars" -require "json" -require "zlib" -require "stud/try" -require "down" -require "fileutils" -require 'uri' - -module LogStash module Filters module Geoip class DownloadManager - include LogStash::Util::Loggable - include LogStash::Filters::Geoip::Util - - def initialize(metadata) - @metadata = metadata - end - - GEOIP_HOST = "https://geoip.elastic.co".freeze - GEOIP_PATH = "/v1/database".freeze - GEOIP_ENDPOINT = "#{GEOIP_HOST}#{GEOIP_PATH}".freeze - - class BadResponseCodeError < Error - attr_reader :response_code, :response_body - - def initialize(response_code, response_body) - @response_code = response_code - @response_body = response_body - end - - def message - "GeoIP service response code '#{response_code}', body '#{response_body}'" - end - end - - public - # Check available update and download them. Unzip and validate the file. 
- # if the download failed, valid_download return false - # return Array of [database_type, valid_download, dirname, new_database_path] - def fetch_database - dirname = Time.now.to_i.to_s - check_update - .map do |database_type, db_info| - begin - new_zip_path = download_database(database_type, dirname, db_info) - new_database_path = unzip(database_type, dirname, new_zip_path) - assert_database!(new_database_path) - [database_type, true, dirname, new_database_path] - rescue => e - logger.error(e.message, error_details(e, logger)) - [database_type, false, nil, nil] - end - end - end - - private - # Call infra endpoint to get md5 of latest databases and verify with metadata - # return Array of new database information [database_type, db_info] - def check_update - res = rest_client.get(service_endpoint) - logger.debug("check update", :endpoint => service_endpoint.to_s, :response => res.code) - - if res.code < 200 || res.code > 299 - raise BadResponseCodeError.new(res.code, res.body) - end - - service_resp = JSON.parse(res.body) - - updated_db = DB_TYPES.map do |database_type| - db_info = service_resp.find { |db| db['name'].eql?("#{GEOLITE}#{database_type}.#{GZ_EXT}") } - has_update = @metadata.gz_md5(database_type) != db_info['md5_hash'] - [database_type, has_update, db_info] - end - .select { |database_type, has_update, db_info| has_update } - .map { |database_type, has_update, db_info| [database_type, db_info] } - - logger.info "new database version detected? #{!updated_db.empty?}" - - updated_db - end - - def download_database(database_type, dirname, db_info) - Stud.try(3.times) do - FileUtils.mkdir_p(get_dir_path(dirname)) - zip_path = get_gz_path(database_type, dirname) - - actual_url = download_url(db_info['url']) - logger.debug? 
&& logger.debug("download #{actual_url}") - - options = { destination: zip_path } - options.merge!({proxy: ENV['http_proxy']}) if ENV.include?('http_proxy') - Down.download(actual_url, **options) - raise "the new download has wrong checksum" if md5(zip_path) != db_info['md5_hash'] - - logger.debug("new database downloaded in ", :path => zip_path) - zip_path - end - end - - # extract all files and folders from .tgz to path.data directory - # return dirname [String], new_database_path [String] - def unzip(database_type, dirname, zip_path) - temp_path = ::File.join(get_dir_path(dirname), database_type) - LogStash::Util::Tar.extract(zip_path, temp_path) - FileUtils.cp_r(::File.join(temp_path, '.'), get_dir_path(dirname)) - FileUtils.rm_r(temp_path) - - get_db_path(database_type, dirname) - end - - # Make sure the path has usable database - def assert_database!(database_path) - raise "failed to load database #{database_path}" unless org.logstash.filters.geoip.GeoIPFilter.database_valid?(database_path) - end - - def rest_client - @client ||= begin - client_options = { - request_timeout: 15, - connect_timeout: 5 - } - client_options[:proxy] = ENV['http_proxy'] if ENV.include?('http_proxy') - Manticore::Client.new(client_options) - end - end - - def uuid - @uuid ||= ::File.read(::File.join(LogStash::SETTINGS.get("path.data"), "uuid")) - end - - def service_endpoint - return @service_endpoint if @service_endpoint - - uri = URI(LogStash::SETTINGS.get("xpack.geoip.download.endpoint") || GEOIP_ENDPOINT) - uri.query = "key=#{uuid}&elastic_geoip_service_tos=agree" - @service_endpoint = uri - end - - def download_url(url) - uri = URI(url) - return url if uri.scheme - - download_uri = service_endpoint.dup - download_uri.path = "/#{url}" - download_uri.to_s - end -end end end end diff --git a/x-pack/lib/filters/geoip/util.rb b/x-pack/lib/filters/geoip/util.rb deleted file mode 100644 index e7a3eb14feb..00000000000 --- a/x-pack/lib/filters/geoip/util.rb +++ /dev/null @@ -1,62 +0,0 
@@ -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License; -# you may not use this file except in compliance with the Elastic License. - -require "digest" -require "date" -require "time" - -module LogStash module Filters - module Geoip - GZ_EXT = 'tgz'.freeze - DB_EXT = 'mmdb'.freeze - GEOLITE = 'GeoLite2-'.freeze - CITY = "City".freeze - ASN = "ASN".freeze - DB_TYPES = [ASN, CITY].freeze - CITY_DB_NAME = "#{GEOLITE}#{CITY}.#{DB_EXT}".freeze - ASN_DB_NAME = "#{GEOLITE}#{ASN}.#{DB_EXT}".freeze - DEFAULT_DB_NAMES = [CITY_DB_NAME, ASN_DB_NAME].freeze - CC = "CC".freeze - - module Util - def get_db_path(database_type, dirname) - ::File.join(get_data_dir_path, dirname, "#{GEOLITE}#{database_type}.#{DB_EXT}") - end - - def get_gz_path(database_type, dirname) - ::File.join(get_data_dir_path, dirname, "#{GEOLITE}#{database_type}.#{GZ_EXT}") - end - - def get_dir_path(dirname) - ::File.join(get_data_dir_path, dirname) - end - - def get_data_dir_path - ::File.join(LogStash::SETTINGS.get_value("path.data"), "plugins", "filters", "geoip") - end - - def file_exist?(path) - !path.nil? && ::File.exist?(path) && !::File.empty?(path) - end - - def md5(file_path) - file_exist?(file_path) ? Digest::MD5.hexdigest(::File.read(file_path)) : "" - end - - def error_details(e, logger) - error_details = { :cause => e.cause } - error_details[:backtrace] = e.backtrace if logger.debug? - error_details - end - - def time_diff_in_days(timestamp) - (::Date.today - ::Time.at(timestamp.to_i).to_date).to_i - end - - def unix_time_to_iso8601(timestamp) - Time.at(timestamp.to_i).iso8601 - end - end - end -end end diff --git a/x-pack/lib/geoip_database_management/constants.rb b/x-pack/lib/geoip_database_management/constants.rb new file mode 100644 index 00000000000..cf0c0d2d96d --- /dev/null +++ b/x-pack/lib/geoip_database_management/constants.rb @@ -0,0 +1,19 @@ +# Copyright Elasticsearch B.V. 
# and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License;
# you may not use this file except in compliance with the Elastic License.

module LogStash module GeoipDatabaseManagement
  ##
  # Naming constants shared by the GeoIP database management components:
  # archive/database file extensions, the GeoLite2 file-name prefix, and the
  # database types that are managed.
  module Constants
    GZ_EXT = 'tgz'.freeze
    DB_EXT = 'mmdb'.freeze

    GEOLITE = 'GeoLite2-'.freeze
    CITY = "City".freeze
    ASN = "ASN".freeze
    DB_TYPES = [ASN, CITY].freeze

    CITY_DB_NAME = "#{GEOLITE}#{CITY}.#{DB_EXT}".freeze
    ASN_DB_NAME = "#{GEOLITE}#{ASN}.#{DB_EXT}".freeze
    DEFAULT_DB_NAMES = [CITY_DB_NAME, ASN_DB_NAME].freeze
  end
end end

# file: x-pack/lib/geoip_database_management/data_path.rb
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License;
# you may not use this file except in compliance with the Elastic License.

module LogStash module GeoipDatabaseManagement
  ##
  # Resolves file-system locations relative to a single root directory.
  class DataPath
    include GeoipDatabaseManagement::Constants

    # @return [String] the frozen, absolute root directory
    attr_reader :root

    # @param root [String] directory that all resolved paths are relative to;
    #   it is expanded to an absolute path once, at construction time
    def initialize(root)
      @root = ::File.expand_path(root).freeze
    end

    # @param database_type [String] e.g. "City" or "ASN"
    # @param dirname [String] directory name under the root
    # @return [String] absolute path of the `.tgz` archive for the database type
    def gz(database_type, dirname)
      resolve(dirname, "#{GEOLITE}#{database_type}.#{GZ_EXT}")
    end

    # @param database_type [String] e.g. "City" or "ASN"
    # @param dirname [String] directory name under the root
    # @return [String] absolute path of the `.mmdb` file for the database type
    def db(database_type, dirname)
      resolve(dirname, "#{GEOLITE}#{database_type}.#{DB_EXT}")
    end

    # Joins the given path segments and expands them against the root.
    #
    # @param relative_path [String] first path segment
    # @param more [Array<String>] additional path segments
    # @return [String] absolute path
    def resolve(relative_path, *more)
      ::File.expand_path(::File.join(relative_path, *more), @root)
    end
  end
end; end

# file: x-pack/lib/geoip_database_management/db_info.rb
#
# Copyright Elasticsearch B.V.
# and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License;
# you may not use this file except in compliance with the Elastic License.

module LogStash module GeoipDatabaseManagement

  ##
  # The DbInfo is the read-only immutable state of a managed database.
  # It is provided by Subscription#value and Subscription#observe
  class DbInfo

    # @return [String, nil] frozen copy of the database path, or nil when
    #   no path was given
    attr_reader :path

    # @param path [String, nil] location of the database file; the value is
    #   duplicated and frozen so later mutation of the caller's string
    #   does not leak into this object
    # @param pending [Boolean]
    # @param expired [Boolean]
    def initialize(path:, pending: false, expired: false)
      @path = path&.dup&.freeze
      @pending = pending
      @expired = expired
    end

    # @return [Boolean] the `expired` flag given at construction
    def expired?
      @expired
    end

    # @return [Boolean] the `pending` flag given at construction
    def pending?
      @pending
    end

    # @return [Boolean] true when there is no path and the info is not pending.
    #   NOTE(review): as written this is also true for EXPIRED (nil path, not
    #   pending) — confirm callers check `expired?` first if they need to
    #   tell the two apart.
    def removed?
      !@pending && @path.nil?
    end

    EXPIRED = DbInfo.new(path: nil, expired: true)
    PENDING = DbInfo.new(path: nil, pending: true)
    REMOVED = DbInfo.new(path: nil, pending: false)
  end
end end

# file: x-pack/lib/geoip_database_management/downloader.rb
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License;
# you may not use this file except in compliance with the Elastic License.
require_relative '../../../lib/bootstrap/util/compress'
require 'logstash/util/loggable'

require_relative 'util'
require_relative 'metadata'

require "json"
require "zlib"
require "stud/try"
require "down"
require "fileutils"
require 'uri'

module LogStash module GeoipDatabaseManagement
  ##
  # Talks to the GeoIP database service: lists the available databases,
  # downloads and unpacks the ones that differ from local metadata, and
  # sanity-checks the resulting files.
  class Downloader
    include GeoipDatabaseManagement::Constants
    include GeoipDatabaseManagement::Util
    include LogStash::Util::Loggable

    ##
    # Raised when the database service answers with a non-2xx status.
    class BadResponseCodeError < Error
      attr_reader :response_code, :response_body

      def initialize(response_code, response_body)
        @response_code = response_code
        @response_body = response_body
      end

      def message
        "GeoIP service response code '#{response_code}', body '#{response_body}'"
      end
    end

    # @return [URI] the service endpoint including the `key` and ToS query
    attr_reader :list_databases_url

    ##
    # @param metadata [Metadata]
    # @param service_endpoint [URI,String]
    def initialize(metadata, service_endpoint)
      logger.trace("init", metadata: metadata, endpoint: service_endpoint)
      @metadata = metadata
      @paths = metadata.paths
      service_endpoint = URI(service_endpoint).dup.freeze

      # any query on the configured endpoint is replaced by the merge below
      if service_endpoint.query&.chars&.any?
        logger.warn("GeoIP endpoint URI includes query parameter, which will be ignored: `#{safe_uri service_endpoint}`")
      end

      @list_databases_url = service_endpoint.merge("?key=#{uuid}&elastic_geoip_service_tos=agree").freeze
    end

    public

    # Checks for available updates and downloads them, then unpacks and
    # validates each file. A failure for one database type is logged and
    # reported in that type's result entry; it does not abort the others.
    #
    # @param db_types [Enumerable<String>]
    # @return [Array<Array>] one `[database_type, valid_download, dirname, new_database_path]`
    #   entry per database type that had an update; `valid_download` is false
    #   (and the last two elements nil) when fetching that type failed
    def fetch_databases(db_types)
      dirname = Time.now.to_i.to_s
      check_update(db_types)
        .map do |database_type, db_info|
          begin
            new_zip_path = download_database(database_type, dirname, db_info)
            new_database_path = unzip(database_type, dirname, new_zip_path)
            assert_database!(new_database_path)
            [database_type, true, dirname, new_database_path]
          rescue => e
            logger.error("failed to fetch #{database_type} database", error_details(e, logger))
            [database_type, false, nil, nil]
          end
        end
    end

    private

    # Calls the service endpoint to get the md5 of the latest databases and
    # compares it with local metadata.
    #
    # Yields `[database_type, db_info]` for each type that is missing locally
    # or whose md5 differs; without a block, returns those pairs as an Array.
    #
    # @raise [BadResponseCodeError] when the service responds with a non-2xx code
    def check_update(db_types)
      return enum_for(:check_update, db_types).to_a unless block_given?

      res = rest_client.get(list_databases_url)
      logger.debug("check update", :endpoint => safe_uri(list_databases_url).to_s, :response => res.code)

      if res.code < 200 || res.code > 299
        raise BadResponseCodeError.new(res.code, res.body)
      end

      service_resp = JSON.parse(res.body)

      db_types.each do |database_type|
        db_info = service_resp.find { |db| db['name'].eql?("#{GEOLITE}#{database_type}.#{GZ_EXT}") }
        if db_info.nil?
          logger.debug("Database service did not include #{database_type}")
        elsif @metadata.database_path(database_type).nil?
          logger.debug("Local #{database_type} database is not present.")
          yield(database_type, db_info)
        elsif @metadata.gz_md5(database_type) == db_info['md5_hash']
          logger.debug("Local #{database_type} database is up-to-date.")
        else
          logger.debug("Updated #{database_type} database is available.")
          yield(database_type, db_info)
        end
      end
    end

    # Downloads the archive for one database type (up to three attempts) and
    # verifies its md5 against the value advertised by the service.
    #
    # @return [String] path of the downloaded archive
    def download_database(database_type, dirname, db_info)
      Stud.try(3.times) do
        FileUtils.mkdir_p(@paths.resolve(dirname))
        zip_path = @paths.gz(database_type, dirname)

        actual_url = resolve_download_url(db_info['url']).to_s
        logger.debug? && logger.debug("download #{actual_url}")

        options = { destination: zip_path }
        options.merge!({proxy: ENV['http_proxy']}) if ENV.include?('http_proxy')
        Down.download(actual_url, **options)

        raise "the new download has wrong checksum" if md5(zip_path) != db_info['md5_hash']

        logger.debug("new database downloaded in ", :path => zip_path)
        zip_path
      end
    end

    # Extracts all files and folders from the .tgz into a temporary
    # sub-directory, copies them up into the `dirname` directory, and removes
    # the temporary sub-directory.
    #
    # @return [String] path of the extracted database file
    def unzip(database_type, dirname, zip_path)
      temp_path = ::File.join(@paths.resolve(dirname), database_type)
      LogStash::Util::Tar.extract(zip_path, temp_path)
      FileUtils.cp_r(::File.join(temp_path, '.'), @paths.resolve(dirname))
      FileUtils.rm_r(temp_path)

      @paths.db(database_type, dirname)
    end

    def rest_client
      @client ||= begin
        client_options = {
          request_timeout: 15,
          connect_timeout: 5
        }
        client_options[:proxy] = ENV['http_proxy'] if ENV.include?('http_proxy')
        Manticore::Client.new(client_options)
      end
    end

    # Reads the instance uuid from `path.data/uuid`; surrounding whitespace
    # (e.g. a trailing newline) is stripped because the value is embedded
    # verbatim in the query string of the service URL.
    #
    # @return [String] the uuid, or "UNSET" when it cannot be read
    def uuid
      @uuid ||= ::File.read(::File.join(LogStash::SETTINGS.get("path.data"), "uuid")).strip
    rescue
      "UNSET"
    end

    # @param possibly_relative_url [String] absolute URL, or one relative to the list URL
    # @return [URI]
    def resolve_download_url(possibly_relative_url)
      list_databases_url.merge(possibly_relative_url)
    end

    # Makes sure the path holds something that looks like a MaxMind database.
    def assert_database!(database_path)
      raise "failed to load database #{database_path} because it does not exist" unless file_exist?(database_path)
      raise "failed to load database #{database_path} because it does not appear to be a MaxMind DB" unless scan_binary_file(database_path, "\xab\xcd\xefMaxMind.com")
    end

    def safe_uri(unsafe)
      LogStash::Util::SafeURI.new(unsafe)
    end

    ##
    # Scans a binary file for the given verbatim byte sequence
    # without loading the entire binary file into memory by scanning
    # in chunks
    def scan_binary_file(file_path, byte_sequence)
      byte_sequence = byte_sequence.b
      partial_size = [byte_sequence.bytesize, 1024].max
      ::File.open(file_path, 'r:BINARY') do |io|
        a, b = ''.b, ''.b # two binary buffers
        until io.eof?
          io.readpartial(partial_size, b)

          # previous chunk + current chunk, so a match that straddles the
          # boundary between two reads is still found
          bridged_chunk = (a+b)

          return true if bridged_chunk.include?(byte_sequence)
          a,b = b,a # swap buffers before continuing
        end
      end

      false
    end
  end
end end

# file: x-pack/lib/geoip_database_management/extension.rb
# (renamed from x-pack/lib/filters/geoip/extension.rb)

require "logstash/environment"

module LogStash module GeoipDatabaseManagement
  ##
  # Registers the `xpack.geoip.downloader.*` settings.
  class Extension < LogStash::UniversalPlugin
    include LogStash::Util::Loggable

    def additionals_settings(settings)
      require "logstash/runner"
      logger.trace("Registering additional geoip settings")
      settings.register(LogStash::Setting::String.new("xpack.geoip.downloader.endpoint", "https://geoip.elastic.co/v1/database")
                          .with_deprecated_alias("xpack.geoip.download.endpoint"))
      settings.register(LogStash::Setting::TimeValue.new("xpack.geoip.downloader.poll.interval", "24h"))
      settings.register(LogStash::Setting::Boolean.new("xpack.geoip.downloader.enabled", true))
    rescue => e
      logger.error("Cannot register new settings", :message => e.message, :backtrace => e.backtrace)
      raise
    end
  end
end end
# file: x-pack/lib/geoip_database_management/manager.rb
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License;
# you may not use this file except in compliance with the Elastic License.

require_relative 'constants'
require_relative 'util'
require_relative 'data_path'
require_relative 'metadata'
require_relative 'downloader'
require_relative 'subscription'
require_relative 'db_info'
require_relative 'metric'

require "logstash/util/loggable"
require "singleton"
require "fileutils"
require "concurrent/set"

##
# The GeoipDatabaseManagement's Manager can be used by plugins to acquire
# a subscription to an auto-updating, EULA-compliant Geoip database.
# The Manager is lazy by default, and begins maintaining local databases
# on disk when the first subscription is started.
#
# Once started, it queries an Elastic database service daily to discover
# available updates, loading discovered updates in the background and notifying
# all subscribers before eventually removing databases from disk that are no
# longer assigned to any subscribers.
#
# The terms of the MaxMind EULA are enforced, ensuring that databases that
# have not been synchronized with the service in more than 30 days are not
# assigned to subscribers so they can be freed. After 25 days without sync
# the manager begins emitting warning messages.
#
# The provided Metric namespace is populated with information about the
# current state of managed databases, age-since-sync, etc.
#
# @example Subscribe to a database
#   sub = Manager.instance.subscribe_database_path("City")
#   sub.observe(construct: -> (db_info) { ... },
#               on_update: -> (db_info) { ... },
#               on_expire: -> (       ) { ... })
#   sub.release! # unsubscribe
module LogStash module GeoipDatabaseManagement class Manager
  include Constants
  include Util
  include LogStash::Util::Loggable
  include Singleton

  def initialize
    @start_lock = Mutex.new

    @enabled = LogStash::SETTINGS.get("xpack.geoip.downloader.enabled")
    @endpoint = LogStash::SETTINGS.get("xpack.geoip.downloader.endpoint")
    @poll_interval = LogStash::SETTINGS.get("xpack.geoip.downloader.poll.interval")

    data_directory = ::File.expand_path("geoip_database_management", LogStash::SETTINGS.get_value('path.data'))
    @data_path = GeoipDatabaseManagement::DataPath.new(data_directory)

    @metadata = Metadata.new(data_path)

    unless enabled?
      logger.info("database manager is disabled; removing managed databases from disk")
      metadata.delete
      clean_up_database
    end
  end

  ##
  # @param database_type [String] one of `GeoipDatabaseManagement::DB_TYPES`
  # @return [Subscription, nil] the observer, or nil when the database
  #   management feature is disabled
  # @raise [ArgumentError] when the database type is not supported
  def subscribe_database_path(database_type)
    fail ArgumentError, "unsupported database type `#{database_type}`" unless DB_TYPES.include?(database_type)

    return nil unless enabled?

    ensure_started!

    @states.fetch(database_type).subscribe
  end

  ##
  # @return [Boolean] true unless the database management feature has been disabled
  def enabled?
    @enabled
  end

  ##
  # @return [Enumerable] the types of databases that can be subscribed to
  def supported_database_types
    DB_TYPES
  end

  ##
  # @api internal
  def database_metric=(database_metric)
    @database_metric = database_metric
  end

  ##
  # @api internal
  def running?
    @start_lock.synchronize { @download_task&.running? }
  end

  protected

  attr_reader :endpoint
  attr_reader :poll_interval
  attr_reader :data_path
  attr_reader :metadata

  def database_metric
    logger.debug("Database Metric is nil. No geoip metrics are available. Please report the bug") if @database_metric.nil?
    @database_metric ||= LogStash::GeoipDatabaseManagement::Metric.new(LogStash::Instrument::NamespacedNullMetric.new)
  end

  def downloader
    @downloader ||= Downloader.new(metadata, endpoint)
  end

  # Performs the initial setup and download once, then schedules the
  # periodic update check. Subsequent calls are no-ops.
  def ensure_started!
    @start_lock.synchronize do
      return if @download_task

      setup
      execute_download_job

      logger.debug "spawning periodic check for updates (#{poll_interval})"
      @download_task = Concurrent::TimerTask.execute(execution_interval: poll_interval.to_seconds) do
        periodic_sync
      end
    end
  end

  def periodic_sync
    LogStash::Util::set_thread_name 'geoip database sync task' do
      LogStash::Util::with_logging_thread_context("pipeline.id" => nil) do
        logger.debug "running database update check"
        execute_download_job
      end
    end
  end

  # Removes every directory under the data root whose name is not in
  # `excluded_dirnames`.
  #
  # @param excluded_dirnames [Array<String>] directory names to keep
  def clean_up_database(excluded_dirnames = [])
    # The data root is only created in #setup; when the feature is disabled
    # on a fresh data directory there is nothing to clean, and listing a
    # missing directory would raise Errno::ENOENT.
    return unless ::File.directory?(data_path.root)

    protected_dirnames = excluded_dirnames.uniq
    existing_dirnames = ::Dir.children(data_path.root)
                             .select { |f| ::File.directory? ::File.join(data_path.root, f) }

    (existing_dirnames - protected_dirnames).each do |dirname|
      dir_path = data_path.resolve(dirname)
      FileUtils.rm_r(dir_path)
      logger.info("Stale database directory `#{dir_path}` has been deleted")
    end
  end

  # Creates the data root, and builds the initial per-type State from
  # what the metadata already knows.
  def setup
    FileUtils.mkdir_p(data_path.root)
    metadata.touch

    @states = DB_TYPES.each_with_object({}) do |type, memo|
      db_info = if metadata.has_type?(type)
                  DbInfo.new(path: metadata.database_path(type))
                else
                  DbInfo::PENDING
                end
      memo[type] = State.new(db_info)
    end

    database_metric.initialize_metrics(metadata.get_all)
  end

  # Fetches updated databases, records them in metadata and notifies the
  # per-type State; types without an update only get their timestamp bumped.
  # Age check, directory clean-up and download stats run regardless of
  # whether the sync raised.
  def execute_download_job
    success_cnt = 0

    database_metric.set_download_status_updating

    updated_db = downloader.fetch_databases(DB_TYPES)
    updated_db.each do |database_type, valid_download, dirname, new_database_path|
      if valid_download
        metadata.save_metadata(database_type, dirname, gz_md5: md5(data_path.gz(database_type, dirname)))

        @states[database_type].update!(new_database_path) do |previous_db_info|
          logger.info("managed geoip database has been updated on disk",
                      :database_type => database_type, :database_path => new_database_path)
        end

        success_cnt += 1
      end
    end

    updated_types = updated_db.map { |database_type, valid_download, dirname, new_database_path| database_type }
    (DB_TYPES - updated_types).each do |unchange_type|
      metadata.update_timestamp(unchange_type)
      success_cnt += 1
    end
  rescue => e
    logger.error("failed to sync databases", error_details(e, logger))
  ensure
    check_age
    clean_up_database(metadata.dirnames)
    database_metric.update_download_stats(success_cnt == DB_TYPES.size)
  end

  # Compares each database's last successful check against the 25-day
  # warning and 30-day expiry thresholds, expiring states and updating
  # metrics accordingly. Expired database files are deleted at the end.
  def check_age(database_types = DB_TYPES)
    deferred_deletions = []
    database_types.each do |database_type|
      db_metadata = metadata.get_metadata(database_type).last
      if db_metadata.nil?
        logger.debug("No metadata for #{database_type}...")
        next
      end

      check_at = db_metadata[Metadata::Column::CHECK_AT].to_i
      days_without_update = time_diff_in_days(check_at)

      case
      when days_without_update >= 30
        @states[database_type].expire! do |db_info|
          logger.error("The managed MaxMind GeoIP #{database_type} database hasn't been synchronized in #{days_without_update} days "\
                       "and #{db_info.expired? ? "has been" : "will be"} removed in order to remain compliant with the MaxMind EULA. "\
                       "Logstash is unable to get newer version from internet. "\
                       "Please check the network settings and allow Logstash accesses the internet to download the latest database. "\
                       "Alternatively you can switch to a self-managed GeoIP database service (`xpack.geoip.downloader.endpoint`), or "\
                       "configure each plugin with a self-managed database which you can download from https://dev.maxmind.com/geoip/geoip2/geolite2/ ")
        end

        # capture the path before the metadata forgets it
        deferred_deletions << metadata.database_path(database_type)
        metadata.unset_path(database_type)

        database_status = Metric::DATABASE_EXPIRED
      when days_without_update >= 25
        logger.warn("The MaxMind GeoIP #{database_type} database hasn't been synchronized in #{days_without_update} days. "\
                    "Logstash will remove access to the stale database in #{30 - days_without_update} days in order to remain compliant with the MaxMind EULA. "\
                    "Please check the network settings and allow Logstash accesses the internet to download the latest database.")
        database_status = Metric::DATABASE_TO_BE_EXPIRED
      else
        logger.trace("The MaxMind GeoIP #{database_type} database passed age check", :days_without_update => days_without_update)
        database_status = Metric::DATABASE_UP_TO_DATE
      end

      database_metric.update_database_status(database_type, database_status, db_metadata, days_without_update)
    end
  ensure
    deferred_deletions.compact.each do |path|
      FileUtils.rm(path, force: true)
      logger.debug("Removed database file `#{path}`")
    end
  end

  ##
  # @api testing
  def shutdown!
    @start_lock.synchronize do
      return unless @download_task&.running?

      @download_task.shutdown
      # wait up to ~10 seconds for the task to stop
      10.times do
        break unless @download_task.running?
        sleep 1
      end

      @states.values.each(&:delete_observers)
    end
  end
  private :shutdown!

  ##
  # @api testing
  def current_db_info(database_type)
    current_state(database_type)&.db_info
  end
  private :current_db_info

  ##
  # @api testing
  def current_state(database_type)
    @states&.dig(database_type)
  end
  private :current_state

  ##
  # Holds the current DbInfo for one database type and notifies
  # subscriptions when it changes.
  #
  # @api private
  class State
    attr_reader :db_info

    require 'observer' # ruby stdlib
    include Observable

    def initialize(db_info)
      @db_info = db_info
    end

    ##
    # @api internal
    # @return [Subscription] a new subscription registered as an observer
    def subscribe
      synchronize do
        subscription = Subscription.new(@db_info, self)
        add_observer(subscription, :notify)
        subscription
      end
    end

    def unsubscribe(observer)
      synchronize do
        delete_observer(observer)
      end
    end

    ##
    # @param new_database_path [String]
    # @yieldparam previous_db_info [DbInfo]
    # @yieldreturn [void]
    def update!(new_database_path)
      synchronize do
        previous_db_info, @db_info = @db_info, DbInfo.new(path: new_database_path)

        changed

        yield(previous_db_info) if block_given?

        notify_observers(@db_info)
      end
    end

    ##
    # @yieldparam previous_db_info [DbInfo]
    # @yieldreturn [void]
    def expire!
      synchronize do
        previous_db_info, @db_info = @db_info, DbInfo::EXPIRED

        changed

        yield(previous_db_info) if block_given?

        notify_observers(@db_info)
      end
    end

    ##
    # @api internal
    def release!(subscription)
      synchronize do
        delete_observer(subscription)
      end
    end

    private

    def synchronize(&block)
      LogStash::Util.synchronize(self) do
        yield
      end
    end

  end

end; end; end

# file: x-pack/lib/geoip_database_management/metadata.rb
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License;
# you may not use this file except in compliance with the Elastic License.
##
# CSV-backed bookkeeping for the managed databases.
#
# Each row is positional; see Column for the index of each field:
#   database_type, check_at, gz_md5, dirname
class Metadata
  include LogStash::Util::Loggable

  include GeoipDatabaseManagement::Constants
  include GeoipDatabaseManagement::Util

  # Positional index of each field within a metadata row.
  module Column
    DATABASE_TYPE = 0
    CHECK_AT      = 1
    GZ_MD5        = 2
    DIRNAME       = 3
  end

  attr_reader :paths

  # @param paths [#resolve, #db] path helper; `resolve("metadata.csv")` locates the CSV file
  def initialize(paths)
    @paths = paths
    @metadata_path = paths.resolve("metadata.csv")
  end

  ##
  # Replaces every row of `database_type` with one fresh row stamped with
  # the current time, then rewrites the file.
  #
  # @param database_type [String]
  # @param dirname [String]
  # @param gz_md5 [String]
  def save_metadata(database_type, dirname, gz_md5:)
    retained_rows = get_metadata(database_type, false)

    fresh_row = []
    fresh_row[Column::DATABASE_TYPE] = database_type
    fresh_row[Column::CHECK_AT] = Time.now.to_i
    fresh_row[Column::GZ_MD5] = gz_md5
    fresh_row[Column::DIRNAME] = dirname

    update(retained_rows << fresh_row)
  end

  # Sets CHECK_AT to the current time on each row of `database_type`.
  def update_timestamp(database_type)
    update_each_row do |row|
      row.tap do |r|
        r[Column::CHECK_AT] = Time.now.to_i if r[Column::DATABASE_TYPE].eql?(database_type)
      end
    end
  end

  # Maps every stored row through the given block and writes the results back.
  # @yieldparam row [Array]
  # @yieldreturn [Array] the row to store in its place
  def update_each_row(&block)
    rewritten_rows = get_all.map { |row| yield row }
    update(rewritten_rows)
  end

  # Writes the given rows to the CSV file, ordered by database type.
  # @param metadata [Array<Array>]
  def update(metadata)
    sorted_rows = metadata.sort_by { |row| row[Column::DATABASE_TYPE] }
    ::CSV.open(@metadata_path, 'w') do |csv|
      sorted_rows.each { |row| csv << row }
    end
    logger.trace("metadata updated", :metadata => sorted_rows)
  end

  # Rewrites the file with its rows unchanged.
  def touch
    update_each_row { |row| row }
  end

  # Blanks the DIRNAME field on each row of `database_type`.
  def unset_path(database_type)
    update_each_row do |row|
      row.tap do |r|
        r[Column::DIRNAME] = "" if r[Column::DATABASE_TYPE].eql?(database_type)
      end
    end
  end

  # @return [Array<Array>] every stored row, or an empty Array when
  #   `file_exist?` reports no metadata file
  def get_all
    return Array.new unless file_exist?(@metadata_path)

    ::CSV.read(@metadata_path, headers: false)
  end

  ##
  # Gives the rows whose type matches (or, with `match = false`, does not
  # match) the given type.
  def get_metadata(database_type, match = true)
    get_all.select do |row|
      is_requested_type = row[Column::DATABASE_TYPE].eql?(database_type)
      is_requested_type == match
    end
  end

  # @return [String, nil] the last recorded database path for the type that
  #   is non-empty and passes `file_exist?`, or nil when there is none
  def database_path(database_type)
    recorded_paths = get_metadata(database_type).map do |row|
      @paths.db(database_type, row[Column::DIRNAME])
    end

    non_blank_paths = recorded_paths.reject(&:empty?)
    non_blank_paths.select { |path| file_exist?(path) }.last
  end

  # @return [Boolean] whether any row exists for the type
  def has_type?(database_type)
    get_metadata(database_type).any?
  end

  # @return [String] GZ_MD5 of the last row for the type, or '' when absent
  def gz_md5(database_type)
    latest_row = get_metadata(database_type).last
    (latest_row && latest_row[Column::GZ_MD5]) || ''
  end

  # @return [Integer] CHECK_AT of the last row for the type, or 0 when absent
  def check_at(database_type)
    latest_row = get_metadata(database_type).last
    ((latest_row && latest_row[Column::CHECK_AT]) || 0).to_i
  end

  # @return [Array<String>] every non-empty DIRNAME across all rows
  def dirnames
    recorded_dirnames = get_all.map { |row| row[Column::DIRNAME] }
    recorded_dirnames.reject(&:empty?)
  end

  # @return [Boolean] result of `file_exist?` for the metadata file
  def exist?
    file_exist?(@metadata_path)
  end

  # Removes the metadata file when #exist? reports it present.
  def delete
    ::File.delete(@metadata_path) if exist?
  end
end
##
# Publishes database and download status to a namespaced metric collector.
class Metric
  include GeoipDatabaseManagement::Util

  DATABASE_INIT = "init".freeze
  DATABASE_UP_TO_DATE = "up_to_date".freeze
  DATABASE_TO_BE_EXPIRED = "to_be_expired".freeze
  DATABASE_EXPIRED = "expired".freeze

  DOWNLOAD_SUCCEEDED = "succeeded".freeze
  DOWNLOAD_FAILED = "failed".freeze
  DOWNLOAD_UPDATING = "updating".freeze

  # @param metric [#namespace, nil] metric collector; a NamespacedNullMetric
  #   stands in when none is given (e.g. plugin tests without a collector)
  def initialize(metric)
    @metric = metric || LogStash::Instrument::NamespacedNullMetric.new
  end

  ##
  # Seeds the per-database gauges from the given metadata rows and sets the
  # download `last_checked_at` gauge from the greatest CHECK_AT among them.
  #
  # @param metadatas [Array<Array>] rows indexed by Metadata::Column
  def initialize_metrics(metadatas)
    metadatas.each do |row|
      db_gauges = database_namespace(row[Metadata::Column::DATABASE_TYPE])
      db_gauges.gauge(:status, DATABASE_INIT)
      db_gauges.gauge(:last_updated_at, unix_time_to_iso8601(row[Metadata::Column::CHECK_AT]))
      db_gauges.gauge(:fail_check_in_days, time_diff_in_days(row[Metadata::Column::CHECK_AT]))
    end

    download_gauges = download_stats_namespace
    latest_check_at = metadatas.map { |row| row[Metadata::Column::CHECK_AT].to_i }.max
    download_gauges.gauge(:last_checked_at, unix_time_to_iso8601(latest_check_at))
    download_gauges
  end

  ##
  # Stamps `last_checked_at` with the current time, then bumps the matching
  # counter and status gauge.
  #
  # @param is_success [Boolean]
  def update_download_stats(is_success)
    download_gauges = download_stats_namespace
    download_gauges.gauge(:last_checked_at, Time.now.iso8601)

    counter, status = is_success ? [:successes, DOWNLOAD_SUCCEEDED] : [:failures, DOWNLOAD_FAILED]
    download_gauges.increment(counter, 1)
    download_gauges.gauge(:status, status)

    download_gauges
  end

  # Marks the download status gauge as DOWNLOAD_UPDATING.
  def set_download_status_updating
    download_stats_namespace.gauge(:status, DOWNLOAD_UPDATING)
  end

  ##
  # Records the status, last-update time and days-without-update for one
  # database type.
  #
  # @param database_type [String]
  # @param database_status [String] one of the DATABASE_* constants
  # @param metadata [Array] the row for this type, indexed by Metadata::Column
  # @param days_without_update [Integer]
  def update_database_status(database_type, database_status, metadata, days_without_update)
    db_gauges = database_namespace(database_type)
    db_gauges.gauge(:status, database_status)
    db_gauges.gauge(:last_updated_at, unix_time_to_iso8601(metadata[Metadata::Column::CHECK_AT]))
    db_gauges.gauge(:fail_check_in_days, days_without_update)
    db_gauges
  end

  private

  # Namespace holding the gauges of a single database type.
  def database_namespace(database_type)
    @metric.namespace([:database, database_type.to_sym])
  end

  # Namespace holding the download statistics.
  def download_stats_namespace
    @metric.namespace([:download_stats])
  end
end
##
# A Subscription is acquired with Manager#subscribe_database_path
#
# It holds the most recently notified DbInfo behind a reentrant read/write
# lock and fans each notification out to the observers registered via #observe.
class Subscription
  include LogStash::Util::Loggable
  include Observable # @api internal

  ##
  # @param initial [DbInfo] the value reported until the first notification
  # @param state [#release!, nil] told about #release! when present
  # @api private
  def initialize(initial, state=nil)
    @state = state
    @observable = true
    @value = initial
    @lock = Concurrent::ReentrantReadWriteLock.new
  end

  ##
  # @overload value(&consumer)
  #   Yields the current DbInfo while holding the read lock, so the value
  #   cannot change until control is returned.
  #   @note this is intended for short-lived locks ONLY, as blocking writes
  #     will prevent updates from being observed by other subscribers.
  #   @yieldparam db_info [DbInfo]
  #   @return [Object] the result of the provided block
  # @overload value()
  #   @return [DbInfo] the current value
  def value(&consumer)
    @lock.with_read_lock do
      return yield(@value) if block_given?

      @value
    end
  end

  ##
  # Register an observer that will observe the current value and each
  # subsequent update and expire, until Subscription#release!
  #
  # @note interacting with this Subscription or the Manager in any way in the
  #   provided hooks is not advised, as it may cause deadlocks.
  # @param observer_spec [SubscriptionObserver, Hash] (@see SubscriptionObserver::coerce)
  # @return [Subscription] self
  # @raise [RuntimeError] when the subscription has already been released
  def observe(observer_spec)
    observer = SubscriptionObserver.coerce(observer_spec)

    # The write lock is held across `construct` and registration so that no
    # notification can arrive before the observer has seen the initial value.
    @lock.with_write_lock do
      fail "Subscription has been released!" unless @observable

      observer.construct(@value)
      self.add_observer do |new_value|
        @lock.with_read_lock do
          if new_value.expired?
            observer.on_expire
          else
            observer.on_update(new_value)
          end
        end
      end
    end

    self
  end

  ##
  # Releases this subscription and all of its observers
  # from receiving additional notifications.
  def release!
    @lock.with_write_lock do
      @observable = false
      delete_observers

      @state&.release!(self)
      @state = nil
    end
  end

  ##
  # Stores the updated value under the write lock, then downgrades to a read
  # lock while the registered observers are notified.
  #
  # @api internal
  # @param updated_value [DbInfo]
  def notify(updated_value)
    write_lock_held = @lock.acquire_write_lock
    @value = updated_value

    # downgrade to read lock for notifications
    @lock.with_read_lock do
      write_lock_held = !@lock.release_write_lock
      self.changed
      self.notify_observers(updated_value)
    end
  ensure
    # Reached with the flag still set only when the downgrade never happened;
    # the lock still held at that point is the WRITE lock, so that is the one
    # to release (the read lock is managed by `with_read_lock` itself).
    @lock.release_write_lock if write_lock_held
  end

  ##
  # Registers either the given block (invoked through `call`) or the
  # conventional Observable arguments, while holding the write lock.
  #
  # @api private
  def add_observer(*args, &block)
    @lock.with_write_lock do
      if block_given?
        super(block, :call)
      else
        super(*args)
      end
    end
  end
end
##
# Provide a SubscriptionObserver or a SubscriptionObserver::coerce-able object
# to Subscription#observe to use the current value and observe changes to the
# subscription's state
#
# @api public
module SubscriptionObserver

  ##
  # Coerce an object into a `SubscriptionObserver`, if necessary
  # @overload coerce(observer)
  #   @param observer [SubscriptionObserver] an object that "quacks like" a
  #     `SubscriptionObserver` as defined by `SubscriptionObserver::===`
  #   @return [SubscriptionObserver] the object itself
  # @overload coerce(construct:, on_update:, on_expire:)
  #   @param construct [Proc(DbInfo)->void] a single-arity callable that receives
  #     the current DbInfo at the beginning of observation
  #   @param on_update [Proc(DbInfo)->void] a single-arity callable that receives
  #     each subsequent DbInfo
  #   @param on_expire [Proc()->void] a zero-arity callable that is invoked when
  #     the current value expires
  #   @return [SubscriptionObserver::Proxy]
  # @raise [ArgumentError] when the argument is neither observer-like nor a
  #   Hash, or when a hook in the Hash is not a callable of the required arity
  # @api public
  def self.coerce(observer_spec)
    case observer_spec
    when SubscriptionObserver then observer_spec
    when Hash then Proxy.new(**observer_spec)
    else
      fail ArgumentError, "Could not make a SubscriptionObserver from #{observer_spec.inspect}"
    end
  end

  ##
  # Quacks-like check, to simplify consuming from Java where the ruby module can't be
  # directly mixed into a Java class
  #
  # @return [Boolean] true for anything that includes this module, or that
  #   responds to `construct`, `on_update` and `on_expire`
  def self.===(candidate)
    return true if super

    %i[construct on_update on_expire].all? { |hook| candidate.respond_to?(hook) }
  end

  ##
  # Observe the value at observer's construction, before any state-change notifications are fired
  def construct(initial_value)
    fail NotImplementedError
  end

  ##
  # Observe an update notice, after construction is complete
  def on_update(updated_value)
    fail NotImplementedError
  end

  ##
  # Observe an expiry notice, after construction is complete
  def on_expire
    fail NotImplementedError
  end

  ##
  # Adapts three callables to the SubscriptionObserver interface.
  #
  # @api internal
  # @see SubscriptionObserver#coerce
  class Proxy
    include SubscriptionObserver

    # @raise [ArgumentError] naming the offending hook when it does not
    #   respond to `call` or does not report exactly the required arity
    def initialize(construct:, on_update:, on_expire:)
      @construct = validated_hook(:construct, construct, 1)
      @on_update = validated_hook(:on_update, on_update, 1)
      @on_expire = validated_hook(:on_expire, on_expire, 0)
    end

    def construct(initial_value)
      @construct.call(initial_value)
    end

    def on_update(updated_value)
      @on_update.call(updated_value)
    end

    def on_expire
      @on_expire.call
    end

    private

    # Returns the hook unchanged when it is a callable of exactly the
    # expected arity. A callable that does not expose `arity` is rejected
    # with ArgumentError instead of surfacing a NoMethodError.
    def validated_hook(name, hook, expected_arity)
      callable = hook.respond_to?(:call) && hook.respond_to?(:arity)
      return hook if callable && hook.arity == expected_arity

      fail ArgumentError, "`#{name}` must respond to `call` and accept exactly " \
                          "#{expected_arity} argument(s); got #{hook.inspect}"
    end
  end
  private_constant :Proxy
end
##
# Stateless helpers shared by the GeoIP database management components.
# The module extends itself, so every helper is callable both as
# `Util.helper(...)` and as an instance method wherever Util is included.
module Util
  # Required here so the helpers do not depend on another file having
  # already loaded Digest::MD5, Date / Time#to_date, or Time#iso8601.
  require 'digest'
  require 'date'
  require 'time'

  extend self

  ##
  # @param path [String, nil]
  # @return [Boolean] true only when `path` is non-nil and names an existing,
  #   non-empty file
  def file_exist?(path)
    !path.nil? && ::File.exist?(path) && !::File.empty?(path)
  end

  ##
  # @param file_path [String, nil]
  # @return [String] hex MD5 of the file's bytes, or "" when #file_exist?
  #   reports the file absent or empty
  def md5(file_path)
    return "" unless file_exist?(file_path)

    # Digest::MD5.file reads the file in binary mode, chunk by chunk, so the
    # digest is computed over the exact bytes without loading the whole
    # database into memory.
    ::Digest::MD5.file(file_path).hexdigest
  end

  ##
  # Builds a Hash describing an exception for structured logging.
  #
  # @param e [Exception]
  # @param logger [#debug?] cause and backtrace are included only when debug is enabled
  # @return [Hash] always has :exception; may have :cause and :backtrace
  def error_details(e, logger)
    details = { :exception => e.message }

    if logger.debug?
      details[:cause] = e.cause if e.cause
      details[:backtrace] = e.backtrace
    end

    details
  end

  ##
  # @param timestamp [#to_i] unix time in seconds
  # @return [Integer] whole calendar days between that moment's date and today
  def time_diff_in_days(timestamp)
    (::Date.today - ::Time.at(timestamp.to_i).to_date).to_i
  end

  ##
  # @param timestamp [#to_i] unix time in seconds
  # @return [String] the ISO8601 rendering of that moment
  def unix_time_to_iso8601(timestamp)
    ::Time.at(timestamp.to_i).iso8601
  end
end
-LogStash::PLUGIN_REGISTRY.add(:universal, "geoip_auto_update", LogStash::Filters::Geoip::Extension) +LogStash::PLUGIN_REGISTRY.add(:universal, "geoip_database_management", LogStash::GeoipDatabaseManagement::Extension) license_levels = Hash.new license_levels.default = LogStash::LicenseChecker::LICENSE_TYPES diff --git a/x-pack/spec/filters/geoip/database_manager_spec.rb b/x-pack/spec/filters/geoip/database_manager_spec.rb index decc8ba6644..f30c584ebf0 100644 --- a/x-pack/spec/filters/geoip/database_manager_spec.rb +++ b/x-pack/spec/filters/geoip/database_manager_spec.rb @@ -4,458 +4,217 @@ require_relative 'test_helper' require "filters/geoip/database_manager" -require "filters/geoip/database_metric" describe LogStash::Filters::Geoip do describe 'DatabaseManager', :aggregate_failures do - let(:mock_geoip_plugin) { double("geoip_plugin") } - let(:mock_metadata) { double("database_metadata") } - let(:mock_download_manager) { double("download_manager") } - let(:agent_metric) { LogStash::Instrument::Metric.new(LogStash::Instrument::Collector.new) } - let(:database_metric) { LogStash::Filters::Geoip::DatabaseMetric.new(agent_metric) } - let(:db_manager) do - manager = Class.new(LogStash::Filters::Geoip::DatabaseManager).instance - manager.database_metric = database_metric - manager.send(:setup) - manager.instance_variable_set(:@metadata, mock_metadata) - manager.instance_variable_set(:@download_manager, mock_download_manager) - manager - end - let(:logger) { double("Logger") } - - CITY = LogStash::Filters::Geoip::CITY - ASN = LogStash::Filters::Geoip::ASN - CC = LogStash::Filters::Geoip::CC - - before do - stub_const('LogStash::Filters::Geoip::DownloadManager::GEOIP_ENDPOINT', "https://somewhere.dev") - allow(mock_geoip_plugin).to receive(:update_filter) - end - - after do - delete_file(metadata_path, get_dir_path(second_dirname)) - db_manager.database_metric = nil - end - - context "initialize" do - it "should set the initial state to cc database" do - states = 
db_manager.instance_variable_get(:@states) - expect(states[CITY].is_eula).to be_falsey - expect(states[CITY].database_path).to eql(states[CITY].cc_database_path) - expect(::File.exist?(states[CITY].cc_database_path)).to be_truthy - expect(states[ASN].is_eula).to be_falsey - expect(states[ASN].database_path).to eql(states[ASN].cc_database_path) - expect(::File.exist?(states[ASN].cc_database_path)).to be_truthy - - c = metric_collector(db_manager) - [ASN, CITY].each do |type| - expect(c.get([:database, type.to_sym], :status, :gauge).value).to eql(LogStash::Filters::Geoip::DatabaseMetric::DATABASE_INIT) - expect(c.get([:database, type.to_sym], :fail_check_in_days, :gauge).value).to be_nil - end - expect_initial_download_metric(c) - end - - context "when metadata exists" do - before do - copy_cc(get_dir_path(second_dirname)) - rewrite_temp_metadata(metadata_path, [city2_metadata]) - end - - it "should use database record in metadata" do - states = db_manager.instance_variable_get(:@states) - expect(states[CITY].is_eula).to be_truthy - expect(states[CITY].database_path).to include second_dirname - - c = metric_collector(db_manager) - expect_second_database_metric(c) - expect_initial_download_metric(c) - end - end - - context "when metadata exists but database is deleted manually" do - let(:db_manager) do - manager = Class.new(LogStash::Filters::Geoip::DatabaseManager).instance - manager.database_metric = database_metric - manager.send(:setup) - manager - end - - before do - rewrite_temp_metadata(metadata_path, [city2_metadata]) - end - - it "should return nil database path" do - states = db_manager.instance_variable_get(:@states) - expect(states[CITY].is_eula).to be_truthy - expect(states[CITY].database_path).to be_nil - - c = metric_collector(db_manager) - expect_second_database_metric(c) - expect_initial_download_metric(c) - end - end - - def expect_second_database_metric(c) - expect(c.get([:database, CITY.to_sym], :status, :gauge).value).to 
eql(LogStash::Filters::Geoip::DatabaseMetric::DATABASE_UP_TO_DATE) - expect(c.get([:database, CITY.to_sym], :last_updated_at, :gauge).value).to match /2020-02-20/ - expect(c.get([:database, CITY.to_sym], :fail_check_in_days, :gauge).value).to eql(0) - end - - def expect_initial_download_metric(c) - expect(c.get([:download_stats], :successes, :counter).value).to eql(0) - expect(c.get([:download_stats], :failures, :counter).value).to eql(0) - expect(c.get([:download_stats], :last_checked_at, :gauge).value).to match /#{now_in_ymd}/ - expect(c.get([:download_stats], :status, :gauge).value).to be_nil + let(:pipeline_id) { SecureRandom.hex(16) } + let(:mock_geoip_plugin) do + double("LogStash::Filters::Geoip").tap do |c| + allow(c).to receive(:execution_context).and_return(double("EC", pipeline_id: pipeline_id)) + allow(c).to receive(:update_filter).with(anything) end end - context "execute download job" do - let(:valid_city_fetch) { [CITY, true, second_dirname, second_city_db_path] } - let(:valid_asn_fetch) { [ASN, true, second_dirname, second_asn_db_path] } - let(:invalid_city_fetch) { [CITY, false, nil, nil] } - - context "plugin is set" do - let(:db_manager) do - manager = super() - manager.instance_variable_get(:@states)[CITY].plugins.push(mock_geoip_plugin) - manager.instance_variable_get(:@states)[CITY].is_eula = true - manager.instance_variable_get(:@states)[ASN].plugins.push(mock_geoip_plugin) - manager.instance_variable_get(:@states)[ASN].is_eula = true - manager + let(:eula_database_infos) { Hash.new { LogStash::GeoipDatabaseManagement::DbInfo::PENDING } } + let(:eula_manager_enabled) { true } + let(:mock_eula_manager) do + double('LogStash::GeoipDatabaseManagement::Manager').tap do |c| + allow(c).to receive(:enabled?).and_return(eula_manager_enabled) + allow(c).to receive(:supported_database_types).and_return(%w(City ASN)) + allow(c).to receive(:subscribe_database_path) do |type| + LogStash::GeoipDatabaseManagement::Subscription.new(eula_database_infos[type]) 
end - - it "should update states when new downloads are valid" do - expect(mock_download_manager).to receive(:fetch_database).and_return([valid_city_fetch, valid_asn_fetch]) - expect(mock_metadata).to receive(:save_metadata).at_least(:twice) - allow(mock_geoip_plugin).to receive_message_chain('execution_context.pipeline_id').and_return('pipeline_1', 'pipeline_2') - expect(mock_geoip_plugin).to receive(:update_filter).with(:update, instance_of(String)).at_least(:twice) - expect(mock_metadata).to receive(:update_timestamp).never - expect(mock_metadata).to receive(:dirnames) - expect(db_manager).to receive(:check_age) - expect(db_manager).to receive(:clean_up_database) - - db_manager.send(:execute_download_job) - expect(db_manager.database_path(CITY)).to match /#{second_dirname}\/#{default_city_db_name}/ - expect(db_manager.database_path(ASN)).to match /#{second_dirname}\/#{default_asn_db_name}/ - - c = metric_collector(db_manager) - expect_download_metric_success(c) - end - end - - it "should update single state when new downloads are partially valid" do - expect(mock_download_manager).to receive(:fetch_database).and_return([invalid_city_fetch, valid_asn_fetch]) - expect(mock_metadata).to receive(:save_metadata).with(ASN, second_dirname, true).at_least(:once) - expect(mock_metadata).to receive(:update_timestamp).never - expect(mock_metadata).to receive(:dirnames) - expect(db_manager).to receive(:check_age) - expect(db_manager).to receive(:clean_up_database) - - db_manager.send(:execute_download_job) - expect(db_manager.database_path(CITY)).to match /#{CC}\/#{default_city_db_name}/ - expect(db_manager.database_path(ASN)).to match /#{second_dirname}\/#{default_asn_db_name}/ - - c = metric_collector(db_manager) - expect_download_metric_fail(c) - end - - it "should update single state and single metadata timestamp when one database got update" do - expect(mock_download_manager).to receive(:fetch_database).and_return([valid_asn_fetch]) - expect(mock_metadata).to 
receive(:save_metadata).with(ASN, second_dirname, true).at_least(:once) - expect(mock_metadata).to receive(:update_timestamp).with(CITY).at_least(:once) - expect(mock_metadata).to receive(:dirnames) - expect(db_manager).to receive(:check_age) - expect(db_manager).to receive(:clean_up_database) - - db_manager.send(:execute_download_job) - expect(db_manager.database_path(CITY)).to match /#{CC}\/#{default_city_db_name}/ - expect(db_manager.database_path(ASN)).to match /#{second_dirname}\/#{default_asn_db_name}/ - - c = metric_collector(db_manager) - expect_download_metric_success(c) end + end - it "should update metadata timestamp for the unchange (no update)" do - expect(mock_download_manager).to receive(:fetch_database).and_return([]) - expect(mock_metadata).to receive(:save_metadata).never - expect(mock_metadata).to receive(:update_timestamp).at_least(:twice) - expect(mock_metadata).to receive(:dirnames) - expect(db_manager).to receive(:check_age) - expect(db_manager).to receive(:clean_up_database) - - db_manager.send(:execute_download_job) - expect(db_manager.database_path(CITY)).to match /#{CC}\/#{default_city_db_name}/ - expect(db_manager.database_path(ASN)).to match /#{CC}\/#{default_asn_db_name}/ - - c = metric_collector(db_manager) - expect_download_metric_success(c) + let(:testable_described_class) do + Class.new(LogStash::Filters::Geoip::DatabaseManager) do + public :eula_subscription + public :eula_subscribed? 
+ public :subscribed_plugins_count end + end - it "should not update metadata when fetch database throw exception" do - expect(mock_download_manager).to receive(:fetch_database).and_raise('boom') - expect(db_manager).to receive(:check_age) - expect(db_manager).to receive(:clean_up_database) - expect(mock_metadata).to receive(:save_metadata).never - expect(mock_metadata).to receive(:dirnames) - - db_manager.send(:execute_download_job) - - c = metric_collector(db_manager) - expect_download_metric_fail(c) - end + subject(:db_manager) { testable_described_class.instance } - def expect_download_metric_success(c) - expect(c.get([:download_stats], :last_checked_at, :gauge).value).to match /#{now_in_ymd}/ - expect(c.get([:download_stats], :successes, :counter).value).to eql(1) - expect(c.get([:download_stats], :status, :gauge).value).to eql(LogStash::Filters::Geoip::DatabaseMetric::DOWNLOAD_SUCCEEDED) - end + let(:mock_logger) { double("Logger").as_null_object } - def expect_download_metric_fail(c) - expect(c.get([:download_stats], :last_checked_at, :gauge).value).to match /#{now_in_ymd}/ - expect(c.get([:download_stats], :failures, :counter).value).to eql(1) - expect(c.get([:download_stats], :status, :gauge).value).to eql(LogStash::Filters::Geoip::DatabaseMetric::DOWNLOAD_FAILED) - end + before(:each) do + allow(db_manager).to receive(:logger).and_return(mock_logger) + allow(db_manager).to receive(:eula_manager).and_return(mock_eula_manager) + allow(mock_geoip_plugin).to receive(:update_filter) end - context "periodic database update" do - before do - allow(db_manager).to receive(:setup) - allow(db_manager).to receive(:execute_download_job) - allow(db_manager).to receive(:database_update_check) - end + self::CITY = LogStash::GeoipDatabaseManagement::Constants::CITY + self::ASN = LogStash::GeoipDatabaseManagement::Constants::ASN - it 'sets up periodic task when download triggered' do - db_manager.send :trigger_download - download_task = 
db_manager.instance_variable_get(:@download_task) - expect(download_task).to_not be nil - expect(download_task.running?).to be true - expect(download_task.execution_interval).to eq 86_400 + shared_examples "not subscribed to the EULA manager" do + it "is not subscribed to the EULA manager" do + expect(db_manager).to_not be_eula_subscribed end + end - it 'executes download job after interval passes' do - db_manager.instance_variable_set(:@download_interval, 1.5) - db_manager.send :trigger_download - download_task = db_manager.instance_variable_get(:@download_task) - expect(download_task.running?).to be true - expect(db_manager).to receive :database_update_check - sleep 2.0 # wait for task execution + shared_examples "subscribed to the EULA manager" do + it "is subscribed to the EULA manager" do + expect(db_manager).to be_eula_subscribed end end - context "check age" do - context "eula database" do - let(:db_manager) do - manager = super() - manager.instance_variable_get(:@states)[CITY].plugins.push(mock_geoip_plugin) - manager.instance_variable_get(:@states)[CITY].is_eula = true - manager.instance_variable_get(:@states)[ASN].plugins.push(mock_geoip_plugin) - manager.instance_variable_get(:@states)[ASN].is_eula = true - manager - end - - it "should give warning after 25 days" do - mock_data = [['City', (Time.now - (60 * 60 * 24 * 26)).to_i, 'ANY', second_dirname, true]] - expect(mock_metadata).to receive(:get_metadata).and_return(mock_data).at_least(:twice) - expect(mock_geoip_plugin).to receive(:update_filter).never - allow(LogStash::Filters::Geoip::DatabaseManager).to receive(:logger).at_least(:once).and_return(logger) - expect(logger).to receive(:warn).at_least(:twice) + context "initialize" do + include_examples "not subscribed to the EULA manager" + end - db_manager.send(:check_age) + context "subscribe database path" do + let(:eula_database_infos) { + super().merge("City" => LogStash::GeoipDatabaseManagement::DbInfo.new(path: default_city_db_path)) + } - c = 
metric_collector(db_manager) - expect_database_metric(c, LogStash::Filters::Geoip::DatabaseMetric::DATABASE_TO_BE_EXPIRED, second_dirname_in_ymd, 26) - end + shared_examples "explicit path" do + context "when user subscribes to explicit path" do + let(:explicit_path) { "/this/that/another.mmdb" } + subject!(:resolved_path) { db_manager.subscribe_database_path("City", explicit_path, mock_geoip_plugin) } - it "should log error and update plugin filter when 30 days has passed" do - mock_data = [['City', (Time.now - (60 * 60 * 24 * 33)).to_i, 'ANY', second_dirname, true]] - expect(mock_metadata).to receive(:get_metadata).and_return(mock_data).at_least(:twice) - allow(mock_geoip_plugin).to receive_message_chain('execution_context.pipeline_id').and_return('pipeline_1', 'pipeline_2') - expect(mock_geoip_plugin).to receive(:update_filter).with(:expire).at_least(:twice) + it "returns user input path" do + expect(resolved_path).to eq(explicit_path) + end - db_manager.send(:check_age) + it "logs about the path being configured manually" do + expect(db_manager.logger).to have_received(:info).with(a_string_including "GeoIP database path is configured manually") + end - c = metric_collector(db_manager) - expect_database_metric(c, LogStash::Filters::Geoip::DatabaseMetric::DATABASE_EXPIRED, second_dirname_in_ymd, 33) + include_examples "not subscribed to the EULA manager" end end - context "cc database" do - before do - allow(LogStash::Filters::Geoip::DatabaseManager).to receive(:logger).and_return(logger) + shared_examples "CC-fallback" do |type| + it 'returns the CC-licensed database' do + expect(resolved_path).to end_with("/CC/GeoLite2-#{type}.mmdb") + expect(::File).to exist(resolved_path) end - - it "should not give warning after 25 days" do - expect(mock_geoip_plugin).to receive(:update_filter).never - expect(logger).to receive(:warn).never - - db_manager.send(:check_age) - - c = metric_collector(db_manager) - expect_healthy_database_metric(c) + it 'logged about preparing 
CC' do + expect(db_manager.logger).to have_received(:info).with(a_string_including "CC-licensed GeoIP databases are prepared") end - - it "should not log error when 30 days has passed" do - expect(logger).to receive(:error).never - expect(mock_geoip_plugin).to receive(:update_filter).never - - db_manager.send(:check_age) - - c = metric_collector(db_manager) - expect_healthy_database_metric(c) - end - end - - def expect_database_metric(c, status, download_at, days) - expect(c.get([:database, CITY.to_sym], :status, :gauge).value).to eql(status) - expect(c.get([:database, CITY.to_sym], :last_updated_at, :gauge).value).to match /#{download_at}/ - expect(c.get([:database, CITY.to_sym], :fail_check_in_days, :gauge).value).to eql(days) - end - - def expect_healthy_database_metric(c) - expect(c.get([:database, CITY.to_sym], :status, :gauge).value).to eql(LogStash::Filters::Geoip::DatabaseMetric::DATABASE_INIT) - expect(c.get([:database, CITY.to_sym], :last_updated_at, :gauge).value).to be_nil - expect(c.get([:database, CITY.to_sym], :fail_check_in_days, :gauge).value).to be_nil end - end - - context "clean up database" do - let(:dirname) { "0123456789" } - let(:dirname2) { "9876543210" } - let(:dir_path) { get_dir_path(dirname) } - let(:dir_path2) { get_dir_path(dirname2) } - let(:asn00) { get_file_path(dirname, default_asn_db_name) } - let(:city00) { get_file_path(dirname, default_city_db_name) } - let(:asn02) { get_file_path(dirname2, default_asn_db_name) } - let(:city02) { get_file_path(dirname2, default_city_db_name) } - before(:each) do - FileUtils.mkdir_p [dir_path, dir_path2] - end + context "when manager is disabled" do + let(:eula_manager_enabled) { false } - it "should delete file which is not in metadata" do - FileUtils.touch [asn00, city00, asn02, city02] + include_examples "explicit path" - db_manager.send(:clean_up_database, [dirname]) + context "when user does not specify an explict path" do + subject!(:resolved_path) { 
db_manager.subscribe_database_path("City", nil, mock_geoip_plugin) } - [asn02, city02].each { |file_path| expect(::File.exist?(file_path)).to be_falsey } - [get_dir_path(CC), asn00, city00].each { |file_path| expect(::File.exist?(file_path)).to be_truthy } - end - end - - context "subscribe database path" do - it "should return user input path" do - path = db_manager.subscribe_database_path(CITY, default_city_db_path, mock_geoip_plugin) - expect(db_manager.instance_variable_get(:@states)[CITY].plugins.size).to eq(0) - expect(path).to eq(default_city_db_path) - end - - it "should return database path in state if no user input" do - expect(db_manager.instance_variable_get(:@states)[CITY].plugins.size).to eq(0) - allow(db_manager).to receive(:trigger_download) - path = db_manager.subscribe_database_path(CITY, nil, mock_geoip_plugin) - expect(db_manager.instance_variable_get(:@states)[CITY].plugins.size).to eq(1) - expect(path).to eq(default_city_db_path) - end - - context "when eula database is expired" do - before do - rewrite_temp_metadata(metadata_path, [city_expired_metadata]) - end - - it "should return nil" do - expect(db_manager.instance_variable_get(:@states)[CITY].plugins.size).to eq(0) - allow(db_manager).to receive(:trigger_download) - path = db_manager.subscribe_database_path(CITY, nil, mock_geoip_plugin) - expect(db_manager.instance_variable_get(:@states)[CITY].plugins.size).to eq(1) - expect(path).to be_nil + include_examples "CC-fallback", "City" + include_examples "not subscribed to the EULA manager" end end - context "downloader setting" do - context "enabled" do - it "should trigger database download" do - allow(db_manager).to receive(:trigger_download) - db_manager.subscribe_database_path(CITY, nil, mock_geoip_plugin) - expect(db_manager).to have_received(:trigger_download) - end - end + context "when manager is enabled" do + let(:eula_manager_enabled) { true } - context "disabled" do - it "should return cc database when database path is nil" do - 
allow(LogStash::SETTINGS).to receive(:get).with("xpack.geoip.downloader.enabled").and_return(false) - allow(mock_metadata).to receive(:delete).once + include_examples "explicit path" - path = db_manager.subscribe_database_path(CITY, nil, mock_geoip_plugin) + context "when user does not specify an explicit path" do + subject!(:resolved_path) { db_manager.subscribe_database_path("City", nil, mock_geoip_plugin) } - expect(path).to eq(default_city_db_path) + shared_examples "subscribed to expire notifications" do + context "when the manager expires the db" do + it "notifies the plugin" do + db_manager.eula_subscription("City").notify(LogStash::GeoipDatabaseManagement::DbInfo::EXPIRED) + expect(mock_geoip_plugin).to have_received(:update_filter).with(:expire) + end + end + context "when the manager expires a different DB" do + it 'does not notify the plugin' do + db_manager.eula_subscription("ASN").notify(LogStash::GeoipDatabaseManagement::DbInfo::EXPIRED) + expect(mock_geoip_plugin).to_not have_received(:update_filter) + end + end end - it "should delete eula databases and metadata when database path is nil" do - allow(LogStash::SETTINGS).to receive(:get).with("xpack.geoip.downloader.enabled").and_return(false) - allow(mock_metadata).to receive(:delete).once - - eula_db_dirname = get_dir_path("foo") - FileUtils.mkdir_p(eula_db_dirname) - rewrite_temp_metadata(metadata_path, [["City", "1620246514", "", "foo", true], - ["ASN", "1620246514", "", "foo", true]]) - - path = db_manager.subscribe_database_path(CITY, nil, mock_geoip_plugin) + shared_examples "subscribed to update notifications" do + context "when the manager updates the db" do + let(:updated_db_path) { "/this/that/another.mmdb" } + it "notifies the plugin" do + db_manager.eula_subscription("City").notify(LogStash::GeoipDatabaseManagement::DbInfo.new(path: updated_db_path)) + expect(mock_geoip_plugin).to have_received(:update_filter).with(:update, updated_db_path) + end + end + context "when the manager updates 
a different DB" do + let(:updated_db_path) { "/this/that/another.mmdb" } + it 'does not notify the plugin' do + db_manager.eula_subscription("ASN").notify(LogStash::GeoipDatabaseManagement::DbInfo.new(path: updated_db_path)) + expect(mock_geoip_plugin).to_not have_received(:update_filter) + end + end + end - expect(path).to eq(default_city_db_path) - expect(File).not_to exist(eula_db_dirname) + shared_examples "logs implicit EULA" do + it 'logs about the user implicitly accepting the MaxMind EULA' do + expect(db_manager.logger).to have_received(:info).with(a_string_including "you accepted and agreed MaxMind EULA") + end end - it "should return user input database path" do - allow(LogStash::SETTINGS).to receive(:get).with("xpack.geoip.downloader.enabled").and_return(false) - allow(db_manager).to receive(:trigger_download) - allow(db_manager).to receive(:trigger_cc_database_fallback) + context "and EULA database is expired" do + let(:eula_database_infos) { + super().merge("City" => LogStash::GeoipDatabaseManagement::DbInfo::EXPIRED) + } + it 'returns nil' do + expect(resolved_path).to be_nil + end + it 'is subscribed for updates' do + expect(db_manager.subscribed_plugins_count("City")).to eq(1) + end + include_examples "subscribed to update notifications" + include_examples "logs implicit EULA" + end - path = db_manager.subscribe_database_path(CITY, "path/to/db", mock_geoip_plugin) + context "and EULA database is pending" do + let(:eula_database_infos) { + super().merge("City" => LogStash::GeoipDatabaseManagement::DbInfo::PENDING) + } + include_examples "CC-fallback", "City" + include_examples "subscribed to update notifications" + include_examples "subscribed to expire notifications" + include_examples "logs implicit EULA" + end - expect(db_manager).not_to have_received(:trigger_download) - expect(db_manager).not_to have_received(:trigger_cc_database_fallback) - expect(path).to eq("path/to/db") + context "and EULA database has a recent database" do + 
let(:managed_city_database) { "/this/that/GeoLite2-City.mmdb"} + let(:eula_database_infos) { + super().merge("City" => LogStash::GeoipDatabaseManagement::DbInfo.new(path: managed_city_database)) + } + it 'returns the path to the managed database' do + expect(resolved_path).to eq(managed_city_database) + end + it 'is subscribed for updates' do + expect(db_manager.subscribed_plugins_count("City")).to eq(1) + end + include_examples "subscribed to update notifications" + include_examples "subscribed to expire notifications" + include_examples "logs implicit EULA" end end end end context "unsubscribe" do - let(:db_manager) do - manager = super() - manager.instance_variable_get(:@states)[CITY].plugins.push(mock_geoip_plugin) - manager.instance_variable_get(:@states)[CITY].is_eula = true - manager + before(:each) do + db_manager.subscribe_database_path("City", nil, mock_geoip_plugin) + expect(db_manager.subscribed_plugins_count("City")).to eq(1) end - it "should remove plugin in state" do - db_manager.unsubscribe_database_path(CITY, mock_geoip_plugin) - expect(db_manager.instance_variable_get(:@states)[CITY].plugins.size).to eq(0) + it "removes plugin in state" do + db_manager.unsubscribe_database_path("City", mock_geoip_plugin) + expect(db_manager.subscribed_plugins_count("City")).to eq(0) end end context "shutdown" do - let(:db_manager) { Class.new(LogStash::Filters::Geoip::DatabaseManager).instance } - - it "should unsubscribe gracefully" do - db_manager.subscribe_database_path(CITY, default_city_db_path, mock_geoip_plugin) - expect { db_manager.unsubscribe_database_path(CITY, mock_geoip_plugin) }.not_to raise_error + it "unsubscribes gracefully" do + db_manager.subscribe_database_path("City", default_city_db_path, mock_geoip_plugin) + expect { db_manager.unsubscribe_database_path("City", mock_geoip_plugin) }.not_to raise_error end end - - context "database metric is not assigned" do - let(:db_manager) { Class.new(LogStash::Filters::Geoip::DatabaseManager).instance } - 
- it "does not throw error" do - allow(LogStash::Filters::Geoip::DatabaseManager).to receive(:logger).and_return(logger) - expect(logger).to receive(:debug).once - database_metric = db_manager.database_metric - expect { database_metric.set_download_status_updating }.not_to raise_error - end - end - - def metric_collector(db_manager) - db_manager.instance_variable_get(:@database_metric).instance_variable_get(:@metric).collector - end end end diff --git a/x-pack/spec/filters/geoip/database_metadata_spec.rb b/x-pack/spec/filters/geoip/database_metadata_spec.rb deleted file mode 100644 index f2d40435089..00000000000 --- a/x-pack/spec/filters/geoip/database_metadata_spec.rb +++ /dev/null @@ -1,223 +0,0 @@ -# # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# # or more contributor license agreements. Licensed under the Elastic License; -# # you may not use this file except in compliance with the Elastic License. - -require_relative 'test_helper' -require "filters/geoip/database_metadata" -require "filters/geoip/database_manager" -require "stud/temporary" -require "fileutils" - -describe LogStash::Filters::Geoip do - describe 'DatabaseMetadata', :aggregate_failures do - let(:database_type) { LogStash::Filters::Geoip::CITY } - let(:dbm) do - dbm = LogStash::Filters::Geoip::DatabaseMetadata.new - dbm.instance_variable_set(:@metadata_path, Stud::Temporary.file.path) - dbm - end - let(:temp_metadata_path) { dbm.instance_variable_get(:@metadata_path) } - let(:logger) { double("Logger") } - - context "get all" do - it "return multiple rows" do - write_temp_metadata(temp_metadata_path, city2_metadata) - - expect(dbm.get_all.size).to eq(3) - end - end - - context "get metadata" do - it "return metadata" do - write_temp_metadata(temp_metadata_path, city2_metadata) - - city = dbm.get_metadata(database_type) - expect(city.size).to eq(2) - - asn = dbm.get_metadata("ASN") - expect(asn.size).to eq(1) - end - - it "return empty array when file is missing" 
do - metadata = dbm.get_metadata(database_type) - expect(metadata.size).to eq(0) - end - - it "return empty array when an empty file exist" do - FileUtils.touch(temp_metadata_path) - - metadata = dbm.get_metadata(database_type) - expect(metadata.size).to eq(0) - end - end - - context "save timestamp" do - it "write the current time" do - write_temp_metadata(temp_metadata_path) - dbm.save_metadata(database_type, second_dirname, true) - - expect(dbm.get_metadata(database_type).size).to eq(1) - expect(dbm.get_all.size).to eq(2) - - metadata = dbm.get_metadata(database_type).last - expect(metadata[LogStash::Filters::Geoip::DatabaseMetadata::Column::DATABASE_TYPE]).to eq("City") - past = metadata[LogStash::Filters::Geoip::DatabaseMetadata::Column::CHECK_AT] - expect(Time.now.to_i - past.to_i).to be < 100 - expect(metadata[LogStash::Filters::Geoip::DatabaseMetadata::Column::GZ_MD5]).to eq(md5(default_city_gz_path)) - expect(metadata[LogStash::Filters::Geoip::DatabaseMetadata::Column::DIRNAME]).to eq(second_dirname) - expect(metadata[LogStash::Filters::Geoip::DatabaseMetadata::Column::IS_EULA]).to eq("true") - end - end - - context "database path" do - before do - copy_cc(get_dir_path("CC")) - copy_cc(get_dir_path(second_dirname)) - end - - it "return the default city database path" do - write_temp_metadata(temp_metadata_path) - - expect(dbm.database_path(database_type)).to eq(default_city_db_path) - end - - context "when the database exist" do - it "return the last database path with valid md5" do - write_temp_metadata(temp_metadata_path, city2_metadata) - - expect(dbm.database_path(database_type)).to eq(second_city_db_path) - end - end - - context "with ASN database type" do - let(:database_type) { "ASN" } - let(:dbm) do - dbm = LogStash::Filters::Geoip::DatabaseMetadata.new - dbm.instance_variable_set(:@metadata_path, Stud::Temporary.file.path) - dbm - end - - it "return the default asn database path" do - write_temp_metadata(temp_metadata_path) - - 
expect(dbm.database_path(database_type)).to eq(default_asn_db_path) - end - end - - context "with invalid database type" do - let(:database_type) { "???" } - let(:dbm) do - dbm = LogStash::Filters::Geoip::DatabaseMetadata.new - dbm.instance_variable_set(:@metadata_path, Stud::Temporary.file.path) - dbm - end - - it "return nil if md5 not matched" do - write_temp_metadata(temp_metadata_path) - - expect(dbm.database_path(database_type)).to be_nil - end - end - end - - context "gz md5" do - it "should give the last gz md5" do - write_temp_metadata(temp_metadata_path, ["City", "", "SOME_GZ_MD5", "SOME_MD5", second_dirname]) - expect(dbm.gz_md5(database_type)).to eq("SOME_GZ_MD5") - end - - it "should give empty string if metadata is empty" do - expect(dbm.gz_md5(database_type)).to eq("") - end - end - - context "updated at" do - it "should give the last update timestamp" do - write_temp_metadata(temp_metadata_path, ["City", "1611690807", "SOME_GZ_MD5", second_dirname, true]) - expect(dbm.check_at(database_type)).to eq(1611690807) - end - - it "should give 0 if metadata is empty" do - expect(dbm.check_at(database_type)).to eq(0) - end - end - - context "exist" do - it "should be false because Stud create empty temp file" do - expect(dbm.exist?).to be_falsey - end - - it "should be true if temp file has content" do - ::File.open(temp_metadata_path, "w") { |f| f.write("something") } - - expect(dbm.exist?).to be_truthy - end - end - - context "is eula" do - it "should give boolean false if database is CC" do - write_temp_metadata(temp_metadata_path) - expect(dbm.is_eula(database_type)).to eq(false) - end - - it "should give boolean true if database is EULA" do - write_temp_metadata(temp_metadata_path, city2_metadata) - expect(dbm.is_eula(database_type)).to eq(true) - end - end - - context "update timestamp" do - it "should update timestamp only for database type" do - write_temp_metadata(temp_metadata_path) - original = dbm.get_all - sleep(2) - - 
dbm.update_timestamp(database_type) - updated = dbm.get_all - - original.size.times do |i| - expect(original[i][LogStash::Filters::Geoip::DatabaseMetadata::Column::DATABASE_TYPE]). - to(eq(updated[i][LogStash::Filters::Geoip::DatabaseMetadata::Column::DATABASE_TYPE])) - expect(original[i][LogStash::Filters::Geoip::DatabaseMetadata::Column::GZ_MD5]) - .to(eq(updated[i][LogStash::Filters::Geoip::DatabaseMetadata::Column::GZ_MD5])) - expect(original[i][LogStash::Filters::Geoip::DatabaseMetadata::Column::DIRNAME]) - .to(eq(updated[i][LogStash::Filters::Geoip::DatabaseMetadata::Column::DIRNAME])) - expect(original[i][LogStash::Filters::Geoip::DatabaseMetadata::Column::IS_EULA]) - .to(eq(updated[i][LogStash::Filters::Geoip::DatabaseMetadata::Column::IS_EULA])) - end - - # ASN - expect(original[0][LogStash::Filters::Geoip::DatabaseMetadata::Column::CHECK_AT]) - .to(eq(updated[0][LogStash::Filters::Geoip::DatabaseMetadata::Column::CHECK_AT])) - - # City - expect(original[1][LogStash::Filters::Geoip::DatabaseMetadata::Column::CHECK_AT]) - .not_to(eq(updated[1][LogStash::Filters::Geoip::DatabaseMetadata::Column::CHECK_AT])) - end - end - - context "reset md5" do - it "should reset md5 to empty string only" do - rewrite_temp_metadata(temp_metadata_path, [["ASN", "1620246514", "SOME MD5", "1620246514", true], - ["City", "1620246514", "SOME MD5", "1620246514", true]]) - - dbm.reset_md5(database_type) - row = dbm.get_metadata(database_type).last - expect(row[LogStash::Filters::Geoip::DatabaseMetadata::Column::GZ_MD5]).to be_empty - expect(row[LogStash::Filters::Geoip::DatabaseMetadata::Column::DIRNAME]).to eql("1620246514") - expect(row[LogStash::Filters::Geoip::DatabaseMetadata::Column::IS_EULA]).to be_truthy - end - end - - context "dirnames" do - it "should reset md5 to empty string only" do - write_temp_metadata(temp_metadata_path, city2_metadata) - rewrite_temp_metadata(temp_metadata_path, [["ASN", "1620246514", "SOME MD5", "CC", true], - city2_metadata]) - - dirnames = 
dbm.dirnames - expect(dirnames).to match_array([second_dirname, "CC"]) - end - end - end -end diff --git a/x-pack/spec/filters/geoip/download_manager_spec.rb b/x-pack/spec/filters/geoip/download_manager_spec.rb deleted file mode 100644 index 2e160e44fe2..00000000000 --- a/x-pack/spec/filters/geoip/download_manager_spec.rb +++ /dev/null @@ -1,244 +0,0 @@ -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License; -# you may not use this file except in compliance with the Elastic License. - -require_relative 'test_helper' -require 'fileutils' -require "filters/geoip/download_manager" -require "filters/geoip/database_manager" - -describe LogStash::Filters::Geoip do - describe 'DownloadManager', :aggregate_failures do - let(:mock_metadata) { double("database_metadata") } - let(:download_manager) do - manager = LogStash::Filters::Geoip::DownloadManager.new(mock_metadata) - manager - end - let(:database_type) { LogStash::Filters::Geoip::CITY } - let(:logger) { double("Logger") } - - GEOIP_STAGING_HOST = "https://geoip.elastic.dev" - GEOIP_STAGING_ENDPOINT = "#{GEOIP_STAGING_HOST}#{LogStash::Filters::Geoip::DownloadManager::GEOIP_PATH}" - - before do - allow(LogStash::SETTINGS).to receive(:get).with("xpack.geoip.download.endpoint").and_return(GEOIP_STAGING_ENDPOINT) - end - - # this is disabled until https://github.com/elastic/logstash/issues/13261 is solved - context "rest client" do - it "can call endpoint" do - conn = download_manager.send(:rest_client) - res = conn.get("#{GEOIP_STAGING_ENDPOINT}?key=#{SecureRandom.uuid}&elastic_geoip_service_tos=agree") - expect(res.code).to eq(200) - end - - it "should raise error when endpoint response 4xx" do - bad_uri = URI("#{GEOIP_STAGING_HOST}?key=#{SecureRandom.uuid}&elastic_geoip_service_tos=agree") - expect(download_manager).to receive(:service_endpoint).and_return(bad_uri).twice - expect { download_manager.send(:check_update) 
}.to raise_error(LogStash::Filters::Geoip::DownloadManager::BadResponseCodeError, /404/) - end - - context "when ENV['http_proxy'] is set" do - let(:mock_resp) { JSON.parse(::File.read(::File.expand_path("./fixtures/normal_resp.json", ::File.dirname(__FILE__)))) } - let(:db_info) { mock_resp[1] } - let(:proxy_url) { 'http://user:pass@example.com:1234' } - - around(:each) { |example| with_environment('http_proxy' => proxy_url, &example) } - - it "initializes the rest client with the proxy" do - expect(::Manticore::Client).to receive(:new).with(a_hash_including(:proxy => proxy_url)).and_call_original - - download_manager.send(:rest_client) - end - - it "download database with the proxy" do - expect(download_manager).to receive(:md5).and_return(db_info['md5_hash']) - expect(::Down).to receive(:download).with(db_info['url'], a_hash_including(:proxy => proxy_url)).and_return(true) - - download_manager.send(:download_database, database_type, second_dirname, db_info) - end - end - end - - context "check update" do - before(:each) do - expect(download_manager).to receive(:uuid).and_return(SecureRandom.uuid) - mock_resp = double("geoip_endpoint", - :body => ::File.read(::File.expand_path("./fixtures/normal_resp.json", ::File.dirname(__FILE__))), - :code => 200) - allow(download_manager).to receive_message_chain("rest_client.get").and_return(mock_resp) - end - - it "should return City db info when City md5 does not match" do - expect(mock_metadata).to receive(:gz_md5).and_return("8d57aec1958070f01042ac1ecd8ec2ab", "a123a45d67890a2bd02e5edd680f6703c") - - updated_db = download_manager.send(:check_update) - expect(updated_db.size).to eql(1) - - type, info = updated_db[0] - expect(info).to have_key("md5_hash") - expect(info).to have_key("name") - expect(info).to have_key("provider") - expect(info).to have_key("updated") - expect(info).to have_key("url") - expect(type).to eql(database_type) - end - - it "should return empty array when md5 are the same" do - 
expect(mock_metadata).to receive(:gz_md5).and_return("8d57aec1958070f01042ac1ecd8ec2ab", "a195a73d4651a2bd02e5edd680f6703c") - - updated_db = download_manager.send(:check_update) - expect(updated_db.size).to eql(0) - end - end - - context "download database" do - let(:db_info) do - { - "age" => 297221, - "md5_hash" => md5_hash, - "name" => filename, - "provider" => "maxmind", - "updated" => 1609891257, - "url" => "https://github.com/logstash-plugins/logstash-filter-geoip/archive/main.zip" - } - end - let(:md5_hash) { SecureRandom.hex } - let(:filename) { "GeoLite2-City.tgz"} - let(:dirname) { "0123456789" } - - it "should raise error if md5 does not match" do - allow(Down).to receive(:download) - expect { download_manager.send(:download_database, database_type, dirname, db_info) }.to raise_error /wrong checksum/ - end - - it "should download file and return zip path" do - expect(download_manager).to receive(:md5).and_return(md5_hash) - - new_zip_path = download_manager.send(:download_database, database_type, dirname, db_info) - expect(new_zip_path).to match /GeoLite2-City\.tgz/ - expect(::File.exist?(new_zip_path)).to be_truthy - end - end - - context "unzip" do - let(:dirname) { Time.now.to_i.to_s } - let(:copyright_path) { get_file_path(dirname, 'COPYRIGHT.txt') } - let(:license_path) { get_file_path(dirname, 'LICENSE.txt') } - let(:readme_path) { get_file_path(dirname, 'README.txt') } - let(:folder_path) { get_file_path(dirname, 'inner') } - let(:folder_more_path) { ::File.join(folder_path, 'more.txt') } - let(:folder_less_path) { ::File.join(folder_path, 'less.txt') } - - before do - FileUtils.mkdir_p(get_dir_path(dirname)) - end - - after do - file_path = ::File.expand_path("./fixtures/sample.mmdb", ::File.dirname(__FILE__)) - delete_file(file_path, copyright_path, license_path, readme_path) - FileUtils.rm_r folder_path - end - - it "should extract all files in tarball" do - zip_path = ::File.expand_path("./fixtures/sample.tgz", ::File.dirname(__FILE__)) - 
new_db_path = download_manager.send(:unzip, database_type, dirname, zip_path) - - expect(new_db_path).to match /GeoLite2-#{database_type}\.mmdb/ - expect(::File.exist?(new_db_path)).to be_truthy - expect(::File.exist?(copyright_path)).to be_truthy - expect(::File.exist?(license_path)).to be_truthy - expect(::File.exist?(readme_path)).to be_truthy - expect(::File.directory?(folder_path)).to be_truthy - expect(::File.exist?(folder_more_path)).to be_truthy - expect(::File.exist?(folder_less_path)).to be_truthy - end - end - - context "assert database" do - before do - copy_cc(get_dir_path("CC")) - end - - it "should raise error if file is invalid" do - expect { download_manager.send(:assert_database!, "Gemfile") }.to raise_error /failed to load database/ - end - - it "should pass validation" do - expect(download_manager.send(:assert_database!, default_city_db_path)).to be_nil - end - end - - context "fetch database" do - it "should return array of db which has valid download" do - expect(download_manager).to receive(:check_update).and_return([[LogStash::Filters::Geoip::ASN, {}], - [LogStash::Filters::Geoip::CITY, {}]]) - allow(download_manager).to receive(:download_database) - allow(download_manager).to receive(:unzip).and_return("NEW_DATABASE_PATH") - allow(download_manager).to receive(:assert_database!) 
- - updated_db = download_manager.send(:fetch_database) - - expect(updated_db.size).to eql(2) - asn_type, asn_valid_download, asn_dirname, asn_path = updated_db[0] - city_type, city_valid_download, city_dirname, city_path = updated_db[1] - expect(asn_valid_download).to be_truthy - expect(asn_path).to eql("NEW_DATABASE_PATH") - expect(city_valid_download).to be_truthy - expect(city_path).to eql("NEW_DATABASE_PATH") - end - - it "should return array of db which has invalid download" do - expect(download_manager).to receive(:check_update).and_return([[LogStash::Filters::Geoip::ASN, {}], - [LogStash::Filters::Geoip::CITY, {}]]) - expect(download_manager).to receive(:download_database).and_raise('boom').at_least(:twice) - - updated_db = download_manager.send(:fetch_database) - - expect(updated_db.size).to eql(2) - asn_type, asn_valid_download, asn_path = updated_db[0] - city_type, city_valid_download, city_path = updated_db[1] - expect(asn_valid_download).to be_falsey - expect(asn_path).to be_nil - expect(city_valid_download).to be_falsey - expect(city_path).to be_nil - end - end - - context "download url" do - before do - allow(download_manager).to receive(:uuid).and_return(SecureRandom.uuid) - end - - it "should give a path with hostname when input is a filename" do - expect(download_manager.send(:download_url, "GeoLite2-ASN.tgz")).to match /#{GEOIP_STAGING_HOST}/ - end - - it "should give a unmodified path when input has scheme" do - expect(download_manager.send(:download_url, GEOIP_STAGING_ENDPOINT)).to eq(GEOIP_STAGING_ENDPOINT) - end - end - - context "service endpoint" do - before do - allow(download_manager).to receive(:uuid).and_return(SecureRandom.uuid) - end - - it "should give xpack setting" do - uri = download_manager.send(:service_endpoint) - expect(uri.to_s).to match /#{GEOIP_STAGING_ENDPOINT}/ - end - - context "empty xpack config" do - before do - allow(LogStash::SETTINGS).to receive(:get).with("xpack.geoip.download.endpoint").and_return(nil) - end - - 
it "should give default endpoint" do - uri = download_manager.send(:service_endpoint) - expect(uri.to_s).to match /#{LogStash::Filters::Geoip::DownloadManager::GEOIP_ENDPOINT}/ - end - end - end - end -end diff --git a/x-pack/spec/geoip_database_management/downloader_spec.rb b/x-pack/spec/geoip_database_management/downloader_spec.rb new file mode 100644 index 00000000000..fdeb67716b5 --- /dev/null +++ b/x-pack/spec/geoip_database_management/downloader_spec.rb @@ -0,0 +1,287 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License; +# you may not use this file except in compliance with the Elastic License. + +describe LogStash::GeoipDatabaseManagement::Downloader, aggregate_failures: true, verify_stubs: true do + let(:temp_metadata_path) { Stud::Temporary.directory } + let(:data_path) { LogStash::GeoipDatabaseManagement::DataPath.new(temp_metadata_path) } + let(:metadata) { LogStash::GeoipDatabaseManagement::Metadata.new(data_path) } + + let(:service_host) { "https://geoip.elastic.dev" } + let(:service_path) { "v1/database" } + let(:service_endpoint) { "#{service_host}/#{service_path}" } + + let(:database_type) { constants::CITY } + + let(:constants) { LogStash::GeoipDatabaseManagement::Constants } + + subject(:downloader) { described_class.new(metadata, service_endpoint) } + + after(:each) do + FileUtils::rm_rf(temp_metadata_path) + end + + context "rest client" do + it "can call endpoint" do + conn = downloader.send(:rest_client) + res = conn.get(downloader.list_databases_url) + expect(res.code).to eq(200) + end + + it 'raises error when endpoint response 4xx' do + bad_uri = "#{service_host}/?key=#{SecureRandom.uuid}&elastic_geoip_service_tos=agree" + expect(downloader).to receive(:list_databases_url).and_return(bad_uri).twice + expect { downloader.send(:check_update, constants::DB_TYPES) }.to raise_error(described_class::BadResponseCodeError, /404/) + end + + 
context "when ENV['http_proxy'] is set" do + let(:mock_resp) { JSON.parse(::File.read(::File.expand_path("./fixtures/normal_resp.json", ::File.dirname(__FILE__)))) } + let(:db_info) { mock_resp.find {|i| i["name"].include?(database_type) } } + let(:proxy_url) { 'http://user:pass@example.com:1234' } + + around(:each) { |example| with_environment('http_proxy' => proxy_url, &example) } + + it "initializes the rest client with the proxy" do + expect(::Manticore::Client).to receive(:new).with(a_hash_including(:proxy => proxy_url)).and_call_original + + downloader.send(:rest_client) + end + + it "download database with the proxy" do + dirname = Time.now.to_i.to_s + expected_gz_download_location = data_path.gz(database_type, dirname) + expect(downloader).to receive(:md5).with(expected_gz_download_location).and_return(db_info['md5_hash']) + expect(::Down).to receive(:download).with(db_info['url'], a_hash_including(:proxy => proxy_url)).and_return(true) + + downloader.send(:download_database, database_type, dirname, db_info) + end + end + end + + context 'check_update' do + let(:mock_resp_decoded) { JSON.parse(mock_resp_body) } + let(:mock_resp_body) { ::File.read(::File.expand_path("./fixtures/normal_resp.json", ::File.dirname(__FILE__))) } + let(:mock_resp) { double("list_databases_response", :body => mock_resp_body, code: 200)} + + let(:asn_info) { mock_resp_decoded.find { |i| i["name"].include?(constants::ASN) } } + let(:city_info) { mock_resp_decoded.find { |i| i["name"].include?(constants::CITY) } } + + before(:each) do + allow(downloader).to receive_message_chain('rest_client.get').and_return(mock_resp) + end + + it "returns City db info when City md5 does not match" do + metadata_city_gzmd5 = SecureRandom.hex(20) + expect(metadata).to receive(:database_path).with(constants::CITY).and_return("/this/that/GeoLite2-City.mmdb") + expect(metadata).to receive(:gz_md5).with(constants::CITY).and_return(metadata_city_gzmd5) + + expect(metadata).to 
receive(:database_path).with(constants::ASN).and_return("/this/that/GeoLite2-ASN.mmdb") + expect(metadata).to receive(:gz_md5).with(constants::ASN).and_return(asn_info['md5_hash']) + + updated_dbs = downloader.send(:check_update, constants::DB_TYPES) + expect(updated_dbs.size).to eql(1) + + type, info = updated_dbs[0] + expect(info).to have_key("md5_hash") + expect(info).to have_key("name") + expect(info).to have_key("provider") + expect(info).to have_key("updated") + expect(info).to have_key("url") + expect(type).to eql(constants::CITY) + end + + it "returns empty array when all md5's match" do + expect(metadata).to receive(:database_path).with(constants::CITY).and_return("/this/that/GeoLite2-City.mmdb") + expect(metadata).to receive(:gz_md5).with(constants::CITY).and_return(city_info['md5_hash']) + + expect(metadata).to receive(:database_path).with(constants::ASN).and_return("/this/that/GeoLite2-ASN.mmdb") + expect(metadata).to receive(:gz_md5).with(constants::ASN).and_return(asn_info['md5_hash']) + + updated_dbs = downloader.send(:check_update, constants::DB_TYPES) + expect(updated_dbs.size).to eql(0) + end + + it "returns City db info when City db not in metadata" do + expect(metadata).to receive(:database_path).with(constants::CITY).and_return(nil) # signal missing file + + expect(metadata).to receive(:database_path).with(constants::ASN).and_return("/this/that/GeoLite2-ASN.mmdb") + expect(metadata).to receive(:gz_md5).with(constants::ASN).and_return(asn_info['md5_hash']) + + updated_dbs = downloader.send(:check_update, constants::DB_TYPES) + expect(updated_dbs.size).to eql(1) + + type, info = updated_dbs[0] + expect(info).to have_key("md5_hash") + expect(info).to have_key("name") + expect(info).to have_key("provider") + expect(info).to have_key("updated") + expect(info).to have_key("url") + expect(type).to eql(constants::CITY) + end + end + + context "download database" do + let(:db_info) do + { + "age" => 297221, + "md5_hash" => md5_hash, + "name" => 
filename, + "provider" => "maxmind", + "updated" => 1609891257, + "url" => expected_download_url + } + end + let(:md5_hash) { SecureRandom.hex } + let(:filename) { "GeoLite2-City.tgz"} + let(:dirname) { "0123456789" } + + let(:expected_download_url) { "#{service_host}/blob/sample.tgz" } + let(:sample_city_db_gz) { ::File.expand_path("./fixtures/sample.tgz", ::File.dirname(__FILE__)) } + + before(:each) do + allow(Down).to receive(:download).with(expected_download_url, anything) do |url, options| + FileUtils::cp(sample_city_db_gz, options[:destination]) + true + end + end + + context "with mismatched md5 checksum" do + let(:md5_hash) { SecureRandom.hex } + it "should raise error if md5 does not match" do + expect { downloader.send(:download_database, database_type, dirname, db_info) }.to raise_error /wrong checksum/ + end + end + + context "with matching md5 checksum" do + let(:md5_hash) { LogStash::GeoipDatabaseManagement::Util.md5(sample_city_db_gz) } + it "should download file and return zip path" do + new_zip_path = downloader.send(:download_database, database_type, dirname, db_info) + expect(new_zip_path).to match /GeoLite2-City\.tgz/ + expect(::File.exist?(new_zip_path)).to be_truthy + end + end + end + + context "unzip" do + let(:dirname) { Time.now.to_i.to_s } + let(:copyright_path) { data_path.resolve(dirname, 'COPYRIGHT.txt') } + let(:license_path) { data_path.resolve(dirname, 'LICENSE.txt') } + let(:readme_path) { data_path.resolve(dirname, 'README.txt') } + let(:folder_path) { data_path.resolve(dirname, 'inner') } + let(:folder_more_path) { data_path.resolve(dirname, 'inner', 'more.txt') } + let(:folder_less_path) { data_path.resolve(dirname, 'inner', 'less.txt') } + + before do + FileUtils.mkdir_p(data_path.resolve(dirname)) + end + + it "should extract all files in tarball" do + zip_path = ::File.expand_path("./fixtures/sample.tgz", ::File.dirname(__FILE__)) + new_db_path = downloader.send(:unzip, database_type, dirname, zip_path) + + 
expect(new_db_path).to match /GeoLite2-#{database_type}\.mmdb/ + expect(::File.exist?(new_db_path)).to be_truthy + expect(::File.exist?(copyright_path)).to be_truthy + expect(::File.exist?(license_path)).to be_truthy + expect(::File.exist?(readme_path)).to be_truthy + expect(::File.directory?(folder_path)).to be_truthy + expect(::File.exist?(folder_more_path)).to be_truthy + expect(::File.exist?(folder_less_path)).to be_truthy + end + end + + context "assert_database!" do + + let(:sample_city_db_gz) { ::File.expand_path("./fixtures/sample.tgz", ::File.dirname(__FILE__)) } + + it "rejects files that don't exist" do + expect { downloader.send(:assert_database!, data_path.resolve("nope.mmdb") ) }.to raise_exception(/does not exist/) + end + it "rejects files that aren't MMDB" do + expect { downloader.send(:assert_database!, __FILE__ ) }.to raise_exception(/does not appear to be a MaxMind DB/) + end + it "accepts files that have MMDB marker" do + candidate = data_path.db(constants::CITY, "expanded") + FileUtils.mkdir_p(data_path.resolve("expanded")) + + # A file that has the magic MaxMind marker buried inside it + ::File.open(candidate, 'w:BINARY') do |handle| + handle.write("#{database_type}".b) + handle.write(SecureRandom.bytes(rand(2048...10240)).b) + handle.write("#\xab\xcd\xefMaxMind.com".b) + handle.write(SecureRandom.bytes(rand(2048...10240)).b) + handle.write("#{database_type}".b) + handle.flush + end + + downloader.send(:assert_database!, candidate) + end + end + + context "fetch_databases" do + it "should return array of db which has valid download" do + expect(downloader).to receive(:check_update).and_return([[constants::ASN, {}], + [constants::CITY, {}]]) + allow(downloader).to receive(:download_database) + allow(downloader).to receive(:unzip).and_return("NEW_DATABASE_PATH") + expect(downloader).to receive(:assert_database!).at_least(:once) + + updated_db = downloader.send(:fetch_databases, constants::DB_TYPES) + + expect(updated_db.size).to eql(2) + 
asn_type, asn_valid_download, asn_dirname, asn_path = updated_db[0] + city_type, city_valid_download, city_dirname, city_path = updated_db[1] + expect(asn_valid_download).to be_truthy + expect(asn_path).to eql("NEW_DATABASE_PATH") + expect(city_valid_download).to be_truthy + expect(city_path).to eql("NEW_DATABASE_PATH") + end + + it "should return array of db which has invalid download" do + expect(downloader).to receive(:check_update).and_return([[constants::ASN, {}], + [constants::CITY, {}]]) + expect(downloader).to receive(:download_database).and_raise('boom').at_least(:twice) + + updated_db = downloader.send(:fetch_databases, constants::DB_TYPES) + + expect(updated_db.size).to eql(2) + asn_type, asn_valid_download, asn_path = updated_db[0] + city_type, city_valid_download, city_path = updated_db[1] + expect(asn_valid_download).to be_falsey + expect(asn_path).to be_nil + expect(city_valid_download).to be_falsey + expect(city_path).to be_nil + end + end + + context "#resolve_download_url" do + context "when given an absolute URL" do + let(:absolute_url) { "https://example.com/blob/this.tgz" } + it 'returns the provided URL' do + expect(downloader.send(:resolve_download_url, absolute_url).to_s).to eq(absolute_url) + end + end + context "when given a relative URL with absolute path" do + let(:relative_url) { "/blob/this.tgz" } + it 'returns a url resolved relative to service endpoint' do + expect(downloader.send(:resolve_download_url, relative_url).to_s).to eq("#{service_host}#{relative_url}") + end + end + context "when given a relative URL with relative path" do + let(:relative_url) { "blob/this.tgz" } + it 'returns a url resolved relative to service endpoint' do + expect(downloader.send(:resolve_download_url, relative_url).to_s).to eq("#{service_host}/v1/#{relative_url}") + end + end + end + + context "#list_databases_url" do + subject(:list_databases_url) { downloader.list_databases_url } + it "adds the key and tos agreement parameters" do + 
expect(list_databases_url.host).to eq("geoip.elastic.dev") + expect(list_databases_url.path).to eq("/v1/database") + expect(list_databases_url.query).to include "key=#{downloader.send(:uuid)}" + expect(list_databases_url.query).to include "elastic_geoip_service_tos=agree" + end + end +end \ No newline at end of file diff --git a/x-pack/spec/filters/geoip/fixtures/normal_resp.json b/x-pack/spec/geoip_database_management/fixtures/normal_resp.json similarity index 100% rename from x-pack/spec/filters/geoip/fixtures/normal_resp.json rename to x-pack/spec/geoip_database_management/fixtures/normal_resp.json diff --git a/x-pack/spec/filters/geoip/fixtures/sample.tgz b/x-pack/spec/geoip_database_management/fixtures/sample.tgz similarity index 100% rename from x-pack/spec/filters/geoip/fixtures/sample.tgz rename to x-pack/spec/geoip_database_management/fixtures/sample.tgz diff --git a/x-pack/spec/geoip_database_management/manager_spec.rb b/x-pack/spec/geoip_database_management/manager_spec.rb new file mode 100644 index 00000000000..a5c1600e45c --- /dev/null +++ b/x-pack/spec/geoip_database_management/manager_spec.rb @@ -0,0 +1,548 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License; +# you may not use this file except in compliance with the Elastic License. + +describe LogStash::GeoipDatabaseManagement::Manager, aggregate_failures: true, verify_stubs: true do + + def write_dummy_mmdb(type, path) + FileUtils.mkdir_p(File::dirname(path)) + File.open(path, "w:BINARY") do |handle| + handle.write("#{type}\xab\xcd\xefMaxMind.com#{type}".force_encoding("BINARY")) + end + end + + let(:manager_instance) do + apply_settings(settings_overrides) do |applied_settings| + stub_const("LogStash::SETTINGS", applied_settings) + Class.new(described_class) do + public :setup + public :shutdown! 
+ public :current_db_info + public :current_state + public :execute_download_job + public :metadata + public :downloader + end.instance + end + end + + let(:constants) { LogStash::GeoipDatabaseManagement::Constants } + + let(:settings_overrides) do + { + 'path.data' => settings_path_data, + } + end + let(:settings_path_data) { Stud::Temporary.directory } + let(:geoip_data_path) { ::File.expand_path("geoip_database_management", settings_path_data)} + let(:geoip_metadata_path) { ::File.expand_path("metadata.csv", geoip_data_path) } + + let(:metadata_contents) { nil } + before(:each) do + ::FileUtils.mkdir_p(::File.dirname(geoip_metadata_path)) + ::File.write(geoip_metadata_path, metadata_contents) unless metadata_contents.nil? + end + + after(:each) do + manager_instance.shutdown! + FileUtils.rm_rf(settings_path_data) + end + + shared_context "existing databases from metadata" do + let(:existing_dirname) { (Time.now.to_i - 1000).to_s } + + let(:existing_city_db_check_at) { Time.now.to_i - 100 } + let(:existing_city_gzmd5) { SecureRandom::hex(20) } + let(:existing_city_db_path) { ::File.join(geoip_data_path, existing_dirname, "GeoLite2-City.mmdb") } + + let(:existing_asn_db_check_at) { Time.now.to_i - 100 } + let(:existing_asn_gzmd5) { SecureRandom::hex(20) } + let(:existing_asn_db_path) { ::File.join(geoip_data_path, existing_dirname, "GeoLite2-ASN.mmdb") } + + before(:each) do + write_dummy_mmdb(constants::CITY, existing_city_db_path) unless existing_city_db_path.nil? + write_dummy_mmdb(constants::ASN, existing_asn_db_path) unless existing_asn_db_path.nil? 
+ end + + let(:metadata_contents) do + <<~EOMETA + #{constants::CITY},#{existing_city_db_check_at},#{existing_city_gzmd5},#{existing_dirname} + #{constants::ASN},#{existing_asn_db_check_at},#{existing_asn_gzmd5},#{existing_dirname} + EOMETA + end + end + + context "pre-use" do + before(:each) do + expect_any_instance_of(described_class).to_not receive(:execute_download_job) + end + it 'is not running' do + expect(manager_instance).to_not be_running + end + context "when disabled" do + let(:settings_overrides) { super().merge('xpack.geoip.downloader.enabled' => false) } + + include_context "existing databases from metadata" + + let(:mock_logger) { double('logger').as_null_object } + before(:each) do + allow(described_class).to receive(:logger).and_return(mock_logger) + end + + it 'logs info about removing managed databases' do + manager_instance # instantiate + + expect(mock_logger).to have_received(:info).with(a_string_including "removing managed databases from disk") + end + it 'removes on-disk metadata' do + manager_instance # instantiate + + expect(manager_instance.metadata).to_not exist + expect(Pathname(geoip_metadata_path)).to_not be_file + end + it 'removes on-disk databases' do + manager_instance # instantiate + + expect(Pathname(existing_city_db_path)).to_not be_file + expect(Pathname(existing_asn_db_path)).to_not be_file + end + end + end + + context "once started" do + before(:each) do + allow(manager_instance).to receive(:execute_download_job).and_return(nil) + manager_instance.send(:ensure_started!) 
+ end + it 'is running' do + expect(manager_instance).to be_running + expect(manager_instance).to have_received(:execute_download_job) + end + it 'has a data directory' do + expect(Pathname(geoip_data_path)).to be_directory + end + it 'has metadata' do + expect(Pathname(::File.expand_path("metadata.csv", geoip_data_path))).to be_file + end + end + + context "#supported_database_types" do + subject(:supported_database_types) { manager_instance.supported_database_types } + it 'includes City' do + expect(supported_database_types).to include(constants::CITY) + end + it 'includes ASN' do + expect(supported_database_types).to include(constants::ASN) + end + it 'returns only frozen strings' do + expect(supported_database_types).to all( be_a_kind_of String ) + expect(supported_database_types).to all( be_frozen ) + end + end + + context "#subscribe_database_path" do + + context "and manager is not enabled" do + let(:settings_overrides) { super().merge('xpack.geoip.downloader.enabled' => false) } + it "returns nil" do + expect(manager_instance.subscribe_database_path(constants::CITY)).to be_nil + end + end + + shared_examples "active subscription" do |database_type| + it 'receives expiry notifications' do + allow(subscription).to receive(:notify).and_call_original + + manager_instance.current_state(database_type).expire! + + expect(subscription) + .to have_received(:notify) + .with(an_object_having_attributes({:expired? => true, + :path => nil, + :removed? => true})) + end + it 'receives update notifications' do + allow(subscription).to receive(:notify).and_call_original + + updated_db_path = ::File.join(geoip_data_path, Time.now.to_i.to_s, "GeoLite2-#{database_type}.mmdb") + write_dummy_mmdb(database_type, updated_db_path) + + manager_instance.current_state(database_type).update!(updated_db_path) + + expect(subscription) + .to have_received(:notify) + .with(an_object_having_attributes({:expired? 
=> false, + :path => updated_db_path})) + end + end + + context "when metadata exists" do + include_context "existing databases from metadata" + + before(:each) do + allow(manager_instance).to receive(:execute_download_job).and_return(nil) + end + + context "the returned subscription" do + subject(:subscription) { manager_instance.subscribe_database_path(constants::CITY) } + + it 'carries the path of the DB from metadata' do + expect(subscription.value).to have_attributes(:path => existing_city_db_path) + end + + include_examples "active subscription", LogStash::GeoipDatabaseManagement::Constants::CITY + end + + context "and metadata references an mmdb that has been removed" do + let(:existing_city_db_path) { nil } # prevent write + + context "the returned subscription" do + subject(:subscription) { manager_instance.subscribe_database_path(constants::CITY) } + + it 'indicates that the DB has been removed' do + expect(subscription.value).to be_removed + end + + include_examples "active subscription", LogStash::GeoipDatabaseManagement::Constants::CITY + end + end + + context "and metadata does not contain an entry for the specified DB" do + let(:metadata_contents) do + <<~EOMETA + #{constants::ASN},#{existing_asn_db_check_at},#{existing_asn_gzmd5},#{existing_dirname} + EOMETA + end + context "the returned subscription" do + subject(:subscription) { manager_instance.subscribe_database_path(constants::CITY) } + + it 'indicates that the DB is pending' do + expect(subscription.value).to be_pending + end + + include_examples "active subscription", LogStash::GeoipDatabaseManagement::Constants::CITY + end + end + end + + context "when metadata does not yet exist" do + before(:each) do + allow(manager_instance).to receive(:execute_download_job).and_return(nil) + end + + context "the returned subscription" do + subject(:subscription) { manager_instance.subscribe_database_path(constants::CITY) } + + it 'is marked as pending' do + expect(subscription.value).to be_pending + end 
+ + include_examples "active subscription", LogStash::GeoipDatabaseManagement::Constants::CITY + end + end + end + + context "execute_download_job" do + let(:mock_logger) { double('logger').as_null_object } + before(:each) do + allow(manager_instance).to receive(:logger).and_return(mock_logger) + expect(manager_instance).to receive(:downloader).and_return(mock_downloader) + end + + let(:downloader_response) { [] } + let(:mock_downloader) do + double("downloader").tap do |downloader| + allow(downloader).to receive(:fetch_databases).with(constants::DB_TYPES).and_return(downloader_response) + allow(downloader).to receive(:uuid).and_return(SecureRandom.uuid) + end + end + + let(:updated_dirname) { (Time.now.to_i - 1).to_s } + let(:updated_city_db_path) { ::File.join(geoip_data_path, updated_dirname, "GeoLite2-City.mmdb")} + let(:updated_asn_db_path) { ::File.join(geoip_data_path, updated_dirname, "GeoLite2-ASN.mmdb")} + + shared_examples "ASN near expiry warning" do + context "when a near-expiry ASN database is not succesfully updated" do + let(:existing_asn_db_check_at) { Time.now.to_i - (27 * 24 * 60 * 60) } # 27 days ago + + it 'retains ASN state' do + allow(manager_instance.current_state(constants::ASN)).to receive(:update!).and_call_original + + manager_instance.execute_download_job + + expected_asn_attributes = { + :path => existing_asn_db_path, :pending? => false, :expired? => false, :removed? => false + } + expect(manager_instance.current_db_info(constants::ASN)).to have_attributes(expected_asn_attributes) + expect(manager_instance.current_state(constants::ASN)).to_not have_received(:update!) 
+ end + + it "emits a warning log about pending ASN expiry" do + manager_instance.execute_download_job + + expect(manager_instance.logger).to have_received(:warn).with(a_string_including "MaxMind GeoIP ASN database hasn't been synchronized in 27 days") + end + end + end + + shared_examples "ASN past expiry eviction" do + context "when a past-expiry ASN database is not successfully updated" do + let(:existing_asn_db_check_at) { Time.now.to_i - (31 * 24 * 60 * 60) } # 31 days ago + + it 'expires the ASN state' do + allow(manager_instance.current_state(constants::ASN)).to receive(:expire!).and_call_original + + manager_instance.execute_download_job + + expected_asn_attributes = { + :path => nil, :pending? => false, :expired? => true, :removed? => true + } + expect(manager_instance.current_db_info(constants::ASN)).to have_attributes(expected_asn_attributes) + expect(manager_instance.current_state(constants::ASN)).to have_received(:expire!) + end + + it "emits an error log about ASN expiry eviction" do + manager_instance.execute_download_job + + expect(manager_instance.logger).to have_received(:error).with(a_string_including("MaxMind GeoIP ASN database hasn't been synchronized in 31 days").and(including("removed"))) + end + + it "removes the expired ASN dbpath from metadata" do + manager_instance.execute_download_job + expect(manager_instance.metadata.database_path(constants::ASN)).to be_nil + end + end + end + + shared_examples "ASN updated" do + it "updates ASN state" do + allow(manager_instance.current_state(constants::ASN)).to receive(:update!).and_call_original + + manager_instance.execute_download_job + + manager_instance.current_db_info(constants::ASN).tap do |asn_db_info| + expect(asn_db_info.path).to eq(updated_asn_db_path) + expect(asn_db_info).to_not be_pending + expect(asn_db_info).to_not be_expired + expect(asn_db_info).to_not be_removed + end + + expect(manager_instance.current_state(constants::ASN)).to have_received(:update!).with(updated_asn_db_path) + 
end + it "updates ASN metadata" do + manager_instance.execute_download_job + + expect(manager_instance.metadata.database_path(constants::ASN)).to eq(updated_asn_db_path) + expect(manager_instance.metadata.check_at(constants::ASN)).to satisfy { |x| Time.now.to_i - x <= 1 } + end + end + + shared_examples "ASN unchanged" do + it "retains ASN state" do + allow(manager_instance.current_state(constants::ASN)).to receive(:update!).and_call_original + + manager_instance.execute_download_job + + manager_instance.current_db_info(constants::ASN).tap do |asn_db_info| + expect(asn_db_info.path).to eq(existing_asn_db_path) + expect(asn_db_info).to_not be_pending + expect(asn_db_info).to_not be_expired + expect(asn_db_info).to_not be_removed + end + + expect(manager_instance.current_state(constants::ASN)).to_not have_received(:update!) + end + it "updates ASN metadata check_at" do + manager_instance.execute_download_job + + expect(manager_instance.metadata.database_path(constants::ASN)).to eq(existing_asn_db_path) + expect(manager_instance.metadata.check_at(constants::ASN)).to satisfy { |x| Time.now.to_i - x <= 1 } + end + end + + shared_examples "ASN errored" do + it "retains ASN state" do + allow(manager_instance.current_state(constants::ASN)).to receive(:update!).and_call_original + + manager_instance.execute_download_job + + manager_instance.current_db_info(constants::ASN).tap do |asn_db_info| + expect(asn_db_info.path).to eq(existing_asn_db_path) + expect(asn_db_info).to_not be_pending + expect(asn_db_info).to_not be_expired + expect(asn_db_info).to_not be_removed + end + + expect(manager_instance.current_state(constants::ASN)).to_not have_received(:update!) 
+ end + it "retains ASN metadata check_at" do + manager_instance.execute_download_job + + expect(manager_instance.metadata.database_path(constants::ASN)).to eq(existing_asn_db_path) + expect(manager_instance.metadata.check_at(constants::ASN)).to eq(existing_asn_db_check_at) + end + end + + shared_examples "City updated" do + it "updates City state" do + allow(manager_instance.current_state(constants::CITY)).to receive(:update!).and_call_original + + manager_instance.execute_download_job + + manager_instance.current_db_info(constants::CITY).tap do |city_db_info| + expect(city_db_info.path).to eq(updated_city_db_path) + expect(city_db_info).to_not be_pending + expect(city_db_info).to_not be_expired + expect(city_db_info).to_not be_removed + end + + expect(manager_instance.current_state(constants::CITY)).to have_received(:update!).with(updated_city_db_path) + end + it "updates City metadata" do + manager_instance.execute_download_job + + expect(manager_instance.metadata.database_path(constants::CITY)).to eq(updated_city_db_path) + expect(manager_instance.metadata.check_at(constants::CITY)).to satisfy { |x| Time.now.to_i - x <= 1 } + end + end + + shared_examples "City unchanged" do + it "retains City state" do + allow(manager_instance.current_state(constants::CITY)).to receive(:update!).and_call_original + + manager_instance.execute_download_job + + manager_instance.current_db_info(constants::CITY).tap do |city_db_info| + expect(city_db_info.path).to eq(existing_city_db_path) + expect(city_db_info).to_not be_pending + expect(city_db_info).to_not be_expired + expect(city_db_info).to_not be_removed + end + + expect(manager_instance.current_state(constants::CITY)).to_not have_received(:update!) 
+ end + it "updates City metadata check_at" do + manager_instance.execute_download_job + + expect(manager_instance.metadata.database_path(constants::CITY)).to eq(existing_city_db_path) + expect(manager_instance.metadata.check_at(constants::CITY)).to satisfy { |x| Time.now.to_i - x <= 1 } + end + end + + shared_examples "City errored" do + it "retains City state" do + allow(manager_instance.current_state(constants::CITY)).to receive(:update!).and_call_original + + manager_instance.execute_download_job + + manager_instance.current_db_info(constants::CITY).tap do |city_db_info| + expect(city_db_info.path).to eq(existing_city_db_path) + expect(city_db_info).to_not be_pending + expect(city_db_info).to_not be_expired + expect(city_db_info).to_not be_removed + end + + expect(manager_instance.current_state(constants::CITY)).to_not have_received(:update!) + end + it "retains City metadata check_at" do + manager_instance.execute_download_job + + expect(manager_instance.metadata.database_path(constants::CITY)).to eq(existing_city_db_path) + expect(manager_instance.metadata.check_at(constants::CITY)).to eq(existing_city_db_check_at) + end + end + + context "when downloader has updates for all" do + include_context "existing databases from metadata" + + let(:updated_city_fetch) { [constants::CITY, true, updated_dirname, updated_city_db_path] } + let(:updated_asn_fetch) { [constants::ASN, true, updated_dirname, updated_asn_db_path] } + let(:downloader_response) do + [ + updated_city_fetch, + updated_asn_fetch + ] + end + + before(:each) do + manager_instance.setup + write_dummy_mmdb(constants::CITY, updated_city_db_path) + write_dummy_mmdb(constants::ASN, updated_asn_db_path) + end + + include_examples "City updated" + include_examples "ASN updated" + end + + context "when downloader has updates for City, but ASN is unchanged" do + include_context "existing databases from metadata" + + let(:updated_city_fetch) { [constants::CITY, true, updated_dirname, updated_city_db_path] } + 
+ # implementation detail: the downloader _excludes_ confirmed-same entries from the response + let(:downloader_response) do + [ + updated_city_fetch + ] + end + + before(:each) do + manager_instance.setup + write_dummy_mmdb(constants::CITY, updated_city_db_path) + end + + include_examples "City updated" + include_examples "ASN unchanged" + end + + context "when downloader has updates for City, but ASN has errors" do + include_context "existing databases from metadata" + + let(:updated_city_fetch) { [constants::CITY, true, updated_dirname, updated_city_db_path] } + let(:updated_asn_fetch) { [constants::ASN, false, nil, nil] } + + # implementation detail: the downloader _excludes_ confirmed-same entries from the response + let(:downloader_response) do + [ + updated_city_fetch, + updated_asn_fetch + ] + end + + before(:each) do + manager_instance.setup + write_dummy_mmdb(constants::CITY, updated_city_db_path) + end + + include_examples "City updated" + include_examples "ASN errored" + include_examples "ASN near expiry warning" + include_examples "ASN past expiry eviction" + end + + context "when downloader has no changes" do + include_context "existing databases from metadata" + + before(:each) do + manager_instance.setup + end + + include_examples "City unchanged" + include_examples "ASN unchanged" + end + + context "when downloader is exceptional" do + include_context "existing databases from metadata" + + before(:each) do + expect(mock_downloader).to receive(:fetch_databases).with(constants::DB_TYPES).and_raise(RuntimeError) + manager_instance.setup + end + + include_examples "City errored" + include_examples "ASN errored" + include_examples "ASN near expiry warning" + include_examples "ASN past expiry eviction" + end + end +end \ No newline at end of file diff --git a/x-pack/spec/geoip_database_management/metadata_spec.rb b/x-pack/spec/geoip_database_management/metadata_spec.rb new file mode 100644 index 00000000000..de8ee05af6f --- /dev/null +++ 
b/x-pack/spec/geoip_database_management/metadata_spec.rb @@ -0,0 +1,86 @@ +# # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# # or more contributor license agreements. Licensed under the Elastic License; +# # you may not use this file except in compliance with the Elastic License. + +require 'geoip_database_management/manager' +require 'geoip_database_management/metadata' +require 'geoip_database_management/data_path' +require 'geoip_database_management/util' + +describe LogStash::GeoipDatabaseManagement::Metadata, :aggregate_failures do + let(:temp_metadata_path) { Stud::Temporary.directory } + let(:data_path) { LogStash::GeoipDatabaseManagement::DataPath.new(temp_metadata_path) } + let(:dbm) { described_class.new(data_path) } + let(:logger) { double("Logger").as_null_object } + + context "get all" do + it "returns multiple rows" do + dbm.save_metadata(LogStash::GeoipDatabaseManagement::CITY, "#{Time.now.to_i - 1}", gz_md5: SecureRandom.hex(40)) + dbm.save_metadata(LogStash::GeoipDatabaseManagement::ASN, "#{Time.now.to_i}", gz_md5: SecureRandom.hex(40)) + + expect(dbm.get_all.size).to eq(2) + end + end + + context "get metadata" do + context "when populated file exists" do + before(:each) do + dbm.save_metadata(LogStash::GeoipDatabaseManagement::CITY, "#{Time.now.to_i - 1}", gz_md5: SecureRandom.hex(40)) + dbm.save_metadata(LogStash::GeoipDatabaseManagement::ASN, "#{Time.now.to_i}", gz_md5: SecureRandom.hex(40)) + end + it "returns matching metadata" do + city_rows = dbm.get_metadata(LogStash::GeoipDatabaseManagement::CITY) + expect(city_rows.size).to eq(1) + expect(city_rows).to all satisfy {|row| row[described_class::Column::DATABASE_TYPE] == LogStash::GeoipDatabaseManagement::CITY } + + asn_rows = dbm.get_metadata(LogStash::GeoipDatabaseManagement::ASN) + expect(asn_rows.size).to eq(1) + expect(asn_rows).to all satisfy {|row| row[described_class::Column::DATABASE_TYPE] == LogStash::GeoipDatabaseManagement::ASN } + end + end + + 
context "when file does not exist" do + it "returns empty results" do + city_rows = dbm.get_metadata(LogStash::GeoipDatabaseManagement::CITY) + expect(city_rows).to be_empty + + asn_rows = dbm.get_metadata(LogStash::GeoipDatabaseManagement::ASN) + expect(asn_rows).to be_empty + end + end + + context "when empty file exists" do + before(:each) do + FileUtils.touch(temp_metadata_path) + end + it "returns empty results" do + city_rows = dbm.get_metadata(LogStash::GeoipDatabaseManagement::CITY) + expect(city_rows).to be_empty + + asn_rows = dbm.get_metadata(LogStash::GeoipDatabaseManagement::ASN) + expect(asn_rows).to be_empty + end + end + + context "saving" do + let(:database_dirname) { "#{Time.now.to_i}" } + let(:database_gz_md5) { "1bad1dea" } + let(:database_db_md5) { "0f1c1a17" } + before(:each) do + dbm.save_metadata(LogStash::GeoipDatabaseManagement::CITY, database_dirname, gz_md5: database_gz_md5) + end + + it "saves the metadata" do + metadata = dbm.get_metadata(LogStash::GeoipDatabaseManagement::CITY).last + + expect(metadata[described_class::Column::DATABASE_TYPE]).to eq(LogStash::GeoipDatabaseManagement::CITY) + + check_at = metadata[described_class::Column::CHECK_AT] + expect(Time.now.to_i - check_at.to_i).to be < 100 + + expect(metadata[described_class::Column::DIRNAME]).to eq(database_dirname) + expect(metadata[described_class::Column::GZ_MD5]).to eq(database_gz_md5) + end + end + end +end \ No newline at end of file diff --git a/x-pack/spec/geoip_database_management/spec_helper.rb b/x-pack/spec/geoip_database_management/spec_helper.rb new file mode 100644 index 00000000000..e175f75c445 --- /dev/null +++ b/x-pack/spec/geoip_database_management/spec_helper.rb @@ -0,0 +1,14 @@ + +RSpec.configure do |config| + config.around(:each, verify_stubs: true) do |example| + config.mock_with :rspec do |mocks| + begin + previous_verify = mockes.verify_partial_doubles + mocks.verify_partial_doubles = true + example.run + ensure + mocks.verify_partial_doubles = 
previous_verify + end + end + end +end \ No newline at end of file diff --git a/x-pack/spec/geoip_database_management/subscription_spec.rb b/x-pack/spec/geoip_database_management/subscription_spec.rb new file mode 100644 index 00000000000..53ce31de6f1 --- /dev/null +++ b/x-pack/spec/geoip_database_management/subscription_spec.rb @@ -0,0 +1,223 @@ +# # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# # or more contributor license agreements. Licensed under the Elastic License; +# # you may not use this file except in compliance with the Elastic License. + +require 'geoip_database_management/subscription' + +describe LogStash::GeoipDatabaseManagement::Subscription, :aggregate_failures do + let(:mock_state) { double("state", release!: nil) } + let(:initial_value) { LogStash::GeoipDatabaseManagement::DbInfo::PENDING } + + subject(:subscription) { described_class.new(initial_value, mock_state) } + + context "#value" do + context "blocking" do + it 'yields the current value' do + expect { |b| subscription.value(&b) }.to yield_with_args(initial_value) + end + it 'returns the result of the block' do + return_value = Object.new + expect(subscription.value { |_| return_value}).to equal return_value + end + context "under contention" do + it 'allows many concurrent readers' do + concurrency = 10 + + start_latch = Concurrent::CountDownLatch.new(concurrency) + release_latch = Concurrent::CountDownLatch.new(concurrency) + finish_latch = Concurrent::CountDownLatch.new(concurrency) + + max_concurrent = Concurrent::AtomicFixnum.new + + threads = concurrency.times.map do |idx| + Thread.new do + Thread.current.abort_on_exception = true + start_latch.count_down + start_latch.wait(2) || fail("threads failed to start") + subscription.value do |db_info| + max_concurrent.increment + + release_latch.count_down + release_latch.wait(2) || fail("threads failed to concurrently lock value (#{max_concurrent})") + end + finish_latch.count_down + finish_latch.wait(2) 
|| fail("failed to release") + end + end + + # cleanup threads + deadline = Time.now + 10 + threads.each do |t| + timeout_remaining = [deadline - Time.now, 0].max + t.kill unless t.join(timeout_remaining) + end + + expect(max_concurrent.value).to eq(concurrency) + end + + # validates that #value with a block will prevent updates until control is returned. + # sets up a sequence in which several readers get the initial value concurrently, + # a writer contends for the lock and modifies the value, and subsequent readers get + # the updated value. + it 'read-write contention', aggregate_failures: true do + + pre_write_count = 3 + post_write_count = 7 + reader_count = pre_write_count + post_write_count + + readers_ready_latch = Concurrent::CountDownLatch.new(reader_count) + writer_ready_event = Concurrent::Event.new + pre_write_read_acquired_latch = Concurrent::CountDownLatch.new(pre_write_count) + pre_write_read_released_latch = Concurrent::CountDownLatch.new(pre_write_count) + pre_write_event = Concurrent::Event.new + + values = Queue.new + + threads = [] + + # pre-write: acquire multiple locks, then signal writer and give it + # a chance to contend for the lock before releasing + pre_write_count.times do |idx| + threads << Thread.new do + Thread.current.abort_on_exception = true + readers_ready_latch.count_down + writer_ready_event.wait(2) || fail("writer failed to become ready") + subscription.value do |db_info| + pre_write_read_acquired_latch.count_down + values << db_info + + # wait until writer has signaled that it is about to try to write + pre_write_event.wait(2) || fail("writer failed to begin action") + sleep(1) # wait long enough to ensure contention + # ensure that the other readers are free to begin + pre_write_read_released_latch.count_down + end + end + end + + # post-write: wait until _just_ before the pre-write readers release their lock, + # ensuring we are queued after the writer's blocked write. 
+ post_write_count.times do |idx| + threads << Thread.new do + Thread.current.abort_on_exception = true + readers_ready_latch.count_down + pre_write_read_released_latch.wait(10) || fail("pre-write readers failed to finish") + subscription.value do |db_info| + values << db_info + end + end + end + + # write: wait until the pre-write readers have acquired the lock + # before performing the write. + updated_db_info = LogStash::GeoipDatabaseManagement::DbInfo.new(path: "/path/to/db") + threads << Thread.new do + Thread.current.abort_on_exception = true + writer_ready_event.set + readers_ready_latch.wait(10) || fail("readers never became ready") + pre_write_read_acquired_latch.wait(10) || fail("pre reads never acquired") + pre_write_event.set + subscription.notify(updated_db_info) + end + + # cleanup threads + deadline = Time.now + 10 + threads.each do |t| + timeout_remaining = [deadline - Time.now, 0].max + t.kill unless t.join(timeout_remaining) + end + + expect(values.size).to eq(pre_write_count + post_write_count) + pre_write_count.times do + expect(values.pop(true)).to equal initial_value + end + post_write_count.times do + expect(values.pop(true)).to equal updated_db_info + end + expect(values).to be_empty + end + end + end + + context "non-blocking" do + it 'returns the current value' do + expect(subscription.value).to equal initial_value + end + end + end + + context '#release!' do + it 'releases' do + subscription.release! 
+ + expect(mock_state).to have_received(:release!).with(subscription) + end + end + + context "#observe" do + shared_examples "observation" do + let!(:log) { Queue.new } + + it "observes construct, update, and expiry" do + current_value = LogStash::GeoipDatabaseManagement::DbInfo.new(path: "/one/two") + subscription.notify(current_value) + expect(log).to be_empty + + subscription.observe(observer_spec) + + expect(log.size).to eq(1) + expect(log.pop(true)).to eq([:construct, current_value]) + + updated_value = LogStash::GeoipDatabaseManagement::DbInfo.new(path: "/three/four") + subscription.notify(updated_value) + expect(log.size).to eq(1) + expect(log.pop(true)).to eq([:on_update, updated_value]) + + expired_value = LogStash::GeoipDatabaseManagement::DbInfo::EXPIRED + subscription.notify(expired_value) + + expect(log.size).to eq(1) + expect(log.pop(true)).to eq([:on_expire]) + + another_updated_value = LogStash::GeoipDatabaseManagement::DbInfo.new(path: "/five/six") + subscription.notify(another_updated_value) + expect(log.size).to eq(1) + expect(log.pop(true)).to eq([:on_update, another_updated_value]) + end + + context 'when subscription was previously released' do + before(:each) { subscription.release! 
} + it 'prevents new observation' do + expect { subscription.observe(observer_spec) }.to raise_exception(/released/) + expect(log).to be_empty + end + end + end + + context "when given a components hash" do + let(:observer_spec) { + { + construct: ->(v) { log << [:construct, v]}, + on_update: ->(v) { log << [:on_update, v]}, + on_expire: ->( ) { log << [:on_expire] }, + } + } + + include_examples "observation" + end + + context "when given an object that quacks like a SubscriptionObserver instance" do + let(:observer_class) do + Class.new do + def initialize(log); @log = log; end + def construct(v); @log << [:construct, v]; end + def on_update(v); @log << [:on_update, v]; end + def on_expire; @log << [:on_expire]; end + end + end + let(:observer_spec) { observer_class.new(log) } + + include_examples "observation" + end + end +end \ No newline at end of file diff --git a/x-pack/spec/support/helpers.rb b/x-pack/spec/support/helpers.rb index 4d7fa45fc54..3407503460d 100644 --- a/x-pack/spec/support/helpers.rb +++ b/x-pack/spec/support/helpers.rb @@ -30,6 +30,8 @@ def apply_settings(settings_values, settings = nil) settings.set(key, value) end + return yield(settings) if block_given? + settings end