# Patch: route Explainer indexing/deletion through Bot::Alegre "raw params" helpers.
#
# What the hunks below do:
#   * TemporaryProjectMedia gains a `text` accessor.
#   * AlegreV2 gains async_path_for_type, delete_path_for_type,
#     request_delete_from_raw, content_hash_for_value, get_sync_raw_params and
#     get_async_raw_params; async_path, delete_path and content_hash now
#     delegate to those helpers.
#   * Explainer.update_paragraphs_in_alegre adds a content_hash to each
#     title/paragraph payload and posts it with get_async_raw_params(params, "text");
#     stale paragraphs are removed with request_delete_from_raw(params, "text").
#     The literal "text" is passed because no `type` local or parameter exists
#     in that method's visible scope.
#   * Explainer.search_by_similarity stores the call result in `response`,
#     which the following line (`response['result']`) reads.
#
# NOTE(review): search_by_similarity now posts to "/similarity/async/text"
#   rather than "/text/similarity/search/". Confirm that this endpoint returns
#   a 'result' list in its response; if not, get_sync_raw_params may be the
#   intended call.
# NOTE(review): leading whitespace on the hunk lines was reconstructed by
#   convention; verify it against the target files before applying.
diff --git a/app/models/concerns/alegre_v2.rb b/app/models/concerns/alegre_v2.rb
index 2dde5fc159..b8066a6786 100644
--- a/app/models/concerns/alegre_v2.rb
+++ b/app/models/concerns/alegre_v2.rb
@@ -1,7 +1,7 @@
 require 'active_support/concern'
 class AlegreTimeoutError < StandardError; end
 class TemporaryProjectMedia
-  attr_accessor :team_id, :id, :url, :type
+  attr_accessor :team_id, :id, :url, :text, :type
   def media
     media_type_map = {
       "claim" => "Claim",
@@ -55,11 +55,18 @@ def sync_path_for_type(type)
     end
 
     def async_path(project_media)
-      "/similarity/async/#{get_type(project_media)}"
+      self.async_path_for_type(get_type(project_media))
+    end
+
+    def async_path_for_type(type)
+      "/similarity/async/#{type}"
     end
 
     def delete_path(project_media)
-      type = get_type(project_media)
+      self.delete_path_for_type(get_type(project_media))
+    end
+
+    def delete_path_for_type(type)
       "/#{type}/similarity/"
     end
 
@@ -122,6 +129,10 @@ def request(method, path, params, retries=3)
       end
     end
 
+    def request_delete_from_raw(params, type)
+      request("delete", delete_path_for_type(type), params)
+    end
+
     def request_delete(data, project_media)
       request("delete", delete_path(project_media), data)
     end
@@ -148,18 +159,22 @@ def get_type(project_media)
       type
     end
 
+    def content_hash_for_value(value)
+      Digest::MD5.hexdigest(value)
+    end
+
     def content_hash(project_media, field)
       if Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS.include?(field)
-        Digest::MD5.hexdigest(project_media.send(field))
+        content_hash_for_value(project_media.send(field))
       else
         if project_media.is_link?
-          return Digest::MD5.hexdigest(project_media.media.url)
+          return content_hash_for_value(project_media.media.url)
         elsif project_media.is_a?(TemporaryProjectMedia)
           return Rails.cache.read("url_sha:#{project_media.url}")
         elsif !project_media.is_text?
           return project_media.media.file.filename.split(".").first
         else
-          return Digest::MD5.hexdigest(project_media.send(field).to_s)
+          return content_hash_for_value(project_media.send(field).to_s)
         end
       end
     end
@@ -267,6 +282,14 @@ def store_package_text(project_media, field, params)
       generic_package_text(project_media, field, params)
     end
 
+    def get_sync_raw_params(params, type)
+      request("post", sync_path_for_type(type), params)
+    end
+
+    def get_async_raw_params(params, type)
+      request("post", async_path_for_type(type), params)
+    end
+
     def get_sync(project_media, field=nil, params={})
       request_sync(
         store_package(project_media, field, params),
diff --git a/app/models/explainer.rb b/app/models/explainer.rb
index a4319e718a..aab8e8dbbf 100644
--- a/app/models/explainer.rb
+++ b/app/models/explainer.rb
@@ -63,24 +63,26 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp)
 
     # Index title
     params = {
+      content_hash: Bot::Alegre.content_hash_for_value(explainer.title),
       doc_id: Digest::MD5.hexdigest(['explainer', explainer.id, 'title'].join(':')),
+      context: base_context.merge({ field: 'title' }),
       text: explainer.title,
       models: ALEGRE_MODELS_AND_THRESHOLDS.keys,
-      context: base_context.merge({ field: 'title' })
     }
-    Bot::Alegre.request('post', '/text/similarity/', params)
+    Bot::Alegre.get_async_raw_params(params, "text")
 
     # Index paragraphs
     count = 0
     explainer.description.to_s.gsub(/\r\n?/, "\n").split(/\n+/).reject{ |paragraph| paragraph.strip.blank? }.each do |paragraph|
       count += 1
       params = {
+        content_hash: Bot::Alegre.content_hash_for_value(paragraph.strip),
         doc_id: Digest::MD5.hexdigest(['explainer', explainer.id, 'paragraph', count].join(':')),
+        context: base_context.merge({ paragraph: count }),
         text: paragraph.strip,
         models: ALEGRE_MODELS_AND_THRESHOLDS.keys,
-        context: base_context.merge({ paragraph: count })
       }
-      Bot::Alegre.request('post', '/text/similarity/', params)
+      Bot::Alegre.get_async_raw_params(params, "text")
     end
 
     # Remove paragraphs that don't exist anymore (we delete after updating in order to avoid race conditions)
@@ -91,7 +93,7 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp)
         quiet: true,
         context: base_context.merge({ paragraph: count })
       }
-      Bot::Alegre.request('delete', '/text/similarity/', params)
+      Bot::Alegre.request_delete_from_raw(params, "text")
     end
   end
 
@@ -106,7 +108,7 @@ def self.search_by_similarity(text, language, team_id)
         language: language
       }
     }
-    response = Bot::Alegre.request('post', '/text/similarity/search/', params)
+    response = Bot::Alegre.get_async_raw_params(params, "text")
     results = response['result'].to_a.sort_by{ |result| result['_score'] }
     explainer_ids = results.collect{ |result| result.dig('_source', 'context', 'explainer_id').to_i }.uniq.first(3)
     explainer_ids.empty? ? Explainer.none : Explainer.where(team_id: team_id, id: explainer_ids)