From 991368544e9b4e595a54be1339df6357feedf864 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Sat, 17 Aug 2024 10:14:47 -0700 Subject: [PATCH 01/16] CV2-5087 move Articles side effecting saves to to it via presto --- app/models/concerns/alegre_v2.rb | 35 ++++++++++++++++++++++++++------ app/models/explainer.rb | 14 +++++++------ 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/app/models/concerns/alegre_v2.rb b/app/models/concerns/alegre_v2.rb index 2dde5fc159..b8066a6786 100644 --- a/app/models/concerns/alegre_v2.rb +++ b/app/models/concerns/alegre_v2.rb @@ -1,7 +1,7 @@ require 'active_support/concern' class AlegreTimeoutError < StandardError; end class TemporaryProjectMedia - attr_accessor :team_id, :id, :url, :type + attr_accessor :team_id, :id, :url, :text, :type def media media_type_map = { "claim" => "Claim", @@ -55,11 +55,18 @@ def sync_path_for_type(type) end def async_path(project_media) - "/similarity/async/#{get_type(project_media)}" + self.async_path_for_type(get_type(project_media)) + end + + def async_path_for_type(type) + "/similarity/async/#{type}" end def delete_path(project_media) - type = get_type(project_media) + self.delete_path_for_type(get_type(project_media)) + end + + def delete_path_for_type(type) "/#{type}/similarity/" end @@ -122,6 +129,10 @@ def request(method, path, params, retries=3) end end + def request_delete_from_raw(params, type) + request("delete", delete_path_for_type(type), params) + end + def request_delete(data, project_media) request("delete", delete_path(project_media), data) end @@ -148,18 +159,22 @@ def get_type(project_media) type end + def content_hash_for_value(value) + Digest::MD5.hexdigest(value) + end + def content_hash(project_media, field) if Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS.include?(field) - Digest::MD5.hexdigest(project_media.send(field)) + content_hash_for_value(project_media.send(field)) else if project_media.is_link? - return Digest::MD5.hexdigest(project_media.media.url) + return content_hash_for_value(project_media.media.url) elsif project_media.is_a?(TemporaryProjectMedia) return Rails.cache.read("url_sha:#{project_media.url}") elsif !project_media.is_text? return project_media.media.file.filename.split(".").first else - return Digest::MD5.hexdigest(project_media.send(field).to_s) + return content_hash_for_value(project_media.send(field).to_s) end end end @@ -267,6 +282,14 @@ def store_package_text(project_media, field, params) generic_package_text(project_media, field, params) end + def get_sync_raw_params(params, type) + request("post", sync_path_for_type(type), params) + end + + def get_async_raw_params(params, type) + request("post", async_path_for_type(type), params) + end + def get_sync(project_media, field=nil, params={}) request_sync( store_package(project_media, field, params), diff --git a/app/models/explainer.rb b/app/models/explainer.rb index a4319e718a..aab8e8dbbf 100644 --- a/app/models/explainer.rb +++ b/app/models/explainer.rb @@ -63,24 +63,26 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp) # Index title params = { + content_hash: Bot::Alegre.content_hash_for_value(explainer.title), doc_id: Digest::MD5.hexdigest(['explainer', explainer.id, 'title'].join(':')), + context: base_context.merge({ field: 'title' }), text: explainer.title, models: ALEGRE_MODELS_AND_THRESHOLDS.keys, - context: base_context.merge({ field: 'title' }) } - Bot::Alegre.request('post', '/text/similarity/', params) + Bot::Alegre.get_async_raw_params(params, "text") # Index paragraphs count = 0 explainer.description.to_s.gsub(/\r\n?/, "\n").split(/\n+/).reject{ |paragraph| paragraph.strip.blank? }.each do |paragraph| count += 1 params = { + content_hash: Bot::Alegre.content_hash_for_value(paragraph.strip), doc_id: Digest::MD5.hexdigest(['explainer', explainer.id, 'paragraph', count].join(':')), + context: base_context.merge({ paragraph: count }), text: paragraph.strip, models: ALEGRE_MODELS_AND_THRESHOLDS.keys, - context: base_context.merge({ paragraph: count }) } - Bot::Alegre.request('post', '/text/similarity/', params) + Bot::Alegre.get_async_raw_params(params, "text") end # Remove paragraphs that don't exist anymore (we delete after updating in order to avoid race conditions) @@ -91,7 +93,7 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp) quiet: true, context: base_context.merge({ paragraph: count }) } - Bot::Alegre.request('delete', '/text/similarity/', params) + Bot::Alegre.request_delete_from_raw(params, type) end end @@ -106,7 +108,7 @@ def self.search_by_similarity(text, language, team_id) language: language } } - response = Bot::Alegre.request('post', '/text/similarity/search/', params) + Bot::Alegre.get_async_raw_params(params, "text") results = response['result'].to_a.sort_by{ |result| result['_score'] } explainer_ids = results.collect{ |result| result.dig('_source', 'context', 'explainer_id').to_i }.uniq.first(3) explainer_ids.empty? ? Explainer.none : Explainer.where(team_id: team_id, id: explainer_ids) From 3f540c495c1ca302c9e285c98732b62f71246321 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Sat, 17 Aug 2024 10:28:50 -0700 Subject: [PATCH 02/16] CV2-5082 move article indexing to presto --- app/models/concerns/article.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/concerns/article.rb b/app/models/concerns/article.rb index e4b1ed48be..3cfafc5dda 100644 --- a/app/models/concerns/article.rb +++ b/app/models/concerns/article.rb @@ -84,7 +84,7 @@ def send_to_alegre(id) obj = self.find_by_id(id) return if obj.project_media.nil? obj.text_fields.each do |field| - ::Bot::Alegre.send_field_to_similarity_index(obj.project_media, field) + ::Bot::Alegre.relate_project_media_async(obj.project_media, field) end unless obj.nil? end end From 8f5ca5ddb813c73364051ae8d606d030b78c0002 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Sat, 17 Aug 2024 15:18:24 -0700 Subject: [PATCH 03/16] resolve test errors --- app/models/concerns/alegre_v2.rb | 8 +++++--- test/models/bot/smooch_6_test.rb | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/app/models/concerns/alegre_v2.rb b/app/models/concerns/alegre_v2.rb index b8066a6786..cd3e791052 100644 --- a/app/models/concerns/alegre_v2.rb +++ b/app/models/concerns/alegre_v2.rb @@ -160,7 +160,7 @@ def get_type(project_media) end def content_hash_for_value(value) - Digest::MD5.hexdigest(value) + value.nil? ? nil : Digest::MD5.hexdigest(value) end def content_hash(project_media, field) @@ -180,11 +180,13 @@ def content_hash(project_media, field) end def generic_package(project_media, field) - { - content_hash: content_hash(project_media, field), + content_hash_value = content_hash(project_media, field) + params = { doc_id: item_doc_id(project_media, field), context: get_context(project_media, field) } + params[:content_hash] = content_hash_value if !content_hash_value.nil? + params end def delete_package(project_media, field, params={}, quiet=false) diff --git a/test/models/bot/smooch_6_test.rb b/test/models/bot/smooch_6_test.rb index 6473d95dc2..bc414b93ef 100644 --- a/test/models/bot/smooch_6_test.rb +++ b/test/models/bot/smooch_6_test.rb @@ -139,6 +139,7 @@ def send_message_outside_24_hours_window(template, pm = nil) test "should submit query without details on tipline bot v2" do WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers claim = 'This is a test claim' send_message 'hello', '1', '1', random_string, random_string, claim, random_string, random_string, '1' assert_saved_query_type 'default_requests' @@ -208,6 +209,7 @@ def send_message_outside_24_hours_window(template, pm = nil) end test "should submit query with details on tipline bot v2" do + WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers claim = 'This is a test claim' send_message 'hello', '1', '1', random_string, '2', random_string, claim, '1' @@ -285,7 +287,7 @@ def send_message_outside_24_hours_window(template, pm = nil) end test "should submit query and handle search error on tipline bot v2" do - WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers CheckSearch.any_instance.stubs(:medias).raises(StandardError) Sidekiq::Testing.inline! do send_message 'hello', '1', '1', 'Foo bar', '1' @@ -384,7 +386,7 @@ def send_message_outside_24_hours_window(template, pm = nil) ProjectMedia.any_instance.stubs(:report_status).returns('published') ProjectMedia.any_instance.stubs(:analysis_published_article_url).returns(random_url) Bot::Alegre.stubs(:get_merged_similar_items).returns({ create_project_media.id => { score: 0.9 } }) - WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers Sidekiq::Testing.inline! do send_message 'hello', '1', '1', "Foo bar foo bar #{url} foo bar", '1' end @@ -693,7 +695,7 @@ def send_message_outside_24_hours_window(template, pm = nil) pm = create_project_media team: @team publish_report(pm, {}, nil, { language: 'pt', use_visual_card: false }) Bot::Smooch.stubs(:get_search_results).returns([pm]) - WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers Sidekiq::Testing.inline! do send_message 'hello', '1', '1', 'Foo bar', '1' end From 1b964b6ff3c52b7362352cb51a5193e873a3205e Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Sat, 17 Aug 2024 17:16:04 -0700 Subject: [PATCH 04/16] updates for broken tests --- test/models/bot/smooch_7_test.rb | 2 +- test/models/explainer_test.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/models/bot/smooch_7_test.rb b/test/models/bot/smooch_7_test.rb index 4fd46ac40e..c0d877c837 100644 --- a/test/models/bot/smooch_7_test.rb +++ b/test/models/bot/smooch_7_test.rb @@ -600,7 +600,7 @@ def teardown end test "should include claim_description_content in smooch search" do - WebMock.stub_request(:post, 'http://alegre:3100/text/similarity/').to_return(body: {}.to_json) + WebMock.stub_request(:post, 'http://alegre:3100/similarity/async/image').to_return(body: {}.to_json) RequestStore.store[:skip_cached_field_update] = false t = create_team m = create_uploaded_image diff --git a/test/models/explainer_test.rb b/test/models/explainer_test.rb index cce9645166..c9010238fe 100644 --- a/test/models/explainer_test.rb +++ b/test/models/explainer_test.rb @@ -99,12 +99,12 @@ def setup } # Index two paragraphs and title when the explainer is created - Bot::Alegre.stubs(:request).with('post', '/text/similarity/', anything).times(3) + Bot::Alegre.stubs(:request).with('post', '/similarity/async/text', anything).times(3) Bot::Alegre.stubs(:request).with('delete', '/text/similarity/', anything).never ex = create_explainer description: description # Update the index when paragraphs change - Bot::Alegre.stubs(:request).with('post', '/text/similarity/', anything).times(2) + Bot::Alegre.stubs(:request).with('post', '/similarity/async/text', anything).times(2) Bot::Alegre.stubs(:request).with('delete', '/text/similarity/', anything).once ex = Explainer.find(ex.id) ex.description = 'Now this is the only paragraph' From b0d4791814b96926e25ad25cd08d0d9734f53b11 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Sat, 17 Aug 2024 17:29:02 -0700 Subject: [PATCH 05/16] small tweak --- app/models/explainer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/explainer.rb b/app/models/explainer.rb index aab8e8dbbf..c59740740c 100644 --- a/app/models/explainer.rb +++ b/app/models/explainer.rb @@ -93,7 +93,7 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp) quiet: true, context: base_context.merge({ paragraph: count }) } - Bot::Alegre.request_delete_from_raw(params, type) + Bot::Alegre.request_delete_from_raw(params, "text") end end From 8245dd1595373ea42bbd14a581404b45f4efa02e Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Sat, 17 Aug 2024 19:11:01 -0700 Subject: [PATCH 06/16] set to sync --- app/models/explainer.rb | 6 +++--- test/models/bot/smooch_6_test.rb | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/app/models/explainer.rb b/app/models/explainer.rb index c59740740c..3ac3e05a71 100644 --- a/app/models/explainer.rb +++ b/app/models/explainer.rb @@ -69,7 +69,7 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp) text: explainer.title, models: ALEGRE_MODELS_AND_THRESHOLDS.keys, } - Bot::Alegre.get_async_raw_params(params, "text") + Bot::Alegre.get_sync_raw_params(params, "text") # Index paragraphs count = 0 @@ -82,7 +82,7 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp) text: paragraph.strip, models: ALEGRE_MODELS_AND_THRESHOLDS.keys, } - Bot::Alegre.get_async_raw_params(params, "text") + Bot::Alegre.get_sync_raw_params(params, "text") end # Remove paragraphs that don't exist anymore (we delete after updating in order to avoid race conditions) @@ -108,7 +108,7 @@ def self.search_by_similarity(text, language, team_id) language: language } } - Bot::Alegre.get_async_raw_params(params, "text") + response = Bot::Alegre.get_sync_raw_params(params, "text") results = response['result'].to_a.sort_by{ |result| result['_score'] } explainer_ids = results.collect{ |result| result.dig('_source', 'context', 'explainer_id').to_i }.uniq.first(3) explainer_ids.empty? ? Explainer.none : Explainer.where(team_id: team_id, id: explainer_ids) diff --git a/test/models/bot/smooch_6_test.rb b/test/models/bot/smooch_6_test.rb index bc414b93ef..eed425a1b7 100644 --- a/test/models/bot/smooch_6_test.rb +++ b/test/models/bot/smooch_6_test.rb @@ -139,7 +139,7 @@ def send_message_outside_24_hours_window(template, pm = nil) test "should submit query without details on tipline bot v2" do WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers - WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/sync\/text/).to_return(body: {}.to_json) # For explainers claim = 'This is a test claim' send_message 'hello', '1', '1', random_string, random_string, claim, random_string, random_string, '1' assert_saved_query_type 'default_requests' @@ -209,7 +209,7 @@ def send_message_outside_24_hours_window(template, pm = nil) end test "should submit query with details on tipline bot v2" do - WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/sync\/text/).to_return(body: {}.to_json) # For explainers WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers claim = 'This is a test claim' send_message 'hello', '1', '1', random_string, '2', random_string, claim, '1' @@ -287,7 +287,7 @@ def send_message_outside_24_hours_window(template, pm = nil) end test "should submit query and handle search error on tipline bot v2" do - WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/sync\/text/).to_return(body: {}.to_json) # For explainers CheckSearch.any_instance.stubs(:medias).raises(StandardError) Sidekiq::Testing.inline! do send_message 'hello', '1', '1', 'Foo bar', '1' @@ -386,7 +386,7 @@ def send_message_outside_24_hours_window(template, pm = nil) ProjectMedia.any_instance.stubs(:report_status).returns('published') ProjectMedia.any_instance.stubs(:analysis_published_article_url).returns(random_url) Bot::Alegre.stubs(:get_merged_similar_items).returns({ create_project_media.id => { score: 0.9 } }) - WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/sync\/text/).to_return(body: {}.to_json) # For explainers Sidekiq::Testing.inline! do send_message 'hello', '1', '1', "Foo bar foo bar #{url} foo bar", '1' end @@ -695,7 +695,7 @@ def send_message_outside_24_hours_window(template, pm = nil) pm = create_project_media team: @team publish_report(pm, {}, nil, { language: 'pt', use_visual_card: false }) Bot::Smooch.stubs(:get_search_results).returns([pm]) - WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/sync\/text/).to_return(body: {}.to_json) # For explainers Sidekiq::Testing.inline! do send_message 'hello', '1', '1', 'Foo bar', '1' end From 89d6b8b95360a4a56050e00302f70a906815a663 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Sun, 18 Aug 2024 06:05:15 -0700 Subject: [PATCH 07/16] more fixes --- test/models/explainer_test.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/models/explainer_test.rb b/test/models/explainer_test.rb index c9010238fe..44b0bd64a4 100644 --- a/test/models/explainer_test.rb +++ b/test/models/explainer_test.rb @@ -99,12 +99,12 @@ def setup } # Index two paragraphs and title when the explainer is created - Bot::Alegre.stubs(:request).with('post', '/similarity/async/text', anything).times(3) + Bot::Alegre.stubs(:request).with('post', '/similarity/sync/text', anything).times(3) Bot::Alegre.stubs(:request).with('delete', '/text/similarity/', anything).never ex = create_explainer description: description # Update the index when paragraphs change - Bot::Alegre.stubs(:request).with('post', '/similarity/async/text', anything).times(2) + Bot::Alegre.stubs(:request).with('post', '/similarity/sync/text', anything).times(2) Bot::Alegre.stubs(:request).with('delete', '/text/similarity/', anything).once ex = Explainer.find(ex.id) ex.description = 'Now this is the only paragraph' From 6e6e68f4f14039a19a770645ac3d22d87f5d99fa Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Mon, 19 Aug 2024 09:28:44 -0700 Subject: [PATCH 08/16] rename function and revert request --- app/models/concerns/alegre_v2.rb | 4 ++-- app/models/concerns/article.rb | 2 +- app/models/explainer.rb | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/app/models/concerns/alegre_v2.rb b/app/models/concerns/alegre_v2.rb index cd3e791052..acd2c3ed4b 100644 --- a/app/models/concerns/alegre_v2.rb +++ b/app/models/concerns/alegre_v2.rb @@ -284,11 +284,11 @@ def store_package_text(project_media, field, params) generic_package_text(project_media, field, params) end - def get_sync_raw_params(params, type) + def get_sync_with_params(params, type) request("post", sync_path_for_type(type), params) end - def get_async_raw_params(params, type) + def get_async_with_params(params, type) request("post", async_path_for_type(type), params) end diff --git a/app/models/concerns/article.rb b/app/models/concerns/article.rb index 3cfafc5dda..e4b1ed48be 100644 --- a/app/models/concerns/article.rb +++ b/app/models/concerns/article.rb @@ -84,7 +84,7 @@ def send_to_alegre(id) obj = self.find_by_id(id) return if obj.project_media.nil? obj.text_fields.each do |field| - ::Bot::Alegre.relate_project_media_async(obj.project_media, field) + ::Bot::Alegre.send_field_to_similarity_index(obj.project_media, field) end unless obj.nil? end end diff --git a/app/models/explainer.rb b/app/models/explainer.rb index 3ac3e05a71..9cabd15f48 100644 --- a/app/models/explainer.rb +++ b/app/models/explainer.rb @@ -69,7 +69,7 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp) text: explainer.title, models: ALEGRE_MODELS_AND_THRESHOLDS.keys, } - Bot::Alegre.get_sync_raw_params(params, "text") + Bot::Alegre.get_sync_with_params(params, "text") # Index paragraphs count = 0 @@ -82,7 +82,7 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp) text: paragraph.strip, models: ALEGRE_MODELS_AND_THRESHOLDS.keys, } - Bot::Alegre.get_sync_raw_params(params, "text") + Bot::Alegre.get_sync_with_params(params, "text") end # Remove paragraphs that don't exist anymore (we delete after updating in order to avoid race conditions) @@ -108,7 +108,7 @@ def self.search_by_similarity(text, language, team_id) language: language } } - response = Bot::Alegre.get_sync_raw_params(params, "text") + response = Bot::Alegre.get_sync_with_params(params, "text") results = response['result'].to_a.sort_by{ |result| result['_score'] } explainer_ids = results.collect{ |result| result.dig('_source', 'context', 'explainer_id').to_i }.uniq.first(3) explainer_ids.empty? ? Explainer.none : Explainer.where(team_id: team_id, id: explainer_ids) From a9576ba905949bf001f781aaf94900bb3549ada0 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Mon, 19 Aug 2024 14:40:10 -0700 Subject: [PATCH 09/16] add response suppression and move to specific path for side effecting requests --- app/models/concerns/alegre_v2.rb | 4 ++++ app/models/explainer.rb | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/app/models/concerns/alegre_v2.rb b/app/models/concerns/alegre_v2.rb index acd2c3ed4b..adc74a47f8 100644 --- a/app/models/concerns/alegre_v2.rb +++ b/app/models/concerns/alegre_v2.rb @@ -284,6 +284,10 @@ def store_package_text(project_media, field, params) generic_package_text(project_media, field, params) end + def index_async_with_params(params, type, suppress_response=true) + request("post", async_path_for_type(type), params.merge(suppress_response: suppress_response)) + end + def get_sync_with_params(params, type) request("post", sync_path_for_type(type), params) end diff --git a/app/models/explainer.rb b/app/models/explainer.rb index 9cabd15f48..58d510f889 100644 --- a/app/models/explainer.rb +++ b/app/models/explainer.rb @@ -69,7 +69,7 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp) text: explainer.title, models: ALEGRE_MODELS_AND_THRESHOLDS.keys, } - Bot::Alegre.get_sync_with_params(params, "text") + Bot::Alegre.index_async_with_params(params, "text") # Index paragraphs count = 0 @@ -82,7 +82,7 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp) text: paragraph.strip, models: ALEGRE_MODELS_AND_THRESHOLDS.keys, } - Bot::Alegre.get_sync_with_params(params, "text") + Bot::Alegre.index_async_with_params(params, "text") end # Remove paragraphs that don't exist anymore (we delete after updating in order to avoid race conditions) @@ -108,7 +108,7 @@ def self.search_by_similarity(text, language, team_id) language: language } } - response = Bot::Alegre.get_sync_with_params(params, "text") + response = Bot::Alegre.index_async_with_params(params, "text") results = response['result'].to_a.sort_by{ |result| result['_score'] } explainer_ids = results.collect{ |result| result.dig('_source', 'context', 'explainer_id').to_i }.uniq.first(3) explainer_ids.empty? ? Explainer.none : Explainer.where(team_id: team_id, id: explainer_ids) From 4b2066ba154f7c90d327deb53fa35f6fac212162 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Wed, 21 Aug 2024 06:54:52 -0700 Subject: [PATCH 10/16] extend similar media to allow for temporary texts --- app/models/concerns/alegre_v2.rb | 36 ++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/app/models/concerns/alegre_v2.rb b/app/models/concerns/alegre_v2.rb index adc74a47f8..ccd150a00c 100644 --- a/app/models/concerns/alegre_v2.rb +++ b/app/models/concerns/alegre_v2.rb @@ -1,7 +1,7 @@ require 'active_support/concern' class AlegreTimeoutError < StandardError; end class TemporaryProjectMedia - attr_accessor :team_id, :id, :url, :text, :type + attr_accessor :team_id, :id, :url, :text, :type, :field def media media_type_map = { "claim" => "Claim", @@ -284,8 +284,8 @@ def store_package_text(project_media, field, params) generic_package_text(project_media, field, params) end - def index_async_with_params(params, type, suppress_response=true) - request("post", async_path_for_type(type), params.merge(suppress_response: suppress_response)) + def index_async_with_params(params, type, suppress_search_response=true) + request("post", async_path_for_type(type), params.merge(suppress_search_response: suppress_search_response)) end def get_sync_with_params(params, type) @@ -514,25 +514,27 @@ def wait_for_results(project_media, args) end def get_items_with_similar_media_v2(args={}) + text = args[:text] + field = args[:field] media_url = args[:media_url] project_media = args[:project_media] threshold = args[:threshold] team_ids = args[:team_ids] type = args[:type] - if ['audio', 'image', 'video'].include?(type) - if project_media.nil? - project_media = TemporaryProjectMedia.new - project_media.url = media_url - project_media.id = Digest::MD5.hexdigest(project_media.url).to_i(16) - project_media.team_id = team_ids - project_media.type = type - end - get_similar_items_v2_async(project_media, nil, threshold) - wait_for_results(project_media, args) - response = get_similar_items_v2_callback(project_media, nil) - delete(project_media, nil) if project_media.is_a?(TemporaryProjectMedia) - return response + if project_media.nil? + project_media = TemporaryProjectMedia.new + project_media.text = text + project_media.field = field + project_media.url = media_url + project_media.id = Digest::MD5.hexdigest(project_media.url).to_i(16) + project_media.team_id = team_ids + project_media.type = type end + get_similar_items_v2_async(project_media, nil, threshold) + wait_for_results(project_media, args) + response = get_similar_items_v2_callback(project_media, nil) + delete(project_media, nil) if project_media.is_a?(TemporaryProjectMedia) + return response end def process_alegre_callback(params) @@ -541,9 +543,11 @@ def process_alegre_callback(params) should_relate = true if project_media.nil? project_media = TemporaryProjectMedia.new + project_media.text = params.dig('data', 'item', 'raw', 'text') project_media.url = params.dig('data', 'item', 'raw', 'url') project_media.id = params.dig('data', 'item', 'raw', 'context', 'project_media_id') project_media.team_id = params.dig('data', 'item', 'raw', 'context', 'team_id') + project_media.field = params.dig('data', 'item', 'raw', 'context', 'field') project_media.type = params['model_type'] should_relate = false end From 90c88eed6a6104c1b103b965bb0e60e02be45c93 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Wed, 21 Aug 2024 08:34:20 -0700 Subject: [PATCH 11/16] fix broken test fixture --- test/models/bot/smooch_6_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/models/bot/smooch_6_test.rb b/test/models/bot/smooch_6_test.rb index eed425a1b7..f2eb12f6bc 100644 --- a/test/models/bot/smooch_6_test.rb +++ b/test/models/bot/smooch_6_test.rb @@ -695,7 +695,7 @@ def send_message_outside_24_hours_window(template, pm = nil) pm = create_project_media team: @team publish_report(pm, {}, nil, { language: 'pt', use_visual_card: false }) Bot::Smooch.stubs(:get_search_results).returns([pm]) - WebMock.stub_request(:post, /\/similarity\/sync\/text/).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers Sidekiq::Testing.inline! do send_message 'hello', '1', '1', 'Foo bar', '1' end From 5ae5aa5ff4d355a30868f5fc0e65daedd4da0a89 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Wed, 21 Aug 2024 10:49:25 -0700 Subject: [PATCH 12/16] revert back to async --- test/models/bot/smooch_6_test.rb | 8 ++++---- test/models/explainer_test.rb | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/models/bot/smooch_6_test.rb b/test/models/bot/smooch_6_test.rb index f2eb12f6bc..bc414b93ef 100644 --- a/test/models/bot/smooch_6_test.rb +++ b/test/models/bot/smooch_6_test.rb @@ -139,7 +139,7 @@ def send_message_outside_24_hours_window(template, pm = nil) test "should submit query without details on tipline bot v2" do WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers - WebMock.stub_request(:post, /\/similarity\/sync\/text/).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers claim = 'This is a test claim' send_message 'hello', '1', '1', random_string, random_string, claim, random_string, random_string, '1' assert_saved_query_type 'default_requests' @@ -209,7 +209,7 @@ def send_message_outside_24_hours_window(template, pm = nil) end test "should submit query with details on tipline bot v2" do - WebMock.stub_request(:post, /\/similarity\/sync\/text/).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers claim = 'This is a test claim' send_message 'hello', '1', '1', random_string, '2', random_string, claim, '1' @@ -287,7 +287,7 @@ def send_message_outside_24_hours_window(template, pm = nil) end test "should submit query and handle search error on tipline bot v2" do - WebMock.stub_request(:post, /\/similarity\/sync\/text/).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers CheckSearch.any_instance.stubs(:medias).raises(StandardError) Sidekiq::Testing.inline! do send_message 'hello', '1', '1', 'Foo bar', '1' @@ -386,7 +386,7 @@ def send_message_outside_24_hours_window(template, pm = nil) ProjectMedia.any_instance.stubs(:report_status).returns('published') ProjectMedia.any_instance.stubs(:analysis_published_article_url).returns(random_url) Bot::Alegre.stubs(:get_merged_similar_items).returns({ create_project_media.id => { score: 0.9 } }) - WebMock.stub_request(:post, /\/similarity\/sync\/text/).to_return(body: {}.to_json) # For explainers + WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers Sidekiq::Testing.inline! do send_message 'hello', '1', '1', "Foo bar foo bar #{url} foo bar", '1' end diff --git a/test/models/explainer_test.rb b/test/models/explainer_test.rb index 44b0bd64a4..c9010238fe 100644 --- a/test/models/explainer_test.rb +++ b/test/models/explainer_test.rb @@ -99,12 +99,12 @@ def setup } # Index two paragraphs and title when the explainer is created - Bot::Alegre.stubs(:request).with('post', '/similarity/sync/text', anything).times(3) + Bot::Alegre.stubs(:request).with('post', '/similarity/async/text', anything).times(3) Bot::Alegre.stubs(:request).with('delete', '/text/similarity/', anything).never ex = create_explainer description: description # Update the index when paragraphs change - Bot::Alegre.stubs(:request).with('post', '/similarity/sync/text', anything).times(2) + Bot::Alegre.stubs(:request).with('post', '/similarity/async/text', anything).times(2) Bot::Alegre.stubs(:request).with('delete', '/text/similarity/', anything).once ex = Explainer.find(ex.id) ex.description = 'Now this is the only paragraph' From 1e7019a3838f65b6df08d80d133935901113869e Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Wed, 21 Aug 2024 14:31:08 -0700 Subject: [PATCH 13/16] fix another test --- test/models/bot/smooch_7_test.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/test/models/bot/smooch_7_test.rb b/test/models/bot/smooch_7_test.rb index c0d877c837..9a7cb345c0 100644 --- a/test/models/bot/smooch_7_test.rb +++ b/test/models/bot/smooch_7_test.rb @@ -601,6 +601,7 @@ def teardown test "should include claim_description_content in smooch search" do WebMock.stub_request(:post, 'http://alegre:3100/similarity/async/image').to_return(body: {}.to_json) + WebMock.stub_request(:post, 'http://alegre:3100/text/similarity/').to_return(body: {}.to_json) RequestStore.store[:skip_cached_field_update] = false t = create_team m = create_uploaded_image From 1a6010ff5a3fbdd62d9f7241f2432c7e71603e0f Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Thu, 22 Aug 2024 14:16:32 -0700 Subject: [PATCH 14/16] fixes per PR review --- app/models/concerns/alegre_v2.rb | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/app/models/concerns/alegre_v2.rb b/app/models/concerns/alegre_v2.rb index ccd150a00c..c60184dee3 100644 --- a/app/models/concerns/alegre_v2.rb +++ b/app/models/concerns/alegre_v2.rb @@ -166,16 +166,14 @@ def content_hash_for_value(value) def content_hash(project_media, field) if Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS.include?(field) content_hash_for_value(project_media.send(field)) + elsif project_media.is_link? + return content_hash_for_value(project_media.media.url) + elsif project_media.is_a?(TemporaryProjectMedia) + return Rails.cache.read("url_sha:#{project_media.url}") + elsif !project_media.is_text? + return project_media.media.file.filename.split(".").first else - if project_media.is_link? - return content_hash_for_value(project_media.media.url) - elsif project_media.is_a?(TemporaryProjectMedia) - return Rails.cache.read("url_sha:#{project_media.url}") - elsif !project_media.is_text? - return project_media.media.file.filename.split(".").first - else - return content_hash_for_value(project_media.send(field).to_s) - end + return content_hash_for_value(project_media.send(field).to_s) end end From bf0a85d6829846eb6b846007c6bdb14bffc7c14e Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Thu, 22 Aug 2024 14:22:16 -0700 Subject: [PATCH 15/16] fixes per PR review --- app/models/explainer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/explainer.rb b/app/models/explainer.rb index 58d510f889..f1599f4617 100644 --- a/app/models/explainer.rb +++ b/app/models/explainer.rb @@ -108,7 +108,7 @@ def self.search_by_similarity(text, language, team_id) language: language } } - response = Bot::Alegre.index_async_with_params(params, "text") + response = Bot::Alegre.get_async_with_params(params, "text") results = response['result'].to_a.sort_by{ |result| result['_score'] } explainer_ids = results.collect{ |result| result.dig('_source', 'context', 'explainer_id').to_i }.uniq.first(3) explainer_ids.empty? ? Explainer.none : Explainer.where(team_id: team_id, id: explainer_ids) From 0d5f1097234d920079353ca597b9c2441f8677da Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Mon, 26 Aug 2024 10:36:27 -0700 Subject: [PATCH 16/16] more fixes after review --- app/models/concerns/alegre_v2.rb | 10 +++++++++- app/models/concerns/project_media_getters.rb | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/app/models/concerns/alegre_v2.rb b/app/models/concerns/alegre_v2.rb index c60184dee3..46db6ea13c 100644 --- a/app/models/concerns/alegre_v2.rb +++ b/app/models/concerns/alegre_v2.rb @@ -36,6 +36,10 @@ def is_video? def is_audio? self.type == "audio" end + + def is_uploaded_media? + self.is_image? || self.is_audio? || self.is_video? + end end module AlegreV2 @@ -170,7 +174,7 @@ def content_hash(project_media, field) return content_hash_for_value(project_media.media.url) elsif project_media.is_a?(TemporaryProjectMedia) return Rails.cache.read("url_sha:#{project_media.url}") - elsif !project_media.is_text? + elsif project_media.is_uploaded_media? return project_media.media.file.filename.split(".").first else return content_hash_for_value(project_media.send(field).to_s) @@ -313,6 +317,10 @@ def delete(project_media, field=nil, params={}) delete_package(project_media, field, params), project_media ) + rescue StandardError => e + error = Error.new(e) + Rails.logger.error("[AutoTagger Bot] Exception for event `#{body['event']}`: #{error.class} - #{error.message}") + CheckSentry.notify(error, bot: "alegre", project_media: project_media, params: params, field: field) end def get_per_model_threshold(project_media, threshold) diff --git a/app/models/concerns/project_media_getters.rb b/app/models/concerns/project_media_getters.rb index b8662224e7..56040e69c7 100644 --- a/app/models/concerns/project_media_getters.rb +++ b/app/models/concerns/project_media_getters.rb @@ -31,6 +31,10 @@ def is_image? self.is_uploaded_image? end + def is_uploaded_media? + self.is_image? || self.is_audio? || self.is_video? + end + def is_text? self.is_claim? || self.is_link? end