diff --git a/app/lib/tipline_search_result.rb b/app/lib/tipline_search_result.rb
new file mode 100644
index 0000000000..7482872661
--- /dev/null
+++ b/app/lib/tipline_search_result.rb
@@ -0,0 +1,68 @@
+# Channel-agnostic representation of one result returned by a tipline search.
+# It is built from a report design annotation (type :fact_check) or from an
+# explainer (type :explainer), so both can be formatted and sent the same way.
+class TiplineSearchResult
+  attr_accessor :team, :title, :body, :image_url, :language, :url, :type, :format
+
+  def initialize(team:, title:, body:, image_url:, language:, url:, type:, format:)
+    self.team = team
+    self.title = title
+    self.body = body
+    self.image_url = image_url
+    self.language = language
+    self.url = url
+    self.type = type # :explainer or :fact_check
+    self.format = format # :text or :image
+  end
+
+  # Tells whether this result can be sent to a user who speaks `language`.
+  # Always true when the team has fewer than two languages; otherwise the result
+  # language must match, unless 'single_language_fact_checks_enabled' is not set.
+  def should_send_in_language?(language)
+    return true if self.team.get_languages.to_a.size < 2
+    tbi = TeamBotInstallation.where(team_id: self.team.id, user: BotUser.alegre_user).last
+    should_send_report_in_different_language = !tbi&.alegre_settings&.dig('single_language_fact_checks_enabled')
+    self.language == language || should_send_report_in_different_language
+  end
+
+  # Reads a report setting of the team for the given language.
+  def team_report_setting_value(key, language)
+    self.team.get_report.to_h.with_indifferent_access.dig(language, key)
+  end
+
+  # Signature and social media handles configured by the team, one per line.
+  def footer(language)
+    footer = []
+    prefixes = {
+      whatsapp: 'WhatsApp: ',
+      facebook: 'FB Messenger: m.me/',
+      twitter: 'Twitter: twitter.com/',
+      telegram: 'Telegram: t.me/',
+      viber: 'Viber: ',
+      line: 'LINE: ',
+      instagram: 'Instagram: instagram.com/'
+    }
+    [:signature, :whatsapp, :facebook, :twitter, :telegram, :viber, :line, :instagram].each do |field|
+      value = self.team_report_setting_value(field.to_s, language)
+      footer << "#{prefixes[field]}#{value}" unless value.blank?
+    end
+    footer.join("\n")
+  end
+
+  # Message body: bold title, body (unless `hide_body`), URL and, when a language
+  # is given and the team enabled 'use_signature', the footer.
+  def text(language = nil, hide_body = false)
+    text = []
+    text << "*#{self.title.strip}*" unless self.title.blank?
+    text << self.body.to_s unless hide_body
+    text << self.url unless self.url.blank?
+    text = text.collect do |part|
+      self.team.get_shorten_outgoing_urls ? UrlRewriter.shorten_and_utmize_urls(part, self.team.get_outgoing_urls_utm_code) : part
+    end
+    unless language.nil?
+      footer = self.footer(language)
+      text << footer if !footer.blank? && self.team_report_setting_value('use_signature', language)
+    end
+    text.join("\n\n")
+  end
+end
diff --git a/app/models/concerns/smooch_search.rb b/app/models/concerns/smooch_search.rb
index 8dd4729892..00bbda28c5 100644
--- a/app/models/concerns/smooch_search.rb
+++ b/app/models/concerns/smooch_search.rb
@@ -4,6 +4,7 @@ module SmoochSearch
   extend ActiveSupport::Concern
 
   module ClassMethods
+    # This method runs in background
     def search(app_id, uid, language, message, team_id, workflow, provider = nil)
       platform = self.get_platform_from_message(message)
 
@@ -11,16 +12,27 @@ def search(app_id, uid, language, message, team_id, workflow, provider = nil)
         sm = CheckStateMachine.new(uid)
         self.get_installation(self.installation_setting_id_keys, app_id) if self.config.blank?
         RequestStore.store[:smooch_bot_provider] = provider unless provider.blank?
-        results = self.get_search_results(uid, message, team_id, language).select do |pm|
-          pm = Relationship.confirmed_parent(pm)
-          report = pm.get_dynamic_annotation('report_design')
-          !report.nil? && !!report.should_send_report_in_this_language?(language)
-        end.collect{ |pm| Relationship.confirmed_parent(pm) }.uniq
-        if results.empty?
+        query = self.get_search_query(uid, message)
+        results = self.get_search_results(uid, query, team_id, language).collect{ |pm| Relationship.confirmed_parent(pm) }.uniq
+        # Items can also be matched just because they have explainers, so be sure that only published reports are sent
+        reports = results.select{ |pm| pm.report_status == 'published' }.collect{ |pm| pm.get_dynamic_annotation('report_design') }.reject{ |r| r.nil? }.collect{ |r| r.report_design_to_tipline_search_result }.select{ |r| r.should_send_in_language?(language) }
+
+        # Extract explainers from matched media if they don't have published fact-checks but they have explainers
+        reports = results.collect{ |pm| pm.explainers.to_a }.flatten.uniq.first(3).map(&:as_tipline_search_result) if !results.empty? && reports.empty?
+
+        # Search for explainers if fact-checks were not found
+        if reports.empty? && query['type'] == 'text'
+          explainers = self.search_for_explainers(uid, query['text'], team_id, language).first(3).select{ |explainer| explainer.as_tipline_search_result.should_send_in_language?(language) }
+          Rails.logger.info "[Smooch Bot] Text similarity search got #{explainers.count} explainers while looking for '#{query['text']}' for team #{team_id}"
+          results = explainers.collect{ |explainer| explainer.project_medias.to_a }.flatten.uniq.reject{ |pm| pm.blank? }.first(3)
+          reports = explainers.map(&:as_tipline_search_result)
+        end
+
+        if reports.empty?
           self.bundle_messages(uid, '', app_id, 'default_requests', nil, true)
           self.send_final_message_to_user(uid, self.get_custom_string('search_no_results', language), workflow, language, 'no_results')
         else
-          self.send_search_results_to_user(uid, results, team_id, platform)
+          self.send_search_results_to_user(uid, reports, team_id, platform)
           sm.go_to_search_result
           self.save_search_results_for_user(uid, results.map(&:id))
           self.delay_for(1.second, { queue: 'smooch_priority' }).ask_for_feedback_when_all_search_results_are_received(app_id, language, workflow, uid, platform, provider, 1)
@@ -80,7 +92,7 @@ def filter_search_results(pms, after, feed_id, team_ids)
     end
 
     def is_a_valid_search_result(pm)
-      pm.report_status == 'published' && [CheckArchivedFlags::FlagCodes::NONE, CheckArchivedFlags::FlagCodes::UNCONFIRMED].include?(pm.archived)
+      (pm.report_status == 'published' || pm.explainers.count > 0) && [CheckArchivedFlags::FlagCodes::NONE, CheckArchivedFlags::FlagCodes::UNCONFIRMED].include?(pm.archived)
     end
 
     def reject_temporary_results(results)
@@ -91,7 +103,8 @@ def reject_temporary_results(results)
 
     def parse_search_results_from_alegre(results, after = nil, feed_id = nil, team_ids = nil)
       pms = reject_temporary_results(results).sort_by{ |a| [a[1][:model] != Bot::Alegre::ELASTICSEARCH_MODEL ? 1 : 0, a[1][:score]] }.to_h.keys.reverse.collect{ |id| Relationship.confirmed_parent(ProjectMedia.find_by_id(id)) }
-      filter_search_results(pms, after, feed_id, team_ids).uniq(&:id).first(3)
+      # Published fact-checks first - partition (unlike sort_by, which is not stable) keeps the relevance order inside each group
+      filter_search_results(pms, after, feed_id, team_ids).uniq(&:id).partition{ |pm| pm.report_status == 'published' }.flatten(1).first(3)
     end
 
     def date_filter(team_id)
@@ -111,11 +124,14 @@ def get_text_similarity_threshold
       value == 0.0 ? 0.85 : value
     end
 
-    def get_search_results(uid, last_message, team_id, language)
+    def get_search_query(uid, last_message)
+      list = self.list_of_bundled_messages_from_user(uid)
+      self.bundle_list_of_messages(list, last_message, true)
+    end
+
+    def get_search_results(uid, message, team_id, language)
       results = []
       begin
-        list = self.list_of_bundled_messages_from_user(uid)
-        message = self.bundle_list_of_messages(list, last_message, true)
         type = message['type']
         after = self.date_filter(team_id)
         query = message['text']
@@ -243,22 +259,22 @@ def search_by_keywords_for_similar_published_fact_checks(words, after, team_ids,
       results
     end
 
-    def send_search_results_to_user(uid, results, team_id, platform)
+    def send_search_results_to_user(uid, reports, team_id, platform)
       team = Team.find(team_id)
       redis = Redis.new(REDIS_CONFIG)
       language = self.get_user_language(uid)
-      reports = results.collect{ |r| r.get_dynamic_annotation('report_design') }
-      # Get reports languages
-      reports_language = reports.map { |r| r&.report_design_field_value('language') }.uniq
-      if team.get_languages.to_a.size > 1 && !reports_language.include?(language)
+      reports_languages = reports.map(&:language).uniq
+
+      if team.get_languages.to_a.size > 1 && !reports_languages.include?(language)
         self.send_message_to_user(uid, self.get_string(:no_results_in_language, language).gsub('%{language}', CheckCldr.language_code_to_name(language, language)), {}, false, true, 'no_results')
         sleep 1
       end
-      reports.reject{ |r| r.blank? }.each do |report|
+
+      reports.each do |report|
         response = nil
-        no_body = (platform == 'Facebook Messenger' && !report.report_design_field_value('published_article_url').blank?)
-        response = self.send_message_to_user(uid, report.report_design_text(nil, no_body), {}, false, true, 'search_result') if report.report_design_field_value('use_text_message')
-        response = self.send_message_to_user(uid, '', { 'type' => 'image', 'mediaUrl' => report.report_design_image_url }, false, true, 'search_result') if !report.report_design_field_value('use_text_message') && report.report_design_field_value('use_visual_card')
+        no_body = (platform == 'Facebook Messenger' && !report.url.blank?)
+        response = self.send_message_to_user(uid, report.text(nil, no_body), {}, false, true, 'search_result') if report.format == :text
+        response = self.send_message_to_user(uid, '', { 'type' => 'image', 'mediaUrl' => report.image_url }, false, true, 'search_result') if report.format == :image
         id = self.get_id_from_send_response(response)
         redis.rpush("smooch:search:#{uid}", id) unless id.blank?
       end
@@ -284,5 +300,30 @@ def ask_for_feedback_when_all_search_results_are_received(app_id, language, work
         self.delay_for(1.second, { queue: 'smooch_priority' }).ask_for_feedback_when_all_search_results_are_received(app_id, language, workflow, uid, platform, provider, attempts + 1) if attempts < max # Try for 20 seconds
       end
     end
+
+    # Looks for explainers of the team that match the query sent by the user.
+    # Single-word queries are matched by keyword against title and description;
+    # longer queries go through text similarity. Only explainers associated with
+    # at least one project media are returned. Always returns a relation, even on error.
+    def search_for_explainers(uid, query, team_id, language)
+      results = Explainer.none
+      begin
+        text = ::Bot::Smooch.extract_claim(query)
+        if Bot::Alegre.get_number_of_words(text) == 1
+          # The query is user input: escape LIKE wildcards so they are matched literally
+          pattern = "%#{Explainer.sanitize_sql_like(text)}%"
+          results = Explainer.where(team_id: team_id).where('explainers.description ILIKE ? OR explainers.title ILIKE ?', pattern, pattern)
+          results = results.where(language: language) if should_restrict_by_language?([team_id])
+          results = results.order('explainers.updated_at DESC')
+        else
+          results = Explainer.search_by_similarity(text, language, team_id)
+        end
+      rescue StandardError => e
+        self.handle_search_error(uid, e, language)
+        results = Explainer.none
+      end
+      # An explainer can be associated with many items, so avoid duplicated rows coming from the join
+      results.joins(:project_medias).distinct
+    end
   end
 end
diff --git a/app/models/explainer.rb b/app/models/explainer.rb
index 20dde8aa2f..927b6a4258 100644
--- a/app/models/explainer.rb
+++ b/app/models/explainer.rb
@@ -1,6 +1,12 @@
 class Explainer < ApplicationRecord
   include Article
 
+  # FIXME: Read from workspace settings
+  ALEGRE_MODELS_AND_THRESHOLDS = {
+    # Bot::Alegre::ELASTICSEARCH_MODEL => 0.8 # Sometimes this is easier for local development
+    Bot::Alegre::PARAPHRASE_MULTILINGUAL_MODEL => 0.7
+  }.freeze
+
   belongs_to :team
 
   has_annotations
@@ -12,12 +18,96 @@ class Explainer < ApplicationRecord
   validates_presence_of :team, :title, :description
   validate :language_in_allowed_values, unless: proc { |e| e.language.blank? }
 
+  after_save :update_paragraphs_in_alegre
+
   def notify_bots
     # Nothing to do for Explainer
   end
 
   def send_to_alegre
-    # Nothing to do for Explainer
+    # Let's not use the same callbacks from article.rb
+  end
+
+  # Wraps this explainer so it can be sent to tipline users as a text search result
+  def as_tipline_search_result
+    TiplineSearchResult.new(
+      team: self.team,
+      title: self.title,
+      body: self.description,
+      image_url: nil,
+      language: self.language,
+      url: self.url,
+      type: :explainer,
+      format: :text
+    )
+  end
+
+  # Callback: schedules the (re)indexing of the description paragraphs in Alegre
+  def update_paragraphs_in_alegre
+    previous_paragraphs_count = self.description_before_last_save.to_s.gsub(/\r\n?/, "\n").split(/\n+/).reject{ |paragraph| paragraph.strip.blank? }.size
+
+    # Schedule to run 5 seconds later - it's a way to be sure there won't be more updates coming
+    self.class.delay_for(5.seconds).update_paragraphs_in_alegre(self.id, previous_paragraphs_count, Time.now.to_f)
+  end
+
+  # Background job: indexes each non-blank paragraph of the description as a separate
+  # Alegre document and removes the documents of paragraphs that don't exist anymore
+  def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp)
+    explainer = Explainer.find_by_id(id)
+
+    # Skip if the explainer was deleted or saved since this job was created (it means that there is a more recent job)
+    return if explainer.nil? || explainer.updated_at.to_f > timestamp
+
+    base_context = {
+      type: 'explainer',
+      team: explainer.team.slug,
+      language: explainer.language,
+      explainer_id: explainer.id
+    }
+
+    # Index paragraphs
+    count = 0
+    explainer.description.to_s.gsub(/\r\n?/, "\n").split(/\n+/).reject{ |paragraph| paragraph.strip.blank? }.each do |paragraph|
+      count += 1
+      params = {
+        doc_id: Digest::MD5.hexdigest(['explainer', explainer.id, 'paragraph', count].join(':')),
+        text: paragraph.strip,
+        models: ALEGRE_MODELS_AND_THRESHOLDS.keys,
+        context: base_context.merge({ paragraph: count })
+      }
+      Bot::Alegre.request('post', '/text/similarity/', params)
+    end
+
+    # Remove paragraphs that don't exist anymore (we delete after updating in order to avoid race conditions)
+    previous_paragraphs_count.times do |index|
+      next if index < count
+      params = {
+        doc_id: Digest::MD5.hexdigest(['explainer', explainer.id, 'paragraph', index + 1].join(':')),
+        quiet: true,
+        context: base_context.merge({ paragraph: index + 1 })
+      }
+      Bot::Alegre.request('delete', '/text/similarity/', params)
+    end
+  end
+
+  # Returns the explainers of the team (at most 3) whose paragraphs are the most
+  # similar to `text` according to Alegre, or an empty relation when nothing matches
+  def self.search_by_similarity(text, language, team_id)
+    params = {
+      text: text,
+      models: ALEGRE_MODELS_AND_THRESHOLDS.keys,
+      per_model_threshold: ALEGRE_MODELS_AND_THRESHOLDS,
+      context: {
+        type: 'explainer',
+        team: Team.find(team_id).slug,
+        language: language
+      }
+    }
+    response = Bot::Alegre.request('post', '/text/similarity/search/', params)
+    # Best matches first, otherwise the cut to 3 results below would keep the worst ones
+    results = response['result'].to_a.sort_by{ |result| -result['_score'].to_f }
+    explainer_ids = results.collect{ |result| result.dig('_source', 'context', 'explainer_id').to_i }.uniq.first(3)
+    explainer_ids.empty? ? Explainer.none : Explainer.where(team_id: team_id, id: explainer_ids)
   end
 
   private
diff --git a/config/initializers/report_designer.rb b/config/initializers/report_designer.rb
index 9a36a9282b..08cd3dcbf8 100644
--- a/config/initializers/report_designer.rb
+++ b/config/initializers/report_designer.rb
@@ -93,41 +93,25 @@ def report_design_team_setting_value(field, language)
     self.annotated&.team&.get_report.to_h.with_indifferent_access.dig(language, field) if self.annotation_type == 'report_design'
   end
 
-  def report_design_text_footer(language)
-    footer = []
-    prefixes = {
-      whatsapp: 'WhatsApp: ',
-      facebook: 'FB Messenger: m.me/',
-      twitter: 'Twitter: twitter.com/',
-      telegram: 'Telegram: t.me/',
-      viber: 'Viber: ',
-      line: 'LINE: ',
-      instagram: 'Instagram: instagram.com/'
-    }
-    [:signature, :whatsapp, :facebook, :twitter, :telegram, :viber, :line, :instagram].each do |field|
-      value = self.report_design_team_setting_value(field.to_s, language)
-      footer << "#{prefixes[field]}#{value}" unless value.blank?
+  # Converts a report design annotation into a TiplineSearchResult (nil for other annotation types)
+  def report_design_to_tipline_search_result
+    if self.annotation_type == 'report_design'
+      TiplineSearchResult.new(
+        type: :fact_check,
+        team: self.annotated.team,
+        title: self.report_design_field_value('title'),
+        body: self.report_design_field_value('text'),
+        image_url: self.report_design_image_url,
+        language: self.report_design_field_value('language'),
+        url: self.report_design_field_value('published_article_url'),
+        format: (!self.report_design_field_value('use_text_message') && self.report_design_field_value('use_visual_card')) ? :image : :text
+      )
     end
-    footer.join("\n")
   end
 
   def report_design_text(language = nil, hide_body = false)
     if self.annotation_type == 'report_design'
-      team = self.annotated.team
-      text = []
-      title = self.report_design_field_value('title')
-      text << "*#{title.strip}*" unless title.blank?
-      text << self.report_design_field_value('text').to_s unless hide_body
-      url = self.report_design_field_value('published_article_url')
-      text << url unless url.blank?
-      text = text.collect do |part|
-        team.get_shorten_outgoing_urls ? UrlRewriter.shorten_and_utmize_urls(part, team.get_outgoing_urls_utm_code) : part
-      end
-      unless language.nil?
-        footer = self.report_design_text_footer(language)
-        text << footer if !footer.blank? && self.report_design_team_setting_value('use_signature', language)
-      end
-      text.join("\n\n")
+      self.report_design_to_tipline_search_result.text(language, hide_body)
     end
   end
 
@@ -241,10 +225,6 @@ def sent_count
   end
 
   def should_send_report_in_this_language?(language)
-    team = self.annotated.team
-    return true if team.get_languages.to_a.size < 2
-    tbi = TeamBotInstallation.where(team_id: team.id, user: BotUser.alegre_user).last
-    should_send_report_in_different_language = !tbi&.alegre_settings&.dig('single_language_fact_checks_enabled')
-    self.annotation_type == 'report_design' && (self.report_design_field_value('language') == language || should_send_report_in_different_language)
+    self.annotation_type == 'report_design' && self.report_design_to_tipline_search_result.should_send_in_language?(language)
   end
 end
diff --git a/test/models/bot/smooch_4_test.rb b/test/models/bot/smooch_4_test.rb
index f8df58f903..34167fa5f6 100644
--- a/test/models/bot/smooch_4_test.rb
+++ b/test/models/bot/smooch_4_test.rb
@@ -669,9 +669,12 @@ def teardown
     CheckSearch.any_instance.stubs(:medias).returns([pm1])
     Bot::Alegre.stubs(:get_merged_similar_items).returns({ pm2.id => { score: 0.9, model: 'elasticsearch', context: {foo: :bar} } })
 
-    assert_equal [pm2], Bot::Smooch.get_search_results(random_string, {}, t.id, 'en')
+    uid = random_string
+    query = Bot::Smooch.get_search_query(uid, {})
+    assert_equal [pm2], Bot::Smooch.get_search_results(uid, query, t.id, 'en')
     Bot::Smooch.stubs(:bundle_list_of_messages).returns({ 'type' => 'text', 'text' => "Test #{url}" })
-    assert_equal [pm1], Bot::Smooch.get_search_results(random_string, {}, t.id, 'en')
+    query = Bot::Smooch.get_search_query(uid, {})
+    assert_equal [pm1], Bot::Smooch.get_search_results(uid, query, t.id, 'en')
 
     ProjectMedia.any_instance.unstub(:report_status)
     CheckSearch.any_instance.unstub(:medias)
diff --git a/test/models/bot/smooch_6_test.rb b/test/models/bot/smooch_6_test.rb
index 0251e7cab8..6473d95dc2 100644
--- a/test/models/bot/smooch_6_test.rb
+++ b/test/models/bot/smooch_6_test.rb
@@ -138,7 +138,7 @@ def send_message_outside_24_hours_window(template, pm = nil)
   end
 
   test "should submit query without details on tipline bot v2" do
-    WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json)
+    WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers
     claim = 'This is a test claim'
     send_message 'hello', '1', '1', random_string, random_string, claim, random_string, random_string, '1'
     assert_saved_query_type 'default_requests'
@@ -208,7 +208,7 @@ def send_message_outside_24_hours_window(template, pm = nil)
   end
 
   test "should submit query with details on tipline bot v2" do
-    WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json)
+    WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers
     claim = 'This is a test claim'
     send_message 'hello', '1', '1', random_string, '2', random_string, claim, '1'
     assert_saved_query_type 'default_requests'
@@ -285,6 +285,7 @@ def send_message_outside_24_hours_window(template, pm = nil)
   end
 
   test "should submit query and handle search error on tipline bot v2" do
+    WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers
     CheckSearch.any_instance.stubs(:medias).raises(StandardError)
     Sidekiq::Testing.inline! do
       send_message 'hello', '1', '1', 'Foo bar', '1'
@@ -383,6 +384,7 @@ def send_message_outside_24_hours_window(template, pm = nil)
     ProjectMedia.any_instance.stubs(:report_status).returns('published')
     ProjectMedia.any_instance.stubs(:analysis_published_article_url).returns(random_url)
     Bot::Alegre.stubs(:get_merged_similar_items).returns({ create_project_media.id => { score: 0.9 } })
+    WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers
     Sidekiq::Testing.inline! do
       send_message 'hello', '1', '1', "Foo bar foo bar #{url} foo bar", '1'
     end
@@ -691,6 +693,7 @@ def send_message_outside_24_hours_window(template, pm = nil)
     pm = create_project_media team: @team
     publish_report(pm, {}, nil, { language: 'pt', use_visual_card: false })
     Bot::Smooch.stubs(:get_search_results).returns([pm])
+    WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers
     Sidekiq::Testing.inline! do
       send_message 'hello', '1', '1', 'Foo bar', '1'
     end
@@ -943,4 +946,21 @@ def send_message_outside_24_hours_window(template, pm = nil)
       end
     end
   end
+
+  test "should submit query and handle explainer search error on tipline bot v2" do
+    Explainer.stubs(:search_by_similarity).raises(StandardError)
+    Sidekiq::Testing.inline! do
+      send_message 'hello', '1', '1', 'Foo bar', '1'
+    end
+  end
+
+  test "should search by explainers on tipline bot v2" do
+    assert_nil Rails.cache.read("smooch:user_search_results:#{@uid}")
+    @search_result.explainers << create_explainer(language: 'en', team: @team, title: 'Test', description: 'Foo bar')
+    Bot::Smooch.stubs(:get_search_results).returns([])
+    Sidekiq::Testing.inline! do
+      send_message 'hi', '1', '1', 'Foo', '1'
+    end
+    assert_not_nil Rails.cache.read("smooch:user_search_results:#{@uid}")
+  end
 end
diff --git a/test/models/bot/smooch_7_test.rb b/test/models/bot/smooch_7_test.rb
index ad852ffb92..4fd46ac40e 100644
--- a/test/models/bot/smooch_7_test.rb
+++ b/test/models/bot/smooch_7_test.rb
@@ -217,7 +217,9 @@ def teardown
     Bot::Smooch.stubs(:bundle_list_of_messages).returns({ 'type' => 'text', 'text' => 'Foo bar' })
     CheckSearch.any_instance.stubs(:medias).returns([pm])
 
-    assert_equal [pm], Bot::Smooch.get_search_results(random_string, {}, pm.team_id, 'en')
+    uid = random_string
+    query = Bot::Smooch.get_search_query(uid, {})
+    assert_equal [pm], Bot::Smooch.get_search_results(uid, query, pm.team_id, 'en')
 
     Bot::Smooch.unstub(:bundle_list_of_messages)
     CheckSearch.any_instance.unstub(:medias)
@@ -238,7 +240,9 @@ def teardown
     ProjectMedia.any_instance.stubs(:analysis_published_article_url).returns(random_url)
     Bot::Alegre.stubs(:get_merged_similar_items).returns({ pm.id => { score: 0.9, model: 'elasticsearch', context: {foo: :bar} } })
 
-    assert_equal [pm], Bot::Smooch.get_search_results(random_string, {}, pm.team_id, 'en')
+    uid = random_string
+    query = Bot::Smooch.get_search_query(uid, {})
+    assert_equal [pm], Bot::Smooch.get_search_results(uid, query, pm.team_id, 'en')
 
     Bot::Smooch.unstub(:bundle_list_of_messages)
     ProjectMedia.any_instance.unstub(:report_status)
diff --git a/test/models/explainer_test.rb b/test/models/explainer_test.rb
index d2cd7d6a2d..556d911c96 100644
--- a/test/models/explainer_test.rb
+++ b/test/models/explainer_test.rb
@@ -104,4 +104,25 @@ def setup
       end
     end
   end
+
+  test "should index explainer information" do
+    Sidekiq::Testing.inline!
+    description = %{
+      The is the first paragraph.
+
+      This is the second paragraph.
+    }
+
+    # Index two paragraphs when the explainer is created
+    Bot::Alegre.stubs(:request).with('post', '/text/similarity/', anything).times(2)
+    Bot::Alegre.stubs(:request).with('delete', '/text/similarity/', anything).never
+    ex = create_explainer description: description
+
+    # Update the index when paragraphs change
+    Bot::Alegre.stubs(:request).with('post', '/text/similarity/', anything).once
+    Bot::Alegre.stubs(:request).with('delete', '/text/similarity/', anything).once
+    ex = Explainer.find(ex.id)
+    ex.description = 'Now this is the only paragraph'
+    ex.save!
+  end
 end