Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cv2 5086 smooch nlu to presto 2 #2019

10 changes: 3 additions & 7 deletions app/lib/smooch_nlu.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,11 @@ def update_keywords(language, keywords, keyword, operation, doc_id, context)
}
if operation == 'add' && !keywords.include?(keyword)
keywords << keyword
alegre_operation = 'post'
alegre_params = common_alegre_params.merge({ text: keyword, models: ALEGRE_MODELS_AND_THRESHOLDS.keys })
Bot::Alegre.get_sync_with_params(common_alegre_params.merge({ text: keyword, models: ALEGRE_MODELS_AND_THRESHOLDS.keys }), "text")
DGaffney marked this conversation as resolved.
Show resolved Hide resolved
elsif operation == 'remove'
keywords -= [keyword]
alegre_operation = 'delete'
alegre_params = common_alegre_params.merge({ quiet: true })
Bot::Alegre.request_delete_from_raw(common_alegre_params.merge({ quiet: true }), "text")
end
# FIXME: Add error handling and better logging
Bot::Alegre.request(alegre_operation, '/text/similarity/', alegre_params) if alegre_operation && alegre_params
keywords
end

Expand Down Expand Up @@ -91,7 +87,7 @@ def self.alegre_matches_from_message(message, language, context, alegre_result_k
language: language,
}.merge(context)
}
response = Bot::Alegre.request('post', '/text/similarity/search/', params)
response = Bot::Alegre.get_sync_with_params(params, "text")

# One approach would be to take the option that has the most matches
# Unfortunately this approach is influenced by the number of keywords per option
Expand Down
95 changes: 67 additions & 28 deletions app/models/concerns/alegre_v2.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
require 'active_support/concern'
class AlegreTimeoutError < StandardError; end
class TemporaryProjectMedia
attr_accessor :team_id, :id, :url, :type
attr_accessor :team_id, :id, :url, :text, :type, :field
def media
media_type_map = {
"claim" => "Claim",
Expand Down Expand Up @@ -36,6 +36,10 @@ def is_video?
def is_audio?
self.type == "audio"
end

# Tells whether this item is an uploaded media item, i.e. whether any of the
# image, audio or video predicates answers truthy. The predicates are tried
# in that order and evaluation stops at the first truthy answer.
def is_uploaded_media?
  verdict = false
  %i[is_image? is_audio? is_video?].each do |predicate|
    verdict = send(predicate)
    break if verdict
  end
  verdict
end
end

module AlegreV2
Expand All @@ -55,11 +59,18 @@ def sync_path_for_type(type)
end

# Returns the asynchronous similarity endpoint path for a project media item.
#
# The item's type is resolved once through get_type and the path itself is
# built by async_path_for_type, so the route is defined in a single place.
#
# @param project_media the item whose type selects the endpoint
# @return [String] the path produced by async_path_for_type
def async_path(project_media)
  async_path_for_type(get_type(project_media))
end

# Builds the asynchronous similarity endpoint path for a raw type name.
#
# @param type the media type segment appended to the route (converted with to_s)
# @return [String] "/similarity/async/" followed by the type
def async_path_for_type(type)
  prefix = "/similarity/async/"
  prefix + type.to_s
end

# Returns the delete endpoint path for a project media item.
#
# The item's type is resolved once through get_type and the path itself is
# built by delete_path_for_type, so the route is defined in a single place.
#
# @param project_media the item whose type selects the endpoint
# @return [String] the path produced by delete_path_for_type
def delete_path(project_media)
  delete_path_for_type(get_type(project_media))
end

# Builds the delete endpoint path for a raw type name.
#
# @param type the media type used as the first path segment (converted with to_s)
# @return [String] "/<type>/similarity/"
def delete_path_for_type(type)
  "/" + type.to_s + "/similarity/"
end

Expand Down Expand Up @@ -122,6 +133,10 @@ def request(method, path, params, retries=3)
end
end

# Issues a "delete" request for an already-built parameter hash, selecting
# the endpoint from a raw type name instead of a project media item.
#
# @param params the request payload, passed through unchanged
# @param type the type name handed to delete_path_for_type
# @return whatever request returns
def request_delete_from_raw(params, type)
  endpoint = delete_path_for_type(type)
  request("delete", endpoint, params)
end

# Issues a "delete" request for a project media item, using the endpoint
# that delete_path resolves for that item.
#
# @param data the request payload, passed through unchanged
# @param project_media the item handed to delete_path
# @return whatever request returns
def request_delete(data, project_media)
  endpoint = delete_path(project_media)
  request("delete", endpoint, data)
end
Expand All @@ -148,28 +163,32 @@ def get_type(project_media)
type
end

# Returns the MD5 hex digest of a value, or nil when the value is nil.
#
# @param value [String, nil] the content to hash
# @return [String, nil] hexadecimal MD5 digest, or nil for a nil value
def content_hash_for_value(value)
  return nil if value.nil?

  Digest::MD5.hexdigest(value)
end

# Returns the content hash for a project media item and field.
#
# The source of the hash depends on the item, checked in this order:
# - field listed in Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS: MD5 of the
#   field's value, or nil when that value is nil (hashing nil would raise);
# - link: MD5 of the media URL;
# - TemporaryProjectMedia: the value cached under "url_sha:<url>";
# - uploaded media: the file name up to its first dot;
# - anything else: MD5 of the field's value converted to a string.
#
# @param project_media the item to hash
# @param field the name of the field to read through send
# @return [String, nil] the content hash, or nil when none can be derived
def content_hash(project_media, field)
  if Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS.include?(field)
    content_hash_for_value(project_media.send(field))
  elsif project_media.is_link?
    content_hash_for_value(project_media.media.url)
  elsif project_media.is_a?(TemporaryProjectMedia)
    Rails.cache.read("url_sha:#{project_media.url}")
  elsif project_media.is_uploaded_media?
    project_media.media.file.filename.split(".").first
  else
    content_hash_for_value(project_media.send(field).to_s)
  end
end

# Builds the base parameter hash shared by requests for a project media
# item and field.
#
# The hash always carries :doc_id and :context. :content_hash is added only
# when content_hash returns a non-nil value, so a missing hash is omitted
# rather than sent as nil.
#
# @param project_media the item the package describes
# @param field the field the package describes
# @return [Hash] the package with :doc_id, :context and optionally :content_hash
def generic_package(project_media, field)
  content_hash_value = content_hash(project_media, field)
  package = {
    doc_id: item_doc_id(project_media, field),
    context: get_context(project_media, field)
  }
  package[:content_hash] = content_hash_value unless content_hash_value.nil?
  package
end

def delete_package(project_media, field, params={}, quiet=false)
Expand Down Expand Up @@ -267,6 +286,18 @@ def store_package_text(project_media, field, params)
generic_package_text(project_media, field, params)
end

# Posts an already-built parameter hash to the asynchronous endpoint for the
# given type, adding a :suppress_search_response entry to the payload.
#
# The caller's hash is not modified; a merged copy is sent.
#
# @param params [Hash] the request payload
# @param type the type name handed to async_path_for_type
# @param suppress_search_response value stored under :suppress_search_response
# @return whatever request returns
def index_async_with_params(params, type, suppress_search_response=true)
  endpoint = async_path_for_type(type)
  payload = params.merge(suppress_search_response: suppress_search_response)
  request("post", endpoint, payload)
end

# Posts an already-built parameter hash to the synchronous endpoint that
# sync_path_for_type resolves for the given type.
#
# @param params the request payload, passed through unchanged
# @param type the type name handed to sync_path_for_type
# @return whatever request returns
def get_sync_with_params(params, type)
  endpoint = sync_path_for_type(type)
  request("post", endpoint, params)
end

# Posts an already-built parameter hash to the asynchronous endpoint that
# async_path_for_type resolves for the given type.
#
# @param params the request payload, passed through unchanged
# @param type the type name handed to async_path_for_type
# @return whatever request returns
def get_async_with_params(params, type)
  endpoint = async_path_for_type(type)
  request("post", endpoint, params)
end

def get_sync(project_media, field=nil, params={})
request_sync(
store_package(project_media, field, params),
Expand All @@ -286,6 +317,10 @@ def delete(project_media, field=nil, params={})
delete_package(project_media, field, params),
project_media
)
rescue StandardError => e
error = Error.new(e)
Rails.logger.error("[AutoTagger Bot] Exception for event `#{body['event']}`: #{error.class} - #{error.message}")
CheckSentry.notify(error, bot: "alegre", project_media: project_media, params: params, field: field)
end

def get_per_model_threshold(project_media, threshold)
Expand Down Expand Up @@ -485,25 +520,27 @@ def wait_for_results(project_media, args)
end

# Looks up items similar to a project media item, or to raw content when no
# item is supplied.
#
# Recognised keys in args: :project_media, :text, :field, :media_url,
# :threshold, :team_ids and :type.
#
# When :project_media is nil a TemporaryProjectMedia is built from the other
# arguments, used for the lookup and deleted again afterwards. Its id is the
# integer value of the MD5 of the media URL; when no URL is given (text-only
# lookups) the text is hashed instead, because hashing a nil URL would raise.
#
# @param args [Hash] lookup options, also forwarded to wait_for_results
# @return the value returned by get_similar_items_v2_callback
def get_items_with_similar_media_v2(args={})
  text = args[:text]
  media_url = args[:media_url]
  project_media = args[:project_media]
  if project_media.nil?
    project_media = TemporaryProjectMedia.new
    project_media.text = text
    project_media.field = args[:field]
    project_media.url = media_url
    # Fall back to the text so text-only lookups still get a stable id.
    project_media.id = Digest::MD5.hexdigest((media_url || text).to_s).to_i(16)
    project_media.team_id = args[:team_ids]
    project_media.type = args[:type]
  end
  get_similar_items_v2_async(project_media, nil, args[:threshold])
  wait_for_results(project_media, args)
  response = get_similar_items_v2_callback(project_media, nil)
  # Temporary items exist only for this lookup, so remove them again.
  delete(project_media, nil) if project_media.is_a?(TemporaryProjectMedia)
  response
end

def process_alegre_callback(params)
Expand All @@ -512,9 +549,11 @@ def process_alegre_callback(params)
should_relate = true
if project_media.nil?
project_media = TemporaryProjectMedia.new
project_media.text = params.dig('data', 'item', 'raw', 'text')
project_media.url = params.dig('data', 'item', 'raw', 'url')
project_media.id = params.dig('data', 'item', 'raw', 'context', 'project_media_id')
project_media.team_id = params.dig('data', 'item', 'raw', 'context', 'team_id')
project_media.field = params.dig('data', 'item', 'raw', 'context', 'field')
project_media.type = params['model_type']
should_relate = false
end
Expand Down
4 changes: 4 additions & 0 deletions app/models/concerns/project_media_getters.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ def is_image?
self.is_uploaded_image?
end

# Tells whether this item is an uploaded media item: an image, an audio
# item or a video. The predicates are tried in that order and evaluation
# stops at the first truthy answer.
def is_uploaded_media?
  image_or_audio = is_image? || is_audio?
  image_or_audio || is_video?
end

# Tells whether this item is textual, i.e. a claim or a link. The link
# predicate is only consulted when the claim predicate is falsy.
def is_text?
  claim = is_claim?
  claim || is_link?
end
Expand Down
14 changes: 8 additions & 6 deletions app/models/explainer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,24 +71,26 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp)

# Index title
params = {
content_hash: Bot::Alegre.content_hash_for_value(explainer.title),
doc_id: Digest::MD5.hexdigest(['explainer', explainer.id, 'title'].join(':')),
context: base_context.merge({ field: 'title' }),
text: explainer.title,
models: ALEGRE_MODELS_AND_THRESHOLDS.keys,
context: base_context.merge({ field: 'title' })
}
Bot::Alegre.request('post', '/text/similarity/', params)
Bot::Alegre.index_async_with_params(params, "text")

# Index paragraphs
count = 0
explainer.description.to_s.gsub(/\r\n?/, "\n").split(/\n+/).reject{ |paragraph| paragraph.strip.blank? }.each do |paragraph|
count += 1
params = {
content_hash: Bot::Alegre.content_hash_for_value(paragraph.strip),
doc_id: Digest::MD5.hexdigest(['explainer', explainer.id, 'paragraph', count].join(':')),
context: base_context.merge({ paragraph: count }),
text: paragraph.strip,
models: ALEGRE_MODELS_AND_THRESHOLDS.keys,
context: base_context.merge({ paragraph: count })
}
Bot::Alegre.request('post', '/text/similarity/', params)
Bot::Alegre.index_async_with_params(params, "text")
end

# Remove paragraphs that don't exist anymore (we delete after updating in order to avoid race conditions)
Expand All @@ -99,7 +101,7 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp)
quiet: true,
context: base_context.merge({ paragraph: count })
}
Bot::Alegre.request('delete', '/text/similarity/', params)
Bot::Alegre.request_delete_from_raw(params, "text")
end
end

Expand All @@ -114,7 +116,7 @@ def self.search_by_similarity(text, language, team_id)
language: language
}
}
response = Bot::Alegre.request('post', '/text/similarity/search/', params)
response = Bot::Alegre.get_async_with_params(params, "text")
results = response['result'].to_a.sort_by{ |result| result['_score'] }
explainer_ids = results.collect{ |result| result.dig('_source', 'context', 'explainer_id').to_i }.uniq.first(3)
explainer_ids.empty? ? Explainer.none : Explainer.where(team_id: team_id, id: explainer_ids)
Expand Down
10 changes: 5 additions & 5 deletions test/lib/smooch_nlu_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def create_team_with_smooch_bot_installed
team = create_team_with_smooch_bot_installed
nlu = SmoochNlu.new(team.slug)
nlu.enable!
Bot::Alegre.expects(:request).with{ |x, y, _z| x == 'post' && y == '/text/similarity/' }.once
Bot::Alegre.expects(:request).with{ |x, y, _z| x == 'post' && y == '/similarity/sync/text' }.once
nlu.add_keyword_to_menu_option('en', 'main', 0, 'subscribe')
expected_output = {
'en' => {
Expand All @@ -85,17 +85,17 @@ def create_team_with_smooch_bot_installed
end

# Adding a keyword that is not registered yet must trigger exactly one POST
# to the synchronous text similarity endpoint.
test 'should add keyword if it does not exist' do
  Bot::Alegre.expects(:request).with{ |x, y, _z| x == 'post' && y == '/similarity/sync/text' }.once
  team = create_team_with_smooch_bot_installed
  SmoochNlu.new(team.slug).add_keyword_to_menu_option('en', 'main', 0, 'subscribe to the newsletter')
end

# Adding the same keyword twice must only reach the synchronous text
# similarity endpoint for the first addition.
test 'should not add keyword if it exists' do
  team = create_team_with_smooch_bot_installed
  nlu = SmoochNlu.new(team.slug)
  # First addition: the keyword is new, so one request is expected.
  Bot::Alegre.expects(:request).with{ |x, y, _z| x == 'post' && y == '/similarity/sync/text' }.once
  nlu.add_keyword_to_menu_option('en', 'main', 0, 'subscribe to the newsletter')
  # Second addition: the keyword already exists, so no request is expected.
  Bot::Alegre.expects(:request).with{ |x, y, _z| x == 'post' && y == '/similarity/sync/text' }.never
  nlu.add_keyword_to_menu_option('en', 'main', 0, 'subscribe to the newsletter')
end

Expand All @@ -114,7 +114,7 @@ def create_team_with_smooch_bot_installed
end

test 'should return a menu option if NLU is enabled' do
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/text/similarity/search/' && z[:text] =~ /newsletter/ }.returns({ 'result' => [
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/similarity/sync/text' && z[:text] =~ /newsletter/ }.returns({ 'result' => [
{ '_score' => 0.9, '_source' => { 'context' => { 'menu_option_id' => 'test' } } },
]})
team = create_team_with_smooch_bot_installed
Expand Down
22 changes: 12 additions & 10 deletions test/models/bot/smooch_6_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def send_message_outside_24_hours_window(template, pm = nil)

test "should submit query without details on tipline bot v2" do
WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers
WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers
claim = 'This is a test claim'
send_message 'hello', '1', '1', random_string, random_string, claim, random_string, random_string, '1'
assert_saved_query_type 'default_requests'
Expand Down Expand Up @@ -208,6 +209,7 @@ def send_message_outside_24_hours_window(template, pm = nil)
end

test "should submit query with details on tipline bot v2" do
WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers
WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers
claim = 'This is a test claim'
send_message 'hello', '1', '1', random_string, '2', random_string, claim, '1'
Expand Down Expand Up @@ -285,7 +287,7 @@ def send_message_outside_24_hours_window(template, pm = nil)
end

test "should submit query and handle search error on tipline bot v2" do
WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers
WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers
CheckSearch.any_instance.stubs(:medias).raises(StandardError)
Sidekiq::Testing.inline! do
send_message 'hello', '1', '1', 'Foo bar', '1'
Expand Down Expand Up @@ -384,7 +386,7 @@ def send_message_outside_24_hours_window(template, pm = nil)
ProjectMedia.any_instance.stubs(:report_status).returns('published')
ProjectMedia.any_instance.stubs(:analysis_published_article_url).returns(random_url)
Bot::Alegre.stubs(:get_merged_similar_items).returns({ create_project_media.id => { score: 0.9 } })
WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers
WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers
Sidekiq::Testing.inline! do
send_message 'hello', '1', '1', "Foo bar foo bar #{url} foo bar", '1'
end
Expand Down Expand Up @@ -693,7 +695,7 @@ def send_message_outside_24_hours_window(template, pm = nil)
pm = create_project_media team: @team
publish_report(pm, {}, nil, { language: 'pt', use_visual_card: false })
Bot::Smooch.stubs(:get_search_results).returns([pm])
WebMock.stub_request(:post, /\/text\/similarity\/search\//).to_return(body: {}.to_json) # For explainers
WebMock.stub_request(:post, /\/similarity\/async\/text/).to_return(body: {}.to_json) # For explainers
Sidekiq::Testing.inline! do
send_message 'hello', '1', '1', 'Foo bar', '1'
end
Expand Down Expand Up @@ -807,9 +809,9 @@ def send_message_outside_24_hours_window(template, pm = nil)

test 'should process menu option using NLU' do
# Mock any call to Alegre like `POST /similarity/sync/text` with a "text" parameter that contains "want"
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/text/similarity/' && z[:text] =~ /want/ }.returns(true)
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/similarity/sync/text' && z[:text] =~ /want/ }.returns(true)
# Mock any call to Alegre like `POST /similarity/sync/text` with a "text" parameter that does not contain "want"
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/text/similarity/search/' && (z[:text] =~ /want/).nil? }.returns({ 'result' => [] })
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/similarity/sync/text' && (z[:text] =~ /want/).nil? }.returns({ 'result' => [] })

# Enable NLU and add a couple of keywords for the newsletter menu option
nlu = SmoochNlu.new(@team.slug)
Expand All @@ -822,7 +824,7 @@ def send_message_outside_24_hours_window(template, pm = nil)
subscription_option_id = @installation.get_smooch_workflows[0]['smooch_state_main']['smooch_menu_options'][2]['smooch_menu_option_id']

# Mock a call to Alegre like `POST /similarity/sync/text` with a "text" parameter that contains "want"
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/text/similarity/search/' && z[:text] =~ /want/ }.returns({ 'result' => [
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/similarity/sync/text' && z[:text] =~ /want/ }.returns({ 'result' => [
{ '_score' => 0.9, '_source' => { 'context' => { 'menu_option_id' => subscription_option_id } } },
{ '_score' => 0.2, '_source' => { 'context' => { 'menu_option_id' => query_option_id } } }
]})
Expand All @@ -836,7 +838,7 @@ def send_message_outside_24_hours_window(template, pm = nil)
assert_state 'main'

# Mock a call to Alegre like `POST /similarity/sync/text` with a "text" parameter that contains "want"
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/text/similarity/search/' && z[:text] =~ /want/ }.returns({ 'result' => [
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/similarity/sync/text' && z[:text] =~ /want/ }.returns({ 'result' => [
{ '_score' => 0.96, '_source' => { 'context' => { 'menu_option_id' => subscription_option_id } } },
{ '_score' => 0.91, '_source' => { 'context' => { 'menu_option_id' => query_option_id } } }
]})
Expand Down Expand Up @@ -875,9 +877,9 @@ def send_message_outside_24_hours_window(template, pm = nil)
Sidekiq::Testing.fake! do
WebMock.disable_net_connect! allow: /#{CheckConfig.get('elasticsearch_host')}|#{CheckConfig.get('storage_endpoint')}/
# Mock any call to Alegre like `POST /similarity/sync/text` with a "text" parameter that contains "who are you"
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/text/similarity/' && z[:text] =~ /who are you/ }.returns(true)
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/similarity/sync/text' && z[:text] =~ /who are you/ }.returns(true)
# Mock any call to Alegre like `POST /similarity/sync/text` with a "text" parameter that does not contain "who are you"
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/text/similarity/search/' && (z[:text] =~ /who are you/).nil? }.returns({ 'result' => [] })
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/similarity/sync/text' && (z[:text] =~ /who are you/).nil? }.returns({ 'result' => [] })

# Enable NLU and add a couple of keywords to a new "About Us" resource
nlu = SmoochNlu.new(@team.slug)
Expand All @@ -887,7 +889,7 @@ def send_message_outside_24_hours_window(template, pm = nil)
r.add_keyword('who are you')

# Mock a call to Alegre like `POST /similarity/sync/text` with a "text" parameter that contains "who are you"
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/text/similarity/search/' && z[:text] =~ /who are you/ }.returns({ 'result' => [
Bot::Alegre.stubs(:request).with{ |x, y, z| x == 'post' && y == '/similarity/sync/text' && z[:text] =~ /who are you/ }.returns({ 'result' => [
{ '_score' => 0.9, '_source' => { 'context' => { 'resource_id' => 0 } } },
{ '_score' => 0.8, '_source' => { 'context' => { 'resource_id' => r.id } } }
]})
Expand Down
Loading
Loading