Skip to content

Commit

Permalink
CV2-5087 move Articles side effecting saves to to it via presto
Browse files Browse the repository at this point in the history
  • Loading branch information
DGaffney committed Aug 17, 2024
1 parent 8400e94 commit 9913685
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 12 deletions.
35 changes: 29 additions & 6 deletions app/models/concerns/alegre_v2.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
require 'active_support/concern'
class AlegreTimeoutError < StandardError; end
class TemporaryProjectMedia
attr_accessor :team_id, :id, :url, :type
attr_accessor :team_id, :id, :url, :text, :type
def media
media_type_map = {
"claim" => "Claim",
Expand Down Expand Up @@ -55,11 +55,18 @@ def sync_path_for_type(type)
end

def async_path(project_media)
"/similarity/async/#{get_type(project_media)}"
self.async_path_for_type(get_type(project_media))
end

def async_path_for_type(type)
"/similarity/async/#{type}"
end

def delete_path(project_media)
type = get_type(project_media)
self.delete_path_for_type(get_type(project_media))
end

def delete_path_for_type(type)
"/#{type}/similarity/"
end

Expand Down Expand Up @@ -122,6 +129,10 @@ def request(method, path, params, retries=3)
end
end

def request_delete_from_raw(params, type)
request("delete", delete_path_for_type(type), params)
end

def request_delete(data, project_media)
request("delete", delete_path(project_media), data)
end
Expand All @@ -148,18 +159,22 @@ def get_type(project_media)
type
end

def content_hash_for_value(value)
Digest::MD5.hexdigest(value)
end

def content_hash(project_media, field)
if Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS.include?(field)
Digest::MD5.hexdigest(project_media.send(field))
content_hash_for_value(project_media.send(field))
else
if project_media.is_link?
return Digest::MD5.hexdigest(project_media.media.url)
return content_hash_for_value(project_media.media.url)
elsif project_media.is_a?(TemporaryProjectMedia)
return Rails.cache.read("url_sha:#{project_media.url}")
elsif !project_media.is_text?
return project_media.media.file.filename.split(".").first
else
return Digest::MD5.hexdigest(project_media.send(field).to_s)
return content_hash_for_value(project_media.send(field).to_s)
end
end
end
Expand Down Expand Up @@ -267,6 +282,14 @@ def store_package_text(project_media, field, params)
generic_package_text(project_media, field, params)
end

def get_sync_raw_params(params, type)
request("post", sync_path_for_type(type), params)
end

def get_async_raw_params(params, type)
request("post", async_path_for_type(type), params)
end

def get_sync(project_media, field=nil, params={})
request_sync(
store_package(project_media, field, params),
Expand Down
14 changes: 8 additions & 6 deletions app/models/explainer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,24 +63,26 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp)

# Index title
params = {
content_hash: Bot::Alegre.content_hash_for_value(explainer.title),
doc_id: Digest::MD5.hexdigest(['explainer', explainer.id, 'title'].join(':')),
context: base_context.merge({ field: 'title' }),
text: explainer.title,
models: ALEGRE_MODELS_AND_THRESHOLDS.keys,
context: base_context.merge({ field: 'title' })
}
Bot::Alegre.request('post', '/text/similarity/', params)
Bot::Alegre.get_async_raw_params(params, "text")

# Index paragraphs
count = 0
explainer.description.to_s.gsub(/\r\n?/, "\n").split(/\n+/).reject{ |paragraph| paragraph.strip.blank? }.each do |paragraph|
count += 1
params = {
content_hash: Bot::Alegre.content_hash_for_value(paragraph.strip),
doc_id: Digest::MD5.hexdigest(['explainer', explainer.id, 'paragraph', count].join(':')),
context: base_context.merge({ paragraph: count }),
text: paragraph.strip,
models: ALEGRE_MODELS_AND_THRESHOLDS.keys,
context: base_context.merge({ paragraph: count })
}
Bot::Alegre.request('post', '/text/similarity/', params)
Bot::Alegre.get_async_raw_params(params, "text")
end

# Remove paragraphs that don't exist anymore (we delete after updating in order to avoid race conditions)
Expand All @@ -91,7 +93,7 @@ def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp)
quiet: true,
context: base_context.merge({ paragraph: count })
}
Bot::Alegre.request('delete', '/text/similarity/', params)
Bot::Alegre.request_delete_from_raw(params, type)
end
end

Expand All @@ -106,7 +108,7 @@ def self.search_by_similarity(text, language, team_id)
language: language
}
}
response = Bot::Alegre.request('post', '/text/similarity/search/', params)
Bot::Alegre.get_async_raw_params(params, "text")
results = response['result'].to_a.sort_by{ |result| result['_score'] }
explainer_ids = results.collect{ |result| result.dig('_source', 'context', 'explainer_id').to_i }.uniq.first(3)
explainer_ids.empty? ? Explainer.none : Explainer.where(team_id: team_id, id: explainer_ids)
Expand Down

0 comments on commit 9913685

Please sign in to comment.