Skip to content

Commit

Permalink
CV2-4719 add unique identifier as doc_id at check-api level (#1926)
Browse files Browse the repository at this point in the history
* CV2-4719 add unique identifier as doc_id at check-api level

* more tweaking for initial caching strategy

* fix typo

* add in shim for caching the sha on capi messages

* updates

* resolve testing errors

* fix last broken test param

* add tests for each branch of content_hash function

* fix typo

* updates

* refactor to DRY up file writing on messages

* Refactor as per review
  • Loading branch information
DGaffney authored Jun 24, 2024
1 parent 1d4fa70 commit 95c4af9
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 42 deletions.
15 changes: 15 additions & 0 deletions app/models/concerns/alegre_v2.rb
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,23 @@ def get_type(project_media)
type
end

# Computes a content-derived identifier for an item.
#
# project_media - the item to fingerprint; must respond to is_link? and,
#                 unless it is a TemporaryProjectMedia, to is_text?.
# field         - name of the field being indexed; may be nil for
#                 non-text media.
#
# Returns a String, or nil when no hash can be derived:
# * text similarity field: MD5 hex digest of that field's value
#   (nil when the value itself is nil);
# * link: MD5 hex digest of the media URL;
# * TemporaryProjectMedia: the value cached under "url_sha:<url>"
#   (nil on a cache miss);
# * text item queried with a non-text field: nil;
# * any other media: the stored file name up to its first ".".
#   NOTE(review): presumably that file name is already content-derived - confirm.
def content_hash(project_media, field)
  if Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS.include?(field)
    text = project_media.send(field)
    # Digest::MD5.hexdigest raises TypeError on nil; report "no hash" instead.
    return text.nil? ? nil : Digest::MD5.hexdigest(text)
  end

  return Digest::MD5.hexdigest(project_media.media.url) if project_media.is_link?
  return Rails.cache.read("url_sha:#{project_media.url}") if project_media.is_a?(TemporaryProjectMedia)
  return if project_media.is_text?

  # Safe navigation keeps a missing file name from raising NoMethodError.
  project_media.media.file.filename&.split('.')&.first
end

def generic_package(project_media, field)
{
content_hash: content_hash(project_media, field),
doc_id: item_doc_id(project_media, field),
context: get_context(project_media, field)
}
Expand Down
14 changes: 6 additions & 8 deletions app/models/concerns/smooch_capi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,13 @@ def store_capi_media(media_id, mime_type)
req = Net::HTTP::Get.new(uri.request_uri, 'Content-Type' => 'application/json', 'Authorization' => "Bearer #{self.config['capi_permanent_token']}")
response = http.request(req)
media_url = JSON.parse(response.body)['url']

uri = URI(media_url)
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
req = Net::HTTP::Get.new(uri.request_uri, 'Authorization' => "Bearer #{self.config['capi_permanent_token']}")
response = http.request(req)
path = "capi/#{media_id}"
CheckS3.write(path, mime_type, response.body)
CheckS3.public_url(path)
self.write_file_to_s3(
media_url,
path,
mime_type,
{'Authorization' => "Bearer #{self.config['capi_permanent_token']}"}
)
end

def handle_capi_system_message(message)
Expand Down
15 changes: 13 additions & 2 deletions app/models/concerns/smooch_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -198,13 +198,24 @@ def save_locally_and_return_url(media_url, type, feed_id)
audio: 'audio/ogg',
video: 'video/mp4'
}[type.to_sym]
path = "feed/#{feed.id}/#{SecureRandom.hex}"
self.write_file_to_s3(
media_url,
path,
mime,
headers
)
end

# Downloads `media_url` and stores the response body in S3 at `path`.
#
# media_url - URL to fetch; TLS is enabled only when the scheme is https.
# path      - storage path to write to, chosen by the caller.
# mime      - content type handed to CheckS3.write.
# headers   - Hash of HTTP request headers sent with the GET.
#
# Side effect: caches the MD5 hex digest of the downloaded body under
# "url_sha:<media_url>" for three minutes.
#
# Returns the result of CheckS3.public_url(path).
def write_file_to_s3(media_url, path, mime, headers)
  uri = URI(media_url)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = uri.scheme == 'https'
  response = http.request(Net::HTTP::Get.new(uri.request_uri, headers))
  body = response.body
  # Write exactly once, to the caller-supplied path: the path must not be
  # recomputed here, since this method has no `feed` in scope.
  CheckS3.write(path, mime, body)
  Rails.cache.write("url_sha:#{media_url}", Digest::MD5.hexdigest(body), expires_in: 60 * 3)
  CheckS3.public_url(path)
end

Expand Down
4 changes: 2 additions & 2 deletions test/controllers/elastic_search_9_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,10 @@ def setup
# Text extraction
Bot::Alegre.unstub(:media_file_url)
pm = create_project_media team: team, media: create_uploaded_image, disable_es_callbacks: false
WebMock.stub_request(:post, 'http://alegre/similarity/async/image').with(body: {doc_id: Bot::Alegre.item_doc_id(pm), context: {:has_custom_id=>true, :project_media_id=>pm.id, :team_id=>pm.team_id, :temporary_media=>false}, threshold: 0.89, url: "some/path", confirmed: false}).to_return(body: {
WebMock.stub_request(:post, 'http://alegre/similarity/async/image').with(body: {content_hash: Bot::Alegre.content_hash(pm, nil), doc_id: Bot::Alegre.item_doc_id(pm), context: {:has_custom_id=>true, :project_media_id=>pm.id, :team_id=>pm.team_id, :temporary_media=>false}, threshold: 0.89, url: "some/path", confirmed: false}).to_return(body: {
"result": []
}.to_json)
WebMock.stub_request(:post, 'http://alegre/similarity/async/image').with(body: {doc_id: Bot::Alegre.item_doc_id(pm), context: {:has_custom_id=>true, :project_media_id=>pm.id, :team_id=>pm.team_id, :temporary_media=>false}, threshold: 0.95, url: "some/path", confirmed: true}).to_return(body: {
WebMock.stub_request(:post, 'http://alegre/similarity/async/image').with(body: {content_hash: Bot::Alegre.content_hash(pm, nil), doc_id: Bot::Alegre.item_doc_id(pm), context: {:has_custom_id=>true, :project_media_id=>pm.id, :team_id=>pm.team_id, :temporary_media=>false}, threshold: 0.95, url: "some/path", confirmed: true}).to_return(body: {
"result": []
}.to_json)
Bot::Alegre.stubs(:media_file_url).with(pm).returns("some/path")
Expand Down
12 changes: 6 additions & 6 deletions test/models/bot/alegre_2_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def teardown
pm1 = create_project_media team: @team, media: create_uploaded_video
pm2 = create_project_media team: @team, media: create_uploaded_video
pm3 = create_project_media team: @team, media: create_uploaded_video
params = {:doc_id => Bot::Alegre.item_doc_id(pm3), :context => {:team_id => pm3.team_id, :project_media_id => pm3.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
params = {:content_hash=>Bot::Alegre.content_hash(pm3, nil), :doc_id => Bot::Alegre.item_doc_id(pm3), :context => {:team_id => pm3.team_id, :project_media_id => pm3.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
Bot::Alegre.stubs(:request).with('post', '/similarity/async/video', params.merge({ threshold: 0.9, confirmed: false })).returns(true)
Bot::Alegre.stubs(:request).with('post', '/similarity/async/video', params.merge({ threshold: 0.9, confirmed: true })).returns(true)
Redis.any_instance.stubs(:get).returns({
Expand Down Expand Up @@ -70,7 +70,7 @@ def teardown
pm1 = create_project_media team: @team, media: create_uploaded_audio
pm2 = create_project_media team: @team, media: create_uploaded_audio
pm3 = create_project_media team: @team, media: create_uploaded_audio
params = {:doc_id => Bot::Alegre.item_doc_id(pm3), :context => {:team_id => pm3.team_id, :project_media_id => pm3.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
params = {:content_hash=>Bot::Alegre.content_hash(pm3, nil), :doc_id => Bot::Alegre.item_doc_id(pm3), :context => {:team_id => pm3.team_id, :project_media_id => pm3.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
Bot::Alegre.stubs(:request).with('post', '/similarity/async/audio', params.merge({ threshold: 0.9, confirmed: false })).returns(true)
Bot::Alegre.stubs(:request).with('post', '/similarity/async/audio', params.merge({ threshold: 0.9, confirmed: true })).returns(true)

Expand Down Expand Up @@ -110,8 +110,8 @@ def teardown
pm1 = create_project_media team: @team, media: create_uploaded_video
pm2 = create_project_media team: @team, media: create_uploaded_audio
pm3 = create_project_media team: @team, media: create_uploaded_audio
request_params = {:doc_id=>Bot::Alegre.item_doc_id(pm3), :context=>{:team_id=>pm3.team_id, :project_media_id=>pm3.id, :has_custom_id=>true, :temporary_media=>false}, :url=>@media_path, :threshold=>0.9, :confirmed=>true}
request_params_unconfirmed = {:doc_id=>Bot::Alegre.item_doc_id(pm3), :context=>{:team_id=>pm3.team_id, :project_media_id=>pm3.id, :has_custom_id=>true, :temporary_media=>false}, :url=>@media_path, :threshold=>0.9, :confirmed=>false}
request_params = {:content_hash=>Bot::Alegre.content_hash(pm3, nil), :doc_id=>Bot::Alegre.item_doc_id(pm3), :context=>{:team_id=>pm3.team_id, :project_media_id=>pm3.id, :has_custom_id=>true, :temporary_media=>false}, :url=>@media_path, :threshold=>0.9, :confirmed=>true}
request_params_unconfirmed = {:content_hash=>Bot::Alegre.content_hash(pm3, nil), :doc_id=>Bot::Alegre.item_doc_id(pm3), :context=>{:team_id=>pm3.team_id, :project_media_id=>pm3.id, :has_custom_id=>true, :temporary_media=>false}, :url=>@media_path, :threshold=>0.9, :confirmed=>false}
Bot::Alegre.stubs(:request).with('post', '/similarity/async/audio', request_params).returns({
result: [
{
Expand Down Expand Up @@ -221,7 +221,7 @@ def teardown
}
]
}.with_indifferent_access
params = {:doc_id => Bot::Alegre.item_doc_id(pm3), :context => {:team_id => pm3.team_id, :project_media_id => pm3.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
params = {:content_hash=>Bot::Alegre.content_hash(pm3, nil), :doc_id => Bot::Alegre.item_doc_id(pm3), :context => {:team_id => pm3.team_id, :project_media_id => pm3.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
Bot::Alegre.stubs(:request).with('post', '/similarity/async/image', params.merge({ threshold: 0.89, confirmed: false })).returns(result)
Bot::Alegre.stubs(:request).with('post', '/similarity/async/image', params.merge({ threshold: 0.95, confirmed: true })).returns(result)
Bot::Alegre.stubs(:media_file_url).with(pm3).returns(@media_path)
Expand Down Expand Up @@ -289,7 +289,7 @@ def teardown
relationship_type: Relationship.suggested_type
}
}.to_yaml)
params = {:doc_id => Bot::Alegre.item_doc_id(pm1a), :context => {:team_id => pm1a.team_id, :project_media_id => pm1a.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
params = {:content_hash=>Bot::Alegre.content_hash(pm1a, nil), :doc_id => Bot::Alegre.item_doc_id(pm1a), :context => {:team_id => pm1a.team_id, :project_media_id => pm1a.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
Bot::Alegre.stubs(:media_file_url).with(pm1a).returns(@media_path)
Bot::Alegre.stubs(:request).with('post', '/similarity/async/image', params.merge({ threshold: 0.89, confirmed: false })).returns(true)
Bot::Alegre.stubs(:request).with('post', '/similarity/async/image', params.merge({ threshold: 0.95, confirmed: true })).returns(true)
Expand Down
8 changes: 4 additions & 4 deletions test/models/bot/alegre_3_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,10 @@ def teardown
Bot::Alegre.stubs(:media_file_url).returns(media_file_url)

pm1 = create_project_media team: @pm.team, media: create_uploaded_audio(file: 'rails.mp3')
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=> false}).to_return(body: {
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:content_hash=>Bot::Alegre.content_hash(pm1, nil), :doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=> false}).to_return(body: {
"result": []
}.to_json)
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=> true}).to_return(body: {
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:content_hash=>Bot::Alegre.content_hash(pm1, nil), :doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=> true}).to_return(body: {
"result": []
}.to_json)
WebMock.stub_request(:post, 'http://alegre/audio/transcription/result/').with(body: {job_name: "0c481e87f2774b1bd41a0a70d9b70d11"}).to_return(body: { 'job_status' => 'DONE' }.to_json)
Expand Down Expand Up @@ -160,10 +160,10 @@ def teardown
Bot::Alegre.stubs(:media_file_url).returns(media_file_url)

pm1 = create_project_media team: @pm.team, media: create_uploaded_audio(file: 'rails.mp3')
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=>true}).to_return(body: {
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:content_hash=>Bot::Alegre.content_hash(pm1, nil), :doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=>true}).to_return(body: {
"result": []
}.to_json)
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=>false}).to_return(body: {
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:content_hash=>Bot::Alegre.content_hash(pm1, nil), :doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=>false}).to_return(body: {
"result": []
}.to_json)
WebMock.stub_request(:post, 'http://alegre/audio/transcription/').with({
Expand Down
Loading

0 comments on commit 95c4af9

Please sign in to comment.