Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CV2-4719 add unique identifier as doc_id at check-api level #1926

Merged
merged 12 commits into from
Jun 24, 2024
15 changes: 15 additions & 0 deletions app/models/concerns/alegre_v2.rb
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,23 @@ def get_type(project_media)
type
end

# Computes a content-derived identifier for an item; generic_package sends it
# as the `content_hash` value next to `doc_id`.
#
# project_media - the item to identify (must respond to is_link?, is_text?,
#                 media and, for text fields, the method named by `field`).
# field         - name of the text field being indexed, or nil for media items.
#
# Returns:
#   * text similarity field -> MD5 hex digest of the field's text
#   * link                  -> MD5 hex digest of the media URL
#   * TemporaryProjectMedia -> whatever is cached under "url_sha:<url>"
#                              (written by write_file_to_s3 with a short expiry,
#                              so this can be nil once the entry is gone)
#   * other non-text media  -> the stored filename up to its first "."
#   * anything else         -> nil
def content_hash(project_media, field)
  if Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS.include?(field)
    # to_s guards against a nil field value: Digest::MD5.hexdigest(nil)
    # raises TypeError, which would abort the whole request package.
    return Digest::MD5.hexdigest(project_media.send(field).to_s)
  end

  if project_media.is_link?
    Digest::MD5.hexdigest(project_media.media.url)
  elsif project_media.is_a?(TemporaryProjectMedia)
    Rails.cache.read("url_sha:#{project_media.url}")
  elsif !project_media.is_text?
    # to_s keeps a missing filename from raising NoMethodError; the result
    # is nil in that case ("".split(".") is empty).
    project_media.media.file.filename.to_s.split(".").first
  end
end

def generic_package(project_media, field)
{
content_hash: content_hash(project_media, field),
doc_id: item_doc_id(project_media, field),
context: get_context(project_media, field)
}
Expand Down
14 changes: 6 additions & 8 deletions app/models/concerns/smooch_capi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,13 @@ def store_capi_media(media_id, mime_type)
req = Net::HTTP::Get.new(uri.request_uri, 'Content-Type' => 'application/json', 'Authorization' => "Bearer #{self.config['capi_permanent_token']}")
response = http.request(req)
media_url = JSON.parse(response.body)['url']

uri = URI(media_url)
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
req = Net::HTTP::Get.new(uri.request_uri, 'Authorization' => "Bearer #{self.config['capi_permanent_token']}")
response = http.request(req)
path = "capi/#{media_id}"
CheckS3.write(path, mime_type, response.body)
CheckS3.public_url(path)
self.write_file_to_s3(
media_url,
path,
mime_type,
{'Authorization' => "Bearer #{self.config['capi_permanent_token']}"}
)
end

def handle_capi_system_message(message)
Expand Down
15 changes: 13 additions & 2 deletions app/models/concerns/smooch_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -198,13 +198,24 @@ def save_locally_and_return_url(media_url, type, feed_id)
audio: 'audio/ogg',
video: 'video/mp4'
}[type.to_sym]
path = "feed/#{feed.id}/#{SecureRandom.hex}"
self.write_file_to_s3(
media_url,
path,
mime,
headers
)
end

# Downloads the file at media_url and stores it through CheckS3.
#
# media_url - URL to fetch with a GET request.
# path      - CheckS3 path to write the downloaded body to.
# mime      - content type passed to CheckS3.write.
# headers   - request headers passed to Net::HTTP::Get.new.
#
# Also caches the MD5 hex digest of the downloaded body under
# "url_sha:<media_url>"; content_hash reads that key for temporary media.
#
# Returns the value of CheckS3.public_url(path).
def write_file_to_s3(media_url, path, mime, headers)
uri = URI(media_url)
http = Net::HTTP.new(uri.host, uri.port)
# TLS is enabled only when the URL scheme is https.
http.use_ssl = uri.scheme == 'https'
req = Net::HTTP::Get.new(uri.request_uri, headers)
response = http.request(req)
# NOTE(review): the next two lines look like the pre-change side of this diff
# hunk (`feed` is not a parameter or local of this method, and the write is
# repeated below) — confirm they are absent from the merged file.
path = "feed/#{feed.id}/#{SecureRandom.hex}"
CheckS3.write(path, mime, response.body)
body = response.body
CheckS3.write(path, mime, body)
# Cache entry is written with expires_in: 60*3 (presumably seconds, i.e.
# three minutes — confirm against the configured cache store).
Rails.cache.write("url_sha:#{media_url}", Digest::MD5.hexdigest(body), expires_in: 60*3)
CheckS3.public_url(path)
end

Expand Down
4 changes: 2 additions & 2 deletions test/controllers/elastic_search_9_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,10 @@ def setup
# Text extraction
Bot::Alegre.unstub(:media_file_url)
pm = create_project_media team: team, media: create_uploaded_image, disable_es_callbacks: false
WebMock.stub_request(:post, 'http://alegre/similarity/async/image').with(body: {doc_id: Bot::Alegre.item_doc_id(pm), context: {:has_custom_id=>true, :project_media_id=>pm.id, :team_id=>pm.team_id, :temporary_media=>false}, threshold: 0.89, url: "some/path", confirmed: false}).to_return(body: {
WebMock.stub_request(:post, 'http://alegre/similarity/async/image').with(body: {content_hash: Bot::Alegre.content_hash(pm, nil), doc_id: Bot::Alegre.item_doc_id(pm), context: {:has_custom_id=>true, :project_media_id=>pm.id, :team_id=>pm.team_id, :temporary_media=>false}, threshold: 0.89, url: "some/path", confirmed: false}).to_return(body: {
"result": []
}.to_json)
WebMock.stub_request(:post, 'http://alegre/similarity/async/image').with(body: {doc_id: Bot::Alegre.item_doc_id(pm), context: {:has_custom_id=>true, :project_media_id=>pm.id, :team_id=>pm.team_id, :temporary_media=>false}, threshold: 0.95, url: "some/path", confirmed: true}).to_return(body: {
WebMock.stub_request(:post, 'http://alegre/similarity/async/image').with(body: {content_hash: Bot::Alegre.content_hash(pm, nil), doc_id: Bot::Alegre.item_doc_id(pm), context: {:has_custom_id=>true, :project_media_id=>pm.id, :team_id=>pm.team_id, :temporary_media=>false}, threshold: 0.95, url: "some/path", confirmed: true}).to_return(body: {
"result": []
}.to_json)
Bot::Alegre.stubs(:media_file_url).with(pm).returns("some/path")
Expand Down
12 changes: 6 additions & 6 deletions test/models/bot/alegre_2_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def teardown
pm1 = create_project_media team: @team, media: create_uploaded_video
pm2 = create_project_media team: @team, media: create_uploaded_video
pm3 = create_project_media team: @team, media: create_uploaded_video
params = {:doc_id => Bot::Alegre.item_doc_id(pm3), :context => {:team_id => pm3.team_id, :project_media_id => pm3.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
params = {:content_hash=>Bot::Alegre.content_hash(pm3, nil), :doc_id => Bot::Alegre.item_doc_id(pm3), :context => {:team_id => pm3.team_id, :project_media_id => pm3.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
Bot::Alegre.stubs(:request).with('post', '/similarity/async/video', params.merge({ threshold: 0.9, confirmed: false })).returns(true)
Bot::Alegre.stubs(:request).with('post', '/similarity/async/video', params.merge({ threshold: 0.9, confirmed: true })).returns(true)
Redis.any_instance.stubs(:get).returns({
Expand Down Expand Up @@ -70,7 +70,7 @@ def teardown
pm1 = create_project_media team: @team, media: create_uploaded_audio
pm2 = create_project_media team: @team, media: create_uploaded_audio
pm3 = create_project_media team: @team, media: create_uploaded_audio
params = {:doc_id => Bot::Alegre.item_doc_id(pm3), :context => {:team_id => pm3.team_id, :project_media_id => pm3.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
params = {:content_hash=>Bot::Alegre.content_hash(pm3, nil), :doc_id => Bot::Alegre.item_doc_id(pm3), :context => {:team_id => pm3.team_id, :project_media_id => pm3.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
Bot::Alegre.stubs(:request).with('post', '/similarity/async/audio', params.merge({ threshold: 0.9, confirmed: false })).returns(true)
Bot::Alegre.stubs(:request).with('post', '/similarity/async/audio', params.merge({ threshold: 0.9, confirmed: true })).returns(true)

Expand Down Expand Up @@ -110,8 +110,8 @@ def teardown
pm1 = create_project_media team: @team, media: create_uploaded_video
pm2 = create_project_media team: @team, media: create_uploaded_audio
pm3 = create_project_media team: @team, media: create_uploaded_audio
request_params = {:doc_id=>Bot::Alegre.item_doc_id(pm3), :context=>{:team_id=>pm3.team_id, :project_media_id=>pm3.id, :has_custom_id=>true, :temporary_media=>false}, :url=>@media_path, :threshold=>0.9, :confirmed=>true}
request_params_unconfirmed = {:doc_id=>Bot::Alegre.item_doc_id(pm3), :context=>{:team_id=>pm3.team_id, :project_media_id=>pm3.id, :has_custom_id=>true, :temporary_media=>false}, :url=>@media_path, :threshold=>0.9, :confirmed=>false}
request_params = {:content_hash=>Bot::Alegre.content_hash(pm3, nil), :doc_id=>Bot::Alegre.item_doc_id(pm3), :context=>{:team_id=>pm3.team_id, :project_media_id=>pm3.id, :has_custom_id=>true, :temporary_media=>false}, :url=>@media_path, :threshold=>0.9, :confirmed=>true}
request_params_unconfirmed = {:content_hash=>Bot::Alegre.content_hash(pm3, nil), :doc_id=>Bot::Alegre.item_doc_id(pm3), :context=>{:team_id=>pm3.team_id, :project_media_id=>pm3.id, :has_custom_id=>true, :temporary_media=>false}, :url=>@media_path, :threshold=>0.9, :confirmed=>false}
Bot::Alegre.stubs(:request).with('post', '/similarity/async/audio', request_params).returns({
result: [
{
Expand Down Expand Up @@ -221,7 +221,7 @@ def teardown
}
]
}.with_indifferent_access
params = {:doc_id => Bot::Alegre.item_doc_id(pm3), :context => {:team_id => pm3.team_id, :project_media_id => pm3.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
params = {:content_hash=>Bot::Alegre.content_hash(pm3, nil), :doc_id => Bot::Alegre.item_doc_id(pm3), :context => {:team_id => pm3.team_id, :project_media_id => pm3.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
Bot::Alegre.stubs(:request).with('post', '/similarity/async/image', params.merge({ threshold: 0.89, confirmed: false })).returns(result)
Bot::Alegre.stubs(:request).with('post', '/similarity/async/image', params.merge({ threshold: 0.95, confirmed: true })).returns(result)
Bot::Alegre.stubs(:media_file_url).with(pm3).returns(@media_path)
Expand Down Expand Up @@ -289,7 +289,7 @@ def teardown
relationship_type: Relationship.suggested_type
}
}.to_yaml)
params = {:doc_id => Bot::Alegre.item_doc_id(pm1a), :context => {:team_id => pm1a.team_id, :project_media_id => pm1a.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
params = {:content_hash=>Bot::Alegre.content_hash(pm1a, nil), :doc_id => Bot::Alegre.item_doc_id(pm1a), :context => {:team_id => pm1a.team_id, :project_media_id => pm1a.id, :has_custom_id => true, :temporary_media => false}, :url => @media_path}
Bot::Alegre.stubs(:media_file_url).with(pm1a).returns(@media_path)
Bot::Alegre.stubs(:request).with('post', '/similarity/async/image', params.merge({ threshold: 0.89, confirmed: false })).returns(true)
Bot::Alegre.stubs(:request).with('post', '/similarity/async/image', params.merge({ threshold: 0.95, confirmed: true })).returns(true)
Expand Down
8 changes: 4 additions & 4 deletions test/models/bot/alegre_3_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,10 @@ def teardown
Bot::Alegre.stubs(:media_file_url).returns(media_file_url)

pm1 = create_project_media team: @pm.team, media: create_uploaded_audio(file: 'rails.mp3')
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=> false}).to_return(body: {
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:content_hash=>Bot::Alegre.content_hash(pm1, nil), :doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=> false}).to_return(body: {
"result": []
}.to_json)
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=> true}).to_return(body: {
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:content_hash=>Bot::Alegre.content_hash(pm1, nil), :doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=> true}).to_return(body: {
"result": []
}.to_json)
WebMock.stub_request(:post, 'http://alegre/audio/transcription/result/').with(body: {job_name: "0c481e87f2774b1bd41a0a70d9b70d11"}).to_return(body: { 'job_status' => 'DONE' }.to_json)
Expand Down Expand Up @@ -160,10 +160,10 @@ def teardown
Bot::Alegre.stubs(:media_file_url).returns(media_file_url)

pm1 = create_project_media team: @pm.team, media: create_uploaded_audio(file: 'rails.mp3')
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=>true}).to_return(body: {
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:content_hash=>Bot::Alegre.content_hash(pm1, nil), :doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=>true}).to_return(body: {
"result": []
}.to_json)
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=>false}).to_return(body: {
WebMock.stub_request(:post, "http://alegre/similarity/async/audio").with(body: {:content_hash=>Bot::Alegre.content_hash(pm1, nil), :doc_id=>Bot::Alegre.item_doc_id(pm1), :context=>{:team_id=>pm1.team_id, :project_media_id=>pm1.id, :has_custom_id=>true, :temporary_media=>false}, :url=>media_file_url, :threshold=>0.9, :confirmed=>false}).to_return(body: {
"result": []
}.to_json)
WebMock.stub_request(:post, 'http://alegre/audio/transcription/').with({
Expand Down
Loading
Loading