Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Be able to export a full list of media clusters. #2024

Merged
merged 8 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .codeclimate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ checks:
enabled: false
method-complexity:
config:
threshold: 22
threshold: 25
method-count:
config:
threshold: 65
Expand Down
82 changes: 56 additions & 26 deletions lib/check_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ def team
Team.find_by_id(team_id)
end

# Returns the value of the @feed instance variable (nil if it was never assigned).
def feed
@feed
end

# Always returns an empty array.
def teams
[]
end
Expand Down Expand Up @@ -335,40 +339,66 @@ def medias_get_search_result(query)

# Builds the rows of a CSV export for the search described by `query` within the team `team_id`.
# Returns `data`: an array whose first element is the header row, followed by one row per exported item.
# NOTE(review): this listing is a diff rendered without +/- markers, so superseded lines (e.g. the
# `search.medias.find_each` loop) appear next to their replacements (the `search_after` loop) — confirm
# against the merged file before relying on the exact statement order shown here.
def self.get_exported_data(query, team_id)
team = Team.find(team_id)
Team.current = team
search = CheckSearch.new(query, nil, team_id)
# Truthy only when the search has a feed whose data_points equal [1]; selects the fact-check header and row layout below
feed_sharing_only_fact_checks = (search.feed && search.feed.data_points == [1])

# Prepare the export
data = []
header = ['Claim', 'Item page URL', 'Status', 'Created by', 'Submitted at', 'Published at', 'Number of media', 'Tags']
fields = team.team_tasks.sort
fields.each { |tt| header << tt.label }
header = nil
if feed_sharing_only_fact_checks
header = ['Fact-check title', 'Fact-check summary', 'Fact-check URL', 'Tags', 'Workspace', 'Updated at', 'Rating']
else
header = ['Claim', 'Item page URL', 'Status', 'Created by', 'Submitted at', 'Published at', 'Number of media', 'Tags']
# One extra column per team task, labelled with the task's label
fields = team.team_tasks.sort
fields.each { |tt| header << tt.label }
end
data << header

# No pagination for the export
search.set_option('esoffset', 0)
search.set_option('eslimit', CheckConfig.get(:export_csv_maximum_number_of_results, 10000, :integer))

# Iterate through each result and generate an output row for the CSV
search.medias.find_each do |pm|
row = [
pm.claim_description&.description,
pm.full_url,
pm.status_i18n,
pm.author_name.to_s.gsub(/ \[.*\]$/, ''),
pm.created_at.strftime("%Y-%m-%d %H:%M:%S"),
pm.published_at&.strftime("%Y-%m-%d %H:%M:%S"),
pm.linked_items_count,
pm.tags_as_sentence
]
annotations = pm.get_annotations('task').map(&:load)
fields.each do |field|
annotation = annotations.find { |a| a.team_task_id == field.id }
answer = (annotation ? (begin annotation.first_response_obj.file_data[:file_urls].join("\n") rescue annotation.first_response.to_s end) : '')
answer = begin JSON.parse(answer).collect{ |x| x['url'] }.join(', ') rescue answer end
row << answer
# Paginate
# Each pass asks $repository for up to 10000 hits matching search.medias_query, sorted by ascending annotated_id,
# and passes search_after, which holds the largest ID of the previous pass ([0] on the first pass)
search_after = [0]
while !search_after.empty?
result = $repository.search(_source: 'annotated_id', query: search.medias_query, sort: [{ annotated_id: { order: :asc } }], size: 10000, search_after: search_after).results
# Distinct, non-nil annotated_id values of this page, converted to integers
ids = result.collect{ |i| i['annotated_id'] }.uniq.compact.map(&:to_i)

# Iterate through each result and generate an output row for the CSV
ProjectMedia.where(id: ids, team_id: search.team_condition(team_id)).find_each do |pm|
row = nil
if feed_sharing_only_fact_checks
# Values in the same order as the fact-check header above
row = [
pm.fact_check_title,
pm.fact_check_summary,
pm.fact_check_url,
pm.tags_as_sentence,
pm.team_name,
pm.updated_at_timestamp,
pm.status
]
else
# Values in the same order as the default header; the author name has any trailing " [...]" suffix removed
row = [
pm.claim_description&.description,
pm.full_url,
pm.status_i18n,
pm.author_name.to_s.gsub(/ \[.*\]$/, ''),
pm.created_at.strftime("%Y-%m-%d %H:%M:%S"),
pm.published_at&.strftime("%Y-%m-%d %H:%M:%S"),
pm.linked_items_count,
pm.tags_as_sentence
]
annotations = pm.get_annotations('task').map(&:load)
# One cell per team task, in header order; an empty string when the item has no annotation for that task
fields.each do |field|
annotation = annotations.find { |a| a.team_task_id == field.id }
# Use the file URLs from first_response_obj.file_data, one per line; if that raises, fall back to first_response as a string
answer = (annotation ? (begin annotation.first_response_obj.file_data[:file_urls].join("\n") rescue annotation.first_response.to_s end) : '')
# If the answer parses as JSON, replace it with the comma-separated 'url' values; if anything raises, keep it unchanged
answer = begin JSON.parse(answer).collect{ |x| x['url'] }.join(', ') rescue answer end
row << answer
end
end
data << row
end
data << row

# Continue after the largest ID of this page; an empty page yields [] here, which ends the while loop
search_after = [ids.max].compact
end

data
end

Expand Down
45 changes: 40 additions & 5 deletions test/lib/list_export_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,26 @@ def teardown
end
end

# NOTE(review): diff rendering without +/- markers — the previous test header, setup and export lines
# appear next to their replacements; confirm against the merged file.
test "should export media CSV" do
test "should export media (including child media) CSV" do
setup_elasticsearch
t = create_team
create_team_task team_id: t.id, fieldset: 'tasks'
2.times { create_project_media team: t }
# Two items in the same team, linked as source and target of a confirmed relationship
parent = create_project_media team: t, disable_es_callbacks: false
child = create_project_media team: t, disable_es_callbacks: false
create_relationship source_id: parent.id, target_id: child.id, relationship_type: Relationship.confirmed_type

export = ListExport.new(:media, '{}', t.id)
sleep 2 # Wait for indexing

# NOTE(review): show_similar presumably makes the search include the child item as well — confirm
export = ListExport.new(:media, { show_similar: true }.to_json, t.id)
csv_url = export.generate_csv_and_send_email(create_user)
response = Net::HTTP.get_response(URI(csv_url))
assert_equal 200, response.code.to_i
# Parsed with headers: true, so csv_content.size counts data rows only
csv_content = CSV.parse(response.body, headers: true)
assert_equal 2, csv_content.size
assert_equal 2, export.number_of_rows
assert_equal 2, csv_content.size
end

test "should export feed CSV" do
test "should export media feed CSV" do
t = create_team
f = create_feed team: t
2.times { f.clusters << create_cluster }
Expand All @@ -54,6 +59,36 @@ def teardown
assert_equal 2, export.number_of_rows
end

# Creates two items with a published report each and a published feed with data_points: [1] in the same team,
# then expects the media export requested with that feed's ID to contain two rows.
test "should export fact-check feed CSV" do
setup_elasticsearch
RequestStore.store[:skip_cached_field_update] = false

# Stub every GET request whose URL matches the configured private Pender URL with an empty JSON object
pender_url = CheckConfig.get('pender_url_private')
WebMock.stub_request(:get, /#{pender_url}/).to_return(body: '{}', status: 200)

t = create_team
2.times do
pm = create_project_media team: t, disable_es_callbacks: false
r = publish_report(pm, {}, nil, { language: 'en', use_visual_card: false })
# Fetch the report again as a Dynamic record and save it with state 'published' and ES callbacks not disabled
r = Dynamic.find(r.id)
r.disable_es_callbacks = false
r.set_fields = { state: 'published' }.to_json
r.save!
end
ss = create_saved_search team: t
f = create_feed team: t, data_points: [1], saved_search: ss, published: true

sleep 2 # Wait for indexing

export = ListExport.new(:media, { feed_id: f.id, feed_view: 'fact_check' }.to_json, t.id)
csv_url = export.generate_csv_and_send_email(create_user)
response = Net::HTTP.get_response(URI(csv_url))
assert_equal 200, response.code.to_i
# Parsed with headers: true, so csv_content.size counts data rows only
csv_content = CSV.parse(response.body, headers: true)
assert_equal 2, export.number_of_rows
assert_equal 2, csv_content.size
end

test "should export fact-checks CSV" do
t = create_team
2.times do
Expand Down
Loading