Be able to export a full list of media clusters. #2024

Merged · 8 commits · Sep 10, 2024
lib/check_search.rb: 33 additions & 24 deletions (57 changes)
@@ -335,7 +335,6 @@ def medias_get_search_result(query)
 
   def self.get_exported_data(query, team_id)
     team = Team.find(team_id)
-    search = CheckSearch.new(query, nil, team_id)
 
     # Prepare the export
     data = []
@@ -344,31 +343,41 @@ def self.get_exported_data(query, team_id)
     fields.each { |tt| header << tt.label }
     data << header
 
-    # No pagination for the export
-    search.set_option('esoffset', 0)
-    search.set_option('eslimit', CheckConfig.get(:export_csv_maximum_number_of_results, 10000, :integer))
-
-    # Iterate through each result and generate an output row for the CSV
-    search.medias.find_each do |pm|
-      row = [
-        pm.claim_description&.description,
-        pm.full_url,
-        pm.status_i18n,
-        pm.author_name.to_s.gsub(/ \[.*\]$/, ''),
-        pm.created_at.strftime("%Y-%m-%d %H:%M:%S"),
-        pm.published_at&.strftime("%Y-%m-%d %H:%M:%S"),
-        pm.linked_items_count,
-        pm.tags_as_sentence
-      ]
-      annotations = pm.get_annotations('task').map(&:load)
-      fields.each do |field|
-        annotation = annotations.find { |a| a.team_task_id == field.id }
-        answer = (annotation ? (begin annotation.first_response_obj.file_data[:file_urls].join("\n") rescue annotation.first_response.to_s end) : '')
-        answer = begin JSON.parse(answer).collect{ |x| x['url'] }.join(', ') rescue answer end
-        row << answer
-      end
-      data << row
-    end
+    # Paginate
+    page_size = 10000
+    search = CheckSearch.new(query, nil, team_id)
+    total = search.number_of_results
+    offset = 0
+    while offset < total
+      search = CheckSearch.new(query, nil, team_id)
+      search.set_option('eslimit', page_size)
+      search.set_option('esoffset', offset)
+
+      # Iterate through each result and generate an output row for the CSV
+      search.medias.find_each do |pm|
+        row = [
+          pm.claim_description&.description,
+          pm.full_url,
+          pm.status_i18n,
+          pm.author_name.to_s.gsub(/ \[.*\]$/, ''),
+          pm.created_at.strftime("%Y-%m-%d %H:%M:%S"),
+          pm.published_at&.strftime("%Y-%m-%d %H:%M:%S"),
+          pm.linked_items_count,
+          pm.tags_as_sentence
+        ]
+        annotations = pm.get_annotations('task').map(&:load)
+        fields.each do |field|
+          annotation = annotations.find { |a| a.team_task_id == field.id }
+          answer = (annotation ? (begin annotation.first_response_obj.file_data[:file_urls].join("\n") rescue annotation.first_response.to_s end) : '')
+          answer = begin JSON.parse(answer).collect{ |x| x['url'] }.join(', ') rescue answer end
+          row << answer
+        end
+        data << row
+      end
+
+      offset += page_size
+    end
 
     data
   end

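For context, here is a minimal Ruby sketch of how the paginated export above could be consumed. It assumes only what the diff shows: CheckSearch.get_exported_data(query, team_id) returns an array of rows whose first element is the header row, and it now pages through the search results internally. The helper name write_media_clusters_csv, the output path, and the example query hash are illustrative assumptions, not part of this PR; the file writing uses Ruby's standard csv library.

require 'csv'

# Hypothetical helper (not in the PR): write the export produced by
# CheckSearch.get_exported_data to a CSV file on disk.
def write_media_clusters_csv(query, team_id, path)
  # Header row first, then one row per item, as built in lib/check_search.rb above.
  rows = CheckSearch.get_exported_data(query, team_id)
  CSV.open(path, 'w') do |csv|
    rows.each { |row| csv << row }
  end
  path
end

# Example call; the query hash shape is an assumption for illustration only.
# write_media_clusters_csv({ 'verification_status' => ['verified'] }, team.id, '/tmp/media_clusters.csv')

Because pagination now happens inside get_exported_data, a caller like this no longer needs to set esoffset or eslimit itself; it simply receives all rows up to the total reported by number_of_results.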