From f3501d99809d7efa4c3a11e92a2883ea9e472d40 Mon Sep 17 00:00:00 2001
From: Caio <117518+caiosba@users.noreply.github.com>
Date: Thu, 5 Sep 2024 01:40:39 -0300
Subject: [PATCH 1/7] Be able to export a full list of media clusters.

The current export limit for media lists is 10,000 items because that is the maximum size of a result window in Elasticsearch. The solution is to paginate the results.

Fixes: CV2-5205.
---
 lib/check_search.rb | 57 ++++++++++++++++++++++++++-------------------
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/lib/check_search.rb b/lib/check_search.rb
index ce8746209..b156e3f40 100644
--- a/lib/check_search.rb
+++ b/lib/check_search.rb
@@ -335,7 +335,6 @@ def medias_get_search_result(query)
 
   def self.get_exported_data(query, team_id)
     team = Team.find(team_id)
-    search = CheckSearch.new(query, nil, team_id)
 
     # Prepare the export
     data = []
@@ -344,31 +343,41 @@ def self.get_exported_data(query, team_id)
     fields.each { |tt| header << tt.label }
     data << header
 
-    # No pagination for the export
-    search.set_option('esoffset', 0)
-    search.set_option('eslimit', CheckConfig.get(:export_csv_maximum_number_of_results, 10000, :integer))
-
-    # Iterate through each result and generate an output row for the CSV
-    search.medias.find_each do |pm|
-      row = [
-        pm.claim_description&.description,
-        pm.full_url,
-        pm.status_i18n,
-        pm.author_name.to_s.gsub(/ \[.*\]$/, ''),
-        pm.created_at.strftime("%Y-%m-%d %H:%M:%S"),
-        pm.published_at&.strftime("%Y-%m-%d %H:%M:%S"),
-        pm.linked_items_count,
-        pm.tags_as_sentence
-      ]
-      annotations = pm.get_annotations('task').map(&:load)
-      fields.each do |field|
-        annotation = annotations.find { |a| a.team_task_id == field.id }
-        answer = (annotation ? (begin annotation.first_response_obj.file_data[:file_urls].join("\n") rescue annotation.first_response.to_s end) : '')
-        answer = begin JSON.parse(answer).collect{ |x| x['url'] }.join(', ') rescue answer end
-        row << answer
+    # Paginate
+    page_size = 10000
+    search = CheckSearch.new(query, nil, team_id)
+    total = search.number_of_results
+    offset = 0
+    while offset < total
+      search = CheckSearch.new(query, nil, team_id)
+      search.set_option('eslimit', page_size)
+      search.set_option('esoffset', offset)
+
+      # Iterate through each result and generate an output row for the CSV
+      search.medias.find_each do |pm|
+        row = [
+          pm.claim_description&.description,
+          pm.full_url,
+          pm.status_i18n,
+          pm.author_name.to_s.gsub(/ \[.*\]$/, ''),
+          pm.created_at.strftime("%Y-%m-%d %H:%M:%S"),
+          pm.published_at&.strftime("%Y-%m-%d %H:%M:%S"),
+          pm.linked_items_count,
+          pm.tags_as_sentence
+        ]
+        annotations = pm.get_annotations('task').map(&:load)
+        fields.each do |field|
+          annotation = annotations.find { |a| a.team_task_id == field.id }
+          answer = (annotation ? (begin annotation.first_response_obj.file_data[:file_urls].join("\n") rescue annotation.first_response.to_s end) : '')
+          answer = begin JSON.parse(answer).collect{ |x| x['url'] }.join(', ') rescue answer end
+          row << answer
+        end
+        data << row
       end
-      data << row
+
+      offset += page_size
     end
+
     data
   end
 

From 35061bcb6ab252ce6ebd59af4664b5389ebda9f5 Mon Sep 17 00:00:00 2001
From: Caio <117518+caiosba@users.noreply.github.com>
Date: Sun, 8 Sep 2024 22:01:44 -0300
Subject: [PATCH 2/7] Apply code review feedback; the export now supports more
 than 10,000 results

---
 lib/check_search.rb          | 17 ++++++++---------
 test/lib/list_export_test.rb | 13 +++++++++----
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/lib/check_search.rb b/lib/check_search.rb
index b156e3f40..e4debbfe2 100644
--- a/lib/check_search.rb
+++ b/lib/check_search.rb
@@ -335,6 +335,7 @@ def medias_get_search_result(query)
 
   def self.get_exported_data(query, team_id)
     team = Team.find(team_id)
+    Team.current = team
 
     # Prepare the export
     data = []
@@ -344,17 +345,15 @@ def self.get_exported_data(query, team_id)
     data << header
 
     # Paginate
-    page_size = 10000
     search = CheckSearch.new(query, nil, team_id)
-    total = search.number_of_results
-    offset = 0
-    while offset < total
-      search = CheckSearch.new(query, nil, team_id)
-      search.set_option('eslimit', page_size)
-      search.set_option('esoffset', offset)
+    search_after = [0]
+    while true
+      result = $repository.search(_source: 'annotated_id', query: search.medias_query, sort: [{ annotated_id: { order: :asc } }], size: 10000, search_after: search_after).results
+      ids = result.collect{ |i| i['annotated_id'] }.uniq.map(&:to_i)
+      break if ids.empty?
 
       # Iterate through each result and generate an output row for the CSV
-      search.medias.find_each do |pm|
+      ProjectMedia.where(id: ids, team_id: search.team_condition(team_id)).find_each do |pm|
         row = [
           pm.claim_description&.description,
           pm.full_url,
@@ -375,7 +374,7 @@ def self.get_exported_data(query, team_id)
         data << row
       end
 
-      offset += page_size
+      search_after = [ids.max]
     end
 
     data
diff --git a/test/lib/list_export_test.rb b/test/lib/list_export_test.rb
index 668797d6e..dfff4e524 100644
--- a/test/lib/list_export_test.rb
+++ b/test/lib/list_export_test.rb
@@ -26,18 +26,23 @@ def teardown
     end
   end
 
-  test "should export media CSV" do
+  test "should export media (including child media) CSV" do
+    setup_elasticsearch
     t = create_team
     create_team_task team_id: t.id, fieldset: 'tasks'
-    2.times { create_project_media team: t }
+    parent = create_project_media team: t, disable_es_callbacks: false
+    child = create_project_media team: t, disable_es_callbacks: false
+    create_relationship source_id: parent.id, target_id: child.id, relationship_type: Relationship.confirmed_type
 
-    export = ListExport.new(:media, '{}', t.id)
+    sleep 2 # Wait for indexing
+
+    export = ListExport.new(:media, { show_similar: true }.to_json, t.id)
     csv_url = export.generate_csv_and_send_email(create_user)
     response = Net::HTTP.get_response(URI(csv_url))
     assert_equal 200, response.code.to_i
     csv_content = CSV.parse(response.body, headers: true)
-    assert_equal 2, csv_content.size
     assert_equal 2, export.number_of_rows
+    assert_equal 2, csv_content.size
   end
 
   test "should export feed CSV" do

From 55bbfb4d235086003da9cd9d897cf771238be99a Mon Sep 17 00:00:00 2001
From: Caio <117518+caiosba@users.noreply.github.com>
Date: Mon, 9 Sep 2024 10:33:43 -0300
Subject: [PATCH 3/7] Fixing CC issue

---
 lib/check_search.rb | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/lib/check_search.rb b/lib/check_search.rb
index 86e3c045a..57fa535ce 100644
--- a/lib/check_search.rb
+++ b/lib/check_search.rb
@@ -347,10 +347,9 @@ def self.get_exported_data(query, team_id)
     # Paginate
     search = CheckSearch.new(query, nil, team_id)
     search_after = [0]
-    while true
+    while !search_after.empty?
       result = $repository.search(_source: 'annotated_id', query: search.medias_query, sort: [{ annotated_id: { order: :asc } }], size: 10000, search_after: search_after).results
-      ids = result.collect{ |i| i['annotated_id'] }.uniq.map(&:to_i)
-      break if ids.empty?
+      ids = result.collect{ |i| i['annotated_id'] }.uniq.compact.map(&:to_i)
 
       # Iterate through each result and generate an output row for the CSV
       ProjectMedia.where(id: ids, team_id: search.team_condition(team_id)).find_each do |pm|
@@ -374,7 +373,7 @@ def self.get_exported_data(query, team_id)
         data << row
       end
 
-      search_after = [ids.max]
+      search_after = [ids.max].compact
     end
 
     data

From c1d5047339bfc9d063f98dddc5ec0ee8bbc42948 Mon Sep 17 00:00:00 2001
From: Caio <117518+caiosba@users.noreply.github.com>
Date: Mon, 9 Sep 2024 11:29:51 -0300
Subject: [PATCH 4/7] Fixing export for feeds sharing only fact-checks

---
 lib/check_search.rb | 63 +++++++++++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 20 deletions(-)

diff --git a/lib/check_search.rb b/lib/check_search.rb
index 57fa535ce..d9fbb18b2 100644
--- a/lib/check_search.rb
+++ b/lib/check_search.rb
@@ -89,6 +89,10 @@ def team
     Team.find_by_id(team_id)
   end
 
+  def feed
+    @feed
+  end
+
   def teams
     []
   end
@@ -336,16 +340,22 @@ def medias_get_search_result(query)
   def self.get_exported_data(query, team_id)
     team = Team.find(team_id)
     Team.current = team
+    search = CheckSearch.new(query, nil, team_id)
+    feed_sharing_only_fact_checks = (search.feed && search.feed.data_points == [1])
 
     # Prepare the export
     data = []
-    header = ['Claim', 'Item page URL', 'Status', 'Created by', 'Submitted at', 'Published at', 'Number of media', 'Tags']
-    fields = team.team_tasks.sort
-    fields.each { |tt| header << tt.label }
+    header = nil
+    if feed_sharing_only_fact_checks
+      header = ['Fact-check title', 'Fact-check summary', 'Fact-check URL', 'Tags', 'Workspace', 'Updated at', 'Rating']
+    else
+      header = ['Claim', 'Item page URL', 'Status', 'Created by', 'Submitted at', 'Published at', 'Number of media', 'Tags']
+      fields = team.team_tasks.sort
+      fields.each { |tt| header << tt.label }
+    end
     data << header
 
     # Paginate
-    search = CheckSearch.new(query, nil, team_id)
     search_after = [0]
     while !search_after.empty?
       result = $repository.search(_source: 'annotated_id', query: search.medias_query, sort: [{ annotated_id: { order: :asc } }], size: 10000, search_after: search_after).results
@@ -353,22 +363,35 @@ def self.get_exported_data(query, team_id)
 
       # Iterate through each result and generate an output row for the CSV
       ProjectMedia.where(id: ids, team_id: search.team_condition(team_id)).find_each do |pm|
-        row = [
-          pm.claim_description&.description,
-          pm.full_url,
-          pm.status_i18n,
-          pm.author_name.to_s.gsub(/ \[.*\]$/, ''),
-          pm.created_at.strftime("%Y-%m-%d %H:%M:%S"),
-          pm.published_at&.strftime("%Y-%m-%d %H:%M:%S"),
-          pm.linked_items_count,
-          pm.tags_as_sentence
-        ]
-        annotations = pm.get_annotations('task').map(&:load)
-        fields.each do |field|
-          annotation = annotations.find { |a| a.team_task_id == field.id }
-          answer = (annotation ? (begin annotation.first_response_obj.file_data[:file_urls].join("\n") rescue annotation.first_response.to_s end) : '')
-          answer = begin JSON.parse(answer).collect{ |x| x['url'] }.join(', ') rescue answer end
-          row << answer
+        row = nil
+        if feed_sharing_only_fact_checks
+          row = [
+            pm.fact_check_title,
+            pm.fact_check_summary,
+            pm.fact_check_url,
+            pm.tags_as_sentence,
+            pm.team_name,
+            pm.updated_at_timestamp,
+            pm.status
+          ]
+        else
+          row = [
+            pm.claim_description&.description,
+            pm.full_url,
+            pm.status_i18n,
+            pm.author_name.to_s.gsub(/ \[.*\]$/, ''),
+            pm.created_at.strftime("%Y-%m-%d %H:%M:%S"),
+            pm.published_at&.strftime("%Y-%m-%d %H:%M:%S"),
+            pm.linked_items_count,
+            pm.tags_as_sentence
+          ]
+          annotations = pm.get_annotations('task').map(&:load)
+          fields.each do |field|
+            annotation = annotations.find { |a| a.team_task_id == field.id }
+            answer = (annotation ? (begin annotation.first_response_obj.file_data[:file_urls].join("\n") rescue annotation.first_response.to_s end) : '')
+            answer = begin JSON.parse(answer).collect{ |x| x['url'] }.join(', ') rescue answer end
+            row << answer
+          end
         end
         data << row
       end

From 14cbefa78d3defcfc64a0c033032a231b263d427 Mon Sep 17 00:00:00 2001
From: Caio <117518+caiosba@users.noreply.github.com>
Date: Mon, 9 Sep 2024 17:34:38 -0300
Subject: [PATCH 5/7] Adding test for new case

---
 test/lib/list_export_test.rb | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/test/lib/list_export_test.rb b/test/lib/list_export_test.rb
index dfff4e524..e850977bb 100644
--- a/test/lib/list_export_test.rb
+++ b/test/lib/list_export_test.rb
@@ -45,7 +45,7 @@ def teardown
     assert_equal 2, csv_content.size
   end
 
-  test "should export feed CSV" do
+  test "should export media feed CSV" do
     t = create_team
     f = create_feed team: t
     2.times { f.clusters << create_cluster }
@@ -59,6 +59,32 @@ def teardown
     assert_equal 2, export.number_of_rows
   end
 
+  test "should export fact-check feed CSV" do
+    setup_elasticsearch
+    RequestStore.store[:skip_cached_field_update] = false
+    t = create_team
+    2.times do
+      pm = create_project_media team: t, disable_es_callbacks: false
+      r = publish_report(pm, {}, nil, { language: 'en', use_visual_card: false })
+      r = Dynamic.find(r.id)
+      r.disable_es_callbacks = false
+      r.set_fields = { state: 'published' }.to_json
+      r.save!
+    end
+    ss = create_saved_search team: t
+    f = create_feed team: t, data_points: [1], saved_search: ss, published: true
+
+    sleep 2 # Wait for indexing
+
+    export = ListExport.new(:media, { feed_id: f.id, feed_view: 'fact_check' }.to_json, t.id)
+    csv_url = export.generate_csv_and_send_email(create_user)
+    response = Net::HTTP.get_response(URI(csv_url))
+    assert_equal 200, response.code.to_i
+    csv_content = CSV.parse(response.body, headers: true)
+    assert_equal 2, export.number_of_rows
+    assert_equal 2, csv_content.size
+  end
+
   test "should export fact-checks CSV" do
     t = create_team
     2.times do

From dff87af62ac188dfb50e8f82ce3bd8bf2d97700c Mon Sep 17 00:00:00 2001
From: Caio <117518+caiosba@users.noreply.github.com>
Date: Mon, 9 Sep 2024 17:38:28 -0300
Subject: [PATCH 6/7] Updating Code Climate

---
 .codeclimate.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.codeclimate.yml b/.codeclimate.yml
index 773f85a55..ff19a8b13 100644
--- a/.codeclimate.yml
+++ b/.codeclimate.yml
@@ -10,7 +10,7 @@ checks:
     enabled: false
   method-complexity:
     config:
-      threshold: 22
+      threshold: 25
   method-count:
     config:
       threshold: 65

From 896e08a5a8e43ea0e7ee1893ccad1e72c743179d Mon Sep 17 00:00:00 2001
From: Caio <117518+caiosba@users.noreply.github.com>
Date: Mon, 9 Sep 2024 20:42:57 -0300
Subject: [PATCH 7/7] Fixing test

---
 test/lib/list_export_test.rb | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/test/lib/list_export_test.rb b/test/lib/list_export_test.rb
index e850977bb..15551ba12 100644
--- a/test/lib/list_export_test.rb
+++ b/test/lib/list_export_test.rb
@@ -62,6 +62,10 @@ def teardown
   test "should export fact-check feed CSV" do
     setup_elasticsearch
     RequestStore.store[:skip_cached_field_update] = false
+
+    pender_url = CheckConfig.get('pender_url_private')
+    WebMock.stub_request(:get, /#{pender_url}/).to_return(body: '{}', status: 200)
+
     t = create_team
     2.times do
       pm = create_project_media team: t, disable_es_callbacks: false