Skip to content

Commit

Permalink
[WIP] Ticket CV2-5067: Export data as a CSV saved in S3 using a pre-s…
Browse files Browse the repository at this point in the history
…igned URL and that expires after X days
  • Loading branch information
caiosba committed Aug 18, 2024
1 parent e54e250 commit e79c18b
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 12 deletions.
11 changes: 9 additions & 2 deletions config/config.yml.example
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ development: &default
otel_traces_sampler:
otel_custom_sampling_rate:

# Rate limits for tiplines
# Limits
#
# OPTIONAL
# When not set, default values are used.
Expand All @@ -274,9 +274,16 @@ development: &default
devise_unlock_accounts_after: 1
login_rate_limit: 10
api_rate_limit: 100
export_csv_maximum_number_of_results: 10000
export_csv_expire: 604800 # Seconds. Default is 7 days, which is also the longest lifetime S3 accepts for a pre-signed URL

# Session
#
# OPTIONAL
# When not set, default values are used.
#
session_store_key: '_checkdesk_session_dev'
session_store_domain: 'localhost'
export_csv_maximum_number_of_results: 10000
test:
<<: *default
checkdesk_base_url_private: http://api:3000
Expand Down
9 changes: 9 additions & 0 deletions lib/check_s3.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,13 @@ def self.delete(*paths)
client = Aws::S3::Client.new
client.delete_objects(bucket: CheckConfig.get('storage_bucket'), delete: { objects: objects })
end

def self.write_presigned(path, content_type, content)
self.write(path, content_type, content)
bucket = CheckConfig.get('storage_bucket')
client = Aws::S3::Client.new
s3 = Aws::S3::Resource.new(client: client)
obj = s3.bucket(bucket).object(path)
obj.presigned_url(:get, expires_in: CheckConfig.get('export_csv_expire', 7.days.to_i, :integer))
end
end
20 changes: 14 additions & 6 deletions lib/check_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -338,16 +338,17 @@ def self.export_to_csv(query, team_id)
search = CheckSearch.new(query, nil, team_id)

# Prepare the export
csv_file_path = File.join(Rails.root, 'tmp', "items-export-#{Time.now.to_i}-#{Digest::MD5.hexdigest(query)}.csv")
csv = File.open(csv_file_path, 'w+')
data = []
header = ['Claim', 'Item page URL', 'Status', 'Created by', 'Submitted at', 'Published at', 'Number of media', 'Tags']
fields = team.team_tasks.sort
fields.each { |tt| header << tt.label }
csv.puts(header.collect{ |x| '"' + x.to_s.gsub('"', '') + '"' }.join(','))
data << header

# No pagination for the export
search.set_option('esoffset', 0)
search.set_option('eslimit', CheckConfig.get(:export_csv_maximum_number_of_results, 10000, :integer))

# Iterate through each result and generate an output row for the CSV
search.medias.find_each do |pm|
row = [
pm.claim_description&.description,
Expand All @@ -366,11 +367,18 @@ def self.export_to_csv(query, team_id)
answer = begin JSON.parse(answer).collect{ |x| x['url'] }.join(', ') rescue answer end
row << answer
end
csv.puts(row.collect{ |x| '"' + x.to_s.gsub('"', '') + '"' }.join(','))
data << row
end

# Convert to CSV
csv_string = CSV.generate do |csv|
data.each do |row|
csv << row
end
end

csv.close
csv_file_path
# Save to S3
CheckS3.write_presigned("export/item/#{team.slug}/#{Time.now.to_i}/#{Digest::MD5.hexdigest(query)}.csv", 'text/csv', csv_string)
end

private
Expand Down
20 changes: 16 additions & 4 deletions test/lib/check_search_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,25 @@ def setup
def teardown
end

# Exports two items through CheckSearch.export_to_csv, downloads the CSV from
# the returned URL, and then checks that the same URL is rejected once the
# configured lifetime has elapsed.
test "should export CSV" do
test "should export CSV and expire it" do
t = create_team
create_team_task team_id: t.id, fieldset: 'tasks'
pm1 = create_project_media team: t
pm2 = create_project_media team: t
csv = CheckSearch.export_to_csv('{}', t.id)
assert File.exist?(csv)
assert_equal 3, File.readlines(csv).size # One line is for the header

# Shrink the URL lifetime to 2 seconds so expiry can be observed inside the test
stub_configs({ 'export_csv_expire' => 2 }) do

# Generate a CSV with the two exported items
csv_url = CheckSearch.export_to_csv('{}', t.id)
response = Net::HTTP.get_response(URI(csv_url))
assert_equal 200, response.code.to_i
# `headers: true` treats the first row as the header, so only data rows are counted
csv_content = CSV.parse(response.body, headers: true)
assert_equal 2, csv_content.size

# Make sure it expires after 2 seconds
sleep 3 # Just to be safe
response = Net::HTTP.get_response(URI(csv_url))
# NOTE(review): assumes the storage backend answers 403 for an expired signature — confirm
assert_equal 403, response.code.to_i
end
end
end

0 comments on commit e79c18b

Please sign in to comment.