From 3b2dfc3bea26db5e1ed3e4c4e3478ff6ed8380ad Mon Sep 17 00:00:00 2001 From: Benjamin Kiah Stroud <32469930+bkiahstroud@users.noreply.github.com> Date: Tue, 23 Jul 2024 20:46:10 -0700 Subject: [PATCH 1/8] script to download, extract, and upload IA files to S3 --- .gitignore | 2 ++ bin/ia_to_s3_migrator | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100755 bin/ia_to_s3_migrator diff --git a/.gitignore b/.gitignore index c3f1151..8430916 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ !.env.development !.env.test *~undo-tree~ +tmp +log diff --git a/bin/ia_to_s3_migrator b/bin/ia_to_s3_migrator new file mode 100755 index 0000000..e7ca8a7 --- /dev/null +++ b/bin/ia_to_s3_migrator @@ -0,0 +1,25 @@ +#!/usr/bin/env ruby + +# Usage: bundle exec bin/ia_to_s3_migrator + +require 'ruby-progressbar' +require_relative '../lib/space_stone' + +puts '== Tail log/ia_to_s3_migrator.log for logs ==' + +iaids = File.read('tmp/iaids.txt').split("\n") +logger = Logger.new('log/ia_to_s3_migrator.log') +progressbar = ProgressBar.create(total: iaids.size, format: '%a %e %P% Processed: %c from %C') +s3_bucket = SpaceStone::S3Service.bucket + +iaids.each do |iaid| + # WARN: This dumbly checks for any downloads; if some files have been uploaded + # but some haven't, it will skip uploading all of them + if s3_bucket.objects(prefix: "#{iaid}/downloads").any? + logger.warn("== #{iaid} == Files have already been uploaded to S3, skipping") + else + logger.info(" == #{iaid} == Downloading files...") + process_ia_id(iaid, '/store/tmp/fast-tmp') + end + progressbar.increment +end From 56a31d8973d711048cd6e37b98dbc63e8119824e Mon Sep 17 00:00:00 2001 From: Benjamin Kiah Stroud <32469930+bkiahstroud@users.noreply.github.com> Date: Tue, 23 Jul 2024 21:26:46 -0700 Subject: [PATCH 2/8] fix entrypoint and usage comment --- bin/ia_to_s3_migrator | 2 +- docker-compose.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/ia_to_s3_migrator b/bin/ia_to_s3_migrator index e7ca8a7..72bf6f5 100755 --- a/bin/ia_to_s3_migrator +++ b/bin/ia_to_s3_migrator @@ -1,6 +1,6 @@ #!/usr/bin/env ruby -# Usage: bundle exec bin/ia_to_s3_migrator +# Usage: STAGE_ENV=production docker compose run web 'bundle exec bin/ia_to_s3_migrator' require 'ruby-progressbar' require_relative '../lib/space_stone' diff --git a/docker-compose.yml b/docker-compose.yml index aeac80c..6ab678e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,6 +16,6 @@ services: - .:/var/task:delegated - /var/run/docker.sock:/var/run/docker.sock command: "sam local start-api --host '0.0.0.0' --port 3030 --docker-volume-basedir ${PWD}" - entrypoint: /bin/bash + entrypoint: /bin/bash -c ports: - 3030:3030 From 5479f7ae872b6821ef5a4572b3d480b9f579e31b Mon Sep 17 00:00:00 2001 From: Benjamin Kiah Stroud <32469930+bkiahstroud@users.noreply.github.com> Date: Wed, 24 Jul 2024 08:25:48 -0700 Subject: [PATCH 3/8] add convenience script --- bin/migrate_iaids | 4 ++++ 1 file changed, 4 insertions(+) create mode 100755 bin/migrate_iaids diff --git a/bin/migrate_iaids b/bin/migrate_iaids new file mode 100755 index 0000000..88fd20b --- /dev/null +++ b/bin/migrate_iaids @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +STAGE_ENV=production docker compose run web 'bundle exec bin/ia_to_s3_migrator' From d44d31c522314d53286b387c8dc64e6324567e7d Mon Sep 17 00:00:00 2001 From: Benjamin Kiah Stroud <32469930+bkiahstroud@users.noreply.github.com> Date: Wed, 24 Jul 2024 09:02:59 -0700 Subject: [PATCH 4/8] mount correct volume On the prod server, this file path automatically gets cleaned up by a crontab job --- bin/migrate_iaids | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/migrate_iaids b/bin/migrate_iaids index 88fd20b..0746caa 100755 --- a/bin/migrate_iaids +++ b/bin/migrate_iaids @@ -1,4 +1,4 @@ #!/bin/bash set -e -STAGE_ENV=production docker compose run web 'bundle exec bin/ia_to_s3_migrator' +STAGE_ENV=production docker compose run --volume /store/tmp/fast-tmp:/store/tmp/fast-tmp web 'bundle exec bin/ia_to_s3_migrator' From 46794ce69853ca9928540e52ced64d293a1f47ab Mon Sep 17 00:00:00 2001 From: Benjamin Kiah Stroud <32469930+bkiahstroud@users.noreply.github.com> Date: Wed, 24 Jul 2024 10:19:55 -0700 Subject: [PATCH 5/8] pass credentials to SQS if they exist --- bin/migrate_iaids | 7 ++++++- lib/space_stone/sqs_service.rb | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/bin/migrate_iaids b/bin/migrate_iaids index 0746caa..89f88e1 100755 --- a/bin/migrate_iaids +++ b/bin/migrate_iaids @@ -1,4 +1,9 @@ #!/bin/bash set -e -STAGE_ENV=production docker compose run --volume /store/tmp/fast-tmp:/store/tmp/fast-tmp web 'bundle exec bin/ia_to_s3_migrator' +STAGE_ENV=production docker compose run \ + --rm \ + --remove-orphans \ + --volume /store/tmp/fast-tmp:/store/tmp/fast-tmp \ + web \ + 'bundle exec bin/ia_to_s3_migrator' diff --git a/lib/space_stone/sqs_service.rb b/lib/space_stone/sqs_service.rb index 8687eea..c65cd78 100644 --- a/lib/space_stone/sqs_service.rb +++ b/lib/space_stone/sqs_service.rb @@ -6,7 +6,11 @@ module SpaceStone # Service object to add messages to either sqs queue module SqsService def client - @client ||= Aws::SQS::Client.new(region: 'us-east-2') + @client ||= if ENV.fetch('AWS_S3_ACCESS_KEY_ID', nil) + Aws::SQS::Client.new(region: 'us-east-2', credentials: Aws::Credentials.new(ENV.fetch('AWS_S3_ACCESS_KEY_ID'), ENV.fetch('AWS_S3_SECRET_ACCESS_KEY'))) + else + Aws::SQS::Client.new(region: 'us-east-2') + end end def ocr_queue_url From 5edf3d7a00ec59fd038351892df91d6175ba3e06 Mon Sep 17 00:00:00 2001 From: Benjamin Kiah Stroud <32469930+bkiahstroud@users.noreply.github.com> Date: Wed, 24 Jul 2024 10:38:33 -0700 Subject: [PATCH 6/8] update usage comment --- bin/ia_to_s3_migrator | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/ia_to_s3_migrator b/bin/ia_to_s3_migrator index 72bf6f5..68f4ec4 100755 --- a/bin/ia_to_s3_migrator +++ b/bin/ia_to_s3_migrator @@ -1,6 +1,6 @@ #!/usr/bin/env ruby -# Usage: STAGE_ENV=production docker compose run web 'bundle exec bin/ia_to_s3_migrator' +# Usage: bin/migrate_iaids require 'ruby-progressbar' require_relative '../lib/space_stone' From e71c8d50410f2aab02cc5542433987f97a280479 Mon Sep 17 00:00:00 2001 From: Benjamin Kiah Stroud <32469930+bkiahstroud@users.noreply.github.com> Date: Tue, 1 Oct 2024 10:27:55 -0700 Subject: [PATCH 7/8] do not deploy logs to AWS Lambda If the files are large enough, they can exceed the file size cap of the Lambda, causing the deploy to fail. Logs are not necessary for the Lambda to run --- bin/build | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bin/build b/bin/build index 411addb..d32816a 100755 --- a/bin/build +++ b/bin/build @@ -23,6 +23,7 @@ rm -rf \ test \ tmp \ .ruby-lsp \ + log \ vendor/bundle/ruby/3.*/cache find . -iname "*~undo-tree~" -delete popd @@ -38,7 +39,8 @@ rm -rf \ README.md \ test \ tmp \ - .ruby-lsp \ + .ruby-lsp \ + log \ vendor/bundle/ruby/3.*/cache find . -iname "*~undo-tree~" -delete popd @@ -54,7 +56,8 @@ rm -rf \ README.md \ test \ tmp \ - .ruby-lsp \ + .ruby-lsp \ + log \ vendor/bundle/ruby/3.*/cache find . -iname "*~undo-tree~" -delete popd From 2a8b62bd41cf13df38d9849269c1e05853a3edcc Mon Sep 17 00:00:00 2001 From: Benjamin Kiah Stroud <32469930+bkiahstroud@users.noreply.github.com> Date: Tue, 1 Oct 2024 10:43:50 -0700 Subject: [PATCH 8/8] remove extra whitespace --- bin/ia_to_s3_migrator | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/ia_to_s3_migrator b/bin/ia_to_s3_migrator index 68f4ec4..9946c8c 100755 --- a/bin/ia_to_s3_migrator +++ b/bin/ia_to_s3_migrator @@ -18,7 +18,7 @@ iaids.each do |iaid| if s3_bucket.objects(prefix: "#{iaid}/downloads").any? logger.warn("== #{iaid} == Files have already been uploaded to S3, skipping") else - logger.info(" == #{iaid} == Downloading files...") + logger.info("== #{iaid} == Downloading files...") process_ia_id(iaid, '/store/tmp/fast-tmp') end progressbar.increment