diff --git a/lib/space_stone.rb b/lib/space_stone.rb index 29a1333..94a6301 100644 --- a/lib/space_stone.rb +++ b/lib/space_stone.rb @@ -10,15 +10,16 @@ require_relative './space_stone/sqs_service' # Invokers -def download(event:, context:) # rubocop:disable Lint/UnusedMethodArgument +# :download_dir must be a full path (i.e. starts with "/") and have no trailing slash +def download(event:, context:, download_dir: '/tmp') # rubocop:disable Lint/UnusedMethodArgument puts "event: #{event.inspect}" unless SpaceStone::Env.test? ia_ids = get_event_body(event: event) results = {} ia_ids.each do |ia_id| - jp2s = process_ia_id(ia_id.strip) - results[ia_id] = jp2s.map { |v| v.sub('/tmp/', '') } - puts %x{rm -rf /tmp/#{ia_id}} + jp2s = process_ia_id(ia_id.strip, download_dir) + results[ia_id] = jp2s.map { |v| v.sub("#{download_dir}/", '') } + puts %x{rm -rf #{download_dir}/#{ia_id}} end send_results(results) end @@ -57,16 +58,16 @@ def thumbnail(event:, context:) end # Helpers -def process_ia_id(ia_id) - FileUtils.mkdir_p("/tmp/#{ia_id}") +def process_ia_id(ia_id, download_dir) + FileUtils.mkdir_p("#{download_dir}/#{ia_id}") # download zip file - ia_download = SpaceStone::IaDownload.new(id: ia_id) + ia_download = SpaceStone::IaDownload.new(id: ia_id, base_path: download_dir) downloads = ia_download.download_jp2s downloads += ia_download.dataset_files downloads.each do |path| - SpaceStone::S3Service.upload(path) - SpaceStone::SqsService.add(message: path.sub('/tmp/', ''), queue: 'ocr') if path.match(/jp2$/) - SpaceStone::SqsService.add(message: path.sub('/tmp/', ''), queue: 'thumbnail') if path.match(/jp2$/) + SpaceStone::S3Service.upload(path, download_dir) + SpaceStone::SqsService.add(message: path.sub("#{download_dir}/", ''), queue: 'ocr') if path.match(/jp2$/) + SpaceStone::SqsService.add(message: path.sub("#{download_dir}/", ''), queue: 'thumbnail') if path.match(/jp2$/) end end diff --git a/lib/space_stone/ia_download.rb b/lib/space_stone/ia_download.rb index 4c8c787..0b8342a 100644 --- a/lib/space_stone/ia_download.rb +++ b/lib/space_stone/ia_download.rb @@ -8,7 +8,7 @@ module SpaceStone # Download files from Internet Archive class IaDownload - attr_accessor :id + attr_accessor :id, :downloads_path def self.json_data return @json_data if @json_data @@ -31,8 +31,11 @@ def self.login_cookies @login_cookies = cookie_hash.to_cookie_string end - def initialize(id:) + # :base_path must be a full path (i.e. starts with "/") and have no trailing slash + def initialize(id:, base_path: '/tmp') @id = id + @downloads_path = "#{base_path}/#{id}/downloads" + FileUtils.mkdir_p @downloads_path end def login_cookies @@ -53,14 +56,6 @@ def remote_file_link @remote_file_link = url + jp2_zip_link end - def downloads_path - return @downloads_path if @downloads_path - - @downloads_path = "/tmp/#{id}/downloads" - FileUtils.mkdir_p @downloads_path - @downloads_path - end - def zip return @zip if @zip diff --git a/lib/space_stone/s3_service.rb b/lib/space_stone/s3_service.rb index cd1f220..740f42e 100644 --- a/lib/space_stone/s3_service.rb +++ b/lib/space_stone/s3_service.rb @@ -17,8 +17,8 @@ def bucket @bucket ||= resource.bucket(ENV.fetch('AWS_S3_BUCKET')) end - def upload(path) - obj = bucket.object(path.sub('/tmp/', '')) + def upload(path, download_dir = '/tmp') + obj = bucket.object(path.sub("#{download_dir}/", '')) puts "upload path #{path} - #{File.exist?(path)}" obj.upload_file(path) end