diff --git a/lib/derivative_rodeo/generators/pdf_split_generator.rb b/lib/derivative_rodeo/generators/pdf_split_generator.rb index 87d019b..4281678 100644 --- a/lib/derivative_rodeo/generators/pdf_split_generator.rb +++ b/lib/derivative_rodeo/generators/pdf_split_generator.rb @@ -30,9 +30,7 @@ class PdfSplitGenerator < BaseGenerator # # @see #existing_page_locations def image_file_basename_template(basename:) - # We can do this because the temp files are always local; and we'll need to modify how we - # write these files. - "pages/#{basename}-%d.#{output_extension}" + "#{basename}/pages/#{basename}-%d.#{output_extension}" end ## @@ -45,10 +43,12 @@ def image_file_basename_template(basename:) # with :tail_glob. # # @note There is relation to {Generators::BaseGenerator#destination} and this method. + # + # @note The tail_glob is in relation to the {#image_file_basename_template} def existing_page_locations(input_location:) - # TODO: Are we adequately accounting for the directory structure necessary to have a work have - # more than one PDF and then split each PDF's pages into the correct sub directory? - tail_glob = "pages/*.#{output_extension}" + # See image_file_basename_template + tail_glob = "#{input_location.file_basename}/pages/*.#{output_extension}" + output_locations = input_location.derived_file_from(template: output_location_template).globbed_tail_locations(tail_glob: tail_glob) return output_locations if output_locations.count.positive? @@ -69,7 +69,12 @@ def existing_page_locations(input_location:) # @yieldparam image_location [StorageLocations::FileLocation] the file and adapter logic. # @yieldparam image_path [String] where to find this file in the tmp space # + # @note This function makes a concession; namely that if it encounters any + # {#existing_page_locations} it will use all of that result as the entire number of pages. + # We could make this smarter but at the moment we're deferring on that. + # # @see BaseGenerator#with_each_requisite_location_and_tmp_file_path for further discussion + # # rubocop:disable Metrics/MethodLength def with_each_requisite_location_and_tmp_file_path input_files.each do |input_location| diff --git a/spec/derivative_rodeo/generators/pdf_split_generator_spec.rb b/spec/derivative_rodeo/generators/pdf_split_generator_spec.rb index 595a8ec..7fe7f13 100644 --- a/spec/derivative_rodeo/generators/pdf_split_generator_spec.rb +++ b/spec/derivative_rodeo/generators/pdf_split_generator_spec.rb @@ -17,6 +17,33 @@ end describe '#generated_files' do + context 'when given an already split PDF' do + it 'uses the already split components' do + Fixtures.with_file_uris_for("minimal-2-page.pdf") do |input_uris| + Fixtures.with_temporary_directory do |output_temporary_path| + output_location_template = "file://#{output_temporary_path}/{{ dir_parts[-1..-1] }}/{{ filename }}" + instance = described_class.new(input_uris: input_uris, output_location_template: output_location_template) + output_location = DerivativeRodeo::StorageLocations::FileLocation.build(from_uri: input_uris.first, template: output_location_template) + + # Let's fake a nice TIFF being in a pre-processed location. + pre_existing_tiff_path = File.join(output_location.file_dir, output_location.file_basename, "pages/1.tiff") + FileUtils.mkdir_p(File.dirname(pre_existing_tiff_path)) + File.open(pre_existing_tiff_path, "w+") do |f| + f.puts "🤠🐮🐴 A muppet man parading as a TIFF." + end + + generated_files = instance.generated_files + # TODO: The PDF is two pages yet we only check for the presence of one + # or more derived files; hence our faked pre-processed derivative is all that we find. + expect(generated_files.size).to eq(1) + + # Ensuring that we do in fact have the pre-made file. + expect(File.read(generated_files.first.file_path)).to start_with("🤠🐮🐴") + end + end + end + end + context 'when given a PDF to split' do it 'will create one image per page, writing that to the storage adapter, and then enqueue each page for processing' do generated_files = nil diff --git a/spec/derivative_rodeo/storage_locations/s3_location_spec.rb b/spec/derivative_rodeo/storage_locations/s3_location_spec.rb index a13ba9b..2785f99 100644 --- a/spec/derivative_rodeo/storage_locations/s3_location_spec.rb +++ b/spec/derivative_rodeo/storage_locations/s3_location_spec.rb @@ -66,11 +66,9 @@ describe '#globbed_tail_locations' do it 'searched the bucket' do - # The subject's bucket is not the same as the above bucket - subject.bucket = bucket basename_ish = short_path.split(".").first key = File.join(basename_ish, File.basename(__FILE__)) - bucket.object(key).upload_file(__FILE__) + subject.bucket.object(key).upload_file(__FILE__) subject.globbed_tail_locations(tail_glob: "*.rb") end