From 34de40bb1c3c3e10634665e9dbaef22a8bb2df63 Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Fri, 15 Nov 2024 16:09:22 +0000 Subject: [PATCH] digifeeds: copy barcode to working directory CIFS storage is unreliable, so first copy the folder to an NFS working directory, then zip, then upload, then copy to a processed directory, and then delete all the old stuff. --- bin/digifeeds/upload_to_s3.sh | 49 ++++++++++++++++++----- bin/digifeeds/upload_to_s3_test.sh | 62 ++++++++++++++++++++++++++++-- 2 files changed, 98 insertions(+), 13 deletions(-) diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh index 82e4865..d92c2c6 100755 --- a/bin/digifeeds/upload_to_s3.sh +++ b/bin/digifeeds/upload_to_s3.sh @@ -21,6 +21,7 @@ if [[ $APP_ENV != "test" ]]; then # Variables contained in the config file: # # input_directory: path to the input directory + # working_directory: path to nfs directory for doing the zipping and sending # processed_directory: path to the directory of processed files+% # digifeeds_bucket: rclone remote for the digifeeds bucket # @@ -33,6 +34,7 @@ if [[ $APP_ENV != "test" ]]; then fi if ! input_directory=${input_directory:?}; then exit 1; fi if ! processed_directory=${processed_directory:?}; then exit 1; fi +if ! working_directory=${working_directory:?}; then exit 1; fi if ! digifeeds_bucket=${digifeeds_bucket:?}; then exit 1; fi send_metrics=${send_metrics:-"true"} @@ -90,7 +92,7 @@ verify_image_order() { for arg in "${sorted[@]}"; do cnt=$((cnt + 1)) int=${arg:0:8} - [ $(( 10#$int )) != $cnt ] && return 1 + [ $((10#$int)) != $cnt ] && return 1 done return 0 } @@ -175,14 +177,23 @@ main() { for barcode_path in "${input_directory}"/*/; do local barcode barcode=$(basename "${barcode_path%%/}") + working_barcode_path="${working_directory}/$barcode" - log_info "Copying $barcode" + log_info "Processing $barcode" + + log_debug "Copying $barcode to working directory" + + if ! 
cp -r "$barcode_path" "$working_barcode_path"; then + log_error "Copying $barcode to working directory failed" + errors_total=$((errors_total + 1)) + continue + fi log_debug "Verifying image order $barcode" #8 digits, ends in .tif or .jp2 filter_regex='[[:digit:]]{8}\.tif$|[[:digit:]]{8}\.jp2$' local image_list - image_list=$(list_files "$barcode_path" | grep -E "$filter_regex") + image_list=$(list_files "$working_barcode_path" | grep -E "$filter_regex") if ! verify_image_order "$image_list"; then log_error "Image order incorrect for $barcode" image_order_errors_total=$((image_order_errors_total + 1)) @@ -191,21 +202,21 @@ main() { fi log_debug "Zipping $barcode" - if ! zip_it "$input_directory"/"$barcode"; then + if ! zip_it "$working_barcode_path"; then log_error "Failed to zip $barcode" errors_total=$((errors_total + 1)) continue fi log_debug "Verifying zip of $barcode" - if ! verify_zip "$input_directory"/"$barcode"; then + if ! verify_zip "$working_barcode_path"; then log_error "$barcode.zip does not contain the correct files" errors_total=$((errors_total + 1)) continue fi log_debug "Sending $barcode to S3" - if ! rclone copy "$input_directory"/"$barcode".zip "$digifeeds_bucket":; then + if ! rclone copy "$working_barcode_path".zip "$digifeeds_bucket":; then log_error "Failed to copy $barcode" upload_errors_total=$((upload_errors_total + 1)) errors_total=$((errors_total + 1)) @@ -213,7 +224,7 @@ main() { fi log_debug "Verifying barcode in S3" - if ! rclone check "$input_directory"/"$barcode".zip "$digifeeds_bucket":; then + if ! 
rclone check "$working_barcode_path".zip "$digifeeds_bucket":; then log_error "$barcode not found in S3" upload_errors_total=$((upload_errors_total + 1)) errors_total=$((errors_total + 1)) @@ -221,8 +232,28 @@ main() { fi log_info "Moving $barcode to processed" - mv "$input_directory"/"$barcode".zip "$processed_directory"/"${TIMESTAMP}"_"${barcode}".zip - mv "$input_directory"/"$barcode" "$processed_directory"/"${TIMESTAMP}"_"${barcode}" + log_debug "Copying ${barcode}.zip to processed" + if ! cp "$working_barcode_path".zip "$processed_directory"/"${TIMESTAMP}"_"${barcode}".zip; then + log_error "Failed to copy $barcode.zip to processed" + errors_total=$((errors_total + 1)) + continue + fi + + log_debug "Deleting ${working_barcode_path}.zip" + rm "$working_barcode_path".zip + + log_debug "Copying ${barcode} to processed" + if ! cp -r "$working_barcode_path" "$processed_directory"/"${TIMESTAMP}"_"${barcode}"; then + log_error "Failed to copy $barcode to processed" + errors_total=$((errors_total + 1)) + continue + fi + + log_debug "Deleting ${working_barcode_path}" + rm -r "$working_barcode_path" + log_debug "Deleting ${barcode_path}" + rm -r "$barcode_path" + files_processed_total=$((files_processed_total + 1)) done diff --git a/bin/digifeeds/upload_to_s3_test.sh b/bin/digifeeds/upload_to_s3_test.sh index 9657d20..5c1f333 100755 --- a/bin/digifeeds/upload_to_s3_test.sh +++ b/bin/digifeeds/upload_to_s3_test.sh @@ -12,6 +12,7 @@ setup() { mkdir $SCRATCH_PATH INPUT_DIR=$SCRATCH_PATH/input + WORKING_DIR=$SCRATCH_PATH/working PROCESSED_DIR=$SCRATCH_PATH/processed BARCODE_1="30000000189012" @@ -20,6 +21,7 @@ setup() { mkdir $INPUT_DIR mkdir $PROCESSED_DIR + mkdir $WORKING_DIR mkdir $INPUT_DIR/$BARCODE_1 touch $INPUT_DIR/$BARCODE_1/00000001.tif @@ -34,6 +36,7 @@ setup() { ## Config that's in main. 
export input_directory="$INPUT_DIR" export processed_directory="$PROCESSED_DIR" + export working_directory="$WORKING_DIR" export digifeeds_bucket="digifeeds_bucket" export timestamp=$TIMESTAMP export send_metrics="false" @@ -65,7 +68,7 @@ teardown() { @test "It Works" { shellmock new rclone shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket: - shellmock config rclone 0 1:check regex-2:"$INPUT_DIR" regex-3:^digifeeds_bucket: + shellmock config rclone 0 1:check regex-2:"$WORKING_DIR" regex-3:^digifeeds_bucket: run $SUBJECT assert_success @@ -81,7 +84,7 @@ teardown() { @test "It filters the appropriate files" { shellmock new rclone shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket: - shellmock config rclone 0 1:check regex-2:"$INPUT_DIR" regex-3:^digifeeds_bucket: + shellmock config rclone 0 1:check regex-2:"$WORKING_DIR" regex-3:^digifeeds_bucket: run $SUBJECT cd "$BATS_TEST_TMPDIR" @@ -118,7 +121,7 @@ teardown() { @test "Failed image order" { shellmock new rclone shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket: - shellmock config rclone 0 1:check regex-2:"$INPUT_DIR" regex-3:^digifeeds_bucket: + shellmock config rclone 0 1:check regex-2:"$WORKING_DIR" regex-3:^digifeeds_bucket: touch "$INPUT_DIR"/"$BARCODE_1"/00000004.jp2 run $SUBJECT assert_output --partial "ERROR: Image order incorrect for $BARCODE_1" @@ -158,7 +161,7 @@ teardown() { @test "Failed on S3 verification and moves on" { shellmock new rclone shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket: - shellmock config rclone 1 1:check regex-2:"$INPUT_DIR" regex-3:^digifeeds_bucket: + shellmock config rclone 1 1:check regex-2:"$WORKING_DIR" regex-3:^digifeeds_bucket: run $SUBJECT assert_output --partial "ERROR: $BARCODE_1 not found in S3" assert_output --partial "ERROR: $BARCODE_2 not found in S3" @@ -168,6 +171,57 @@ teardown() { assert_output --partial "INFO: Total errors uploading to S3: 2" shellmock assert expectations rclone } +@test "Fails on copying barcode folder to 
working directory and moves on" { + shellmock new cp + shellmock config cp 1 regex-2:"$INPUT_DIR" <<<"Error" + run $SUBJECT + assert_output --partial "ERROR: Copying $BARCODE_1 to working directory failed" + assert_output --partial "ERROR: Copying $BARCODE_2 to working directory failed" + assert_output --partial "INFO: Total files processed: 0" + assert_output --partial "INFO: Total errors image order: 0" + assert_output --partial "INFO: Total errors: 2" + assert_output --partial "INFO: Total errors uploading to S3: 0" + shellmock assert expectations cp +} + +@test "Fails on copying zip to processed directory and moves on" { + shellmock new rclone + shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket: + shellmock config rclone 0 1:check regex-2:"$WORKING_DIR" regex-3:^digifeeds_bucket: + cp -r "$INPUT_DIR"/* "$WORKING_DIR" + shellmock new cp + shellmock config cp 0 regex-2:"$INPUT_DIR" + shellmock config cp 1 regex-2:.zip <<<"Error" + run $SUBJECT + assert_output --partial "ERROR: Failed to copy ${BARCODE_1}.zip to processed" + assert_output --partial "ERROR: Failed to copy ${BARCODE_2}.zip to processed" + assert_output --partial "INFO: Total files processed: 0" + assert_output --partial "INFO: Total errors image order: 0" + assert_output --partial "INFO: Total errors: 2" + assert_output --partial "INFO: Total errors uploading to S3: 0" + shellmock assert expectations cp + shellmock assert expectations rclone +} +@test "Fails on copying working folder to processed directory and moves on" { + shellmock new rclone + shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket: + shellmock config rclone 0 1:check regex-2:"$WORKING_DIR" regex-3:^digifeeds_bucket: + cp -r "$INPUT_DIR"/* "$WORKING_DIR" + shellmock new cp + shellmock config cp 0 regex-2:"$INPUT_DIR" + shellmock config cp 0 regex-2:.zip + shellmock config cp 1 <<<"Error" + run $SUBJECT + assert_output --partial "ERROR: Failed to copy ${BARCODE_1} to processed" + assert_output --partial "ERROR: Failed 
to copy ${BARCODE_2} to processed" + assert_output --partial "INFO: Total files processed: 0" + assert_output --partial "INFO: Total errors image order: 0" + assert_output --partial "INFO: Total errors: 2" + assert_output --partial "INFO: Total errors uploading to S3: 0" + shellmock assert expectations cp + shellmock assert expectations rclone +} + @test "print_metrics" { shellmock new pushgateway_advanced shellmock config pushgateway_advanced 0 <<<5