Skip to content

Commit

Permalink
digifeeds: copy barcode to working directory
Browse files Browse the repository at this point in the history
cifs storage is unreliable, so first move the folder to an nfs working
directory, then zip, then upload, then copy to a processed directory and
then delete all the old stuff.
  • Loading branch information
niquerio committed Nov 15, 2024
1 parent c0d2c1d commit 34de40b
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 13 deletions.
49 changes: 40 additions & 9 deletions bin/digifeeds/upload_to_s3.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ if [[ $APP_ENV != "test" ]]; then
# Variables contained in the config file:
#
# input_directory: path to the input directory
# working_directory: path to the nfs directory used for zipping and sending
# processed_directory: path to the directory of processed files
# digifeeds_bucket: rclone remote for the digifeeds bucket
#
Expand All @@ -33,6 +34,7 @@ if [[ $APP_ENV != "test" ]]; then
fi
if ! input_directory=${input_directory:?}; then exit 1; fi
if ! processed_directory=${processed_directory:?}; then exit 1; fi
if ! working_directory=${working_directory:?}; then exit 1; fi
if ! digifeeds_bucket=${digifeeds_bucket:?}; then exit 1; fi
send_metrics=${send_metrics:-"true"}

Expand Down Expand Up @@ -90,7 +92,7 @@ verify_image_order() {
for arg in "${sorted[@]}"; do
cnt=$((cnt + 1))
int=${arg:0:8}
[ $(( 10#$int )) != $cnt ] && return 1
[ $((10#$int)) != $cnt ] && return 1
done
return 0
}
Expand Down Expand Up @@ -175,14 +177,23 @@ main() {
for barcode_path in "${input_directory}"/*/; do
local barcode
barcode=$(basename "${barcode_path%%/}")
working_barcode_path="${working_directory}/$barcode"

log_info "Copying $barcode"
log_info "Processing $barcode"

log_debug "Copying $barcode to working directory"

if ! cp -r "$barcode_path" "$working_barcode_path"; then
log_error "Copying $barcode to working directory failed"
errors_total=$((errors_total + 1))
continue
fi

log_debug "Verifying image order $barcode"
#8 digits, ends in .tif or .jp2
filter_regex='[[:digit:]]{8}\.tif$|[[:digit:]]{8}\.jp2$'
local image_list
image_list=$(list_files "$barcode_path" | grep -E "$filter_regex")
image_list=$(list_files "$working_barcode_path" | grep -E "$filter_regex")
if ! verify_image_order "$image_list"; then
log_error "Image order incorrect for $barcode"
image_order_errors_total=$((image_order_errors_total + 1))
Expand All @@ -191,38 +202,58 @@ main() {
fi

log_debug "Zipping $barcode"
if ! zip_it "$input_directory"/"$barcode"; then
if ! zip_it "$working_barcode_path"; then
log_error "Failed to zip $barcode"
errors_total=$((errors_total + 1))
continue
fi

log_debug "Verifying zip of $barcode"
if ! verify_zip "$input_directory"/"$barcode"; then
if ! verify_zip "$working_barcode_path"; then
log_error "$barcode.zip does not contain the correct files"
errors_total=$((errors_total + 1))
continue
fi

log_debug "Sending $barcode to S3"
if ! rclone copy "$input_directory"/"$barcode".zip "$digifeeds_bucket":; then
if ! rclone copy "$working_barcode_path".zip "$digifeeds_bucket":; then
log_error "Failed to copy $barcode"
upload_errors_total=$((upload_errors_total + 1))
errors_total=$((errors_total + 1))
continue
fi

log_debug "Verifying barcode in S3"
if ! rclone check "$input_directory"/"$barcode".zip "$digifeeds_bucket":; then
if ! rclone check "$working_barcode_path".zip "$digifeeds_bucket":; then
log_error "$barcode not found in S3"
upload_errors_total=$((upload_errors_total + 1))
errors_total=$((errors_total + 1))
continue
fi

log_info "Moving $barcode to processed"
mv "$input_directory"/"$barcode".zip "$processed_directory"/"${TIMESTAMP}"_"${barcode}".zip
mv "$input_directory"/"$barcode" "$processed_directory"/"${TIMESTAMP}"_"${barcode}"
log_debug "Copying ${barcode}.zip to processed"
if ! cp "$working_barcode_path".zip "$processed_directory"/"${TIMESTAMP}"_"${barcode}".zip; then
log_error "Failed to copy $barcode.zip to processed"
errors_total=$((errors_total + 1))
continue
fi

log_debug "Deleting ${working_barcode_path}.zip"
rm "$working_barcode_path".zip

log_debug "Copying ${barcode} to processed"
if ! cp -r "$working_barcode_path" "$processed_directory"/"${TIMESTAMP}"_"${barcode}"; then
log_error "Failed to copy $barcode to processed"
errors_total=$((errors_total + 1))
continue
fi

log_debug "Deleting ${working_barcode_path}"
rm -r "$working_barcode_path"
log_debug "Deleting ${barcode_path}"
rm -r "$barcode_path"

files_processed_total=$((files_processed_total + 1))
done

Expand Down
62 changes: 58 additions & 4 deletions bin/digifeeds/upload_to_s3_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ setup() {
mkdir $SCRATCH_PATH

INPUT_DIR=$SCRATCH_PATH/input
WORKING_DIR=$SCRATCH_PATH/working
PROCESSED_DIR=$SCRATCH_PATH/processed

BARCODE_1="30000000189012"
Expand All @@ -20,6 +21,7 @@ setup() {

mkdir $INPUT_DIR
mkdir $PROCESSED_DIR
mkdir $WORKING_DIR

mkdir $INPUT_DIR/$BARCODE_1
touch $INPUT_DIR/$BARCODE_1/00000001.tif
Expand All @@ -34,6 +36,7 @@ setup() {
## Config that's in main.
export input_directory="$INPUT_DIR"
export processed_directory="$PROCESSED_DIR"
export working_directory="$WORKING_DIR"
export digifeeds_bucket="digifeeds_bucket"
export timestamp=$TIMESTAMP
export send_metrics="false"
Expand Down Expand Up @@ -65,7 +68,7 @@ teardown() {
@test "It Works" {
shellmock new rclone
shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket:
shellmock config rclone 0 1:check regex-2:"$INPUT_DIR" regex-3:^digifeeds_bucket:
shellmock config rclone 0 1:check regex-2:"$WORKING_DIR" regex-3:^digifeeds_bucket:
run $SUBJECT

assert_success
Expand All @@ -81,7 +84,7 @@ teardown() {
@test "It filters the appropriate files" {
shellmock new rclone
shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket:
shellmock config rclone 0 1:check regex-2:"$INPUT_DIR" regex-3:^digifeeds_bucket:
shellmock config rclone 0 1:check regex-2:"$WORKING_DIR" regex-3:^digifeeds_bucket:

run $SUBJECT
cd "$BATS_TEST_TMPDIR"
Expand Down Expand Up @@ -118,7 +121,7 @@ teardown() {
@test "Failed image order" {
shellmock new rclone
shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket:
shellmock config rclone 0 1:check regex-2:"$INPUT_DIR" regex-3:^digifeeds_bucket:
shellmock config rclone 0 1:check regex-2:"$WORKING_DIR" regex-3:^digifeeds_bucket:
touch "$INPUT_DIR"/"$BARCODE_1"/00000004.jp2
run $SUBJECT
assert_output --partial "ERROR: Image order incorrect for $BARCODE_1"
Expand Down Expand Up @@ -158,7 +161,7 @@ teardown() {
@test "Failed on S3 verification and moves on" {
shellmock new rclone
shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket:
shellmock config rclone 1 1:check regex-2:"$INPUT_DIR" regex-3:^digifeeds_bucket:
shellmock config rclone 1 1:check regex-2:"$WORKING_DIR" regex-3:^digifeeds_bucket:
run $SUBJECT
assert_output --partial "ERROR: $BARCODE_1 not found in S3"
assert_output --partial "ERROR: $BARCODE_2 not found in S3"
Expand All @@ -168,6 +171,57 @@ teardown() {
assert_output --partial "INFO: Total errors uploading to S3: 2"
shellmock assert expectations rclone
}
# Scenario: the first copy step (input directory -> working directory) fails
# for every barcode. The run is expected to log one error per barcode, count
# two general errors, and process nothing.
@test "Fails on copying barcode folder to working directory and moves on" {
# Replace `cp` with a mock so the failure can be simulated.
shellmock new cp
# Mocked `cp` exits 1 when its 2nd argument matches $INPUT_DIR; the here-string
# is presumably what the mock prints (per shellmock conventions -- confirm).
shellmock config cp 1 regex-2:"$INPUT_DIR" <<<"Error"
run $SUBJECT
# Both barcodes must report the copy failure, i.e. the loop moved on after the
# first one instead of aborting.
assert_output --partial "ERROR: Copying $BARCODE_1 to working directory failed"
assert_output --partial "ERROR: Copying $BARCODE_2 to working directory failed"
# Summary counters: only the general error total is incremented.
assert_output --partial "INFO: Total files processed: 0"
assert_output --partial "INFO: Total errors image order: 0"
assert_output --partial "INFO: Total errors: 2"
assert_output --partial "INFO: Total errors uploading to S3: 0"
# Check the cp mock against its configured expectations.
shellmock assert expectations cp
}

# Scenario: zipping and the (mocked) S3 upload/check succeed, but copying the
# resulting zip into the processed directory fails for every barcode.
@test "Fails on copying zip to processed directory and moves on" {
# rclone is mocked so that both `copy` and `check` against the bucket succeed.
shellmock new rclone
shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket:
shellmock config rclone 0 1:check regex-2:"$WORKING_DIR" regex-3:^digifeeds_bucket:
# Pre-populate the working directory with the real `cp` before it is mocked;
# presumably needed because the mocked `cp` below does not copy any files.
cp -r "$INPUT_DIR"/* "$WORKING_DIR"
shellmock new cp
# A cp whose 2nd argument matches $INPUT_DIR (input -> working) succeeds.
shellmock config cp 0 regex-2:"$INPUT_DIR"
# A cp whose 2nd argument matches `.zip` (zip -> processed) fails with exit 1.
shellmock config cp 1 regex-2:.zip <<<"Error"
run $SUBJECT
# Both barcodes must report the zip copy failure (the loop continues).
assert_output --partial "ERROR: Failed to copy ${BARCODE_1}.zip to processed"
assert_output --partial "ERROR: Failed to copy ${BARCODE_2}.zip to processed"
# Summary counters: failures count as general errors, not S3 upload errors.
assert_output --partial "INFO: Total files processed: 0"
assert_output --partial "INFO: Total errors image order: 0"
assert_output --partial "INFO: Total errors: 2"
assert_output --partial "INFO: Total errors uploading to S3: 0"
# Check both mocks against their configured expectations.
shellmock assert expectations cp
shellmock assert expectations rclone
}
# Scenario: everything up to and including copying the zip to the processed
# directory succeeds, but copying the unzipped working folder to the processed
# directory fails for every barcode.
@test "Fails on copying working folder to processed directory and moves on" {
# rclone is mocked so that both `copy` and `check` against the bucket succeed.
shellmock new rclone
shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket:
shellmock config rclone 0 1:check regex-2:"$WORKING_DIR" regex-3:^digifeeds_bucket:
# Pre-populate the working directory with the real `cp` before it is mocked;
# presumably needed because the mocked `cp` below does not copy any files.
cp -r "$INPUT_DIR"/* "$WORKING_DIR"
shellmock new cp
# A cp whose 2nd argument matches $INPUT_DIR (input -> working) succeeds.
shellmock config cp 0 regex-2:"$INPUT_DIR"
# A cp whose 2nd argument matches `.zip` (zip -> processed) succeeds.
shellmock config cp 0 regex-2:.zip
# Any other cp call fails with exit 1; in this flow that should be the
# working-folder -> processed copy. NOTE(review): relies on shellmock picking
# the more specific configurations above first -- confirm.
shellmock config cp 1 <<<"Error"
run $SUBJECT
# Both barcodes must report the folder copy failure (the loop continues).
assert_output --partial "ERROR: Failed to copy ${BARCODE_1} to processed"
assert_output --partial "ERROR: Failed to copy ${BARCODE_2} to processed"
# Summary counters: failures count as general errors, not S3 upload errors.
assert_output --partial "INFO: Total files processed: 0"
assert_output --partial "INFO: Total errors image order: 0"
assert_output --partial "INFO: Total errors: 2"
assert_output --partial "INFO: Total errors uploading to S3: 0"
# Check both mocks against their configured expectations.
shellmock assert expectations cp
shellmock assert expectations rclone
}

@test "print_metrics" {
shellmock new pushgateway_advanced
shellmock config pushgateway_advanced 0 <<<5
Expand Down

0 comments on commit 34de40b

Please sign in to comment.