Skip to content

Commit

Permalink
checks image order
Browse files Browse the repository at this point in the history
  • Loading branch information
niquerio committed Oct 23, 2024
1 parent f0e9531 commit 52fdbd1
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 13 deletions.
43 changes: 40 additions & 3 deletions bin/digifeeds/upload_to_s3.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ JOB_NAME="aim_digifeeds_upload_to_aws"
# COUNTERS
###########
files_processed_total=0
image_order_errors_total=0
upload_errors_total=0
errors_total=0

Expand All @@ -47,6 +48,19 @@ last_count() {
pushgateway_advanced -j $JOB_NAME -q ${metric}
}

#######################################
# Check that image file names are numbered consecutively starting at 1.
#
# Each argument is expected to begin with an 8-digit, zero-padded page
# number (e.g. 00000001.tif). The names are sorted, then the Nth name
# must carry the number N. A gap, a duplicate number, or a name that
# does not start with eight digits all count as a failure.
#
# Arguments: $@ - image file names, in any order
# Returns:   0 if the numbering is 1..N with no gaps (or no names were
#            given), 1 otherwise
#######################################
verify_image_order(){
  local name prefix
  local expected=0

  # Sort byte-wise so the result does not depend on the caller's locale.
  while IFS= read -r name; do
    # printf emits one empty line when there are no arguments; skip it so
    # an empty list is still accepted.
    [[ -n $name ]] || continue

    expected=$((expected + 1))
    prefix=${name:0:8}

    # Anything that is not exactly eight decimal digits cannot be a page
    # number; fail instead of letting arithmetic evaluation choke on it.
    [[ $prefix =~ ^[0-9]{8}$ ]] || return 1

    # Force base 10: a leading zero would otherwise make bash parse the
    # prefix as octal, so 00000008 is an error and 00000010 becomes 8.
    (( 10#$prefix == expected )) || return 1
  done < <(printf '%s\n' "$@" | LC_ALL=C sort)

  return 0
}

zip_it() {
local barcode_path=$1
cd $barcode_path
Expand Down Expand Up @@ -74,16 +88,22 @@ verify_zip() {

print_metrics() {
local fp_current_total=$1
local upload_errors_current_total=$2
local errors_current_total=$3
local image_order_errors_current_total=$2
local upload_errors_current_total=$3
local errors_current_total=$4

local fp_metric="${JOB_NAME}_files_processed_total"
local fp_last=$(last_count $fp_metric)
local fp_total=$((fp_last + fp_current_total))

local image_order_errors_metric="${JOB_NAME}_image_order_errors_total"
local image_order_errors_last=$(last_count $image_order_errors_metric)
local image_order_errors_total=$((image_order_errors_last + image_order_errors_current_total))

local upload_errors_metric="${JOB_NAME}_upload_errors_total"
local upload_errors_last=$(last_count $upload_errors_metric)
local upload_errors_total=$((upload_errors_last + upload_errors_current_total))


local errors_metric="${JOB_NAME}_errors_total"
local errors_last=$(last_count $errors_metric)
Expand All @@ -93,6 +113,9 @@ print_metrics() {
# HELP ${fp_metric} Count of digifeeds zip files sent to S3
# TYPE ${fp_metric} counter
$fp_metric $fp_total
# HELP ${image_order_errors_metric} Count of folders where there are missing pages of images
# TYPE ${image_order_errors_metric} counter
${image_order_errors_metric} $image_order_errors_total
# HELP ${upload_errors_metric} Count of errors when uploading digifeeds zip files to S3
# TYPE ${upload_errors_metric} counter
${upload_errors_metric} $upload_errors_total
Expand All @@ -112,6 +135,19 @@ main() {
local barcode=$(basename ${barcode_path%%/})

log_info "Copying $barcode"

log_info "Verifying image order $barcode"
#8 digits, ends in .tif or .jp2
filter_regex='[[:digit:]]{8}\.tif$|[[:digit:]]{8}\.jp2$'
local image_list=$(cd $barcode_path && ls | egrep "$filter_regex")
verify_image_order $image_list
if [[ $? != 0 ]]; then
log_error "Image order incorrect for $barcode"
image_order_errors_total=$((image_order_errors_total + 1))
errors_total=$((errors_total + 1))
continue
fi


log_info "Zipping $barcode"
zip_it $input_directory/$barcode
Expand Down Expand Up @@ -154,6 +190,7 @@ main() {
done

log_info "Total files processed: $files_processed_total"
log_info "Total errors image order: $image_order_errors_total "
log_info "Total errors uploading to S3: $upload_errors_total"
log_info "Total errors: $errors_total"
}
Expand All @@ -178,7 +215,7 @@ if [[ $APP_ENV != "test" ]]; then
main

if [ "$send_metrics" != "false" ]; then
print_metrics $files_processed_total $upload_errors_total $errors_total | /usr/local/bin/pushgateway_advanced -j $JOB_NAME
print_metrics $files_processed_total $image_order_errors_total $upload_errors_total $errors_total | /usr/local/bin/pushgateway_advanced -j $JOB_NAME
/usr/local/bin/pushgateway -j $JOB_NAME -b $START_TIME
fi
log_info "=====End $(date)====="
Expand Down
48 changes: 38 additions & 10 deletions bin/digifeeds/upload_to_s3_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,22 @@ setup() {
INPUT_DIR=$SCRATCH_PATH/input
PROCESSED_DIR=$SCRATCH_PATH/processed

BARCODE_1="30123456789012"
BARCODE_2="40123456789012"
BARCODE_1="30000000189012"
BARCODE_2="40000000189012"
TIMESTAMP="YYYY-MM-DD_hh-mm-ss"

mkdir $INPUT_DIR
mkdir $PROCESSED_DIR

mkdir $INPUT_DIR/$BARCODE_1
touch $INPUT_DIR/$BARCODE_1/01234567.tif
touch $INPUT_DIR/$BARCODE_1/01234567.jp2
touch $INPUT_DIR/$BARCODE_1/00000001.tif
touch $INPUT_DIR/$BARCODE_1/00000002.jp2
touch $INPUT_DIR/$BARCODE_1/checksum.md5
touch $INPUT_DIR/$BARCODE_1/Thumbs.db
touch $INPUT_DIR/$BARCODE_1/some_other_file.tif

mkdir $INPUT_DIR/$BARCODE_2
touch $INPUT_DIR/$BARCODE_2/01234567.tif
touch $INPUT_DIR/$BARCODE_2/00000001.tif

## Config that's in main.
input_directory="$INPUT_DIR"
Expand Down Expand Up @@ -74,8 +74,8 @@ teardown() {
cd $BATS_TEST_TMPDIR
mv $PROCESSED_DIR/${TIMESTAMP}_${BARCODE_1}.zip ./
unzip -q ${TIMESTAMP}_${BARCODE_1}.zip
assert_file_exists '01234567.jp2'
assert_file_exists '01234567.tif'
assert_file_exists '00000001.tif'
assert_file_exists '00000002.jp2'
assert_file_exists 'checksum.md5'
assert_file_not_exists 'Thumbs.db'
assert_file_not_exists 'some_other_file.tif'
Expand All @@ -92,13 +92,38 @@ teardown() {
assert_success
}

@test "verify_image_order sucess" {
# Names are passed out of order on purpose (1, 3, 2): verify_image_order
# sorts its arguments before checking, so this is a complete 1..3 sequence.
run verify_image_order 00000001.tif 00000003.jp2 00000002.tif
assert_success
}

@test "verify_image_order failure" {
# Page 2 is missing (1, 3, 4), so the consecutive-numbering check must fail.
run verify_image_order 00000001.tif 00000003.tif 00000004.jp2
assert_failure
}

@test "Failed image order" {
# End-to-end check that a barcode folder with a gap in its page numbering is
# skipped and counted as an image-order error, while other folders still run.
shellmock new rclone
# NOTE(review): these presumably stub `rclone copy` and `rclone check` to
# exit 0 so the upload path succeeds for the remaining folder; confirm
# against the shellmock docs.
shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket:
shellmock config rclone 0 1:check regex-2:$INPUT_DIR regex-3:^digifeeds_bucket:
# setup() gives BARCODE_1 pages 00000001 and 00000002; adding page 4 leaves a
# gap at page 3.
touch $INPUT_DIR/$BARCODE_1/00000004.jp2
run $SUBJECT
assert_output --partial "ERROR: Image order incorrect for $BARCODE_1"
# Only one folder is expected to be processed; BARCODE_1 is reported as the
# image-order error above.
assert_output --partial "INFO: Total files processed: 1"
assert_output --partial "INFO: Total errors: 1"
assert_output --partial "INFO: Total errors image order: 1"
assert_output --partial "INFO: Total errors uploading to S3: 0"
shellmock assert expectations rclone
}

@test "Failed zip" {
shellmock new zip
shellmock config zip 1
run $SUBJECT
assert_output --partial "ERROR: Failed to zip $BARCODE_1"
assert_output --partial "ERROR: Failed to zip $BARCODE_2"
assert_output --partial "INFO: Total files processed: 0"
assert_output --partial "INFO: Total errors image order: 0"
assert_output --partial "INFO: Total errors: 2"
assert_output --partial "INFO: Total errors uploading to S3: 0"
shellmock assert expectations zip
Expand All @@ -111,6 +136,7 @@ teardown() {
assert_output --partial "ERROR: Failed to copy $BARCODE_1"
assert_output --partial "ERROR: Failed to copy $BARCODE_2"
assert_output --partial "INFO: Total files processed: 0"
assert_output --partial "INFO: Total errors image order: 0"
assert_output --partial "INFO: Total errors: 2"
assert_output --partial "INFO: Total errors uploading to S3: 2"
shellmock assert expectations rclone
Expand All @@ -124,17 +150,19 @@ teardown() {
assert_output --partial "ERROR: $BARCODE_1 not found in S3"
assert_output --partial "ERROR: $BARCODE_2 not found in S3"
assert_output --partial "INFO: Total files processed: 0"
assert_output --partial "INFO: Total errors image order: 0"
assert_output --partial "INFO: Total errors: 2"
assert_output --partial "INFO: Total errors uploading to S3: 2"
shellmock assert expectations rclone
}
@test "print_metrics" {
shellmock new pushgateway_advanced
shellmock config pushgateway_advanced 0 <<< 5
run print_metrics 1 2 3
run print_metrics 1 2 3 4
assert_output --partial "aim_digifeeds_upload_to_aws_files_processed_total 6"
assert_output --partial "aim_digifeeds_upload_to_aws_upload_errors_total 7"
assert_output --partial "aim_digifeeds_upload_to_aws_errors_total 8"
assert_output --partial "aim_digifeeds_upload_to_aws_image_order_errors_total 7"
assert_output --partial "aim_digifeeds_upload_to_aws_upload_errors_total 8"
assert_output --partial "aim_digifeeds_upload_to_aws_errors_total 9"
shellmock assert expectations pushgateway_advanced

}
Expand Down

0 comments on commit 52fdbd1

Please sign in to comment.