From 44a8de4e9ebb1af4ef2e5fc21994eafcfe1aaeeb Mon Sep 17 00:00:00 2001 From: dalepotter Date: Mon, 4 Jun 2018 11:53:29 +0100 Subject: [PATCH] Add logging output for stats generation process --- git.sh | 42 ++++++++++++++++++++++++++++++++++-------- git_dashboard.sh | 8 ++++++++ 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/git.sh b/git.sh index 1ced0c983e5..eb8f91ee952 100755 --- a/git.sh +++ b/git.sh @@ -1,5 +1,5 @@ #!/bin/bash - +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Starting Stats generation" echo $GITOUT_DIR if [ "$GITOUT_DIR" = "" ]; then GITOUT_DIR="gitout" @@ -9,26 +9,31 @@ if [ "$COMMIT_SKIP_FILE" = "" ]; then fi # Make the all the gitout directories +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Making gitout directories" mkdir -p $GITOUT_DIR/logs mkdir -p $GITOUT_DIR/commits mkdir -p $GITOUT_DIR/gitaggregate mkdir -p $GITOUT_DIR/gitaggregate-dated -# Build a JSON file of metadata for each CKAN publisher, and file that they have publishe. +# Build a JSON file of metadata for each CKAN publisher, and for each dataset published. # This is based on the data from the CKAN API cd helpers +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running ckan.py" python ckan.py cd .. +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Copying ckan.json" cp helpers/ckan.json $GITOUT_DIR # Clear output directory +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Clearing output directory" rm -r out # Bring the IATI raw data up-to-date cd data || exit $? # Checkout automatic, and make sure it is clean and up to date +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Performing git operations on the data snapshot" git checkout automatic git reset --hard git clean -df @@ -36,6 +41,7 @@ git pull --ff-only # Create a gitdate file in JSON format. This contains the git hash and date for each data commit +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Creating gitdate file" echo '{' > gitdate.json git log --format="format:%H|%ai" | awk -F '|' '{ print "\""$1"\": \""$2"\"," } ' >> gitdate.json # Ensure the last line doesn't have a trailing comma @@ -44,6 +50,7 @@ git log --format="format:%H|%ai" | tail -n1 | awk -F '|' '{ print "\""$1"\": \"" echo '}' >> gitdate.json # Perform this dance because piping to ../ behaves differently with symlinks cd .. || exit $? +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Move and copy gitdate file" mv data/gitdate.json . cp gitdate.json $GITOUT_DIR @@ -51,6 +58,7 @@ cp gitdate.json $GITOUT_DIR # Store current and all commit hashes as variables cd data || exit $? # Get the latest commit hash +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Storing git info as bash variables" current_hash=`git rev-parse HEAD` # Get all commit hashes commits=`git log --format=format:%H` @@ -58,20 +66,23 @@ cd .. || exit $? # Loop over commits and run stats code +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Looping over commits" for commit in $commits; do if grep -q $commit $COMMIT_SKIP_FILE; then - echo Skipping $commit + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Skipping $commit (due to grep conditional)" elif [ $GITOUT_SKIP_INCOMMITSDIR ] && [ -d $GITOUT_DIR/commits/$commit ]; then - echo Skipping $commit + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Skipping $commit (due to git skip conditionals)" else - echo "Running stats code for commit: $commit" - + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running stats code for commit: $commit" + # Get the data to the specified commit cd data || exit $? + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Checking out commit $commit" git checkout $commit git clean -df commit_date=`git log --format="format:%ai" | head -n 1` + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Set commit date as $commit_date" cd .. || exit $? # Disable this because it doesn't work for date dependent stuff......... @@ -80,39 +91,54 @@ for commit in $commits; do # (and also this on the next line: --new) # Run the stats commands and save output to log files + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Calculating stats (loop) for commit $commit" python calculate_stats.py $@ --today "$commit_date" loop > $GITOUT_DIR/logs/${commit}_loop.log || exit 1 + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Calculating stats (aggregate) for commit $commit" python calculate_stats.py $@ --today "$commit_date" aggregate > $GITOUT_DIR/logs/${commit}_aggregate.log || exit 1 if [ $commit = $current_hash ]; then - python calculate_stats.py $@ --today "$commit_date" invert > $GITOUT_DIR/logs/${commit}_invert.log + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Calculating stats (invert) for commit $commit" + python calculate_stats.py $@ --today "$commit_date" invert > $GITOUT_DIR/logs/${commit}_invert.log fi #python statsrunner/hashcopy.py || exit 1 + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Removing output for commit dir: $commit" rm -r $GITOUT_DIR/commits/$commit mv out $GITOUT_DIR/commits/$commit || exit $? + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running gitaggregate.py for commit: $commit" python statsrunner/gitaggregate.py + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running gitaggregate.py dated for commit: $commit" python statsrunner/gitaggregate.py dated + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running gitaggregate-publisher.py for commit: $commit" python statsrunner/gitaggregate-publisher.py + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running gitaggregate-publisher.py dated for commit: $commit" python statsrunner/gitaggregate-publisher.py dated # If the commit is the latest commit then, move the resulting stats to the 'current' directory if [ ! $commit = $current_hash ]; then + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Removing commit dir (based on latest commit logic) for commit: $commit" rm -r $GITOUT_DIR/commits/$commit else cd $GITOUT_DIR || exit $? + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Removing current dir for commit: $commit" rm -r current # Since we're not currently creating symlinks, we can just do a plain move here mv commits/$current_hash current + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Creating current.tar.gz for commit: $commit" tar -czf current.tar.gz current cd .. || exit $? fi if [ "$ALL_COMMITS" = "" ]; then + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Breaking out of commit logic for commit: $commit" break fi fi done cd $GITOUT_DIR || exit $? +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Creating compressed file: gitaggregate" tar -czf gitaggregate.tar.gz gitaggregate +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Creating compressed file: gitaggregate-dated" tar -czf gitaggregate-dated.tar.gz gitaggregate-dated +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Creating compressed file: gitaggregate-publisher" tar -czf gitaggregate-publisher.tar.gz gitaggregate-publisher +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Creating compressed file: gitaggregate-publisher-dated" tar -czf gitaggregate-publisher-dated.tar.gz gitaggregate-publisher-dated - diff --git a/git_dashboard.sh b/git_dashboard.sh index 311c920f7e6..6bf075743c9 100755 --- a/git_dashboard.sh +++ b/git_dashboard.sh @@ -3,5 +3,13 @@ # which are used indepently by the dashboard. # However, the plan is to have the latter stats run depend on the former: # https://github.com/IATI/IATI-Dashboard/issues/223 +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Starting Stats generation" +echo "=========" + +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running stats-blacklist" +echo "=========" GITOUT_SKIP_INCOMMITSDIR=1 GITOUT_DIR=stats-blacklist COMMIT_SKIP_FILE=stats-blacklist/gitaggregate/activities_with_future_transactions.json ./git.sh --stats-module stats.activity_future_transaction_blacklist $@ + +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running general stats" +echo "=========" GITOUT_SKIP_INCOMMITSDIR=1 ./git.sh --stats-module stats.dashboard $@