-
Notifications
You must be signed in to change notification settings - Fork 16
/
03_process_data.sh
executable file
·66 lines (54 loc) · 3.01 KB
/
03_process_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
#
# 03_process_data.sh
#
# Backs up the raw CSV files in the working directory, combines their data
# columns into datatable.csv, and then adds averaged, background-corrected and
# per-second columns to that table.

# Stop on a failing command (-e), on use of an unset variable (-u), and when any
# stage of a pipeline fails (-o pipefail).
set -euo pipefail

# Working directory holding the raw CSV files, relative to where the script is started.
NATUREDIR=nature_tmpdir

# Test that the $NATUREDIR directory exists.
# A missing directory is an error: report it on stderr and exit with a non-zero
# status so that whoever started the script can tell the step did not run.
if [[ ! -d "$NATUREDIR" ]]; then
  echo -e "\nRequired set-up not performed. Run 01_init.sh first." >&2
  exit 1
fi

# Quoted so the path stays a single argument even if it ever contains spaces.
cd "$NATUREDIR"
echo -e "\nBacking up raw data..."
# For each file whose name ends in '.csv', make a backup copy named '<filename>.csv.bak'.
for f in *.csv; do
  # When nothing matches, the pattern is left as the literal text '*.csv'; skip it.
  [[ -e "$f" ]] || continue
  # "$f" is quoted so a name containing spaces stays one argument, and '--' keeps
  # a name starting with '-' from being read as an option to cp.
  cp -- "$f" "$f.bak"
  echo "Created $f.bak"
done
printf '\n%s\n' "Extracting sample numbers..."
# The sample numbers are the first comma-separated field of the background file.
# 'cut' picks fields out of each line: the 'delimiter' flag ('-d,') tells it the
# fields are separated by commas (this is a CSV file) and '-f1' keeps only the
# first field. The extracted text is written to a new file, samples.cut.
cut -d, -f1 background.csv > samples.cut
printf '%s\n' "Created samples.cut"
echo -e "\nExtracting data columns..."
# Use 'cut' to extract the second comma-separated field (-f2 -d,) from each CSV
# file and save it as '<filename>.cut'.
for f in *.csv; do
  # When nothing matches, the pattern is left as the literal text '*.csv'; skip it.
  [[ -e "$f" ]] || continue
  # Both the input name and the redirect target are quoted: unquoted, a name
  # containing spaces would be split into several words and the redirect would fail.
  cut -d, -f2 -- "$f" > "$f.cut"
  echo "Created $f.cut"
done
printf '\n%s\n' "Linking columns into a CSV..."
# The 'paste' command joins files side by side, line by line, to build a table.
# The delimiter flag (-d ',') puts a comma between the columns, producing a CSV.
# Column order: the sample numbers first, then the background column, then the
# three reading columns.
column_files=(samples.cut)
for source_name in background reading01 reading02 reading03; do
  column_files+=("${source_name}.csv.cut")
done
# Save the table to datatable.csv
paste -d ',' "${column_files[@]}" > datatable.csv
printf '%s\n' "Created datatable.csv"
echo -e "\nShowing the first 10 rows of datatable.csv..."
# The 'head' command shows the first few lines of a longer file. In this case, the first 11 lines
# 'column' formats the table for viewing. The '-s,' flag indicates we are viewing a CSV file
cat datatable.csv | head -n 11 | column -tx -s,
echo -e "\nAverage the readings, subtract baseline and convert to abs/sec..."
# Pause until the user presses enter. The typed text is not used, so it is read
# into the throwaway variable '_'. '-r' stops read from treating backslashes
# specially.
# At end-of-file on stdin (e.g. when the script is run without a terminal) read
# returns non-zero; '|| true' keeps 'set -e' from silently ending the script there.
read -r -p "Press enter to continue: " _ || true
# Get the average background reading.
# Here we use the 'awk' command to 1) count the data lines in background.csv
# (every line after the header, NR>1), 2) sum the values in column 2 ($2), and
# 3) calculate the average. We save that number in the variable $bkgd.
# With no data lines the average is undefined, so awk reports the problem on
# stderr and exits non-zero instead of dividing by zero; under 'set -e' the
# failed assignment then stops the script.
bkgd=$(awk -F, '
  NR > 1 { SUM += $2; COUNT += 1 }
  END {
    if (COUNT == 0) {
      print "background.csv contains no data rows; cannot compute the average background reading." > "/dev/stderr"
      exit 1
    }
    print SUM / COUNT
  }' background.csv)
echo -e "\nAverage background reading: $bkgd\n"
# Now we run awk multiple times to 1) add a column for the average of columns 3-5;
# 2) subtract the average background held in $bkgd, making a new column; and
# 3) divide that adjusted reading by 60 to get a per-second number, creating the final column.
# In each stage line 1 (NR==1) is the header and only receives the new column's
# name; on the other lines '$(NF+1) = ...' appends a field after the current last
# one, and '$NF' on the right-hand side is the column added by the previous stage.
# Note that the three awk commands are a single command, linked by 'pipes' (|).
# The new table goes to a temp file first, because redirecting straight into
# datatable.csv would empty it while the first awk is still reading it.
# The rename is its own command rather than '... > tmp && mv ...': 'set -e' does
# not stop the script for a failure on the left of '&&', so a failed pipeline
# would otherwise pass unnoticed and leave datatable.csv unmodified.
awk -F, -v OFS="," 'NR==1 { $(NF+1)="average"; print $0; } NR>1 { $(NF+1) = ($3+$4+$5)/3; print $0; }' datatable.csv |
  awk -F, -v OFS="," -v bkgd="$bkgd" 'NR==1 { $(NF+1)="corrected"; print $0; } NR>1 { $(NF+1) = $NF - bkgd; print $0; }' |
  awk -F, -v OFS="," 'NR==1 { $(NF+1)="abs/sec"; print $0; } NR>1 { $(NF+1) = $NF/60; print $0; }' > tmp
# Finally, rename the temp file as datatable.csv.
mv tmp datatable.csv
echo -e "Showing first 10 rows of modified datatable.csv..."
cat datatable.csv | head -n 11 | column -tx -s,