-
Notifications
You must be signed in to change notification settings - Fork 0
/
find_by_content.sh
executable file
·404 lines (358 loc) · 12.9 KB
/
find_by_content.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
#!/bin/bash
# Find By Content
# A program for searching through files' content. Matching files can be printed to screen
# (matching excerpts only), copied to another destination, or moved to the Trash. Usage
# details below.
# Recommended width:
# |---------------------------------------------------------------------------------------|
# Set the field separator to a newline only, so that paths containing spaces are not
# broken apart by the unquoted expansions and array assignments used in this script
IFS=$'\n'

## GLOBALS ##
# Search criteria, filled in by argument processing
SEARCH_DIR=                        # directory given with --dir
SEARCH_FILES="^.*$"                # regex for paths to search (--in-files); default: all
declare -a SEARCH_TERMS_PLUS=()    # patterns given with --find
declare -a SEARCH_TERMS_MINUS=()   # patterns given with --omit
WITHIN=0                           # --within: max lines between two hits; 0 disables
CASE_INS=""                        # becomes "-i" when --insens is given
SIZE_MIN=0                         # --size-above, in bytes; 0 means no lower limit
SIZE_MAX=0                         # --size-below, in bytes; 0 means no upper limit

# Operating mode: MODE stays 0 until one of the three mode options sets it
MODE=0
PRINT=1
COPY=2
DELETE=3
DEST_DIR=                          # destination directory given with --copy-to
THE_TIME=$(date "+%Y-%m-%d--%H-%M-%S")
TRASH_FOLDER="$HOME/.Trash/Deleted files ($THE_TIME)"   # receives files in delete mode

# 'print' mode options
LOOKBACK=0                         # --lines-above
LOOKAHEAD=0                        # --lines-below
SHOW_LINE_NUM=0                    # 1 when --line-num is given
QUIET=0                            # 1 when --quiet is given

# Counters and wording for the exit summary
CHECKED=0                          # files examined
FOUND=0                            # files that met every criterion
declare -a PLUS_ARGS=()            # grep arguments built from SEARCH_TERMS_PLUS
declare -a MINUS_ARGS=()           # filter command fragments built from SEARCH_TERMS_MINUS
NUM_ARGS=0                         # number of --find and --omit terms supplied
STR_FILES="files"
STR_TERMS="terms"

# Terminal capabilities. tput fails when there is no usable $TERM (cron, some pipes);
# fall back to 80 columns and no styling instead of leaving these empty and letting
# tput's error messages reach the user.
cols=$(tput cols 2>/dev/null) || cols=80
[[ "$cols" =~ ^[0-9]+$ ]] || cols=80
bold=$(tput bold 2>/dev/null) || bold=""
normal=$(tput sgr0 2>/dev/null) || normal=""
## UTILITY FUNCTIONS ##
# Exit summary. It is also installed as the SIGINT handler below, so it is printed even
# when the user interrupts the run: reports how many files were checked and how many
# matched, worded for the active mode, then ends the script.
niceExit() {
  # Switch to singular nouns where the counts call for them
  if [[ "$NUM_ARGS" -eq 1 ]]; then
    STR_TERMS="term"
  fi
  if [[ "$CHECKED" -eq 1 ]]; then
    STR_FILES="file"
  fi

  printf '\nChecked %s %s. ' "$CHECKED" "$STR_FILES"

  # The same noun variable is reused for the match count
  if [[ "$FOUND" -eq 1 ]]; then
    STR_FILES="file"
  else
    STR_FILES="files"
  fi

  case "$MODE" in
    "$PRINT")
      printf '%s\n' "Found search $STR_TERMS in $FOUND $STR_FILES."
      ;;
    "$COPY")
      printf '%s\n' "Copied $FOUND matching $STR_FILES."
      ;;
    *)
      printf '%s\n' "Deleted $FOUND matching $STR_FILES."
      ;;
  esac
  exit
}
trap niceExit INT
# Print usage of program; use this margin for help text:
# |----------------------------------------------------------------------------|
# Reads the globals 'cols', 'bold' and 'normal' for the title width and styling.
printHelp() {
  # 'cols' is empty when the terminal width could not be determined; an empty or
  # non-numeric width would make 'fmt -w' fail and silently drop the title
  local width="${cols:-80}"
  if ! [[ "$width" =~ ^[0-9]+$ ]]; then
    width=80
  fi

  printf '%s' "${bold}"
  # NOTE(review): -c centers the line with BSD/macOS fmt; GNU fmt reads -c as
  # crown-margin mode and prints this single line unchanged — confirm target platforms
  echo "--Find By Content--" | fmt -w "$width" -c
  printf '%s' "${normal}"
  cat << EOF
You can supply the following parameters, in any order:
${bold}Required:${normal}
--dir [dir]: The directory which should be recursively searched.
--find [term]: A search term (regex pattern) to look for in these files. You
can use --find as many times as you want to add search terms, and any file
that matches one or more of the terms will be returned.
${bold}(pick one:)${normal}
--print: Print the matches within each file to screen.
--copy-to [dir]: The directory into which to copy matching files.
--delete: Delete the matching files (sends files to Trash).
${bold}Optional:${normal}
--in-files [name]: The names of the files to be searched, as a regex pattern.
For example, "\.[ch]$" would search files ending in ".c" or ".h"; "\.[c]+$"
would search ".c" and ".cc" files. Otherwise all files are searched.
--omit [term]: A search term (regex pattern) to cut out of the results
obtained by searching for the --find term(s). You can use --omit as many
times as you want to create multiple cut-outs from the combined search
results of all the --find operations.
--within [num]: Any two hits must be within this many lines.
--insens: Perform content searches with case-insensitivity.
--size-above [num]: Only look at files above this size (in bytes).
--size-below [num]: Only look at files below this size (in bytes).
${bold}('print' mode only:)${normal}
--lines-above [num]: Show this many lines before the matching content.
--lines-below [num]: Show this many lines after the matching content.
--line-num: Print the line number before each line's content.
--quiet: Minimal output; no header line with the name and number of matches
in each file. Instead all content matches are printed back to back.
EOF
}
# Checks to see if the path passed in is taken; if so, it attempts to add a number to
# the name and prints the first available path that is found.
# Arguments: $1 - the desired path
# Outputs:   the usable path on stdout; error messages on stderr
# Returns:   0 on success. On failure it exits with status 1: that ends the script
#            when called directly, but only the subshell when called through $(...),
#            so such callers should check the exit status or for empty output.
correctForPathConflict() {
  local desired="$1"
  local -r max_attempts=3000
  local base dir stem ext candidate n

  # Nothing exists at the desired path: use it as is
  if [[ ! -e "$desired" ]]; then
    printf '%s\n' "$desired"
    return 0
  fi

  if [[ ! -f "$desired" && ! -d "$desired" ]]; then
    echo "Error: Encountered something that is not a file or directory: $desired." >&2
    exit 1
  fi

  # Work on the last path component only, so that a dot in a parent directory name
  # is never mistaken for the start of a file suffix
  base="${desired##*/}"
  dir="${desired%"$base"}"    # everything up to and including the final slash

  # If this is a file, and it has a suffix, break the name up at the last period so
  # the number goes at the end of the name and not after the suffix. A name whose
  # only period is the leading one (e.g. ".profile") is treated as having no suffix.
  stem="$base"
  ext=""
  if [[ -f "$desired" && "$base" == ?*.* ]]; then
    stem="${base%.*}"
    ext=".${base##*.}"
  fi

  # Try "name 1", "name 2", ... up to, but not including, max_attempts
  for (( n = 1; n < max_attempts; n++ )); do
    candidate="${dir}${stem} ${n}${ext}"
    if [[ ! -e "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done

  # Just quit, because something is probably wrong
  echo "Error: Cannot find a place in $(dirname -- "$desired") for $(basename -- "$desired")." >&2
  exit 1
}
## ARGUMENT PROCESSING ##
# Show help if called without enough args
if [ "$#" -lt 5 ]; then
  printHelp
  exit
fi

# Abort unless option $1 is followed by a value; $2 is the number of arguments still
# unprocessed, including the option itself. Without this guard, a value-taking option
# given as the very last argument makes 'shift 2' fail without shifting anything, and
# the parsing loop below never terminates.
requireValue() {
  if [ "$2" -lt 2 ]; then
    echo "Error: Option '$1' requires a value." >&2
    exit 1
  fi
}

# Abort unless $2, the value supplied for option $1, is a non-negative whole number.
# These values are later used in numeric tests and in the 'find' command line.
requireNumber() {
  if ! [[ "$2" =~ ^[0-9]+$ ]]; then
    echo "Error: Option '$1' requires a non-negative whole number, got '$2'." >&2
    exit 1
  fi
}

# Look for known options as long as there are more arguments to process
while [ "$#" -gt 0 ]; do
  case "$1" in
    --dir)
      requireValue "$1" "$#"
      SEARCH_DIR="$2"
      shift 2
      ;;
    --in-files)
      requireValue "$1" "$#"
      SEARCH_FILES="$2"
      shift 2
      ;;
    --find)
      requireValue "$1" "$#"
      SEARCH_TERMS_PLUS+=("$2")
      NUM_ARGS=$((NUM_ARGS + 1))
      shift 2
      ;;
    --omit)
      requireValue "$1" "$#"
      SEARCH_TERMS_MINUS+=("$2")
      NUM_ARGS=$((NUM_ARGS + 1))
      shift 2
      ;;
    --within)
      requireValue "$1" "$#"
      requireNumber "$1" "$2"
      WITHIN="$2"
      shift 2
      ;;
    --insens)
      CASE_INS="-i"
      shift
      ;;
    --size-above)
      requireValue "$1" "$#"
      requireNumber "$1" "$2"
      SIZE_MIN="$2"
      shift 2
      ;;
    --size-below)
      requireValue "$1" "$#"
      requireNumber "$1" "$2"
      SIZE_MAX="$2"
      shift 2
      ;;
    --print)
      MODE=$PRINT
      shift
      ;;
    --copy-to)
      requireValue "$1" "$#"
      MODE=$COPY
      DEST_DIR="$2"
      shift 2
      ;;
    --delete)
      MODE=$DELETE
      shift
      ;;
    --lines-above)
      requireValue "$1" "$#"
      requireNumber "$1" "$2"
      LOOKBACK="$2"
      shift 2
      ;;
    --lines-below)
      requireValue "$1" "$#"
      requireNumber "$1" "$2"
      LOOKAHEAD="$2"
      shift 2
      ;;
    --line-num)
      SHOW_LINE_NUM=1
      shift
      ;;
    --quiet)
      QUIET=1
      shift
      ;;
    *)
      echo "Error: Invalid argument '$1' detected." >&2
      exit 1
      ;;
  esac
done
# Safety checks. Every failure is reported on stderr and ends the script with a
# non-zero status, so that calling scripts can tell a refused run from a completed one.
if [ -z "$SEARCH_DIR" ]; then
  echo "You didn't specify a directory to search in using --dir! Aborting." >&2
  exit 1
fi
if [ ! -d "$SEARCH_DIR" ]; then
  echo "Directory $SEARCH_DIR does not exist! Aborting." >&2
  exit 1
fi
if [ "${#SEARCH_TERMS_PLUS[@]}" -lt 1 ]; then
  echo "You didn't specify anything to search for using --find! Aborting." >&2
  exit 1
fi
if [ "$MODE" -eq 0 ]; then
  echo "You didn't specify a mode with --print, --copy-to, or --delete! Aborting." >&2
  exit 1
fi
if [ "$MODE" -eq "$COPY" ] && [ ! -d "$DEST_DIR" ]; then
  echo "When using this program in copy mode, you need to specify a destination directory after --copy-to. Aborting." >&2
  exit 1
fi
if [ "$MODE" -eq "$DELETE" ]; then
  # Create the folder that will receive the "deleted" files. The directory test (and
  # not mkdir's own status) decides, so an already-existing folder is accepted.
  mkdir "$TRASH_FOLDER"
  if [ ! -d "$TRASH_FOLDER" ]; then
    echo "Could not create the folder \"$TRASH_FOLDER\". Aborting." >&2
    exit 1
  fi
fi
# Build the grep arguments and the 'find' command line from the user's options.
# Globals read:    SEARCH_TERMS_PLUS, SEARCH_TERMS_MINUS, CASE_INS, SEARCH_DIR,
#                  SEARCH_FILES, SIZE_MIN, SIZE_MAX
# Globals written: PLUS_ARGS, MINUS_ARGS, FIND_CMD
#
# MINUS_ARGS and FIND_CMD are command text that is later passed to 'eval'. Every
# user-supplied value placed in them is therefore escaped with printf %q; otherwise
# 'eval' would strip backslashes from regex patterns (turning "\.c$" into ".c$"), split
# patterns at spaces, and execute any shell metacharacters they contain.
buildQueries() {
  local term

  # Build additive grep query from patterns that user supplied via --find
  PLUS_ARGS=()
  for term in "${SEARCH_TERMS_PLUS[@]}"; do
    PLUS_ARGS+=("-e${term}")
  done

  # Build subtractive grep query from patterns that user supplied via --omit. The
  # first fragment emits the line held in $RESULT at the time of the 'eval'; each
  # further fragment drops it if it matches one of the --omit patterns.
  MINUS_ARGS=("printf '%s\\n' \"\$RESULT\"")
  for term in "${SEARCH_TERMS_MINUS[@]}"; do
    MINUS_ARGS+=("| grep -E $CASE_INS -v -e $(printf '%q' "$term")")
  done

  # Build 'find' command. We have to build it and then 'eval' it because of the
  # optional size arguments. It turns out that placing them in strings to substitute
  # them into a "live" 'find' command does not work; the invocation of 'find' cannot
  # contain string variables which contain arguments.
  # NOTE(review): -s is a BSD/macOS 'find' option; GNU find does not accept it —
  # confirm the intended platforms.
  FIND_CMD="find -s $(printf '%q' "$SEARCH_DIR") -type f ! -name .DS_Store"
  if [ "${SIZE_MIN:-0}" -ne 0 ]; then
    FIND_CMD+=" -size +$(printf '%q' "$SIZE_MIN")c"
  fi
  if [ "${SIZE_MAX:-0}" -ne 0 ]; then
    FIND_CMD+=" -size -$(printf '%q' "$SIZE_MAX")c"
  fi
  # Keep only the paths that match the --in-files pattern
  FIND_CMD+=" | grep -E -e $(printf '%q' "$SEARCH_FILES")"
}
buildQueries
## MAIN PROGRAM ##

# Print the number of lines in file $1, counting a final line that has no trailing
# newline. 'wc -l' counts newline characters only, so such a line must be added.
countLines() {
  local count
  count=$(wc -l < "$1") || return
  count="${count//[[:space:]]/}"    # some 'wc' implementations pad the number
  # Command substitution strips a trailing newline, so non-empty output here means
  # the file's last byte is something other than a newline
  if [ -n "$(tail -c 1 "$1")" ]; then
    count=$((count + 1))
  fi
  printf '%s\n' "$count"
}

# Collect the whole file list before processing anything, so that moving files in
# delete mode cannot disturb a traversal that is still running. Reading line by line
# keeps names containing spaces or glob characters intact.
declare -a CANDIDATES=()
while IFS= read -r FN; do
  CANDIDATES+=("$FN")
done < <(eval "$FIND_CMD")

for FN in "${CANDIDATES[@]}"; do
  CHECKED=$((CHECKED + 1))

  # Get result of all plus terms, with accompanying line numbers ("N:content")
  declare -a RESULTS_PLUS=()
  while IFS= read -r RESULT; do
    RESULTS_PLUS+=("$RESULT")
  done < <(grep -E ${CASE_INS:+"$CASE_INS"} -n "${PLUS_ARGS[@]}" < "$FN")

  # Skip file if we got no results
  if [ "${#RESULTS_PLUS[@]}" -eq 0 ]; then
    continue
  fi

  # Get result of running plus results against all --omit terms. MINUS_ARGS holds a
  # command pipeline that reads the current line from $RESULT and prints it only if
  # no --omit pattern matches.
  declare -a RESULTS_MINUS=()
  for RESULT in "${RESULTS_PLUS[@]}"; do
    KEPT=$(eval "${MINUS_ARGS[@]}")
    if [ -n "$KEPT" ]; then
      RESULTS_MINUS+=("$KEPT")
    fi
  done

  # Skip file if nothing's left
  if [ "${#RESULTS_MINUS[@]}" -eq 0 ]; then
    continue
  fi

  # Save line numbers from grep results in RES_LINES
  declare -a RES_LINES=()
  for RESULT in "${RESULTS_MINUS[@]}"; do
    RES_LINES+=("${RESULT%%:*}")    # get everything before first ':'
  done

  # If two consecutive hits have more than WITHIN lines between them, ignore file
  WITHIN_EXCEEDED=0
  if [ "$WITHIN" -gt 0 ]; then
    LAST_NUM=${RES_LINES[0]}
    for LINE in "${RES_LINES[@]}"; do
      if [ $((LINE - LAST_NUM)) -gt "$WITHIN" ]; then
        WITHIN_EXCEEDED=1
        break
      fi
      LAST_NUM=$LINE
    done
  fi
  if [ "$WITHIN_EXCEEDED" -eq 1 ]; then
    continue
  fi

  # The file meets all our criteria
  FOUND=$((FOUND + 1))

  if [ "$MODE" -eq "$PRINT" ]; then
    # Print results header: "[magenta on]'File name'[magenta off] (x matches)".
    # printf keeps backslashes in the file name from being read as escapes.
    if [ "$QUIET" -eq 0 ]; then
      RESULT_COUNT=${#RESULTS_MINUS[@]}
      STR_MATCHES="matches"
      if [ "$RESULT_COUNT" -eq 1 ]; then
        STR_MATCHES="match"
      fi
      printf '\033[35m%s\033[0m (%s %s)\n' "$FN" "$RESULT_COUNT" "$STR_MATCHES"
    fi

    # Start off with just the line numbers for the final results
    declare -a FINAL_LINES=("${RES_LINES[@]}")

    # Add lookback line numbers to FINAL_LINES if lookback was requested: every line
    # from 1 to LOOKBACK lines before each result line
    if [ "$LOOKBACK" -gt 0 ]; then
      for LINE in "${RES_LINES[@]}"; do
        for i in $(seq "$LOOKBACK"); do
          LB_LINE=$((LINE - i))
          if [ "$LB_LINE" -gt 0 ]; then    # make sure we didn't back up past line 1
            FINAL_LINES+=("$LB_LINE")
          fi
        done
      done
    fi

    # Add lookahead line numbers to FINAL_LINES if lookahead was requested
    if [ "$LOOKAHEAD" -gt 0 ]; then
      NUM_LINES=$(countLines "$FN")
      for LINE in "${RES_LINES[@]}"; do
        for i in $(seq "$LOOKAHEAD"); do
          LA_LINE=$((LINE + i))
          if [ "$LA_LINE" -le "$NUM_LINES" ]; then    # don't go past end of file
            FINAL_LINES+=("$LA_LINE")
          fi
        done
      done
    fi

    # Sort FINAL_LINES' contents in numerical order and drop the duplicates caused
    # by overlapping ranges. The elements are digits only, so the unquoted
    # substitution cannot be altered by globbing.
    if [ "$LOOKBACK" -gt 0 ] || [ "$LOOKAHEAD" -gt 0 ]; then
      FINAL_LINES=($(printf '%s\n' "${FINAL_LINES[@]}" | sort -n -u))
    fi

    # Print the lines whose numbers are in FINAL_LINES
    for LINE_NUM in "${FINAL_LINES[@]}"; do
      THE_LINE=$(tail -n "+$LINE_NUM" "$FN" | head -n 1)
      if [ "$SHOW_LINE_NUM" -eq 1 ]; then
        printf '%s: ' "$LINE_NUM"
      fi

      # If we're also printing lines before or after the matching one, make the
      # surrounding lines gray; a line is "surrounding" if it is not in RES_LINES
      IS_MATCH=1
      if [ "$LOOKBACK" -gt 0 ] || [ "$LOOKAHEAD" -gt 0 ]; then
        IS_MATCH=0
        for ORIG_LINE in "${RES_LINES[@]}"; do
          if [ "$ORIG_LINE" -eq "$LINE_NUM" ]; then
            IS_MATCH=1
            break
          fi
        done
      fi

      # printf '%s' prints the content verbatim: no glob expansion, no backslash
      # interpretation, and no mistaking a line such as "-n" for an option
      if [ "$IS_MATCH" -eq 1 ]; then
        printf '%s\n' "$THE_LINE"
      else
        printf '\033[2m%s\033[0m\n' "$THE_LINE"
      fi
    done

    if [ "$QUIET" -eq 0 ]; then
      echo - - - - - - - -
    fi
  elif [ "$MODE" -eq "$COPY" ]; then
    DESIRED_PATH="$DEST_DIR/${FN##*/}"
    # Skip the file if no destination could be determined, and keep the summary
    # count limited to files that were actually copied
    if ! CORRECTED_PATH=$(correctForPathConflict "$DESIRED_PATH") || [ -z "$CORRECTED_PATH" ]; then
      echo "Skipping $FN: could not find a free destination path." >&2
      FOUND=$((FOUND - 1))
      continue
    fi
    cp -a -- "$FN" "$CORRECTED_PATH" || FOUND=$((FOUND - 1))
  elif [ "$MODE" -eq "$DELETE" ]; then
    DESIRED_PATH="$TRASH_FOLDER/${FN##*/}"
    # Same handling as in copy mode: only files actually moved are counted
    if ! CORRECTED_PATH=$(correctForPathConflict "$DESIRED_PATH") || [ -z "$CORRECTED_PATH" ]; then
      echo "Skipping $FN: could not find a free destination path." >&2
      FOUND=$((FOUND - 1))
      continue
    fi
    mv -- "$FN" "$CORRECTED_PATH" || FOUND=$((FOUND - 1))
  fi
done
niceExit