forked from nosamanuel/unix_for_poets
-
Notifications
You must be signed in to change notification settings - Fork 0
/
genesis.sh
executable file
·67 lines (46 loc) · 2.28 KB
/
genesis.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Alpha tokenization: every run of non-letters in genesis.txt is replaced by a
# single newline, giving one word per line. When the text starts with a
# non-letter, tr emits an empty first line, so empty lines are dropped in the
# same pipeline. Plain `sed '/^$/d'` is portable and also removes an empty
# final line, which the previous in-place GNU-only `N;s/\n//` edit left behind.
tr -cs "[:alpha:]" "\n" < genesis.txt | sed '/^$/d' > genesis.words.txt
# Case-insensitive count, most frequent word first
sort -f genesis.words.txt | uniq -ci | sort -nr > genesis.counts.txt
# Sort by "rhyming" order: reverse each word, sort, then reverse back, so
# words are ordered by their endings
rev genesis.words.txt | sort | rev > genesis.rhymes.txt
# Bigrams: pair each word with the word that follows it. The shifted stream
# is one line shorter, so the final row has an empty second column.
tail -n +2 genesis.words.txt | paste genesis.words.txt - > genesis.bigrams.txt
# Trigrams
#######################################
# Print genesis.words.txt starting at a given line, i.e. the word stream
# shifted forward by N-1 words.
# Arguments: $1 - 1-based line number to start from
# Outputs:   the remaining words on stdout, one per line
#######################################
words_plus_n() {
  # Quote the expansion so the argument is always passed to tail as one word.
  tail -n "+$1" genesis.words.txt
}
# Trigram table: column 1 is the word list, columns 2 and 3 are the same list
# starting one and two words later.
paste genesis.words.txt \
  <(words_plus_n 2) \
  <(words_plus_n 3) \
  > genesis.trigrams.txt
# Top bigrams, then top trigrams: the 20 most frequent rows of each table,
# compared case-insensitively.
for ngram_table in genesis.bigrams.txt genesis.trigrams.txt; do
  sort -f "$ngram_table" | uniq -ci | sort -nr | head -n 20
done
# Count "-ing" words (grep -c counts matching lines directly)
grep -c -e "ing$" genesis.words.txt
# Count uppercase words. [[:upper:]] is used instead of the range A-Z because
# a range may also match lowercase letters in some locales.
grep -Ec "^[[:upper:]]+$" genesis.words.txt
# Count 4-letter words
grep -Ec "^.{4}$" genesis.words.txt
# Find words without vowels (two or more letters, none of a/e/i/o/u)
grep -Ei "^[^aeiou]{2,}$" genesis.words.txt | sort -f | uniq -i
# Find words with exactly 1 vowel ("1-syllable" words); y counts as a vowel
grep -Ei "^[^aeiouy]*[aeiouy][^aeiouy]*$" genesis.words.txt | sort -f | uniq -i
# Find words with exactly 2 vowels ("2-syllable" words); y counts as a vowel
grep -Ei "^[^aeiouy]*[aeiouy][^aeiouy]*[aeiouy][^aeiouy]*$" genesis.words.txt | sort -f | uniq -i > genesis.2syllables.txt
# Delete words ending with silent "e" or containing diphthongs
grep -Eiv "(ow|ou|ie|oi|oo|ea|ee|ai|[aeiou]y)|[aeiouy][^aeiouy]e$" genesis.2syllables.txt
# Find verses with the word "light" (-w restricts matches to whole words)
grep -iw "light" genesis.txt
# Count verses with two or more instances of "light"
grep -Eic "(light.*){2}" genesis.txt
# Count verses with three or more instances of "light"
grep -Eic "(light.*){3}" genesis.txt
# Count verses with exactly two instances of "light":
# keep verses with at least two, then discard those with at least three
grep -Ei "(light.*){2}" genesis.txt | grep -Eivc "(light.*){3}"
# The same count as a single Perl-compatible regex (needs a grep with -P
# support, e.g. GNU grep). Every character consumed outside a match is
# guarded by (?!light), so an instance can never be skipped over, including
# one at the very start of the line. -i keeps it consistent with the
# pipeline above.
grep -Pic '^(?:(?:(?!light).)*light){2}(?:(?!light).)*$' genesis.txt
# The sed programs below rely on GNU extensions (\s and the case-insensitive
# "i" flag on s///). Homebrew installs GNU sed as `gsed`; where that name is
# absent, fall back to `sed`, which is GNU sed on most Linux systems.
if command -v gsed > /dev/null 2>&1; then
  gnu_sed=gsed
else
  gnu_sed=sed
fi
# Count morphs: from each line of `aspell munch` output keep only the text
# after the last whitespace, with any "/..." suffix removed, then tally.
# NOTE(review): assumes munch prints "root/flags" entries - confirm against
# the installed aspell version.
aspell munch < genesis.words.txt \
  | "$gnu_sed" 's/.*\s//;s/\s//g;s/\/.*$//' \
  | sort -f \
  | uniq -c
# Count word initial consonant sequences: replace each word by the letters
# that precede its first vowel. Words with no vowel at all are left unchanged.
"$gnu_sed" -r 's/^([^aeiou]*)[aeiouy].*$/\1/i' genesis.words.txt \
  | sort -f \
  | uniq -c
# Count word final consonant sequences: replace each word by the letters that
# follow its last vowel (an empty line when the word ends in a vowel).
"$gnu_sed" -r 's/^.*[aeiouy]([^aeiouy]+)$|^.*[aeiouy]($)/\1/i' genesis.words.txt \
  | sort -f \
  | uniq -c