forked from AlexsLemonade/OpenPBTA-manuscript
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build.sh
executable file
·137 lines (118 loc) · 4.78 KB
/
build.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env bash
## build.sh: compile manuscript outputs from content using Manubot and Pandoc
set -o errexit \
-o nounset \
-o pipefail
# Set timezone used by Python for setting the manuscript's date
export TZ=Etc/UTC
# Default Python to read/write text files using UTF-8 encoding
export LC_ALL=en_US.UTF-8
# Set DOCKER_RUNNING to true if docker is running, otherwise false.
DOCKER_RUNNING="$(docker info &> /dev/null && echo "true" || (true && echo "false"))"
# Set option defaults
CI="${CI:-false}"
BUILD_PDF="${BUILD_PDF:-true}"
BUILD_DOCX="${BUILD_DOCX:-false}"
BUILD_LATEX="${BUILD_LATEX:-false}"
SPELLCHECK="${SPELLCHECK:-false}"
MANUBOT_USE_DOCKER="${MANUBOT_USE_DOCKER:-$DOCKER_RUNNING}"
# Pandoc's configuration is specified via files of option defaults
# located in the $PANDOC_DATA_DIR/defaults directory.
PANDOC_DATA_DIR="${PANDOC_DATA_DIR:-build/pandoc}"
# Generate reference information
echo >&2 "Retrieving and processing reference metadata"
manubot process \
--content-directory=content \
--output-directory=output \
--cache-directory=ci/cache \
--skip-citations \
--log-level=INFO
# Make output directory
mkdir -p output
# Create HTML output
# https://pandoc.org/MANUAL.html
echo >&2 "Exporting HTML manuscript"
pandoc --verbose \
--data-dir="$PANDOC_DATA_DIR" \
--defaults=common.yaml \
--defaults=html.yaml
# Create PDF output (unless BUILD_PDF environment variable equals "false")
# If Docker is not available, use WeasyPrint to create PDF
if [ "${BUILD_PDF}" != "false" ] && [ "${MANUBOT_USE_DOCKER}" != "true" ]; then
echo >&2 "Exporting PDF manuscript using WeasyPrint"
if [ -L images ]; then rm images; fi # if images is a symlink, remove it
ln -s content/images
pandoc \
--data-dir="$PANDOC_DATA_DIR" \
--defaults=common.yaml \
--defaults=html.yaml \
--defaults=pdf-weasyprint.yaml
rm images
fi
# If Docker is available, use athenapdf to create PDF
if [ "${BUILD_PDF}" != "false" ] && [ "${MANUBOT_USE_DOCKER}" == "true" ]; then
echo >&2 "Exporting PDF manuscript using Docker + Athena"
if [ "${CI}" = "true" ]; then
# Incease --delay for CI builds to ensure the webpage fully renders, even when the CI server is under high load.
# Local builds default to a shorter --delay to minimize runtime, assuming proper rendering is less crucial.
MANUBOT_ATHENAPDF_DELAY="${MANUBOT_ATHENAPDF_DELAY:-5000}"
echo >&2 "Continuous integration build detected. Setting athenapdf --delay=$MANUBOT_ATHENAPDF_DELAY"
fi
if [ -d output/images ]; then rm -rf output/images; fi # if images is a directory, remove it
cp -R -L content/images output/
docker run \
--rm \
--shm-size=2g \
--volume="$(pwd)/output:/converted/" \
--security-opt=seccomp:unconfined \
arachnysdocker/athenapdf:2.16.0 \
athenapdf \
--delay=${MANUBOT_ATHENAPDF_DELAY:-1100} \
--timeout=1200 \
--pagesize=A4 \
manuscript.html manuscript.pdf
rm -rf output/images
fi
# Create DOCX output (if BUILD_DOCX environment variable equals "true")
if [ "${BUILD_DOCX}" = "true" ]; then
echo >&2 "Exporting Word Docx manuscript"
pandoc --verbose \
--data-dir="$PANDOC_DATA_DIR" \
--defaults=common.yaml \
--defaults=docx.yaml
fi
# Create LaTeX output (if BUILD_LATEX environment variable equals "true")
if [ "${BUILD_LATEX}" = "true" ]; then
echo >&2 "Exporting LaTeX manuscript"
pandoc \
--data-dir="$PANDOC_DATA_DIR" \
--defaults=common.yaml \
--defaults=latex.yaml
fi
# Spellcheck
if [ "${SPELLCHECK}" = "true" ]; then
export ASPELL_CONF="add-extra-dicts $(pwd)/build/assets/custom-dictionary.txt; ignore-case true"
# Identify and store spelling errors
pandoc \
--data-dir="$PANDOC_DATA_DIR" \
--lua-filter spellcheck.lua \
output/manuscript.md \
| sort -fu > output/spelling-errors.txt
echo >&2 "Potential spelling errors:"
cat output/spelling-errors.txt
# Add additional forms of punctuation that Pandoc converts so that the
# locations can be detected
# Create a new expanded spelling errors file so that the saved artifact
# contains only the original misspelled words
cp output/spelling-errors.txt output/expanded-spelling-errors.txt
grep "’" output/spelling-errors.txt | sed "s/’/'/g" >> output/expanded-spelling-errors.txt || true
# Find locations of spelling errors
# Use "|| true" after grep because otherwise this step of the pipeline will
# return exit code 1 if any of the markdown files do not contain a
# misspelled word
cat output/expanded-spelling-errors.txt | while read word; do grep -on "\<$word\>" content/*.md; done | sort -h -t ":" -k 1b,1 -k2,2 > output/spelling-error-locations.txt || true
echo >&2 "Filenames and line numbers with potential spelling errors:"
cat output/spelling-error-locations.txt
rm output/expanded-spelling-errors.txt
fi
echo >&2 "Build complete"