-
Notifications
You must be signed in to change notification settings - Fork 14
/
texfot.pl
executable file
·414 lines (316 loc) · 13.5 KB
/
texfot.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
#!/usr/bin/env perl
# $Id: texfot,v 1.32 2016/02/09 19:13:22 karl Exp $
# Invoke a TeX command, filtering all but interesting terminal output;
# do not look at the log or check any output files.
# Exit status is that of the subprogram.
# Tee the complete (unfiltered) standard output and standard error to
# (by default) /tmp/fot.
#
# Public domain. Originally written 2014 by Karl Berry.
# Identification string; printed verbatim by --version.
my $ident = '$Id: texfot,v 1.32 2016/02/09 19:13:22 karl Exp $';
# Program name used in our own diagnostics: $0 with everything up to
# and including the last slash removed.
(my $prg = $0) =~ s,^.*/,,;
# Turn on autoflush for both standard streams; STDOUT is selected last,
# so it remains the default handle for print.
select STDERR; $| = 1; # no buffering
select STDOUT; $| = 1;
use IPC::Open3; # control what happens with stderr from the child.
use IO::File; # use new_tmpfile for that stderr.
# require_order because we don't want getopt to permute anything;
# arguments to the tex invocation must remain in order, not handled by us.
use Getopt::Long qw(:config require_order);
use Pod::Usage;
# Option values.  These are the defaults; main overrides them from the
# command line via GetOptions, and the subs below read them directly.
#
# --debug: when true, debug() reports each filtering decision via warn.
my $opt_debug = 0;
# --ignore: user-supplied regexps; a line matching any of them is dropped.
my @opt_ignore = ();
# --interactive: when false, STDIN is closed before the command is run.
my $opt_interactive = 0;
# --quiet: when true, the "invoking: ..." line is not printed.
my $opt_quiet = 0;
# --stderr: when true, stderr lines that match no list are still shown.
my $opt_stderr = 1;
# --tee: file receiving the complete unfiltered output; defaults to
# "fot" in $TMPDIR, or in /tmp when TMPDIR is unset or empty.
my $opt_tee = ($ENV{"TMPDIR"} || "/tmp") . "/fot";
# --version and --help: handled at the start of main.
my $opt_version = 0;
my $opt_help = 0;
# Run the program; main's return value becomes our exit status.  The
# subs are defined further down, but are compiled before this runs.
exit (&main ());
#
# Entry point.  Parse texfot's own options, run the command remaining in
# @ARGV with its stdout piped to us and its stderr sent to a temporary
# file, filter stdout and then stderr through process_output, and return
# the child's exit status (the wait status shifted right by 8), which
# the caller passes to exit.
#
# Dies if no command was given, if the tee file cannot be opened, or if
# starting or waiting for the child fails.
#
sub main {
  GetOptions (
    "debug!"       => \$opt_debug,
    "ignore=s"     => \@opt_ignore,
    "interactive!" => \$opt_interactive,
    "quiet!"       => \$opt_quiet,
    "stderr!"      => \$opt_stderr,
    "tee=s"        => \$opt_tee,
    "version"      => \$opt_version,
    "help|?"       => \$opt_help) || pod2usage (2);

  # --help, --version
  pod2usage ("-exitstatus" => 0, "-verbose" => 2)
    if $opt_help;
  #
  if ($opt_version) {
    print "$ident\n";
    return 0;
  }

  die "$prg: missing TeX command, i.e., a non-option argument\n"
      . "Try --help if you need it."
    if ! @ARGV;

  # guess we're going to run something.  typically no interaction.
  close (STDIN) unless $opt_interactive;

  # Open the tee file on the package filehandle FOTTMP, which
  # process_output prints every line to.  The one-argument open takes
  # the file name (including the ">" mode prefix) from $FOTTMP.
  local *FOTTMP;
  $FOTTMP = ">$opt_tee";
  open (FOTTMP) || die "$prg: aborting, open($FOTTMP) failed: $!";

  # We need to separate stderr from stdout.  Otherwise they are randomly
  # merged, not always at line breaks, hence we can lose useful messages.
  print "$0: invoking: @ARGV\n" unless $opt_quiet;

  # In order to avoid deadlock when there is lots of stuff on stderr,
  # we must write it to a temporary file.
  # http://perldoc.perl.org/perlfaq8.html#How-can-I-capture-STDERR-from-an-external-command
  local *TEXERR = IO::File->new_tmpfile
    || die "IO::File->new_tmpfile failed: $!";

  # But we can process stdout as it comes.
  local *TEXOUT;
  my $pid = open3 (undef, \*TEXOUT, ">&TEXERR", @ARGV)
    || die "$prg: fork(TeX) failed: $! [cmd=@ARGV]\n";
  &debug ("open3() returned pid $pid [cmd=@ARGV]");

  # It's not ideal to read all of stdout and then all of stderr; it would
  # be better to intermix them in the original order of child output.
  # this is simpler than other ways of avoiding possible deadlock (such
  # as select, sysread, etc.).
  &debug ("processing stdout from child");
  &process_output (\*TEXOUT, "");

  # Be sure everything is drained.
  &debug ("starting waitpid() for $pid") ;
  # waitpid reports failure as -1, which is a true value, so compare the
  # result with the pid rather than testing it for truth.
  waitpid ($pid, 0) == $pid
    or die "$prg: waitpid($pid) failed: $!\n";
  my $child_exit_status = $? >> 8;
  &debug ("child exit status = $child_exit_status\n");

  &debug ("processing stderr from child");
  # The child wrote through its own copy of the descriptor; rewind so
  # that we read the temporary file from the beginning.
  seek (TEXERR, 0, 0) || warn "seek(stderr) failed: $!";
  &process_output (\*TEXERR, "[stderr] ");
  close (TEXERR) || warn "close(stderr tmpfile) failed: $!";

  return $child_exit_status;
}
# Filter one output stream of the child.  Each line read from $FH is
# first copied unchanged to the tee handle FOTTMP.  Lines that pass the
# checks below are printed to stdout, each preceded by $PREFIX.  A line
# matching none of the lists is printed only when $PREFIX is true and
# --stderr is in effect; otherwise it is dropped.
#
sub process_output {
  my ($fh, $prefix) = @_;

  # Print one accepted line, preceded by the stream prefix.
  my $show = sub {
    print $prefix;
    print $_[0];
  };

  # True when the previous line asked for its successor to be shown too.
  my $want_following = 0;

  LINE: while (defined ($_ = <$fh>)) {
    my $line = $_;
    print FOTTMP $line; # tee everything

    warn "\n" if $opt_debug; # get blank line without texfot: prefix
    &debug ("looking at line: $line");

    # 1. Second line of a two-line message: shown without any checks.
    &debug ("checking if have print_next (is $want_following)\n");
    if ($want_following) {
      &debug (" printing next ($want_following)\n");
      $show->($line);
      $want_following = 0;
      next LINE;
    }

    # 2. Built-in ignores, then the user's --ignore regexps.
    &debug ("checking ignores\n");
    if ($line =~ /^(
        LaTeX\ Warning:\ You\ have\ requested\ package
        |LaTeX\ Font\ Warning:\ Some\ font\ shapes
        |LaTeX\ Font\ Warning:\ Size\ substitutions
        |Package\ caption\ Warning:\ Unsupported\ document\ class
        |Package\ fixltx2e\ Warning:\ fixltx2e\ is\ not\ required
        |Package\ frenchb\.ldf\ Warning:\ (Figures|The\ definition)
        |Reloading\ Xunicode\ for\ encoding # spurious ***
        |This\ is.*(epsf\.tex|\.sty) # so what
        |pdfTeX\ warning:.*inclusion:\ fou #nd PDF version ...
        |pdfTeX\ warning:.*inclusion:\ mul #tiple pdfs with page group
        |libpng\ warning:\ iCCP:\ Not\ recognizing
        )/x) {
      next LINE;
    }

    # don't anchor user ignores, leave it up to them.
    foreach my $pattern (@opt_ignore) {
      &debug ("checking user ignore '$pattern'\n");
      if ($line =~ /${pattern}/) {
        next LINE;
      }
    }

    # 3. Messages that are shown together with the line after them.
    &debug ("checking for print_next\n");
    if ($line =~ /^(
        .*?:[0-9]+: # usual file:lineno: form
        |! # usual ! form
        |.*pdfTeX\ warning # pdftex complaints often cross lines
        |LaTeX\ Font\ Warning:\ Font\ shape
        |>\ [^<] # from \show..., but not "> <img.whatever"
        |removed\ on\ input\ line # hyperref
        |Runaway\ argument
        )/x) {
      &debug (" found print_next ($1)\n");
      $show->($line);
      $want_following = 1;
      next LINE;
    }

    # 4. Messages that are shown on their own.
    &debug ("checking for showing\n");
    if ($line =~ /^(
        This\ is
        |Output\ written
        |No\ pages\ of\ output
        |(Und|Ov)erfull
        |(LaTeX|Package|Class).*(Error|Warning)
        |.*Citation.*undefined
        |.*\ Error # as in \Url Error ->...
        |Missing\ character: # good to show (need \tracinglostchars=1)
        |\\endL.*problem # XeTeX?
        |\*\*\*\s # *** from some packages or subprograms
        |l\.[0-9]+\ # line number marking
        |all\ text\ was\ ignored\ after\ line
        |.*Fatal\ error
        |.*for\ symbol.*on\ input\ line
        )/x) {
      &debug (" matched for showing ($1)\n");
      $show->($line);
      next LINE;
    }

    # 5. Nothing matched: the outcome depends on the stream and --stderr.
    &debug ("done with all checks\n");
    if (! ($prefix && $opt_stderr)) {
      &debug ("no prefix (stdout) or no stderr, ignoring line by default: $line");
    } else {
      &debug ("prefix (stderr), showing line by default: $line");
      $show->($line);
    }
  }
}
# Report the given message pieces through warn, preceded by the program
# name, but only when --debug is in effect; otherwise do nothing.
sub debug {
  if ($opt_debug) {
    warn ("$prg: ", @_);
  }
}
__END__
=head1 NAME
texfot - run TeX, filtering online transcript for interesting messages
=head1 SYNOPSIS
texfot [I<option>]... I<texcmd> [I<texarg>...]
=head1 DESCRIPTION
C<texfot> invokes I<texcmd> with the given I<texarg> arguments,
filtering the online output for ``interesting'' messages. Its exit
value is that of I<texcmd>. Examples:
# Sample basic invocation:
texfot pdflatex file.tex
# Ordinarily all output is copied to /tmp/fot before filtering;
# that can be omitted (texfot's own options must precede the command):
texfot --tee=/dev/null pdflatex file.tex
# Example of more complex engine invocation:
texfot lualatex --recorder '\nonstopmode\input file'
Aside from its own options, described below, C<texfot> just runs the
given command with the given arguments (same approach to command line
syntax as C<env>, C<nice>, C<time>, C<timeout>, etc.). Thus, C<texfot>
works with any engine and any command line options.
C<texfot> does not look at the log file or any other possible output
file(s); it only looks at the standard output and standard error from
the command. stdout is processed first, then stderr. Lines from stderr
have an identifying prefix. C<texfot> writes all accepted lines to its
stdout.
The messages shown are intended to be those which likely need action by
the author: error messages, overfull and underfull boxes, undefined
citations, missing characters from fonts, etc.
=head1 FLOW OF OPERATION
Here is the order in which lines of output are checked:
=over 4
=item 1.
If the ``next line'' needs to be printed (see below), print it.
=item 2.
Otherwise, if the line matches the built-in list of regexps to ignore,
or any user-supplied list of regexps to ignore (given with C<--ignore>,
see below), in that order, ignore it.
=item 3.
Otherwise, if the line matches the list of regexps for which the next
line (two lines in all) should be shown, show this line and set the
``next line'' flag for the next time around the loop. Examples are the
common C<!> and C<filename:lineno:> error messages, which are generally
followed by a line with specific detail about the error.
=item 4.
Otherwise, if the line matches the list of regexps to show, show it.
=item 5.
Otherwise, the default: if the line came from stdout, ignore it; if the
line came from stderr, print it (to stdout) unless C<--no-stderr> was
given. (This distinction is made because TeX engines write relatively
few messages to stderr, and it's not unlikely that any such should be
considered.)
It would be easy to add more options to allow for user additions to the
various regex lists, if that ever seems useful. Or email me (see end).
=back
Once a particular check matches, the program moves on to process the
next line.
Don't hesitate to peruse the source to the script, which is essentially
a straightforward loop matching against the different lists as above.
You can see the exact regexps being matched in the different categories
in the source.
Incidentally, although nothing in this basic operation is specific to
TeX engines, all the regular expressions included in the program are
specific to TeX. So in practice the program isn't useful except with
TeX engines, although it would be easy enough to adapt it (if there was
anything else as verbose as TeX to make that useful).
=head1 OPTIONS
The following are the options to C<texfot> itself (not the TeX engine
being invoked; consult the TeX documentation or the engine's C<--help>
output for that).
The first non-option terminates C<texfot>'s option parsing, and the
remainder of the command line is invoked as the TeX command, without
further parsing. For example, C<texfot --debug tex
--debug> will output debugging information from both C<texfot> and
C<tex>.
Options may start with either - or --, and may be unambiguously
abbreviated. It is best to use the full option name in scripts, though,
to avoid possible collisions with new options in the future.
=over 4
=item C<--debug>
=item C<--no-debug>
Output (or not) what is being done on standard error. Off by default.
=item C<--ignore> I<regexp>
Ignore lines in the TeX output matching (Perl) I<regexp>. Can be
repeated. Adds to the default set of ignore regexps rather than
replacing. These regexps are not automatically anchored (or otherwise
altered), simply used as-is.
=item C<--interactive>
=item C<--no-interactive>
By default, standard input to the TeX process is closed so that TeX's
interactive mode (waiting for input upon error, the C<*> prompt, etc.)
is never entered. Giving C<--interactive> allows interaction to happen.
=item C<--quiet>
=item C<--no-quiet>
By default, the TeX command being invoked is reported on standard output.
C<--quiet> omits that reporting.
=item C<--stderr>
=item C<--no-stderr>
The default is for C<texfot> to report everything written to stderr by
the TeX command (on stdout). C<--no-stderr> omits that reporting.
(Some programs, C<dvisvgm> is one, can be rather verbose on stderr.)
=item C<--tee> I<file>
By default, the output being filtered is C<tee>-ed, before filtering, to
C<$TMPDIR/fot> (C</tmp/fot> if C<TMPDIR> is not set), to make it easy to
check the full output when the filtering seems suspect. This option
allows specifying a different file. Use S<C<--tee /dev/null>> if you don't
want the original output at all.
=item C<--version>
Output version information and exit successfully.
=item C<--help>
Display this help and exit successfully.
=back
=head1 RATIONALE
I wrote this because, in my work as a TUGboat editor
(L<http://tug.org/TUGboat>, journal submissions always welcome!), I end
up running and rerunning many papers, many times each. It was too easy
to lose warnings I needed to see in the mass of unvarying and
uninteresting output from TeX, such as style files being read and fonts
being used. I wanted to see all and only those messages which needed
some action by me.
I found some other programs of a similar nature, the LaTeX package
C<silence>, and plenty of other (La)TeX wrappers, but it seemed none of
them did what I wanted. Either they read the log file (I wanted the
online output only), or they output more or less than I wanted, or they
required invoking TeX differently (I wanted to keep my build process
exactly the same, most critically the TeX invocation, which can get
complicated). Hence I wrote this.
Here are some keywords if you want to explore other options:
texloganalyser, pydflatex, logfilter, latexmk, rubber, arara, and
searching for C<log> at L<http://ctan.org/search>.
C<texfot> is written in Perl, and runs on Unix, and does not work on
Windows. (If by some chance anyone wants to use this program on
Windows, please make your own fork; I'm not interested in supporting
that os.)
The name comes from the C<trip.fot> and C<trap.fot> files that are part
of Knuth's trip and trap torture tests, which record the online output
from the programs. I am not sure what "fot" stands for in trip and
trap, but I can pretend that it stands for "filter online transcript" in
the present S<case :).>
=head1 AUTHORS AND COPYRIGHT
This script and its documentation were written by Karl Berry and both
are released to the public domain. Email C<karl@freefriends.org> with
bug reports. It has no home page beyond the package on CTAN:
L<http://www.ctan.org/pkg/texfot>.
=cut