From a2c36418d892810c6a89c0c354ff28d0f6687671 Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Sat, 23 Nov 2024 16:44:31 +0100 Subject: [PATCH] remove PERL_STRICT_CR Background: ----------- Before perl 5.004, the perl parser would skip over CR (carriage return) between tokens in source files, treating it as whitespace, but would retain CRs in quoted constructs such as multi-line strings and here-documents. In 5.004, the behavior was changed to make CR in source files a fatal error ("Illegal character %s (carriage return)") to avoid surprises with unexpected literal CRs in string constants when scripts were copied from DOS/Windows without newline conversion. In 5.005, the behavior changed again. Now CR was back to being ignored, but harder: Even in quoted constructs, CR was ignored when immediately followed by a newline. However, the 5.004 behavior could be restored by compiling perl with the `PERL_STRICT_CR` macro defined (e.g. with `./Configure -A ccflags=-DPERL_STRICT_CR ...`). This option was undocumented except for a brief note in perl5005delta. (Also, the "Illegal character ..." error was changed to a warning, but perldiag wasn't updated and so still listed the message as "fatal" (F).) And that's how things have been ever since 1998. Foreground: ----------- This patch removes all checks for PERL_STRICT_CR entirely, treating it as always off. Rationale: It simplifies the code and reduces clutter. (Plus I don't see the need to perpetually maintain an undocumented configuration option that enables compatibility with an ancient perl version used sometime around 1997-1998.) 
References: ----------- - 4fdae80067 ("Make \r in script an error (per Larry)") - ff0cee690d ("Fix carriage-return message") - 54310121b4 ("Improve diagnostic on \r in program text") - 2db4f57cd9 - f63a84b229 - 637e912262 - b8957cf14d - 6a27c1886b --- perl.c | 4 ---- pod/perldelta.pod | 14 ++++++++++++++ pod/perldiag.pod | 8 -------- t/porting/diag.t | 3 --- toke.c | 31 ++----------------------------- 5 files changed, 16 insertions(+), 44 deletions(-) diff --git a/perl.c b/perl.c index c10e10b495e5..6637506c6200 100644 --- a/perl.c +++ b/perl.c @@ -2231,9 +2231,7 @@ S_parse_body(pTHX_ char **env, XSINIT_t xsinit) reswitch: switch ((c = *s)) { case 'C': -#ifndef PERL_STRICT_CR case '\r': -#endif case ' ': case '0': case 'F': @@ -3955,9 +3953,7 @@ Perl_moreswitches(pTHX_ const char *s) break; case '-': case 0: -#if defined(WIN32) || !defined(PERL_STRICT_CR) case '\r': -#endif case '\n': case '\t': break; diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 3ce40a3b49c0..3dda323dc0c0 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -256,6 +256,20 @@ L</Platform Support> section, instead. XXX +=item * + +The (mostly undocumented) configuration macro C<PERL_STRICT_CR> has been +removed. When enabled (e.g. with C<./Configure -A ccflags=-DPERL_STRICT_CR>), +it would make the perl parser throw a fatal error when it encountered a CR +(carriage return) character in source files. The default (and now only) +behavior of the perl parser is to strip CRs paired with newline characters and +otherwise treat them as whitespace. + +(C<PERL_STRICT_CR> was originally introduced in perl 5.005 to optionally +restore backward compatibility with perl 5.004, which had made CR in source +files an error. Before that, CR was accepted, but retained literally in quoted +multi-line constructs such as here-documents, even at the end of a line.) + =back =head1 Testing diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 8585d808a7d8..9ca369b0e85e 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -2889,14 +2889,6 @@ declaration. 
The '_' in a prototype must be followed by a ';', indicating the rest of the parameters are optional, or one of '@' or '%', since those two will accept 0 or more final parameters. -=item Illegal character \%o (carriage return) - -(F) Perl normally treats carriage returns in the program text as -it would any other whitespace, which means you should never see -this error when Perl was built using standard options. For some -reason, your version of Perl appears to have been built without -this support. Talk to your Perl administrator. - =item Illegal character following sigil in a subroutine signature (F) A parameter in a subroutine signature contained an unexpected character diff --git a/t/porting/diag.t b/t/porting/diag.t index bfbb853894fa..07560fa6a858 100644 --- a/t/porting/diag.t +++ b/t/porting/diag.t @@ -740,9 +740,6 @@ Wrong syntax (suid) fd script name "%s" __CATEGORIES__ -# This is a warning, but is currently followed immediately by a croak (toke.c) -Illegal character \%o (carriage return) - # Because uses WARN_MISSING as a synonym for WARN_UNINITIALIZED (sv.c) Missing argument in %s diff --git a/toke.c b/toke.c index 5bdfbe338861..b5475d1342cd 100644 --- a/toke.c +++ b/toke.c @@ -9244,13 +9244,7 @@ yyl_try(pTHX_ char *s) return tok; goto retry_bufptr; - case '\r': -#ifdef PERL_STRICT_CR - Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); - Perl_croak(aTHX_ - "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); -#endif - case ' ': case '\t': case '\f': case '\v': + case ' ': case '\t': case '\f': case '\r': case '\v': s++; goto retry; @@ -9394,11 +9388,7 @@ yyl_try(pTHX_ char *s) } if (PL_expect == XBLOCK) { const char *t = s; - while (SPACE_OR_TAB(*t) -#ifndef PERL_STRICT_CR - || *t == '\r' -#endif - ) + while (SPACE_OR_TAB(*t) || *t == '\r') t++; if (*t == '\n' || *t == '#') { ENTER_with_name("lex_format"); @@ -9460,11 +9450,7 @@ yyl_try(pTHX_ char *s) case '.': if (PL_lex_formbrack && PL_lex_brackets == 
PL_lex_formbrack -#ifdef PERL_STRICT_CR - && s[1] == '\n' -#else && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) -#endif && (s == PL_linestart || s[-1] == '\n') ) { PL_expect = XSTATE; @@ -11016,7 +11002,6 @@ S_scan_heredoc(pTHX_ char *s) *d = '\0'; len = d - PL_tokenbuf; -#ifndef PERL_STRICT_CR d = (char *) memchr(s, '\r', PL_bufend - s); if (d) { char * const olds = s; @@ -11039,7 +11024,6 @@ S_scan_heredoc(pTHX_ char *s) SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); s = olds; } -#endif tmpstr = newSV_type(SVt_PVIV); if (term == '\'') { @@ -11245,7 +11229,6 @@ S_scan_heredoc(pTHX_ char *s) PL_parser->herelines++; PL_last_lop = PL_last_uni = NULL; -#ifndef PERL_STRICT_CR if (PL_bufend - PL_linestart >= 2) { if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r')) @@ -11259,7 +11242,6 @@ S_scan_heredoc(pTHX_ char *s) } else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') PL_bufend[-1] = '\n'; -#endif if (indented && (PL_bufend-s) >= len) { char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len)); @@ -11852,7 +11834,6 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int if (s < PL_bufend) break; /* handle case where we are done yet :-) */ -#ifndef PERL_STRICT_CR if (to - SvPVX_const(sv) >= 2) { if ( (to[-2] == '\r' && to[-1] == '\n') || (to[-2] == '\n' && to[-1] == '\r')) @@ -11866,7 +11847,6 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int } else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') to[-1] = '\n'; -#endif /* if we're out of file, or a read fails, bail and reset the current line marker so we can report where the unterminated string began @@ -12611,13 +12591,8 @@ S_scan_formline(pTHX_ char *s) char *eol; if (*s == '.') { char *t = s+1; -#ifdef PERL_STRICT_CR - while (SPACE_OR_TAB(*t)) - t++; -#else while (SPACE_OR_TAB(*t) || *t == '\r') t++; -#endif if (*t == '\n' || t == PL_bufend) { eofmt 
= TRUE; break; @@ -12642,14 +12617,12 @@ S_scan_formline(pTHX_ char *s) } if (eol > s) { sv_catpvn(stuff, s, eol-s); -#ifndef PERL_STRICT_CR if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { char *end = SvPVX(stuff) + SvCUR(stuff); end[-2] = '\n'; end[-1] = '\0'; SvCUR_set(stuff, SvCUR(stuff) - 1); } -#endif } else break;