diff --git a/MANIFEST b/MANIFEST index eef1f50dde64..543f63784f58 100644 --- a/MANIFEST +++ b/MANIFEST @@ -5207,6 +5207,7 @@ ext/XS-APItest/t/utf8_warn07.t Tests for code in utf8.c ext/XS-APItest/t/utf8_warn08.t Tests for code in utf8.c ext/XS-APItest/t/utf8_warn09.t Tests for code in utf8.c ext/XS-APItest/t/utf8_warn_base.pl Tests for code in utf8.c +ext/XS-APItest/t/valid_identifier.t XS::APItest: tests for valid_identifier_sv() ext/XS-APItest/t/weaken.t XS::APItest: tests for sv_rvweaken() and sv_get_backrefs() ext/XS-APItest/t/whichsig.t XS::APItest: tests for whichsig() and variants ext/XS-APItest/t/win32.t Test Win32 specific APIs diff --git a/embed.fnc b/embed.fnc index 5f48566133b9..a0cd6f3672db 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3766,6 +3766,12 @@ EXdpx |bool |validate_proto |NN SV *name \ |NULLOK SV *proto \ |bool warn \ |bool curstash +Adp |bool |valid_identifier_pvn \ + |NN const char *s \ + |STRLEN len \ + |U32 flags +Adp |bool |valid_identifier_sv \ + |NULLOK const SV *sv CRTdip |UV |valid_utf8_to_uvchr \ |NN const U8 *s \ |NULLOK STRLEN *retlen diff --git a/embed.h b/embed.h index 86804dee2694..a4fba3060f81 100644 --- a/embed.h +++ b/embed.h @@ -865,6 +865,8 @@ # define uvchr_to_utf8_flags(a,b,c) Perl_uvchr_to_utf8_flags(aTHX,a,b,c) # define uvchr_to_utf8_flags_msgs(a,b,c,d) Perl_uvchr_to_utf8_flags_msgs(aTHX,a,b,c,d) # define uvoffuni_to_utf8_flags_msgs(a,b,c,d) Perl_uvoffuni_to_utf8_flags_msgs(aTHX_ a,b,c,d) +# define valid_identifier_pvn(a,b,c) Perl_valid_identifier_pvn(aTHX_ a,b,c) +# define valid_identifier_sv(a) Perl_valid_identifier_sv(aTHX_ a) # define valid_utf8_to_uvchr Perl_valid_utf8_to_uvchr # define vcmp(a,b) Perl_vcmp(aTHX_ a,b) # define vcroak(a,b) Perl_vcroak(aTHX_ a,b) diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm index 05df69ef8c78..3b7201a0daf4 100644 --- a/ext/XS-APItest/APItest.pm +++ b/ext/XS-APItest/APItest.pm @@ -4,7 +4,7 @@ use strict; use warnings; use Carp; -our $VERSION = '1.39'; +our $VERSION = '1.40'; require XSLoader; diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index 6dcc02e8dbe7..4196bbb7004c 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -7408,6 +7408,12 @@ gimme() OUTPUT: RETVAL +bool +valid_identifier(SV *s) + CODE: + RETVAL = valid_identifier_sv(s); + OUTPUT: + RETVAL MODULE = XS::APItest PACKAGE = XS::APItest::Backrefs diff --git a/ext/XS-APItest/t/valid_identifier.t b/ext/XS-APItest/t/valid_identifier.t new file mode 100644 index 000000000000..1467046635b9 --- /dev/null +++ b/ext/XS-APItest/t/valid_identifier.t @@ -0,0 +1,33 @@ +#!perl + +use strict; +use warnings; + +use open ':std', ':encoding(UTF-8)'; +use Test::More; + +use_ok('XS::APItest'); + +# These should all be valid +foreach my $id (qw( abc ab_cd _abc x123 )) { + ok(valid_identifier($id), "'$id' is valid identifier"); +} + +# These should all not be +foreach my $id (qw( ab-cd 123 abc() ), "ab cd") { + ok(!valid_identifier($id), "'$id' is not valid identifier"); +} + +# Now for some UTF-8 tests +{ + use utf8; + + foreach my $id (qw( café sandviĉon )) { + ok(valid_identifier($id), "'$id' is valid UTF-8 identifier"); + } + + # en-dash + ok(!valid_identifier("ab–cd"), "'ab–cd' is not valid UTF-8 identifier"); +} + +done_testing; diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 3ce40a3b49c0..8c243afa2da4 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -350,6 +350,12 @@ well. XXX +=item * + +New API functions C and C have +been added, which test if a string would be considered by Perl to be a valid +identifier name. + =back =head1 Selected Bug Fixes diff --git a/proto.h b/proto.h index 61327e70349d..4783f1799e3f 100644 --- a/proto.h +++ b/proto.h @@ -5390,6 +5390,15 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV input_uv, const UV flags, HV ** #define PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS_MSGS \ assert(d) +PERL_CALLCONV bool +Perl_valid_identifier_pvn(pTHX_ const char *s, STRLEN len, U32 flags); +#define PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVN \ + assert(s) + +PERL_CALLCONV bool +Perl_valid_identifier_sv(pTHX_ const SV *sv); +#define PERL_ARGS_ASSERT_VALID_IDENTIFIER_SV + PERL_CALLCONV bool Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash); #define PERL_ARGS_ASSERT_VALIDATE_PROTO \ diff --git a/toke.c b/toke.c index d8f77fb3a781..df4bce2d7713 100644 --- a/toke.c +++ b/toke.c @@ -13932,6 +13932,84 @@ Perl_parse_subsignature(pTHX_ U32 flags) return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR); } +/* +=for apidoc valid_identifier_pvn + +Returns true if the string given by C whose length is C would be +considered valid as a Perl identifier. That is, it must begin with a +character matching C, followed by characters all matching +C. An empty string (i.e. when C is zero) will return false. + +If C contains the C bit, then the string is presumed to be +encoded in UTF-8, and suitable Unicode character test functions will be used. + +=cut +*/ + +bool +Perl_valid_identifier_pvn(pTHX_ const char *s, STRLEN len, U32 flags) +{ + PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVN; + const char *end = s + len; + + if(!len) + return false; + + if(flags & SVf_UTF8) { + if(!isIDFIRST_utf8_safe((U8 *)s, (U8 *)end)) + return false; + + while(s < end) { + s += UTF8SKIP((U8 *)s); + if(s == end) + break; + if(s > end || !isIDCONT_utf8_safe((U8 *)s, (U8 *)end)) + return false; + } + return true; + } + else { + if(!isIDFIRST(s[0])) + return false; + + while(s < end) { + s += 1; + if(s == end) + break; + if(s > end || !isIDCONT(s[0])) + return false; + } + return true; + } + + return false; +} + +/* +=for apidoc valid_identifier_sv + +Returns true if the given SV contains a non-empty string whose characters +match accoding to C. Returns false if given NULL, an +undefined SV, or a SV that does not contain a non-empty string. + +Does not invoke C magic on the SV beforehand. + +=cut +*/ + +bool +Perl_valid_identifier_sv(pTHX_ const SV *sv) +{ + PERL_ARGS_ASSERT_VALID_IDENTIFIER_SV; + + /* If SvPOK is not set, there's no point stringifying a number because its + * string form will end up starting with a digit and won't match anyway */ + if(!sv || !SvOK(sv) || !SvPOK(sv)) + return false; + + return valid_identifier_pvn(SvPVX_const(sv), SvCUR(sv), SvUTF8(sv)); +} + /* * ex: set ts=8 sts=4 sw=4 et: */