From 049897a7aa42eb2ce3f3037e49711fea1cd92274 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 20 Nov 2024 15:23:53 +0000 Subject: [PATCH] Add valid_identifier_{pve,pvn,sv} API functions These functions test whether a given string would be considered by the Perl parser to be a valid identifier. Three variants are provided: one taking a string start/end pair, one a string start/length pair, and one looking at the string contained in an SV. --- MANIFEST | 1 + embed.fnc | 10 +++ embed.h | 3 + ext/XS-APItest/APItest.pm | 2 +- ext/XS-APItest/APItest.xs | 6 ++ ext/XS-APItest/t/valid_identifier.t | 43 +++++++++++++ pod/perldelta.pod | 7 ++ proto.h | 14 ++++ toke.c | 99 +++++++++++++++++++++++++++++ 9 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 ext/XS-APItest/t/valid_identifier.t diff --git a/MANIFEST b/MANIFEST index eef1f50dde64..543f63784f58 100644 --- a/MANIFEST +++ b/MANIFEST @@ -5207,6 +5207,7 @@ ext/XS-APItest/t/utf8_warn07.t Tests for code in utf8.c ext/XS-APItest/t/utf8_warn08.t Tests for code in utf8.c ext/XS-APItest/t/utf8_warn09.t Tests for code in utf8.c ext/XS-APItest/t/utf8_warn_base.pl Tests for code in utf8.c +ext/XS-APItest/t/valid_identifier.t XS::APItest: tests for valid_identifier_sv() ext/XS-APItest/t/weaken.t XS::APItest: tests for sv_rvweaken() and sv_get_backrefs() ext/XS-APItest/t/whichsig.t XS::APItest: tests for whichsig() and variants ext/XS-APItest/t/win32.t Test Win32 specific APIs diff --git a/embed.fnc b/embed.fnc index 5f48566133b9..5b8bacf884eb 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3766,6 +3766,16 @@ EXdpx |bool |validate_proto |NN SV *name \ |NULLOK SV *proto \ |bool warn \ |bool curstash +Adp |bool |valid_identifier_pve \ + |NN const char *s \ + |NN const char *end \ + |U32 flags +Adp |bool |valid_identifier_pvn \ + |NN const char *s \ + |STRLEN len \ + |U32 flags +Adp |bool |valid_identifier_sv \ + |NULLOK SV *sv CRTdip |UV |valid_utf8_to_uvchr \ |NN const U8 *s \ |NULLOK STRLEN *retlen diff --git a/embed.h b/embed.h index 86804dee2694..8b8b09b511a8 100644 --- a/embed.h +++ b/embed.h @@ -865,6 +865,9 @@ # define uvchr_to_utf8_flags(a,b,c) Perl_uvchr_to_utf8_flags(aTHX,a,b,c) # define uvchr_to_utf8_flags_msgs(a,b,c,d) Perl_uvchr_to_utf8_flags_msgs(aTHX,a,b,c,d) # define uvoffuni_to_utf8_flags_msgs(a,b,c,d) Perl_uvoffuni_to_utf8_flags_msgs(aTHX_ a,b,c,d) +# define valid_identifier_pve(a,b,c) Perl_valid_identifier_pve(aTHX_ a,b,c) +# define valid_identifier_pvn(a,b,c) Perl_valid_identifier_pvn(aTHX_ a,b,c) +# define valid_identifier_sv(a) Perl_valid_identifier_sv(aTHX_ a) # define valid_utf8_to_uvchr Perl_valid_utf8_to_uvchr # define vcmp(a,b) Perl_vcmp(aTHX_ a,b) # define vcroak(a,b) Perl_vcroak(aTHX_ a,b) diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm index 05df69ef8c78..3b7201a0daf4 100644 --- a/ext/XS-APItest/APItest.pm +++ b/ext/XS-APItest/APItest.pm @@ -4,7 +4,7 @@ use strict; use warnings; use Carp; -our $VERSION = '1.39'; +our $VERSION = '1.40'; require XSLoader; diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index 6dcc02e8dbe7..4196bbb7004c 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -7408,6 +7408,12 @@ gimme() OUTPUT: RETVAL +bool +valid_identifier(SV *s) + CODE: + RETVAL = valid_identifier_sv(s); + OUTPUT: + RETVAL MODULE = XS::APItest PACKAGE = XS::APItest::Backrefs diff --git a/ext/XS-APItest/t/valid_identifier.t b/ext/XS-APItest/t/valid_identifier.t new file mode 100644 index 000000000000..8d188106fda0 --- /dev/null +++ b/ext/XS-APItest/t/valid_identifier.t @@ -0,0 +1,43 @@ +#!perl + +use strict; +use warnings; + +use open ':std', ':encoding(UTF-8)'; +use Test::More; + +use_ok('XS::APItest'); + +# These should all be valid +foreach my $id (qw( abc ab_cd _abc x123 )) { + ok(valid_identifier($id), "'$id' is valid identifier"); +} + +# These should all not be +foreach my $id (qw( ab-cd 123 abc() ), "ab cd") { + ok(!valid_identifier($id), "'$id' is not valid identifier"); +} + +# Now for some UTF-8 tests +{ + use utf8; + + foreach my $id (qw( café sandviĉon )) { + ok(valid_identifier($id), "'$id' is valid UTF-8 identifier"); + } + + # en-dash + ok(!valid_identifier("ab–cd"), "'ab–cd' is not valid UTF-8 identifier"); +} + +# objects with "" overloading still work +{ + package WithStringify { + use overload '""' => sub { return "an_identifier"; }; + sub new { bless [], shift; } + } + + ok(valid_identifier(WithStringify->new), 'Object with stringify overload can be valid identifier'); +} + +done_testing; diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 3ce40a3b49c0..19b06c88ec7c 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -350,6 +350,13 @@ well. XXX +=item * + +New API functions L|perlapi/valid_identifier_pve>, +L|perlapi/valid_identifier_pvn> and +L|perlapi/valid_identifier_sv> have been added, which +test if a string would be considered by Perl to be a valid identifier name. + =back =head1 Selected Bug Fixes diff --git a/proto.h b/proto.h index 61327e70349d..74ea81d1a066 100644 --- a/proto.h +++ b/proto.h @@ -5390,6 +5390,20 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV input_uv, const UV flags, HV ** #define PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS_MSGS \ assert(d) +PERL_CALLCONV bool +Perl_valid_identifier_pve(pTHX_ const char *s, const char *end, U32 flags); +#define PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVE \ + assert(s); assert(end) + +PERL_CALLCONV bool +Perl_valid_identifier_pvn(pTHX_ const char *s, STRLEN len, U32 flags); +#define PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVN \ + assert(s) + +PERL_CALLCONV bool +Perl_valid_identifier_sv(pTHX_ SV *sv); +#define PERL_ARGS_ASSERT_VALID_IDENTIFIER_SV + PERL_CALLCONV bool Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash); #define PERL_ARGS_ASSERT_VALIDATE_PROTO \ diff --git a/toke.c b/toke.c index 5bdfbe338861..714c954dd573 100644 --- a/toke.c +++ b/toke.c @@ -13932,6 +13932,105 @@ Perl_parse_subsignature(pTHX_ U32 flags) return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR); } +/* +=for apidoc valid_identifier_pve + +Returns true if the string given by C until C would be considered +valid as a Perl identifier. That is, it must begin with a character matching +C, followed by characters all matching C. An empty +string (i.e. when C is C) will return false. + +If C contains the C bit, then the string is presumed to be +encoded in UTF-8, and suitable Unicode character test functions will be used. + +=cut +*/ + +bool +Perl_valid_identifier_pve(pTHX_ const char *s, const char *end, U32 flags) +{ + PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVE; + + if(end <= s) + return false; + + if(flags & SVf_UTF8) { + if(!isIDFIRST_utf8_safe((U8 *)s, (U8 *)end)) + return false; + + while(s < end) { + s += UTF8SKIP((U8 *)s); + if(s == end) + break; + if(!isIDCONT_utf8_safe((U8 *)s, (U8 *)end)) + return false; + } + return true; + } + else { + if(!isIDFIRST(s[0])) + return false; + + while(s < end) { + s += 1; + if(s == end) + break; + if(!isIDCONT(s[0])) + return false; + } + return true; + } + + return false; +} + +/* +=for apidoc valid_identifier_pvn + +Returns true if the string given by C whose length is C would be +considered valid as a Perl identifier. That is, it must begin with a +character matching C, followed by characters all matching +C. An empty string (i.e. when C is zero) will return false. + +If C contains the C bit, then the string is presumed to be +encoded in UTF-8, and suitable Unicode character test functions will be used. + +=cut +*/ + +bool +Perl_valid_identifier_pvn(pTHX_ const char *s, STRLEN len, U32 flags) +{ + PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVN; + + return valid_identifier_pve(s, s + len, flags); +} + +/* +=for apidoc valid_identifier_sv + +Returns true if the given SV contains a non-empty string whose characters +match accoding to C. Returns false if given NULL, an +undefined SV, or a SV that does not contain a non-empty string. + +Does not invoke C magic on the SV beforehand. + +=cut +*/ + +bool +Perl_valid_identifier_sv(pTHX_ SV *sv) +{ + PERL_ARGS_ASSERT_VALID_IDENTIFIER_SV; + + if(!sv || !SvOK(sv)) + return false; + + STRLEN len; + const char *pv = SvPV_const(sv, len); + return valid_identifier_pve(pv, pv + len, SvUTF8(sv)); +} + /* * ex: set ts=8 sts=4 sw=4 et: */