Skip to content

Commit

Permalink
Add valid_identifier_{pvn,sv} API functions
Browse files Browse the repository at this point in the history
  • Loading branch information
leonerd committed Nov 20, 2024
1 parent ee92e19 commit 37987d7
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 1 deletion.
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -5207,6 +5207,7 @@ ext/XS-APItest/t/utf8_warn07.t Tests for code in utf8.c
ext/XS-APItest/t/utf8_warn08.t Tests for code in utf8.c
ext/XS-APItest/t/utf8_warn09.t Tests for code in utf8.c
ext/XS-APItest/t/utf8_warn_base.pl Tests for code in utf8.c
ext/XS-APItest/t/valid_identifier.t XS::APItest: tests for valid_identifier_sv()
ext/XS-APItest/t/weaken.t XS::APItest: tests for sv_rvweaken() and sv_get_backrefs()
ext/XS-APItest/t/whichsig.t XS::APItest: tests for whichsig() and variants
ext/XS-APItest/t/win32.t Test Win32 specific APIs
Expand Down
6 changes: 6 additions & 0 deletions embed.fnc
Original file line number Diff line number Diff line change
Expand Up @@ -3766,6 +3766,12 @@ EXdpx |bool |validate_proto |NN SV *name \
|NULLOK SV *proto \
|bool warn \
|bool curstash
Adp |bool |valid_identifier_pvn \
|NN const char *s \
|STRLEN len \
|U32 flags
Adp |bool |valid_identifier_sv \
|NULLOK const SV *sv
CRTdip |UV |valid_utf8_to_uvchr \
|NN const U8 *s \
|NULLOK STRLEN *retlen
Expand Down
2 changes: 2 additions & 0 deletions embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,8 @@
# define uvchr_to_utf8_flags(a,b,c) Perl_uvchr_to_utf8_flags(aTHX,a,b,c)
# define uvchr_to_utf8_flags_msgs(a,b,c,d) Perl_uvchr_to_utf8_flags_msgs(aTHX,a,b,c,d)
# define uvoffuni_to_utf8_flags_msgs(a,b,c,d) Perl_uvoffuni_to_utf8_flags_msgs(aTHX_ a,b,c,d)
# define valid_identifier_pvn(a,b,c) Perl_valid_identifier_pvn(aTHX_ a,b,c)
# define valid_identifier_sv(a) Perl_valid_identifier_sv(aTHX_ a)
# define valid_utf8_to_uvchr Perl_valid_utf8_to_uvchr
# define vcmp(a,b) Perl_vcmp(aTHX_ a,b)
# define vcroak(a,b) Perl_vcroak(aTHX_ a,b)
Expand Down
2 changes: 1 addition & 1 deletion ext/XS-APItest/APItest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use strict;
use warnings;
use Carp;

our $VERSION = '1.39';
our $VERSION = '1.40';

require XSLoader;

Expand Down
6 changes: 6 additions & 0 deletions ext/XS-APItest/APItest.xs
Original file line number Diff line number Diff line change
Expand Up @@ -7408,6 +7408,12 @@ gimme()
OUTPUT:
RETVAL

bool
valid_identifier(SV *s)
CODE:
RETVAL = valid_identifier_sv(s);
OUTPUT:
RETVAL

MODULE = XS::APItest PACKAGE = XS::APItest::Backrefs

Expand Down
33 changes: 33 additions & 0 deletions ext/XS-APItest/t/valid_identifier.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!perl

use strict;
use warnings;

use open ':std', ':encoding(UTF-8)';
use Test::More;

use_ok('XS::APItest');

# These should all be valid
foreach my $id (qw( abc ab_cd _abc x123 )) {
ok(valid_identifier($id), "'$id' is valid identifier");
}

# These should all not be
foreach my $id (qw( ab-cd 123 abc() ), "ab cd") {
ok(!valid_identifier($id), "'$id' is not valid identifier");
}

# Now for some UTF-8 tests
{
use utf8;

foreach my $id (qw( café sandviĉon )) {
ok(valid_identifier($id), "'$id' is valid UTF-8 identifier");
}

# en-dash
ok(!valid_identifier("ab–cd"), "'ab–cd' is not valid UTF-8 identifier");
}

done_testing;
6 changes: 6 additions & 0 deletions pod/perldelta.pod
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,12 @@ well.

XXX

=item *

New API functions C<valid_identifier_pvn()> and C<valid_identifier_sv()> have
been added, which test if a string would be considered by Perl to be a valid
identifier name.

=back

=head1 Selected Bug Fixes
Expand Down
9 changes: 9 additions & 0 deletions proto.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

78 changes: 78 additions & 0 deletions toke.c
Original file line number Diff line number Diff line change
Expand Up @@ -13932,6 +13932,84 @@ Perl_parse_subsignature(pTHX_ U32 flags)
return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
}

/*
=for apidoc valid_identifier_pvn
Returns true if the string given by C<s> whose length is C<len> would be
considered valid as a Perl identifier. That is, it must begin with a
character matching C<isIDFIRST>, followed by characters all matching
C<isIDCONT>. An empty string (i.e. when C<len> is zero) will return false.
If C<flags> contains the C<SVf_UTF8> bit, then the string is presumed to be
encoded in UTF-8, and suitable Unicode character test functions will be used.
=cut
*/

bool
Perl_valid_identifier_pvn(pTHX_ const char *s, STRLEN len, U32 flags)
{
PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVN;
const char *end = s + len;

if(!len)
return false;

if(flags & SVf_UTF8) {
if(!isIDFIRST_utf8_safe((U8 *)s, (U8 *)end))
return false;

while(s < end) {
s += UTF8SKIP((U8 *)s);
if(s == end)
break;
if(s > end || !isIDCONT_utf8_safe((U8 *)s, (U8 *)end))
return false;
}
return true;
}
else {
if(!isIDFIRST(s[0]))
return false;

while(s < end) {
s += 1;
if(s == end)
break;
if(s > end || !isIDCONT(s[0]))
return false;
}
return true;
}

return false;
}

/*
=for apidoc valid_identifier_sv
Returns true if the given SV contains a non-empty string whose characters
match accoding to C<valid_identifier_pvn>. Returns false if given NULL, an
undefined SV, or a SV that does not contain a non-empty string.
Does not invoke C<get> magic on the SV beforehand.
=cut
*/

bool
Perl_valid_identifier_sv(pTHX_ const SV *sv)
{
PERL_ARGS_ASSERT_VALID_IDENTIFIER_SV;

/* If SvPOK is not set, there's no point stringifying a number because its
* string form will end up starting with a digit and won't match anyway */
if(!sv || !SvOK(sv) || !SvPOK(sv))
return false;

return valid_identifier_pvn(SvPVX_const(sv), SvCUR(sv), SvUTF8(sv));
}

/*
* ex: set ts=8 sts=4 sw=4 et:
*/

0 comments on commit 37987d7

Please sign in to comment.