Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add valid_identifier_{pvn,sv} API functions #22769

Merged
merged 1 commit into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -5207,6 +5207,7 @@ ext/XS-APItest/t/utf8_warn07.t Tests for code in utf8.c
ext/XS-APItest/t/utf8_warn08.t Tests for code in utf8.c
ext/XS-APItest/t/utf8_warn09.t Tests for code in utf8.c
ext/XS-APItest/t/utf8_warn_base.pl Tests for code in utf8.c
ext/XS-APItest/t/valid_identifier.t XS::APItest: tests for valid_identifier_sv()
ext/XS-APItest/t/weaken.t XS::APItest: tests for sv_rvweaken() and sv_get_backrefs()
ext/XS-APItest/t/whichsig.t XS::APItest: tests for whichsig() and variants
ext/XS-APItest/t/win32.t Test Win32 specific APIs
Expand Down
10 changes: 10 additions & 0 deletions embed.fnc
Original file line number Diff line number Diff line change
Expand Up @@ -3766,6 +3766,16 @@ EXdpx |bool |validate_proto |NN SV *name \
|NULLOK SV *proto \
|bool warn \
|bool curstash
Adp |bool |valid_identifier_pve \
|NN const char *s \
|NN const char *end \
|U32 flags
Adp |bool |valid_identifier_pvn \
|NN const char *s \
|STRLEN len \
|U32 flags
Adp |bool |valid_identifier_sv \
|NULLOK SV *sv
CRTdip |UV |valid_utf8_to_uvchr \
|NN const U8 *s \
|NULLOK STRLEN *retlen
Expand Down
3 changes: 3 additions & 0 deletions embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,9 @@
# define uvchr_to_utf8_flags(a,b,c) Perl_uvchr_to_utf8_flags(aTHX,a,b,c)
# define uvchr_to_utf8_flags_msgs(a,b,c,d) Perl_uvchr_to_utf8_flags_msgs(aTHX,a,b,c,d)
# define uvoffuni_to_utf8_flags_msgs(a,b,c,d) Perl_uvoffuni_to_utf8_flags_msgs(aTHX_ a,b,c,d)
# define valid_identifier_pve(a,b,c) Perl_valid_identifier_pve(aTHX_ a,b,c)
# define valid_identifier_pvn(a,b,c) Perl_valid_identifier_pvn(aTHX_ a,b,c)
# define valid_identifier_sv(a) Perl_valid_identifier_sv(aTHX_ a)
# define valid_utf8_to_uvchr Perl_valid_utf8_to_uvchr
# define vcmp(a,b) Perl_vcmp(aTHX_ a,b)
# define vcroak(a,b) Perl_vcroak(aTHX_ a,b)
Expand Down
2 changes: 1 addition & 1 deletion ext/XS-APItest/APItest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use strict;
use warnings;
use Carp;

our $VERSION = '1.39';
our $VERSION = '1.40';

require XSLoader;

Expand Down
6 changes: 6 additions & 0 deletions ext/XS-APItest/APItest.xs
Original file line number Diff line number Diff line change
Expand Up @@ -7408,6 +7408,12 @@ gimme()
OUTPUT:
RETVAL

bool
valid_identifier(SV *s)
CODE:
RETVAL = valid_identifier_sv(s);
OUTPUT:
RETVAL

MODULE = XS::APItest PACKAGE = XS::APItest::Backrefs

Expand Down
43 changes: 43 additions & 0 deletions ext/XS-APItest/t/valid_identifier.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!perl

use strict;
use warnings;

use open ':std', ':encoding(UTF-8)';
use Test::More;

use_ok('XS::APItest');

# These should all be valid
foreach my $id (qw( abc ab_cd _abc x123 )) {
ok(valid_identifier($id), "'$id' is valid identifier");
}

# These should all not be
foreach my $id (qw( ab-cd 123 abc() ), "ab cd") {
ok(!valid_identifier($id), "'$id' is not valid identifier");
}

# Now for some UTF-8 tests
{
use utf8;

foreach my $id (qw( café sandviĉon )) {
ok(valid_identifier($id), "'$id' is valid UTF-8 identifier");
}

# en-dash
ok(!valid_identifier("ab–cd"), "'ab–cd' is not valid UTF-8 identifier");
}

# objects with "" overloading still work
{
package WithStringify {
use overload '""' => sub { return "an_identifier"; };
sub new { bless [], shift; }
}

ok(valid_identifier(WithStringify->new), 'Object with stringify overload can be valid identifier');
}

done_testing;
7 changes: 7 additions & 0 deletions pod/perldelta.pod
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,13 @@ well.

XXX

=item *

New API functions L<C<valid_identifier_pve()>|perlapi/valid_identifier_pve>,
L<C<valid_identifier_pvn()>|perlapi/valid_identifier_pvn> and
L<C<valid_identifier_sv()>|perlapi/valid_identifier_sv> have been added, which
test if a string would be considered by Perl to be a valid identifier name.

=back

=head1 Selected Bug Fixes
Expand Down
14 changes: 14 additions & 0 deletions proto.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

99 changes: 99 additions & 0 deletions toke.c
Original file line number Diff line number Diff line change
Expand Up @@ -13932,6 +13932,105 @@ Perl_parse_subsignature(pTHX_ U32 flags)
return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
}

/*
=for apidoc valid_identifier_pve

Returns true if the string given by C<s> until C<end> would be considered
valid as a Perl identifier. That is, it must begin with a character matching
C<isIDFIRST>, followed by characters all matching C<isIDCONT>. An empty
string (i.e. when C<end> is C<s>) will return false.

If C<flags> contains the C<SVf_UTF8> bit, then the string is presumed to be
encoded in UTF-8, and suitable Unicode character test functions will be used.

=cut
*/

bool
Perl_valid_identifier_pve(pTHX_ const char *s, const char *end, U32 flags)
{
PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVE;

if(end <= s)
return false;

if(flags & SVf_UTF8) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit tricky, in the context of the parser it's fine - we only allow non-ASCII code points in identifiers, but if this is a general use function it has the Unicode bug:

$ ./perl -Ilib -Mutf8 -MXS::APItest -E 'my $x = "café"; say valid_identifier($x) || 0; utf8::downgrade($x); say valid_identifier($x) || 0'
1
0

This could be "fixed" (I hope) by using isIDFIRST_L1()/isIDCONT_L1(), but that would allow identifiers the parser wouldn't otherwise accept when used during parsing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I tried to make it clear from the docs that this is all about what the parser would accept.

if(!isIDFIRST_utf8_safe((U8 *)s, (U8 *)end))
return false;

while(s < end) {
s += UTF8SKIP((U8 *)s);
if(s == end)
break;
if(!isIDCONT_utf8_safe((U8 *)s, (U8 *)end))
return false;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't this loop be more simply written as

while (s < end) {
    s += UTF8SKIP((U8 *) s);
    if (s >= end || ! isIDCONT_utf8_safe((U8 *) s, end)) {
        return false;
    }
}

And the second loop below similarly simplified?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not quite; it still needs the if(s == end) break condition, but it seems fine if I add that.

Updated and force-pushed.

return true;
}
else {
if(!isIDFIRST(s[0]))
return false;

while(s < end) {
s += 1;
if(s == end)
break;
if(!isIDCONT(s[0]))
return false;
}
return true;
}

return false;
}

/*
=for apidoc valid_identifier_pvn

Returns true if the string given by C<s> whose length is C<len> would be
considered valid as a Perl identifier. That is, it must begin with a
character matching C<isIDFIRST>, followed by characters all matching
C<isIDCONT>. An empty string (i.e. when C<len> is zero) will return false.

If C<flags> contains the C<SVf_UTF8> bit, then the string is presumed to be
encoded in UTF-8, and suitable Unicode character test functions will be used.

=cut
*/

bool
Perl_valid_identifier_pvn(pTHX_ const char *s, STRLEN len, U32 flags)
{
PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVN;

return valid_identifier_pve(s, s + len, flags);
}

/*
=for apidoc valid_identifier_sv

Returns true if the given SV contains a non-empty string whose characters
match accoding to C<valid_identifier_pvn>. Returns false if given NULL, an
undefined SV, or a SV that does not contain a non-empty string.

Does not invoke C<get> magic on the SV beforehand.

=cut
*/

bool
Perl_valid_identifier_sv(pTHX_ SV *sv)
{
PERL_ARGS_ASSERT_VALID_IDENTIFIER_SV;

if(!sv || !SvOK(sv))
return false;

STRLEN len;
const char *pv = SvPV_const(sv, len);
return valid_identifier_pve(pv, pv + len, SvUTF8(sv));
}

/*
* ex: set ts=8 sts=4 sw=4 et:
*/
Loading