-
Notifications
You must be signed in to change notification settings - Fork 559
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add valid_identifier_{pvn,sv} API functions #22769
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ use strict; | |
use warnings; | ||
use Carp; | ||
|
||
our $VERSION = '1.39'; | ||
our $VERSION = '1.40'; | ||
|
||
require XSLoader; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#!perl

# Exercises valid_identifier(), which this script obtains by loading
# XS::APItest, against names that should and should not be accepted as Perl
# identifiers.

use strict;
use warnings;

# Test descriptions below interpolate non-ASCII names, so the standard
# handles need a UTF-8 layer to print them cleanly.
use open ':std', ':encoding(UTF-8)';
use Test::More;

use_ok('XS::APItest');

# These should all be valid
foreach my $id (qw( abc ab_cd _abc x123 )) {
    ok(valid_identifier($id), "'$id' is valid identifier");
}

# These should all not be. The empty string is included because the API is
# documented to reject it rather than treat it as vacuously valid.
foreach my $id (qw( ab-cd 123 abc() ), "ab cd", "") {
    ok(!valid_identifier($id), "'$id' is not valid identifier");
}

# An undefined value is documented to be rejected as well
ok(!valid_identifier(undef), 'undef is not valid identifier');

# Now for some UTF-8 tests
{
    use utf8;

    foreach my $id (qw( café sandviĉon )) {
        ok(valid_identifier($id), "'$id' is valid UTF-8 identifier");
    }

    # en-dash
    ok(!valid_identifier("ab–cd"), "'ab–cd' is not valid UTF-8 identifier");
}

# objects with "" overloading still work
{
    package WithStringify {
        use overload '""' => sub { return "an_identifier"; };
        sub new { bless [], shift; }
    }

    ok(valid_identifier(WithStringify->new), 'Object with stringify overload can be valid identifier');
}

done_testing;
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13932,6 +13932,105 @@ Perl_parse_subsignature(pTHX_ U32 flags) | |
return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR); | ||
} | ||
|
||
/* | ||
=for apidoc valid_identifier_pve | ||
|
||
Returns true if the string given by C<s> until C<end> would be considered | ||
valid as a Perl identifier. That is, it must begin with a character matching | ||
C<isIDFIRST>, followed by characters all matching C<isIDCONT>. An empty | ||
string (i.e. when C<end> is C<s>) will return false. | ||
|
||
If C<flags> contains the C<SVf_UTF8> bit, then the string is presumed to be | ||
encoded in UTF-8, and suitable Unicode character test functions will be used. | ||
|
||
=cut | ||
*/ | ||
|
||
bool | ||
Perl_valid_identifier_pve(pTHX_ const char *s, const char *end, U32 flags) | ||
{ | ||
PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVE; | ||
|
||
if(end <= s) | ||
return false; | ||
|
||
if(flags & SVf_UTF8) { | ||
if(!isIDFIRST_utf8_safe((U8 *)s, (U8 *)end)) | ||
return false; | ||
|
||
while(s < end) { | ||
s += UTF8SKIP((U8 *)s); | ||
if(s == end) | ||
break; | ||
if(!isIDCONT_utf8_safe((U8 *)s, (U8 *)end)) | ||
return false; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Couldn't this loop be more simply written as
And the second loop below similarly simplified? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not quite; it still needs the Updated and force-pushed. |
||
return true; | ||
} | ||
else { | ||
if(!isIDFIRST(s[0])) | ||
return false; | ||
|
||
while(s < end) { | ||
s += 1; | ||
if(s == end) | ||
break; | ||
if(!isIDCONT(s[0])) | ||
return false; | ||
} | ||
return true; | ||
} | ||
|
||
return false; | ||
} | ||
|
||
/* | ||
=for apidoc valid_identifier_pvn | ||
|
||
Returns true if the string given by C<s> whose length is C<len> would be | ||
considered valid as a Perl identifier. That is, it must begin with a | ||
character matching C<isIDFIRST>, followed by characters all matching | ||
C<isIDCONT>. An empty string (i.e. when C<len> is zero) will return false. | ||
|
||
If C<flags> contains the C<SVf_UTF8> bit, then the string is presumed to be | ||
encoded in UTF-8, and suitable Unicode character test functions will be used. | ||
|
||
=cut | ||
*/ | ||
|
||
bool | ||
Perl_valid_identifier_pvn(pTHX_ const char *s, STRLEN len, U32 flags) | ||
{ | ||
PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVN; | ||
|
||
return valid_identifier_pve(s, s + len, flags); | ||
} | ||
|
||
/* | ||
=for apidoc valid_identifier_sv | ||
|
||
Returns true if the given SV contains a non-empty string whose characters | ||
match according to C<valid_identifier_pvn>. Returns false if given NULL, an | ||
undefined SV, or a SV that does not contain a non-empty string. | ||
|
||
Does not invoke C<get> magic on the SV beforehand. | ||
|
||
=cut | ||
*/ | ||
|
||
bool | ||
Perl_valid_identifier_sv(pTHX_ SV *sv) | ||
{ | ||
PERL_ARGS_ASSERT_VALID_IDENTIFIER_SV; | ||
|
||
if(!sv || !SvOK(sv)) | ||
return false; | ||
|
||
STRLEN len; | ||
const char *pv = SvPV_const(sv, len); | ||
return valid_identifier_pve(pv, pv + len, SvUTF8(sv)); | ||
} | ||
|
||
/* | ||
* ex: set ts=8 sts=4 sw=4 et: | ||
*/ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a bit tricky, in the context of the parser it's fine - we only allow non-ASCII code points in identifiers, but if this is a general use function it has the Unicode bug:
This could be "fixed" (I hope) by using isIDFIRST_L1()/isIDCONT_L1(), but that would allow identifiers the parser wouldn't otherwise accept when used during parsing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah I tried to make it clear from the docs that this is all about what the parser would accept.