Skip to content

Commit

Permalink
Auto merge of #133219 - matthiaskrgr:rollup-hnuq0zf, r=matthiaskrgr
Browse files Browse the repository at this point in the history
Rollup of 8 pull requests

Successful merges:

 - #123947 (Add vec_deque::Iter::as_slices and friends)
 - #125405 (Add std::thread::add_spawn_hook.)
 - #133175 (ci: use free runner in dist-i686-msvc)
 - #133183 (Mention std::fs::remove_dir_all in std::fs::remove_dir)
 - #133188 (Add `visit` methods to ast nodes that already have `walk`s on ast visitors)
 - #133201 (Remove `TokenKind::InvalidPrefix`)
 - #133207 (Default-enable `llvm_tools_enabled` when no `config.toml` is present)
 - #133213 (Correct the tier listing of `wasm32-wasip2`)

r? `@ghost`
`@rustbot` modify labels: rollup
  • Loading branch information
bors committed Nov 19, 2024
2 parents ee612c4 + 2467654 commit 875df37
Show file tree
Hide file tree
Showing 18 changed files with 429 additions and 90 deletions.
8 changes: 6 additions & 2 deletions compiler/rustc_ast/src/mut_visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,10 @@ pub trait MutVisitor: Sized {
/// Visits a `CaptureBy` node.
///
/// The default implementation forwards to `walk_capture_by`; override this
/// method to customize how `CaptureBy` nodes are handled.
fn visit_capture_by(&mut self, capture_by: &mut CaptureBy) {
walk_capture_by(self, capture_by)
}

/// Visits a function return type (`FnRetTy`).
///
/// The default implementation forwards to `walk_fn_ret_ty`; override this
/// method to intercept return types before (or instead of) walking them.
fn visit_fn_ret_ty(&mut self, fn_ret_ty: &mut FnRetTy) {
walk_fn_ret_ty(self, fn_ret_ty)
}
}

/// Use a map-style function (`FnOnce(T) -> T`) to overwrite a `&mut T`. Useful
Expand Down Expand Up @@ -609,7 +613,7 @@ fn walk_angle_bracketed_parameter_data<T: MutVisitor>(vis: &mut T, data: &mut An
fn walk_parenthesized_parameter_data<T: MutVisitor>(vis: &mut T, args: &mut ParenthesizedArgs) {
let ParenthesizedArgs { inputs, output, span, inputs_span } = args;
visit_thin_vec(inputs, |input| vis.visit_ty(input));
walk_fn_ret_ty(vis, output);
vis.visit_fn_ret_ty(output);
vis.visit_span(span);
vis.visit_span(inputs_span);
}
Expand Down Expand Up @@ -911,7 +915,7 @@ fn walk_fn<T: MutVisitor>(vis: &mut T, kind: FnKind<'_>) {
fn walk_fn_decl<T: MutVisitor>(vis: &mut T, decl: &mut P<FnDecl>) {
let FnDecl { inputs, output } = decl.deref_mut();
inputs.flat_map_in_place(|param| vis.flat_map_param(param));
walk_fn_ret_ty(vis, output);
vis.visit_fn_ret_ty(output);
}

fn walk_fn_ret_ty<T: MutVisitor>(vis: &mut T, fn_ret_ty: &mut FnRetTy) {
Expand Down
34 changes: 20 additions & 14 deletions compiler/rustc_ast/src/visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,12 @@ pub trait Visitor<'ast>: Sized {
/// Visits a `CoroutineKind` node.
///
/// The default implementation ignores the node and immediately returns
/// `Self::Result::output()`; nothing is walked unless this is overridden.
fn visit_coroutine_kind(&mut self, _coroutine_kind: &'ast CoroutineKind) -> Self::Result {
Self::Result::output()
}
/// Visits a function declaration (`FnDecl`).
///
/// The default implementation forwards to `walk_fn_decl` and returns its
/// result; override this method to intercept function declarations.
fn visit_fn_decl(&mut self, fn_decl: &'ast FnDecl) -> Self::Result {
walk_fn_decl(self, fn_decl)
}
/// Visits an optional `QSelf` (passed as `&Option<P<QSelf>>`).
///
/// The default implementation forwards to `walk_qself` and returns its
/// result; override this method to intercept qualified-self nodes.
fn visit_qself(&mut self, qs: &'ast Option<P<QSelf>>) -> Self::Result {
walk_qself(self, qs)
}
}

pub fn walk_crate<'a, V: Visitor<'a>>(visitor: &mut V, krate: &'a Crate) -> V::Result {
Expand Down Expand Up @@ -434,13 +440,13 @@ impl WalkItemKind for ItemKind {
body,
from_glob: _,
}) => {
try_visit!(walk_qself(visitor, qself));
try_visit!(visitor.visit_qself(qself));
try_visit!(visitor.visit_path(path, *id));
visit_opt!(visitor, visit_ident, rename);
visit_opt!(visitor, visit_block, body);
}
ItemKind::DelegationMac(box DelegationMac { qself, prefix, suffixes, body }) => {
try_visit!(walk_qself(visitor, qself));
try_visit!(visitor.visit_qself(qself));
try_visit!(visitor.visit_path(prefix, id));
if let Some(suffixes) = suffixes {
for (ident, rename) in suffixes {
Expand Down Expand Up @@ -518,10 +524,10 @@ pub fn walk_ty<'a, V: Visitor<'a>>(visitor: &mut V, typ: &'a Ty) -> V::Result {
let BareFnTy { safety: _, ext: _, generic_params, decl, decl_span: _ } =
&**function_declaration;
walk_list!(visitor, visit_generic_param, generic_params);
try_visit!(walk_fn_decl(visitor, decl));
try_visit!(visitor.visit_fn_decl(decl));
}
TyKind::Path(maybe_qself, path) => {
try_visit!(walk_qself(visitor, maybe_qself));
try_visit!(visitor.visit_qself(maybe_qself));
try_visit!(visitor.visit_path(path, *id));
}
TyKind::Pat(ty, pat) => {
Expand Down Expand Up @@ -652,16 +658,16 @@ pub fn walk_pat<'a, V: Visitor<'a>>(visitor: &mut V, pattern: &'a Pat) -> V::Res
let Pat { id, kind, span: _, tokens: _ } = pattern;
match kind {
PatKind::TupleStruct(opt_qself, path, elems) => {
try_visit!(walk_qself(visitor, opt_qself));
try_visit!(visitor.visit_qself(opt_qself));
try_visit!(visitor.visit_path(path, *id));
walk_list!(visitor, visit_pat, elems);
}
PatKind::Path(opt_qself, path) => {
try_visit!(walk_qself(visitor, opt_qself));
try_visit!(visitor.visit_qself(opt_qself));
try_visit!(visitor.visit_path(path, *id))
}
PatKind::Struct(opt_qself, path, fields, _rest) => {
try_visit!(walk_qself(visitor, opt_qself));
try_visit!(visitor.visit_qself(opt_qself));
try_visit!(visitor.visit_path(path, *id));
walk_list!(visitor, visit_pat_field, fields);
}
Expand Down Expand Up @@ -846,13 +852,13 @@ pub fn walk_fn<'a, V: Visitor<'a>>(visitor: &mut V, kind: FnKind<'a>) -> V::Resu
// Identifier and visibility are visited as a part of the item.
try_visit!(visitor.visit_fn_header(header));
try_visit!(visitor.visit_generics(generics));
try_visit!(walk_fn_decl(visitor, decl));
try_visit!(visitor.visit_fn_decl(decl));
visit_opt!(visitor, visit_block, body);
}
FnKind::Closure(binder, coroutine_kind, decl, body) => {
try_visit!(visitor.visit_closure_binder(binder));
visit_opt!(visitor, visit_coroutine_kind, coroutine_kind.as_ref());
try_visit!(walk_fn_decl(visitor, decl));
try_visit!(visitor.visit_fn_decl(decl));
try_visit!(visitor.visit_expr(body));
}
}
Expand Down Expand Up @@ -902,13 +908,13 @@ impl WalkItemKind for AssocItemKind {
body,
from_glob: _,
}) => {
try_visit!(walk_qself(visitor, qself));
try_visit!(visitor.visit_qself(qself));
try_visit!(visitor.visit_path(path, *id));
visit_opt!(visitor, visit_ident, rename);
visit_opt!(visitor, visit_block, body);
}
AssocItemKind::DelegationMac(box DelegationMac { qself, prefix, suffixes, body }) => {
try_visit!(walk_qself(visitor, qself));
try_visit!(visitor.visit_qself(qself));
try_visit!(visitor.visit_path(prefix, id));
if let Some(suffixes) = suffixes {
for (ident, rename) in suffixes {
Expand Down Expand Up @@ -1023,7 +1029,7 @@ pub fn walk_inline_asm_sym<'a, V: Visitor<'a>>(
visitor: &mut V,
InlineAsmSym { id, qself, path }: &'a InlineAsmSym,
) -> V::Result {
try_visit!(walk_qself(visitor, qself));
try_visit!(visitor.visit_qself(qself));
visitor.visit_path(path, *id)
}

Expand Down Expand Up @@ -1055,7 +1061,7 @@ pub fn walk_expr<'a, V: Visitor<'a>>(visitor: &mut V, expression: &'a Expr) -> V
}
ExprKind::Struct(se) => {
let StructExpr { qself, path, fields, rest } = &**se;
try_visit!(walk_qself(visitor, qself));
try_visit!(visitor.visit_qself(qself));
try_visit!(visitor.visit_path(path, *id));
walk_list!(visitor, visit_expr_field, fields);
match rest {
Expand Down Expand Up @@ -1164,7 +1170,7 @@ pub fn walk_expr<'a, V: Visitor<'a>>(visitor: &mut V, expression: &'a Expr) -> V
}
ExprKind::Underscore => {}
ExprKind::Path(maybe_qself, path) => {
try_visit!(walk_qself(visitor, maybe_qself));
try_visit!(visitor.visit_qself(maybe_qself));
try_visit!(visitor.visit_path(path, *id));
}
ExprKind::Break(opt_label, opt_expr) => {
Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_codegen_cranelift/scripts/setup_rust_fork.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ local-rebuild = true
codegen-backends = ["cranelift"]
deny-warnings = false
verbose-tests = false
# The cg_clif sysroot doesn't contain llvm tools and unless llvm_tools is
# disabled bootstrap will crash trying to copy llvm tools for the bootstrap
# compiler.
llvm_tools = false
EOF
popd

Expand Down
106 changes: 49 additions & 57 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,10 @@ impl Token {
/// Enum representing common lexeme types.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TokenKind {
// Multi-char tokens:
/// "// comment"
/// A line comment, e.g. `// comment`.
LineComment { doc_style: Option<DocStyle> },

/// `/* block comment */`
/// A block comment, e.g. `/* block comment */`.
///
/// Block comments can be recursive, so a sequence like `/* /* */`
/// will not be considered terminated and will result in a parsing error.
Expand All @@ -70,18 +69,17 @@ pub enum TokenKind {
/// Any whitespace character sequence.
Whitespace,

/// "ident" or "continue"
///
/// At this step, keywords are also considered identifiers.
/// An identifier or keyword, e.g. `ident` or `continue`.
Ident,

/// Like the above, but containing invalid unicode codepoints.
/// An identifier that is invalid because it contains emoji.
InvalidIdent,

/// "r#ident"
/// A raw identifier, e.g. `r#ident`.
RawIdent,

/// An unknown prefix, like `foo#`, `foo'`, `foo"`.
/// An unknown literal prefix, like `foo#`, `foo'`, `foo"`. Excludes
/// literal prefixes that contain emoji, which are considered "invalid".
///
/// Note that only the
/// prefix (`foo`) is included in the token, not the separator (which is
Expand All @@ -93,87 +91,83 @@ pub enum TokenKind {

/// An unknown prefix in a lifetime, like `'foo#`.
///
/// Note that like above, only the `'` and prefix are included in the token
/// Like `UnknownPrefix`, only the `'` and prefix are included in the token
/// and not the separator.
UnknownPrefixLifetime,

/// `'r#lt`, which in edition < 2021 is split into several tokens: `'r # lt`.
/// A raw lifetime, e.g. `'r#foo`. In edition < 2021 it will be split into
/// several tokens: `'r` and `#` and `foo`.
RawLifetime,

/// Similar to the above, but *always* an error on every edition. This is used
/// for emoji identifier recovery, as those are not meant to be ever accepted.
InvalidPrefix,

/// Guarded string literal prefix: `#"` or `##`.
///
/// Used for reserving "guarded strings" (RFC 3598) in edition 2024.
/// Split into the component tokens on older editions.
GuardedStrPrefix,

/// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
/// Literals, e.g. `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
/// suffix, but may be present here on string and float literals. Users of
/// this type will need to check for and reject that case.
///
/// See [LiteralKind] for more details.
Literal { kind: LiteralKind, suffix_start: u32 },

/// "'a"
/// A lifetime, e.g. `'a`.
Lifetime { starts_with_number: bool },

// One-char tokens:
/// ";"
/// `;`
Semi,
/// ","
/// `,`
Comma,
/// "."
/// `.`
Dot,
/// "("
/// `(`
OpenParen,
/// ")"
/// `)`
CloseParen,
/// "{"
/// `{`
OpenBrace,
/// "}"
/// `}`
CloseBrace,
/// "["
/// `[`
OpenBracket,
/// "]"
/// `]`
CloseBracket,
/// "@"
/// `@`
At,
/// "#"
/// `#`
Pound,
/// "~"
/// `~`
Tilde,
/// "?"
/// `?`
Question,
/// ":"
/// `:`
Colon,
/// "$"
/// `$`
Dollar,
/// "="
/// `=`
Eq,
/// "!"
/// `!`
Bang,
/// "<"
/// `<`
Lt,
/// ">"
/// `>`
Gt,
/// "-"
/// `-`
Minus,
/// "&"
/// `&`
And,
/// "|"
/// `|`
Or,
/// "+"
/// `+`
Plus,
/// "*"
/// `*`
Star,
/// "/"
/// `/`
Slash,
/// "^"
/// `^`
Caret,
/// "%"
/// `%`
Percent,

/// Unknown token, not expected by the lexer, e.g. `№`.
Expand Down Expand Up @@ -468,7 +462,7 @@ impl Cursor<'_> {
Literal { kind, suffix_start }
}
// Identifier starting with an emoji. Only lexed for graceful error recovery.
c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(),
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
_ => Unknown,
};
let res = Token::new(token_kind, self.pos_within_token());
Expand Down Expand Up @@ -552,24 +546,22 @@ impl Cursor<'_> {
// we see a prefix here, it is definitely an unknown prefix.
match self.first() {
'#' | '"' | '\'' => UnknownPrefix,
c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(),
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
_ => Ident,
}
}

fn fake_ident_or_unknown_prefix(&mut self) -> TokenKind {
fn invalid_ident(&mut self) -> TokenKind {
// Start is already eaten, eat the rest of identifier.
self.eat_while(|c| {
unicode_xid::UnicodeXID::is_xid_continue(c)
|| (!c.is_ascii() && c.is_emoji_char())
|| c == '\u{200d}'
const ZERO_WIDTH_JOINER: char = '\u{200d}';
is_id_continue(c) || (!c.is_ascii() && c.is_emoji_char()) || c == ZERO_WIDTH_JOINER
});
// Known prefixes must have been handled earlier. So if
// we see a prefix here, it is definitely an unknown prefix.
match self.first() {
'#' | '"' | '\'' => InvalidPrefix,
_ => InvalidIdent,
}
// An invalid identifier followed by '#' or '"' or '\'' could be
// interpreted as an invalid literal prefix. We don't bother doing that
// because the treatment of invalid identifiers and invalid prefixes
// would be the same.
InvalidIdent
}

fn c_or_byte_string(
Expand Down
5 changes: 2 additions & 3 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
let ident = Symbol::intern(lifetime_name);
token::Lifetime(ident, IdentIsRaw::No)
}
rustc_lexer::TokenKind::InvalidIdent | rustc_lexer::TokenKind::InvalidPrefix
rustc_lexer::TokenKind::InvalidIdent
// Do not recover an identifier with emoji if the codepoint is a confusable
// with a recoverable substitution token, like `➖`.
if !UNICODE_ARRAY.iter().any(|&(c, _, _)| {
Expand Down Expand Up @@ -359,8 +359,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),

rustc_lexer::TokenKind::Unknown
| rustc_lexer::TokenKind::InvalidIdent
| rustc_lexer::TokenKind::InvalidPrefix => {
| rustc_lexer::TokenKind::InvalidIdent => {
// Don't emit diagnostics for sequences of the same invalid token
if swallow_next_invalid > 0 {
swallow_next_invalid -= 1;
Expand Down
Loading

0 comments on commit 875df37

Please sign in to comment.