From 791126677d430c7ae0a44cc44c8ee5e2a8935582 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roland=20Lei=C3=9Fa?= Date: Fri, 18 Oct 2024 13:07:17 +0200 Subject: [PATCH] Cache in Lexer not needed anymore --- include/mim/ast/lexer.h | 7 +------ src/mim/ast/lexer.cpp | 38 ++++++-------------------------------- 2 files changed, 7 insertions(+), 38 deletions(-) diff --git a/include/mim/ast/lexer.h b/include/mim/ast/lexer.h index ffe59a326..efb7d5d3d 100644 --- a/include/mim/ast/lexer.h +++ b/include/mim/ast/lexer.h @@ -1,7 +1,5 @@ #pragma once -#include - #include #include @@ -23,7 +21,6 @@ class Lexer : public fe::Lexer<3, Lexer> { AST& ast() { return ast_; } const fs::path* path() const { return loc_.path; } - Loc loc() const { return loc_; } Tok lex(); private: @@ -41,9 +38,8 @@ class Lexer : public fe::Lexer<3, Lexer> { return res; } - Tok tok(Tok::Tag tag) { return {loc(), tag}; } + Tok tok(Tok::Tag tag) { return {loc_, tag}; } Sym sym(); - Loc cache_trailing_dot(); bool lex_id(); char8_t lex_char(); std::optional parse_lit(); @@ -60,7 +56,6 @@ class Lexer : public fe::Lexer<3, Lexer> { std::ostream* md_; bool out_ = true; fe::SymMap keywords_; - std::optional cache_ = std::nullopt; friend class fe::Lexer<3, Lexer>; }; diff --git a/src/mim/ast/lexer.cpp b/src/mim/ast/lexer.cpp index 3bf0734ce..186af4114 100644 --- a/src/mim/ast/lexer.cpp +++ b/src/mim/ast/lexer.cpp @@ -30,11 +30,6 @@ Lexer::Lexer(AST& ast, std::istream& istream, const fs::path* path /*= nullptr*/ Tok Lexer::lex() { while (true) { - if (auto cache = cache_) { - cache_.reset(); - return *cache; - } - start(); if (accept(utf8::EoF)) return tok(Tag::EoF); @@ -90,22 +85,16 @@ Tok Lexer::lex() { // clang-format on if (accept('%')) { - if (lex_id()) { - auto loc = cache_trailing_dot(); - return {loc, Tag::M_anx, sym()}; - } + if (lex_id()) return {loc_, Tag::M_anx, sym()}; ast().error(loc_, "invalid axiom name '{}'", str_); + continue; } if (accept('.')) { if (lex_id()) { if (auto i = keywords_.find(sym()); i != keywords_.end()) return tok(i->second); - // Split non-keyword into T_dot and M_id; M_id goes into cache_ for next lex(). - assert(!cache_.has_value()); - auto id_loc = loc(); - ++id_loc.begin.col; - cache_.emplace(id_loc, Tag::M_id, ast().sym(str_.substr(1))); - return {loc().anew_begin(), Tag::T_dot}; + ast().error(loc_, "invalid keyword '{}'", str_); + continue; } if (accept(utf8::isdigit)) { @@ -119,7 +108,7 @@ Tok Lexer::lex() { if (accept('\'')) { auto c = lex_char(); - if (accept('\'')) return {loc(), c}; + if (accept('\'')) return {loc_, c}; ast().error(loc_, "invalid character literal {}", str_); continue; } @@ -130,10 +119,7 @@ Tok Lexer::lex() { return {loc_, Tag::L_str, sym()}; } - if (lex_id()) { - auto loc = cache_trailing_dot(); - return {loc, Tag::M_id, sym()}; - } + if (lex_id()) return {loc_, Tag::M_id, sym()}; if (utf8::isdigit(ahead()) || utf8::any('+', '-')(ahead())) { if (auto lit = parse_lit()) return *lit; @@ -165,18 +151,6 @@ Tok Lexer::lex() { } } -// A trailing T_dot does not belong to an annex name or identifier and goes into cache_ for next lex(). -Loc Lexer::cache_trailing_dot() { - auto l = loc(); - if (str_.back() == '.') { - str_.pop_back(); - assert(!cache_.has_value()); - cache_.emplace(l.anew_finis(), Tag::T_dot); - --l.finis.col; - } - return l; -} - bool Lexer::lex_id() { if (accept([](char32_t c) { return c == '_' || utf8::isalpha(c); })) { while (accept([](char32_t c) { return c == '_' || c == '.' || utf8::isalnum(c); })) {}