Cache in Lexer not needed anymore
leissa committed Oct 18, 2024
1 parent 10ae927 commit 7911266
Showing 2 changed files with 7 additions and 38 deletions.
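In short: lex() used to keep a one-token lookahead in cache_. When an identifier or annex name ended in a trailing '.', cache_trailing_dot() split the dot off and stashed a T_dot token for the next call to lex(); likewise, a non-keyword after '.' was split into T_dot plus a cached M_id. The diff below deletes that machinery, so every call to lex() now returns exactly one token with no buffered state. For readers unfamiliar with the pattern, here is a minimal, self-contained sketch of such a one-token cache; MiniLexer, Token, and the splitting rule are hypothetical stand-ins, not mim's actual API.

// Illustrative sketch of a one-token lookahead cache, i.e. the pattern this commit removes.
// MiniLexer, Token, and the splitting rule are hypothetical stand-ins, not mim's actual API.
#include <cctype>
#include <cstddef>
#include <optional>
#include <string>

struct Token {
    enum class Tag { Id, Dot, EoF } tag;
    std::string text;
};

class MiniLexer {
public:
    explicit MiniLexer(std::string src) : src_(std::move(src)) {}

    Token next() {
        if (cache_) { // a previous call split off a token; hand it out first
            Token t = *cache_;
            cache_.reset();
            return t;
        }
        while (pos_ < src_.size() && std::isspace(static_cast<unsigned char>(src_[pos_]))) ++pos_;
        if (pos_ >= src_.size()) return {Token::Tag::EoF, ""};
        if (src_[pos_] == '.') { ++pos_; return {Token::Tag::Dot, "."}; }

        std::string id; // greedily lex a name that may contain '.'
        do {
            id += src_[pos_++];
        } while (pos_ < src_.size()
                 && (std::isalnum(static_cast<unsigned char>(src_[pos_])) || src_[pos_] == '.'));

        if (id.back() == '.') { // "foo." lexes as Id("foo") now, plus a Dot cached for the next call
            id.pop_back();
            cache_ = Token{Token::Tag::Dot, "."};
        }
        return {Token::Tag::Id, id};
    }

private:
    std::string src_;
    std::size_t pos_ = 0;
    std::optional<Token> cache_; // the single-token cache; the commit drops mim's analogue of this
};

For example, MiniLexer("foo.") yields Id("foo"), then the cached Dot, then EoF across three calls to next(). The commit removes mim's analogue of cache_ because this split is no longer needed.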
7 changes: 1 addition & 6 deletions include/mim/ast/lexer.h
@@ -1,7 +1,5 @@
 #pragma once
 
-#include <optional>
-
 #include <absl/container/flat_hash_map.h>
 #include <fe/lexer.h>
 
@@ -23,7 +21,6 @@ class Lexer : public fe::Lexer<3, Lexer> {
 
     AST& ast() { return ast_; }
     const fs::path* path() const { return loc_.path; }
-    Loc loc() const { return loc_; }
     Tok lex();
 
 private:
@@ -41,9 +38,8 @@ class Lexer : public fe::Lexer<3, Lexer> {
         return res;
     }
 
-    Tok tok(Tok::Tag tag) { return {loc(), tag}; }
+    Tok tok(Tok::Tag tag) { return {loc_, tag}; }
     Sym sym();
-    Loc cache_trailing_dot();
     bool lex_id();
     char8_t lex_char();
     std::optional<Tok> parse_lit();
@@ -60,7 +56,6 @@ class Lexer : public fe::Lexer<3, Lexer> {
     std::ostream* md_;
     bool out_ = true;
    fe::SymMap<Tok::Tag> keywords_;
-    std::optional<Tok> cache_ = std::nullopt;
 
     friend class fe::Lexer<3, Lexer>;
 };
38 changes: 6 additions & 32 deletions src/mim/ast/lexer.cpp
@@ -30,11 +30,6 @@ Lexer::Lexer(AST& ast, std::istream& istream, const fs::path* path /*= nullptr*/
 
 Tok Lexer::lex() {
     while (true) {
-        if (auto cache = cache_) {
-            cache_.reset();
-            return *cache;
-        }
-
         start();
 
         if (accept(utf8::EoF)) return tok(Tag::EoF);
@@ -90,22 +85,16 @@ Tok Lexer::lex() {
         // clang-format on
 
         if (accept('%')) {
-            if (lex_id()) {
-                auto loc = cache_trailing_dot();
-                return {loc, Tag::M_anx, sym()};
-            }
+            if (lex_id()) return {loc_, Tag::M_anx, sym()};
             ast().error(loc_, "invalid axiom name '{}'", str_);
             continue;
         }
 
         if (accept('.')) {
             if (lex_id()) {
                 if (auto i = keywords_.find(sym()); i != keywords_.end()) return tok(i->second);
-                // Split non-keyword into T_dot and M_id; M_id goes into cache_ for next lex().
-                assert(!cache_.has_value());
-                auto id_loc = loc();
-                ++id_loc.begin.col;
-                cache_.emplace(id_loc, Tag::M_id, ast().sym(str_.substr(1)));
-                return {loc().anew_begin(), Tag::T_dot};
+                ast().error(loc_, "invalid keyword '{}'", str_);
+                continue;
             }
 
         if (accept(utf8::isdigit)) {
@@ -119,7 +108,7 @@ Tok Lexer::lex() {
 
         if (accept('\'')) {
             auto c = lex_char();
-            if (accept('\'')) return {loc(), c};
+            if (accept('\'')) return {loc_, c};
             ast().error(loc_, "invalid character literal {}", str_);
             continue;
         }
@@ -130,10 +119,7 @@ Tok Lexer::lex() {
             return {loc_, Tag::L_str, sym()};
         }
 
-        if (lex_id()) {
-            auto loc = cache_trailing_dot();
-            return {loc, Tag::M_id, sym()};
-        }
+        if (lex_id()) return {loc_, Tag::M_id, sym()};
 
         if (utf8::isdigit(ahead()) || utf8::any('+', '-')(ahead())) {
             if (auto lit = parse_lit()) return *lit;
@@ -165,18 +151,6 @@ Tok Lexer::lex() {
     }
 }
 
-// A trailing T_dot does not belong to an annex name or identifier and goes into cache_ for next lex().
-Loc Lexer::cache_trailing_dot() {
-    auto l = loc();
-    if (str_.back() == '.') {
-        str_.pop_back();
-        assert(!cache_.has_value());
-        cache_.emplace(l.anew_finis(), Tag::T_dot);
-        --l.finis.col;
-    }
-    return l;
-}
-
 bool Lexer::lex_id() {
     if (accept([](char32_t c) { return c == '_' || utf8::isalpha(c); })) {
         while (accept([](char32_t c) { return c == '_' || c == '.' || utf8::isalnum(c); })) {}
