Skip to content

Commit

Permalink
Add Regex abstraction
Browse files Browse the repository at this point in the history
This abstraction allows the user to easily handle regexes.
  • Loading branch information
Deruago committed Mar 2, 2022
1 parent bcbf6a0 commit 7f6af7f
Show file tree
Hide file tree
Showing 5 changed files with 242 additions and 2 deletions.
56 changes: 56 additions & 0 deletions dregx/include/Deamer/Dregx/Regex.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#ifndef DEAMER_DREGX_REGEX_H
#define DEAMER_DREGX_REGEX_H

#include "dregx/Statemachine/Statemachine.h"
#include "dregx/Statemachine/TransitionTable.h"
#include <memory>
#include <string>

namespace deamer::dregx
{
// Wrapper around a regular expression that keeps three things in sync:
// the pattern text, the minimized state machine built from that text, and
// the transition table derived from the state machine.
// Every mutating operation rewrites the pattern text and rebuilds the other
// two members from it.
class Regex
{
private:
// Pattern text; returned by GetRegex() and rewritten by Or()/Concatenate().
std::string regex;
// State machine built from the pattern text by SetRegex(); used by Equal().
std::unique_ptr<::dregx::statemachine::Statemachine> statemachine;
// Table produced from the state machine by SetRegex(); used by Match().
::dregx::statemachine::TransitionTable transitionTable;

public:
// Builds the state machine and transition table for regex_.
// Throws std::logic_error when regex_ cannot be parsed.
Regex(const std::string& regex_);
~Regex() = default;

public:
// Returns the result of running text through the transition table.
bool Match(const std::string& text) const;

public:
// Returns a copy of the current pattern text.
std::string GetRegex() const;

public:
// Replaces this pattern with "(<this>|<rhs>)" and rebuilds the state machine.
// Or-ing an object with itself leaves it unchanged.
void Or(const Regex& rhs);
// Replaces this pattern with this pattern followed by rhs's pattern and
// rebuilds the state machine. rhs may be the same object as *this.
void Concatenate(const Regex& rhs);
// Forwards to Statemachine::Equal on the two state machines.
// Presumably this is language equivalence rather than textual equality
// (the unit tests expect "[a]" or-ed with "b" to equal "[ab]") - confirm
// against Statemachine::Equal.
bool Equal(const Regex& rhs) const;

public:
// In-place alternation / concatenation; both return *this.
Regex& operator|=(const Regex& rhs);
Regex& operator+=(const Regex& rhs);
// NOTE(review): these two also call Or()/Concatenate() on *this, i.e. they
// modify the left operand and return a reference to it, exactly like
// operator|= and operator+=. `a | b` therefore changes `a`.
Regex& operator|(const Regex& rhs);
Regex& operator+(const Regex& rhs);
// Forwards to Equal(). Not const-qualified, so it is only selected for a
// non-const left operand; the namespace-scope operator== overloads cover
// const operands.
bool operator==(const Regex& rhs);

public:
// Same operations with the right-hand side given as pattern text. Each one
// first builds a temporary Regex from rhs, so an unparsable rhs throws
// std::logic_error.
Regex& operator|=(const std::string& rhs);
Regex& operator+=(const std::string& rhs);
// NOTE(review): like the Regex overloads, these modify the left operand.
Regex& operator|(const std::string& rhs);
Regex& operator+(const std::string& rhs);
bool operator==(const std::string& rhs);

private:
// Stores regex_ as the pattern text, builds and minimizes its state machine,
// and derives the transition table from it.
void SetRegex(const std::string& regex_);
// Parses regex_ and converts the result into a state machine.
// Throws std::logic_error when parsing yields no tree or no start node.
std::unique_ptr<::dregx::statemachine::Statemachine> CreateDFA(const std::string& regex_);
};

// Namespace-scope equality usable with const operands. Each overload forwards
// to Regex::Equal; the std::string overloads first build a temporary Regex
// from the string and therefore throw std::logic_error if it cannot be parsed.
bool operator==(const Regex& lhs, const Regex& rhs);
bool operator==(const Regex& lhs, const std::string& rhs);
bool operator==(const std::string& lhs, const Regex& rhs);
}

#endif // DEAMER_DREGX_REGEX_H
2 changes: 1 addition & 1 deletion dregx/include/dregx/Statemachine/TransitionTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ namespace dregx::statemachine
// std::size_t::max is used for rejecting states
std::vector<std::map<std::string, std::size_t>> table;
std::vector<bool> acceptingState;
bool Match(const std::string& text);
bool Match(const std::string& text) const;
void Print();
};
}
Expand Down
147 changes: 147 additions & 0 deletions dregx/lib/Deamer/Regex.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#include "Deamer/Dregx/Regex.h"
#include "dregx/Ast/Listener/User/TranslateToIr.h"
#include "dregx/Bison/Parser.h"
#include "dregx/Statemachine/ConvertRegexToDFA.h"
#include <limits>

// Compiles regex_ immediately: SetRegex fills in the pattern text, the state
// machine and the transition table, and throws if the pattern is invalid.
deamer::dregx::Regex::Regex(const std::string& regex_)
{
	this->SetRegex(regex_);
}

bool deamer::dregx::Regex::Match(const std::string& text) const
{
return transitionTable.Match(text);
}

std::string deamer::dregx::Regex::GetRegex() const
{
return regex;
}

void deamer::dregx::Regex::Or(const Regex& rhs)
{
if (this == &rhs)
{
return;
}

const std::string copy = "(" + this->regex + "|" + rhs.regex + ")";
this->regex = copy;
SetRegex(this->regex);
}

// Turns this pattern into "this followed by rhs" and recompiles it.
//
// Both operands are wrapped in a group before being joined. Pasting the raw
// texts together would let a top-level alternation leak across the seam:
// "a|b" followed by "c" would become "a|bc" (a, or bc) instead of "(a|b)(c)".
//
// rhs may be the same object as *this; the new text is assembled in a local
// string before any member is modified, so aliasing needs no special case.
void deamer::dregx::Regex::Concatenate(const Regex& rhs)
{
	const std::string sequence = "(" + this->regex + ")(" + rhs.regex + ")";

	SetRegex(sequence);
}

bool deamer::dregx::Regex::Equal(const Regex& rhs) const
{
return statemachine->Equal(*rhs.statemachine);
}

// In-place alternation with another Regex; returns the modified object.
deamer::dregx::Regex& deamer::dregx::Regex::operator|=(const Regex& rhs)
{
	Regex& self = *this;
	self.Or(rhs);
	return self;
}

// In-place concatenation with another Regex; returns the modified object.
deamer::dregx::Regex& deamer::dregx::Regex::operator+=(const Regex& rhs)
{
	Regex& self = *this;
	self.Concatenate(rhs);
	return self;
}

// Alternation with another Regex.
// NOTE(review): this modifies the left operand and returns a reference to it,
// i.e. it behaves exactly like operator|=.
deamer::dregx::Regex& deamer::dregx::Regex::operator|(const Regex& rhs)
{
	return (*this) |= rhs;
}

// Concatenation with another Regex.
// NOTE(review): this modifies the left operand and returns a reference to it,
// i.e. it behaves exactly like operator+=.
deamer::dregx::Regex& deamer::dregx::Regex::operator+(const Regex& rhs)
{
	return (*this) += rhs;
}

// Member equality for a non-const left operand; same result as Equal(rhs).
bool deamer::dregx::Regex::operator==(const Regex& rhs)
{
	const bool equivalent = this->Equal(rhs);
	return equivalent;
}

// In-place alternation with a pattern given as text.
// rhs is compiled into a temporary Regex first, so an invalid rhs throws
// before *this is touched.
deamer::dregx::Regex& deamer::dregx::Regex::operator|=(const std::string& rhs)
{
	const Regex compiledRhs(rhs);
	Or(compiledRhs);
	return *this;
}

// In-place concatenation with a pattern given as text.
// rhs is compiled into a temporary Regex first, so an invalid rhs throws
// before *this is touched.
deamer::dregx::Regex& deamer::dregx::Regex::operator+=(const std::string& rhs)
{
	const Regex compiledRhs(rhs);
	Concatenate(compiledRhs);
	return *this;
}

// Alternation with a pattern given as text.
// NOTE(review): like operator|=, this modifies the left operand in place.
deamer::dregx::Regex& deamer::dregx::Regex::operator|(const std::string& rhs)
{
	const Regex compiledRhs(rhs);
	return (*this) |= compiledRhs;
}

// Concatenation with a pattern given as text.
// NOTE(review): like operator+=, this modifies the left operand in place.
deamer::dregx::Regex& deamer::dregx::Regex::operator+(const std::string& rhs)
{
	const Regex compiledRhs(rhs);
	return (*this) += compiledRhs;
}

// Member equality against a pattern given as text: rhs is compiled into a
// temporary Regex and the two state machines are compared.
bool deamer::dregx::Regex::operator==(const std::string& rhs)
{
	return Equal(Regex(rhs));
}

// Makes this object represent regex_: builds and minimizes the state machine,
// derives the transition table, and stores the pattern text.
//
// Everything is built from the argument before any member is written. If
// CreateDFA throws for an invalid pattern, the pattern text, state machine
// and transition table are all left as they were instead of ending up with
// new text paired with the old machine.
//
// Throws std::logic_error (from CreateDFA) when regex_ cannot be parsed.
void deamer::dregx::Regex::SetRegex(const std::string& regex_)
{
	auto newStatemachine = CreateDFA(regex_);
	newStatemachine->Minimize();
	auto newTransitionTable = newStatemachine->ToTransitionTable();

	// Commit phase. regex_ may refer to this->regex itself (callers pass it);
	// std::string self-assignment is well defined, so that is harmless.
	regex = regex_;
	transitionTable = std::move(newTransitionTable);
	statemachine = std::move(newStatemachine);
}

// Parses regex_, translates the resulting tree to the intermediate
// representation, and converts that into a state machine.
//
// Returns the state machine produced by ConvertRegexToDFA::ConvertToStatemachine.
// Throws std::logic_error when the parser yields no tree or a tree without a
// start node.
std::unique_ptr<::dregx::statemachine::Statemachine>
deamer::dregx::Regex::CreateDFA(const std::string& regex_)
{
	const auto parser = ::dregx::parser::Parser();
	// Take ownership of the parse result so it is released on every exit path,
	// including the throw below.
	const auto tree = std::unique_ptr<::deamer::external::cpp::ast::Tree>(parser.Parse(regex_));
	if (tree == nullptr || tree->GetStartNode() == nullptr)
	{
		throw std::logic_error("Regex: " + regex_ + " is invalid");
	}

	auto listener = ::dregx::ast::listener::user::TranslateToIr();
	listener.Dispatch(tree->GetStartNode());

	// Keep the IR in a named local: only its raw pointer is passed on, so it
	// has to stay alive until the conversion has returned.
	auto ir = listener.GetOutput();

	// Return the call result directly; wrapping it in std::move is redundant
	// and would prevent copy elision.
	return ::dregx::statemachine::ConvertRegexToDFA::ConvertToStatemachine(ir.get());
}

// Equality for two (possibly const) Regex objects; defers to Regex::Equal.
bool deamer::dregx::operator==(const Regex& lhs, const Regex& rhs)
{
	const bool equivalent = lhs.Equal(rhs);
	return equivalent;
}

bool deamer::dregx::operator==(const Regex& lhs, const std::string& rhs)
{
return lhs.Equal(Regex(rhs));
}

bool deamer::dregx::operator==(const std::string& lhs, const Regex& rhs)
{
return Regex(lhs).Equal(rhs);
}
2 changes: 1 addition & 1 deletion dregx/lib/Statemachine/TransitionTable.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "dregx/Statemachine/TransitionTable.h"
#include <limits>

bool dregx::statemachine::TransitionTable::Match(const std::string& text)
bool dregx::statemachine::TransitionTable::Match(const std::string& text) const
{
std::size_t nextState = 0;
for (auto character : text)
Expand Down
37 changes: 37 additions & 0 deletions dregx/tests/dregx/Regex/TestRegex.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#include "Deamer/Dregx/Regex.h"
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <memory>
#include <optional>

using namespace dregx;

class TestRegex : public testing::Test
{
public:
protected:
TestRegex() = default;
virtual ~TestRegex() = default;
};

// "[a]" or-ed with "b" must compare equal to "[ab]", both against a Regex
// object and against the pattern given as plain text.
TEST_F(TestRegex, RegexOperatorOr_OrTwoRegexes)
{
	::deamer::dregx::Regex regex("[a]");
	regex |= "b";

	const ::deamer::dregx::Regex expectedRegex("[ab]");

	EXPECT_EQ(expectedRegex, regex);
	EXPECT_EQ("[ab]", regex);
}

// "[a]" followed by "a" must compare equal to "[a][a]", both against a Regex
// object and against the pattern given as plain text.
TEST_F(TestRegex, RegexOperatorConcatenate_ConcatenateTwoRegexes)
{
	::deamer::dregx::Regex regex("[a]");
	regex += "a";

	const ::deamer::dregx::Regex expectedRegex("[a][a]");

	EXPECT_EQ(expectedRegex, regex);
	EXPECT_EQ("[a][a]", regex);
}

0 comments on commit 7f6af7f

Please sign in to comment.