Skip to content

Commit

Permalink
Fix token location computing (#124)
Browse files Browse the repository at this point in the history
* Fix location computing for plain and hex strings

* Add Location::getText method because its binding may be handy for YDE

* Remove FIXME of already fixed tests

Co-authored-by: Tadeáš Kučera <[email protected]>
  • Loading branch information
TadeasKucera and TadeasKucera authored Aug 17, 2020
1 parent 9297e2c commit 17144b6
Show file tree
Hide file tree
Showing 10 changed files with 149 additions and 59 deletions.
32 changes: 26 additions & 6 deletions include/yaramod/parser/location.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <sstream>

namespace yaramod {

Expand All @@ -21,13 +22,19 @@ class Location
*/
struct Position {

/// Default position: line 1, column 0.
Position() : line(1), column(0) {}
/// Constructs a position from an explicit line and column.
Position(std::size_t line, std::size_t column) : line(line), column(column) {}

// NOTE(review): both a non-const and a const getter pair are listed here.
// This listing looks like a rendered diff without +/- markers, so the
// non-const pair is presumably the removed version -- confirm against the repository.
std::size_t getLine() { return line; }
std::size_t getColumn() { return column; }
/// Returns the stored line.
std::size_t getLine() const { return line; }
/// Returns the stored column.
std::size_t getColumn() const { return column; }

std::size_t line;
std::size_t column;
/// Streams the position as `<line>.<column>` and returns the stream.
friend std::ostream& operator<<(std::ostream& os, const Position& position)
{
os << position.getLine() << '.' << position.getColumn();
return os;
}
};

Location() : Location(std::string{}) {}
Expand All @@ -53,8 +60,7 @@ class Location

/// Moves the location forward on the current line: the previous end becomes
/// the new begin, and the end column is advanced by @p count.
void addColumn(std::size_t count)
{
// NOTE(review): the two field-wise assignments below do the same thing as
// `_begin = _end;`. They are presumably the removed lines of a rendered diff
// (shown without +/- markers) -- confirm against the repository.
_begin.line = _end.line;
_begin.column = _end.column;
_begin = _end;
_end.column += count;
}

Expand All @@ -63,6 +69,12 @@ class Location
_begin = {1, 0};
_end = {1, 0};
}

/// Overrides the begin position of this location with @p begin.
void setBegin(const Position& begin)
{
_begin.line = begin.line;
// The column is stored decremented by one; the begin() getter of this class
// returns `_begin.column + 1`, so a value obtained from begin() round-trips.
// NOTE(review): column is an unsigned std::size_t, so begin.column == 0 wraps
// around here -- presumably callers only pass positions obtained from begin(); verify.
_begin.column = begin.column - 1;
}
/// @}

/// @name Getters
Expand All @@ -76,14 +88,22 @@ class Location
const std::string& getFilePath() const { return _filePath; }
Position begin() const { return {_begin.line, _begin.column + 1}; }
const Position& end() const { return _end; }
/// Returns the textual form of this location, i.e. whatever the
/// `operator<<` overload for Location writes, collected into a string.
std::string getText() const
{
std::ostringstream ss;
ss << *this;
return ss.str();
}
/// @}

/// Streams a textual form of @p location to @p os and returns the stream.
friend std::ostream& operator<<(std::ostream& os, const Location& location)
{
// The file path and a ':' separator are written only for named locations.
if (!location.isUnnamed())
os << location.getFilePath() << ':';
// NOTE(review): the remaining lines look like removed and added lines of a
// rendered diff shown together (no +/- markers). Read literally, the first
// `if` would guard `os << location.begin()`. Presumably the intended body
// prints begin(), then either `-<end>` when the lines differ or `-<end column>`
// when only the column advances -- confirm against the repository.
os << location.begin().line << '.' << location.begin().column;
if (location.begin().column < location.end().column)
os << location.begin();
if (location.begin().line != location.end().line)
os << '-' << location.end();
else if (location.begin().column < location.end().column)
os << '-' << location.end().column;
return os;
}
Expand Down
1 change: 1 addition & 0 deletions include/yaramod/parser/parser_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ class ParserDriver

private:
std::string _strLiteral; ///< Currently processed string literal.
Location::Position _positionBegin; ///< Variable storing the position the currently processed token begins at.
std::string _indent; ///< Variable storing current indentation
std::string _comment; ///< For incremental construction of parsed comments
std::string _regexpClass; ///< Currently processed regular expression class.
Expand Down
43 changes: 38 additions & 5 deletions include/yaramod/types/hex_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ class HexStringUnit
/// @{
virtual std::string getText() const = 0;
virtual std::size_t getLength() const = 0;
virtual TokenIt getFirstTokenIt() const = 0;
virtual TokenIt getLastTokenIt() const = 0;
/// @}

/// @name Detection methods
Expand Down Expand Up @@ -78,6 +80,8 @@ class HexString : public String
/// @{
explicit HexString(const std::shared_ptr<TokenStream>& ts, const std::vector<std::shared_ptr<HexStringUnit>>& units);
explicit HexString(const std::shared_ptr<TokenStream>& ts, std::vector<std::shared_ptr<HexStringUnit>>&& units);
explicit HexString(const std::shared_ptr<TokenStream>& ts, TokenIt leftBracket, const std::vector<std::shared_ptr<HexStringUnit>>& units, TokenIt rightBracket);
explicit HexString(const std::shared_ptr<TokenStream>& ts, TokenIt leftBracket, std::vector<std::shared_ptr<HexStringUnit>>&& units, TokenIt rightBracket);
/// @}

/// @name Virtual methods.
Expand Down Expand Up @@ -109,7 +113,9 @@ class HexString : public String
bool empty() const { return _units.empty(); }

private:
std::optional<TokenIt> _leftBracket;
std::vector<std::shared_ptr<HexStringUnit>> _units; ///< Units in the hex string
std::optional<TokenIt> _rightBracket;
};

/**
Expand Down Expand Up @@ -148,9 +154,10 @@ class HexStringNibble : public HexStringUnit
assert(output <= 0xf);
return output;
}
/// @}

/// Length of this unit; always 1.
virtual std::size_t getLength() const override { return 1; }
/// First token of the unit: the single token stored in `_value`.
virtual TokenIt getFirstTokenIt() const override { return _value; }
/// Last token of the unit: the same single token stored in `_value`.
virtual TokenIt getLastTokenIt() const override { return _value; }
/// @}

private:
TokenIt _value; ///< Value of the nibble
Expand All @@ -173,6 +180,8 @@ class HexStringWildcard : public HexStringUnit
/// @{
/// Textual form of the unit; always "?".
virtual std::string getText() const override { return "?"; }
/// Length of this unit; always 1.
virtual std::size_t getLength() const override { return 1; }
/// First token of the unit: the single token stored in `_value`.
virtual TokenIt getFirstTokenIt() const override { return _value; }
/// Last token of the unit: the same single token stored in `_value`.
virtual TokenIt getLastTokenIt() const override { return _value; }
/// @}
private:
TokenIt _value; ///< Value of the nibble
Expand All @@ -189,9 +198,27 @@ class HexStringJump : public HexStringUnit
public:
/// @name Constructors
/// @{
HexStringJump() : HexStringUnit(Type::Jump) {}
HexStringJump(TokenIt low) : HexStringUnit(Type::Jump), _low(low), _high() {}
HexStringJump(TokenIt low, TokenIt high) : HexStringUnit(Type::Jump), _low(low), _high(high) {}
/// Constructs a jump with no bounds: only the enclosing bracket tokens are
/// stored, and the optional `_low` / `_high` members are left empty.
HexStringJump(TokenIt leftBracket, TokenIt rightBracket)
: HexStringUnit(Type::Jump)
, _leftBracket(leftBracket)
, _rightBracket(rightBracket)
{
}
/// Constructs a jump with only the low bound set; `_high` is left empty.
/// The enclosing bracket tokens are stored alongside the bound.
HexStringJump(TokenIt leftBracket, TokenIt low, TokenIt rightBracket)
: HexStringUnit(Type::Jump)
, _leftBracket(leftBracket)
, _low(low)
, _rightBracket(rightBracket)
{
}
/// Constructs a jump with both the low and the high bound set, together with
/// the enclosing bracket tokens. Callers may pass the same token for @p low
/// and @p high.
HexStringJump(TokenIt leftBracket, TokenIt low, TokenIt high, TokenIt rightBracket)
: HexStringUnit(Type::Jump)
, _leftBracket(leftBracket)
, _low(low)
, _high(high)
, _rightBracket(rightBracket)
{
}
/// @}

/// @name Virtual methods
Expand Down Expand Up @@ -233,10 +260,14 @@ class HexStringJump : public HexStringUnit
return _high.value()->getUInt();
return std::nullopt;
}
/// First token of the jump: the stored left bracket token.
virtual TokenIt getFirstTokenIt() const override { return _leftBracket; }
/// Last token of the jump: the stored right bracket token.
virtual TokenIt getLastTokenIt() const override { return _rightBracket; }
/// @}

private:
TokenIt _leftBracket;
std::optional<TokenIt> _low, _high; ///< Low and high bounds of the jump.
TokenIt _rightBracket;
};

/**
Expand Down Expand Up @@ -282,6 +313,8 @@ class HexStringOr : public HexStringUnit
/// @name Getters
/// @{
const std::vector<std::shared_ptr<HexString>> getSubstrings() const { return _substrings; }
/// First token of the alternation: the first token of the first substring.
/// Asserts that at least one substring exists.
virtual TokenIt getFirstTokenIt() const override { assert(!_substrings.empty()); return _substrings.front()->getFirstTokenIt(); }
/// Last token of the alternation: the last token of the last substring.
/// Asserts that at least one substring exists.
virtual TokenIt getLastTokenIt() const override { assert(!_substrings.empty()); return _substrings.back()->getLastTokenIt(); }
/// @}

/// @name Iterators
Expand Down
4 changes: 2 additions & 2 deletions include/yaramod/types/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ class String

/// Builds the location spanned by this string. The file path and the begin
/// position are taken from the `_id` token when it is set and from
/// getFirstTokenIt() otherwise; the end position comes from getLastTokenIt().
const Location getLocation() const {
// NOTE(review): the file-path and begin() lines each appear twice. This looks
// like a whitespace-only change in a rendered diff (no +/- markers), so the
// initializer presumably has three elements -- confirm against the repository.
return {
_id.value_or(getFirstTokenIt())->getLocation().getFilePath(),
_id.value_or(getFirstTokenIt())->getLocation().begin(),
_id.value_or(getFirstTokenIt())->getLocation().getFilePath(),
_id.value_or(getFirstTokenIt())->getLocation().begin(),
getLastTokenIt()->getLocation().end()
};
}
Expand Down
36 changes: 21 additions & 15 deletions src/builder/yara_hex_string_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,16 +203,22 @@ YaraHexStringBuilder::YaraHexStringBuilder(const std::shared_ptr<TokenStream>& t
*/
std::shared_ptr<HexString> YaraHexStringBuilder::get(const std::shared_ptr<TokenStream>& acceptor /*= nullptr*/, bool addHexParentheses /*= true*/) const
{
// Iterators to the enclosing `{` / `}` tokens; they stay empty when
// addHexParentheses is false.
std::optional<TokenIt> leftBracket, rightBracket;
if (addHexParentheses)
{
// NOTE(review): the two emplace calls whose result is discarded duplicate the
// two assigned ones below. They are presumably the removed lines of a rendered
// diff (no +/- markers) -- confirm against the repository.
_tokenStream->emplace(_tokenStream->begin(), HEX_START_BRACKET, "{");
_tokenStream->emplace_back(HEX_END_BRACKET, "}");
leftBracket = _tokenStream->emplace(_tokenStream->begin(), HEX_START_BRACKET, "{");
rightBracket = _tokenStream->emplace_back(HEX_END_BRACKET, "}");
}
if (acceptor)
{
// Hand the builder's tokens over to the caller-supplied stream, then build
// the HexString on top of that stream.
acceptor->moveAppend(_tokenStream.get());
// NOTE(review): this unconditional return makes the if/else below unreachable
// as listed; presumably it is the removed diff line -- confirm.
return std::make_shared<HexString>(acceptor, _units);
if (leftBracket && rightBracket)
return std::make_shared<HexString>(acceptor, *leftBracket, _units, *rightBracket);
else
return std::make_shared<HexString>(acceptor, _units);
}
// No acceptor: the HexString is built on the builder's own token stream.
// NOTE(review): this method is const, so unless `_tokenStream` is declared
// mutable, std::move here yields a const rvalue and the shared_ptr is copied
// rather than moved -- confirm the intent.
else if (leftBracket && rightBracket)
return std::make_shared<HexString>(std::move(_tokenStream), *leftBracket, _units, *rightBracket);
else
return std::make_shared<HexString>(std::move(_tokenStream), _units);
}
Expand Down Expand Up @@ -309,10 +315,10 @@ YaraHexStringBuilder wildcardHigh(std::uint8_t low)
YaraHexStringBuilder jumpVarying()
{
// Builds a fresh token stream holding `[`, `-`, `]` and wraps it in a builder
// together with a HexStringJump that has no bounds.
auto ts = std::make_shared<TokenStream>();
// NOTE(review): each bracket is emplaced twice in this listing (once with the
// result discarded, once captured). The discarded calls and the argument-less
// HexStringJump return are presumably removed lines of a rendered diff -- confirm.
ts->emplace_back(HEX_JUMP_LEFT_BRACKET, "[");
auto left = ts->emplace_back(HEX_JUMP_LEFT_BRACKET, "[");
ts->emplace_back(DASH, "-");
ts->emplace_back(HEX_JUMP_RIGHT_BRACKET, "]");
return YaraHexStringBuilder(ts, std::make_shared<HexStringJump>());
auto right = ts->emplace_back(HEX_JUMP_RIGHT_BRACKET, "]");
// The captured bracket iterators are passed to the jump unit.
return YaraHexStringBuilder(ts, std::make_shared<HexStringJump>(left, right));
}

/**
Expand All @@ -328,11 +334,11 @@ YaraHexStringBuilder jumpVarying()
YaraHexStringBuilder jumpFixed(std::uint64_t value)
{
// Builds a fresh token stream holding `[`, the value token, `]` and wraps it
// in a builder together with a HexStringJump.
auto ts = std::make_shared<TokenStream>();
// NOTE(review): each bracket is emplaced twice in this listing and there are
// two returns. The calls with discarded results and the first return are
// presumably removed lines of a rendered diff -- confirm.
ts->emplace_back(HEX_JUMP_LEFT_BRACKET, "[");
auto left = ts->emplace_back(HEX_JUMP_LEFT_BRACKET, "[");
TokenIt t = ts->emplace_back(HEX_NIBBLE, value);
ts->emplace_back(HEX_JUMP_RIGHT_BRACKET, "]");
auto right = ts->emplace_back(HEX_JUMP_RIGHT_BRACKET, "]");

return YaraHexStringBuilder(ts, std::make_shared<HexStringJump>(t, t));
// The same value token is used as both the low and the high bound.
return YaraHexStringBuilder(ts, std::make_shared<HexStringJump>(left, t, t, right));
}

/**
Expand All @@ -348,12 +354,12 @@ YaraHexStringBuilder jumpFixed(std::uint64_t value)
YaraHexStringBuilder jumpVaryingRange(std::uint64_t low)
{
// Builds a fresh token stream holding `[`, the low-bound token, `-`, `]` and
// wraps it in a builder together with a HexStringJump.
auto ts = std::make_shared<TokenStream>();
// NOTE(review): each bracket is emplaced twice in this listing and there are
// two returns. The calls with discarded results and the first return are
// presumably removed lines of a rendered diff -- confirm.
ts->emplace_back(HEX_JUMP_LEFT_BRACKET, "[");
auto left = ts->emplace_back(HEX_JUMP_LEFT_BRACKET, "[");
TokenIt t = ts->emplace_back(HEX_NIBBLE, low);
ts->emplace_back(DASH, "-");
ts->emplace_back(HEX_JUMP_RIGHT_BRACKET, "]");
auto right = ts->emplace_back(HEX_JUMP_RIGHT_BRACKET, "]");

return YaraHexStringBuilder(ts, std::make_shared<HexStringJump>(t));
// Only the low bound is passed, together with the bracket iterators.
return YaraHexStringBuilder(ts, std::make_shared<HexStringJump>(left, t, right));
}

/**
Expand All @@ -369,13 +375,13 @@ YaraHexStringBuilder jumpVaryingRange(std::uint64_t low)
YaraHexStringBuilder jumpRange(std::uint64_t low, std::uint64_t high)
{
// Builds a fresh token stream holding `[`, the low token, `-`, the high token,
// `]` and wraps it in a builder together with a HexStringJump.
auto ts = std::make_shared<TokenStream>();
// NOTE(review): each bracket is emplaced twice in this listing and there are
// two returns. The calls with discarded results and the first return are
// presumably removed lines of a rendered diff -- confirm.
ts->emplace_back(HEX_JUMP_LEFT_BRACKET, "[");
auto left = ts->emplace_back(HEX_JUMP_LEFT_BRACKET, "[");
TokenIt t1 = ts->emplace_back(HEX_NIBBLE, low);
ts->emplace_back(DASH, "-");
TokenIt t2 = ts->emplace_back(HEX_NIBBLE, high);
ts->emplace_back(HEX_JUMP_RIGHT_BRACKET, "]");
auto right = ts->emplace_back(HEX_JUMP_RIGHT_BRACKET, "]");

return YaraHexStringBuilder(ts, std::make_shared<HexStringJump>(t1, t2));
// Both bound tokens are passed, together with the bracket iterators.
return YaraHexStringBuilder(ts, std::make_shared<HexStringJump>(left, t1, t2, right));
}

/**
Expand Down
12 changes: 7 additions & 5 deletions src/parser/parser_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ void ParserDriver::defineTokens()
// $str tokens are not delegated with return Value but stored in _strLiteral
_parser.token(R"(\")").states("@default").enter_state("$str").action([&](std::string_view) -> Value {
_strLiteral.clear();
_positionBegin = currentFileContext()->getLocation().begin();
_escapedContent = false;
return {};
});
Expand All @@ -241,6 +242,7 @@ void ParserDriver::defineTokens()
_parser.token(R"(\\[^\"tnx\\])").states("$str").action([&](std::string_view str) -> Value { error_handle(currentFileContext()->getLocation(), "Syntax error: Unknown escaped sequence '" + std::string{str} + "'"); return {}; });
_parser.token(R"(([^\\"])+)").states("$str").action([&](std::string_view str) -> Value { _strLiteral += std::string{str}; return {}; });
_parser.token(R"(\")").states("$str").symbol("STRING_LITERAL").description("\"").enter_state("@default").action([&](std::string_view) -> Value {
currentFileContext()->getLocation().setBegin(_positionBegin);
auto strIt = emplace_back(STRING_LITERAL, _strLiteral);
if (_escapedContent)
strIt->markEscaped();
Expand Down Expand Up @@ -597,7 +599,7 @@ void ParserDriver::defineGrammar()
},
"hex_string", "RCB", "hex_string_mods", [&](auto&& args) -> Value {
args[3].getTokenIt()->setType(HEX_END_BRACKET);
auto hexString = std::make_shared<HexString>(currentFileContext()->getTokenStream(), std::move(args[2].getMultipleHexUnits()));
auto hexString = std::make_shared<HexString>(currentFileContext()->getTokenStream(), args[0].getTokenIt(), std::move(args[2].getMultipleHexUnits()), args[3].getTokenIt());
hexString->setModifiers(std::move(args[4].getStringMods()));
return hexString;
}
Expand Down Expand Up @@ -802,22 +804,22 @@ void ParserDriver::defineGrammar()
.production("LSQB", "HEX_INTEGER", "RSQB", [](auto&& args) -> Value {
args[0].getTokenIt()->setType(HEX_JUMP_LEFT_BRACKET);
args[2].getTokenIt()->setType(HEX_JUMP_RIGHT_BRACKET);
return std::make_shared<HexStringJump>(args[1].getTokenIt(), args[1].getTokenIt());
return std::make_shared<HexStringJump>(args[0].getTokenIt(), args[1].getTokenIt(), args[1].getTokenIt(), args[2].getTokenIt());
})
.production("LSQB", "HEX_INTEGER", "DASH", "HEX_INTEGER", "RSQB", [](auto&& args) -> Value {
args[0].getTokenIt()->setType(HEX_JUMP_LEFT_BRACKET);
args[4].getTokenIt()->setType(HEX_JUMP_RIGHT_BRACKET);
return std::make_shared<HexStringJump>(args[1].getTokenIt(), args[3].getTokenIt());
return std::make_shared<HexStringJump>(args[0].getTokenIt(), args[1].getTokenIt(), args[3].getTokenIt(), args[4].getTokenIt());
})
.production("LSQB", "HEX_INTEGER", "DASH", "RSQB", [](auto&& args) -> Value {
args[0].getTokenIt()->setType(HEX_JUMP_LEFT_BRACKET);
args[3].getTokenIt()->setType(HEX_JUMP_RIGHT_BRACKET);
return std::make_shared<HexStringJump>(args[1].getTokenIt());
return std::make_shared<HexStringJump>(args[0].getTokenIt(), args[1].getTokenIt(), args[3].getTokenIt());
})
.production("LSQB", "DASH", "RSQB", [](auto&& args) -> Value {
args[0].getTokenIt()->setType(HEX_JUMP_LEFT_BRACKET);
args[2].getTokenIt()->setType(HEX_JUMP_RIGHT_BRACKET);
return std::make_shared<HexStringJump>();
return std::make_shared<HexStringJump>(args[0].getTokenIt(), args[2].getTokenIt());
})
;

Expand Down
3 changes: 2 additions & 1 deletion src/python/yaramod_python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,8 @@ void addBasicClasses(py::module& module)
// `line_number` property is deprecated, preferred way is to use `begin.line`
.def_property_readonly("line_number", [](const Location& self) { return self.begin().getLine(); })
.def_property_readonly("begin", &Location::begin)
.def_property_readonly("end", &Location::end);
.def_property_readonly("end", &Location::end)
.def_property_readonly("text", &Location::getText);

py::class_<Location::Position>(module, "Position")
.def_property_readonly("line", &Location::Position::getLine)
Expand Down
Loading

0 comments on commit 17144b6

Please sign in to comment.