Skip to content

Commit

Permalink
Merge pull request #723 from onekey-sec/hexstring-anchors
Browse files Browse the repository at this point in the history
parser: support regex style begin-end anchors (`^`, `$`)
  • Loading branch information
qkaiser authored Jan 25, 2024
2 parents 3c61aba + 86a90c4 commit f3be256
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 5 deletions.
4 changes: 4 additions & 0 deletions docs/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,10 @@ PATTERNS = [
]
```

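In addition, the start-of-input and end-of-input anchors (`^` and `$`, as in
regular expressions) can be used to restrict a match to the beginning or the
end of the input file. `^` is only accepted as the first token of a hex string
and `$` only as the last one: `^ 00` and `00 $` are valid, while `00 ^ 01` and
`$ 00` are rejected as invalid hex strings.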

### DirectoryPatterns

The `DirectoryHandler` uses these patterns to identify the starting/main file of a given
Expand Down
16 changes: 14 additions & 2 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
rb"\x00(\x01|\x02(\x03|\x04))\x05",
id="nested-alternative",
),
pytest.param("^ 00", rb"^\x00", id="start-anchor"),
pytest.param("00 $", rb"\x00$", id="end-anchor"),
],
)
def test_simple_convert(hex_string, expected_regex):
Expand Down Expand Up @@ -72,6 +74,16 @@ def test_single_comment():
assert regex == rb"\x01\x02"


def test_invalid_hexstring():
@pytest.mark.parametrize(
"pattern",
[
pytest.param("invalid hexstring", id="invalid"),
pytest.param("00 ^", id="start-anchor-at-end"),
pytest.param("00 ^ 01", id="start-anchor-at-middle"),
pytest.param("$ 00", id="end-anchor-at-start"),
pytest.param("00 $ 01", id="end-anchor-at-middle"),
],
)
def test_invalid_hexstring(pattern):
with pytest.raises(InvalidHexString):
hexstring2regex("invalid hexstring")
hexstring2regex(pattern)
8 changes: 5 additions & 3 deletions unblob/parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import itertools

from lark.exceptions import UnexpectedCharacters
from lark.exceptions import UnexpectedInput
from lark.lark import Lark
from lark.visitors import Transformer

Expand All @@ -16,7 +16,7 @@
%ignore COMMENT
start: item+
start: START_ANCHOR? item+ END_ANCHOR?
item: LITERAL -> literal
| WILDCARD -> wildcard
Expand All @@ -29,6 +29,8 @@
alternative: "(" item+ (ALTERNATIVE_SEPARATOR item+)+ ")"
ALTERNATIVE_SEPARATOR: "|"
LITERAL: HEXDIGIT HEXDIGIT
START_ANCHOR: "^"
END_ANCHOR: "$"
WILDCARD: "??"
FIRSTNIBLE: "?" HEXDIGIT
SECONDNIBLE: HEXDIGIT "?"
Expand Down Expand Up @@ -89,6 +91,6 @@ class InvalidHexString(ValueError):
def hexstring2regex(hexastr):
try:
parsed = _hex_string_parser.parse(hexastr)
except UnexpectedCharacters as e:
except UnexpectedInput as e:
raise InvalidHexString(str(e)) from e
return _HexStringToRegex().transform(parsed)

0 comments on commit f3be256

Please sign in to comment.