Skip to content

Commit

Permalink
Support big endian utf16 strings write+search ##search
Browse files Browse the repository at this point in the history
  • Loading branch information
trufae authored Nov 24, 2024
1 parent 31acf6f commit 64da2da
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 9 deletions.
3 changes: 2 additions & 1 deletion libr/core/cmd_search.inc.c
Original file line number Diff line number Diff line change
Expand Up @@ -5003,10 +5003,11 @@ static int cmd_search(void *data, const char *input) {
shift++;
}
size_t strstart = shift + 1;
const bool be = r_config_get_b (core->config, "cfg.bigendian");
r_search_reset (core->search, R_SEARCH_KEYWORD);
r_search_set_distance (core->search, (int)
r_config_get_i (core->config, "search.distance"));
RSearchKeyword *skw = r_search_keyword_new_wide (input + strstart, NULL, NULL, ignorecase);
RSearchKeyword *skw = r_search_keyword_new_wide (input + strstart, NULL, NULL, ignorecase, be);
if (skw) {
r_search_kw_add (core->search, skw);
r_search_begin (core->search);
Expand Down
7 changes: 6 additions & 1 deletion libr/core/cmd_write.inc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1951,10 +1951,15 @@ static int cmd_ww(void *data, const char *input) {
str++;
len = (len - 1) << 1;
char *tmp = (len > 0) ? malloc (len + 1) : NULL;
bool be = r_config_get_b (core->config, "cfg.bigendian");
if (tmp) {
int i;
for (i = 0; i < len; i++) {
if (i % 2) {
bool match = i % 2;
if (be) {
match = !match;
}
if (match) {
tmp[i] = 0;
} else {
tmp[i] = str[i >> 1];
Expand Down
2 changes: 1 addition & 1 deletion libr/include/r_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ R_API void r_search_keyword_free(RSearchKeyword *kw);
R_API RSearchKeyword* r_search_keyword_new(const ut8 *kw, int kwlen, const ut8 *bm, int bmlen, const char *data);
R_API RSearchKeyword* r_search_keyword_new_str(const char *kw, const char *bm, const char *data, bool icase);
R_API RSearchKeyword* r_search_keyword_new_hexstr(const char *xs, const char *data);
R_API RSearchKeyword* r_search_keyword_new_wide(const char *kw, const char *bm, const char *data, bool icase);
R_API RSearchKeyword* r_search_keyword_new_wide(const char *kw, const char *bm, const char *data, bool icase, bool be);
R_API RSearchKeyword* r_search_keyword_new_hex(const char *kwstr, const char *bmstr, const char *data);
R_API RSearchKeyword* r_search_keyword_new_hexmask(const char *kwstr, const char *data);
R_API RSearchKeyword *r_search_keyword_new_regexp(const char *str, const char *data);
Expand Down
1 change: 1 addition & 0 deletions libr/include/r_util/r_utf16.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ R_API int r_utf16_decode(const ut8 *ptr, int ptrlen, RRune *ch, bool bigendian);
R_API int r_utf16le_decode(const ut8 *ptr, int ptrlen, RRune *ch);
R_API int r_utf16be_decode(const ut8 *ptr, int ptrlen, RRune *ch);
R_API int r_utf16le_encode(ut8 *ptr, RRune ch);
R_API int r_utf16be_encode(ut8 *ptr, RRune ch);

#ifdef __cplusplus
}
Expand Down
2 changes: 1 addition & 1 deletion libr/main/rafind2.c
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ static int rafind_open_file(RafindOptions *ro, const char *file, const ut8 *data
k = r_search_keyword_new_hexmask (kw, NULL);
}
} else if (ro->widestr) {
k = r_search_keyword_new_wide (kw, ro->mask, NULL, 0);
k = r_search_keyword_new_wide (kw, ro->mask, NULL, 0, ro->bigendian);
} else {
k = r_search_keyword_new_str (kw, ro->mask, NULL, 0);
}
Expand Down
10 changes: 6 additions & 4 deletions libr/search/keyword.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ R_API RSearchKeyword* r_search_keyword_new_str(const char *kwbuf, const char *bm
return kw;
}

R_API RSearchKeyword* r_search_keyword_new_wide(const char *kwbuf, const char *bmstr, const char *data, bool ignore_case) {
R_API RSearchKeyword* r_search_keyword_new_wide(const char *kwbuf, const char *bmstr, const char *data, bool ignore_case, bool be) {
RSearchKeyword *kw;
int len;
const char *p2;
Expand All @@ -100,7 +100,7 @@ R_API RSearchKeyword* r_search_keyword_new_wide(const char *kwbuf, const char *b
int bmlen = 0;

if (bmstr) {
bmbuf = malloc (strlen (bmstr)+1);
bmbuf = malloc (strlen (bmstr) + 1);
if (!bmbuf) {
return NULL;
}
Expand All @@ -114,7 +114,7 @@ R_API RSearchKeyword* r_search_keyword_new_wide(const char *kwbuf, const char *b
str = malloc ((len + 1) * 2);
for (p2 = kwbuf, p = str; *p2; ) {
RRune ch;
int num_utf8_bytes = r_utf8_decode ((const ut8 *)p2, kwbuf + len - p2, &ch);
const int num_utf8_bytes = r_utf8_decode ((const ut8 *)p2, kwbuf + len - p2, &ch);
if (num_utf8_bytes < 1) {
R_LOG_WARN ("Malformed UTF8 at pos %d", (int)(p2 - kwbuf));
p[0] = *p2;
Expand All @@ -126,7 +126,9 @@ R_API RSearchKeyword* r_search_keyword_new_wide(const char *kwbuf, const char *b
if (ignore_case && ch <= 0xff) {
ch = tolower (ch);
}
int num_wide_bytes = r_utf16le_encode ((ut8 *)p, ch);
const int num_wide_bytes = be
? r_utf16be_encode ((ut8 *)p, ch)
: r_utf16le_encode ((ut8 *)p, ch);
r_warn_if_fail (num_wide_bytes != 0);
p2 += num_utf8_bytes;
p += num_wide_bytes;
Expand Down
23 changes: 22 additions & 1 deletion libr/util/utf16.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* radare2 - LGPL - Copyright 2017 - kazarmy */
/* radare2 - LGPL - Copyright 2017-2024 - kazarmy */

#include <r_types.h>
#include <r_util.h>
Expand Down Expand Up @@ -62,3 +62,24 @@ R_API int r_utf16le_encode(ut8 *ptr, RRune ch) {
}
return 0;
}

/*
 * Convert a unicode RRune into a UTF-16BE buf.
 *
 * ptr: output buffer; up to 4 bytes are written, so it must have room for 4.
 * ch:  code point to encode.
 *
 * Returns the number of bytes written: 2 for code points below 0x10000,
 * 4 for code points below 0x110000 (encoded as a surrogate pair), and 0
 * (nothing written) for anything larger.
 */
R_API int r_utf16be_encode(ut8 *ptr, RRune ch) {
	if (ch < 0x10000) {
		/* single 16-bit code unit, most significant byte first */
		ptr[0] = ch >> 8 & 0xff;
		ptr[1] = ch & 0xff;
		return 2;
	}
	if (ch < 0x110000) {
		RRune high, low;
		ch -= 0x10000;
		high = 0xd800 + (ch >> 10 & 0x3ff);
		low = 0xdc00 + (ch & 0x3ff);
		/*
		 * Big endian only swaps the bytes inside each 16-bit code unit.
		 * The order of the units is the same as in UTF-16LE: the high
		 * (lead) surrogate comes first, then the low (trail) surrogate.
		 */
		ptr[0] = high >> 8 & 0xff;
		ptr[1] = high & 0xff;
		ptr[2] = low >> 8 & 0xff;
		ptr[3] = low & 0xff;
		return 4;
	}
	return 0;
}

0 comments on commit 64da2da

Please sign in to comment.