From d831f535d4c68089f78ac7e4789733c4897dcb04 Mon Sep 17 00:00:00 2001 From: Chris Mayhew Date: Fri, 16 Feb 2024 14:15:56 +0000 Subject: [PATCH] Updated to use YARA 4.5.0 --- MSVC 2019/XT_Main/XT_Main.vcxproj | 6 +- MSVC 2019/XT_Yara/XT_Yara.vcxproj | 6 +- MSVC 2019/libyara/libyara.vcxproj | 4 +- src/libyara/base64.c | 4 +- src/libyara/compiler.c | 26 + src/libyara/exception.h | 196 ++- src/libyara/exec.c | 27 +- src/libyara/exefiles.c | 4 +- src/libyara/grammar.c | 817 +++++++------ src/libyara/grammar.h | 2 +- src/libyara/grammar.y | 115 +- src/libyara/hex_lexer.c | 68 +- src/libyara/hex_lexer.l | 2 +- .../authenticode-parser/authenticode.h | 8 +- src/libyara/include/yara/arena.h | 9 +- src/libyara/include/yara/compiler.h | 13 + src/libyara/include/yara/error.h | 2 + src/libyara/include/yara/globals.h | 7 + src/libyara/include/yara/hex_lexer.h | 6 + src/libyara/include/yara/lexer.h | 20 +- src/libyara/include/yara/libyara.h | 4 +- src/libyara/include/yara/limits.h | 12 + src/libyara/include/yara/object.h | 5 +- src/libyara/include/yara/re.h | 8 +- src/libyara/include/yara/re_lexer.h | 24 +- src/libyara/include/yara/rules.h | 1 + src/libyara/include/yara/types.h | 18 +- src/libyara/include/yara/utils.h | 42 +- src/libyara/lexer.c | 227 ++-- src/libyara/lexer.l | 33 + src/libyara/libyara.c | 11 + src/libyara/modules/console/console.c | 17 +- src/libyara/modules/dex/dex.c | 104 +- src/libyara/modules/dotnet/dotnet.c | 16 +- src/libyara/modules/elf/elf.c | 137 +-- src/libyara/modules/hash/hash.c | 20 +- src/libyara/modules/macho/macho.c | 174 +-- src/libyara/modules/magic/magic.c | 24 +- src/libyara/modules/math/math.c | 143 ++- .../pe/authenticode-parser/authenticode.c | 1069 ++++++++--------- .../pe/authenticode-parser/certificate.c | 609 +++++----- .../pe/authenticode-parser/certificate.h | 21 +- .../pe/authenticode-parser/countersignature.c | 851 ++++++++----- .../pe/authenticode-parser/countersignature.h | 33 +- .../modules/pe/authenticode-parser/helper.c | 68 
+- .../modules/pe/authenticode-parser/helper.h | 27 +- .../modules/pe/authenticode-parser/structs.c | 74 +- .../modules/pe/authenticode-parser/structs.h | 91 +- src/libyara/modules/pe/pe.c | 191 ++- src/libyara/notebook.c | 11 +- src/libyara/object.c | 5 +- src/libyara/parser.c | 56 +- src/libyara/proc/linux.c | 61 +- src/libyara/proc/mach.c | 12 +- src/libyara/re.c | 48 +- src/libyara/re_lexer.c | 206 ++-- src/libyara/re_lexer.l | 124 +- src/libyara/rules.c | 19 + src/libyara/scan.c | 110 +- src/libyara/scanner.c | 139 ++- 60 files changed, 3668 insertions(+), 2519 deletions(-) diff --git a/MSVC 2019/XT_Main/XT_Main.vcxproj b/MSVC 2019/XT_Main/XT_Main.vcxproj index 291dcaf..fb1393b 100644 --- a/MSVC 2019/XT_Main/XT_Main.vcxproj +++ b/MSVC 2019/XT_Main/XT_Main.vcxproj @@ -15,7 +15,7 @@ v4.6.1 ManagedCProj XT_Main - 10.0 + 10.0.22621.0 @@ -23,14 +23,14 @@ false false Unicode - v142 + v143 StaticLibrary false false Unicode - v142 + v143 diff --git a/MSVC 2019/XT_Yara/XT_Yara.vcxproj b/MSVC 2019/XT_Yara/XT_Yara.vcxproj index c8b0a56..c987888 100644 --- a/MSVC 2019/XT_Yara/XT_Yara.vcxproj +++ b/MSVC 2019/XT_Yara/XT_Yara.vcxproj @@ -15,7 +15,7 @@ v4.6.1 ManagedCProj XT_Yara - 10.0 + 10.0.22621.0 XT_Yara @@ -24,14 +24,14 @@ false false Unicode - v142 + v143 DynamicLibrary false false Unicode - v142 + v143 diff --git a/MSVC 2019/libyara/libyara.vcxproj b/MSVC 2019/libyara/libyara.vcxproj index 67095c0..310f61f 100644 --- a/MSVC 2019/libyara/libyara.vcxproj +++ b/MSVC 2019/libyara/libyara.vcxproj @@ -12,12 +12,12 @@ {E236CE39-D8F3-4DB6-985C-F2794FF17746} - 10.0.17134.0 + 10.0.22621.0 false - v142 + v143 StaticLibrary diff --git a/src/libyara/base64.c b/src/libyara/base64.c index 7bfb4d3..4ade061 100644 --- a/src/libyara/base64.c +++ b/src/libyara/base64.c @@ -40,7 +40,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // is expected to trim the appropriate number of leading and trailing bytes. 
// // This is based upon the ideas at: -// https://www.leeholmes.com/blog/2019/12/10/searching-for-content-in-base-64-strings-2/ +// https://www.leeholmes.com/searching-for-content-in-base-64-strings/ // // The caller is responsible for freeing the returned string. // @@ -361,7 +361,7 @@ int _yr_base64_create_regexp( // printf("%s\n", re_str); FAIL_ON_ERROR_WITH_CLEANUP( - yr_re_parse(re_str, re_ast, re_error), yr_free(re_str)); + yr_re_parse(re_str, re_ast, re_error, RE_PARSER_FLAG_NONE), yr_free(re_str)); yr_free(re_str); diff --git a/src/libyara/compiler.c b/src/libyara/compiler.c index 92fee82..505746a 100644 --- a/src/libyara/compiler.c +++ b/src/libyara/compiler.c @@ -240,6 +240,7 @@ YR_API int yr_compiler_create(YR_COMPILER** compiler) new_compiler->re_ast_clbk_user_data = NULL; new_compiler->last_error = ERROR_SUCCESS; new_compiler->last_error_line = 0; + new_compiler->strict_escape = false; new_compiler->current_line = 0; new_compiler->file_name_stack_ptr = 0; new_compiler->fixup_stack_head = NULL; @@ -629,6 +630,31 @@ YR_API int yr_compiler_add_fd( return result; } +YR_API int yr_compiler_add_bytes( + YR_COMPILER* compiler, + const void* rules_data, + size_t rules_size, + const char* namespace_) +{ + // Don't allow calls to yr_compiler_add_bytes() after + // yr_compiler_get_rules() has been called. + assert(compiler->rules == NULL); + + // Don't allow calls to yr_compiler_add_bytes() if a previous call to + // yr_compiler_add_XXXX failed. 
+ assert(compiler->errors == 0); + + if (namespace_ != NULL) + compiler->last_error = _yr_compiler_set_namespace(compiler, namespace_); + else + compiler->last_error = _yr_compiler_set_namespace(compiler, "default"); + + if (compiler->last_error != ERROR_SUCCESS) + return ++compiler->errors; + + return yr_lex_parse_rules_bytes(rules_data, rules_size, compiler); +} + YR_API int yr_compiler_add_string( YR_COMPILER* compiler, const char* rules_string, diff --git a/src/libyara/exception.h b/src/libyara/exception.h index da36587..1bb1740 100644 --- a/src/libyara/exception.h +++ b/src/libyara/exception.h @@ -33,6 +33,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +typedef struct { + void* memfault_from; + void* memfault_to; + void* jump_back; +} jumpinfo; + + #if _WIN32 || __CYGWIN__ #include @@ -83,65 +90,134 @@ static LONG CALLBACK exception_handler(PEXCEPTION_POINTERS ExceptionInfo) static LONG CALLBACK exception_handler(PEXCEPTION_POINTERS ExceptionInfo) { - jmp_buf* jb_ptr; + jumpinfo* jump_info; switch (ExceptionInfo->ExceptionRecord->ExceptionCode) { case EXCEPTION_IN_PAGE_ERROR: - case EXCEPTION_ACCESS_VIOLATION: - jb_ptr = - (jmp_buf*) yr_thread_storage_get_value(&yr_trycatch_trampoline_tls); - - if (jb_ptr != NULL) - longjmp(*jb_ptr, 1); + jump_info = + (jumpinfo*) yr_thread_storage_get_value(&yr_trycatch_trampoline_tls); + + if (jump_info != NULL) + { + void* fault_address = (void*) ExceptionInfo->ExceptionRecord->ExceptionInformation[1]; + if (jump_info->memfault_from <= fault_address && jump_info->memfault_to > fault_address) + { + longjmp(*(jmp_buf*)jump_info->jump_back, 1); + } + } } return EXCEPTION_CONTINUE_SEARCH; } -#define YR_TRYCATCH(_do_, _try_clause_, _catch_clause_) \ - do \ - { \ - if (_do_) \ - { \ - jmp_buf jb; \ - /* Store pointer to sigjmp_buf in TLS */ \ - yr_thread_storage_set_value(&yr_trycatch_trampoline_tls, &jb); \ - HANDLE exh = AddVectoredExceptionHandler(1, exception_handler); \ - if (setjmp(jb) 
== 0) \ - { \ - _try_clause_ \ - } \ - else \ - { \ - _catch_clause_ \ - } \ - RemoveVectoredExceptionHandler(exh); \ - yr_thread_storage_set_value(&yr_trycatch_trampoline_tls, NULL); \ - } \ - else \ - { \ - _try_clause_ \ - } \ +#define YR_TRYCATCH(_do_, _try_clause_, _catch_clause_) \ + do \ + { \ + if (_do_) \ + { \ + jumpinfo jump_info; \ + jump_info.memfault_from = 0; \ + jump_info.memfault_to = 0; \ + jmp_buf jb; \ + jump_info.jump_back = (void*) &jb; \ + /* Store pointer to sigjmp_buf in TLS */ \ + yr_thread_storage_set_value(&yr_trycatch_trampoline_tls, &jump_info); \ + HANDLE exh = AddVectoredExceptionHandler(1, exception_handler); \ + if (setjmp(jb) == 0) \ + { \ + _try_clause_ \ + } \ + else \ + { \ + _catch_clause_ \ + } \ + RemoveVectoredExceptionHandler(exh); \ + yr_thread_storage_set_value(&yr_trycatch_trampoline_tls, NULL); \ + } \ + else \ + { \ + _try_clause_ \ + } \ } while (0) #endif #else +#if defined(__APPLE__) || defined(__linux__) || defined(_AIX) +#define CATCH_SIGSEGV 0 +#define CATCH_SIGBUS 1 +#elif defined(BSD) +// According to #551, older BSD versions use SIGSEGV for invalid mmap access. +// Newer versions, however, use SIGBUS (tested with FreeBSD 13.2 / OpenBSD 7.4). +// To be compatible with both, catch SIGBUS and SIGSEGV. 
+#define CATCH_SIGSEGV 1 +#define CATCH_SIGBUS 1 +#else // For unknown systems, play it safe by catching both +#define CATCH_SIGSEGV 1 +#define CATCH_SIGBUS 1 +#endif + +#include #include #include #include -static void exception_handler(int sig) +static void exception_handler(int sig, siginfo_t * info, void *context) { - if (sig == SIGBUS || sig == SIGSEGV) + if (sig != SIGBUS && sig != SIGSEGV) + { + return; + } + jumpinfo* jump_info = (jumpinfo*) yr_thread_storage_get_value(&yr_trycatch_trampoline_tls); + + if (jump_info != NULL) { - jmp_buf* jb_ptr = - (jmp_buf*) yr_thread_storage_get_value(&yr_trycatch_trampoline_tls); + void* fault_address = (void*) info->si_addr; + if (jump_info->memfault_from <= fault_address && jump_info->memfault_to > fault_address) + { + siglongjmp(*(sigjmp_buf*)jump_info->jump_back, 1); + } + } + + // If we're here, the signal we received didn't originate from YARA. + // In this case, we want to invoke the original signal handler, which may handle the signal. - if (jb_ptr != NULL) - siglongjmp(*jb_ptr, 1); + // Lock the exception handler mutex to prevent simultaneous write access while we read the old signal handler + pthread_mutex_lock(&exception_handler_mutex); + struct sigaction old_handler; + if (sig == SIGBUS) + old_handler = old_sigbus_exception_handler; + else + old_handler = old_sigsegv_exception_handler; + pthread_mutex_unlock(&exception_handler_mutex); + + if (old_handler.sa_flags & SA_SIGINFO) + { + old_handler.sa_sigaction(sig, info, context); + } + else if (old_handler.sa_handler == SIG_DFL) + { + // Old handler is the default action. To do this, set the signal handler back to default and raise the signal. + // This is fairly volatile - since this is not an atomic operation, signals from other threads might also + // cause the default action while we're doing this. However, the default action will typically cause a + // process termination anyway. 
+ pthread_mutex_lock(&exception_handler_mutex); + struct sigaction current_handler; + sigaction(sig, &old_handler, &current_handler); + raise(sig); + sigaction(sig, &current_handler, NULL); + pthread_mutex_unlock(&exception_handler_mutex); + } + else if (old_handler.sa_handler == SIG_IGN) + { + // SIG_IGN wants us to ignore the signal + return; + } + else + { + old_handler.sa_handler(sig); } } @@ -152,18 +228,28 @@ typedef struct sigaction sa; { \ if (_do_) \ { \ - struct sigaction old_sigbus_act; \ - struct sigaction old_sigsegv_act; \ - struct sigaction act; \ + pthread_mutex_lock(&exception_handler_mutex); \ + if (exception_handler_usecount == 0) \ + { \ + struct sigaction act; \ + /* Set exception handler for SIGSEGV / SIGBUS */ \ + act.sa_sigaction = exception_handler; \ + act.sa_flags = SA_SIGINFO | SA_ONSTACK; \ + sigfillset(&act.sa_mask); \ + if (CATCH_SIGBUS) \ + sigaction(SIGBUS, &act, &old_sigbus_exception_handler); \ + if (CATCH_SIGSEGV) \ + sigaction(SIGSEGV, &act, &old_sigsegv_exception_handler); \ + } \ + exception_handler_usecount++; \ + pthread_mutex_unlock(&exception_handler_mutex); \ + jumpinfo ji; \ + ji.memfault_from = 0; \ + ji.memfault_to = 0; \ sigjmp_buf jb; \ - /* Store pointer to sigjmp_buf in TLS */ \ - yr_thread_storage_set_value(&yr_trycatch_trampoline_tls, &jb); \ - /* Set exception handler for SIGBUS and SIGSEGV*/ \ - act.sa_handler = exception_handler; \ - act.sa_flags = 0; /* SA_ONSTACK? 
*/ \ - sigfillset(&act.sa_mask); \ - sigaction(SIGBUS, &act, &old_sigbus_act); \ - sigaction(SIGSEGV, &act, &old_sigsegv_act); \ + ji.jump_back = (void*) &jb; \ + /* Store pointer to jumpinfo in TLS */ \ + yr_thread_storage_set_value(&yr_trycatch_trampoline_tls, &ji); \ if (sigsetjmp(jb, 1) == 0) \ { \ _try_clause_ \ @@ -172,9 +258,17 @@ typedef struct sigaction sa; { \ _catch_clause_ \ } \ - /* Stop capturing SIGBUS and SIGSEGV */ \ - sigaction(SIGBUS, &old_sigbus_act, NULL); \ - sigaction(SIGSEGV, &old_sigsegv_act, NULL); \ + pthread_mutex_lock(&exception_handler_mutex); \ + exception_handler_usecount--; \ + if (exception_handler_usecount == 0) \ + { \ + /* Stop capturing relevant signals */ \ + if (CATCH_SIGBUS) \ + sigaction(SIGBUS, &old_sigbus_exception_handler, NULL); \ + if (CATCH_SIGSEGV) \ + sigaction(SIGSEGV, &old_sigsegv_exception_handler, NULL); \ + } \ + pthread_mutex_unlock(&exception_handler_mutex); \ yr_thread_storage_set_value(&yr_trycatch_trampoline_tls, NULL); \ } \ else \ diff --git a/src/libyara/exec.c b/src/libyara/exec.c index c4eeb7e..a2dd38d 100644 --- a/src/libyara/exec.c +++ b/src/libyara/exec.c @@ -131,7 +131,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. offset <= block->base + block->size - sizeof(type)) \ { \ type result; \ - const uint8_t* data = block->fetch_data(block); \ + const uint8_t* data = yr_fetch_block_data(block); \ if (data == NULL) \ return YR_UNDEFINED; \ result = *(type*) (data + offset - block->base); \ @@ -1166,13 +1166,28 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) current_rule = &context->rules->rules_table[current_rule_idx]; - // If the rule is disabled let's skip its code. - ip = jmp_if(RULE_IS_DISABLED(current_rule), ip); + // If the rule is disabled, let's skip its code. + bool skip_rule = RULE_IS_DISABLED(current_rule); - // Skip the bytes corresponding to the rule's index, but only if not - // taking the jump. 
- if (!RULE_IS_DISABLED(current_rule)) + // The rule is also skipped if it is not required to be evaluated. + skip_rule |= yr_bitmask_is_not_set( + context->required_eval, current_rule_idx); + + ip = jmp_if(skip_rule, ip); + + if (skip_rule) + { + // If the rule is skipped it is false, and if a global rule is false + // we must mark its namespace as unsatisfied. + if (RULE_IS_GLOBAL(current_rule)) + yr_bitmask_set(context->ns_unsatisfied_flags, current_rule->ns->idx); + } + else + { + // If not taking the jump, skip the bytes corresponding to the + // rule's index. ip += sizeof(uint32_t); + } break; diff --git a/src/libyara/exefiles.c b/src/libyara/exefiles.c index 89dcf9b..2ab0631 100644 --- a/src/libyara/exefiles.c +++ b/src/libyara/exefiles.c @@ -405,7 +405,7 @@ uint64_t yr_get_entry_point_address( elf_header32 = (elf32_header_t*) buffer; if (elf_header32->type == ELF_ET_EXEC) - return elf_header32->entry; + return base_address + elf_header32->entry; break; @@ -413,7 +413,7 @@ uint64_t yr_get_entry_point_address( elf_header64 = (elf64_header_t*) buffer; if (elf_header64->type == ELF_ET_EXEC) - return elf_header64->entry; + return base_address + elf_header64->entry; break; } diff --git a/src/libyara/grammar.c b/src/libyara/grammar.c index bfa4a0a..c56aa63 100644 --- a/src/libyara/grammar.c +++ b/src/libyara/grammar.c @@ -132,7 +132,7 @@ // fail_if_error() is used in parser actions for aborting the parsing if an // error has occurred. See fail_with_error for details. #define fail_if_error(e) \ - if (e != ERROR_SUCCESS) \ + if (e != ERROR_SUCCESS && e != ERROR_UNKNOWN_ESCAPE_SEQUENCE) \ { \ fail_with_error(e); \ } @@ -379,7 +379,7 @@ extern int yara_yydebug; #if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED union YYSTYPE { -#line 342 "libyara/grammar.y" +#line 343 "libyara/grammar.y" YR_EXPRESSION expression; SIZED_STRING* sized_string; @@ -942,23 +942,23 @@ static const yytype_int8 yytranslate[] = /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ static const yytype_int16 yyrline[] = { - 0, 361, 361, 362, 363, 364, 365, 366, 367, 375, - 388, 393, 387, 420, 423, 439, 442, 457, 462, 463, - 468, 469, 475, 478, 494, 503, 545, 546, 551, 568, - 582, 596, 610, 628, 629, 635, 634, 651, 650, 671, - 670, 695, 701, 761, 762, 763, 764, 765, 766, 772, - 793, 824, 829, 846, 851, 871, 872, 886, 887, 888, - 889, 890, 894, 895, 909, 913, 1008, 1056, 1117, 1162, - 1163, 1167, 1202, 1255, 1297, 1320, 1326, 1332, 1344, 1354, - 1364, 1374, 1384, 1394, 1404, 1414, 1428, 1443, 1454, 1529, - 1567, 1471, 1695, 1706, 1717, 1736, 1755, 1767, 1804, 1810, - 1816, 1815, 1861, 1860, 1904, 1911, 1918, 1925, 1932, 1939, - 1946, 1950, 1958, 1959, 1984, 2004, 2032, 2106, 2134, 2153, - 2164, 2207, 2223, 2243, 2253, 2252, 2261, 2275, 2276, 2281, - 2291, 2306, 2305, 2318, 2319, 2324, 2357, 2382, 2438, 2445, - 2451, 2457, 2467, 2471, 2479, 2491, 2505, 2512, 2519, 2544, - 2556, 2568, 2580, 2595, 2607, 2622, 2665, 2686, 2721, 2756, - 2790, 2815, 2832, 2842, 2852, 2862, 2872, 2892, 2912 + 0, 362, 362, 363, 364, 365, 366, 367, 368, 376, + 389, 394, 388, 425, 428, 444, 447, 462, 470, 471, + 476, 477, 483, 486, 502, 511, 553, 554, 559, 576, + 590, 604, 618, 636, 637, 643, 642, 659, 658, 679, + 678, 703, 709, 769, 770, 771, 772, 773, 774, 780, + 801, 832, 837, 854, 859, 879, 880, 894, 895, 896, + 897, 898, 902, 903, 917, 921, 1017, 1065, 1126, 1171, + 1172, 1176, 1211, 1264, 1319, 1350, 1357, 1364, 1377, 1388, + 1399, 1410, 1421, 1432, 1443, 1454, 1469, 1485, 1497, 1572, + 1610, 1514, 1739, 1762, 1774, 1802, 1821, 1844, 1892, 1899, + 1906, 1905, 1952, 1951, 2002, 2010, 2018, 2026, 2034, 2042, + 2050, 2054, 2062, 2063, 2088, 2108, 2136, 2210, 2242, 2260, + 2271, 
2314, 2330, 2350, 2360, 2359, 2368, 2382, 2383, 2388, + 2398, 2413, 2412, 2425, 2426, 2431, 2464, 2489, 2545, 2552, + 2558, 2564, 2574, 2578, 2586, 2598, 2612, 2619, 2626, 2651, + 2663, 2675, 2687, 2702, 2714, 2729, 2772, 2793, 2828, 2863, + 2897, 2922, 2939, 2949, 2959, 2969, 2979, 2999, 3019 }; #endif @@ -1772,61 +1772,61 @@ yydestruct (const char *yymsg, switch (yykind) { case YYSYMBOL__IDENTIFIER_: /* "identifier" */ -#line 312 "libyara/grammar.y" +#line 313 "libyara/grammar.y" { yr_free(((*yyvaluep).c_string)); ((*yyvaluep).c_string) = NULL; } #line 1778 "libyara/grammar.c" break; case YYSYMBOL__STRING_IDENTIFIER_: /* "string identifier" */ -#line 316 "libyara/grammar.y" +#line 317 "libyara/grammar.y" { yr_free(((*yyvaluep).c_string)); ((*yyvaluep).c_string) = NULL; } #line 1784 "libyara/grammar.c" break; case YYSYMBOL__STRING_COUNT_: /* "string count" */ -#line 313 "libyara/grammar.y" +#line 314 "libyara/grammar.y" { yr_free(((*yyvaluep).c_string)); ((*yyvaluep).c_string) = NULL; } #line 1790 "libyara/grammar.c" break; case YYSYMBOL__STRING_OFFSET_: /* "string offset" */ -#line 314 "libyara/grammar.y" +#line 315 "libyara/grammar.y" { yr_free(((*yyvaluep).c_string)); ((*yyvaluep).c_string) = NULL; } #line 1796 "libyara/grammar.c" break; case YYSYMBOL__STRING_LENGTH_: /* "string length" */ -#line 315 "libyara/grammar.y" +#line 316 "libyara/grammar.y" { yr_free(((*yyvaluep).c_string)); ((*yyvaluep).c_string) = NULL; } #line 1802 "libyara/grammar.c" break; case YYSYMBOL__STRING_IDENTIFIER_WITH_WILDCARD_: /* "string identifier with wildcard" */ -#line 317 "libyara/grammar.y" +#line 318 "libyara/grammar.y" { yr_free(((*yyvaluep).c_string)); ((*yyvaluep).c_string) = NULL; } #line 1808 "libyara/grammar.c" break; case YYSYMBOL__TEXT_STRING_: /* "text string" */ -#line 318 "libyara/grammar.y" +#line 319 "libyara/grammar.y" { yr_free(((*yyvaluep).sized_string)); ((*yyvaluep).sized_string) = NULL; } #line 1814 "libyara/grammar.c" break; case YYSYMBOL__HEX_STRING_: /* 
"hex string" */ -#line 319 "libyara/grammar.y" +#line 320 "libyara/grammar.y" { yr_free(((*yyvaluep).sized_string)); ((*yyvaluep).sized_string) = NULL; } #line 1820 "libyara/grammar.c" break; case YYSYMBOL__REGEXP_: /* "regular expression" */ -#line 320 "libyara/grammar.y" +#line 321 "libyara/grammar.y" { yr_free(((*yyvaluep).sized_string)); ((*yyvaluep).sized_string) = NULL; } #line 1826 "libyara/grammar.c" break; case YYSYMBOL_string_modifiers: /* string_modifiers */ -#line 333 "libyara/grammar.y" +#line 334 "libyara/grammar.y" { if (((*yyvaluep).modifier).alphabet != NULL) { @@ -1838,7 +1838,7 @@ yydestruct (const char *yymsg, break; case YYSYMBOL_string_modifier: /* string_modifier */ -#line 325 "libyara/grammar.y" +#line 326 "libyara/grammar.y" { if (((*yyvaluep).modifier).alphabet != NULL) { @@ -1850,13 +1850,13 @@ yydestruct (const char *yymsg, break; case YYSYMBOL_arguments: /* arguments */ -#line 322 "libyara/grammar.y" +#line 323 "libyara/grammar.y" { yr_free(((*yyvaluep).c_string)); ((*yyvaluep).c_string) = NULL; } #line 1856 "libyara/grammar.c" break; case YYSYMBOL_arguments_list: /* arguments_list */ -#line 323 "libyara/grammar.y" +#line 324 "libyara/grammar.y" { yr_free(((*yyvaluep).c_string)); ((*yyvaluep).c_string) = NULL; } #line 1862 "libyara/grammar.c" break; @@ -2135,7 +2135,7 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); switch (yyn) { case 8: /* rules: rules "end of included file" */ -#line 368 "libyara/grammar.y" +#line 369 "libyara/grammar.y" { _yr_compiler_pop_file_name(compiler); } @@ -2143,7 +2143,7 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); break; case 9: /* import: "" "text string" */ -#line 376 "libyara/grammar.y" +#line 377 "libyara/grammar.y" { int result = yr_parser_reduce_import(yyscanner, (yyvsp[0].sized_string)); @@ -2155,7 +2155,7 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); break; case 10: /* @1: %empty */ -#line 388 "libyara/grammar.y" +#line 389 "libyara/grammar.y" { 
fail_if_error(yr_parser_reduce_rule_declaration_phase_1( yyscanner, (int32_t) (yyvsp[-2].integer), (yyvsp[0].c_string), &(yyval.rule))); @@ -2164,7 +2164,7 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); break; case 11: /* $@2: %empty */ -#line 393 "libyara/grammar.y" +#line 394 "libyara/grammar.y" { YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr( compiler->arena, &(yyvsp[-4].rule)); @@ -2182,8 +2182,12 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); break; case 12: /* rule: rule_modifiers "" "identifier" @1 tags '{' meta strings $@2 condition '}' */ -#line 407 "libyara/grammar.y" +#line 408 "libyara/grammar.y" { + YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr( + compiler->arena, &(yyvsp[-7].rule)); + rule->required_strings = (yyvsp[-1].expression).required_strings.count; + int result = yr_parser_reduce_rule_declaration_phase_2( yyscanner, &(yyvsp[-7].rule)); // rule created in phase 1 @@ -2191,19 +2195,19 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 2195 "libyara/grammar.c" +#line 2199 "libyara/grammar.c" break; case 13: /* meta: %empty */ -#line 420 "libyara/grammar.y" +#line 425 "libyara/grammar.y" { (yyval.meta) = YR_ARENA_NULL_REF; } -#line 2203 "libyara/grammar.c" +#line 2207 "libyara/grammar.c" break; case 14: /* meta: "" ':' meta_declarations */ -#line 424 "libyara/grammar.y" +#line 429 "libyara/grammar.y" { YR_META* meta = yr_arena_get_ptr( compiler->arena, @@ -2214,19 +2218,19 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.meta) = (yyvsp[0].meta); } -#line 2218 "libyara/grammar.c" +#line 2222 "libyara/grammar.c" break; case 15: /* strings: %empty */ -#line 439 "libyara/grammar.y" +#line 444 "libyara/grammar.y" { (yyval.string) = YR_ARENA_NULL_REF; } -#line 2226 "libyara/grammar.c" +#line 2230 "libyara/grammar.c" break; case 16: /* strings: "" ':' string_declarations */ -#line 443 "libyara/grammar.y" +#line 448 "libyara/grammar.y" { YR_STRING* string = (YR_STRING*) yr_arena_get_ptr( 
compiler->arena, @@ -2237,43 +2241,51 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.string) = (yyvsp[0].string); } -#line 2241 "libyara/grammar.c" +#line 2245 "libyara/grammar.c" + break; + + case 17: /* condition: "" ':' boolean_expression */ +#line 463 "libyara/grammar.y" + { + (yyval.expression) = (yyvsp[0].expression); + } +#line 2253 "libyara/grammar.c" break; case 18: /* rule_modifiers: %empty */ -#line 462 "libyara/grammar.y" +#line 470 "libyara/grammar.y" { (yyval.integer) = 0; } -#line 2247 "libyara/grammar.c" +#line 2259 "libyara/grammar.c" break; case 19: /* rule_modifiers: rule_modifiers rule_modifier */ -#line 463 "libyara/grammar.y" +#line 471 "libyara/grammar.y" { (yyval.integer) = (yyvsp[-1].integer) | (yyvsp[0].integer); } -#line 2253 "libyara/grammar.c" +#line 2265 "libyara/grammar.c" break; case 20: /* rule_modifier: "" */ -#line 468 "libyara/grammar.y" +#line 476 "libyara/grammar.y" { (yyval.integer) = RULE_FLAGS_PRIVATE; } -#line 2259 "libyara/grammar.c" +#line 2271 "libyara/grammar.c" break; case 21: /* rule_modifier: "" */ -#line 469 "libyara/grammar.y" +#line 477 "libyara/grammar.y" { (yyval.integer) = RULE_FLAGS_GLOBAL; } -#line 2265 "libyara/grammar.c" +#line 2277 "libyara/grammar.c" break; case 22: /* tags: %empty */ -#line 475 "libyara/grammar.y" +#line 483 "libyara/grammar.y" { (yyval.tag) = YR_ARENA_NULL_REF; } -#line 2273 "libyara/grammar.c" +#line 2285 "libyara/grammar.c" break; case 23: /* tags: ':' tag_list */ -#line 479 "libyara/grammar.y" +#line 487 "libyara/grammar.y" { // Tags list is represented in the arena as a sequence // of null-terminated strings, the sequence ends with an @@ -2285,11 +2297,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.tag) = (yyvsp[0].tag); } -#line 2289 "libyara/grammar.c" +#line 2301 "libyara/grammar.c" break; case 24: /* tag_list: "identifier" */ -#line 495 "libyara/grammar.y" +#line 503 "libyara/grammar.y" { int result = yr_arena_write_string( 
yyget_extra(yyscanner)->arena, YR_SZ_POOL, (yyvsp[0].c_string), &(yyval.tag)); @@ -2298,11 +2310,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 2302 "libyara/grammar.c" +#line 2314 "libyara/grammar.c" break; case 25: /* tag_list: tag_list "identifier" */ -#line 504 "libyara/grammar.y" +#line 512 "libyara/grammar.y" { YR_ARENA_REF ref; @@ -2339,23 +2351,23 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.tag) = (yyvsp[-1].tag); } -#line 2343 "libyara/grammar.c" +#line 2355 "libyara/grammar.c" break; case 26: /* meta_declarations: meta_declaration */ -#line 545 "libyara/grammar.y" +#line 553 "libyara/grammar.y" { (yyval.meta) = (yyvsp[0].meta); } -#line 2349 "libyara/grammar.c" +#line 2361 "libyara/grammar.c" break; case 27: /* meta_declarations: meta_declarations meta_declaration */ -#line 546 "libyara/grammar.y" +#line 554 "libyara/grammar.y" { (yyval.meta) = (yyvsp[-1].meta); } -#line 2355 "libyara/grammar.c" +#line 2367 "libyara/grammar.c" break; case 28: /* meta_declaration: "identifier" '=' "text string" */ -#line 552 "libyara/grammar.y" +#line 560 "libyara/grammar.y" { SIZED_STRING* sized_string = (yyvsp[0].sized_string); @@ -2372,11 +2384,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 2376 "libyara/grammar.c" +#line 2388 "libyara/grammar.c" break; case 29: /* meta_declaration: "identifier" '=' "integer number" */ -#line 569 "libyara/grammar.y" +#line 577 "libyara/grammar.y" { int result = yr_parser_reduce_meta_declaration( yyscanner, @@ -2390,11 +2402,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 2394 "libyara/grammar.c" +#line 2406 "libyara/grammar.c" break; case 30: /* meta_declaration: "identifier" '=' '-' "integer number" */ -#line 583 "libyara/grammar.y" +#line 591 "libyara/grammar.y" { int result = yr_parser_reduce_meta_declaration( yyscanner, @@ -2408,11 +2420,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); 
fail_if_error(result); } -#line 2412 "libyara/grammar.c" +#line 2424 "libyara/grammar.c" break; case 31: /* meta_declaration: "identifier" '=' "" */ -#line 597 "libyara/grammar.y" +#line 605 "libyara/grammar.y" { int result = yr_parser_reduce_meta_declaration( yyscanner, @@ -2426,11 +2438,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 2430 "libyara/grammar.c" +#line 2442 "libyara/grammar.c" break; case 32: /* meta_declaration: "identifier" '=' "" */ -#line 611 "libyara/grammar.y" +#line 619 "libyara/grammar.y" { int result = yr_parser_reduce_meta_declaration( yyscanner, @@ -2444,31 +2456,31 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 2448 "libyara/grammar.c" +#line 2460 "libyara/grammar.c" break; case 33: /* string_declarations: string_declaration */ -#line 628 "libyara/grammar.y" +#line 636 "libyara/grammar.y" { (yyval.string) = (yyvsp[0].string); } -#line 2454 "libyara/grammar.c" +#line 2466 "libyara/grammar.c" break; case 34: /* string_declarations: string_declarations string_declaration */ -#line 629 "libyara/grammar.y" +#line 637 "libyara/grammar.y" { (yyval.string) = (yyvsp[-1].string); } -#line 2460 "libyara/grammar.c" +#line 2472 "libyara/grammar.c" break; case 35: /* $@3: %empty */ -#line 635 "libyara/grammar.y" +#line 643 "libyara/grammar.y" { compiler->current_line = yyget_lineno(yyscanner); } -#line 2468 "libyara/grammar.c" +#line 2480 "libyara/grammar.c" break; case 36: /* string_declaration: "string identifier" '=' $@3 "text string" string_modifiers */ -#line 639 "libyara/grammar.y" +#line 647 "libyara/grammar.y" { int result = yr_parser_reduce_string_declaration( yyscanner, (yyvsp[0].modifier), (yyvsp[-4].c_string), (yyvsp[-1].sized_string), &(yyval.string)); @@ -2480,19 +2492,19 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); compiler->current_line = 0; } -#line 2484 "libyara/grammar.c" +#line 2496 "libyara/grammar.c" break; case 37: /* 
$@4: %empty */ -#line 651 "libyara/grammar.y" +#line 659 "libyara/grammar.y" { compiler->current_line = yyget_lineno(yyscanner); } -#line 2492 "libyara/grammar.c" +#line 2504 "libyara/grammar.c" break; case 38: /* string_declaration: "string identifier" '=' $@4 "regular expression" regexp_modifiers */ -#line 655 "libyara/grammar.y" +#line 663 "libyara/grammar.y" { int result; @@ -2508,19 +2520,19 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); compiler->current_line = 0; } -#line 2512 "libyara/grammar.c" +#line 2524 "libyara/grammar.c" break; case 39: /* $@5: %empty */ -#line 671 "libyara/grammar.y" +#line 679 "libyara/grammar.y" { compiler->current_line = yyget_lineno(yyscanner); } -#line 2520 "libyara/grammar.c" +#line 2532 "libyara/grammar.c" break; case 40: /* string_declaration: "string identifier" '=' $@5 "hex string" hex_modifiers */ -#line 675 "libyara/grammar.y" +#line 683 "libyara/grammar.y" { int result; @@ -2536,22 +2548,22 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); compiler->current_line = 0; } -#line 2540 "libyara/grammar.c" +#line 2552 "libyara/grammar.c" break; case 41: /* string_modifiers: %empty */ -#line 695 "libyara/grammar.y" +#line 703 "libyara/grammar.y" { (yyval.modifier).flags = 0; (yyval.modifier).xor_min = 0; (yyval.modifier).xor_max = 0; (yyval.modifier).alphabet = NULL; } -#line 2551 "libyara/grammar.c" +#line 2563 "libyara/grammar.c" break; case 42: /* string_modifiers: string_modifiers string_modifier */ -#line 702 "libyara/grammar.y" +#line 710 "libyara/grammar.y" { (yyval.modifier) = (yyvsp[-1].modifier); @@ -2607,51 +2619,51 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.modifier).flags = (yyval.modifier).flags | (yyvsp[0].modifier).flags; } } -#line 2611 "libyara/grammar.c" +#line 2623 "libyara/grammar.c" break; case 43: /* string_modifier: "" */ -#line 761 "libyara/grammar.y" +#line 769 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_WIDE; } -#line 2617 "libyara/grammar.c" +#line 2629 
"libyara/grammar.c" break; case 44: /* string_modifier: "" */ -#line 762 "libyara/grammar.y" +#line 770 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_ASCII; } -#line 2623 "libyara/grammar.c" +#line 2635 "libyara/grammar.c" break; case 45: /* string_modifier: "" */ -#line 763 "libyara/grammar.y" +#line 771 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_NO_CASE; } -#line 2629 "libyara/grammar.c" +#line 2641 "libyara/grammar.c" break; case 46: /* string_modifier: "" */ -#line 764 "libyara/grammar.y" +#line 772 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_FULL_WORD; } -#line 2635 "libyara/grammar.c" +#line 2647 "libyara/grammar.c" break; case 47: /* string_modifier: "" */ -#line 765 "libyara/grammar.y" +#line 773 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_PRIVATE; } -#line 2641 "libyara/grammar.c" +#line 2653 "libyara/grammar.c" break; case 48: /* string_modifier: "" */ -#line 767 "libyara/grammar.y" +#line 775 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_XOR; (yyval.modifier).xor_min = 0; (yyval.modifier).xor_max = 255; } -#line 2651 "libyara/grammar.c" +#line 2663 "libyara/grammar.c" break; case 49: /* string_modifier: "" '(' "integer number" ')' */ -#line 773 "libyara/grammar.y" +#line 781 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -2667,11 +2679,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.modifier).xor_min = (uint8_t) (yyvsp[-1].integer); (yyval.modifier).xor_max = (uint8_t) (yyvsp[-1].integer); } -#line 2671 "libyara/grammar.c" +#line 2683 "libyara/grammar.c" break; case 50: /* string_modifier: "" '(' "integer number" '-' "integer number" ')' */ -#line 794 "libyara/grammar.y" +#line 802 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -2702,20 +2714,20 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.modifier).xor_min = (uint8_t) (yyvsp[-3].integer); (yyval.modifier).xor_max = (uint8_t) (yyvsp[-1].integer); } -#line 2706 "libyara/grammar.c" 
+#line 2718 "libyara/grammar.c" break; case 51: /* string_modifier: "" */ -#line 825 "libyara/grammar.y" +#line 833 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_BASE64; (yyval.modifier).alphabet = ss_new(DEFAULT_BASE64_ALPHABET); } -#line 2715 "libyara/grammar.c" +#line 2727 "libyara/grammar.c" break; case 52: /* string_modifier: "" '(' "text string" ')' */ -#line 830 "libyara/grammar.y" +#line 838 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -2732,20 +2744,20 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.modifier).flags = STRING_FLAGS_BASE64; (yyval.modifier).alphabet = (yyvsp[-1].sized_string); } -#line 2736 "libyara/grammar.c" +#line 2748 "libyara/grammar.c" break; case 53: /* string_modifier: "" */ -#line 847 "libyara/grammar.y" +#line 855 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_BASE64_WIDE; (yyval.modifier).alphabet = ss_new(DEFAULT_BASE64_ALPHABET); } -#line 2745 "libyara/grammar.c" +#line 2757 "libyara/grammar.c" break; case 54: /* string_modifier: "" '(' "text string" ')' */ -#line 852 "libyara/grammar.y" +#line 860 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -2762,17 +2774,17 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.modifier).flags = STRING_FLAGS_BASE64_WIDE; (yyval.modifier).alphabet = (yyvsp[-1].sized_string); } -#line 2766 "libyara/grammar.c" +#line 2778 "libyara/grammar.c" break; case 55: /* regexp_modifiers: %empty */ -#line 871 "libyara/grammar.y" +#line 879 "libyara/grammar.y" { (yyval.modifier).flags = 0; } -#line 2772 "libyara/grammar.c" +#line 2784 "libyara/grammar.c" break; case 56: /* regexp_modifiers: regexp_modifiers regexp_modifier */ -#line 873 "libyara/grammar.y" +#line 881 "libyara/grammar.y" { if ((yyvsp[-1].modifier).flags & (yyvsp[0].modifier).flags) { @@ -2783,47 +2795,47 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.modifier).flags = (yyvsp[-1].modifier).flags | (yyvsp[0].modifier).flags; } } -#line 2787 "libyara/grammar.c" +#line 
2799 "libyara/grammar.c" break; case 57: /* regexp_modifier: "" */ -#line 886 "libyara/grammar.y" +#line 894 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_WIDE; } -#line 2793 "libyara/grammar.c" +#line 2805 "libyara/grammar.c" break; case 58: /* regexp_modifier: "" */ -#line 887 "libyara/grammar.y" +#line 895 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_ASCII; } -#line 2799 "libyara/grammar.c" +#line 2811 "libyara/grammar.c" break; case 59: /* regexp_modifier: "" */ -#line 888 "libyara/grammar.y" +#line 896 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_NO_CASE; } -#line 2805 "libyara/grammar.c" +#line 2817 "libyara/grammar.c" break; case 60: /* regexp_modifier: "" */ -#line 889 "libyara/grammar.y" +#line 897 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_FULL_WORD; } -#line 2811 "libyara/grammar.c" +#line 2823 "libyara/grammar.c" break; case 61: /* regexp_modifier: "" */ -#line 890 "libyara/grammar.y" +#line 898 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_PRIVATE; } -#line 2817 "libyara/grammar.c" +#line 2829 "libyara/grammar.c" break; case 62: /* hex_modifiers: %empty */ -#line 894 "libyara/grammar.y" +#line 902 "libyara/grammar.y" { (yyval.modifier).flags = 0; } -#line 2823 "libyara/grammar.c" +#line 2835 "libyara/grammar.c" break; case 63: /* hex_modifiers: hex_modifiers hex_modifier */ -#line 896 "libyara/grammar.y" +#line 904 "libyara/grammar.y" { if ((yyvsp[-1].modifier).flags & (yyvsp[0].modifier).flags) { @@ -2834,17 +2846,17 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.modifier).flags = (yyvsp[-1].modifier).flags | (yyvsp[0].modifier).flags; } } -#line 2838 "libyara/grammar.c" +#line 2850 "libyara/grammar.c" break; case 64: /* hex_modifier: "" */ -#line 909 "libyara/grammar.y" +#line 917 "libyara/grammar.y" { (yyval.modifier).flags = STRING_FLAGS_PRIVATE; } -#line 2844 "libyara/grammar.c" +#line 2856 "libyara/grammar.c" break; case 65: /* identifier: "identifier" */ 
-#line 914 "libyara/grammar.y" +#line 922 "libyara/grammar.y" { YR_EXPRESSION expr; @@ -2926,6 +2938,7 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; (yyval.expression).value.integer = YR_UNDEFINED; (yyval.expression).identifier.ptr = NULL; + (yyval.expression).required_strings.count = 0; } else { @@ -2939,11 +2952,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 2943 "libyara/grammar.c" +#line 2956 "libyara/grammar.c" break; case 66: /* identifier: identifier '.' "identifier" */ -#line 1009 "libyara/grammar.y" +#line 1018 "libyara/grammar.y" { int result = ERROR_SUCCESS; YR_OBJECT* field = NULL; @@ -2991,11 +3004,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 2995 "libyara/grammar.c" +#line 3008 "libyara/grammar.c" break; case 67: /* identifier: identifier '[' primary_expression ']' */ -#line 1057 "libyara/grammar.y" +#line 1066 "libyara/grammar.y" { int result = ERROR_SUCCESS; YR_OBJECT_ARRAY* array; @@ -3055,11 +3068,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 3059 "libyara/grammar.c" +#line 3072 "libyara/grammar.c" break; case 68: /* identifier: identifier '(' arguments ')' */ -#line 1118 "libyara/grammar.y" +#line 1127 "libyara/grammar.y" { YR_ARENA_REF ref = YR_ARENA_NULL_REF; int result = ERROR_SUCCESS; @@ -3100,23 +3113,23 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 3104 "libyara/grammar.c" +#line 3117 "libyara/grammar.c" break; case 69: /* arguments: %empty */ -#line 1162 "libyara/grammar.y" +#line 1171 "libyara/grammar.y" { (yyval.c_string) = yr_strdup(""); } -#line 3110 "libyara/grammar.c" +#line 3123 "libyara/grammar.c" break; case 70: /* arguments: arguments_list */ -#line 1163 "libyara/grammar.y" +#line 1172 "libyara/grammar.y" { (yyval.c_string) = (yyvsp[0].c_string); } -#line 3116 "libyara/grammar.c" +#line 3129 
"libyara/grammar.c" break; case 71: /* arguments_list: expression */ -#line 1168 "libyara/grammar.y" +#line 1177 "libyara/grammar.y" { (yyval.c_string) = (char*) yr_malloc(YR_MAX_FUNCTION_ARGS + 1); @@ -3151,11 +3164,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); assert(compiler->last_error != ERROR_SUCCESS); } } -#line 3155 "libyara/grammar.c" +#line 3168 "libyara/grammar.c" break; case 72: /* arguments_list: arguments_list ',' expression */ -#line 1203 "libyara/grammar.y" +#line 1212 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -3204,17 +3217,18 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.c_string) = (yyvsp[-2].c_string); } -#line 3208 "libyara/grammar.c" +#line 3221 "libyara/grammar.c" break; case 73: /* regexp: "regular expression" */ -#line 1256 "libyara/grammar.y" +#line 1265 "libyara/grammar.y" { YR_ARENA_REF re_ref; RE_ERROR error; int result = ERROR_SUCCESS; int re_flags = 0; + int parser_flags = RE_PARSER_FLAG_NONE; if ((yyvsp[0].sized_string)->flags & SIZED_STRING_FLAGS_NO_CASE) re_flags |= RE_FLAGS_NO_CASE; @@ -3222,9 +3236,13 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); if ((yyvsp[0].sized_string)->flags & SIZED_STRING_FLAGS_DOT_ALL) re_flags |= RE_FLAGS_DOT_ALL; + if (compiler->strict_escape) + parser_flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES; + result = yr_re_compile( (yyvsp[0].sized_string)->c_string, re_flags, + parser_flags, compiler->arena, &re_ref, &error); @@ -3234,23 +3252,31 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); if (result == ERROR_INVALID_REGULAR_EXPRESSION) yr_compiler_set_error_extra_info(compiler, error.message); - if (result == ERROR_SUCCESS) + if (result == ERROR_SUCCESS || result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + yywarning( + yyscanner, + "unknown escape sequence"); + } result = yr_parser_emit_with_arg_reloc( yyscanner, OP_PUSH, yr_arena_ref_to_ptr(compiler->arena, &re_ref), NULL, NULL); + } 
fail_if_error(result); (yyval.expression).type = EXPRESSION_TYPE_REGEXP; } -#line 3250 "libyara/grammar.c" +#line 3276 "libyara/grammar.c" break; case 74: /* boolean_expression: expression */ -#line 1298 "libyara/grammar.y" +#line 1320 "libyara/grammar.y" { if ((yyvsp[0].expression).type == EXPRESSION_TYPE_STRING) { @@ -3267,34 +3293,44 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(yr_parser_emit( yyscanner, OP_STR_TO_BOOL, NULL)); } + if ((yyvsp[0].expression).type != EXPRESSION_TYPE_BOOLEAN) + { + (yyval.expression).required_strings.count = 0; + } + else + { + (yyval.expression).required_strings.count = (yyvsp[0].expression).required_strings.count; + } (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3274 "libyara/grammar.c" +#line 3308 "libyara/grammar.c" break; case 75: /* expression: "" */ -#line 1321 "libyara/grammar.y" +#line 1351 "libyara/grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, 1)); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3284 "libyara/grammar.c" +#line 3319 "libyara/grammar.c" break; case 76: /* expression: "" */ -#line 1327 "libyara/grammar.y" +#line 1358 "libyara/grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, 0)); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3294 "libyara/grammar.c" +#line 3330 "libyara/grammar.c" break; case 77: /* expression: primary_expression "" regexp */ -#line 1333 "libyara/grammar.y" +#line 1365 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "matches"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_REGEXP, "matches"); @@ -3305,12 +3341,13 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); NULL)); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3310 "libyara/grammar.c" +#line 3347 "libyara/grammar.c" break; case 78: /* 
expression: primary_expression "" primary_expression */ -#line 1345 "libyara/grammar.y" +#line 1378 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "contains"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "contains"); @@ -3319,12 +3356,13 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yyscanner, OP_CONTAINS, NULL)); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3324 "libyara/grammar.c" +#line 3362 "libyara/grammar.c" break; case 79: /* expression: primary_expression "" primary_expression */ -#line 1355 "libyara/grammar.y" +#line 1389 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "icontains"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "icontains"); @@ -3333,12 +3371,13 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yyscanner, OP_ICONTAINS, NULL)); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3338 "libyara/grammar.c" +#line 3377 "libyara/grammar.c" break; case 80: /* expression: primary_expression "" primary_expression */ -#line 1365 "libyara/grammar.y" +#line 1400 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "startswith"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "startswith"); @@ -3347,12 +3386,13 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yyscanner, OP_STARTSWITH, NULL)); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3352 "libyara/grammar.c" +#line 3392 "libyara/grammar.c" break; case 81: /* expression: primary_expression "" primary_expression */ -#line 1375 "libyara/grammar.y" +#line 1411 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "istartswith"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "istartswith"); @@ -3361,12 +3401,13 @@ YYSTYPE yylval 
YY_INITIAL_VALUE (= yyval_default); yyscanner, OP_ISTARTSWITH, NULL)); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3366 "libyara/grammar.c" +#line 3407 "libyara/grammar.c" break; case 82: /* expression: primary_expression "" primary_expression */ -#line 1385 "libyara/grammar.y" +#line 1422 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "endswith"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "endswith"); @@ -3375,12 +3416,13 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yyscanner, OP_ENDSWITH, NULL)); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3380 "libyara/grammar.c" +#line 3422 "libyara/grammar.c" break; case 83: /* expression: primary_expression "" primary_expression */ -#line 1395 "libyara/grammar.y" +#line 1433 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "iendswith"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "iendswith"); @@ -3389,12 +3431,13 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yyscanner, OP_IENDSWITH, NULL)); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3394 "libyara/grammar.c" +#line 3437 "libyara/grammar.c" break; case 84: /* expression: primary_expression "" primary_expression */ -#line 1405 "libyara/grammar.y" +#line 1444 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "iequals"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "iequals"); @@ -3403,12 +3446,13 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yyscanner, OP_IEQUALS, NULL)); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3408 "libyara/grammar.c" +#line 3452 "libyara/grammar.c" break; case 85: /* expression: "string identifier" */ -#line 1415 "libyara/grammar.y" 
+#line 1455 "libyara/grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, @@ -3421,12 +3465,13 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 1; } -#line 3426 "libyara/grammar.c" +#line 3471 "libyara/grammar.c" break; case 86: /* expression: "string identifier" "" primary_expression */ -#line 1429 "libyara/grammar.y" +#line 1470 "libyara/grammar.y" { int result; @@ -3439,13 +3484,14 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); + (yyval.expression).required_strings.count = 1; (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3445 "libyara/grammar.c" +#line 3491 "libyara/grammar.c" break; case 87: /* expression: "string identifier" "" range */ -#line 1444 "libyara/grammar.y" +#line 1486 "libyara/grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-2].c_string), OP_FOUND_IN, YR_UNDEFINED); @@ -3454,13 +3500,14 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); + (yyval.expression).required_strings.count = 1; (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3460 "libyara/grammar.c" +#line 3507 "libyara/grammar.c" break; case 88: /* expression: "" for_expression error */ -#line 1455 "libyara/grammar.y" +#line 1498 "libyara/grammar.y" { // Free all the loop variable identifiers, including the variables for // the current loop (represented by loop_index), and set loop_index to @@ -3477,11 +3524,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); compiler->loop_index = -1; YYERROR; } -#line 3481 "libyara/grammar.c" +#line 3528 "libyara/grammar.c" break; case 89: /* $@6: %empty */ -#line 1529 "libyara/grammar.y" +#line 1572 "libyara/grammar.y" { // var_frame is used for accessing local variables used in this loop. 
// All local variables are accessed using var_frame as a reference, @@ -3519,11 +3566,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(yr_parser_emit_with_arg( yyscanner, OP_POP_M, var_frame + 2, NULL, NULL)); } -#line 3523 "libyara/grammar.c" +#line 3570 "libyara/grammar.c" break; case 90: /* $@7: %empty */ -#line 1567 "libyara/grammar.y" +#line 1610 "libyara/grammar.y" { YR_LOOP_CONTEXT* loop_ctx = &compiler->loop[compiler->loop_index]; YR_FIXUP* fixup; @@ -3572,11 +3619,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); loop_ctx->start_ref = loop_start_ref; } -#line 3576 "libyara/grammar.c" +#line 3623 "libyara/grammar.c" break; case 91: /* expression: "" for_expression $@6 for_iteration ':' $@7 '(' boolean_expression ')' */ -#line 1616 "libyara/grammar.y" +#line 1659 "libyara/grammar.y" { int32_t jmp_offset; YR_FIXUP* fixup; @@ -3655,27 +3702,40 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); compiler->loop_index--; (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3660 "libyara/grammar.c" +#line 3708 "libyara/grammar.c" break; case 92: /* expression: for_expression "" string_set */ -#line 1696 "libyara/grammar.y" +#line 1740 "libyara/grammar.y" { if ((yyvsp[-2].expression).type == EXPRESSION_TYPE_INTEGER && (yyvsp[-2].expression).value.integer > (yyvsp[0].integer)) { yywarning(yyscanner, "expression always false - requesting %" PRId64 " of %" PRId64 ".", (yyvsp[-2].expression).value.integer, (yyvsp[0].integer)); } + + if (((yyvsp[-2].expression).type == EXPRESSION_TYPE_INTEGER && (yyvsp[-2].expression).value.integer > 0) || + ((yyvsp[-2].expression).type == EXPRESSION_TYPE_QUANTIFIER && + ((yyvsp[-2].expression).value.integer == FOR_EXPRESSION_ALL || (yyvsp[-2].expression).value.integer == FOR_EXPRESSION_ANY))) + { + (yyval.expression).required_strings.count = 1; + } + else + { + (yyval.expression).required_strings.count = 0; + } + yr_parser_emit_with_arg(yyscanner, 
OP_OF, OF_STRING_SET, NULL, NULL); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3675 "libyara/grammar.c" +#line 3735 "libyara/grammar.c" break; case 93: /* expression: for_expression "" rule_set */ -#line 1707 "libyara/grammar.y" +#line 1763 "libyara/grammar.y" { if ((yyvsp[-2].expression).type == EXPRESSION_TYPE_INTEGER && (yyvsp[-2].expression).value.integer > (yyvsp[0].integer)) { @@ -3685,12 +3745,13 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yr_parser_emit_with_arg(yyscanner, OP_OF, OF_RULE_SET, NULL, NULL); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3690 "libyara/grammar.c" +#line 3751 "libyara/grammar.c" break; case 94: /* expression: primary_expression '%' "" string_set */ -#line 1718 "libyara/grammar.y" +#line 1775 "libyara/grammar.y" { check_type((yyvsp[-3].expression), EXPRESSION_TYPE_INTEGER, "%"); @@ -3707,13 +3768,22 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_with_error(ERROR_INVALID_PERCENTAGE); } + if (!IS_UNDEFINED((yyvsp[-3].expression).value.integer)) + { + (yyval.expression).required_strings.count = 1; + } + else + { + (yyval.expression).required_strings.count = 0; + } + yr_parser_emit_with_arg(yyscanner, OP_OF_PERCENT, OF_STRING_SET, NULL, NULL); } -#line 3713 "libyara/grammar.c" +#line 3783 "libyara/grammar.c" break; case 95: /* expression: primary_expression '%' "" rule_set */ -#line 1737 "libyara/grammar.y" +#line 1803 "libyara/grammar.y" { check_type((yyvsp[-3].expression), EXPRESSION_TYPE_INTEGER, "%"); @@ -3732,11 +3802,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yr_parser_emit_with_arg(yyscanner, OP_OF_PERCENT, OF_RULE_SET, NULL, NULL); } -#line 3736 "libyara/grammar.c" +#line 3806 "libyara/grammar.c" break; case 96: /* expression: for_expression "" string_set "" range */ -#line 1756 "libyara/grammar.y" +#line 1822 "libyara/grammar.y" { if ((yyvsp[-4].expression).type == EXPRESSION_TYPE_INTEGER && 
(yyvsp[-4].expression).value.integer > (yyvsp[-2].integer)) { @@ -3744,15 +3814,26 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); "expression always false - requesting %" PRId64 " of %" PRId64 ".", (yyvsp[-4].expression).value.integer, (yyvsp[-2].integer)); } + if (((yyvsp[-4].expression).type == EXPRESSION_TYPE_INTEGER && (yyvsp[-4].expression).value.integer > 0) || + ((yyvsp[-4].expression).type == EXPRESSION_TYPE_QUANTIFIER && + ((yyvsp[-4].expression).value.integer == FOR_EXPRESSION_ALL || (yyvsp[-4].expression).value.integer == FOR_EXPRESSION_ANY))) + { + (yyval.expression).required_strings.count = 1; + } + else + { + (yyval.expression).required_strings.count = 0; + } + yr_parser_emit(yyscanner, OP_OF_FOUND_IN, NULL); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3752 "libyara/grammar.c" +#line 3833 "libyara/grammar.c" break; case 97: /* expression: for_expression "" string_set "" primary_expression */ -#line 1768 "libyara/grammar.y" +#line 1845 "libyara/grammar.y" { if ((yyvsp[0].expression).type != EXPRESSION_TYPE_INTEGER) { @@ -3785,34 +3866,47 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); "multiple strings at an offset is usually false."); } + if (((yyvsp[-4].expression).type == EXPRESSION_TYPE_INTEGER && (yyvsp[-4].expression).value.integer > 0) || + ((yyvsp[-4].expression).type == EXPRESSION_TYPE_QUANTIFIER && + ((yyvsp[-4].expression).value.integer == FOR_EXPRESSION_ALL || (yyvsp[-4].expression).value.integer == FOR_EXPRESSION_ANY))) + { + (yyval.expression).required_strings.count = 1; + } + else + { + (yyval.expression).required_strings.count = 0; + } + yr_parser_emit(yyscanner, OP_OF_FOUND_AT, NULL); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3793 "libyara/grammar.c" +#line 3885 "libyara/grammar.c" break; case 98: /* expression: "" boolean_expression */ -#line 1805 "libyara/grammar.y" +#line 1893 "libyara/grammar.y" { yr_parser_emit(yyscanner, OP_NOT, NULL); (yyval.expression).type = 
EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3803 "libyara/grammar.c" +#line 3896 "libyara/grammar.c" break; case 99: /* expression: "" boolean_expression */ -#line 1811 "libyara/grammar.y" +#line 1900 "libyara/grammar.y" { yr_parser_emit(yyscanner, OP_DEFINED, NULL); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3812 "libyara/grammar.c" +#line 3906 "libyara/grammar.c" break; case 100: /* $@8: %empty */ -#line 1816 "libyara/grammar.y" +#line 1906 "libyara/grammar.y" { YR_FIXUP* fixup; YR_ARENA_REF jmp_offset_ref; @@ -3834,11 +3928,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fixup->next = compiler->fixup_stack_head; compiler->fixup_stack_head = fixup; } -#line 3838 "libyara/grammar.c" +#line 3932 "libyara/grammar.c" break; case 101: /* expression: boolean_expression "" $@8 boolean_expression */ -#line 1838 "libyara/grammar.y" +#line 1928 "libyara/grammar.y" { YR_FIXUP* fixup; @@ -3860,12 +3954,13 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yr_free(fixup); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = (yyvsp[0].expression).required_strings.count + (yyvsp[-3].expression).required_strings.count; } -#line 3865 "libyara/grammar.c" +#line 3960 "libyara/grammar.c" break; case 102: /* $@9: %empty */ -#line 1861 "libyara/grammar.y" +#line 1952 "libyara/grammar.y" { YR_FIXUP* fixup; YR_ARENA_REF jmp_offset_ref; @@ -3886,11 +3981,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fixup->next = compiler->fixup_stack_head; compiler->fixup_stack_head = fixup; } -#line 3890 "libyara/grammar.c" +#line 3985 "libyara/grammar.c" break; case 103: /* expression: boolean_expression "" $@9 boolean_expression */ -#line 1882 "libyara/grammar.y" +#line 1973 "libyara/grammar.y" { YR_FIXUP* fixup; @@ -3912,100 +4007,113 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yr_free(fixup); (yyval.expression).type = 
EXPRESSION_TYPE_BOOLEAN; + + // Set required string count to minimum from both parts + if ((yyvsp[-3].expression).required_strings.count > (yyvsp[0].expression).required_strings.count) { + (yyval.expression).required_strings.count = (yyvsp[0].expression).required_strings.count; + } else { + (yyval.expression).required_strings.count = (yyvsp[-3].expression).required_strings.count; + } } -#line 3917 "libyara/grammar.c" +#line 4019 "libyara/grammar.c" break; case 104: /* expression: primary_expression "<" primary_expression */ -#line 1905 "libyara/grammar.y" +#line 2003 "libyara/grammar.y" { fail_if_error(yr_parser_reduce_operation( yyscanner, "<", (yyvsp[-2].expression), (yyvsp[0].expression))); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3928 "libyara/grammar.c" +#line 4031 "libyara/grammar.c" break; case 105: /* expression: primary_expression ">" primary_expression */ -#line 1912 "libyara/grammar.y" +#line 2011 "libyara/grammar.y" { fail_if_error(yr_parser_reduce_operation( yyscanner, ">", (yyvsp[-2].expression), (yyvsp[0].expression))); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3939 "libyara/grammar.c" +#line 4043 "libyara/grammar.c" break; case 106: /* expression: primary_expression "<=" primary_expression */ -#line 1919 "libyara/grammar.y" +#line 2019 "libyara/grammar.y" { fail_if_error(yr_parser_reduce_operation( yyscanner, "<=", (yyvsp[-2].expression), (yyvsp[0].expression))); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3950 "libyara/grammar.c" +#line 4055 "libyara/grammar.c" break; case 107: /* expression: primary_expression ">=" primary_expression */ -#line 1926 "libyara/grammar.y" +#line 2027 "libyara/grammar.y" { fail_if_error(yr_parser_reduce_operation( yyscanner, ">=", (yyvsp[-2].expression), (yyvsp[0].expression))); (yyval.expression).type = 
EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3961 "libyara/grammar.c" +#line 4067 "libyara/grammar.c" break; case 108: /* expression: primary_expression "==" primary_expression */ -#line 1933 "libyara/grammar.y" +#line 2035 "libyara/grammar.y" { fail_if_error(yr_parser_reduce_operation( yyscanner, "==", (yyvsp[-2].expression), (yyvsp[0].expression))); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3972 "libyara/grammar.c" +#line 4079 "libyara/grammar.c" break; case 109: /* expression: primary_expression "!=" primary_expression */ -#line 1940 "libyara/grammar.y" +#line 2043 "libyara/grammar.y" { fail_if_error(yr_parser_reduce_operation( yyscanner, "!=", (yyvsp[-2].expression), (yyvsp[0].expression))); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; + (yyval.expression).required_strings.count = 0; } -#line 3983 "libyara/grammar.c" +#line 4091 "libyara/grammar.c" break; case 110: /* expression: primary_expression */ -#line 1947 "libyara/grammar.y" +#line 2051 "libyara/grammar.y" { (yyval.expression) = (yyvsp[0].expression); } -#line 3991 "libyara/grammar.c" +#line 4099 "libyara/grammar.c" break; case 111: /* expression: '(' expression ')' */ -#line 1951 "libyara/grammar.y" +#line 2055 "libyara/grammar.y" { (yyval.expression) = (yyvsp[-1].expression); } -#line 3999 "libyara/grammar.c" +#line 4107 "libyara/grammar.c" break; case 112: /* for_iteration: for_variables "" iterator */ -#line 1958 "libyara/grammar.y" +#line 2062 "libyara/grammar.y" { (yyval.integer) = FOR_ITERATION_ITERATOR; } -#line 4005 "libyara/grammar.c" +#line 4113 "libyara/grammar.c" break; case 113: /* for_iteration: "" string_iterator */ -#line 1960 "libyara/grammar.y" +#line 2064 "libyara/grammar.y" { int var_frame; int result = ERROR_SUCCESS; @@ -4026,11 +4134,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = FOR_ITERATION_STRING_SET; } -#line 4030 "libyara/grammar.c" 
+#line 4138 "libyara/grammar.c" break; case 114: /* for_variables: "identifier" */ -#line 1985 "libyara/grammar.y" +#line 2089 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4050,11 +4158,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); assert(loop_ctx->vars_count <= YR_MAX_LOOP_VARS); } -#line 4054 "libyara/grammar.c" +#line 4162 "libyara/grammar.c" break; case 115: /* for_variables: for_variables ',' "identifier" */ -#line 2005 "libyara/grammar.y" +#line 2109 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4079,11 +4187,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); loop_ctx->vars[loop_ctx->vars_count++].identifier.ptr = (yyvsp[0].c_string); } -#line 4083 "libyara/grammar.c" +#line 4191 "libyara/grammar.c" break; case 116: /* iterator: identifier */ -#line 2033 "libyara/grammar.y" +#line 2137 "libyara/grammar.y" { YR_LOOP_CONTEXT* loop_ctx = &compiler->loop[compiler->loop_index]; @@ -4157,11 +4265,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4161 "libyara/grammar.c" +#line 4269 "libyara/grammar.c" break; case 117: /* iterator: set */ -#line 2107 "libyara/grammar.y" +#line 2211 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4170,7 +4278,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); if (loop_ctx->vars_count == 1) { loop_ctx->vars[0].type = (yyvsp[0].enumeration).type; - loop_ctx->vars[0].value.integer = YR_UNDEFINED; + + if ((yyvsp[0].enumeration).type == EXPRESSION_TYPE_STRING) + loop_ctx->vars[0].value.sized_string_ref = YR_ARENA_NULL_REF; + else + loop_ctx->vars[0].value.integer = YR_UNDEFINED; } else { @@ -4185,11 +4297,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4189 "libyara/grammar.c" +#line 4301 "libyara/grammar.c" break; case 118: /* set: '(' enumeration ')' */ -#line 2135 "libyara/grammar.y" +#line 2243 "libyara/grammar.y" { // $2.count contains the number of items in the enumeration 
fail_if_error(yr_parser_emit_push_const(yyscanner, (yyvsp[-1].enumeration).count)); @@ -4206,24 +4318,23 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); } (yyval.enumeration).type = (yyvsp[-1].enumeration).type; - } -#line 4212 "libyara/grammar.c" +#line 4323 "libyara/grammar.c" break; case 119: /* set: range */ -#line 2154 "libyara/grammar.y" +#line 2261 "libyara/grammar.y" { fail_if_error(yr_parser_emit( yyscanner, OP_ITER_START_INT_RANGE, NULL)); (yyval.enumeration).type = EXPRESSION_TYPE_INTEGER; } -#line 4223 "libyara/grammar.c" +#line 4334 "libyara/grammar.c" break; case 120: /* range: '(' primary_expression ".." primary_expression ')' */ -#line 2165 "libyara/grammar.y" +#line 2272 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4262,11 +4373,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4266 "libyara/grammar.c" +#line 4377 "libyara/grammar.c" break; case 121: /* enumeration: primary_expression */ -#line 2208 "libyara/grammar.y" +#line 2315 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4282,11 +4393,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.enumeration).type = (yyvsp[0].expression).type; (yyval.enumeration).count = 1; } -#line 4286 "libyara/grammar.c" +#line 4397 "libyara/grammar.c" break; case 122: /* enumeration: enumeration ',' primary_expression */ -#line 2224 "libyara/grammar.y" +#line 2331 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4302,38 +4413,38 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.enumeration).type = (yyvsp[-2].enumeration).type; (yyval.enumeration).count = (yyvsp[-2].enumeration).count + 1; } -#line 4306 "libyara/grammar.c" +#line 4417 "libyara/grammar.c" break; case 123: /* string_iterator: string_set */ -#line 2244 "libyara/grammar.y" +#line 2351 "libyara/grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, (yyvsp[0].integer))); fail_if_error(yr_parser_emit(yyscanner, OP_ITER_START_STRING_SET, NULL)); } -#line 4316 
"libyara/grammar.c" +#line 4427 "libyara/grammar.c" break; case 124: /* $@10: %empty */ -#line 2253 "libyara/grammar.y" +#line 2360 "libyara/grammar.y" { // Push end-of-list marker yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); } -#line 4325 "libyara/grammar.c" +#line 4436 "libyara/grammar.c" break; case 125: /* string_set: '(' $@10 string_enumeration ')' */ -#line 2258 "libyara/grammar.y" +#line 2365 "libyara/grammar.y" { (yyval.integer) = (yyvsp[-1].integer); } -#line 4333 "libyara/grammar.c" +#line 4444 "libyara/grammar.c" break; case 126: /* string_set: "" */ -#line 2262 "libyara/grammar.y" +#line 2369 "libyara/grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, YR_UNDEFINED)); @@ -4343,23 +4454,23 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = count; } -#line 4347 "libyara/grammar.c" +#line 4458 "libyara/grammar.c" break; case 127: /* string_enumeration: string_enumeration_item */ -#line 2275 "libyara/grammar.y" +#line 2382 "libyara/grammar.y" { (yyval.integer) = (yyvsp[0].integer); } -#line 4353 "libyara/grammar.c" +#line 4464 "libyara/grammar.c" break; case 128: /* string_enumeration: string_enumeration ',' string_enumeration_item */ -#line 2276 "libyara/grammar.y" +#line 2383 "libyara/grammar.y" { (yyval.integer) = (yyvsp[-2].integer) + (yyvsp[0].integer); } -#line 4359 "libyara/grammar.c" +#line 4470 "libyara/grammar.c" break; case 129: /* string_enumeration_item: "string identifier" */ -#line 2282 "libyara/grammar.y" +#line 2389 "libyara/grammar.y" { int count = 0; int result = yr_parser_emit_pushes_for_strings(yyscanner, (yyvsp[0].c_string), &count); @@ -4369,11 +4480,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = count; } -#line 4373 "libyara/grammar.c" +#line 4484 "libyara/grammar.c" break; case 130: /* string_enumeration_item: "string identifier with wildcard" */ -#line 2292 "libyara/grammar.y" +#line 2399 "libyara/grammar.y" { int count = 0; int result = 
yr_parser_emit_pushes_for_strings(yyscanner, (yyvsp[0].c_string), &count); @@ -4383,40 +4494,40 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = count; } -#line 4387 "libyara/grammar.c" +#line 4498 "libyara/grammar.c" break; case 131: /* $@11: %empty */ -#line 2306 "libyara/grammar.y" +#line 2413 "libyara/grammar.y" { // Push end-of-list marker yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); } -#line 4396 "libyara/grammar.c" +#line 4507 "libyara/grammar.c" break; case 132: /* rule_set: '(' $@11 rule_enumeration ')' */ -#line 2311 "libyara/grammar.y" +#line 2418 "libyara/grammar.y" { (yyval.integer) = (yyvsp[-1].integer); } -#line 4404 "libyara/grammar.c" +#line 4515 "libyara/grammar.c" break; case 133: /* rule_enumeration: rule_enumeration_item */ -#line 2318 "libyara/grammar.y" +#line 2425 "libyara/grammar.y" { (yyval.integer) = (yyvsp[0].integer); } -#line 4410 "libyara/grammar.c" +#line 4521 "libyara/grammar.c" break; case 134: /* rule_enumeration: rule_enumeration ',' rule_enumeration_item */ -#line 2319 "libyara/grammar.y" +#line 2426 "libyara/grammar.y" { (yyval.integer) = (yyvsp[-2].integer) + (yyvsp[0].integer); } -#line 4416 "libyara/grammar.c" +#line 4527 "libyara/grammar.c" break; case 135: /* rule_enumeration_item: "identifier" */ -#line 2325 "libyara/grammar.y" +#line 2432 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4449,11 +4560,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = 1; } -#line 4453 "libyara/grammar.c" +#line 4564 "libyara/grammar.c" break; case 136: /* rule_enumeration_item: "identifier" '*' */ -#line 2358 "libyara/grammar.y" +#line 2465 "libyara/grammar.y" { int count = 0; YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( @@ -4474,11 +4585,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = count; } -#line 4478 "libyara/grammar.c" +#line 4589 "libyara/grammar.c" break; case 137: /* for_expression: primary_expression */ -#line 2383 "libyara/grammar.y" 
+#line 2490 "libyara/grammar.y" { if ((yyvsp[0].expression).type == EXPRESSION_TYPE_INTEGER && !IS_UNDEFINED((yyvsp[0].expression).value.integer)) { @@ -4534,57 +4645,57 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).value.integer = (yyvsp[0].expression).value.integer; } -#line 4538 "libyara/grammar.c" +#line 4649 "libyara/grammar.c" break; case 138: /* for_expression: for_quantifier */ -#line 2439 "libyara/grammar.y" +#line 2546 "libyara/grammar.y" { (yyval.expression).value.integer = (yyvsp[0].expression).value.integer; } -#line 4546 "libyara/grammar.c" +#line 4657 "libyara/grammar.c" break; case 139: /* for_quantifier: "" */ -#line 2446 "libyara/grammar.y" +#line 2553 "libyara/grammar.y" { yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); (yyval.expression).type = EXPRESSION_TYPE_QUANTIFIER; (yyval.expression).value.integer = FOR_EXPRESSION_ALL; } -#line 4556 "libyara/grammar.c" +#line 4667 "libyara/grammar.c" break; case 140: /* for_quantifier: "" */ -#line 2452 "libyara/grammar.y" +#line 2559 "libyara/grammar.y" { yr_parser_emit_push_const(yyscanner, 1); (yyval.expression).type = EXPRESSION_TYPE_QUANTIFIER; (yyval.expression).value.integer = FOR_EXPRESSION_ANY; } -#line 4566 "libyara/grammar.c" +#line 4677 "libyara/grammar.c" break; case 141: /* for_quantifier: "" */ -#line 2458 "libyara/grammar.y" +#line 2565 "libyara/grammar.y" { yr_parser_emit_push_const(yyscanner, 0); (yyval.expression).type = EXPRESSION_TYPE_QUANTIFIER; (yyval.expression).value.integer = FOR_EXPRESSION_NONE; } -#line 4576 "libyara/grammar.c" +#line 4687 "libyara/grammar.c" break; case 142: /* primary_expression: '(' primary_expression ')' */ -#line 2468 "libyara/grammar.y" +#line 2575 "libyara/grammar.y" { (yyval.expression) = (yyvsp[-1].expression); } -#line 4584 "libyara/grammar.c" +#line 4695 "libyara/grammar.c" break; case 143: /* primary_expression: "" */ -#line 2472 "libyara/grammar.y" +#line 2579 "libyara/grammar.y" { fail_if_error(yr_parser_emit( 
yyscanner, OP_FILESIZE, NULL)); @@ -4592,11 +4703,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4596 "libyara/grammar.c" +#line 4707 "libyara/grammar.c" break; case 144: /* primary_expression: "" */ -#line 2480 "libyara/grammar.y" +#line 2587 "libyara/grammar.y" { yywarning(yyscanner, "using deprecated \"entrypoint\" keyword. Use the \"entry_point\" " @@ -4608,11 +4719,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4612 "libyara/grammar.c" +#line 4723 "libyara/grammar.c" break; case 145: /* primary_expression: "integer function" '(' primary_expression ')' */ -#line 2492 "libyara/grammar.y" +#line 2599 "libyara/grammar.y" { check_type((yyvsp[-1].expression), EXPRESSION_TYPE_INTEGER, "intXXXX or uintXXXX"); @@ -4626,33 +4737,33 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4630 "libyara/grammar.c" +#line 4741 "libyara/grammar.c" break; case 146: /* primary_expression: "integer number" */ -#line 2506 "libyara/grammar.y" +#line 2613 "libyara/grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, (yyvsp[0].integer))); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = (yyvsp[0].integer); } -#line 4641 "libyara/grammar.c" +#line 4752 "libyara/grammar.c" break; case 147: /* primary_expression: "floating point number" */ -#line 2513 "libyara/grammar.y" +#line 2620 "libyara/grammar.y" { fail_if_error(yr_parser_emit_with_arg_double( yyscanner, OP_PUSH, (yyvsp[0].double_), NULL, NULL)); (yyval.expression).type = EXPRESSION_TYPE_FLOAT; } -#line 4652 "libyara/grammar.c" +#line 4763 "libyara/grammar.c" break; case 148: /* primary_expression: "text string" */ -#line 2520 
"libyara/grammar.y" +#line 2627 "libyara/grammar.y" { YR_ARENA_REF ref; @@ -4677,11 +4788,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_STRING; (yyval.expression).value.sized_string_ref = ref; } -#line 4681 "libyara/grammar.c" +#line 4792 "libyara/grammar.c" break; case 149: /* primary_expression: "string count" "" range */ -#line 2545 "libyara/grammar.y" +#line 2652 "libyara/grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-2].c_string), OP_COUNT_IN, YR_UNDEFINED); @@ -4693,11 +4804,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4697 "libyara/grammar.c" +#line 4808 "libyara/grammar.c" break; case 150: /* primary_expression: "string count" */ -#line 2557 "libyara/grammar.y" +#line 2664 "libyara/grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[0].c_string), OP_COUNT, YR_UNDEFINED); @@ -4709,11 +4820,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4713 "libyara/grammar.c" +#line 4824 "libyara/grammar.c" break; case 151: /* primary_expression: "string offset" '[' primary_expression ']' */ -#line 2569 "libyara/grammar.y" +#line 2676 "libyara/grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-3].c_string), OP_OFFSET, YR_UNDEFINED); @@ -4725,11 +4836,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4729 "libyara/grammar.c" +#line 4840 "libyara/grammar.c" break; case 152: /* primary_expression: "string offset" */ -#line 2581 "libyara/grammar.y" +#line 2688 "libyara/grammar.y" { int result = yr_parser_emit_push_const(yyscanner, 1); @@ -4744,11 +4855,11 @@ YYSTYPE yylval 
YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4748 "libyara/grammar.c" +#line 4859 "libyara/grammar.c" break; case 153: /* primary_expression: "string length" '[' primary_expression ']' */ -#line 2596 "libyara/grammar.y" +#line 2703 "libyara/grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-3].c_string), OP_LENGTH, YR_UNDEFINED); @@ -4760,11 +4871,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4764 "libyara/grammar.c" +#line 4875 "libyara/grammar.c" break; case 154: /* primary_expression: "string length" */ -#line 2608 "libyara/grammar.y" +#line 2715 "libyara/grammar.y" { int result = yr_parser_emit_push_const(yyscanner, 1); @@ -4779,11 +4890,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4783 "libyara/grammar.c" +#line 4894 "libyara/grammar.c" break; case 155: /* primary_expression: identifier */ -#line 2623 "libyara/grammar.y" +#line 2730 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4826,11 +4937,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4830 "libyara/grammar.c" +#line 4941 "libyara/grammar.c" break; case 156: /* primary_expression: '-' primary_expression */ -#line 2666 "libyara/grammar.y" +#line 2773 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4851,11 +4962,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4855 "libyara/grammar.c" +#line 4966 "libyara/grammar.c" break; case 157: /* primary_expression: primary_expression '+' primary_expression */ -#line 2687 "libyara/grammar.y" +#line 2794 "libyara/grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "+", (yyvsp[-2].expression), 
(yyvsp[0].expression)); @@ -4890,11 +5001,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4894 "libyara/grammar.c" +#line 5005 "libyara/grammar.c" break; case 158: /* primary_expression: primary_expression '-' primary_expression */ -#line 2722 "libyara/grammar.y" +#line 2829 "libyara/grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "-", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ -4929,11 +5040,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4933 "libyara/grammar.c" +#line 5044 "libyara/grammar.c" break; case 159: /* primary_expression: primary_expression '*' primary_expression */ -#line 2757 "libyara/grammar.y" +#line 2864 "libyara/grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "*", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ -4967,11 +5078,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4971 "libyara/grammar.c" +#line 5082 "libyara/grammar.c" break; case 160: /* primary_expression: primary_expression '\\' primary_expression */ -#line 2791 "libyara/grammar.y" +#line 2898 "libyara/grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "\\", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ -4996,11 +5107,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 5000 "libyara/grammar.c" +#line 5111 "libyara/grammar.c" break; case 161: /* primary_expression: primary_expression '%' primary_expression */ -#line 2816 "libyara/grammar.y" +#line 2923 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "%"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "%"); @@ -5017,11 +5128,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(ERROR_DIVISION_BY_ZERO); } } -#line 5021 "libyara/grammar.c" +#line 5132 "libyara/grammar.c" break; case 162: /* primary_expression: primary_expression '^' primary_expression */ 
-#line 2833 "libyara/grammar.y" +#line 2940 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "^"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "^"); @@ -5031,11 +5142,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = OPERATION(^, (yyvsp[-2].expression).value.integer, (yyvsp[0].expression).value.integer); } -#line 5035 "libyara/grammar.c" +#line 5146 "libyara/grammar.c" break; case 163: /* primary_expression: primary_expression '&' primary_expression */ -#line 2843 "libyara/grammar.y" +#line 2950 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "^"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "^"); @@ -5045,11 +5156,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = OPERATION(&, (yyvsp[-2].expression).value.integer, (yyvsp[0].expression).value.integer); } -#line 5049 "libyara/grammar.c" +#line 5160 "libyara/grammar.c" break; case 164: /* primary_expression: primary_expression '|' primary_expression */ -#line 2853 "libyara/grammar.y" +#line 2960 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "|"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "|"); @@ -5059,11 +5170,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = OPERATION(|, (yyvsp[-2].expression).value.integer, (yyvsp[0].expression).value.integer); } -#line 5063 "libyara/grammar.c" +#line 5174 "libyara/grammar.c" break; case 165: /* primary_expression: '~' primary_expression */ -#line 2863 "libyara/grammar.y" +#line 2970 "libyara/grammar.y" { check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "~"); @@ -5073,11 +5184,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); 
(yyval.expression).value.integer = ((yyvsp[0].expression).value.integer == YR_UNDEFINED) ? YR_UNDEFINED : ~((yyvsp[0].expression).value.integer); } -#line 5077 "libyara/grammar.c" +#line 5188 "libyara/grammar.c" break; case 166: /* primary_expression: primary_expression "<<" primary_expression */ -#line 2873 "libyara/grammar.y" +#line 2980 "libyara/grammar.y" { int result; @@ -5097,11 +5208,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 5101 "libyara/grammar.c" +#line 5212 "libyara/grammar.c" break; case 167: /* primary_expression: primary_expression ">>" primary_expression */ -#line 2893 "libyara/grammar.y" +#line 3000 "libyara/grammar.y" { int result; @@ -5121,19 +5232,19 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 5125 "libyara/grammar.c" +#line 5236 "libyara/grammar.c" break; case 168: /* primary_expression: regexp */ -#line 2913 "libyara/grammar.y" +#line 3020 "libyara/grammar.y" { (yyval.expression) = (yyvsp[0].expression); } -#line 5133 "libyara/grammar.c" +#line 5244 "libyara/grammar.c" break; -#line 5137 "libyara/grammar.c" +#line 5248 "libyara/grammar.c" default: break; } @@ -5357,5 +5468,5 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); return yyresult; } -#line 2918 "libyara/grammar.y" +#line 3025 "libyara/grammar.y" diff --git a/src/libyara/grammar.h b/src/libyara/grammar.h index 7a2a217..8c23456 100644 --- a/src/libyara/grammar.h +++ b/src/libyara/grammar.h @@ -189,7 +189,7 @@ extern int yara_yydebug; #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED union YYSTYPE { -#line 342 "libyara/grammar.y" +#line 343 "libyara/grammar.y" YR_EXPRESSION expression; SIZED_STRING* sized_string; diff --git a/src/libyara/grammar.y b/src/libyara/grammar.y index 2c1cda2..642d79e 100644 --- a/src/libyara/grammar.y +++ b/src/libyara/grammar.y @@ -88,7 +88,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// fail_if_error() is used in parser actions for aborting the parsing if an // error has occurred. See fail_with_error for details. #define fail_if_error(e) \ - if (e != ERROR_SUCCESS) \ + if (e != ERROR_SUCCESS && e != ERROR_UNKNOWN_ESCAPE_SEQUENCE) \ { \ fail_with_error(e); \ } @@ -304,6 +304,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. %type regexp %type for_expression %type for_quantifier +%type condition %type arguments @@ -405,6 +406,10 @@ rule } condition '}' { + YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr( + compiler->arena, &$4); + rule->required_strings = $10.required_strings.count; + int result = yr_parser_reduce_rule_declaration_phase_2( yyscanner, &$4); // rule created in phase 1 @@ -455,6 +460,9 @@ strings condition : _CONDITION_ ':' boolean_expression + { + $$ = $3; + } ; @@ -992,6 +1000,7 @@ identifier $$.type = EXPRESSION_TYPE_BOOLEAN; $$.value.integer = YR_UNDEFINED; $$.identifier.ptr = NULL; + $$.required_strings.count = 0; } else { @@ -1259,6 +1268,7 @@ regexp int result = ERROR_SUCCESS; int re_flags = 0; + int parser_flags = RE_PARSER_FLAG_NONE; if ($1->flags & SIZED_STRING_FLAGS_NO_CASE) re_flags |= RE_FLAGS_NO_CASE; @@ -1266,9 +1276,13 @@ regexp if ($1->flags & SIZED_STRING_FLAGS_DOT_ALL) re_flags |= RE_FLAGS_DOT_ALL; + if (compiler->strict_escape) + parser_flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES; + result = yr_re_compile( $1->c_string, re_flags, + parser_flags, compiler->arena, &re_ref, &error); @@ -1278,13 +1292,21 @@ regexp if (result == ERROR_INVALID_REGULAR_EXPRESSION) yr_compiler_set_error_extra_info(compiler, error.message); - if (result == ERROR_SUCCESS) + if (result == ERROR_SUCCESS || result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + yywarning( + yyscanner, + "unknown escape sequence"); + } result = yr_parser_emit_with_arg_reloc( yyscanner, OP_PUSH, yr_arena_ref_to_ptr(compiler->arena, &re_ref), NULL, NULL); + } fail_if_error(result); @@ -1311,6 
+1333,14 @@ boolean_expression fail_if_error(yr_parser_emit( yyscanner, OP_STR_TO_BOOL, NULL)); } + if ($1.type != EXPRESSION_TYPE_BOOLEAN) + { + $$.required_strings.count = 0; + } + else + { + $$.required_strings.count = $1.required_strings.count; + } $$.type = EXPRESSION_TYPE_BOOLEAN; } @@ -1322,12 +1352,14 @@ expression fail_if_error(yr_parser_emit_push_const(yyscanner, 1)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | _FALSE_ { fail_if_error(yr_parser_emit_push_const(yyscanner, 0)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression _MATCHES_ regexp { @@ -1340,6 +1372,7 @@ expression NULL)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression _CONTAINS_ primary_expression { @@ -1350,6 +1383,7 @@ expression yyscanner, OP_CONTAINS, NULL)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression _ICONTAINS_ primary_expression { @@ -1360,6 +1394,7 @@ expression yyscanner, OP_ICONTAINS, NULL)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression _STARTSWITH_ primary_expression { @@ -1370,6 +1405,7 @@ expression yyscanner, OP_STARTSWITH, NULL)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression _ISTARTSWITH_ primary_expression { @@ -1380,6 +1416,7 @@ expression yyscanner, OP_ISTARTSWITH, NULL)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression _ENDSWITH_ primary_expression { @@ -1390,6 +1427,7 @@ expression yyscanner, OP_ENDSWITH, NULL)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression _IENDSWITH_ primary_expression { @@ -1400,6 +1438,7 @@ expression yyscanner, OP_IENDSWITH, NULL)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression _IEQUALS_ primary_expression { @@ -1410,6 +1449,7 @@ expression yyscanner, OP_IEQUALS, NULL)); 
$$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | _STRING_IDENTIFIER_ { @@ -1424,6 +1464,7 @@ expression fail_if_error(result); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 1; } | _STRING_IDENTIFIER_ _AT_ primary_expression { @@ -1438,6 +1479,7 @@ expression fail_if_error(result); + $$.required_strings.count = 1; $$.type = EXPRESSION_TYPE_BOOLEAN; } | _STRING_IDENTIFIER_ _IN_ range @@ -1449,6 +1491,7 @@ expression fail_if_error(result); + $$.required_strings.count = 1; $$.type = EXPRESSION_TYPE_BOOLEAN; } | _FOR_ for_expression error @@ -1691,6 +1734,7 @@ expression compiler->loop_index--; $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | for_expression _OF_ string_set { @@ -1699,6 +1743,18 @@ expression yywarning(yyscanner, "expression always false - requesting %" PRId64 " of %" PRId64 ".", $1.value.integer, $3); } + + if (($1.type == EXPRESSION_TYPE_INTEGER && $1.value.integer > 0) || + ($1.type == EXPRESSION_TYPE_QUANTIFIER && + ($1.value.integer == FOR_EXPRESSION_ALL || $1.value.integer == FOR_EXPRESSION_ANY))) + { + $$.required_strings.count = 1; + } + else + { + $$.required_strings.count = 0; + } + yr_parser_emit_with_arg(yyscanner, OP_OF, OF_STRING_SET, NULL, NULL); $$.type = EXPRESSION_TYPE_BOOLEAN; @@ -1713,6 +1769,7 @@ expression yr_parser_emit_with_arg(yyscanner, OP_OF, OF_RULE_SET, NULL, NULL); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression '%' _OF_ string_set { @@ -1731,6 +1788,15 @@ expression fail_with_error(ERROR_INVALID_PERCENTAGE); } + if (!IS_UNDEFINED($1.value.integer)) + { + $$.required_strings.count = 1; + } + else + { + $$.required_strings.count = 0; + } + yr_parser_emit_with_arg(yyscanner, OP_OF_PERCENT, OF_STRING_SET, NULL, NULL); } | primary_expression '%' _OF_ rule_set @@ -1760,6 +1826,17 @@ expression "expression always false - requesting %" PRId64 " of %" PRId64 ".", $1.value.integer, $3); } + if (($1.type == 
EXPRESSION_TYPE_INTEGER && $1.value.integer > 0) || + ($1.type == EXPRESSION_TYPE_QUANTIFIER && + ($1.value.integer == FOR_EXPRESSION_ALL || $1.value.integer == FOR_EXPRESSION_ANY))) + { + $$.required_strings.count = 1; + } + else + { + $$.required_strings.count = 0; + } + yr_parser_emit(yyscanner, OP_OF_FOUND_IN, NULL); $$.type = EXPRESSION_TYPE_BOOLEAN; @@ -1797,6 +1874,17 @@ expression "multiple strings at an offset is usually false."); } + if (($1.type == EXPRESSION_TYPE_INTEGER && $1.value.integer > 0) || + ($1.type == EXPRESSION_TYPE_QUANTIFIER && + ($1.value.integer == FOR_EXPRESSION_ALL || $1.value.integer == FOR_EXPRESSION_ANY))) + { + $$.required_strings.count = 1; + } + else + { + $$.required_strings.count = 0; + } + yr_parser_emit(yyscanner, OP_OF_FOUND_AT, NULL); $$.type = EXPRESSION_TYPE_BOOLEAN; @@ -1806,11 +1894,13 @@ expression yr_parser_emit(yyscanner, OP_NOT, NULL); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | _DEFINED_ boolean_expression { yr_parser_emit(yyscanner, OP_DEFINED, NULL); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | boolean_expression _AND_ { @@ -1856,6 +1946,7 @@ expression yr_free(fixup); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = $4.required_strings.count + $1.required_strings.count; } | boolean_expression _OR_ { @@ -1900,6 +1991,13 @@ expression yr_free(fixup); $$.type = EXPRESSION_TYPE_BOOLEAN; + + // Set required string count to minimum from both parts + if ($1.required_strings.count > $4.required_strings.count) { + $$.required_strings.count = $4.required_strings.count; + } else { + $$.required_strings.count = $1.required_strings.count; + } } | primary_expression _LT_ primary_expression { @@ -1907,6 +2005,7 @@ expression yyscanner, "<", $1, $3)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression _GT_ primary_expression { @@ -1914,6 +2013,7 @@ expression yyscanner, ">", $1, $3)); $$.type = EXPRESSION_TYPE_BOOLEAN; 
+ $$.required_strings.count = 0; } | primary_expression _LE_ primary_expression { @@ -1921,6 +2021,7 @@ expression yyscanner, "<=", $1, $3)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression _GE_ primary_expression { @@ -1928,6 +2029,7 @@ expression yyscanner, ">=", $1, $3)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression _EQ_ primary_expression { @@ -1935,6 +2037,7 @@ expression yyscanner, "==", $1, $3)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression _NEQ_ primary_expression { @@ -1942,6 +2045,7 @@ expression yyscanner, "!=", $1, $3)); $$.type = EXPRESSION_TYPE_BOOLEAN; + $$.required_strings.count = 0; } | primary_expression { @@ -2112,7 +2216,11 @@ iterator if (loop_ctx->vars_count == 1) { loop_ctx->vars[0].type = $1.type; - loop_ctx->vars[0].value.integer = YR_UNDEFINED; + + if ($1.type == EXPRESSION_TYPE_STRING) + loop_ctx->vars[0].value.sized_string_ref = YR_ARENA_NULL_REF; + else + loop_ctx->vars[0].value.integer = YR_UNDEFINED; } else { @@ -2148,7 +2256,6 @@ set } $$.type = $2.type; - } | range { diff --git a/src/libyara/hex_lexer.c b/src/libyara/hex_lexer.c index 1ffe5ce..97c80f0 100644 --- a/src/libyara/hex_lexer.c +++ b/src/libyara/hex_lexer.c @@ -1,6 +1,6 @@ -#line 2 "hex_lexer.c" +#line 1 "libyara/hex_lexer.c" -#line 4 "hex_lexer.c" +#line 3 "libyara/hex_lexer.c" #define YY_INT_ALIGNED short int @@ -690,7 +690,7 @@ static const flex_int32_t yy_rule_can_match_eol[24] = #define yymore() yymore_used_but_not_detected #define YY_MORE_ADJ 0 #define YY_RESTORE_YY_MORE_OFFSET -#line 1 "hex_lexer.l" +#line 1 "libyara/hex_lexer.l" /* Copyright (c) 2013. The YARA Authors. All Rights Reserved. @@ -720,7 +720,7 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* Lexical analyzer for hex strings */ -#line 33 "hex_lexer.l" +#line 33 "libyara/hex_lexer.l" /* Disable warnings for unused functions in this file. @@ -760,11 +760,11 @@ with noyywrap then we can remove this pragma. YYABORT; \ } \ -#line 764 "hex_lexer.c" +#line 763 "libyara/hex_lexer.c" #define YY_NO_UNISTD_H 1 #define YY_NO_INPUT 1 -#line 768 "hex_lexer.c" +#line 767 "libyara/hex_lexer.c" #define INITIAL 0 #define comment 1 @@ -1038,11 +1038,11 @@ YY_DECL } { -#line 95 "hex_lexer.l" +#line 95 "libyara/hex_lexer.l" -#line 1046 "hex_lexer.c" +#line 1045 "libyara/hex_lexer.c" while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ { @@ -1109,7 +1109,7 @@ YY_DECL case 1: YY_RULE_SETUP -#line 98 "hex_lexer.l" +#line 98 "libyara/hex_lexer.l" { yylval->integer = xtoi(yytext); @@ -1118,7 +1118,7 @@ YY_RULE_SETUP YY_BREAK case 2: YY_RULE_SETUP -#line 104 "hex_lexer.l" +#line 104 "libyara/hex_lexer.l" { yytext[1] = '0'; // replace ? by 0 @@ -1128,7 +1128,7 @@ YY_RULE_SETUP YY_BREAK case 3: YY_RULE_SETUP -#line 111 "hex_lexer.l" +#line 111 "libyara/hex_lexer.l" { yylval->integer = xtoi(&(yytext[1])); @@ -1137,7 +1137,7 @@ YY_RULE_SETUP YY_BREAK case 4: YY_RULE_SETUP -#line 117 "hex_lexer.l" +#line 117 "libyara/hex_lexer.l" { yytext[0] = '0'; // replace ? by 0 @@ -1147,7 +1147,7 @@ YY_RULE_SETUP YY_BREAK case 5: YY_RULE_SETUP -#line 124 "hex_lexer.l" +#line 124 "libyara/hex_lexer.l" { yylval->integer = 0x0000; @@ -1156,7 +1156,7 @@ YY_RULE_SETUP YY_BREAK case 6: YY_RULE_SETUP -#line 130 "hex_lexer.l" +#line 130 "libyara/hex_lexer.l" { yytext[2] = '0'; // replace ? by 0 @@ -1166,7 +1166,7 @@ YY_RULE_SETUP YY_BREAK case 7: YY_RULE_SETUP -#line 137 "hex_lexer.l" +#line 137 "libyara/hex_lexer.l" { yytext[1] = '0'; // replace ? 
by 0 @@ -1176,7 +1176,7 @@ YY_RULE_SETUP YY_BREAK case 8: YY_RULE_SETUP -#line 144 "hex_lexer.l" +#line 144 "libyara/hex_lexer.l" { yyerror(yyscanner, lex_env, "uneven number of digits in hex string"); @@ -1185,16 +1185,16 @@ YY_RULE_SETUP YY_BREAK case 9: YY_RULE_SETUP -#line 150 "hex_lexer.l" +#line 150 "libyara/hex_lexer.l" { - + yyerror(yyscanner, lex_env, "invalid not operator (~) in hex string"); yyterminate(); } YY_BREAK case 10: YY_RULE_SETUP -#line 157 "hex_lexer.l" +#line 157 "libyara/hex_lexer.l" { BEGIN(range); @@ -1203,7 +1203,7 @@ YY_RULE_SETUP YY_BREAK case 11: YY_RULE_SETUP -#line 163 "hex_lexer.l" +#line 163 "libyara/hex_lexer.l" { BEGIN(comment); @@ -1211,7 +1211,7 @@ YY_RULE_SETUP YY_BREAK case 12: YY_RULE_SETUP -#line 168 "hex_lexer.l" +#line 168 "libyara/hex_lexer.l" { BEGIN(INITIAL); @@ -1220,17 +1220,17 @@ YY_RULE_SETUP case 13: /* rule 13 can match eol */ YY_RULE_SETUP -#line 173 "hex_lexer.l" +#line 173 "libyara/hex_lexer.l" // skip comments YY_BREAK case 14: YY_RULE_SETUP -#line 175 "hex_lexer.l" +#line 175 "libyara/hex_lexer.l" // skip single-line comments YY_BREAK case 15: YY_RULE_SETUP -#line 177 "hex_lexer.l" +#line 177 "libyara/hex_lexer.l" { return yytext[0]; @@ -1238,7 +1238,7 @@ YY_RULE_SETUP YY_BREAK case 16: YY_RULE_SETUP -#line 182 "hex_lexer.l" +#line 182 "libyara/hex_lexer.l" { yylval->integer = atoi(yytext); @@ -1247,7 +1247,7 @@ YY_RULE_SETUP YY_BREAK case 17: YY_RULE_SETUP -#line 188 "hex_lexer.l" +#line 188 "libyara/hex_lexer.l" { BEGIN(INITIAL); @@ -1257,12 +1257,12 @@ YY_RULE_SETUP case 18: /* rule 18 can match eol */ YY_RULE_SETUP -#line 194 "hex_lexer.l" +#line 194 "libyara/hex_lexer.l" // skip whitespaces YY_BREAK case 19: YY_RULE_SETUP -#line 196 "hex_lexer.l" +#line 196 "libyara/hex_lexer.l" { yyerror(yyscanner, lex_env, "invalid character in hex string jump"); @@ -1272,12 +1272,12 @@ YY_RULE_SETUP case 20: /* rule 20 can match eol */ YY_RULE_SETUP -#line 202 "hex_lexer.l" +#line 202 "libyara/hex_lexer.l" // skip 
whitespaces YY_BREAK case 21: YY_RULE_SETUP -#line 204 "hex_lexer.l" +#line 204 "libyara/hex_lexer.l" { // pass valid characters to the parser return yytext[0]; @@ -1285,7 +1285,7 @@ YY_RULE_SETUP YY_BREAK case 22: YY_RULE_SETUP -#line 209 "hex_lexer.l" +#line 209 "libyara/hex_lexer.l" { // reject all other characters yyerror(yyscanner, lex_env, "invalid character in hex string"); @@ -1294,10 +1294,10 @@ YY_RULE_SETUP YY_BREAK case 23: YY_RULE_SETUP -#line 215 "hex_lexer.l" +#line 215 "libyara/hex_lexer.l" ECHO; YY_BREAK -#line 1301 "hex_lexer.c" +#line 1300 "libyara/hex_lexer.c" case YY_STATE_EOF(INITIAL): case YY_STATE_EOF(comment): case YY_STATE_EOF(range): @@ -2450,7 +2450,7 @@ void yyfree (void * ptr , yyscan_t yyscanner) #define YYTABLES_NAME "yytables" -#line 215 "hex_lexer.l" +#line 215 "libyara/hex_lexer.l" // diff --git a/src/libyara/hex_lexer.l b/src/libyara/hex_lexer.l index 2c8f1a7..ad99244 100644 --- a/src/libyara/hex_lexer.l +++ b/src/libyara/hex_lexer.l @@ -148,7 +148,7 @@ hexdigit [a-fA-F0-9] } \~ { - + yyerror(yyscanner, lex_env, "invalid not operator (~) in hex string"); yyterminate(); } diff --git a/src/libyara/include/authenticode-parser/authenticode.h b/src/libyara/include/authenticode-parser/authenticode.h index 4dfe8a2..88f96a3 100644 --- a/src/libyara/include/authenticode-parser/authenticode.h +++ b/src/libyara/include/authenticode-parser/authenticode.h @@ -106,8 +106,8 @@ typedef struct { char* key_alg; /* Name of the key algorithm */ char* sig_alg; /* Name of the signature algorithm */ char* sig_alg_oid; /* OID of the signature algorithm */ - time_t not_before; /* NotBefore validity */ - time_t not_after; /* NotAfter validity */ + int64_t not_before; /* NotBefore validity */ + int64_t not_after; /* NotAfter validity */ char* key; /* PEM encoded public key */ Attributes issuer_attrs; /* Parsed X509 Attributes of Issuer */ Attributes subject_attrs; /* Parsed X509 Attributes of Subject */ @@ -120,7 +120,7 @@ typedef struct { typedef struct { 
int verify_flags; /* COUNTERISGNATURE_VFY_ flag */ - time_t sign_time; /* Signing time of the timestamp countersignature */ + int64_t sign_time; /* Signing time of the timestamp countersignature */ char* digest_alg; /* Name of the digest algorithm used */ ByteArray digest; /* Stored message digest */ CertificateArray* chain; /* Certificate chain of the signer */ @@ -190,7 +190,7 @@ AuthenticodeArray* parse_authenticode(const uint8_t* pe_data, uint64_t pe_len); * @param len * @return AuthenticodeArray* */ -AuthenticodeArray* authenticode_new(const uint8_t* data, long len); +AuthenticodeArray* authenticode_new(const uint8_t* data, int32_t len); /** * @brief Deallocates AuthenticodeArray and all it's allocated members diff --git a/src/libyara/include/yara/arena.h b/src/libyara/include/yara/arena.h index 11b66e2..6014a1a 100644 --- a/src/libyara/include/yara/arena.h +++ b/src/libyara/include/yara/arena.h @@ -37,10 +37,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define EOL ((size_t) -1) -#define YR_ARENA_FILE_VERSION 20 +#define YR_ARENA_FILE_VERSION 21 -#define YR_ARENA_NULL_REF \ - (YR_ARENA_REF) { UINT32_MAX, UINT32_MAX } +#define YR_ARENA_NULL_REF \ + (YR_ARENA_REF) \ + { \ + UINT32_MAX, UINT32_MAX \ + } #define YR_ARENA_IS_NULL_REF(ref) \ (memcmp(&(ref), &YR_ARENA_NULL_REF, sizeof(YR_ARENA_NULL_REF)) == 0) diff --git a/src/libyara/include/yara/compiler.h b/src/libyara/include/yara/compiler.h index acd173c..a2f5f50 100644 --- a/src/libyara/include/yara/compiler.h +++ b/src/libyara/include/yara/compiler.h @@ -93,6 +93,12 @@ typedef struct _YR_EXPRESSION YR_ARENA_REF sized_string_ref; } value; + // Boolean expressions can hold a string count. If not empty, this indicates that the condition + // can only be fulfilled if at least so many strings match. 
+ struct { + int count; + } required_strings; + // An expression can have an associated identifier, if "ptr" is not NULL it // points to the identifier name, if it is NULL, then "ref" holds a reference // to the identifier within YR_SZ_POOL. When the identifier is in YR_SZ_POOL @@ -231,6 +237,7 @@ typedef struct _YR_COMPILER int current_line; int last_error; int last_error_line; + bool strict_escape; jmp_buf error_recovery; @@ -375,6 +382,12 @@ YR_API int yr_compiler_add_fd( const char* namespace_, const char* file_name); +YR_API int yr_compiler_add_bytes( + YR_COMPILER* compiler, + const void* rules_data, + size_t rules_size, + const char* namespace_); + YR_API int yr_compiler_add_string( YR_COMPILER* compiler, const char* rules_string, diff --git a/src/libyara/include/yara/error.h b/src/libyara/include/yara/error.h index aded7be..3621b08 100644 --- a/src/libyara/include/yara/error.h +++ b/src/libyara/include/yara/error.h @@ -107,6 +107,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ERROR_INVALID_PERCENTAGE 62 #define ERROR_IDENTIFIER_MATCHES_WILDCARD 63 #define ERROR_INVALID_VALUE 64 +#define ERROR_TOO_SLOW_SCANNING 65 +#define ERROR_UNKNOWN_ESCAPE_SEQUENCE 66 #define GOTO_EXIT_ON_ERROR(x) \ { \ diff --git a/src/libyara/include/yara/globals.h b/src/libyara/include/yara/globals.h index 9cfb319..ab9ce73 100644 --- a/src/libyara/include/yara/globals.h +++ b/src/libyara/include/yara/globals.h @@ -48,6 +48,13 @@ extern YR_THREAD_STORAGE_KEY yr_yyfatal_trampoline_tls; // Thread-local storage (TLS) key used by YR_TRYCATCH. extern YR_THREAD_STORAGE_KEY yr_trycatch_trampoline_tls; +#if !(_WIN32 || __CYGWIN__) +extern struct sigaction old_sigsegv_exception_handler; +extern struct sigaction old_sigbus_exception_handler; +extern int exception_handler_usecount; +extern pthread_mutex_t exception_handler_mutex; +#endif + // When YARA is built with YR_DEBUG_VERBOSITY defined as larger than 0 it can // print debug information to stdout. 
#if 0 == YR_DEBUG_VERBOSITY diff --git a/src/libyara/include/yara/hex_lexer.h b/src/libyara/include/yara/hex_lexer.h index 588adf3..b197c02 100644 --- a/src/libyara/include/yara/hex_lexer.h +++ b/src/libyara/include/yara/hex_lexer.h @@ -53,6 +53,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define yyget_extra hex_yyget_extra #define yyget_lineno hex_yyget_lineno +// Define the ECHO macro as an empty macro in order to avoid the default +// implementation from being used. The default implementation of ECHO +// prints to the console any byte that is not matched by the lexer. It's +// not safe to print random bytes to the console as it may cause the calling +// program to terminate. See: https://github.com/VirusTotal/yara/issues/2007 +#define ECHO #ifndef YY_TYPEDEF_YY_SCANNER_T #define YY_TYPEDEF_YY_SCANNER_T diff --git a/src/libyara/include/yara/lexer.h b/src/libyara/include/yara/lexer.h index 5146f56..d12c950 100644 --- a/src/libyara/include/yara/lexer.h +++ b/src/libyara/include/yara/lexer.h @@ -29,7 +29,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include - #undef yyparse #undef yylex #undef yyerror @@ -55,6 +54,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define yyget_extra yara_yyget_extra #define yyget_lineno yara_yyget_lineno +// Define the ECHO macro as an empty macro in order to avoid the default +// implementation from being used. The default implementation of ECHO +// prints to the console any byte that is not matched by the lexer. It's +// not safe to print random bytes to the console as it may cause the calling +// program to terminate. 
See: https://github.com/VirusTotal/yara/issues/2007 +#define ECHO #ifndef YY_TYPEDEF_YY_SCANNER_T #define YY_TYPEDEF_YY_SCANNER_T @@ -67,14 +72,16 @@ union YYSTYPE; int yylex( \ union YYSTYPE* yylval_param, yyscan_t yyscanner, YR_COMPILER* compiler) - +// The default behavior when a fatal error occurs in the parser is calling +// exit(YY_EXIT_FAILURE) for terminating the process. This is not acceptable +// for a library, which should return gracefully to the calling program. For +// this reason we redefine the YY_FATAL_ERROR macro so that it expands to our +// own function instead of the one provided by default. #define YY_FATAL_ERROR(msg) yara_yyfatal(yyscanner, msg) - #define YY_EXTRA_TYPE YR_COMPILER* #define YY_USE_CONST - int yyget_lineno(yyscan_t yyscanner); int yylex( @@ -96,6 +103,11 @@ void yyfatal(yyscan_t yyscanner, const char* error_message); YY_EXTRA_TYPE yyget_extra(yyscan_t yyscanner); +int yr_lex_parse_rules_bytes( + const void* rules_data, + size_t rules_size, + YR_COMPILER* compiler); + int yr_lex_parse_rules_string(const char* rules_string, YR_COMPILER* compiler); int yr_lex_parse_rules_file(FILE* rules_file, YR_COMPILER* compiler); diff --git a/src/libyara/include/yara/libyara.h b/src/libyara/include/yara/libyara.h index d9f24d3..addc627 100644 --- a/src/libyara/include/yara/libyara.h +++ b/src/libyara/include/yara/libyara.h @@ -33,8 +33,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #define YR_MAJOR_VERSION 4 -#define YR_MINOR_VERSION 3 -#define YR_MICRO_VERSION 2 +#define YR_MINOR_VERSION 5 +#define YR_MICRO_VERSION 0 #define version_str(s) _version_str(s) #define _version_str(s) #s diff --git a/src/libyara/include/yara/limits.h b/src/libyara/include/yara/limits.h index 714d66e..7ef9549 100644 --- a/src/libyara/include/yara/limits.h +++ b/src/libyara/include/yara/limits.h @@ -114,6 +114,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define YR_MAX_STRING_MATCHES 1000000 #endif +// The number of matches before detecting slow scanning. If more matches are found +// the scan will have a CALLBACK_MSG_TOO_SLOW_SCANNING. +#ifndef YR_SLOW_STRING_MATCHES +#define YR_SLOW_STRING_MATCHES 600000 +#endif + +// If size of the input is bigger then 0.2 MB and 0-length atoms are used +// the scan will have a CALLBACK_MSG_TOO_SLOW_SCANNING. +#ifndef YR_FILE_SIZE_THRESHOLD +#define YR_FILE_SIZE_THRESHOLD 200000 +#endif + // Maximum number of argument that a function in a YARA module can have. #ifndef YR_MAX_FUNCTION_ARGS #define YR_MAX_FUNCTION_ARGS 128 diff --git a/src/libyara/include/yara/object.h b/src/libyara/include/yara/object.h index f127add..5bb3f25 100644 --- a/src/libyara/include/yara/object.h +++ b/src/libyara/include/yara/object.h @@ -123,9 +123,10 @@ int yr_object_set_string( const char* field, ...) YR_PRINTF_LIKE(4, 5); -int yr_object_array_length(YR_OBJECT* object); +YR_API int yr_object_array_length(YR_OBJECT* object); -YR_OBJECT* yr_object_array_get_item(YR_OBJECT* object, int flags, int index); +YR_API YR_OBJECT* yr_object_array_get_item(YR_OBJECT* object, int flags, + int index); int yr_object_array_set_item(YR_OBJECT* object, YR_OBJECT* item, int index); diff --git a/src/libyara/include/yara/re.h b/src/libyara/include/yara/re.h index 1098c27..160cb0f 100644 --- a/src/libyara/include/yara/re.h +++ b/src/libyara/include/yara/re.h @@ -101,6 +101,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define RE_FLAGS_GREEDY 0x400 #define RE_FLAGS_UNGREEDY 0x800 +enum YR_RE_PARSER_FLAGS { + RE_PARSER_FLAG_NONE = 0 << 0, + RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES = 1 << 0, +}; + typedef int RE_MATCH_CALLBACK_FUNC( const uint8_t* match, int match_length, @@ -155,13 +160,14 @@ int yr_re_fast_exec( void* callback_args, int* matches); -int yr_re_parse(const char* re_string, RE_AST** re_ast, RE_ERROR* error); +int yr_re_parse(const char* re_string, RE_AST** re_ast, RE_ERROR* error, int flags); int yr_re_parse_hex(const char* hex_string, RE_AST** re_ast, RE_ERROR* error); int yr_re_compile( const char* re_string, int flags, + int parser_flags, YR_ARENA* arena, YR_ARENA_REF* ref, RE_ERROR* error); diff --git a/src/libyara/include/yara/re_lexer.h b/src/libyara/include/yara/re_lexer.h index 297aa03..bb0a701 100644 --- a/src/libyara/include/yara/re_lexer.h +++ b/src/libyara/include/yara/re_lexer.h @@ -30,6 +30,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #undef yyparse #undef yylex #undef yyerror +#undef yywarning #undef yyfatal #undef yychar #undef yydebug @@ -44,6 +45,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define yyparse re_yyparse #define yylex re_yylex #define yyerror re_yyerror +#define yywarning re_yywarning #define yyfatal re_yyfatal #define yychar re_yychar #define yydebug re_yydebug @@ -51,6 +53,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define yyget_extra re_yyget_extra #define yyget_lineno re_yyget_lineno +// Define the ECHO macro as an empty macro in order to avoid the default +// implementation from being used. The default implementation of ECHO +// prints to the console any byte that is not matched by the lexer. It's +// not safe to print random bytes to the console as it may cause the calling +// program to terminate. 
See: https://github.com/VirusTotal/yara/issues/2007 +#define ECHO + #ifndef YY_TYPEDEF_YY_SCANNER_T #define YY_TYPEDEF_YY_SCANNER_T typedef void* yyscan_t; @@ -59,11 +68,15 @@ typedef void* yyscan_t; #define YY_EXTRA_TYPE RE_AST* #define YY_USE_CONST +#define VALID_ESCAPE_SEQUENCE 1 +#define UNKNOWN_ESCAPE_SEQUENCE 2 + typedef struct _RE_LEX_ENVIRONMENT { RE_CLASS re_class; int last_error; char last_error_message[256]; + bool strict_escape; } RE_LEX_ENVIRONMENT; @@ -94,8 +107,17 @@ void yyerror( RE_LEX_ENVIRONMENT* lex_env, const char* error_message); +void yywarning( + yyscan_t yyscanner, + RE_LEX_ENVIRONMENT* lex_env, + const char* error_message); + void yyfatal(yyscan_t yyscanner, const char* error_message); int yyparse(void* yyscanner, RE_LEX_ENVIRONMENT* lex_env); -int yr_parse_re_string(const char* re_string, RE_AST** re_ast, RE_ERROR* error); +int yr_parse_re_string( + const char* re_string, + RE_AST** re_ast, + RE_ERROR* error, + int flags); diff --git a/src/libyara/include/yara/rules.h b/src/libyara/include/yara/rules.h index 2147d47..2606e93 100644 --- a/src/libyara/include/yara/rules.h +++ b/src/libyara/include/yara/rules.h @@ -42,6 +42,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CALLBACK_MSG_MODULE_IMPORTED 5 #define CALLBACK_MSG_TOO_MANY_MATCHES 6 #define CALLBACK_MSG_CONSOLE_LOG 7 +#define CALLBACK_MSG_TOO_SLOW_SCANNING 8 #define CALLBACK_CONTINUE 0 #define CALLBACK_ABORT 1 diff --git a/src/libyara/include/yara/types.h b/src/libyara/include/yara/types.h index 89e3b26..973aba9 100644 --- a/src/libyara/include/yara/types.h +++ b/src/libyara/include/yara/types.h @@ -293,6 +293,8 @@ struct YR_RULE // Number of atoms generated for this rule. int32_t num_atoms; + uint32_t required_strings; + DECLARE_REFERENCE(const char*, identifier); DECLARE_REFERENCE(const char*, tags); DECLARE_REFERENCE(YR_META*, metas); @@ -563,7 +565,7 @@ struct YR_RULES // deprecated, which will raise a warning if used. 
// TODO(vmalvarez): Remove this field when a reasonable a few versions // after 4.1 has been released. - YR_RULE* rules_list_head YR_DEPRECATED; + YR_DEPRECATED(YR_RULE* rules_list_head); }; // Array of pointers with an entry for each of the defined strings. The idx @@ -576,7 +578,7 @@ struct YR_RULES // deprecated, which will raise a warning if used. // TODO(vmalvarez): Remove this field when a reasonable a few versions // after 4.1 has been released. - YR_STRING* strings_list_head YR_DEPRECATED; + YR_DEPRECATED(YR_STRING* strings_list_head); }; // Array of pointers with an entry for each external variable. @@ -588,7 +590,7 @@ struct YR_RULES // as deprecated, which will raise a warning if used. // TODO(vmalvarez): Remove this field when a reasonable a few versions // after 4.1 has been released. - YR_EXTERNAL_VARIABLE* externals_list_head YR_DEPRECATED; + YR_DEPRECATED(YR_EXTERNAL_VARIABLE* externals_list_head); }; // Pointer to the Aho-Corasick transition table. @@ -611,6 +613,10 @@ struct YR_RULES // the instructions are defined by the OP_X macros in exec.h. const uint8_t* code_start; + // A bitmap with one bit per rule, bit N is set when the condition for rule + // might evaluate to true even without any string matches. + YR_BITMASK* no_required_strings; + // Total number of rules. uint32_t num_rules; @@ -700,6 +706,8 @@ struct YR_MEMORY_BLOCK YR_MEMORY_BLOCK_FETCH_DATA_FUNC fetch_data; }; +YR_API const uint8_t* yr_fetch_block_data(YR_MEMORY_BLOCK* self); + /////////////////////////////////////////////////////////////////////////////// // YR_MEMORY_BLOCK_ITERATOR represents an iterator that returns a series of // memory blocks to be scanned by yr_scanner_scan_mem_blocks. The iterator have @@ -815,6 +823,10 @@ struct YR_SCAN_CONTEXT // until they can be confirmed or discarded. YR_MATCHES* unconfirmed_matches; + // A bitmap with one bit per rule, bit N is set if the corresponding rule + // must evaluated. 
+ YR_BITMASK* required_eval; + // profiling_info is a pointer to an array of YR_PROFILING_INFO structures, // one per rule. Entry N has the profiling information for rule with index N. YR_PROFILING_INFO* profiling_info; diff --git a/src/libyara/include/yara/utils.h b/src/libyara/include/yara/utils.h index 302ab8b..9e6945a 100644 --- a/src/libyara/include/yara/utils.h +++ b/src/libyara/include/yara/utils.h @@ -55,38 +55,38 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(_WIN32) || defined(__CYGWIN__) #ifdef YR_BUILDING_DLL #ifdef __GNUC__ -#define YR_API EXTERNC __attribute__((dllexport)) -#define YR_DEPRECATED_API EXTERNC __attribute__((deprecated)) -#define YR_DEPRECATED __attribute__((deprecated)) +#define YR_API EXTERNC __attribute__((dllexport)) +#define YR_DEPRECATED_API EXTERNC __attribute__((deprecated)) +#define YR_DEPRECATED(statement) statement __attribute__((deprecated)) #else -#define YR_API EXTERNC __declspec(dllexport) -#define YR_DEPRECATED_API EXTERNC __declspec(deprecated) -#define YR_DEPRECATED __declspec(deprecated) +#define YR_API EXTERNC __declspec(dllexport) +#define YR_DEPRECATED_API EXTERNC __declspec(deprecated) +#define YR_DEPRECATED(statement) __declspec(deprecated) statement #endif #elif defined(YR_IMPORTING_DLL) #ifdef __GNUC__ -#define YR_API EXTERNC __attribute__((dllimport)) -#define YR_DEPRECATED_API EXTERNC __attribute__((deprecated)) -#define YR_DEPRECATED __attribute__((deprecated)) +#define YR_API EXTERNC __attribute__((dllimport)) +#define YR_DEPRECATED_API EXTERNC __attribute__((deprecated)) +#define YR_DEPRECATED(statement) statement __attribute__((deprecated)) #else -#define YR_API EXTERNC __declspec(dllimport) -#define YR_DEPRECATED_API EXTERNC __declspec(deprecated) -#define YR_DEPRECATED __declspec(deprecated) +#define YR_API EXTERNC __declspec(dllimport) +#define YR_DEPRECATED_API EXTERNC __declspec(deprecated) +#define YR_DEPRECATED(statement) __declspec(deprecated) statement #endif #else 
-#define YR_API EXTERNC -#define YR_DEPRECATED_API EXTERNC -#define YR_DEPRECATED +#define YR_API EXTERNC +#define YR_DEPRECATED_API EXTERNC +#define YR_DEPRECATED(statement) statement #endif #else #if __GNUC__ >= 4 -#define YR_API EXTERNC __attribute__((visibility("default"))) -#define YR_DEPRECATED_API YR_API __attribute__((deprecated)) -#define YR_DEPRECATED __attribute__((deprecated)) +#define YR_API EXTERNC __attribute__((visibility("default"))) +#define YR_DEPRECATED_API YR_API __attribute__((deprecated)) +#define YR_DEPRECATED(statement) statement __attribute__((deprecated)) #else -#define YR_API EXTERNC -#define YR_DEPRECATED_API EXTERNC -#define YR_DEPRECATED +#define YR_API EXTERNC +#define YR_DEPRECATED_API EXTERNC +#define YR_DEPRECATED(statement) statement #endif #endif diff --git a/src/libyara/lexer.c b/src/libyara/lexer.c index 9e940aa..f232f94 100644 --- a/src/libyara/lexer.c +++ b/src/libyara/lexer.c @@ -1,6 +1,6 @@ -#line 1 "lexer.c" +#line 1 "libyara/lexer.c" -#line 3 "lexer.c" +#line 3 "libyara/lexer.c" #define YY_INT_ALIGNED short int @@ -894,7 +894,7 @@ static const flex_int32_t yy_rule_can_match_eol[87] = #define yymore() yymore_used_but_not_detected #define YY_MORE_ADJ 0 #define YY_RESTORE_YY_MORE_OFFSET -#line 1 "lexer.l" +#line 1 "libyara/lexer.l" /* Copyright (c) 2007-2013. The YARA Authors. All Rights Reserved. @@ -924,7 +924,7 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Lexical analyzer for YARA */ -#line 33 "lexer.l" +#line 33 "libyara/lexer.l" /* Disable warnings for unused functions in this file. 
@@ -1030,11 +1030,11 @@ static bool is_absolute_path( #endif } -#line 1033 "lexer.c" +#line 1033 "libyara/lexer.c" #define YY_NO_UNISTD_H 1 #define YY_NO_INPUT 1 -#line 1037 "lexer.c" +#line 1037 "libyara/lexer.c" #define INITIAL 0 #define str 1 @@ -1310,10 +1310,10 @@ YY_DECL } { -#line 163 "lexer.l" +#line 163 "libyara/lexer.l" -#line 1316 "lexer.c" +#line 1316 "libyara/lexer.c" while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ { @@ -1380,263 +1380,263 @@ YY_DECL case 1: YY_RULE_SETUP -#line 165 "lexer.l" +#line 165 "libyara/lexer.l" { return _DOT_DOT_; } YY_BREAK case 2: YY_RULE_SETUP -#line 166 "lexer.l" +#line 166 "libyara/lexer.l" { return _LT_; } YY_BREAK case 3: YY_RULE_SETUP -#line 167 "lexer.l" +#line 167 "libyara/lexer.l" { return _GT_; } YY_BREAK case 4: YY_RULE_SETUP -#line 168 "lexer.l" +#line 168 "libyara/lexer.l" { return _LE_; } YY_BREAK case 5: YY_RULE_SETUP -#line 169 "lexer.l" +#line 169 "libyara/lexer.l" { return _GE_; } YY_BREAK case 6: YY_RULE_SETUP -#line 170 "lexer.l" +#line 170 "libyara/lexer.l" { return _EQ_; } YY_BREAK case 7: YY_RULE_SETUP -#line 171 "lexer.l" +#line 171 "libyara/lexer.l" { return _NEQ_; } YY_BREAK case 8: YY_RULE_SETUP -#line 172 "lexer.l" +#line 172 "libyara/lexer.l" { return _SHIFT_LEFT_; } YY_BREAK case 9: YY_RULE_SETUP -#line 173 "lexer.l" +#line 173 "libyara/lexer.l" { return _SHIFT_RIGHT_; } YY_BREAK case 10: YY_RULE_SETUP -#line 174 "lexer.l" +#line 174 "libyara/lexer.l" { return _PRIVATE_; } YY_BREAK case 11: YY_RULE_SETUP -#line 175 "lexer.l" +#line 175 "libyara/lexer.l" { return _GLOBAL_; } YY_BREAK case 12: YY_RULE_SETUP -#line 176 "lexer.l" +#line 176 "libyara/lexer.l" { return _RULE_; } YY_BREAK case 13: YY_RULE_SETUP -#line 177 "lexer.l" +#line 177 "libyara/lexer.l" { return _META_; } YY_BREAK case 14: YY_RULE_SETUP -#line 178 "lexer.l" +#line 178 "libyara/lexer.l" { return _STRINGS_; } YY_BREAK case 15: YY_RULE_SETUP -#line 179 "lexer.l" +#line 179 "libyara/lexer.l" { return _ASCII_; } 
YY_BREAK case 16: YY_RULE_SETUP -#line 180 "lexer.l" +#line 180 "libyara/lexer.l" { return _WIDE_; } YY_BREAK case 17: YY_RULE_SETUP -#line 181 "lexer.l" +#line 181 "libyara/lexer.l" { return _XOR_; } YY_BREAK case 18: YY_RULE_SETUP -#line 182 "lexer.l" +#line 182 "libyara/lexer.l" { return _BASE64_; } YY_BREAK case 19: YY_RULE_SETUP -#line 183 "lexer.l" +#line 183 "libyara/lexer.l" { return _BASE64_WIDE_; } YY_BREAK case 20: YY_RULE_SETUP -#line 184 "lexer.l" +#line 184 "libyara/lexer.l" { return _FULLWORD_; } YY_BREAK case 21: YY_RULE_SETUP -#line 185 "lexer.l" +#line 185 "libyara/lexer.l" { return _NOCASE_; } YY_BREAK case 22: YY_RULE_SETUP -#line 186 "lexer.l" +#line 186 "libyara/lexer.l" { return _CONDITION_; } YY_BREAK case 23: YY_RULE_SETUP -#line 187 "lexer.l" +#line 187 "libyara/lexer.l" { return _TRUE_; } YY_BREAK case 24: YY_RULE_SETUP -#line 188 "lexer.l" +#line 188 "libyara/lexer.l" { return _FALSE_; } YY_BREAK case 25: YY_RULE_SETUP -#line 189 "lexer.l" +#line 189 "libyara/lexer.l" { return _NOT_; } YY_BREAK case 26: YY_RULE_SETUP -#line 190 "lexer.l" +#line 190 "libyara/lexer.l" { return _AND_; } YY_BREAK case 27: YY_RULE_SETUP -#line 191 "lexer.l" +#line 191 "libyara/lexer.l" { return _OR_; } YY_BREAK case 28: YY_RULE_SETUP -#line 192 "lexer.l" +#line 192 "libyara/lexer.l" { return _AT_; } YY_BREAK case 29: YY_RULE_SETUP -#line 193 "lexer.l" +#line 193 "libyara/lexer.l" { return _IN_; } YY_BREAK case 30: YY_RULE_SETUP -#line 194 "lexer.l" +#line 194 "libyara/lexer.l" { return _OF_; } YY_BREAK case 31: YY_RULE_SETUP -#line 195 "lexer.l" +#line 195 "libyara/lexer.l" { return _THEM_; } YY_BREAK case 32: YY_RULE_SETUP -#line 196 "lexer.l" +#line 196 "libyara/lexer.l" { return _FOR_; } YY_BREAK case 33: YY_RULE_SETUP -#line 197 "lexer.l" +#line 197 "libyara/lexer.l" { return _ALL_; } YY_BREAK case 34: YY_RULE_SETUP -#line 198 "lexer.l" +#line 198 "libyara/lexer.l" { return _ANY_; } YY_BREAK case 35: YY_RULE_SETUP -#line 199 "lexer.l" +#line 199 
"libyara/lexer.l" { return _NONE_; } YY_BREAK case 36: YY_RULE_SETUP -#line 200 "lexer.l" +#line 200 "libyara/lexer.l" { return _ENTRYPOINT_; } YY_BREAK case 37: YY_RULE_SETUP -#line 201 "lexer.l" +#line 201 "libyara/lexer.l" { return _FILESIZE_; } YY_BREAK case 38: YY_RULE_SETUP -#line 202 "lexer.l" +#line 202 "libyara/lexer.l" { return _MATCHES_; } YY_BREAK case 39: YY_RULE_SETUP -#line 203 "lexer.l" +#line 203 "libyara/lexer.l" { return _CONTAINS_; } YY_BREAK case 40: YY_RULE_SETUP -#line 204 "lexer.l" +#line 204 "libyara/lexer.l" { return _STARTSWITH_; } YY_BREAK case 41: YY_RULE_SETUP -#line 205 "lexer.l" +#line 205 "libyara/lexer.l" { return _ENDSWITH_; } YY_BREAK case 42: YY_RULE_SETUP -#line 206 "lexer.l" +#line 206 "libyara/lexer.l" { return _ICONTAINS_; } YY_BREAK case 43: YY_RULE_SETUP -#line 207 "lexer.l" +#line 207 "libyara/lexer.l" { return _ISTARTSWITH_; } YY_BREAK case 44: YY_RULE_SETUP -#line 208 "lexer.l" +#line 208 "libyara/lexer.l" { return _IENDSWITH_; } YY_BREAK case 45: YY_RULE_SETUP -#line 209 "lexer.l" +#line 209 "libyara/lexer.l" { return _IEQUALS_; } YY_BREAK case 46: YY_RULE_SETUP -#line 210 "lexer.l" +#line 210 "libyara/lexer.l" { return _IMPORT_; } YY_BREAK case 47: YY_RULE_SETUP -#line 211 "lexer.l" +#line 211 "libyara/lexer.l" { return _DEFINED_; } YY_BREAK case 48: YY_RULE_SETUP -#line 214 "lexer.l" +#line 214 "libyara/lexer.l" { BEGIN(comment); } YY_BREAK case 49: YY_RULE_SETUP -#line 215 "lexer.l" +#line 215 "libyara/lexer.l" { BEGIN(INITIAL); } YY_BREAK case 50: /* rule 50 can match eol */ YY_RULE_SETUP -#line 216 "lexer.l" +#line 216 "libyara/lexer.l" { /* skip comments */ } YY_BREAK case 51: YY_RULE_SETUP -#line 219 "lexer.l" +#line 219 "libyara/lexer.l" { /* skip single-line comments */ } YY_BREAK case 52: YY_RULE_SETUP -#line 222 "lexer.l" +#line 222 "libyara/lexer.l" { yyextra->lex_buf_ptr = yyextra->lex_buf; yyextra->lex_buf_len = 0; @@ -1646,12 +1646,12 @@ YY_RULE_SETUP case 53: /* rule 53 can match eol */ YY_RULE_SETUP 
-#line 229 "lexer.l" +#line 229 "libyara/lexer.l" { yytext_to_buffer; } YY_BREAK case 54: YY_RULE_SETUP -#line 232 "lexer.l" +#line 232 "libyara/lexer.l" { if (compiler->include_callback != NULL) @@ -1788,7 +1788,7 @@ case YY_STATE_EOF(str): case YY_STATE_EOF(regexp): case YY_STATE_EOF(include): case YY_STATE_EOF(comment): -#line 364 "lexer.l" +#line 364 "libyara/lexer.l" { yypop_buffer_state(yyscanner); @@ -1801,7 +1801,7 @@ case YY_STATE_EOF(comment): YY_BREAK case 55: YY_RULE_SETUP -#line 375 "lexer.l" +#line 375 "libyara/lexer.l" { yylval->c_string = yr_strdup(yytext); @@ -1814,7 +1814,7 @@ YY_RULE_SETUP YY_BREAK case 56: YY_RULE_SETUP -#line 386 "lexer.l" +#line 386 "libyara/lexer.l" { yylval->c_string = yr_strdup(yytext); @@ -1827,7 +1827,7 @@ YY_RULE_SETUP YY_BREAK case 57: YY_RULE_SETUP -#line 397 "lexer.l" +#line 397 "libyara/lexer.l" { yylval->c_string = yr_strdup(yytext); @@ -1846,7 +1846,7 @@ YY_RULE_SETUP YY_BREAK case 58: YY_RULE_SETUP -#line 414 "lexer.l" +#line 414 "libyara/lexer.l" { yylval->c_string = yr_strdup(yytext); @@ -1865,7 +1865,7 @@ YY_RULE_SETUP YY_BREAK case 59: YY_RULE_SETUP -#line 431 "lexer.l" +#line 431 "libyara/lexer.l" { yylval->c_string = yr_strdup(yytext); @@ -1884,7 +1884,7 @@ YY_RULE_SETUP YY_BREAK case 60: YY_RULE_SETUP -#line 448 "lexer.l" +#line 448 "libyara/lexer.l" { char* text = yytext; @@ -1925,7 +1925,7 @@ YY_RULE_SETUP YY_BREAK case 61: YY_RULE_SETUP -#line 487 "lexer.l" +#line 487 "libyara/lexer.l" { if (strlen(yytext) > 128) @@ -1941,7 +1941,7 @@ YY_RULE_SETUP YY_BREAK case 62: YY_RULE_SETUP -#line 501 "lexer.l" +#line 501 "libyara/lexer.l" { char *endptr; @@ -1984,7 +1984,7 @@ YY_RULE_SETUP YY_BREAK case 63: YY_RULE_SETUP -#line 541 "lexer.l" +#line 541 "libyara/lexer.l" { yylval->double_ = atof(yytext); return _DOUBLE_; @@ -1992,7 +1992,7 @@ YY_RULE_SETUP YY_BREAK case 64: YY_RULE_SETUP -#line 546 "lexer.l" +#line 546 "libyara/lexer.l" { char *endptr; @@ -2011,7 +2011,7 @@ YY_RULE_SETUP YY_BREAK case 65: 
YY_RULE_SETUP -#line 562 "lexer.l" +#line 562 "libyara/lexer.l" { char *endptr; @@ -2030,7 +2030,7 @@ YY_RULE_SETUP YY_BREAK case 66: YY_RULE_SETUP -#line 579 "lexer.l" +#line 579 "libyara/lexer.l" { /* saw closing quote - all done */ alloc_sized_string(s, yyextra->lex_buf_len); @@ -2047,7 +2047,7 @@ YY_RULE_SETUP YY_BREAK case 67: YY_RULE_SETUP -#line 594 "lexer.l" +#line 594 "libyara/lexer.l" { lex_check_space_ok("\t", yyextra->lex_buf_len, YR_LEX_BUF_SIZE); @@ -2057,7 +2057,7 @@ YY_RULE_SETUP YY_BREAK case 68: YY_RULE_SETUP -#line 602 "lexer.l" +#line 602 "libyara/lexer.l" { lex_check_space_ok("\r", yyextra->lex_buf_len, YR_LEX_BUF_SIZE); @@ -2067,7 +2067,7 @@ YY_RULE_SETUP YY_BREAK case 69: YY_RULE_SETUP -#line 610 "lexer.l" +#line 610 "libyara/lexer.l" { lex_check_space_ok("\n", yyextra->lex_buf_len, YR_LEX_BUF_SIZE); @@ -2077,7 +2077,7 @@ YY_RULE_SETUP YY_BREAK case 70: YY_RULE_SETUP -#line 618 "lexer.l" +#line 618 "libyara/lexer.l" { lex_check_space_ok("\"", yyextra->lex_buf_len, YR_LEX_BUF_SIZE); @@ -2087,7 +2087,7 @@ YY_RULE_SETUP YY_BREAK case 71: YY_RULE_SETUP -#line 626 "lexer.l" +#line 626 "libyara/lexer.l" { lex_check_space_ok("\\", yyextra->lex_buf_len, YR_LEX_BUF_SIZE); @@ -2097,7 +2097,7 @@ YY_RULE_SETUP YY_BREAK case 72: YY_RULE_SETUP -#line 634 "lexer.l" +#line 634 "libyara/lexer.l" { int result; @@ -2110,13 +2110,13 @@ YY_RULE_SETUP YY_BREAK case 73: YY_RULE_SETUP -#line 645 "lexer.l" +#line 645 "libyara/lexer.l" { yytext_to_buffer; } YY_BREAK case 74: /* rule 74 can match eol */ YY_RULE_SETUP -#line 648 "lexer.l" +#line 648 "libyara/lexer.l" { syntax_error("unterminated string"); } @@ -2124,14 +2124,14 @@ YY_RULE_SETUP case 75: /* rule 75 can match eol */ YY_RULE_SETUP -#line 653 "lexer.l" +#line 653 "libyara/lexer.l" { syntax_error("illegal escape sequence"); } YY_BREAK case 76: YY_RULE_SETUP -#line 658 "lexer.l" +#line 658 "libyara/lexer.l" { if (yyextra->lex_buf_len > 0) @@ -2160,7 +2160,7 @@ YY_RULE_SETUP YY_BREAK case 77: YY_RULE_SETUP 
-#line 685 "lexer.l" +#line 685 "libyara/lexer.l" { lex_check_space_ok("/", yyextra->lex_buf_len, YR_LEX_BUF_SIZE); @@ -2170,7 +2170,7 @@ YY_RULE_SETUP YY_BREAK case 78: YY_RULE_SETUP -#line 693 "lexer.l" +#line 693 "libyara/lexer.l" { lex_check_space_ok("\\.", yyextra->lex_buf_len, YR_LEX_BUF_SIZE); @@ -2185,20 +2185,20 @@ YY_RULE_SETUP YY_BREAK case 79: YY_RULE_SETUP -#line 706 "lexer.l" +#line 706 "libyara/lexer.l" { yytext_to_buffer; } YY_BREAK case 80: /* rule 80 can match eol */ YY_RULE_SETUP -#line 709 "lexer.l" +#line 709 "libyara/lexer.l" { syntax_error("unterminated regular expression"); } YY_BREAK case 81: YY_RULE_SETUP -#line 714 "lexer.l" +#line 714 "libyara/lexer.l" { yylval->sized_string = NULL; @@ -2209,7 +2209,7 @@ YY_RULE_SETUP YY_BREAK case 82: YY_RULE_SETUP -#line 723 "lexer.l" +#line 723 "libyara/lexer.l" { yylval->sized_string = NULL; @@ -2221,7 +2221,7 @@ YY_RULE_SETUP case 83: /* rule 83 can match eol */ YY_RULE_SETUP -#line 732 "lexer.l" +#line 732 "libyara/lexer.l" { // Match hex-digits with whitespace or comments. 
The latter are stripped // out by hex_lexer.l @@ -2241,12 +2241,12 @@ YY_RULE_SETUP case 84: /* rule 84 can match eol */ YY_RULE_SETUP -#line 749 "lexer.l" +#line 749 "libyara/lexer.l" /* skip whitespace */ YY_BREAK case 85: YY_RULE_SETUP -#line 751 "lexer.l" +#line 751 "libyara/lexer.l" { if (yytext[0] >= 32 && yytext[0] < 127) @@ -2261,10 +2261,10 @@ YY_RULE_SETUP YY_BREAK case 86: YY_RULE_SETUP -#line 763 "lexer.l" +#line 763 "libyara/lexer.l" ECHO; YY_BREAK -#line 2267 "lexer.c" +#line 2267 "libyara/lexer.c" case YY_END_OF_BUFFER: { @@ -3413,7 +3413,7 @@ void yyfree (void * ptr , yyscan_t yyscanner) #define YYTABLES_NAME "yytables" -#line 763 "lexer.l" +#line 763 "libyara/lexer.l" @@ -3540,6 +3540,39 @@ void yyerror( } +int yr_lex_parse_rules_bytes( + const void* rules_data, + size_t rules_size, + YR_COMPILER* compiler) +{ + yyscan_t yyscanner; + + compiler->errors = 0; + + if (yylex_init(&yyscanner) != 0) + { + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; + return compiler->errors; + } + + if (setjmp(compiler->error_recovery) != 0) + return compiler->errors; + + #if YYDEBUG + yydebug = 1; + #endif + + yyset_extra(compiler, yyscanner); + yy_scan_bytes(rules_data, rules_size, yyscanner); + yyset_lineno(1, yyscanner); + yyparse(yyscanner, compiler); + yylex_destroy(yyscanner); + + return compiler->errors; +} + + int yr_lex_parse_rules_string( const char* rules_string, YR_COMPILER* compiler) diff --git a/src/libyara/lexer.l b/src/libyara/lexer.l index 53e6d58..ec6d4c6 100644 --- a/src/libyara/lexer.l +++ b/src/libyara/lexer.l @@ -886,6 +886,39 @@ void yyerror( } +int yr_lex_parse_rules_bytes( + const void* rules_data, + size_t rules_size, + YR_COMPILER* compiler) +{ + yyscan_t yyscanner; + + compiler->errors = 0; + + if (yylex_init(&yyscanner) != 0) + { + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; + return compiler->errors; + } + + if (setjmp(compiler->error_recovery) != 0) + return compiler->errors; + + 
#if YYDEBUG + yydebug = 1; + #endif + + yyset_extra(compiler, yyscanner); + yy_scan_bytes(rules_data, rules_size, yyscanner); + yyset_lineno(1, yyscanner); + yyparse(yyscanner, compiler); + yylex_destroy(yyscanner); + + return compiler->errors; +} + + int yr_lex_parse_rules_string( const char* rules_string, YR_COMPILER* compiler) diff --git a/src/libyara/libyara.c b/src/libyara/libyara.c index 0ce3154..f9a2c60 100644 --- a/src/libyara/libyara.c +++ b/src/libyara/libyara.c @@ -52,6 +52,17 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. YR_THREAD_STORAGE_KEY yr_yyfatal_trampoline_tls; YR_THREAD_STORAGE_KEY yr_trycatch_trampoline_tls; +#if !(_WIN32 || __CYGWIN__) + +#include <pthread.h> +#include <signal.h> + +struct sigaction old_sigsegv_exception_handler; +struct sigaction old_sigbus_exception_handler; +int exception_handler_usecount = 0; +pthread_mutex_t exception_handler_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif + static int init_count = 0; static struct yr_config_var diff --git a/src/libyara/modules/console/console.c b/src/libyara/modules/console/console.c index a5c46ac..1342093 100644 --- a/src/libyara/modules/console/console.c +++ b/src/libyara/modules/console/console.c @@ -46,9 +46,18 @@ define_function(log_string) // Assume the entire string is non-printable, so allocate 4 times the // space so that we can represent each byte as an escaped value. eg: \x00 // Add an extra byte for the NULL terminator. - char* msg = (char*) yr_calloc((s->length * 4) + 1, sizeof(char)); - if (msg == NULL) - return_integer(YR_UNDEFINED); + char* msg; + if (s->length == 0) + { + callback(ctx, CALLBACK_MSG_CONSOLE_LOG, (void*) "", ctx->user_data); + return_integer(1); + } + else + { + msg = (char*) yr_calloc((s->length * 4) + 1, sizeof(char)); + if (msg == NULL) + return_integer(YR_UNDEFINED); + } char* p = msg; for (size_t i = 0; i < s->length; i++) @@ -86,7 +95,7 @@ define_function(log_string_msg) // Add an extra byte for the NULL terminator. 
size_t msg_len = strlen(m) + (s->length * 4) + 1; char* msg = (char*) yr_calloc(msg_len, sizeof(char)); - if (msg == NULL) + if (msg == NULL && msg_len > 0) return_integer(YR_UNDEFINED); char* p = msg; diff --git a/src/libyara/modules/dex/dex.c b/src/libyara/modules/dex/dex.c index 6f302e5..f850d20 100644 --- a/src/libyara/modules/dex/dex.c +++ b/src/libyara/modules/dex/dex.c @@ -369,39 +369,56 @@ end_declarations // https://android.googlesource.com/platform/dalvik/+/android-4.4.2_r2/libdex/Leb128.cpp -static int32_t read_uleb128(const uint8_t* pStream, uint32_t* size) +static int32_t read_uleb128_bounded( + const uint8_t* pStream, + const uint8_t* pStreamEnd, + uint32_t* size, + bool* error) { const uint8_t* ptr = pStream; + int32_t result = 0; - int32_t result = *(ptr++); + *error = false; + if (ptr == pStreamEnd) + goto error; + + result = *(ptr++); *size = *size + 1; if (result > 0x7f) { + if (ptr == pStreamEnd) + goto error; int cur = *(ptr++); *size = *size + 1; result = (result & 0x7f) | ((cur & 0x7f) << 7); if (cur > 0x7f) { + if (ptr == pStreamEnd) + goto error; cur = *(ptr++); *size = *size + 1; result |= (cur & 0x7f) << 14; if (cur > 0x7f) { + if (ptr == pStreamEnd) + goto error; cur = *(ptr++); *size = *size + 1; result |= (cur & 0x7f) << 21; if (cur > 0x7f) { + if (ptr == pStreamEnd) + goto error; /* * Note: We don't check to see if cur is out of * range here, meaning we tolerate garbage in the * high four-order bits. 
*/ - cur = *(ptr++); + cur = *ptr; *size = *size + 1; result |= cur << 28; } @@ -410,6 +427,10 @@ static int32_t read_uleb128(const uint8_t* pStream, uint32_t* size) } return result; + +error: + *error = true; + return result; } @@ -564,18 +585,26 @@ uint32_t load_encoded_field( printf("[DEX] Parse encoded field start_offset:0x%zx\n", start_offset); #endif + const uint8_t* data_cur_start = dex->data + start_offset; if (!fits_in_dex(dex, dex->data + start_offset, sizeof(uint32_t) * 2)) return 0; + const uint8_t* data_end = dex->data + dex->data_size; uint32_t current_size = 0; - + bool error = false; encoded_field_t encoded_field; - encoded_field.field_idx_diff = (uint32_t) read_uleb128( - (dex->data + start_offset + current_size), &current_size); + encoded_field.field_idx_diff = + (uint32_t) read_uleb128_bounded((dex->data + start_offset + current_size), + data_end, &current_size, &error); + if (error) + return 0; - encoded_field.access_flags = (uint32_t) read_uleb128( - (dex->data + start_offset + current_size), &current_size); + encoded_field.access_flags = + (uint32_t) read_uleb128_bounded((dex->data + start_offset + current_size), + data_end, &current_size, &error); + if (error) + return 0; yr_set_integer( encoded_field.field_idx_diff, @@ -700,20 +729,29 @@ uint32_t load_encoded_method( printf("[DEX] Parse encoded method start_offset:0x%zx\n", start_offset); #endif - if (!fits_in_dex(dex, dex->data + start_offset, sizeof(uint32_t) * 3)) + const uint8_t* data_cur_start = dex->data + start_offset; + if (!fits_in_dex(dex, data_cur_start, sizeof(uint32_t) * 3)) return 0; + const uint8_t* data_end = dex->data + dex->data_size; uint32_t current_size = 0; + bool error = false; encoded_method_t encoded_method; - encoded_method.method_idx_diff = (uint32_t) read_uleb128( - (dex->data + start_offset + current_size), &current_size); + encoded_method.method_idx_diff = (uint32_t) read_uleb128_bounded( + (data_cur_start + current_size), data_end, &current_size, &error); + if (error) + return 0; - 
encoded_method.access_flags = (uint32_t) read_uleb128( - (dex->data + start_offset + current_size), &current_size); + encoded_method.access_flags = (uint32_t) read_uleb128_bounded( + (data_cur_start + current_size), data_end, &current_size, &error); + if (error) + return 0; - encoded_method.code_off = (uint32_t) read_uleb128( - (dex->data + start_offset + current_size), &current_size); + encoded_method.code_off = (uint32_t) read_uleb128_bounded( + (data_cur_start + current_size), data_end, &current_size, &error); + if (error) + return 0; yr_set_integer( encoded_method.method_idx_diff, @@ -909,6 +947,8 @@ void dex_parse(DEX* dex, uint64_t base_address) uint32_t index_encoded_method = 0; uint32_t index_encoded_field = 0; + const uint8_t* data_end = dex->data + dex->data_size; + if (!struct_fits_in_dex(dex, dex->data, dex_header_t)) return; @@ -944,9 +984,12 @@ void dex_parse(DEX* dex, uint64_t base_address) sizeof(uint32_t))) continue; - uint32_t value = (uint32_t) read_uleb128( + bool error = false; + uint32_t value = (uint32_t) read_uleb128_bounded( (dex->data + yr_le32toh(string_id_item->string_data_offset)), - &uleb128_size); + data_end, &uleb128_size, &error); + if (error) + continue; #ifdef DEBUG_DEX_MODULE printf("[DEX] STRING ID item size:0x%x\n", value); @@ -1234,25 +1277,34 @@ void dex_parse(DEX* dex, uint64_t base_address) return; uleb128_size = 0; + bool error = false; - class_data_item.static_fields_size = (uint32_t) read_uleb128( + class_data_item.static_fields_size = (uint32_t) read_uleb128_bounded( (dex->data + yr_le32toh(class_id_item->class_data_offset)), - &uleb128_size); + data_end, &uleb128_size, &error); + if (error) + return; - class_data_item.instance_fields_size = (uint32_t) read_uleb128( + class_data_item.instance_fields_size = (uint32_t) read_uleb128_bounded( (dex->data + yr_le32toh(class_id_item->class_data_offset) + uleb128_size), - &uleb128_size); + data_end, &uleb128_size, &error); + if (error) + return; - class_data_item.direct_methods_size = (uint32_t) 
read_uleb128( + class_data_item.direct_methods_size = (uint32_t) read_uleb128_bounded( (dex->data + yr_le32toh(class_id_item->class_data_offset) + uleb128_size), - &uleb128_size); + data_end, &uleb128_size, &error); + if (error) + return; - class_data_item.virtual_methods_size = (uint32_t) read_uleb128( + class_data_item.virtual_methods_size = (uint32_t) read_uleb128_bounded( (dex->data + yr_le32toh(class_id_item->class_data_offset) + uleb128_size), - &uleb128_size); + data_end, &uleb128_size, &error); + if (error) + return; yr_set_integer( class_data_item.static_fields_size, @@ -1462,7 +1514,7 @@ int module_load( foreach_memory_block(iterator, block) { - const uint8_t* block_data = block->fetch_data(block); + const uint8_t* block_data = yr_fetch_block_data(block); if (block_data == NULL) continue; diff --git a/src/libyara/modules/dotnet/dotnet.c b/src/libyara/modules/dotnet/dotnet.c index cbbe87f..5c70e31 100644 --- a/src/libyara/modules/dotnet/dotnet.c +++ b/src/libyara/modules/dotnet/dotnet.c @@ -555,13 +555,13 @@ static int32_t read_blob_signed(const uint8_t** data, uint32_t* len) // first byte is enough to decode the length // without worrying about endiannity - uint8_t first_byte = *(*data); + int8_t first_byte = *(*data); // Encode as a one-byte integer, bit 7 clear, rotated value in bits 6 // through 0, giving 0x01 (-2^6) to 0x7E (2^6-1). 
if (!(first_byte & 0x80)) { - uint8_t tmp = first_byte >> 1; + int8_t tmp = first_byte >> 1; // sign extension in case of negative number if (first_byte & 0x1) tmp |= 0xC0; @@ -882,7 +882,7 @@ static char* parse_signature_type( if (!tmp) break; - uint32_t* sizes = NULL; + int32_t* sizes = NULL; int32_t* lo_bounds = NULL; // Read number of dimensions @@ -928,9 +928,9 @@ static char* parse_signature_type( if (num_sizes > i || num_lowbounds > i) { if (num_lowbounds > i && lo_bounds[i] != 0) - sstr_appendf(ss, "%lu...", lo_bounds[i]); + sstr_appendf(ss, "%d...", lo_bounds[i]); if (num_sizes > i && sizes[i] != 0) - sstr_appendf(ss, "%lu", sizes[i]); + sstr_appendf(ss, "%d", sizes[i]); } if (i + 1 != rank) sstr_appendf(ss, ","); @@ -1179,7 +1179,7 @@ static bool parse_method_params( // Array to hold all the possible parameters PARAMETERS* params = yr_calloc(param_count, sizeof(PARAMETERS)); - if (!params) + if (params == NULL && param_count > 0) return false; for (uint32_t idx = 0; idx < param_count; ++idx) @@ -1190,7 +1190,7 @@ static bool parse_method_params( char* name = NULL; bool alloc = false; // Flag if name needs freeing - if (data) // We need param table mostly just for the param name + if (data) // We need param table mostly just for the param name { PARAM_ROW row = {0}; bool result = read_param(ctx, data, &row); @@ -3515,7 +3515,7 @@ int module_load( { PIMAGE_NT_HEADERS32 pe_header; - block_data = block->fetch_data(block); + block_data = yr_fetch_block_data(block); if (block_data == NULL) continue; diff --git a/src/libyara/modules/elf/elf.c b/src/libyara/modules/elf/elf.c index 2a34cc8..ff54186 100644 --- a/src/libyara/modules/elf/elf.c +++ b/src/libyara/modules/elf/elf.c @@ -39,6 +39,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include "../crypto.h" +#include "../exception.h" #define MODULE_NAME elf @@ -82,7 +83,7 @@ define_function(telfhash) int symbol_count = 0; char** clean_names = yr_calloc(list->count, sizeof(*clean_names)); - if (!clean_names) + if (clean_names == NULL && list->count > 0) return_string(YR_UNDEFINED); for (ELF_SYMBOL* i = list->symbols; i != NULL; i = i->next) @@ -1113,7 +1114,7 @@ int module_load( foreach_memory_block(iterator, block) { - const uint8_t* block_data = block->fetch_data(block); + const uint8_t* block_data = yr_fetch_block_data(block); if (block_data == NULL) continue; @@ -1123,91 +1124,79 @@ int module_load( return ERROR_INSUFFICIENT_MEMORY; module_object->data = elf; - switch (get_elf_class_data(block_data, block->size)) + int class_data = get_elf_class_data(block_data, block->size); + + if (class_data == CLASS_DATA(ELF_CLASS_32, ELF_DATA_2LSB) && + block->size > sizeof(elf32_header_t)) { - case CLASS_DATA(ELF_CLASS_32, ELF_DATA_2LSB): + elf_header32 = (elf32_header_t*) block_data; - if (block->size > sizeof(elf32_header_t)) + if (!(context->flags & SCAN_FLAGS_PROCESS_MEMORY) || + yr_le16toh(elf_header32->type) == ELF_ET_EXEC) { - elf_header32 = (elf32_header_t*) block_data; - - if (!(context->flags & SCAN_FLAGS_PROCESS_MEMORY) || - yr_le16toh(elf_header32->type) == ELF_ET_EXEC) - { - parse_result = parse_elf_header_32_le( - elf, - elf_header32, - block->base, - block->size, - context->flags, - module_object); - } + parse_result = parse_elf_header_32_le( + elf, + elf_header32, + block->base, + block->size, + context->flags, + module_object); + break; } + } else if ( + class_data == CLASS_DATA(ELF_CLASS_32, ELF_DATA_2MSB) && + block->size > sizeof(elf32_header_t)) + { + elf_header32 = (elf32_header_t*) block_data; - break; - - case CLASS_DATA(ELF_CLASS_32, ELF_DATA_2MSB): - - if (block->size > sizeof(elf32_header_t)) + if (!(context->flags & SCAN_FLAGS_PROCESS_MEMORY) || + yr_be16toh(elf_header32->type) == ELF_ET_EXEC) { - 
elf_header32 = (elf32_header_t*) block_data; - - if (!(context->flags & SCAN_FLAGS_PROCESS_MEMORY) || - yr_be16toh(elf_header32->type) == ELF_ET_EXEC) - { - parse_result = parse_elf_header_32_be( - elf, - elf_header32, - block->base, - block->size, - context->flags, - module_object); - } + parse_result = parse_elf_header_32_be( + elf, + elf_header32, + block->base, + block->size, + context->flags, + module_object); + break; } + } else if ( + class_data == CLASS_DATA(ELF_CLASS_64, ELF_DATA_2LSB) && + block->size > sizeof(elf64_header_t)) + { + elf_header64 = (elf64_header_t*) block_data; - break; - - case CLASS_DATA(ELF_CLASS_64, ELF_DATA_2LSB): - - if (block->size > sizeof(elf64_header_t)) + if (!(context->flags & SCAN_FLAGS_PROCESS_MEMORY) || + yr_le16toh(elf_header64->type) == ELF_ET_EXEC) { - elf_header64 = (elf64_header_t*) block_data; - - if (!(context->flags & SCAN_FLAGS_PROCESS_MEMORY) || - yr_le16toh(elf_header64->type) == ELF_ET_EXEC) - { - parse_result = parse_elf_header_64_le( - elf, - elf_header64, - block->base, - block->size, - context->flags, - module_object); - } + parse_result = parse_elf_header_64_le( + elf, + elf_header64, + block->base, + block->size, + context->flags, + module_object); + break; } + } else if ( + class_data == CLASS_DATA(ELF_CLASS_64, ELF_DATA_2MSB) && + block->size > sizeof(elf64_header_t)) + { + elf_header64 = (elf64_header_t*) block_data; - break; - - case CLASS_DATA(ELF_CLASS_64, ELF_DATA_2MSB): - - if (block->size > sizeof(elf64_header_t)) + if (!(context->flags & SCAN_FLAGS_PROCESS_MEMORY) || + yr_be16toh(elf_header64->type) == ELF_ET_EXEC) { - elf_header64 = (elf64_header_t*) block_data; - - if (!(context->flags & SCAN_FLAGS_PROCESS_MEMORY) || - yr_be16toh(elf_header64->type) == ELF_ET_EXEC) - { - parse_result = parse_elf_header_64_be( - elf, - elf_header64, - block->base, - block->size, - context->flags, - module_object); - } + parse_result = parse_elf_header_64_be( + elf, + elf_header64, + block->base, + block->size, + 
context->flags, + module_object); + break; } - - break; } } diff --git a/src/libyara/modules/hash/hash.c b/src/libyara/modules/hash/hash.c index 84feb4e..51f5007 100644 --- a/src/libyara/modules/hash/hash.c +++ b/src/libyara/modules/hash/hash.c @@ -326,7 +326,7 @@ define_function(data_md5) if (offset >= block->base && offset < block->base + block->size) { - const uint8_t* block_data = block->fetch_data(block); + const uint8_t* block_data = yr_fetch_block_data(block); if (block_data != NULL) { @@ -361,7 +361,7 @@ define_function(data_md5) return_string(YR_UNDEFINED); } - if (block->base + block->size > offset + length) + if (block->base + block->size >= offset + length) break; } @@ -456,7 +456,7 @@ define_function(data_sha1) // if desired block within current block if (offset >= block->base && offset < block->base + block->size) { - const uint8_t* block_data = block->fetch_data(block); + const uint8_t* block_data = yr_fetch_block_data(block); if (block_data != NULL) { @@ -490,7 +490,7 @@ define_function(data_sha1) return_string(YR_UNDEFINED); } - if (block->base + block->size > offset + length) + if (block->base + block->size >= offset + length) break; } @@ -585,7 +585,7 @@ define_function(data_sha256) // if desired block within current block if (offset >= block->base && offset < block->base + block->size) { - const uint8_t* block_data = block->fetch_data(block); + const uint8_t* block_data = yr_fetch_block_data(block); if (block_data != NULL) { @@ -618,7 +618,7 @@ define_function(data_sha256) return_string(YR_UNDEFINED); } - if (block->base + block->size > offset + length) + if (block->base + block->size >= offset + length) break; } @@ -674,7 +674,7 @@ define_function(data_checksum32) { if (offset >= block->base && offset < block->base + block->size) { - const uint8_t* block_data = block->fetch_data(block); + const uint8_t* block_data = yr_fetch_block_data(block); if (block_data != NULL) { @@ -703,7 +703,7 @@ define_function(data_checksum32) 
return_integer(YR_UNDEFINED); } - if (block->base + block->size > offset + length) + if (block->base + block->size >= offset + length) break; } @@ -765,7 +765,7 @@ define_function(data_crc32) { if (offset >= block->base && offset < block->base + block->size) { - const uint8_t* block_data = block->fetch_data(block); + const uint8_t* block_data = yr_fetch_block_data(block); if (block_data != NULL) { @@ -796,7 +796,7 @@ define_function(data_crc32) return_integer(YR_UNDEFINED); } - if (block->base + block->size > offset + length) + if (block->base + block->size >= offset + length) break; } diff --git a/src/libyara/modules/macho/macho.c b/src/libyara/modules/macho/macho.c index b682621..b0d5697 100644 --- a/src/libyara/modules/macho/macho.c +++ b/src/libyara/modules/macho/macho.c @@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define MODULE_NAME macho // Check for Mach-O binary magic constant. - int is_macho_file_block(const uint32_t* magic) { return *magic == MH_MAGIC || *magic == MH_CIGAM || *magic == MH_MAGIC_64 || @@ -43,23 +42,12 @@ int is_macho_file_block(const uint32_t* magic) } // Check if file is for 32-bit architecture. - -int macho_is_32(const uint8_t* magic) -{ - // Magic must be [CE]FAEDFE or FEEDFA[CE]. - return magic[0] == 0xce || magic[3] == 0xce; -} - -// Check if file is for big-endian architecture. - -int macho_is_big(const uint8_t* magic) +int macho_is_32(uint32_t magic) { - // Magic must be [FE]EDFACE or [FE]EDFACF. - return magic[0] == 0xfe; + return magic == MH_MAGIC || magic == MH_CIGAM; } // Check for Mach-O fat binary magic constant. - int is_fat_macho_file_block(const uint32_t* magic) { return *magic == FAT_MAGIC || *magic == FAT_CIGAM || *magic == FAT_MAGIC_64 || @@ -67,17 +55,23 @@ int is_fat_macho_file_block(const uint32_t* magic) } // Check if file is 32-bit fat file. - -int macho_fat_is_32(const uint8_t* magic) +int macho_fat_is_32(const uint32_t* magic) { - // Magic must be CAFEBA[BE]. 
- return magic[3] == 0xbe; + return yr_be32toh(*magic) == FAT_MAGIC; } static int should_swap_bytes(const uint32_t magic) { +// In big-endian platforms byte swapping is needed for little-endian files +// but in little-endian platforms the files that need swapping are the +// big-endian ones. +#if defined(WORDS_BIGENDIAN) return magic == MH_CIGAM || magic == MH_CIGAM_64 || magic == FAT_CIGAM || magic == FAT_CIGAM_64; +#else + return magic == MH_MAGIC || magic == MH_MAGIC_64 || magic == FAT_MAGIC || + magic == FAT_MAGIC_64; +#endif } static void swap_mach_header(yr_mach_header_64_t* mh) @@ -90,7 +84,7 @@ static void swap_mach_header(yr_mach_header_64_t* mh) mh->sizeofcmds = yr_bswap32(mh->sizeofcmds); mh->flags = yr_bswap32(mh->flags); - if (!macho_is_32((const uint8_t*) &mh->magic)) + if (!macho_is_32(mh->magic)) mh->reserved = yr_bswap32(mh->reserved); } @@ -194,7 +188,7 @@ int macho_offset_to_rva(uint64_t offset, uint64_t* result, YR_OBJECT* object) for (int i = 0; i < segment_count; i++) { uint64_t start = yr_get_integer(object, "segments[%i].fileoff", i); - uint64_t end = start + yr_get_integer(object, "segments[%i].filesize", i); + uint64_t end = start + yr_get_integer(object, "segments[%i].fsize", i); if (offset >= start && offset < end) { @@ -211,6 +205,7 @@ int macho_offset_to_rva(uint64_t offset, uint64_t* result, YR_OBJECT* object) void macho_handle_unixthread( const uint8_t* data, size_t size, + uint64_t base_address, YR_OBJECT* object, YR_SCAN_CONTEXT* context) { @@ -221,8 +216,8 @@ void macho_handle_unixthread( return; // command_size is the size indicated in yr_thread_command_t structure, but - // limited to the data's size because we can't rely on the structure having a - // valid size. + // limited to the data's size because we can't rely on the structure having + // a valid size. uint32_t command_size = yr_min(size, ((yr_thread_command_t*) data)->cmdsize); // command_size should be at least the size of yr_thread_command_t. 
@@ -244,58 +239,67 @@ void macho_handle_unixthread( { case CPU_TYPE_MC680X0: { - if (thread_state_size >= sizeof(yr_m68k_thread_state_t)) - address = ((yr_m68k_thread_state_t*) thread_state)->pc; + if (thread_state_size < sizeof(yr_m68k_thread_state_t)) + return; + address = ((yr_m68k_thread_state_t*) thread_state)->pc; break; } case CPU_TYPE_MC88000: { - if (thread_state_size >= sizeof(yr_m88k_thread_state_t)) - address = ((yr_m88k_thread_state_t*) thread_state)->xip; + if (thread_state_size < sizeof(yr_m88k_thread_state_t)) + return; + address = ((yr_m88k_thread_state_t*) thread_state)->xip; break; } case CPU_TYPE_SPARC: { - if (thread_state_size >= sizeof(yr_sparc_thread_state_t)) - address = ((yr_sparc_thread_state_t*) thread_state)->pc; + if (thread_state_size < sizeof(yr_sparc_thread_state_t)) + return; + address = ((yr_sparc_thread_state_t*) thread_state)->pc; break; } case CPU_TYPE_POWERPC: { - if (thread_state_size >= sizeof(yr_ppc_thread_state_t)) - address = ((yr_ppc_thread_state_t*) thread_state)->srr0; + if (thread_state_size < sizeof(yr_ppc_thread_state_t)) + return; + address = ((yr_ppc_thread_state_t*) thread_state)->srr0; break; } case CPU_TYPE_X86: { - if (thread_state_size >= sizeof(yr_x86_thread_state_t)) - address = ((yr_x86_thread_state_t*) thread_state)->eip; + if (thread_state_size < sizeof(yr_x86_thread_state_t)) + return; + address = ((yr_x86_thread_state_t*) thread_state)->eip; break; } case CPU_TYPE_ARM: { - if (thread_state_size >= sizeof(yr_arm_thread_state_t)) - address = ((yr_arm_thread_state_t*) thread_state)->pc; + if (thread_state_size < sizeof(yr_arm_thread_state_t)) + return; + address = ((yr_arm_thread_state_t*) thread_state)->pc; break; } case CPU_TYPE_X86_64: { - if (thread_state_size >= sizeof(yr_x86_thread_state64_t)) - address = ((yr_x86_thread_state64_t*) thread_state)->rip; + if (thread_state_size < sizeof(yr_x86_thread_state64_t)) + return; + address = ((yr_x86_thread_state64_t*) thread_state)->rip; is64 = true; break; 
} case CPU_TYPE_ARM64: { - if (thread_state_size >= sizeof(yr_arm_thread_state64_t)) - address = ((yr_arm_thread_state64_t*) thread_state)->pc; + if (thread_state_size < sizeof(yr_arm_thread_state64_t)) + return; + address = ((yr_arm_thread_state64_t*) thread_state)->pc; is64 = true; break; } case CPU_TYPE_POWERPC64: { - if (thread_state_size >= sizeof(yr_ppc_thread_state64_t)) - address = ((yr_ppc_thread_state64_t*) thread_state)->srr0; + if (thread_state_size < sizeof(yr_ppc_thread_state64_t)) + return; + address = ((yr_ppc_thread_state64_t*) thread_state)->srr0; is64 = true; break; } @@ -314,7 +318,7 @@ void macho_handle_unixthread( if (context->flags & SCAN_FLAGS_PROCESS_MEMORY) { - yr_set_integer(address, object, "entry_point"); + yr_set_integer(base_address + address, object, "entry_point"); } else { @@ -431,13 +435,16 @@ void macho_handle_segment( yr_set_integer(sec.size, object, "segments[%i].sections[%i].size", i, j); - yr_set_integer(sec.offset, object, "segments[%i].sections[%i].offset", i, j); + yr_set_integer( + sec.offset, object, "segments[%i].sections[%i].offset", i, j); yr_set_integer(sec.align, object, "segments[%i].sections[%i].align", i, j); - yr_set_integer(sec.reloff, object, "segments[%i].sections[%i].reloff", i, j); + yr_set_integer( + sec.reloff, object, "segments[%i].sections[%i].reloff", i, j); - yr_set_integer(sec.nreloc, object, "segments[%i].sections[%i].nreloc", i, j); + yr_set_integer( + sec.nreloc, object, "segments[%i].sections[%i].nreloc", i, j); yr_set_integer(sec.flags, object, "segments[%i].sections[%i].flags", i, j); @@ -518,13 +525,16 @@ void macho_handle_segment_64( yr_set_integer(sec.size, object, "segments[%i].sections[%i].size", i, j); - yr_set_integer(sec.offset, object, "segments[%i].sections[%i].offset", i, j); + yr_set_integer( + sec.offset, object, "segments[%i].sections[%i].offset", i, j); yr_set_integer(sec.align, object, "segments[%i].sections[%i].align", i, j); - yr_set_integer(sec.reloff, object, 
"segments[%i].sections[%i].reloff", i, j); + yr_set_integer( + sec.reloff, object, "segments[%i].sections[%i].reloff", i, j); - yr_set_integer(sec.nreloc, object, "segments[%i].sections[%i].nreloc", i, j); + yr_set_integer( + sec.nreloc, object, "segments[%i].sections[%i].nreloc", i, j); yr_set_integer(sec.flags, object, "segments[%i].sections[%i].flags", i, j); @@ -544,6 +554,7 @@ void macho_handle_segment_64( void macho_parse_file( const uint8_t* data, const uint64_t size, + const uint64_t base_address, YR_OBJECT* object, YR_SCAN_CONTEXT* context) { @@ -552,15 +563,20 @@ void macho_parse_file( if (size < sizeof(yr_mach_header_64_t)) return; - size_t header_size = macho_is_32(data) ? sizeof(yr_mach_header_32_t) - : sizeof(yr_mach_header_64_t); - - // yr_mach_header_64_t is used for storing the header for both for 32-bits and - // 64-bits files. yr_mach_header_64_t is exactly like yr_mach_header_32_t - // but with an extra "reserved" field at the end. + // yr_mach_header_64_t is used for storing the header for both for 32-bits + // and 64-bits files. yr_mach_header_64_t is exactly like + // yr_mach_header_32_t but with an extra "reserved" field at the end. yr_mach_header_64_t header; - memcpy(&header, data, header_size); + memcpy(&header, data, sizeof(yr_mach_header_64_t)); + + // The magic number is always handled as big-endian. If the magic bytes are + // CA FE BA BE, then header.magic is 0xCAFEBABE. + header.magic = yr_be32toh(header.magic); + + size_t header_size = (header.magic == MH_MAGIC || header.magic == MH_CIGAM) + ? sizeof(yr_mach_header_32_t) + : sizeof(yr_mach_header_64_t); int should_swap = should_swap_bytes(header.magic); @@ -576,7 +592,7 @@ void macho_parse_file( yr_set_integer(header.flags, object, "flags"); // The "reserved" field exists only in 64 bits files. - if (!macho_is_32(data)) + if (!macho_is_32(header.magic)) yr_set_integer(header.reserved, object, "reserved"); // The first command parsing pass handles only segments. 
@@ -641,7 +657,8 @@ void macho_parse_file( switch (command_struct.cmd) { case LC_UNIXTHREAD: - macho_handle_unixthread(command, size - parsed_size, object, context); + macho_handle_unixthread( + command, size - parsed_size, base_address, object, context); break; case LC_MAIN: macho_handle_main(command, size - parsed_size, object, context); @@ -661,10 +678,11 @@ void macho_load_fat_arch_header( uint32_t num, yr_fat_arch_64_t* arch) { - if (macho_fat_is_32(data)) + if (macho_fat_is_32((uint32_t*) data)) { yr_fat_arch_32_t* arch32 = - (yr_fat_arch_32_t*) (data + sizeof(yr_fat_header_t) + (num * sizeof(yr_fat_arch_32_t))); + (yr_fat_arch_32_t*) (data + sizeof(yr_fat_header_t) + + (num * sizeof(yr_fat_arch_32_t))); arch->cputype = yr_be32toh(arch32->cputype); arch->cpusubtype = yr_be32toh(arch32->cpusubtype); @@ -676,7 +694,8 @@ void macho_load_fat_arch_header( else { yr_fat_arch_64_t* arch64 = - (yr_fat_arch_64_t*) (data + sizeof(yr_fat_header_t) + (num * sizeof(yr_fat_arch_64_t))); + (yr_fat_arch_64_t*) (data + sizeof(yr_fat_header_t) + + (num * sizeof(yr_fat_arch_64_t))); arch->cputype = yr_be32toh(arch64->cputype); arch->cpusubtype = yr_be32toh(arch64->cpusubtype); @@ -690,12 +709,13 @@ void macho_load_fat_arch_header( void macho_parse_fat_file( const uint8_t* data, const uint64_t size, + const uint64_t base_address, YR_OBJECT* object, YR_SCAN_CONTEXT* context) { size_t fat_arch_sz = sizeof(yr_fat_arch_64_t); - if (macho_fat_is_32(data)) + if (macho_fat_is_32((uint32_t*) data)) fat_arch_sz = sizeof(yr_fat_arch_32_t); if (size < sizeof(yr_fat_header_t)) @@ -739,6 +759,7 @@ void macho_parse_fat_file( macho_parse_file( data + arch.offset, arch.size, + base_address, yr_get_object(object, "file[%i]", i), context); } @@ -797,10 +818,12 @@ void macho_set_definitions(YR_OBJECT* object) yr_set_integer(CPU_SUBTYPE_PENTII_M3, object, "CPU_SUBTYPE_PENTII_M3"); yr_set_integer(CPU_SUBTYPE_PENTII_M5, object, "CPU_SUBTYPE_PENTII_M5"); yr_set_integer(CPU_SUBTYPE_CELERON, object, 
"CPU_SUBTYPE_CELERON"); - yr_set_integer(CPU_SUBTYPE_CELERON_MOBILE, object, "CPU_SUBTYPE_CELERON_MOBILE"); + yr_set_integer( + CPU_SUBTYPE_CELERON_MOBILE, object, "CPU_SUBTYPE_CELERON_MOBILE"); yr_set_integer(CPU_SUBTYPE_PENTIUM_3, object, "CPU_SUBTYPE_PENTIUM_3"); yr_set_integer(CPU_SUBTYPE_PENTIUM_3_M, object, "CPU_SUBTYPE_PENTIUM_3_M"); - yr_set_integer(CPU_SUBTYPE_PENTIUM_3_XEON, object, "CPU_SUBTYPE_PENTIUM_3_XEON"); + yr_set_integer( + CPU_SUBTYPE_PENTIUM_3_XEON, object, "CPU_SUBTYPE_PENTIUM_3_XEON"); yr_set_integer(CPU_SUBTYPE_PENTIUM_M, object, "CPU_SUBTYPE_PENTIUM_M"); yr_set_integer(CPU_SUBTYPE_PENTIUM_4, object, "CPU_SUBTYPE_PENTIUM_4"); yr_set_integer(CPU_SUBTYPE_PENTIUM_4_M, object, "CPU_SUBTYPE_PENTIUM_4_M"); @@ -830,7 +853,8 @@ void macho_set_definitions(YR_OBJECT* object) yr_set_integer(CPU_SUBTYPE_POWERPC_602, object, "CPU_SUBTYPE_POWERPC_602"); yr_set_integer(CPU_SUBTYPE_POWERPC_603, object, "CPU_SUBTYPE_POWERPC_603"); yr_set_integer(CPU_SUBTYPE_POWERPC_603e, object, "CPU_SUBTYPE_POWERPC_603e"); - yr_set_integer(CPU_SUBTYPE_POWERPC_603ev, object, "CPU_SUBTYPE_POWERPC_603ev"); + yr_set_integer( + CPU_SUBTYPE_POWERPC_603ev, object, "CPU_SUBTYPE_POWERPC_603ev"); yr_set_integer(CPU_SUBTYPE_POWERPC_604, object, "CPU_SUBTYPE_POWERPC_604"); yr_set_integer(CPU_SUBTYPE_POWERPC_604e, object, "CPU_SUBTYPE_POWERPC_604e"); yr_set_integer(CPU_SUBTYPE_POWERPC_620, object, "CPU_SUBTYPE_POWERPC_620"); @@ -868,7 +892,8 @@ void macho_set_definitions(YR_OBJECT* object) yr_set_integer(MH_NOFIXPREBINDING, object, "MH_NOFIXPREBINDING"); yr_set_integer(MH_PREBINDABLE, object, "MH_PREBINDABLE"); yr_set_integer(MH_ALLMODSBOUND, object, "MH_ALLMODSBOUND"); - yr_set_integer(MH_SUBSECTIONS_VIA_SYMBOLS, object, "MH_SUBSECTIONS_VIA_SYMBOLS"); + yr_set_integer( + MH_SUBSECTIONS_VIA_SYMBOLS, object, "MH_SUBSECTIONS_VIA_SYMBOLS"); yr_set_integer(MH_CANONICAL, object, "MH_CANONICAL"); yr_set_integer(MH_WEAK_DEFINES, object, "MH_WEAK_DEFINES"); yr_set_integer(MH_BINDS_TO_WEAK, 
object, "MH_BINDS_TO_WEAK"); @@ -901,7 +926,8 @@ void macho_set_definitions(YR_OBJECT* object) yr_set_integer(S_CSTRING_LITERALS, object, "S_CSTRING_LITERALS"); yr_set_integer(S_4BYTE_LITERALS, object, "S_4BYTE_LITERALS"); yr_set_integer(S_8BYTE_LITERALS, object, "S_8BYTE_LITERALS"); - yr_set_integer(S_NON_LAZY_SYMBOL_POINTERS, object, "S_NON_LAZY_SYMBOL_POINTERS"); + yr_set_integer( + S_NON_LAZY_SYMBOL_POINTERS, object, "S_NON_LAZY_SYMBOL_POINTERS"); yr_set_integer(S_LAZY_SYMBOL_POINTERS, object, "S_LAZY_SYMBOL_POINTERS"); yr_set_integer(S_LITERAL_POINTERS, object, "S_LITERAL_POINTERS"); yr_set_integer(S_SYMBOL_STUBS, object, "S_SYMBOL_STUBS"); @@ -933,7 +959,8 @@ void macho_set_definitions(YR_OBJECT* object) yr_set_integer(S_ATTR_STRIP_STATIC_SYMS, object, "S_ATTR_STRIP_STATIC_SYMS"); yr_set_integer(S_ATTR_NO_DEAD_STRIP, object, "S_ATTR_NO_DEAD_STRIP"); yr_set_integer(S_ATTR_LIVE_SUPPORT, object, "S_ATTR_LIVE_SUPPORT"); - yr_set_integer(S_ATTR_SELF_MODIFYING_CODE, object, "S_ATTR_SELF_MODIFYING_CODE"); + yr_set_integer( + S_ATTR_SELF_MODIFYING_CODE, object, "S_ATTR_SELF_MODIFYING_CODE"); yr_set_integer(S_ATTR_DEBUG, object, "S_ATTR_DEBUG"); yr_set_integer(S_ATTR_SOME_INSTRUCTIONS, object, "S_ATTR_SOME_INSTRUCTIONS"); yr_set_integer(S_ATTR_EXT_RELOC, object, "S_ATTR_EXT_RELOC"); @@ -1035,9 +1062,12 @@ define_function(ep_for_arch_subtype) uint64_t entry_point = yr_get_integer(module, "file[%i].entry_point", i); uint64_t file_offset = yr_get_integer(module, "fat_arch[%i].offset", i); - if (entry_point == YR_UNDEFINED) { + if (entry_point == YR_UNDEFINED) + { return_integer(YR_UNDEFINED); - } else { + } + else + { return_integer(file_offset + entry_point); } } @@ -1350,7 +1380,7 @@ int module_load( foreach_memory_block(iterator, block) { - const uint8_t* block_data = block->fetch_data(block); + const uint8_t* block_data = yr_fetch_block_data(block); if (block_data == NULL || block->size < 4) continue; @@ -1358,14 +1388,16 @@ int module_load( // Parse Mach-O binary. 
if (is_macho_file_block((uint32_t*) block_data)) { - macho_parse_file(block_data, block->size, module_object, context); + macho_parse_file( + block_data, block->size, block->base, module_object, context); break; } // Parse fat Mach-O binary. if (is_fat_macho_file_block((uint32_t*) block_data)) { - macho_parse_fat_file(block_data, block->size, module_object, context); + macho_parse_fat_file( + block_data, block->size, block->base, module_object, context); break; } } diff --git a/src/libyara/modules/magic/magic.c b/src/libyara/modules/magic/magic.c index 5e435d8..124ed2a 100644 --- a/src/libyara/modules/magic/magic.c +++ b/src/libyara/modules/magic/magic.c @@ -101,14 +101,20 @@ define_function(magic_mime_type) if (cache->cached_mime_type == NULL) { block = first_memory_block(context); - block_data = block->fetch_data(block); + + if (block == NULL) + return_string(YR_UNDEFINED); + + block_data = yr_fetch_block_data(block); if (block_data != NULL) { magic_setflags(cache->magic_cookie, MAGIC_MIME_TYPE); - cache->cached_mime_type = yr_strdup( - magic_buffer(cache->magic_cookie, block_data, block->size)); + const char* type = magic_buffer( + cache->magic_cookie, block_data, block->size); + + cache->cached_mime_type = (type == NULL) ? NULL : yr_strdup(type); } } @@ -134,14 +140,20 @@ define_function(magic_type) if (cache->cached_type == NULL) { block = first_memory_block(context); - block_data = block->fetch_data(block); + + if (block == NULL) + return_string(YR_UNDEFINED); + + block_data = yr_fetch_block_data(block); if (block_data != NULL) { magic_setflags(cache->magic_cookie, 0); - cache->cached_type = yr_strdup( - magic_buffer(cache->magic_cookie, block_data, block->size)); + const char* type = magic_buffer( + cache->magic_cookie, block_data, block->size); + + cache->cached_type = (type == NULL) ? 
NULL : yr_strdup(type); } } diff --git a/src/libyara/modules/math/math.c b/src/libyara/modules/math/math.c index 98193c4..3cb9d49 100644 --- a/src/libyara/modules/math/math.c +++ b/src/libyara/modules/math/math.c @@ -27,8 +27,8 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include #include +#include #include #include #include @@ -50,21 +50,24 @@ static double log2(double n) } #endif -uint32_t* get_distribution(int64_t offset, int64_t length, YR_SCAN_CONTEXT* context) { +uint32_t* get_distribution( + int64_t offset, + int64_t length, + YR_SCAN_CONTEXT* context) +{ bool past_first_block = false; size_t i; uint32_t* data = (uint32_t*) yr_calloc(256, sizeof(uint32_t)); - if (data == NULL) { + if (data == NULL) return NULL; - } YR_MEMORY_BLOCK* block = first_memory_block(context); YR_MEMORY_BLOCK_ITERATOR* iterator = context->iterator; - if (offset < 0 || length < 0 || offset < block->base) + if (block == NULL || offset < 0 || length < 0 || offset < block->base) { yr_free(data); return NULL; @@ -74,11 +77,11 @@ uint32_t* get_distribution(int64_t offset, int64_t length, YR_SCAN_CONTEXT* cont { if (offset >= block->base && offset < block->base + block->size) { - size_t data_offset = (size_t)(offset - block->base); + size_t data_offset = (size_t) (offset - block->base); size_t data_len = (size_t) yr_min( - length, (size_t)(block->size - data_offset)); + length, (size_t) (block->size - data_offset)); - const uint8_t* block_data = block->fetch_data(block); + const uint8_t* block_data = yr_fetch_block_data(block); if (block_data == NULL) { @@ -109,7 +112,7 @@ uint32_t* get_distribution(int64_t offset, int64_t length, YR_SCAN_CONTEXT* cont return NULL; } - if (block->base + block->size > offset + length) + if (block->base + block->size >= offset + length) break; } @@ -121,8 +124,8 @@ uint32_t* get_distribution(int64_t offset, int64_t length, YR_SCAN_CONTEXT* cont return data; } 
-uint32_t* get_distribution_global(YR_SCAN_CONTEXT* context) { - +uint32_t* get_distribution_global(YR_SCAN_CONTEXT* context) +{ size_t i; int64_t expected_next_offset = 0; @@ -132,21 +135,20 @@ uint32_t* get_distribution_global(YR_SCAN_CONTEXT* context) { if (data == NULL) return NULL; - YR_MEMORY_BLOCK* block = first_memory_block(context); + YR_MEMORY_BLOCK* block; YR_MEMORY_BLOCK_ITERATOR* iterator = context->iterator; - foreach_memory_block(iterator, block) { if (expected_next_offset != block->base) { - // If offset is not directly after the current block then - // we are trying to compute the distribution over a range of non - // contiguous blocks. As the range contains gaps of + // If offset is not directly after the current block then + // we are trying to compute the distribution over a range of non + // contiguous blocks. As the range contains gaps of // undefined data the distribution is undefined. yr_free(data); return NULL; } - const uint8_t* block_data = block->fetch_data(block); + const uint8_t* block_data = yr_fetch_block_data(block); if (block_data == NULL) { @@ -156,8 +158,8 @@ uint32_t* get_distribution_global(YR_SCAN_CONTEXT* context) { for (i = 0; i < block->size; i++) { - uint8_t c = *(block_data + i); - data[c] += 1; + uint8_t c = *(block_data + i); + data[c] += 1; } expected_next_offset = block->base + block->size; } @@ -209,6 +211,7 @@ define_function(data_entropy) size_t total_len = 0; uint32_t* data = get_distribution(offset, length, context); + if (data == NULL) return_float(YR_UNDEFINED); @@ -258,6 +261,7 @@ define_function(data_deviation) YR_SCAN_CONTEXT* context = yr_scan_context(); uint32_t* data = get_distribution(offset, length, context); + if (data == NULL) return_float(YR_UNDEFINED); @@ -296,6 +300,7 @@ define_function(data_mean) size_t i; uint32_t* data = get_distribution(offset, length, context); + if (data == NULL) return_float(YR_UNDEFINED); @@ -323,6 +328,9 @@ define_function(data_serial_correlation) YR_MEMORY_BLOCK* block = 
first_memory_block(context); YR_MEMORY_BLOCK_ITERATOR* iterator = context->iterator; + if (block == NULL) + return_float(YR_UNDEFINED); + double sccun = 0; double sccfirst = 0; double scclast = 0; @@ -338,11 +346,11 @@ define_function(data_serial_correlation) { if (offset >= block->base && offset < block->base + block->size) { - size_t data_offset = (size_t)(offset - block->base); + size_t data_offset = (size_t) (offset - block->base); size_t data_len = (size_t) yr_min( - length, (size_t)(block->size - data_offset)); + length, (size_t) (block->size - data_offset)); - const uint8_t* block_data = block->fetch_data(block); + const uint8_t* block_data = yr_fetch_block_data(block); if (block_data == NULL) return_float(YR_UNDEFINED); @@ -354,8 +362,9 @@ define_function(data_serial_correlation) for (i = 0; i < data_len; i++) { sccun = (double) *(block_data + data_offset + i); - if (i == 0) { - sccfirst = sccun; + if (i == 0) + { + sccfirst = sccun; } scct1 += scclast * sccun; scct2 += sccun; @@ -375,7 +384,7 @@ define_function(data_serial_correlation) return_float(YR_UNDEFINED); } - if (block->base + block->size > offset + length) + if (block->base + block->size >= offset + length) break; } @@ -417,8 +426,9 @@ define_function(string_serial_correlation) scclast = sccun; } - if (s->length > 0) { - scct1 += scclast * (double) s->c_string[0]; + if (s->length > 0) + { + scct1 += scclast * (double) s->c_string[0]; } scct2 *= scct2; @@ -450,6 +460,9 @@ define_function(data_monte_carlo_pi) YR_MEMORY_BLOCK* block = first_memory_block(context); YR_MEMORY_BLOCK_ITERATOR* iterator = context->iterator; + if (block == NULL) + return_float(YR_UNDEFINED); + if (offset < 0 || length < 0 || offset < block->base) return_float(YR_UNDEFINED); @@ -459,11 +472,11 @@ define_function(data_monte_carlo_pi) { unsigned int monte[6]; - size_t data_offset = (size_t)(offset - block->base); + size_t data_offset = (size_t) (offset - block->base); size_t data_len = (size_t) yr_min( - length, 
(size_t)(block->size - data_offset)); + length, (size_t) (block->size - data_offset)); - const uint8_t* block_data = block->fetch_data(block); + const uint8_t* block_data = yr_fetch_block_data(block); if (block_data == NULL) return_float(YR_UNDEFINED); @@ -506,7 +519,7 @@ define_function(data_monte_carlo_pi) return_float(YR_UNDEFINED); } - if (block->base + block->size > offset + length) + if (block->base + block->size >= offset + length) break; } @@ -605,17 +618,20 @@ define_function(yr_math_abs) define_function(count_range) { - uint8_t byte = (uint8_t) integer_argument(1); + int64_t byte = integer_argument(1); int64_t offset = integer_argument(2); int64_t length = integer_argument(3); + if (byte < 0 || byte > 255) + return_integer(YR_UNDEFINED); + YR_SCAN_CONTEXT* context = yr_scan_context(); uint32_t* distribution = get_distribution(offset, length, context); + if (distribution == NULL) - { return_integer(YR_UNDEFINED); - } + int64_t count = (int64_t) distribution[byte]; yr_free(distribution); return_integer(count); @@ -623,15 +639,18 @@ define_function(count_range) define_function(count_global) { - uint8_t byte = (uint8_t) integer_argument(1); + int64_t byte = integer_argument(1); + + if (byte < 0 || byte > 255) + return_integer(YR_UNDEFINED); YR_SCAN_CONTEXT* context = yr_scan_context(); uint32_t* distribution = get_distribution_global(context); + if (distribution == NULL) - { return_integer(YR_UNDEFINED); - } + int64_t count = (int64_t) distribution[byte]; yr_free(distribution); return_integer(count); @@ -639,42 +658,50 @@ define_function(count_global) define_function(percentage_range) { - uint8_t byte = (uint8_t) integer_argument(1); + int64_t byte = integer_argument(1); int64_t offset = integer_argument(2); int64_t length = integer_argument(3); + if (byte < 0 || byte > 255) + return_float(YR_UNDEFINED); + YR_SCAN_CONTEXT* context = yr_scan_context(); uint32_t* distribution = get_distribution(offset, length, context); - if (distribution == NULL) { + + if 
(distribution == NULL) return_float(YR_UNDEFINED); - } + int64_t count = (int64_t) distribution[byte]; int64_t total_count = 0; int64_t i; - for (i = 0; i < 256; i++) { - total_count += distribution[i]; - } + + for (i = 0; i < 256; i++) total_count += distribution[i]; + yr_free(distribution); return_float(((float) count) / ((float) total_count)); } define_function(percentage_global) { - uint8_t byte = (uint8_t) integer_argument(1); + int64_t byte = integer_argument(1); + + if (byte < 0 || byte > 255) + return_float(YR_UNDEFINED); YR_SCAN_CONTEXT* context = yr_scan_context(); uint32_t* distribution = get_distribution_global(context); - if (distribution == NULL) { + + if (distribution == NULL) return_float(YR_UNDEFINED); - } + int64_t count = (int64_t) distribution[byte]; int64_t total_count = 0; int64_t i; - for (i = 0; i < 256; i++) { - total_count += distribution[i]; - } + + for (i = 0; i < 256; i++) total_count += distribution[i]; + yr_free(distribution); return_float(((float) count) / ((float) total_count)); } @@ -687,19 +714,19 @@ define_function(mode_range) YR_SCAN_CONTEXT* context = yr_scan_context(); uint32_t* distribution = get_distribution(offset, length, context); - if (distribution == NULL) { + + if (distribution == NULL) return_integer(YR_UNDEFINED); - } int64_t most_common = 0; size_t i; + for (i = 0; i < 256; i++) { if (distribution[i] > distribution[most_common]) - { most_common = (int64_t) i; - } } + yr_free(distribution); return_integer(most_common); } @@ -709,19 +736,19 @@ define_function(mode_global) YR_SCAN_CONTEXT* context = yr_scan_context(); uint32_t* distribution = get_distribution_global(context); - if (distribution == NULL) { + + if (distribution == NULL) return_integer(YR_UNDEFINED); - } int64_t most_common = 0; size_t i; + for (i = 0; i < 256; i++) { if (distribution[i] > distribution[most_common]) - { most_common = (int64_t) i; - } } + yr_free(distribution); return_integer(most_common); } @@ -739,7 +766,7 @@ 
define_function(to_string_base) int64_t i = integer_argument(1); int64_t base = integer_argument(2); char str[INT64_MAX_STRING]; - char *fmt; + char* fmt; switch (base) { case 10: diff --git a/src/libyara/modules/pe/authenticode-parser/authenticode.c b/src/libyara/modules/pe/authenticode-parser/authenticode.c index a6a92b5..892bfda 100644 --- a/src/libyara/modules/pe/authenticode-parser/authenticode.c +++ b/src/libyara/modules/pe/authenticode-parser/authenticode.c @@ -22,6 +22,7 @@ SOFTWARE. #include #include #include +#include #include #include #include @@ -43,443 +44,394 @@ SOFTWARE. /* Moves signatures from src to dst, returns 0 on success, * else 1. If error occurs, arguments are unchanged */ -static int authenticode_array_move( - AuthenticodeArray* dst, - AuthenticodeArray* src) +static int authenticode_array_move(AuthenticodeArray* dst, AuthenticodeArray* src) { - size_t newCount = dst->count + src->count; + size_t newCount = dst->count + src->count; - Authenticode** tmp = (Authenticode**) realloc( - dst->signatures, newCount * sizeof(Authenticode*)); - if (!tmp) - return 1; + Authenticode** tmp = (Authenticode**)realloc(dst->signatures, newCount * sizeof(Authenticode*)); + if (!tmp) + return 1; - dst->signatures = tmp; + dst->signatures = tmp; - for (size_t i = 0; i < src->count; ++i) - dst->signatures[i + dst->count] = src->signatures[i]; + for (size_t i = 0; i < src->count; ++i) + dst->signatures[i + dst->count] = src->signatures[i]; - dst->count = newCount; + dst->count = newCount; - free(src->signatures); - src->signatures = NULL; - src->count = 0; + free(src->signatures); + src->signatures = NULL; + src->count = 0; - return 0; + return 0; } static SpcIndirectDataContent* get_content(PKCS7* content) { - if (!content) - return NULL; + if (!content) + return NULL; - if (OBJ_obj2nid(content->type) != OBJ_txt2nid(NID_spc_indirect_data)) - return NULL; + if (OBJ_obj2nid(content->type) != OBJ_txt2nid(NID_spc_indirect_data)) + return NULL; - 
SpcIndirectDataContent* spcContent = SpcIndirectDataContent_new(); - if (!spcContent) - return NULL; + SpcIndirectDataContent* spcContent = SpcIndirectDataContent_new(); + if (!spcContent) + return NULL; - int len = content->d.other->value.sequence->length; - const uint8_t* data = content->d.other->value.sequence->data; + int len = content->d.other->value.sequence->length; + const uint8_t* data = content->d.other->value.sequence->data; - d2i_SpcIndirectDataContent(&spcContent, &data, len); + d2i_SpcIndirectDataContent(&spcContent, &data, len); - return spcContent; + return spcContent; } static char* parse_program_name(ASN1_TYPE* spcAttr) { - const uint8_t* spcData = spcAttr->value.sequence->data; - int spcLen = spcAttr->value.sequence->length; - SpcSpOpusInfo* spcInfo = d2i_SpcSpOpusInfo(NULL, &spcData, spcLen); - if (!spcInfo) - return NULL; - - char* result = NULL; - - if (spcInfo->programName) - { - uint8_t* data = NULL; - /* Should be Windows UTF16..., try to convert it to UTF8 */ - int nameLen = ASN1_STRING_to_UTF8( - &data, spcInfo->programName->value.unicode); - if (nameLen >= 0 && nameLen < spcLen) - { - result = (char*) malloc(nameLen + 1); - if (result) - { - memcpy(result, data, nameLen); - result[nameLen] = 0; - } - OPENSSL_free(data); + const uint8_t* spcData = spcAttr->value.sequence->data; + int spcLen = spcAttr->value.sequence->length; + SpcSpOpusInfo* spcInfo = d2i_SpcSpOpusInfo(NULL, &spcData, spcLen); + if (!spcInfo) + return NULL; + + char* result = NULL; + + if (spcInfo->programName) { + uint8_t* data = NULL; + /* Should be Windows UTF16..., try to convert it to UTF8 */ + int nameLen = ASN1_STRING_to_UTF8(&data, spcInfo->programName->value.unicode); + if (nameLen >= 0 && nameLen < spcLen) { + result = (char*)malloc(nameLen + 1); + if (result) { + memcpy(result, data, nameLen); + result[nameLen] = 0; + } + OPENSSL_free(data); + } } - } - SpcSpOpusInfo_free(spcInfo); - return result; + SpcSpOpusInfo_free(spcInfo); + return result; } -/* Parses 
X509* certs into internal representation and inserts into - * CertificateArray Array is assumed to have enough space to hold all - * certificates storted in the STACK */ -static void parse_certificates( - const STACK_OF(X509) * certs, - CertificateArray* result) +/* Parses X509* certs into internal representation and inserts into CertificateArray + * Array is assumed to have enough space to hold all certificates storted in the STACK */ +static void parse_certificates(const STACK_OF(X509) * certs, CertificateArray* result) { - int certCount = sk_X509_num(certs); - int i = 0; - for (; i < certCount; ++i) - { - Certificate* cert = certificate_new(sk_X509_value(certs, i)); - if (!cert) - break; - - /* Write to the result */ - result->certs[i] = cert; - } - result->count = i; + int certCount = sk_X509_num(certs); + int i = 0; + for (; i < certCount; ++i) { + Certificate* cert = certificate_new(sk_X509_value(certs, i)); + if (!cert) + break; + + /* Write to the result */ + result->certs[i] = cert; + } + result->count = i; } -static void parse_nested_authenticode( - PKCS7_SIGNER_INFO* si, - AuthenticodeArray* result) +static void parse_nested_authenticode(PKCS7_SIGNER_INFO* si, AuthenticodeArray* result) { - STACK_OF(X509_ATTRIBUTE)* attrs = PKCS7_get_attributes(si); - int idx = X509at_get_attr_by_NID( - attrs, OBJ_txt2nid(NID_spc_nested_signature), -1); - X509_ATTRIBUTE* attr = X509at_get_attr(attrs, idx); - - int attrCount = X509_ATTRIBUTE_count(attr); - if (!attrCount) - return; - - /* Limit the maximum amount of nested attributes to be safe from malformed - * samples */ - attrCount = attrCount > MAX_NESTED_COUNT ? 
MAX_NESTED_COUNT : attrCount; - - for (int i = 0; i < attrCount; ++i) - { - ASN1_TYPE* nested = X509_ATTRIBUTE_get0_type(attr, i); - if (nested == NULL) - break; - int len = nested->value.sequence->length; - const uint8_t* data = nested->value.sequence->data; - AuthenticodeArray* auth = authenticode_new(data, len); - if (!auth) - continue; - - authenticode_array_move(result, auth); - authenticode_array_free(auth); - } + STACK_OF(X509_ATTRIBUTE)* attrs = PKCS7_get_attributes(si); + int idx = X509at_get_attr_by_NID(attrs, OBJ_txt2nid(NID_spc_nested_signature), -1); + X509_ATTRIBUTE* attr = X509at_get_attr(attrs, idx); + + int attrCount = X509_ATTRIBUTE_count(attr); + if (!attrCount) + return; + + /* Limit the maximum amount of nested attributes to be safe from malformed samples */ + attrCount = attrCount > MAX_NESTED_COUNT ? MAX_NESTED_COUNT : attrCount; + + for (int i = 0; i < attrCount; ++i) { + ASN1_TYPE* nested = X509_ATTRIBUTE_get0_type(attr, i); + if (nested == NULL) + break; + int len = nested->value.sequence->length; + const uint8_t* data = nested->value.sequence->data; + AuthenticodeArray* auth = authenticode_new(data, len); + if (!auth) + continue; + + authenticode_array_move(result, auth); + authenticode_array_free(auth); + } } static void parse_pkcs9_countersig(PKCS7* p7, Authenticode* auth) { - PKCS7_SIGNER_INFO* si = sk_PKCS7_SIGNER_INFO_value( - PKCS7_get_signer_info(p7), 0); + PKCS7_SIGNER_INFO* si = sk_PKCS7_SIGNER_INFO_value(PKCS7_get_signer_info(p7), 0); - STACK_OF(X509_ATTRIBUTE)* attrs = PKCS7_get_attributes(si); + STACK_OF(X509_ATTRIBUTE)* attrs = PKCS7_get_attributes(si); - int idx = X509at_get_attr_by_NID(attrs, NID_pkcs9_countersignature, -1); - X509_ATTRIBUTE* attr = X509at_get_attr(attrs, idx); + int idx = X509at_get_attr_by_NID(attrs, NID_pkcs9_countersignature, -1); + X509_ATTRIBUTE* attr = X509at_get_attr(attrs, idx); - int attrCount = X509_ATTRIBUTE_count(attr); - if (!attrCount) - return; + int attrCount = X509_ATTRIBUTE_count(attr); + 
if (!attrCount) + return; - /* Limit the maximum amount of nested attributes to be safe from malformed - * samples */ - attrCount = attrCount > MAX_NESTED_COUNT ? MAX_NESTED_COUNT : attrCount; + /* Limit the maximum amount of nested attributes to be safe from malformed samples */ + attrCount = attrCount > MAX_NESTED_COUNT ? MAX_NESTED_COUNT : attrCount; - for (int i = 0; i < attrCount; ++i) - { - ASN1_TYPE* nested = X509_ATTRIBUTE_get0_type(attr, i); - if (nested == NULL) - break; - int len = nested->value.sequence->length; - const uint8_t* data = nested->value.sequence->data; + for (int i = 0; i < attrCount; ++i) { + ASN1_TYPE* nested = X509_ATTRIBUTE_get0_type(attr, i); + if (nested == NULL) + break; + int len = nested->value.sequence->length; + const uint8_t* data = nested->value.sequence->data; - Countersignature* sig = pkcs9_countersig_new( - data, len, p7->d.sign->cert, si->enc_digest); - if (!sig) - continue; + Countersignature* sig = pkcs9_countersig_new(data, len, p7->d.sign->cert, si->enc_digest); + if (!sig) + continue; - countersignature_array_insert(auth->countersigs, sig); - } + countersignature_array_insert(auth->countersigs, sig); + } } -/* Extracts X509 certificates from MS countersignature and stores them into - * result */ -static void extract_ms_counter_certs( - const uint8_t* data, - int len, - CertificateArray* result) +/* Extracts X509 certificates from MS countersignature and stores them into result */ +static void extract_ms_counter_certs(const uint8_t* data, int len, CertificateArray* result) { - PKCS7* p7 = d2i_PKCS7(NULL, &data, len); - if (!p7) - return; - - STACK_OF(X509)* certs = p7->d.sign->cert; - CertificateArray* certArr = certificate_array_new(sk_X509_num(certs)); - if (!certArr) - { - PKCS7_free(p7); - return; - } - parse_certificates(certs, certArr); - certificate_array_move(result, certArr); - certificate_array_free(certArr); + PKCS7* p7 = d2i_PKCS7(NULL, &data, len); + if (!p7) + return; + + /* We expect SignedData type of 
PKCS7 */ + if (!PKCS7_type_is_signed(p7) || !p7->d.sign) { + PKCS7_free(p7); + return; + } + + STACK_OF(X509)* certs = p7->d.sign->cert; + CertificateArray* certArr = certificate_array_new(sk_X509_num(certs)); + if (!certArr) { + PKCS7_free(p7); + return; + } + parse_certificates(certs, certArr); + certificate_array_move(result, certArr); + certificate_array_free(certArr); - PKCS7_free(p7); + PKCS7_free(p7); } static void parse_ms_countersig(PKCS7* p7, Authenticode* auth) { - PKCS7_SIGNER_INFO* si = sk_PKCS7_SIGNER_INFO_value( - PKCS7_get_signer_info(p7), 0); - - STACK_OF(X509_ATTRIBUTE)* attrs = PKCS7_get_attributes(si); - - int idx = X509at_get_attr_by_NID( - attrs, OBJ_txt2nid(NID_spc_ms_countersignature), -1); - X509_ATTRIBUTE* attr = X509at_get_attr(attrs, idx); - - int attrCount = X509_ATTRIBUTE_count(attr); - if (!attrCount) - return; - - /* Limit the maximum amount of nested attributes to be safe from malformed - * samples */ - attrCount = attrCount > MAX_NESTED_COUNT ? MAX_NESTED_COUNT : attrCount; - - for (int i = 0; i < attrCount; ++i) - { - ASN1_TYPE* nested = X509_ATTRIBUTE_get0_type(attr, i); - if (nested == NULL) - break; - int len = nested->value.sequence->length; - const uint8_t* data = nested->value.sequence->data; - - Countersignature* sig = ms_countersig_new(data, len, si->enc_digest); - if (!sig) - return; - - /* Because MS TimeStamp countersignature has it's own SET of certificates - * extract it back into parent signature for consistency with PKCS9 */ - countersignature_array_insert(auth->countersigs, sig); - extract_ms_counter_certs(data, len, auth->certs); - } + PKCS7_SIGNER_INFO* si = sk_PKCS7_SIGNER_INFO_value(PKCS7_get_signer_info(p7), 0); + + STACK_OF(X509_ATTRIBUTE)* attrs = PKCS7_get_attributes(si); + + int idx = X509at_get_attr_by_NID(attrs, OBJ_txt2nid(NID_spc_ms_countersignature), -1); + X509_ATTRIBUTE* attr = X509at_get_attr(attrs, idx); + + int attrCount = X509_ATTRIBUTE_count(attr); + if (!attrCount) + return; + + /* Limit the 
maximum amount of nested attributes to be safe from malformed samples */ + attrCount = attrCount > MAX_NESTED_COUNT ? MAX_NESTED_COUNT : attrCount; + + for (int i = 0; i < attrCount; ++i) { + ASN1_TYPE* nested = X509_ATTRIBUTE_get0_type(attr, i); + if (nested == NULL) + break; + int len = nested->value.sequence->length; + const uint8_t* data = nested->value.sequence->data; + + Countersignature* sig = ms_countersig_new(data, len, si->enc_digest); + if (!sig) + return; + + /* Because MS TimeStamp countersignature has it's own SET of certificates + * extract it back into parent signature for consistency with PKCS9 */ + countersignature_array_insert(auth->countersigs, sig); + extract_ms_counter_certs(data, len, auth->certs); + } } -static bool authenticode_verify( - PKCS7* p7, - PKCS7_SIGNER_INFO* si, - X509* signCert) +static bool authenticode_verify(PKCS7* p7, PKCS7_SIGNER_INFO* si, X509* signCert) { - const uint8_t* contentData = - p7->d.sign->contents->d.other->value.sequence->data; - long contentLen = p7->d.sign->contents->d.other->value.sequence->length; - - uint64_t version = 0; - ASN1_INTEGER_get_uint64(&version, p7->d.sign->version); - if (version == 1) - { - /* Move the pointer to the actual contents - skip OID and length */ - int pclass = 0, ptag = 0; - ASN1_get_object(&contentData, &contentLen, &ptag, &pclass, contentLen); - } + const uint8_t* contentData = p7->d.sign->contents->d.other->value.sequence->data; + long contentLen = p7->d.sign->contents->d.other->value.sequence->length; + + uint64_t version = 0; + ASN1_INTEGER_get_uint64(&version, p7->d.sign->version); + if (version == 1) { + /* Move the pointer to the actual contents - skip OID and length */ + int pclass = 0, ptag = 0; + ASN1_get_object(&contentData, &contentLen, &ptag, &pclass, contentLen); + } - BIO* contentBio = BIO_new_mem_buf(contentData, contentLen); - /* Create `digest` type BIO to calculate content digest for verification */ - BIO* p7bio = PKCS7_dataInit(p7, contentBio); + BIO* 
contentBio = BIO_new_mem_buf(contentData, contentLen); + /* Create `digest` type BIO to calculate content digest for verification */ + BIO* p7bio = PKCS7_dataInit(p7, contentBio); - char buf[4096]; - /* We now have to 'read' from p7bio to calculate content digest */ - while (BIO_read(p7bio, buf, sizeof(buf)) > 0) continue; + char buf[4096]; + /* We now have to 'read' from p7bio to calculate content digest */ + while (BIO_read(p7bio, buf, sizeof(buf)) > 0) + continue; - /* Pass it to the PKCS7_signatureVerify, to do the hard work for us */ - bool isValid = PKCS7_signatureVerify(p7bio, p7, si, signCert) == 1; + /* Pass it to the PKCS7_signatureVerify, to do the hard work for us */ + bool isValid = PKCS7_signatureVerify(p7bio, p7, si, signCert) == 1; - BIO_free_all(p7bio); + BIO_free_all(p7bio); - return isValid; + return isValid; } -/* Creates all the Authenticode objects so we can parse them with OpenSSL, is - * not thread-safe, needs to be called once before any multi-threading - * environmentt - https://github.com/openssl/openssl/issues/13524 */ +/* Creates all the Authenticode objects so we can parse them with OpenSSL, is not thread-safe, needs + * to be called once before any multi-threading environmentt - + * https://github.com/openssl/openssl/issues/13524 */ void initialize_authenticode_parser() { - OBJ_create( - "1.3.6.1.4.1.311.2.1.12", "spcSpOpusInfo", "SPC_SP_OPUS_INFO_OBJID"); - OBJ_create( - "1.3.6.1.4.1.311.3.3.1", - "spcMsCountersignature", - "SPC_MICROSOFT_COUNTERSIGNATURE"); - OBJ_create( - "1.3.6.1.4.1.311.2.4.1", "spcNestedSignature", "SPC_NESTED_SIGNATUREs"); - OBJ_create("1.3.6.1.4.1.311.2.1.4", "spcIndirectData", "SPC_INDIRECT_DATA"); + OBJ_create("1.3.6.1.4.1.311.2.1.12", "spcSpOpusInfo", "SPC_SP_OPUS_INFO_OBJID"); + OBJ_create("1.3.6.1.4.1.311.3.3.1", "spcMsCountersignature", "SPC_MICROSOFT_COUNTERSIGNATURE"); + OBJ_create("1.3.6.1.4.1.311.2.4.1", "spcNestedSignature", "SPC_NESTED_SIGNATUREs"); + OBJ_create("1.3.6.1.4.1.311.2.1.4", 
"spcIndirectData", "SPC_INDIRECT_DATA"); } -/* Return array of Authenticode signatures stored in the data, there can be - * multiple of signatures as Authenticode signatures are often nested through - * unauth attributes */ -AuthenticodeArray* authenticode_new(const uint8_t* data, long len) +/* Return array of Authenticode signatures stored in the data, there can be multiple + * of signatures as Authenticode signatures are often nested through unauth attributes */ +AuthenticodeArray* authenticode_new(const uint8_t* data, int32_t len) { - if (!data || len == 0) - return NULL; - - AuthenticodeArray* result = (AuthenticodeArray*) calloc(1, sizeof(*result)); - if (!result) - return NULL; - - result->signatures = (Authenticode**) malloc(sizeof(Authenticode*)); - if (!result->signatures) - { - free(result); - return NULL; - } - - Authenticode* auth = (Authenticode*) calloc(1, sizeof(*auth)); - if (!auth) - { - free(result->signatures); - free(result); - return NULL; - } - - result->count = 1; - result->signatures[0] = auth; - - /* Let openssl parse the PKCS7 structure */ - PKCS7* p7 = d2i_PKCS7(NULL, &data, len); - if (!p7) - { - auth->verify_flags = AUTHENTICODE_VFY_CANT_PARSE; - goto end; - } - - /* We expect SignedData type of PKCS7 */ - if (!PKCS7_type_is_signed(p7)) - { - auth->verify_flags = AUTHENTICODE_VFY_WRONG_PKCS7_TYPE; - goto end; - } - - PKCS7_SIGNED* p7data = p7->d.sign; - - uint64_t version = 0; - if (ASN1_INTEGER_get_uint64(&version, p7data->version)) - auth->version = version; - - STACK_OF(X509)* certs = p7data->cert; - - auth->certs = certificate_array_new(sk_X509_num(certs)); - if (!auth->certs) - { - auth->verify_flags = AUTHENTICODE_VFY_INTERNAL_ERROR; - goto end; - } - parse_certificates(certs, auth->certs); - - /* Get Signature content that contains the message digest and it's algorithm - */ - SpcIndirectDataContent* dataContent = get_content(p7data->contents); - if (!dataContent) - { - auth->verify_flags = AUTHENTICODE_VFY_BAD_CONTENT; - goto 
end; - } - - DigestInfo* messageDigest = dataContent->messageDigest; - - int digestnid = OBJ_obj2nid(messageDigest->digestAlgorithm->algorithm); - auth->digest_alg = strdup(OBJ_nid2ln(digestnid)); - - int digestLen = messageDigest->digest->length; - const uint8_t* digestData = messageDigest->digest->data; - byte_array_init(&auth->digest, digestData, digestLen); - - SpcIndirectDataContent_free(dataContent); - - Signer* signer = (Signer*) calloc(1, sizeof(Signer)); - if (!signer) - { - auth->verify_flags = AUTHENTICODE_VFY_INTERNAL_ERROR; - goto end; - } - auth->signer = signer; - - /* Authenticode is supposed to have only one SignerInfo value - * that contains all information for actual signing purposes - * and nested signatures or countersignatures */ - PKCS7_SIGNER_INFO* si = sk_PKCS7_SIGNER_INFO_value( - PKCS7_get_signer_info(p7), 0); - if (!si) - { - auth->verify_flags = AUTHENTICODE_VFY_NO_SIGNER_INFO; - goto end; - } - - auth->countersigs = (CountersignatureArray*) calloc( - 1, sizeof(CountersignatureArray)); - if (!auth->countersigs) - { - auth->verify_flags = AUTHENTICODE_VFY_INTERNAL_ERROR; - goto end; - } - /* Authenticode can contain SET of nested Authenticode signatures - * and countersignatures in unauthenticated attributes */ - parse_nested_authenticode(si, result); - parse_pkcs9_countersig(p7, auth); - parse_ms_countersig(p7, auth); - - /* Get the signing certificate for the first SignerInfo */ - STACK_OF(X509)* signCertStack = PKCS7_get0_signers(p7, certs, 0); - - X509* signCert = sk_X509_value(signCertStack, 0); - if (!signCert) - { - auth->verify_flags = AUTHENTICODE_VFY_NO_SIGNER_CERT; - sk_X509_free(signCertStack); - goto end; - } + if (!data || len <= 0) + return NULL; + + AuthenticodeArray* result = (AuthenticodeArray*)calloc(1, sizeof(*result)); + if (!result) + return NULL; + + result->signatures = (Authenticode**)malloc(sizeof(Authenticode*)); + if (!result->signatures) { + free(result); + return NULL; + } + + Authenticode* auth = 
(Authenticode*)calloc(1, sizeof(*auth)); + if (!auth) { + free(result->signatures); + free(result); + return NULL; + } + + result->count = 1; + result->signatures[0] = auth; + + /* Let openssl parse the PKCS7 structure */ + PKCS7* p7 = d2i_PKCS7(NULL, &data, len); + if (!p7) { + auth->verify_flags = AUTHENTICODE_VFY_CANT_PARSE; + goto end; + } + + /* We expect SignedData type of PKCS7 */ + if (!PKCS7_type_is_signed(p7) || !p7->d.sign) { + auth->verify_flags = AUTHENTICODE_VFY_WRONG_PKCS7_TYPE; + goto end; + } + + PKCS7_SIGNED* p7data = p7->d.sign; + + uint64_t version = 0; + if (ASN1_INTEGER_get_uint64(&version, p7data->version)) + auth->version = version; + + STACK_OF(X509)* certs = p7data->cert; + + auth->certs = certificate_array_new(sk_X509_num(certs)); + if (!auth->certs) { + auth->verify_flags = AUTHENTICODE_VFY_INTERNAL_ERROR; + goto end; + } + parse_certificates(certs, auth->certs); + + /* Get Signature content that contains the message digest and it's algorithm */ + SpcIndirectDataContent* dataContent = get_content(p7data->contents); + if (!dataContent) { + auth->verify_flags = AUTHENTICODE_VFY_BAD_CONTENT; + goto end; + } + + DigestInfo* messageDigest = dataContent->messageDigest; - sk_X509_free(signCertStack); + int digestnid = OBJ_obj2nid(messageDigest->digestAlgorithm->algorithm); + auth->digest_alg = strdup(OBJ_nid2ln(digestnid)); - signer->chain = parse_signer_chain(signCert, certs); + int digestLen = messageDigest->digest->length; + const uint8_t* digestData = messageDigest->digest->data; + byte_array_init(&auth->digest, digestData, digestLen); - /* Get the Signers digest of Authenticode content */ - ASN1_TYPE* digest = PKCS7_get_signed_attribute(si, NID_pkcs9_messageDigest); - if (!digest) - { - auth->verify_flags = AUTHENTICODE_VFY_DIGEST_MISSING; - goto end; - } + SpcIndirectDataContent_free(dataContent); - digestnid = OBJ_obj2nid(si->digest_alg->algorithm); - signer->digest_alg = strdup(OBJ_nid2ln(digestnid)); + Signer* signer = 
(Signer*)calloc(1, sizeof(Signer)); + if (!signer) { + auth->verify_flags = AUTHENTICODE_VFY_INTERNAL_ERROR; + goto end; + } + auth->signer = signer; + + /* Authenticode is supposed to have only one SignerInfo value + * that contains all information for actual signing purposes + * and nested signatures or countersignatures */ + PKCS7_SIGNER_INFO* si = sk_PKCS7_SIGNER_INFO_value(PKCS7_get_signer_info(p7), 0); + if (!si) { + auth->verify_flags = AUTHENTICODE_VFY_NO_SIGNER_INFO; + goto end; + } + + auth->countersigs = (CountersignatureArray*)calloc(1, sizeof(CountersignatureArray)); + if (!auth->countersigs) { + auth->verify_flags = AUTHENTICODE_VFY_INTERNAL_ERROR; + goto end; + } + /* Authenticode can contain SET of nested Authenticode signatures + * and countersignatures in unauthenticated attributes */ + parse_nested_authenticode(si, result); + parse_pkcs9_countersig(p7, auth); + parse_ms_countersig(p7, auth); + + /* Get the signing certificate for the first SignerInfo */ + STACK_OF(X509)* signCertStack = PKCS7_get0_signers(p7, certs, 0); + + X509* signCert = sk_X509_value(signCertStack, 0); + if (!signCert) { + auth->verify_flags = AUTHENTICODE_VFY_NO_SIGNER_CERT; + sk_X509_free(signCertStack); + goto end; + } + + sk_X509_free(signCertStack); + + signer->chain = parse_signer_chain(signCert, certs); + + /* Get the Signers digest of Authenticode content */ + ASN1_TYPE* digest = PKCS7_get_signed_attribute(si, NID_pkcs9_messageDigest); + if (!digest) { + auth->verify_flags = AUTHENTICODE_VFY_DIGEST_MISSING; + goto end; + } - digestLen = digest->value.asn1_string->length; - digestData = digest->value.asn1_string->data; - byte_array_init(&signer->digest, digestData, digestLen); + digestnid = OBJ_obj2nid(si->digest_alg->algorithm); + signer->digest_alg = strdup(OBJ_nid2ln(digestnid)); - /* Authenticode stores optional programName in non-optional SpcSpOpusInfo - * attribute */ - ASN1_TYPE* spcInfo = PKCS7_get_signed_attribute( - si, OBJ_txt2nid(NID_spc_info)); - if 
(spcInfo) - signer->program_name = parse_program_name(spcInfo); + digestLen = digest->value.asn1_string->length; + digestData = digest->value.asn1_string->data; + byte_array_init(&signer->digest, digestData, digestLen); - /* If we got to this point, we got all we need to start verifying */ - bool isValid = authenticode_verify(p7, si, signCert); - if (!isValid) - auth->verify_flags = AUTHENTICODE_VFY_INVALID; + /* Authenticode stores optional programName in non-optional SpcSpOpusInfo attribute */ + ASN1_TYPE* spcInfo = PKCS7_get_signed_attribute(si, OBJ_txt2nid(NID_spc_info)); + if (spcInfo) + signer->program_name = parse_program_name(spcInfo); + + /* If we got to this point, we got all we need to start verifying */ + bool isValid = authenticode_verify(p7, si, signCert); + if (!isValid) + auth->verify_flags = AUTHENTICODE_VFY_INVALID; end: - PKCS7_free(p7); - return result; + PKCS7_free(p7); + return result; } static int authenticode_digest( @@ -490,227 +442,216 @@ static int authenticode_digest( uint32_t cert_table_addr, uint8_t* digest) { - uint32_t buffer_size = 0xFFFF; - uint8_t* buffer = (uint8_t*) malloc(buffer_size); - - /* BIO with the file data */ - BIO* bio = BIO_new_mem_buf(pe_data, cert_table_addr); - - EVP_MD_CTX* mdctx = EVP_MD_CTX_new(); - if (!buffer || !bio || !mdctx) - goto error; - - if (!EVP_DigestInit(mdctx, md)) - goto error; - - /* Calculate size of the space between file start and PE header */ - /* Checksum starts at 0x58th byte of the header */ - uint32_t pe_checksum_offset = pe_hdr_offset + 0x58; - /* Space between DOS and PE header could have arbitrary amount of data, read - * in chunks */ - uint32_t fpos = 0; - while (fpos < pe_checksum_offset) - { - uint32_t len_to_read = pe_checksum_offset - fpos; - if (len_to_read > buffer_size) - len_to_read = buffer_size; - - int rlen = BIO_read(bio, buffer, len_to_read); - if (rlen <= 0) - goto error; - - if (!EVP_DigestUpdate(mdctx, buffer, rlen)) - goto error; - - fpos += rlen; - } - - /* Skip the 
checksum */ - if (BIO_read(bio, buffer, 4) <= 0) - goto error; - - /* 64bit PE file is larger than 32bit */ - uint32_t pe64_extra = is_64bit ? 16 : 0; - - /* Read up to certificate table*/ - uint32_t cert_table_offset = 0x3c + pe64_extra; - - if (BIO_read(bio, buffer, cert_table_offset) <= 0) - goto error; - - if (!EVP_DigestUpdate(mdctx, buffer, cert_table_offset)) - goto error; - - /* Skip certificate table */ - if (BIO_read(bio, buffer, 8) <= 0) - goto error; - - /* PE header with check sum + checksum + cert table offset + cert table len */ - fpos = pe_checksum_offset + 4 + cert_table_offset + 8; - - /* Hash everything up to the signature (assuming signature is stored in the - * end of the file) */ - /* Read chunks of the file in case the file is large */ - while (fpos < cert_table_addr) - { - uint32_t len_to_read = cert_table_addr - fpos; - if (len_to_read > buffer_size) - len_to_read = buffer_size; - - int rlen = BIO_read(bio, buffer, len_to_read); - if (rlen <= 0) - goto error; - - if (!EVP_DigestUpdate(mdctx, buffer, rlen)) - goto error; - fpos += rlen; - } - - /* Calculate the digest, write it into digest */ - if (!EVP_DigestFinal(mdctx, digest, NULL)) - goto error; - - EVP_MD_CTX_free(mdctx); - BIO_free_all(bio); - free(buffer); - return 0; + uint32_t buffer_size = 0xFFFF; + uint8_t* buffer = (uint8_t*)malloc(buffer_size); + + /* BIO with the file data */ + BIO* bio = BIO_new_mem_buf(pe_data, cert_table_addr); + + EVP_MD_CTX* mdctx = EVP_MD_CTX_new(); + if (!buffer || !bio || !mdctx) + goto error; + + if (!EVP_DigestInit(mdctx, md)) + goto error; + + /* Calculate size of the space between file start and PE header */ + /* Checksum starts at 0x58th byte of the header */ + uint32_t pe_checksum_offset = pe_hdr_offset + 0x58; + /* Space between DOS and PE header could have arbitrary amount of data, read in chunks */ + uint32_t fpos = 0; + while (fpos < pe_checksum_offset) { + uint32_t len_to_read = pe_checksum_offset - fpos; + if (len_to_read > buffer_size) + 
len_to_read = buffer_size; + + int rlen = BIO_read(bio, buffer, len_to_read); + if (rlen <= 0) + goto error; + + if (!EVP_DigestUpdate(mdctx, buffer, rlen)) + goto error; + + fpos += rlen; + } + + /* Skip the checksum */ + if (BIO_read(bio, buffer, 4) <= 0) + goto error; + + /* 64bit PE file is larger than 32bit */ + uint32_t pe64_extra = is_64bit ? 16 : 0; + + /* Read up to certificate table*/ + uint32_t cert_table_offset = 0x3c + pe64_extra; + + if (BIO_read(bio, buffer, cert_table_offset) <= 0) + goto error; + + if (!EVP_DigestUpdate(mdctx, buffer, cert_table_offset)) + goto error; + + /* Skip certificate table */ + if (BIO_read(bio, buffer, 8) <= 0) + goto error; + + /* PE header with check sum + checksum + cert table offset + cert table len */ + fpos = pe_checksum_offset + 4 + cert_table_offset + 8; + + /* Hash everything up to the signature (assuming signature is stored in the + * end of the file) */ + /* Read chunks of the file in case the file is large */ + while (fpos < cert_table_addr) { + uint32_t len_to_read = cert_table_addr - fpos; + if (len_to_read > buffer_size) + len_to_read = buffer_size; + + int rlen = BIO_read(bio, buffer, len_to_read); + if (rlen <= 0) + goto error; + + if (!EVP_DigestUpdate(mdctx, buffer, rlen)) + goto error; + fpos += rlen; + } + + /* Calculate the digest, write it into digest */ + if (!EVP_DigestFinal(mdctx, digest, NULL)) + goto error; + + EVP_MD_CTX_free(mdctx); + BIO_free_all(bio); + free(buffer); + return 0; error: - EVP_MD_CTX_free(mdctx); - BIO_free_all(bio); - free(buffer); - return 1; + EVP_MD_CTX_free(mdctx); + BIO_free_all(bio); + free(buffer); + return 1; } AuthenticodeArray* parse_authenticode(const uint8_t* pe_data, uint64_t pe_len) { - const uint64_t dos_hdr_size = 0x40; - if (pe_len < dos_hdr_size) - return NULL; - - /* Check if it has DOS signature, so we don't parse random gibberish */ - uint8_t dos_prefix[] = {0x4d, 0x5a}; - if (memcmp(pe_data, dos_prefix, sizeof(dos_prefix)) != 0) - return NULL; - - /* 
offset to pointer in DOS header, that points to PE header */ - const int pe_hdr_ptr_offset = 0x3c; - /* Read the PE offset */ - uint32_t pe_offset = letoh32(*(uint32_t*) (pe_data + pe_hdr_ptr_offset)); - /* Offset to Magic, to know the PE class (32/64bit) */ - uint32_t magic_addr = pe_offset + 0x18; - - if (pe_len < magic_addr + sizeof(uint16_t)) - return NULL; - - /* Read the magic and check if we have 64bit PE */ - uint16_t magic = letoh16(*(uint16_t*) (pe_data + magic_addr)); - bool is_64bit = magic == 0x20b; - /* If PE is 64bit, header is 16 bytes larger */ - uint8_t pe64_extra = is_64bit ? 16 : 0; - - /* Calculate offset to certificate table directory */ - uint32_t pe_cert_table_addr = pe_offset + pe64_extra + 0x98; - - if (pe_len < pe_cert_table_addr + 2 * sizeof(uint32_t)) - return NULL; - - /* Use 64bit type due to the potential overflow in crafted binaries */ - uint64_t cert_addr = letoh32(*(uint32_t*) (pe_data + pe_cert_table_addr)); - uint64_t cert_len = letoh32(*(uint32_t*) (pe_data + pe_cert_table_addr + 4)); - - /* we need atleast 8 bytes to read dwLength, revision and certType */ - if (cert_len < 8 || pe_len < cert_addr + 8) - return NULL; - - uint32_t dwLength = letoh32(*(uint32_t*) (pe_data + cert_addr)); - if (pe_len < cert_addr + dwLength) - return NULL; - /* dwLength = offsetof(WIN_CERTIFICATE, bCertificate) + (size of the - * variable-length binary array contained within bCertificate) */ - AuthenticodeArray* auth_array = authenticode_new( - pe_data + cert_addr + 0x8, dwLength - 0x8); - if (!auth_array) - return NULL; - - /* Compare valid signatures file digests to actual file digest, to complete - * verification */ - for (size_t i = 0; i < auth_array->count; ++i) - { - Authenticode* sig = auth_array->signatures[i]; - - const EVP_MD* md = EVP_get_digestbyname(sig->digest_alg); - if (!md || !sig->digest.len || !sig->digest.data) - { - /* If there is an verification error, keep the first error */ - if (sig->verify_flags == AUTHENTICODE_VFY_VALID) 
- sig->verify_flags = AUTHENTICODE_VFY_UNKNOWN_ALGORITHM; - - continue; - } + const uint64_t dos_hdr_size = 0x40; + if (pe_len < dos_hdr_size) + return NULL; + + /* Check if it has DOS signature, so we don't parse random gibberish */ + uint8_t dos_prefix[] = {0x4d, 0x5a}; + if (memcmp(pe_data, dos_prefix, sizeof(dos_prefix)) != 0) + return NULL; + + /* offset to pointer in DOS header, that points to PE header */ + const int pe_hdr_ptr_offset = 0x3c; + /* Read the PE offset */ + uint32_t pe_offset = letoh32(*(uint32_t*)(pe_data + pe_hdr_ptr_offset)); + /* Offset to Magic, to know the PE class (32/64bit) */ + uint32_t magic_addr = pe_offset + 0x18; + + if (pe_len < magic_addr + sizeof(uint16_t)) + return NULL; + + /* Read the magic and check if we have 64bit PE */ + uint16_t magic = letoh16(*(uint16_t*)(pe_data + magic_addr)); + bool is_64bit = magic == 0x20b; + /* If PE is 64bit, header is 16 bytes larger */ + uint8_t pe64_extra = is_64bit ? 16 : 0; + + /* Calculate offset to certificate table directory */ + uint32_t pe_cert_table_addr = pe_offset + pe64_extra + 0x98; + + if (pe_len < pe_cert_table_addr + 2 * sizeof(uint32_t)) + return NULL; + + /* Use 64bit type due to the potential overflow in crafted binaries */ + uint64_t cert_addr = letoh32(*(uint32_t*)(pe_data + pe_cert_table_addr)); + uint64_t cert_len = letoh32(*(uint32_t*)(pe_data + pe_cert_table_addr + 4)); + + /* we need atleast 8 bytes to read dwLength, revision and certType */ + if (cert_len < 8 || pe_len < cert_addr + 8) + return NULL; + + uint32_t dwLength = letoh32(*(uint32_t*)(pe_data + cert_addr)); + if (pe_len < cert_addr + dwLength) + return NULL; + /* dwLength = offsetof(WIN_CERTIFICATE, bCertificate) + (size of the variable-length binary + * array contained within bCertificate) */ + AuthenticodeArray* auth_array = authenticode_new(pe_data + cert_addr + 0x8, dwLength - 0x8); + if (!auth_array) + return NULL; + + /* Compare valid signatures file digests to actual file digest, to complete 
verification */ + for (size_t i = 0; i < auth_array->count; ++i) { + Authenticode* sig = auth_array->signatures[i]; + + const EVP_MD* md = EVP_get_digestbyname(sig->digest_alg); + if (!md || !sig->digest.len || !sig->digest.data) { + /* If there is an verification error, keep the first error */ + if (sig->verify_flags == AUTHENTICODE_VFY_VALID) + sig->verify_flags = AUTHENTICODE_VFY_UNKNOWN_ALGORITHM; + + continue; + } #if OPENSSL_VERSION_NUMBER >= 0x3000000fL - int mdlen = EVP_MD_get_size(md); + int mdlen = EVP_MD_get_size(md); #else - int mdlen = EVP_MD_size(md); + int mdlen = EVP_MD_size(md); #endif - sig->file_digest.len = mdlen; - sig->file_digest.data = (uint8_t*) malloc(mdlen); - if (!sig->file_digest.data) - continue; - - if (authenticode_digest( - md, pe_data, pe_offset, is_64bit, cert_addr, sig->file_digest.data)) - { - /* If there is an verification error, keep the first error */ - if (sig->verify_flags == AUTHENTICODE_VFY_VALID) - sig->verify_flags = AUTHENTICODE_VFY_INTERNAL_ERROR; - break; + sig->file_digest.len = mdlen; + sig->file_digest.data = (uint8_t*)malloc(mdlen); + if (!sig->file_digest.data) + continue; + + if (authenticode_digest( + md, pe_data, pe_offset, is_64bit, cert_addr, sig->file_digest.data)) { + + /* If there is an verification error, keep the first error */ + if (sig->verify_flags == AUTHENTICODE_VFY_VALID) + sig->verify_flags = AUTHENTICODE_VFY_INTERNAL_ERROR; + break; + } + + /* Complete the verification */ + if (memcmp(sig->file_digest.data, sig->digest.data, mdlen) != 0) + sig->verify_flags = AUTHENTICODE_VFY_WRONG_FILE_DIGEST; } - /* Complete the verification */ - if (memcmp(sig->file_digest.data, sig->digest.data, mdlen) != 0) - sig->verify_flags = AUTHENTICODE_VFY_WRONG_FILE_DIGEST; - } - - return auth_array; + return auth_array; } static void signer_free(Signer* si) { - if (si) - { - free(si->digest.data); - free(si->digest_alg); - free(si->program_name); - certificate_array_free(si->chain); - free(si); - } + if (si) { + 
free(si->digest.data); + free(si->digest_alg); + free(si->program_name); + certificate_array_free(si->chain); + free(si); + } } static void authenticode_free(Authenticode* auth) { - if (auth) - { - free(auth->digest.data); - free(auth->file_digest.data); - free(auth->digest_alg); - signer_free(auth->signer); - certificate_array_free(auth->certs); - countersignature_array_free(auth->countersigs); - free(auth); - } + if (auth) { + free(auth->digest.data); + free(auth->file_digest.data); + free(auth->digest_alg); + signer_free(auth->signer); + certificate_array_free(auth->certs); + countersignature_array_free(auth->countersigs); + free(auth); + } } void authenticode_array_free(AuthenticodeArray* arr) { - if (arr) - { - for (size_t i = 0; i < arr->count; ++i) - { - authenticode_free(arr->signatures[i]); + if (arr) { + for (size_t i = 0; i < arr->count; ++i) { + authenticode_free(arr->signatures[i]); + } + free(arr->signatures); + free(arr); } - free(arr->signatures); - free(arr); - } } diff --git a/src/libyara/modules/pe/authenticode-parser/certificate.c b/src/libyara/modules/pe/authenticode-parser/certificate.c index 455b2bb..cf688e7 100644 --- a/src/libyara/modules/pe/authenticode-parser/certificate.c +++ b/src/libyara/modules/pe/authenticode-parser/certificate.c @@ -24,6 +24,7 @@ SOFTWARE. #include #include #include +#include #include #include #include @@ -35,410 +36,386 @@ SOFTWARE. 
from OpenSSL 3.0 */ static void parse_oneline_string(char* string) { - size_t len = strlen(string); - char* tmp = string; - while (true) - { - char* ptr = strstr(tmp, "\\/"); - if (!ptr) - break; - - memmove(ptr, ptr + 1, strlen(ptr + 1)); - tmp = ptr + 1; - len--; - } - - string[len] = 0; + size_t len = strlen(string); + char* tmp = string; + while (true) { + char* ptr = strstr(tmp, "\\/"); + if (!ptr) + break; + + memmove(ptr, ptr + 1, strlen(ptr + 1)); + tmp = ptr + 1; + len--; + } + + string[len] = 0; } #endif static void parse_name_attributes(X509_NAME* raw, Attributes* attr) { - if (!raw || !attr) - return; - - int entryCount = X509_NAME_entry_count(raw); - for (int i = entryCount - 1; i >= 0; --i) - { - X509_NAME_ENTRY* entryName = X509_NAME_get_entry(raw, i); - ASN1_STRING* asn1String = X509_NAME_ENTRY_get_data(entryName); - - const char* key = OBJ_nid2sn( - OBJ_obj2nid(X509_NAME_ENTRY_get_object(entryName))); - - ByteArray array = {0}; - if (byte_array_init(&array, asn1String->data, asn1String->length) == -1) - break; - - if (strcmp(key, "C") == 0 && !attr->country.data) - attr->country = array; - else if (strcmp(key, "O") == 0 && !attr->organization.data) - attr->organization = array; - else if (strcmp(key, "OU") == 0 && !attr->organizationalUnit.data) - attr->organizationalUnit = array; - else if (strcmp(key, "dnQualifier") == 0 && !attr->nameQualifier.data) - attr->nameQualifier = array; - else if (strcmp(key, "ST") == 0 && !attr->state.data) - attr->state = array; - else if (strcmp(key, "CN") == 0 && !attr->commonName.data) - attr->commonName = array; - else if (strcmp(key, "serialNumber") == 0 && !attr->serialNumber.data) - attr->serialNumber = array; - else if (strcmp(key, "L") == 0 && !attr->locality.data) - attr->locality = array; - else if (strcmp(key, "title") == 0 && !attr->title.data) - attr->title = array; - else if (strcmp(key, "SN") == 0 && !attr->surname.data) - attr->surname = array; - else if (strcmp(key, "GN") == 0 && 
!attr->givenName.data) - attr->givenName = array; - else if (strcmp(key, "initials") == 0 && !attr->initials.data) - attr->initials = array; - else if (strcmp(key, "pseudonym") == 0 && !attr->pseudonym.data) - attr->pseudonym = array; - else if ( - strcmp(key, "generationQualifier") == 0 && - !attr->generationQualifier.data) - attr->generationQualifier = array; - else if (strcmp(key, "emailAddress") == 0 && !attr->emailAddress.data) - attr->emailAddress = array; - else - free(array.data); - } + if (!raw || !attr) + return; + + int entryCount = X509_NAME_entry_count(raw); + for (int i = entryCount - 1; i >= 0; --i) { + X509_NAME_ENTRY* entryName = X509_NAME_get_entry(raw, i); + ASN1_STRING* asn1String = X509_NAME_ENTRY_get_data(entryName); + + const char* key = OBJ_nid2sn(OBJ_obj2nid(X509_NAME_ENTRY_get_object(entryName))); + + ByteArray array = {0}; + if (byte_array_init(&array, asn1String->data, asn1String->length) == -1) + break; + + if (strcmp(key, "C") == 0 && !attr->country.data) + attr->country = array; + else if (strcmp(key, "O") == 0 && !attr->organization.data) + attr->organization = array; + else if (strcmp(key, "OU") == 0 && !attr->organizationalUnit.data) + attr->organizationalUnit = array; + else if (strcmp(key, "dnQualifier") == 0 && !attr->nameQualifier.data) + attr->nameQualifier = array; + else if (strcmp(key, "ST") == 0 && !attr->state.data) + attr->state = array; + else if (strcmp(key, "CN") == 0 && !attr->commonName.data) + attr->commonName = array; + else if (strcmp(key, "serialNumber") == 0 && !attr->serialNumber.data) + attr->serialNumber = array; + else if (strcmp(key, "L") == 0 && !attr->locality.data) + attr->locality = array; + else if (strcmp(key, "title") == 0 && !attr->title.data) + attr->title = array; + else if (strcmp(key, "SN") == 0 && !attr->surname.data) + attr->surname = array; + else if (strcmp(key, "GN") == 0 && !attr->givenName.data) + attr->givenName = array; + else if (strcmp(key, "initials") == 0 && !attr->initials.data) + 
attr->initials = array; + else if (strcmp(key, "pseudonym") == 0 && !attr->pseudonym.data) + attr->pseudonym = array; + else if (strcmp(key, "generationQualifier") == 0 && !attr->generationQualifier.data) + attr->generationQualifier = array; + else if (strcmp(key, "emailAddress") == 0 && !attr->emailAddress.data) + attr->emailAddress = array; + else + free(array.data); + } } /* Reconstructs signers certificate chain */ CertificateArray* parse_signer_chain(X509* signCert, STACK_OF(X509) * certs) { - if (!signCert || !certs) - return NULL; + if (!signCert || !certs) + return NULL; - X509_STORE* store = X509_STORE_new(); - if (!store) - return NULL; + X509_STORE* store = X509_STORE_new(); + if (!store) + return NULL; - X509_STORE_CTX* storeCtx = X509_STORE_CTX_new(); - if (!storeCtx) - { - X509_STORE_CTX_free(storeCtx); - return NULL; - } + X509_STORE_CTX* storeCtx = X509_STORE_CTX_new(); + if (!storeCtx) { + X509_STORE_CTX_free(storeCtx); + return NULL; + } - X509_STORE_CTX_init(storeCtx, store, signCert, certs); + X509_STORE_CTX_init(storeCtx, store, signCert, certs); - /* I can't find ability to use this function for static verification with - * missing trust anchors, because roots are generally not part of the PKCS7 - * signatures, so the return value is currently ignored and the function is - * only used to build the certificate chain */ - X509_verify_cert(storeCtx); + /* I can't find ability to use this function for static verification with missing trust anchors, + * because roots are generally not part of the PKCS7 signatures, so the return value is + * currently ignored and the function is only used to build the certificate chain */ + X509_verify_cert(storeCtx); - STACK_OF(X509)* chain = X509_STORE_CTX_get_chain(storeCtx); + STACK_OF(X509)* chain = X509_STORE_CTX_get_chain(storeCtx); - int certCount = sk_X509_num(chain); + int certCount = sk_X509_num(chain); - CertificateArray* result = (CertificateArray*) calloc(1, sizeof(*result)); - if (!result) - goto 
error; + CertificateArray* result = (CertificateArray*)calloc(1, sizeof(*result)); + if (!result) + goto error; - result->certs = (Certificate**) calloc(certCount, sizeof(Certificate*)); - if (!result->certs) - goto error; + result->certs = (Certificate**)calloc(certCount, sizeof(Certificate*)); + if (!result->certs) + goto error; - /* Convert each certificate to internal representation */ - for (int i = 0; i < certCount; ++i) - { - Certificate* cert = certificate_new(sk_X509_value(chain, i)); - if (!cert) - goto error; + /* Convert each certificate to internal representation */ + for (int i = 0; i < certCount; ++i) { + Certificate* cert = certificate_new(sk_X509_value(chain, i)); + if (!cert) + goto error; - result->certs[i] = cert; - result->count++; - } + result->certs[i] = cert; + result->count++; + } - X509_STORE_free(store); - X509_STORE_CTX_free(storeCtx); - return result; + X509_STORE_free(store); + X509_STORE_CTX_free(storeCtx); + return result; error: /* In case of error, return nothing */ - if (result) - { - for (size_t i = 0; i < result->count; ++i) - { - certificate_free(result->certs[i]); + if (result) { + for (size_t i = 0; i < result->count; ++i) { + certificate_free(result->certs[i]); + } + free(result->certs); + free(result); } - free(result->certs); - free(result); - } - X509_STORE_free(store); - X509_STORE_CTX_free(storeCtx); + X509_STORE_free(store); + X509_STORE_CTX_free(storeCtx); - return NULL; + return NULL; } /* Taken from YARA for compatibility */ static char* integer_to_serial(ASN1_INTEGER* serial) { - int bytes = i2d_ASN1_INTEGER(serial, NULL); - - char* res = NULL; - /* According to X.509 specification the maximum length for the - * serial number is 20 octets. Add two bytes to account for - * DER type and length information. 
*/ - if (bytes < 2 || bytes > 22) - return NULL; - - /* Now that we know the size of the serial number allocate enough - * space to hold it, and use i2d_ASN1_INTEGER() one last time to - * hold it in the allocated buffer. */ - uint8_t* serial_der = (uint8_t*) malloc(bytes); - if (!serial_der) - return NULL; - - uint8_t* serial_bytes; - - bytes = i2d_ASN1_INTEGER(serial, &serial_der); - - /* i2d_ASN1_INTEGER() moves the pointer as it writes into - serial_bytes. Move it back. */ - serial_der -= bytes; - - /* Skip over DER type, length information */ - serial_bytes = serial_der + 2; - bytes -= 2; - - /* Also allocate space to hold the "common" string format: - * 00:01:02:03:04... - * - * For each byte in the serial to convert to hexlified format we - * need three bytes, two for the byte itself and one for colon. - * The last one doesn't have the colon, but the extra byte is used - * for the NULL terminator. */ - res = (char*) malloc(bytes * 3); - if (res) - { - for (int i = 0; i < bytes; i++) - { - /* Don't put the colon on the last one. */ - if (i < bytes - 1) - snprintf(res + 3 * i, 4, "%02x:", serial_bytes[i]); - else - snprintf(res + 3 * i, 3, "%02x", serial_bytes[i]); + int bytes = i2d_ASN1_INTEGER(serial, NULL); + + char* res = NULL; + /* According to X.509 specification the maximum length for the + * serial number is 20 octets. Add two bytes to account for + * DER type and length information. */ + if (bytes < 2 || bytes > 22) + return NULL; + + /* Now that we know the size of the serial number allocate enough + * space to hold it, and use i2d_ASN1_INTEGER() one last time to + * hold it in the allocated buffer. */ + uint8_t* serial_der = (uint8_t*)malloc(bytes); + if (!serial_der) + return NULL; + + uint8_t* serial_bytes; + + bytes = i2d_ASN1_INTEGER(serial, &serial_der); + + /* i2d_ASN1_INTEGER() moves the pointer as it writes into + serial_bytes. Move it back. 
*/ + serial_der -= bytes; + + /* Skip over DER type, length information */ + serial_bytes = serial_der + 2; + bytes -= 2; + + /* Also allocate space to hold the "common" string format: + * 00:01:02:03:04... + * + * For each byte in the serial to convert to hexlified format we + * need three bytes, two for the byte itself and one for colon. + * The last one doesn't have the colon, but the extra byte is used + * for the NULL terminator. */ + res = (char*)malloc(bytes * 3); + if (res) { + for (int i = 0; i < bytes; i++) { + /* Don't put the colon on the last one. */ + if (i < bytes - 1) + snprintf(res + 3 * i, 4, "%02x:", serial_bytes[i]); + else + snprintf(res + 3 * i, 3, "%02x", serial_bytes[i]); + } } - } - free(serial_der); + free(serial_der); - return (char*) res; + return (char*)res; } /* Converts the pubkey to pem, which is just * Base64 encoding of the DER representation */ static char* pubkey_to_pem(EVP_PKEY* pubkey) { - uint8_t* der = NULL; - int len = i2d_PUBKEY(pubkey, &der); /* Convert to DER */ - if (len <= 0) - return NULL; + uint8_t* der = NULL; + int len = i2d_PUBKEY(pubkey, &der); /* Convert to DER */ + if (len <= 0) + return NULL; + + /* Approximate the result length (padding, newlines, 4 out bytes for every 3 in) */ + uint8_t* result = (uint8_t*)malloc(len * 3 / 2); + if (!result) { + OPENSSL_free(der); + return NULL; + } - /* Approximate the result length (padding, newlines, 4 out bytes for every 3 - * in) */ - uint8_t* result = (uint8_t*) malloc(len * 3 / 2); - if (!result) - { - OPENSSL_free(der); - return NULL; - } + /* Base64 encode the DER data */ + EVP_ENCODE_CTX* ctx = EVP_ENCODE_CTX_new(); + if (!ctx) { + OPENSSL_free(der); + free(result); + return NULL; + } + + int resultLen = 0; + int tmp = 0; + EVP_EncodeInit(ctx); + EVP_EncodeUpdate(ctx, result, &tmp, der, len); + resultLen += tmp; + EVP_EncodeFinal(ctx, result + resultLen, &tmp); + resultLen += tmp; - /* Base64 encode the DER data */ - EVP_ENCODE_CTX* ctx = EVP_ENCODE_CTX_new(); - if 
(!ctx) - { + EVP_ENCODE_CTX_free(ctx); OPENSSL_free(der); - free(result); - return NULL; - } - - int resultLen = 0; - int tmp = 0; - EVP_EncodeInit(ctx); - EVP_EncodeUpdate(ctx, result, &tmp, der, len); - resultLen += tmp; - EVP_EncodeFinal(ctx, result + resultLen, &tmp); - resultLen += tmp; - - EVP_ENCODE_CTX_free(ctx); - OPENSSL_free(der); - - /* Remove all newlines from the encoded base64 - * resultLen is excluding NULL terminator */ - for (int i = 0; result[i] != 0; i++) - { - if (result[i] == '\n') - memmove(result + i, result + i + 1, resultLen - i); - } - - return (char*) result; + + /* Remove all newlines from the encoded base64 + * resultLen is excluding NULL terminator */ + for (int i = 0; result[i] != 0; i++) { + if (result[i] == '\n') + memmove(result + i, result + i + 1, resultLen - i); + } + + return (char*)result; } Certificate* certificate_new(X509* x509) { - Certificate* result = (Certificate*) calloc(1, sizeof(*result)); - if (!result) - return NULL; + Certificate* result = (Certificate*)calloc(1, sizeof(*result)); + if (!result) + return NULL; + + /* Calculate SHA1 and SHA256 digests of the X509 structure */ + result->sha1.data = (uint8_t*)malloc(SHA_DIGEST_LENGTH); + if (result->sha1.data) { + X509_digest(x509, EVP_sha1(), result->sha1.data, NULL); + result->sha1.len = SHA_DIGEST_LENGTH; + } - /* Calculate SHA1 and SHA256 digests of the X509 structure */ - result->sha1.data = (uint8_t*) malloc(SHA_DIGEST_LENGTH); - if (result->sha1.data) - { - X509_digest(x509, EVP_sha1(), result->sha1.data, NULL); - result->sha1.len = SHA_DIGEST_LENGTH; - } - - result->sha256.data = (uint8_t*) malloc(SHA256_DIGEST_LENGTH); - if (result->sha256.data) - { - X509_digest(x509, EVP_sha256(), result->sha256.data, NULL); - result->sha256.len = SHA256_DIGEST_LENGTH; - } - - /* 256 bytes should be enough for any name */ - char buffer[256]; - - /* X509_NAME_online is deprecated and shouldn't be used per OpenSSL docs - * but we want to comply with existing YARA code */ - 
X509_NAME* issuerName = X509_get_issuer_name(x509); - X509_NAME_oneline(issuerName, buffer, sizeof(buffer)); - - result->issuer = strdup(buffer); - /* This is a little ugly hack for 3.0 compatibility */ + result->sha256.data = (uint8_t*)malloc(SHA256_DIGEST_LENGTH); + if (result->sha256.data) { + X509_digest(x509, EVP_sha256(), result->sha256.data, NULL); + result->sha256.len = SHA256_DIGEST_LENGTH; + } + + /* 256 bytes should be enough for any name */ + char buffer[256]; + + /* X509_NAME_online is deprecated and shouldn't be used per OpenSSL docs + * but we want to comply with existing YARA code */ + X509_NAME* issuerName = X509_get_issuer_name(x509); + X509_NAME_oneline(issuerName, buffer, sizeof(buffer)); + + result->issuer = strdup(buffer); + /* This is a little ugly hack for 3.0 compatibility */ #if OPENSSL_VERSION_NUMBER >= 0x3000000fL - parse_oneline_string(result->issuer); + parse_oneline_string(result->issuer); #endif - X509_NAME* subjectName = X509_get_subject_name(x509); - X509_NAME_oneline(subjectName, buffer, sizeof(buffer)); - result->subject = strdup(buffer); + X509_NAME* subjectName = X509_get_subject_name(x509); + X509_NAME_oneline(subjectName, buffer, sizeof(buffer)); + result->subject = strdup(buffer); #if OPENSSL_VERSION_NUMBER >= 0x3000000fL - parse_oneline_string(result->subject); + parse_oneline_string(result->subject); #endif - parse_name_attributes(issuerName, &result->issuer_attrs); - parse_name_attributes(subjectName, &result->subject_attrs); + parse_name_attributes(issuerName, &result->issuer_attrs); + parse_name_attributes(subjectName, &result->subject_attrs); - result->version = X509_get_version(x509); - result->serial = integer_to_serial(X509_get_serialNumber(x509)); - result->not_after = ASN1_TIME_to_time_t(X509_get0_notAfter(x509)); - result->not_before = ASN1_TIME_to_time_t(X509_get0_notBefore(x509)); - int sig_nid = X509_get_signature_nid(x509); - result->sig_alg = strdup(OBJ_nid2ln(sig_nid)); + result->version = 
X509_get_version(x509); + result->serial = integer_to_serial(X509_get_serialNumber(x509)); + result->not_after = ASN1_TIME_to_int64_t(X509_get0_notAfter(x509)); + result->not_before = ASN1_TIME_to_int64_t(X509_get0_notBefore(x509)); + int sig_nid = X509_get_signature_nid(x509); + result->sig_alg = strdup(OBJ_nid2ln(sig_nid)); - OBJ_obj2txt(buffer, sizeof(buffer), OBJ_nid2obj(sig_nid), 1); - result->sig_alg_oid = strdup(buffer); + OBJ_obj2txt(buffer, sizeof(buffer), OBJ_nid2obj(sig_nid), 1); + result->sig_alg_oid = strdup(buffer); - EVP_PKEY* pkey = X509_get0_pubkey(x509); - if (pkey) - { - result->key = pubkey_to_pem(pkey); + EVP_PKEY* pkey = X509_get0_pubkey(x509); + if (pkey) { + result->key = pubkey_to_pem(pkey); #if OPENSSL_VERSION_NUMBER >= 0x3000000fL - result->key_alg = strdup(OBJ_nid2sn(EVP_PKEY_get_base_id(pkey))); + result->key_alg = strdup(OBJ_nid2sn(EVP_PKEY_get_base_id(pkey))); #else - result->key_alg = strdup(OBJ_nid2sn(EVP_PKEY_base_id(pkey))); + result->key_alg = strdup(OBJ_nid2sn(EVP_PKEY_base_id(pkey))); #endif - } + } - return result; + return result; } /* Moves certificates from src to dst, returns 0 on success, * else 1. 
If error occurs, arguments are unchanged */ int certificate_array_move(CertificateArray* dst, CertificateArray* src) { - size_t newCount = dst->count + src->count; + size_t newCount = dst->count + src->count; - Certificate** tmp = (Certificate**) realloc( - dst->certs, newCount * sizeof(Certificate*)); - if (!tmp) - return 1; + Certificate** tmp = (Certificate**)realloc(dst->certs, newCount * sizeof(Certificate*)); + if (!tmp) + return 1; - dst->certs = tmp; + dst->certs = tmp; - for (size_t i = 0; i < src->count; ++i) - dst->certs[i + dst->count] = src->certs[i]; + for (size_t i = 0; i < src->count; ++i) + dst->certs[i + dst->count] = src->certs[i]; - dst->count = newCount; + dst->count = newCount; - free(src->certs); - src->certs = NULL; - src->count = 0; + free(src->certs); + src->certs = NULL; + src->count = 0; - return 0; + return 0; } /* Allocates empty certificate array with reserved space for certCount certs */ CertificateArray* certificate_array_new(int certCount) { - CertificateArray* arr = (CertificateArray*) malloc(sizeof(*arr)); - if (!arr) - return NULL; - - arr->certs = (Certificate**) malloc(sizeof(Certificate*) * certCount); - if (!arr->certs) - { - free(arr); - return NULL; - } + CertificateArray* arr = (CertificateArray*)malloc(sizeof(*arr)); + if (!arr) + return NULL; + + arr->certs = (Certificate**)malloc(sizeof(Certificate*) * certCount); + if (!arr->certs) { + free(arr); + return NULL; + } - arr->count = certCount; + arr->count = certCount; - return arr; + return arr; } -static void certificate_attributes_free(Attributes* attrs) +static void certificate_attributes_free(Attributes attrs) { - free(attrs->country.data); - free(attrs->organization.data); - free(attrs->organizationalUnit.data); - free(attrs->nameQualifier.data); - free(attrs->state.data); - free(attrs->commonName.data); - free(attrs->serialNumber.data); - free(attrs->locality.data); - free(attrs->title.data); - free(attrs->surname.data); - free(attrs->givenName.data); - 
free(attrs->initials.data); - free(attrs->pseudonym.data); - free(attrs->generationQualifier.data); - free(attrs->emailAddress.data); + free(attrs.country.data); + free(attrs.organization.data); + free(attrs.organizationalUnit.data); + free(attrs.nameQualifier.data); + free(attrs.state.data); + free(attrs.commonName.data); + free(attrs.serialNumber.data); + free(attrs.locality.data); + free(attrs.title.data); + free(attrs.surname.data); + free(attrs.givenName.data); + free(attrs.initials.data); + free(attrs.pseudonym.data); + free(attrs.generationQualifier.data); + free(attrs.emailAddress.data); } void certificate_free(Certificate* cert) { - if (cert) - { - free(cert->issuer); - free(cert->subject); - free(cert->sig_alg); - free(cert->sig_alg_oid); - free(cert->key_alg); - free(cert->key); - free(cert->sha1.data); - free(cert->sha256.data); - free(cert->serial); - certificate_attributes_free(&(cert->issuer_attrs)); - certificate_attributes_free(&(cert->subject_attrs)); - free(cert); - } + if (cert) { + free(cert->issuer); + free(cert->subject); + free(cert->sig_alg); + free(cert->sig_alg_oid); + free(cert->key_alg); + free(cert->key); + free(cert->sha1.data); + free(cert->sha256.data); + free(cert->serial); + certificate_attributes_free(cert->issuer_attrs); + certificate_attributes_free(cert->subject_attrs); + free(cert); + } } void certificate_array_free(CertificateArray* arr) { - if (arr) - { - for (size_t i = 0; i < arr->count; ++i) - { - certificate_free(arr->certs[i]); + if (arr) { + for (size_t i = 0; i < arr->count; ++i) { + certificate_free(arr->certs[i]); + } + free(arr->certs); + free(arr); } - free(arr->certs); - free(arr); - } } diff --git a/src/libyara/modules/pe/authenticode-parser/certificate.h b/src/libyara/modules/pe/authenticode-parser/certificate.h index 058d7b8..aefb797 100644 --- a/src/libyara/modules/pe/authenticode-parser/certificate.h +++ b/src/libyara/modules/pe/authenticode-parser/certificate.h @@ -27,23 +27,16 @@ SOFTWARE. 
#include #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif - Certificate* certificate_new(X509* x509); +Certificate* certificate_new(X509* x509); +void certificate_free(Certificate* cert); - void certificate_free(Certificate* cert); - - CertificateArray* parse_signer_chain( - X509* signer_cert, - STACK_OF(X509) * certs); - - int certificate_array_move(CertificateArray* dst, CertificateArray* src); - - CertificateArray* certificate_array_new(int certCount); - - void certificate_array_free(CertificateArray* arr); +CertificateArray* parse_signer_chain(X509* signer_cert, STACK_OF(X509) * certs); +int certificate_array_move(CertificateArray* dst, CertificateArray* src); +CertificateArray* certificate_array_new(int certCount); +void certificate_array_free(CertificateArray* arr); #ifdef __cplusplus } diff --git a/src/libyara/modules/pe/authenticode-parser/countersignature.c b/src/libyara/modules/pe/authenticode-parser/countersignature.c index 7b04d70..1c9ae7e 100644 --- a/src/libyara/modules/pe/authenticode-parser/countersignature.c +++ b/src/libyara/modules/pe/authenticode-parser/countersignature.c @@ -21,8 +21,11 @@ SOFTWARE. #include "countersignature.h" +#include +#include #include #include +#include #include #include #include @@ -36,378 +39,620 @@ SOFTWARE. 
#include "helper.h" #include "structs.h" +struct CountersignatureImplStruct; + +typedef TS_TST_INFO* get_ts_tst_info_func(struct CountersignatureImplStruct*); +typedef STACK_OF(X509) * get_signers_func(struct CountersignatureImplStruct*); +typedef STACK_OF(X509) * get_certs_func(struct CountersignatureImplStruct*); +typedef int +verify_digest_func(struct CountersignatureImplStruct*, uint8_t* digest, size_t digest_size); +typedef BIO* verify_signature_init_func(struct CountersignatureImplStruct*); +typedef int +verify_signature_finish_func(struct CountersignatureImplStruct*, BIO* bio, X509* signer); + +#define IMPL_FUNC_NAME(func, type) ms_countersig_impl_##func##_##type##_ + +#define DECLARE_FUNCS(type) \ + get_ts_tst_info_func IMPL_FUNC_NAME(get_ts_tst_info, type); \ + get_signers_func IMPL_FUNC_NAME(get_signers, type); \ + get_certs_func IMPL_FUNC_NAME(get_certs, type); \ + verify_digest_func IMPL_FUNC_NAME(verify_digest, type); \ + verify_signature_init_func IMPL_FUNC_NAME(verify_signature_init, type); \ + verify_signature_finish_func IMPL_FUNC_NAME(verify_signature_finish, type); + +DECLARE_FUNCS(pkcs7) +DECLARE_FUNCS(cms) + +typedef struct { + get_ts_tst_info_func* get_ts_tst_info; + get_signers_func* get_signers; + get_certs_func* get_certs; + verify_digest_func* verify_digest; + verify_signature_init_func* verify_signature_init; + verify_signature_finish_func* verify_signature_finish; +} CountersignatureImplFuncs; + +#define FUNC_ARRAY_NAME_FOR_IMPL(type) countersig_impl_funcs_##type##_ +#define FUNC_ARRAY_FOR_IMPL(type) \ + static const CountersignatureImplFuncs FUNC_ARRAY_NAME_FOR_IMPL(type) = { \ + &IMPL_FUNC_NAME(get_ts_tst_info, type), \ + &IMPL_FUNC_NAME(get_signers, type), \ + &IMPL_FUNC_NAME(get_certs, type), \ + &IMPL_FUNC_NAME(verify_digest, type), \ + &IMPL_FUNC_NAME(verify_signature_init, type), \ + &IMPL_FUNC_NAME(verify_signature_finish, type), \ + }; + +FUNC_ARRAY_FOR_IMPL(pkcs7) +FUNC_ARRAY_FOR_IMPL(cms) + +typedef enum { + CS_IMPL_PKCS7, + 
CS_IMPL_CMS, +} CountersignatureImplType; + +typedef struct CountersignatureImplStruct { + CountersignatureImplType type; + const CountersignatureImplFuncs* funcs; + union { + PKCS7* pkcs7; + CMS_ContentInfo* cms; + }; + // this is here to serve as a cache for CMS because the only way to obtain + // certs from CMS is to use CMS_get1_certs which leaves the deallocation + // to the caller but it just complicates things if you need to remember to + // deallocate also certs. This makes it easier if CountersignatureImpl itself + // is an owner of this thing. + STACK_OF(X509) * _certs; +} CountersignatureImpl; + Countersignature* pkcs9_countersig_new( - const uint8_t* data, - long size, - STACK_OF(X509) * certs, - ASN1_STRING* enc_digest) + const uint8_t* data, long size, STACK_OF(X509) * certs, ASN1_STRING* enc_digest) { - Countersignature* result = (Countersignature*) calloc(1, sizeof(*result)); - if (!result) - return NULL; + Countersignature* result = (Countersignature*)calloc(1, sizeof(*result)); + if (!result) + return NULL; + + PKCS7_SIGNER_INFO* si = d2i_PKCS7_SIGNER_INFO(NULL, &data, size); + if (!si) { + result->verify_flags = COUNTERSIGNATURE_VFY_CANT_PARSE; + return result; + } - PKCS7_SIGNER_INFO* si = d2i_PKCS7_SIGNER_INFO(NULL, &data, size); - if (!si) - { - result->verify_flags = COUNTERSIGNATURE_VFY_CANT_PARSE; - return result; - } - - int digestnid = OBJ_obj2nid(si->digest_alg->algorithm); - result->digest_alg = strdup(OBJ_nid2ln(digestnid)); - - const ASN1_TYPE* sign_time = PKCS7_get_signed_attribute( - si, NID_pkcs9_signingTime); - if (!sign_time) - { - result->verify_flags = COUNTERSIGNATURE_VFY_TIME_MISSING; - goto end; - } - - result->sign_time = ASN1_TIME_to_time_t(sign_time->value.utctime); - - X509* signCert = X509_find_by_issuer_and_serial( - certs, si->issuer_and_serial->issuer, si->issuer_and_serial->serial); - if (!signCert) - { - result->verify_flags = COUNTERSIGNATURE_VFY_NO_SIGNER_CERT; - goto end; - } - - /* PKCS9 stores certificates in 
the corresponding PKCS7 it countersigns */ - result->chain = parse_signer_chain(signCert, certs); - - /* Get digest that corresponds to decrypted encrypted digest in signature */ - ASN1_TYPE* messageDigest = PKCS7_get_signed_attribute( - si, NID_pkcs9_messageDigest); - if (!messageDigest) - { - result->verify_flags = COUNTERSIGNATURE_VFY_DIGEST_MISSING; - goto end; - } - - size_t digestLen = messageDigest->value.octet_string->length; - - if (!digestLen) - { - result->verify_flags = COUNTERSIGNATURE_VFY_DIGEST_MISSING; - goto end; - } - - const EVP_MD* md = EVP_get_digestbynid(digestnid); - if (!md) - { - result->verify_flags = COUNTERSIGNATURE_VFY_UNKNOWN_ALGORITHM; - goto end; - } - - const uint8_t* digestData = messageDigest->value.octet_string->data; - byte_array_init(&result->digest, digestData, digestLen); - - /* By this point we all necessary things for verification - * Get DER representation of the authenticated attributes to calculate its - * digest that should correspond with the one encrypted in SignerInfo */ - uint8_t* authAttrsData = NULL; - int authAttrsLen = ASN1_item_i2d( - (ASN1_VALUE*) si->auth_attr, - &authAttrsData, - ASN1_ITEM_rptr(PKCS7_ATTR_VERIFY)); - - uint8_t calc_digest[EVP_MAX_MD_SIZE]; - calculate_digest(md, authAttrsData, authAttrsLen, calc_digest); - OPENSSL_free(authAttrsData); - - /* Get public key to decrypt encrypted digest of auth attrs */ - EVP_PKEY* pkey = X509_get0_pubkey(signCert); - EVP_PKEY_CTX* ctx = EVP_PKEY_CTX_new(pkey, NULL); - - /* TODO try to get rid of hardcoded length bound */ - size_t decLen = 65536; - uint8_t* decData = (uint8_t*) malloc(decLen); - if (!decData) - { - EVP_PKEY_CTX_free(ctx); - result->verify_flags = COUNTERSIGNATURE_VFY_INTERNAL_ERROR; - goto end; - } + int digestnid = OBJ_obj2nid(si->digest_alg->algorithm); + result->digest_alg = strdup(OBJ_nid2ln(digestnid)); - uint8_t* encData = si->enc_digest->data; - size_t encLen = si->enc_digest->length; + const ASN1_TYPE* sign_time = 
PKCS7_get_signed_attribute(si, NID_pkcs9_signingTime); + if (!sign_time) { + result->verify_flags = COUNTERSIGNATURE_VFY_TIME_MISSING; + goto end; + } - /* Decrypt the encrypted digest */ - EVP_PKEY_verify_recover_init(ctx); - bool isDecrypted = EVP_PKEY_verify_recover( - ctx, decData, &decLen, encData, encLen) == 1; - EVP_PKEY_CTX_free(ctx); + result->sign_time = ASN1_TIME_to_int64_t(sign_time->value.utctime); - if (!isDecrypted) - { - free(decData); - result->verify_flags = COUNTERSIGNATURE_VFY_CANT_DECRYPT_DIGEST; - goto end; - } + X509* signCert = X509_find_by_issuer_and_serial( + certs, si->issuer_and_serial->issuer, si->issuer_and_serial->serial); + if (!signCert) { + result->verify_flags = COUNTERSIGNATURE_VFY_NO_SIGNER_CERT; + goto end; + } + + /* PKCS9 stores certificates in the corresponding PKCS7 it countersigns */ + result->chain = parse_signer_chain(signCert, certs); + + /* Get digest that corresponds to decrypted encrypted digest in signature */ + ASN1_TYPE* messageDigest = PKCS7_get_signed_attribute(si, NID_pkcs9_messageDigest); + if (!messageDigest) { + result->verify_flags = COUNTERSIGNATURE_VFY_DIGEST_MISSING; + goto end; + } + + size_t digestLen = messageDigest->value.octet_string->length; + + if (!digestLen) { + result->verify_flags = COUNTERSIGNATURE_VFY_DIGEST_MISSING; + goto end; + } + + const EVP_MD* md = EVP_get_digestbynid(digestnid); + if (!md) { + result->verify_flags = COUNTERSIGNATURE_VFY_UNKNOWN_ALGORITHM; + goto end; + } + + const uint8_t* digestData = messageDigest->value.octet_string->data; + byte_array_init(&result->digest, digestData, digestLen); + + /* By this point we all necessary things for verification + * Get DER representation of the authenticated attributes to calculate its + * digest that should correspond with the one encrypted in SignerInfo */ + uint8_t* authAttrsData = NULL; + int authAttrsLen = ASN1_item_i2d( + (ASN1_VALUE*)si->auth_attr, &authAttrsData, ASN1_ITEM_rptr(PKCS7_ATTR_VERIFY)); + + uint8_t 
calc_digest[EVP_MAX_MD_SIZE]; + calculate_digest(md, authAttrsData, authAttrsLen, calc_digest); + OPENSSL_free(authAttrsData); + + /* Get public key to decrypt encrypted digest of auth attrs */ + EVP_PKEY* pkey = X509_get0_pubkey(signCert); + EVP_PKEY_CTX* ctx = EVP_PKEY_CTX_new(pkey, NULL); + + /* TODO try to get rid of hardcoded length bound */ + size_t decLen = 65536; + uint8_t* decData = (uint8_t*)malloc(decLen); + if (!decData) { + EVP_PKEY_CTX_free(ctx); + result->verify_flags = COUNTERSIGNATURE_VFY_INTERNAL_ERROR; + goto end; + } + + uint8_t* encData = si->enc_digest->data; + size_t encLen = si->enc_digest->length; - /* compare the encrypted digest and calculated digest */ - bool isValid = false; + /* Decrypt the encrypted digest */ + EVP_PKEY_verify_recover_init(ctx); + bool isDecrypted = EVP_PKEY_verify_recover(ctx, decData, &decLen, encData, encLen) == 1; + EVP_PKEY_CTX_free(ctx); + + if (!isDecrypted) { + free(decData); + result->verify_flags = COUNTERSIGNATURE_VFY_CANT_DECRYPT_DIGEST; + goto end; + } + + /* compare the encrypted digest and calculated digest */ + bool isValid = false; #if OPENSSL_VERSION_NUMBER >= 0x3000000fL - size_t mdLen = EVP_MD_get_size(md); + size_t mdLen = EVP_MD_get_size(md); #else - size_t mdLen = EVP_MD_size(md); + size_t mdLen = EVP_MD_size(md); #endif - /* Sometimes signed data contains DER encoded DigestInfo structure which - * contains hash of authenticated attributes - * (39c9d136f026a9ad18fb9f41a64f76dd8418e8de625dce5d3a372bd242fc5edd) but - * other times it is just purely and I didn't find another way to distinguish - * it but only based on the length of data we get. 
Found mention of this in - * openssl mailing list: - * https://mta.openssl.org/pipermail/openssl-users/2015-September/002054.html - */ - if (mdLen == decLen) - { - isValid = !memcmp(calc_digest, decData, mdLen); - } - else - { - const uint8_t* data_ptr = decData; - DigestInfo* digest_info = d2i_DigestInfo(NULL, &data_ptr, decLen); - if (digest_info) - { - isValid = !memcmp(digest_info->digest->data, calc_digest, mdLen); - DigestInfo_free(digest_info); - } - else - { - isValid = false; - } - } - free(decData); - - if (!isValid) - { - result->verify_flags = COUNTERSIGNATURE_VFY_INVALID; - goto end; - } - - /* Now check the countersignature message-digest that should correspond - * to Signatures encrypted digest it countersigns */ - calculate_digest(md, enc_digest->data, enc_digest->length, calc_digest); - - /* Check if calculated one matches the stored one */ - if (digestLen != mdLen || memcmp(calc_digest, digestData, mdLen) != 0) - { - result->verify_flags = COUNTERSIGNATURE_VFY_DOESNT_MATCH_SIGNATURE; - goto end; - } + /* Sometimes signed data contains DER encoded DigestInfo structure which contains hash of + * authenticated attributes (39c9d136f026a9ad18fb9f41a64f76dd8418e8de625dce5d3a372bd242fc5edd) + * but other times it is just purely and I didn't find another way to distinguish it but only + * based on the length of data we get. 
Found mention of this in openssl mailing list: + * https://mta.openssl.org/pipermail/openssl-users/2015-September/002054.html */ + if (mdLen == decLen) { + isValid = !memcmp(calc_digest, decData, mdLen); + } else { + const uint8_t* data_ptr = decData; + DigestInfo* digest_info = d2i_DigestInfo(NULL, &data_ptr, decLen); + if (digest_info) { + isValid = !memcmp(digest_info->digest->data, calc_digest, mdLen); + DigestInfo_free(digest_info); + } else { + isValid = false; + } + } + free(decData); + + if (!isValid) { + result->verify_flags = COUNTERSIGNATURE_VFY_INVALID; + goto end; + } + + /* Now check the countersignature message-digest that should correspond + * to Signatures encrypted digest it countersigns */ + calculate_digest(md, enc_digest->data, enc_digest->length, calc_digest); + + /* Check if calculated one matches the stored one */ + if (digestLen != mdLen || memcmp(calc_digest, digestData, mdLen) != 0) { + result->verify_flags = COUNTERSIGNATURE_VFY_DOESNT_MATCH_SIGNATURE; + goto end; + } end: - PKCS7_SIGNER_INFO_free(si); - return result; + PKCS7_SIGNER_INFO_free(si); + return result; } -Countersignature* ms_countersig_new( - const uint8_t* data, - long size, - ASN1_STRING* enc_digest) +TS_TST_INFO* IMPL_FUNC_NAME(get_ts_tst_info, pkcs7)(CountersignatureImpl* impl) { - Countersignature* result = (Countersignature*) calloc(1, sizeof(*result)); - if (!result) - return NULL; + assert(impl->type == CS_IMPL_PKCS7); - PKCS7* p7 = d2i_PKCS7(NULL, &data, size); - if (!p7) - { - result->verify_flags = COUNTERSIGNATURE_VFY_CANT_PARSE; - return result; - } + return PKCS7_to_TS_TST_INFO(impl->pkcs7); +} - TS_TST_INFO* ts = PKCS7_to_TS_TST_INFO(p7); - if (!ts) - { - result->verify_flags = COUNTERSIGNATURE_VFY_CANT_PARSE; - PKCS7_free(p7); - return result; - } +TS_TST_INFO* IMPL_FUNC_NAME(get_ts_tst_info, cms)(CountersignatureImpl* impl) +{ + assert(impl->type == CS_IMPL_CMS); - const ASN1_TIME* rawTime = TS_TST_INFO_get_time(ts); - if (!rawTime) - { - 
result->verify_flags = COUNTERSIGNATURE_VFY_TIME_MISSING; - TS_TST_INFO_free(ts); - PKCS7_free(p7); - return result; - } + const ASN1_OBJECT* content_type = CMS_get0_eContentType(impl->cms); + if (!content_type || OBJ_obj2nid(content_type) != NID_id_smime_ct_TSTInfo) { + return NULL; + } - result->sign_time = ASN1_TIME_to_time_t(rawTime); + ASN1_OCTET_STRING** content = CMS_get0_content(impl->cms); + if (!content || !*content) { + return NULL; + } + + const uint8_t* data = (*content)->data; + TS_TST_INFO* ts_tst_info = d2i_TS_TST_INFO(NULL, &data, (*content)->length); + if (!ts_tst_info) { + return NULL; + } - STACK_OF(X509)* sigs = PKCS7_get0_signers(p7, p7->d.sign->cert, 0); - X509* signCert = sk_X509_value(sigs, 0); - if (!signCert) - { - result->verify_flags = COUNTERSIGNATURE_VFY_NO_SIGNER_CERT; - goto end; - } + return ts_tst_info; +} + +STACK_OF(X509) * IMPL_FUNC_NAME(get_signers, pkcs7)(CountersignatureImpl* impl) +{ + assert(impl->type == CS_IMPL_PKCS7); + + return PKCS7_get0_signers(impl->pkcs7, impl->pkcs7->d.sign->cert, 0); +} + +STACK_OF(X509) * IMPL_FUNC_NAME(get_signers, cms)(CountersignatureImpl* impl) +{ + assert(impl->type == CS_IMPL_CMS); + + STACK_OF(CMS_SignerInfo)* signer_infos = CMS_get0_SignerInfos(impl->cms); + if (!signer_infos) { + return NULL; + } - result->chain = parse_signer_chain(signCert, p7->d.sign->cert); + // Use our func points to cache the certs and don't create another copy + STACK_OF(X509)* certs = impl->funcs->get_certs(impl); + + int si_count = sk_CMS_SignerInfo_num(signer_infos); + int cert_count = certs ? sk_X509_num(certs) : 0; + STACK_OF(X509)* result = sk_X509_new_null(); + + // PKCS7_get0_signers() lets us specify the certificate array and looks up signer certificate + // there With CMS_ContentInfo, we don't have direct access to signer certificate, just all the + // certificates The only thing we can do is to go through all signer infos and find those which + // match some certificate in all certificates. 
It essentially simulates what + // PKCS7_get0_signers() does. + for (int i = 0; i < si_count; ++i) { + CMS_SignerInfo* si = sk_CMS_SignerInfo_value(signer_infos, i); + if (!si) { + continue; + } + + if (certs) { + for (int j = 0; j < cert_count; ++j) { + X509* cert = sk_X509_value(certs, j); + if (!cert) { + continue; + } + + if (CMS_SignerInfo_cert_cmp(si, cert) == 0) { + if (!sk_X509_push(result, cert)) { + return NULL; + } + } + } + } + } - /* Imprint == digest */ - TS_MSG_IMPRINT* imprint = TS_TST_INFO_get_msg_imprint(ts); - if (!imprint) - { - result->verify_flags = COUNTERSIGNATURE_VFY_DIGEST_MISSING; - goto end; - } + return result; +} - X509_ALGOR* digestAlg = TS_MSG_IMPRINT_get_algo(imprint); - int digestnid = OBJ_obj2nid(digestAlg->algorithm); - result->digest_alg = strdup(OBJ_nid2ln(digestnid)); +STACK_OF(X509) * IMPL_FUNC_NAME(get_certs, pkcs7)(CountersignatureImpl* impl) +{ + assert(impl->type == CS_IMPL_PKCS7); - ASN1_STRING* rawDigest = TS_MSG_IMPRINT_get_msg(imprint); + return impl->pkcs7->d.sign->cert; +} - int digestLen = rawDigest->length; - uint8_t* digestData = rawDigest->data; +STACK_OF(X509) * IMPL_FUNC_NAME(get_certs, cms)(CountersignatureImpl* impl) +{ + assert(impl->type == CS_IMPL_CMS); - byte_array_init(&result->digest, digestData, digestLen); + if (impl->_certs) { + return impl->_certs; + } - if (!digestLen) - { - result->verify_flags = COUNTERSIGNATURE_VFY_DIGEST_MISSING; - goto end; - } + impl->_certs = CMS_get1_certs(impl->cms); + return impl->_certs; +} - const EVP_MD* md = EVP_get_digestbynid(digestnid); - if (!md) - { - result->verify_flags = COUNTERSIGNATURE_VFY_UNKNOWN_ALGORITHM; - goto end; - } +int IMPL_FUNC_NAME(verify_digest, pkcs7)( + CountersignatureImpl* impl, uint8_t* digest, size_t digest_size) +{ + assert(impl->type == CS_IMPL_PKCS7); - uint8_t calc_digest[EVP_MAX_MD_SIZE]; - calculate_digest(md, enc_digest->data, enc_digest->length, calc_digest); + X509_STORE* store = X509_STORE_new(); + TS_VERIFY_CTX* ctx = 
TS_VERIFY_CTX_new(); + TS_VERIFY_CTX_init(ctx); + TS_VERIFY_CTX_set_flags(ctx, TS_VFY_VERSION | TS_VFY_IMPRINT); + TS_VERIFY_CTX_set_store(ctx, store); #if OPENSSL_VERSION_NUMBER >= 0x3000000fL - int mdLen = EVP_MD_get_size(md); + TS_VERIFY_CTX_set_certs(ctx, impl->funcs->get_certs(impl)); #else - int mdLen = EVP_MD_size(md); + TS_VERIFY_CTS_set_certs(ctx, impl->funcs->get_certs(impl)); #endif + TS_VERIFY_CTX_set_imprint(ctx, digest, digest_size); + + int result = TS_RESP_verify_token(ctx, impl->pkcs7); + + X509_STORE_free(store); + OPENSSL_free(ctx); + + return result; +} + +int IMPL_FUNC_NAME(verify_digest, cms)( + CountersignatureImpl* impl, uint8_t* digest, size_t digest_size) +{ + assert(impl->type == CS_IMPL_CMS); + + // This is essentially just reimplementation of TS_RESP_verify_token() from OpenSSL + TS_TST_INFO* ts_tst_info = impl->funcs->get_ts_tst_info(impl); + if (!ts_tst_info || TS_TST_INFO_get_version(ts_tst_info) != 1) { + if (ts_tst_info) + TS_TST_INFO_free(ts_tst_info); + return 0; + } - if (digestLen != mdLen || memcmp(calc_digest, digestData, mdLen) != 0) - { - result->verify_flags = COUNTERSIGNATURE_VFY_DOESNT_MATCH_SIGNATURE; - goto end; - } + TS_MSG_IMPRINT* ts_imprint = TS_TST_INFO_get_msg_imprint(ts_tst_info); + if (!ts_imprint) { + TS_TST_INFO_free(ts_tst_info); + return 0; + } + + ASN1_OCTET_STRING* ts_imprint_digest = TS_MSG_IMPRINT_get_msg(ts_imprint); + if (!ts_imprint_digest) { + TS_TST_INFO_free(ts_tst_info); + return 0; + } + + if (ts_imprint_digest->length != (int)digest_size || + memcmp(ts_imprint_digest->data, digest, digest_size) != 0) { + TS_TST_INFO_free(ts_tst_info); + return 0; + } + + TS_TST_INFO_free(ts_tst_info); + return 1; +} + +BIO* IMPL_FUNC_NAME(verify_signature_init, pkcs7)(CountersignatureImpl* impl) +{ + assert(impl->type == CS_IMPL_PKCS7); + + return PKCS7_dataInit(impl->pkcs7, NULL); +} + +BIO* IMPL_FUNC_NAME(verify_signature_init, cms)(CountersignatureImpl* impl) +{ + assert(impl->type == CS_IMPL_CMS); + + 
return CMS_dataInit(impl->cms, NULL); +} + +int IMPL_FUNC_NAME(verify_signature_finish, pkcs7)( + CountersignatureImpl* impl, BIO* bio, X509* signer) +{ + assert(impl->type == CS_IMPL_PKCS7); + + /* Verify signature with PKCS7_signatureVerify + because TS_RESP_verify_token would try to verify + chain and without trust anchors it always fails */ + PKCS7_SIGNER_INFO* si = sk_PKCS7_SIGNER_INFO_value(PKCS7_get_signer_info(impl->pkcs7), 0); + return PKCS7_signatureVerify(bio, impl->pkcs7, si, signer); +} + +int IMPL_FUNC_NAME(verify_signature_finish, cms)(CountersignatureImpl* impl, BIO* bio, X509* signer) +{ + assert(impl->type == CS_IMPL_CMS); + + (void)signer; + CMS_SignerInfo* si = sk_CMS_SignerInfo_value(CMS_get0_SignerInfos(impl->cms), 0); + return CMS_SignerInfo_verify_content(si, bio); +} + +CountersignatureImpl* ms_countersig_impl_new(const uint8_t* data, long size) +{ + const uint8_t* d = data; + PKCS7* p7 = d2i_PKCS7(NULL, &d, size); + if (p7 && PKCS7_type_is_signed(p7) && p7->d.sign) { + CountersignatureImpl* result = + (CountersignatureImpl*)calloc(1, sizeof(CountersignatureImpl)); + result->type = CS_IMPL_PKCS7; + result->funcs = &FUNC_ARRAY_NAME_FOR_IMPL(pkcs7); + result->pkcs7 = p7; + return result; + } + + d = data; + CMS_ContentInfo* cms = d2i_CMS_ContentInfo(NULL, &d, size); + if (cms) { + CountersignatureImpl* result = + (CountersignatureImpl*)calloc(1, sizeof(CountersignatureImpl)); + result->type = CS_IMPL_CMS; + result->funcs = &FUNC_ARRAY_NAME_FOR_IMPL(cms); + result->cms = cms; + return result; + } + + return NULL; +} + +void ms_countersig_impl_free(CountersignatureImpl* impl) +{ + switch (impl->type) { + case CS_IMPL_PKCS7: + PKCS7_free(impl->pkcs7); + break; + case CS_IMPL_CMS: + if (impl->_certs) { + sk_X509_pop_free(impl->_certs, X509_free); + } + CMS_ContentInfo_free(impl->cms); + break; + } + + free(impl); +} + +Countersignature* ms_countersig_new(const uint8_t* data, long size, ASN1_STRING* enc_digest) +{ + Countersignature* result = 
(Countersignature*)calloc(1, sizeof(*result)); + if (!result) + return NULL; + + CountersignatureImpl* impl = ms_countersig_impl_new(data, size); + if (!impl) { + result->verify_flags = COUNTERSIGNATURE_VFY_CANT_PARSE; + return result; + } + + TS_TST_INFO* ts = impl->funcs->get_ts_tst_info(impl); + if (!ts) { + result->verify_flags = COUNTERSIGNATURE_VFY_CANT_PARSE; + ms_countersig_impl_free(impl); + return result; + } + + const ASN1_TIME* rawTime = TS_TST_INFO_get_time(ts); + if (!rawTime) { + result->verify_flags = COUNTERSIGNATURE_VFY_TIME_MISSING; + TS_TST_INFO_free(ts); + ms_countersig_impl_free(impl); + return result; + } + + result->sign_time = ASN1_TIME_to_int64_t(rawTime); + + STACK_OF(X509)* sigs = impl->funcs->get_signers(impl); + X509* signCert = sk_X509_value(sigs, 0); + if (!signCert) { + result->verify_flags = COUNTERSIGNATURE_VFY_NO_SIGNER_CERT; + goto end; + } + + STACK_OF(X509)* certs = impl->funcs->get_certs(impl); + result->chain = parse_signer_chain(signCert, certs); + + /* Imprint == digest */ + TS_MSG_IMPRINT* imprint = TS_TST_INFO_get_msg_imprint(ts); + if (!imprint) { + result->verify_flags = COUNTERSIGNATURE_VFY_DIGEST_MISSING; + goto end; + } + + X509_ALGOR* digestAlg = TS_MSG_IMPRINT_get_algo(imprint); + int digestnid = OBJ_obj2nid(digestAlg->algorithm); + result->digest_alg = strdup(OBJ_nid2ln(digestnid)); - TS_VERIFY_CTX* ctx = TS_VERIFY_CTX_new(); - X509_STORE* store = X509_STORE_new(); - TS_VERIFY_CTX_init(ctx); + ASN1_STRING* rawDigest = TS_MSG_IMPRINT_get_msg(imprint); + + int digestLen = rawDigest->length; + uint8_t* digestData = rawDigest->data; + + byte_array_init(&result->digest, digestData, digestLen); + + if (!digestLen) { + result->verify_flags = COUNTERSIGNATURE_VFY_DIGEST_MISSING; + goto end; + } + + const EVP_MD* md = EVP_get_digestbynid(digestnid); + if (!md) { + result->verify_flags = COUNTERSIGNATURE_VFY_UNKNOWN_ALGORITHM; + goto end; + } + + uint8_t calc_digest[EVP_MAX_MD_SIZE]; + calculate_digest(md, 
enc_digest->data, enc_digest->length, calc_digest); - TS_VERIFY_CTX_set_flags(ctx, TS_VFY_VERSION | TS_VFY_IMPRINT); - TS_VERIFY_CTX_set_store(ctx, store); #if OPENSSL_VERSION_NUMBER >= 0x3000000fL - TS_VERIFY_CTX_set_certs(ctx, p7->d.sign->cert); + int mdLen = EVP_MD_get_size(md); #else - TS_VERIFY_CTS_set_certs(ctx, p7->d.sign->cert); + int mdLen = EVP_MD_size(md); #endif - TS_VERIFY_CTX_set_imprint(ctx, calc_digest, mdLen); - - bool isValid = TS_RESP_verify_token(ctx, p7) == 1; - - X509_STORE_free(store); - OPENSSL_free(ctx); - if (!isValid) - { - result->verify_flags = COUNTERSIGNATURE_VFY_INVALID; - goto end; - } + if (digestLen != mdLen || memcmp(calc_digest, digestData, mdLen) != 0) { + result->verify_flags = COUNTERSIGNATURE_VFY_DOESNT_MATCH_SIGNATURE; + goto end; + } - /* Verify signature with PKCS7_signatureVerify - because TS_RESP_verify_token would try to verify - chain and without trust anchors it always fails */ - BIO* p7bio = PKCS7_dataInit(p7, NULL); + bool isValid = impl->funcs->verify_digest(impl, calc_digest, mdLen) == 1; + if (!isValid) { + result->verify_flags = COUNTERSIGNATURE_VFY_INVALID; + goto end; + } - char buf[4096]; - /* We now have to 'read' from p7bio to calculate digests etc. */ - while (BIO_read(p7bio, buf, sizeof(buf)) > 0) continue; + BIO* bio = impl->funcs->verify_signature_init(impl); - PKCS7_SIGNER_INFO* si = sk_PKCS7_SIGNER_INFO_value( - PKCS7_get_signer_info(p7), 0); + char buf[4096]; + /* We now have to 'read' from bio to calculate digests etc. 
*/ + while (BIO_read(bio, buf, sizeof(buf)) > 0) + continue; - isValid = PKCS7_signatureVerify(p7bio, p7, si, signCert) == 1; + isValid = impl->funcs->verify_signature_finish(impl, bio, signCert) == 1; - BIO_free_all(p7bio); + BIO_free_all(bio); - if (!isValid) - result->verify_flags = COUNTERSIGNATURE_VFY_INVALID; + if (!isValid) + result->verify_flags = COUNTERSIGNATURE_VFY_INVALID; end: - sk_X509_free(sigs); - PKCS7_free(p7); - TS_TST_INFO_free(ts); - return result; + sk_X509_free(sigs); + TS_TST_INFO_free(ts); + ms_countersig_impl_free(impl); + return result; } -int countersignature_array_insert( - CountersignatureArray* arr, - Countersignature* sig) +int countersignature_array_insert(CountersignatureArray* arr, Countersignature* sig) { - Countersignature** tmp = (Countersignature**) realloc( - arr->counters, (arr->count + 1) * sizeof(Countersignature*)); - if (!tmp) - return 1; + Countersignature** tmp = + (Countersignature**)realloc(arr->counters, (arr->count + 1) * sizeof(Countersignature*)); + if (!tmp) + return 1; - arr->counters = tmp; - arr->counters[arr->count] = sig; - arr->count++; + arr->counters = tmp; + arr->counters[arr->count] = sig; + arr->count++; - return 0; + return 0; } -int countersignature_array_move( - CountersignatureArray* dst, - CountersignatureArray* src) +int countersignature_array_move(CountersignatureArray* dst, CountersignatureArray* src) { - size_t newCount = dst->count + src->count; + size_t newCount = dst->count + src->count; - Countersignature** tmp = (Countersignature**) realloc( - dst->counters, newCount * sizeof(Countersignature*)); - if (!tmp) - return 1; + Countersignature** tmp = + (Countersignature**)realloc(dst->counters, newCount * sizeof(Countersignature*)); + if (!tmp) + return 1; - dst->counters = tmp; + dst->counters = tmp; - for (size_t i = 0; i < src->count; ++i) - dst->counters[i + dst->count] = src->counters[i]; + for (size_t i = 0; i < src->count; ++i) + dst->counters[i + dst->count] = src->counters[i]; - 
dst->count = newCount; + dst->count = newCount; - free(src->counters); - src->counters = NULL; - src->count = 0; + free(src->counters); + src->counters = NULL; + src->count = 0; - return 0; + return 0; } void countersignature_free(Countersignature* sig) { - if (sig) - { - free(sig->digest_alg); - free(sig->digest.data); - certificate_array_free(sig->chain); - free(sig); - } + if (sig) { + free(sig->digest_alg); + free(sig->digest.data); + certificate_array_free(sig->chain); + free(sig); + } } void countersignature_array_free(CountersignatureArray* arr) { - if (arr) - { - for (size_t i = 0; i < arr->count; ++i) - { - countersignature_free(arr->counters[i]); - } - free(arr->counters); - free(arr); - } + if (arr) { + for (size_t i = 0; i < arr->count; ++i) { + countersignature_free(arr->counters[i]); + } + free(arr->counters); + free(arr); + } } diff --git a/src/libyara/modules/pe/authenticode-parser/countersignature.h b/src/libyara/modules/pe/authenticode-parser/countersignature.h index b14012c..294ffed 100644 --- a/src/libyara/modules/pe/authenticode-parser/countersignature.h +++ b/src/libyara/modules/pe/authenticode-parser/countersignature.h @@ -22,40 +22,29 @@ SOFTWARE. 
#ifndef AUTHENTICODE_PARSER_COUNTERSIGNATURE_H #define AUTHENTICODE_PARSER_COUNTERSIGNATURE_H +#include "certificate.h" +#include "helper.h" #include #include #include -#include "certificate.h" -#include "helper.h" #include #include #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif - Countersignature* pkcs9_countersig_new( - const uint8_t* data, - long size, - STACK_OF(X509) * certs, - ASN1_STRING* enc_digest); - Countersignature* ms_countersig_new( - const uint8_t* data, - long size, - ASN1_STRING* enc_digest); +Countersignature* pkcs9_countersig_new( + const uint8_t* data, long size, STACK_OF(X509) * certs, ASN1_STRING* enc_digest); +Countersignature* ms_countersig_new(const uint8_t* data, long size, ASN1_STRING* enc_digest); - int countersignature_array_insert( - CountersignatureArray* arr, - Countersignature* sig); - /* Moves all countersignatures of src and inserts them into dst */ - int countersignature_array_move( - CountersignatureArray* dst, - CountersignatureArray* src); +int countersignature_array_insert(CountersignatureArray* arr, Countersignature* sig); +/* Moves all countersignatures of src and inserts them into dst */ +int countersignature_array_move(CountersignatureArray* dst, CountersignatureArray* src); - void countersignature_free(Countersignature* sig); - void countersignature_array_free(CountersignatureArray* arr); +void countersignature_free(Countersignature* sig); +void countersignature_array_free(CountersignatureArray* arr); #ifdef __cplusplus } diff --git a/src/libyara/modules/pe/authenticode-parser/helper.c b/src/libyara/modules/pe/authenticode-parser/helper.c index 6845c64..ecdf7ca 100644 --- a/src/libyara/modules/pe/authenticode-parser/helper.c +++ b/src/libyara/modules/pe/authenticode-parser/helper.c @@ -30,61 +30,55 @@ SOFTWARE. 
uint16_t bswap16(uint16_t d) { - return (d << 8) | (d >> 8); + return (d << 8) | (d >> 8); } uint32_t bswap32(uint32_t d) { - return (((d) &0xff000000) >> 24) | (((d) &0x00ff0000) >> 8) | - (((d) &0x0000ff00) << 8) | (((d) &0x000000ff) << 24); + return (((d)&0xff000000) >> 24) | (((d)&0x00ff0000) >> 8) | (((d)&0x0000ff00) << 8) | + (((d)&0x000000ff) << 24); } -int calculate_digest( - const EVP_MD* md, - const uint8_t* data, - size_t len, - uint8_t* digest) +int calculate_digest(const EVP_MD* md, const uint8_t* data, size_t len, uint8_t* digest) { - unsigned int outLen = 0; + unsigned int outLen = 0; - EVP_MD_CTX* mdCtx = EVP_MD_CTX_new(); - if (!mdCtx) - goto end; + EVP_MD_CTX* mdCtx = EVP_MD_CTX_new(); + if (!mdCtx) + goto end; - if (!EVP_DigestInit_ex(mdCtx, md, NULL) || - !EVP_DigestUpdate(mdCtx, data, len) || - !EVP_DigestFinal_ex(mdCtx, digest, &outLen)) - goto end; + if (!EVP_DigestInit_ex(mdCtx, md, NULL) || !EVP_DigestUpdate(mdCtx, data, len) || + !EVP_DigestFinal_ex(mdCtx, digest, &outLen)) + goto end; end: - EVP_MD_CTX_free(mdCtx); - return (int) outLen; + EVP_MD_CTX_free(mdCtx); + return (int)outLen; } int byte_array_init(ByteArray* arr, const uint8_t* data, int len) { - if (len == 0) - { - arr->data = NULL; - arr->len = 0; + if (len == 0) { + arr->data = NULL; + arr->len = 0; + return 0; + } + + arr->data = (uint8_t*)malloc(len); + if (!arr->data) + return -1; + + arr->len = len; + memcpy(arr->data, data, len); return 0; - } - - arr->data = (uint8_t*) malloc(len); - if (!arr->data) - return -1; - - arr->len = len; - memcpy(arr->data, data, len); - return 0; } -time_t ASN1_TIME_to_time_t(const ASN1_TIME* time) +int64_t ASN1_TIME_to_int64_t(const ASN1_TIME* time) { - struct tm t = {0}; - if (!time) - return timegm(&t); + struct tm t = {0}; + if (!time) + return timegm(&t); - ASN1_TIME_to_tm(time, &t); - return timegm(&t); + ASN1_TIME_to_tm(time, &t); + return timegm(&t); } diff --git a/src/libyara/modules/pe/authenticode-parser/helper.h 
b/src/libyara/modules/pe/authenticode-parser/helper.h index 130047a..dc1261d 100644 --- a/src/libyara/modules/pe/authenticode-parser/helper.h +++ b/src/libyara/modules/pe/authenticode-parser/helper.h @@ -34,13 +34,12 @@ SOFTWARE. #endif #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif - /* Endianity related functions for PE reading */ - uint16_t bswap16(uint16_t d); - uint32_t bswap32(uint32_t d); +/* Endianity related functions for PE reading */ +uint16_t bswap16(uint16_t d); +uint32_t bswap32(uint32_t d); #if defined(WORDS_BIGENDIAN) #define letoh16(x) bswap16(x) @@ -54,17 +53,13 @@ extern "C" #define betoh32(x) bswap32(x) #endif - /* Calculates digest md of data, return bytes written to digest or 0 on error - * Maximum of EVP_MAX_MD_SIZE will be written to digest */ - int calculate_digest( - const EVP_MD* md, - const uint8_t* data, - size_t len, - uint8_t* digest); - /* Copies data of length len into already existing arr */ - int byte_array_init(ByteArray* arr, const uint8_t* data, int len); - /* Converts ASN1_TIME string time into a unix timestamp */ - time_t ASN1_TIME_to_time_t(const ASN1_TIME* time); +/* Calculates digest md of data, return bytes written to digest or 0 on error + * Maximum of EVP_MAX_MD_SIZE will be written to digest */ +int calculate_digest(const EVP_MD* md, const uint8_t* data, size_t len, uint8_t* digest); +/* Copies data of length len into already existing arr */ +int byte_array_init(ByteArray* arr, const uint8_t* data, int len); +/* Converts ASN1_TIME string time into a unix timestamp */ +int64_t ASN1_TIME_to_int64_t(const ASN1_TIME* time); #ifdef __cplusplus } diff --git a/src/libyara/modules/pe/authenticode-parser/structs.c b/src/libyara/modules/pe/authenticode-parser/structs.c index e9fcfa2..4eee46b 100644 --- a/src/libyara/modules/pe/authenticode-parser/structs.c +++ b/src/libyara/modules/pe/authenticode-parser/structs.c @@ -22,68 +22,50 @@ SOFTWARE. 
#include "structs.h" ASN1_CHOICE(SpcString) = { - ASN1_IMP_OPT(SpcString, value.unicode, ASN1_BMPSTRING, 0), - ASN1_IMP_OPT( - SpcString, - value.ascii, - ASN1_IA5STRING, - 1)} ASN1_CHOICE_END(SpcString); + ASN1_IMP_OPT(SpcString, value.unicode, ASN1_BMPSTRING, 0), + ASN1_IMP_OPT(SpcString, value.ascii, ASN1_IA5STRING, 1) +} ASN1_CHOICE_END(SpcString) ASN1_SEQUENCE(SpcSerializedObject) = { - ASN1_SIMPLE(SpcSerializedObject, classId, ASN1_OCTET_STRING), - ASN1_SIMPLE( - SpcSerializedObject, - serializedData, - ASN1_OCTET_STRING)} ASN1_SEQUENCE_END(SpcSerializedObject); + ASN1_SIMPLE(SpcSerializedObject, classId, ASN1_OCTET_STRING), + ASN1_SIMPLE(SpcSerializedObject, serializedData, ASN1_OCTET_STRING) +} ASN1_SEQUENCE_END(SpcSerializedObject) ASN1_CHOICE(SpcLink) = { - ASN1_IMP_OPT(SpcLink, value.url, ASN1_IA5STRING, 0), - ASN1_IMP_OPT(SpcLink, value.moniker, SpcSerializedObject, 1), - ASN1_EXP_OPT(SpcLink, value.file, SpcString, 2)} ASN1_CHOICE_END(SpcLink); + ASN1_IMP_OPT(SpcLink, value.url, ASN1_IA5STRING, 0), + ASN1_IMP_OPT(SpcLink, value.moniker, SpcSerializedObject, 1), + ASN1_EXP_OPT(SpcLink, value.file, SpcString, 2) +} ASN1_CHOICE_END(SpcLink) ASN1_SEQUENCE(SpcAttributeTypeAndOptionalValue) = { - ASN1_SIMPLE(SpcAttributeTypeAndOptionalValue, type, ASN1_OBJECT), - ASN1_OPT( - SpcAttributeTypeAndOptionalValue, - value, - ASN1_ANY)} ASN1_SEQUENCE_END(SpcAttributeTypeAndOptionalValue); + ASN1_SIMPLE(SpcAttributeTypeAndOptionalValue, type, ASN1_OBJECT), + ASN1_OPT(SpcAttributeTypeAndOptionalValue, value, ASN1_ANY) +} ASN1_SEQUENCE_END(SpcAttributeTypeAndOptionalValue) ASN1_SEQUENCE(SpcPeImageData) = { - ASN1_SIMPLE(SpcPeImageData, flags, ASN1_BIT_STRING), - ASN1_EXP_OPT( - SpcPeImageData, - file, - SpcLink, - 0)} ASN1_SEQUENCE_END(SpcPeImageData); + ASN1_SIMPLE(SpcPeImageData, flags, ASN1_BIT_STRING), + ASN1_EXP_OPT(SpcPeImageData, file, SpcLink, 0) +} ASN1_SEQUENCE_END(SpcPeImageData) ASN1_SEQUENCE(AlgorithmIdentifier) = { - ASN1_SIMPLE(AlgorithmIdentifier, 
algorithm, ASN1_OBJECT), - ASN1_OPT( - AlgorithmIdentifier, - parameters, - ASN1_ANY)} ASN1_SEQUENCE_END(AlgorithmIdentifier); + ASN1_SIMPLE(AlgorithmIdentifier, algorithm, ASN1_OBJECT), + ASN1_OPT(AlgorithmIdentifier, parameters, ASN1_ANY) +} ASN1_SEQUENCE_END(AlgorithmIdentifier) ASN1_SEQUENCE(DigestInfo) = { - ASN1_SIMPLE(DigestInfo, digestAlgorithm, AlgorithmIdentifier), - ASN1_SIMPLE( - DigestInfo, - digest, - ASN1_OCTET_STRING)} ASN1_SEQUENCE_END(DigestInfo); + ASN1_SIMPLE(DigestInfo, digestAlgorithm, AlgorithmIdentifier), + ASN1_SIMPLE(DigestInfo, digest, ASN1_OCTET_STRING) +} ASN1_SEQUENCE_END(DigestInfo) ASN1_SEQUENCE(SpcIndirectDataContent) = { - ASN1_SIMPLE(SpcIndirectDataContent, data, SpcAttributeTypeAndOptionalValue), - ASN1_SIMPLE( - SpcIndirectDataContent, - messageDigest, - DigestInfo)} ASN1_SEQUENCE_END(SpcIndirectDataContent); + ASN1_SIMPLE(SpcIndirectDataContent, data, SpcAttributeTypeAndOptionalValue), + ASN1_SIMPLE(SpcIndirectDataContent, messageDigest, DigestInfo) +} ASN1_SEQUENCE_END(SpcIndirectDataContent) ASN1_SEQUENCE(SpcSpOpusInfo) = { - ASN1_EXP_OPT(SpcSpOpusInfo, programName, SpcString, 0), - ASN1_EXP_OPT( - SpcSpOpusInfo, - moreInfo, - SpcLink, - 1)} ASN1_SEQUENCE_END(SpcSpOpusInfo); + ASN1_EXP_OPT(SpcSpOpusInfo, programName, SpcString, 0), + ASN1_EXP_OPT(SpcSpOpusInfo, moreInfo, SpcLink, 1) +} ASN1_SEQUENCE_END(SpcSpOpusInfo) IMPLEMENT_ASN1_FUNCTIONS(SpcString) IMPLEMENT_ASN1_FUNCTIONS(SpcSerializedObject) diff --git a/src/libyara/modules/pe/authenticode-parser/structs.h b/src/libyara/modules/pe/authenticode-parser/structs.h index 7a38d9b..1f90db6 100644 --- a/src/libyara/modules/pe/authenticode-parser/structs.h +++ b/src/libyara/modules/pe/authenticode-parser/structs.h @@ -28,8 +28,7 @@ SOFTWARE. 
#include #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif #define NID_spc_info "1.3.6.1.4.1.311.2.1.12" @@ -37,85 +36,73 @@ extern "C" #define NID_spc_nested_signature "1.3.6.1.4.1.311.2.4.1" #define NID_spc_indirect_data "1.3.6.1.4.1.311.2.1.4" - typedef struct - { +typedef struct { int type; - union - { - ASN1_BMPSTRING *unicode; - ASN1_IA5STRING *ascii; + union { + ASN1_BMPSTRING *unicode; + ASN1_IA5STRING *ascii; } value; - } SpcString; +} SpcString; - typedef struct - { +typedef struct { ASN1_OCTET_STRING *classId; ASN1_OCTET_STRING *serializedData; - } SpcSerializedObject; +} SpcSerializedObject; - typedef struct - { +typedef struct { int type; - union - { - ASN1_IA5STRING *url; - SpcSerializedObject *moniker; - SpcString *file; + union { + ASN1_IA5STRING *url; + SpcSerializedObject *moniker; + SpcString *file; } value; - } SpcLink; +} SpcLink; - typedef struct - { +typedef struct { ASN1_OBJECT *type; ASN1_TYPE *value; - } SpcAttributeTypeAndOptionalValue; +} SpcAttributeTypeAndOptionalValue; - typedef struct - { +typedef struct { ASN1_BIT_STRING *flags; SpcLink *file; - } SpcPeImageData; +} SpcPeImageData; - typedef struct - { +typedef struct { ASN1_OBJECT *algorithm; ASN1_TYPE *parameters; - } AlgorithmIdentifier; +} AlgorithmIdentifier; - typedef struct - { +typedef struct { AlgorithmIdentifier *digestAlgorithm; ASN1_OCTET_STRING *digest; - } DigestInfo; +} DigestInfo; - typedef struct - { +typedef struct { SpcAttributeTypeAndOptionalValue *data; DigestInfo *messageDigest; - } SpcIndirectDataContent; +} SpcIndirectDataContent; - typedef struct - { +typedef struct { ASN1_OBJECT *contentType; SpcIndirectDataContent *content; - } SpcContentInfo; +} SpcContentInfo; - typedef struct - { +typedef struct { SpcString *programName; SpcLink *moreInfo; - } SpcSpOpusInfo; - - DECLARE_ASN1_FUNCTIONS(SpcString) - DECLARE_ASN1_FUNCTIONS(SpcSerializedObject) - DECLARE_ASN1_FUNCTIONS(SpcLink) - DECLARE_ASN1_FUNCTIONS(SpcAttributeTypeAndOptionalValue) - 
DECLARE_ASN1_FUNCTIONS(SpcPeImageData) - DECLARE_ASN1_FUNCTIONS(AlgorithmIdentifier) - DECLARE_ASN1_FUNCTIONS(DigestInfo) - DECLARE_ASN1_FUNCTIONS(SpcIndirectDataContent) - DECLARE_ASN1_FUNCTIONS(SpcSpOpusInfo) - DECLARE_ASN1_FUNCTIONS(SpcContentInfo) +} SpcSpOpusInfo; + +DECLARE_ASN1_FUNCTIONS(SpcString) +DECLARE_ASN1_FUNCTIONS(SpcSerializedObject) +DECLARE_ASN1_FUNCTIONS(SpcLink) +DECLARE_ASN1_FUNCTIONS(SpcAttributeTypeAndOptionalValue) +DECLARE_ASN1_FUNCTIONS(SpcPeImageData) +DECLARE_ASN1_FUNCTIONS(AlgorithmIdentifier) +DECLARE_ASN1_FUNCTIONS(DigestInfo) +DECLARE_ASN1_FUNCTIONS(SpcIndirectDataContent) +DECLARE_ASN1_FUNCTIONS(SpcSpOpusInfo) +DECLARE_ASN1_FUNCTIONS(SpcContentInfo) #ifdef __cplusplus } diff --git a/src/libyara/modules/pe/pe.c b/src/libyara/modules/pe/pe.c index 36b289e..fe323b0 100644 --- a/src/libyara/modules/pe/pe.c +++ b/src/libyara/modules/pe/pe.c @@ -83,10 +83,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define RESOURCE_ITERATOR_FINISHED 0 #define RESOURCE_ITERATOR_ABORTED 1 -#define MAX_PE_IMPORTS 16384 -#define MAX_PE_EXPORTS 8192 -#define MAX_EXPORT_NAME_LENGTH 512 -#define MAX_RESOURCES 65536 +#define MAX_PE_IMPORTS 16384 +#define MAX_PE_EXPORTS 16384 +#define MAX_EXPORT_NAME_LENGTH 512 +#define MAX_IMPORT_DLL_NAME_LENGTH 256 +#define MAX_RESOURCES 65536 #define IS_RESOURCE_SUBDIRECTORY(entry) \ (yr_le32toh((entry)->OffsetToData) & 0x80000000) @@ -309,9 +310,6 @@ static void pe_parse_debug_directory(PE* pe) if (yr_le32toh(data_dir->Size) == 0) return; - if (yr_le32toh(data_dir->Size) % sizeof(IMAGE_DEBUG_DIRECTORY) != 0) - return; - if (yr_le32toh(data_dir->VirtualAddress) == 0) return; @@ -326,8 +324,8 @@ static void pe_parse_debug_directory(PE* pe) { int64_t pcv_hdr_offset = 0; - debug_dir = - (PIMAGE_DEBUG_DIRECTORY) (pe->data + debug_dir_offset + i * sizeof(IMAGE_DEBUG_DIRECTORY)); + debug_dir = (PIMAGE_DEBUG_DIRECTORY) (pe->data + debug_dir_offset + + i * sizeof(IMAGE_DEBUG_DIRECTORY)); if 
(!struct_fits_in_pe(pe, debug_dir, IMAGE_DEBUG_DIRECTORY)) break; @@ -409,7 +407,8 @@ static const PIMAGE_RESOURCE_DIR_STRING_U parse_resource_name( if (yr_le32toh(entry->Name) & 0x80000000) { const PIMAGE_RESOURCE_DIR_STRING_U pNameString = - (PIMAGE_RESOURCE_DIR_STRING_U) (rsrc_data + (yr_le32toh(entry->Name) & 0x7FFFFFFF)); + (PIMAGE_RESOURCE_DIR_STRING_U) (rsrc_data + + (yr_le32toh(entry->Name) & 0x7FFFFFFF)); // A resource directory string is 2 bytes for the length and then a variable // length Unicode string. Make sure we have at least 2 bytes. @@ -419,7 +418,9 @@ static const PIMAGE_RESOURCE_DIR_STRING_U parse_resource_name( // Move past the length and make sure we have enough bytes for the string. if (!fits_in_pe( - pe, pNameString, sizeof(uint16_t) + yr_le16toh(pNameString->Length) * 2)) + pe, + pNameString, + sizeof(uint16_t) + yr_le16toh(pNameString->Length) * 2)) return NULL; return pNameString; @@ -806,6 +807,25 @@ static int pe_collect_resources( return RESOURCE_CALLBACK_CONTINUE; } +// Function names should have only lowercase, uppercase, digits and a small +// subset of special characters. This is to match behavior of pefile. See +// https://github.com/erocarrera/pefile/blob/593d094e35198dad92aaf040bef17eb800c8a373/pefile.py#L2326-L2348 +static int valid_function_name(char* name) +{ + if (!strcmp(name, "")) + return 0; + + size_t i = 0; + for (char c = name[i]; c != '\x00'; c = name[++i]) + { + if (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z') && + !(c >= '0' && c <= '9') && c != '.' && c != '_' && c != '?' 
&& + c != '@' && c != '$' && c != '(' && c != ')' && c != '<' && c != '>') + return 0; + } + return 1; +} + static IMPORT_FUNCTION* pe_parse_import_descriptor( PE* pe, PIMAGE_IMPORT_DESCRIPTOR import_descriptor, @@ -814,6 +834,11 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( { IMPORT_FUNCTION* head = NULL; IMPORT_FUNCTION* tail = NULL; + // This is tracked separately from num_function_imports because that is the + // number of successfully parsed imports, while this is the number of imports + // attempted to be parsed. This allows us to stop parsing on too many imports + // while still accurately recording the number of successfully parsed imports. + int parsed_imports = 0; int64_t offset = pe_rva_to_offset( pe, yr_le32toh(import_descriptor->OriginalFirstThunk)); @@ -834,13 +859,15 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( while (struct_fits_in_pe(pe, thunks64, IMAGE_THUNK_DATA64) && yr_le64toh(thunks64->u1.Ordinal) != 0 && - *num_function_imports < MAX_PE_IMPORTS) + parsed_imports < MAX_PE_IMPORTS) { char* name = NULL; uint16_t ordinal = 0; uint8_t has_ordinal = 0; uint64_t rva_address = 0; + parsed_imports++; + if (!(yr_le64toh(thunks64->u1.Ordinal) & IMAGE_ORDINAL_FLAG64)) { // If imported by name @@ -848,8 +875,8 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( if (offset >= 0) { - PIMAGE_IMPORT_BY_NAME import = - (PIMAGE_IMPORT_BY_NAME) (pe->data + offset); + PIMAGE_IMPORT_BY_NAME import = (PIMAGE_IMPORT_BY_NAME) (pe->data + + offset); if (struct_fits_in_pe(pe, import, IMAGE_IMPORT_BY_NAME)) { @@ -871,6 +898,14 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( rva_address = yr_le32toh(import_descriptor->FirstThunk) + (sizeof(uint64_t) * func_idx); + if (name != NULL && !valid_function_name(name)) + { + yr_free(name); + thunks64++; + func_idx++; + continue; + } + if (name != NULL || has_ordinal == 1) { IMPORT_FUNCTION* imported_func = (IMPORT_FUNCTION*) yr_calloc( @@ -879,25 +914,26 @@ static IMPORT_FUNCTION* 
pe_parse_import_descriptor( if (imported_func == NULL) { yr_free(name); - continue; } + else + { + imported_func->name = name; + imported_func->ordinal = ordinal; + imported_func->has_ordinal = has_ordinal; + imported_func->rva = rva_address; + imported_func->next = NULL; - imported_func->name = name; - imported_func->ordinal = ordinal; - imported_func->has_ordinal = has_ordinal; - imported_func->rva = rva_address; - imported_func->next = NULL; - - if (head == NULL) - head = imported_func; + if (head == NULL) + head = imported_func; - if (tail != NULL) - tail->next = imported_func; + if (tail != NULL) + tail->next = imported_func; - tail = imported_func; + tail = imported_func; + (*num_function_imports)++; + } } - (*num_function_imports)++; thunks64++; func_idx++; } @@ -916,6 +952,8 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( uint8_t has_ordinal = 0; uint32_t rva_address = 0; + parsed_imports++; + if (!(yr_le32toh(thunks32->u1.Ordinal) & IMAGE_ORDINAL_FLAG32)) { // If imported by name @@ -923,8 +961,8 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( if (offset >= 0) { - PIMAGE_IMPORT_BY_NAME import = - (PIMAGE_IMPORT_BY_NAME) (pe->data + offset); + PIMAGE_IMPORT_BY_NAME import = (PIMAGE_IMPORT_BY_NAME) (pe->data + + offset); if (struct_fits_in_pe(pe, import, IMAGE_IMPORT_BY_NAME)) { @@ -946,6 +984,14 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( rva_address = yr_le32toh(import_descriptor->FirstThunk) + (sizeof(uint32_t) * func_idx); + if (name != NULL && !valid_function_name(name)) + { + yr_free(name); + thunks32++; + func_idx++; + continue; + } + if (name != NULL || has_ordinal == 1) { IMPORT_FUNCTION* imported_func = (IMPORT_FUNCTION*) yr_calloc( @@ -954,25 +1000,26 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( if (imported_func == NULL) { yr_free(name); - continue; } + else + { + imported_func->name = name; + imported_func->ordinal = ordinal; + imported_func->has_ordinal = has_ordinal; + imported_func->rva = rva_address; + 
imported_func->next = NULL; - imported_func->name = name; - imported_func->ordinal = ordinal; - imported_func->has_ordinal = has_ordinal; - imported_func->rva = rva_address; - imported_func->next = NULL; - - if (head == NULL) - head = imported_func; + if (head == NULL) + head = imported_func; - if (tail != NULL) - tail->next = imported_func; + if (tail != NULL) + tail->next = imported_func; - tail = imported_func; + tail = imported_func; + (*num_function_imports)++; + } } - (*num_function_imports)++; thunks32++; func_idx++; } @@ -1072,6 +1119,7 @@ void pe_set_imports( static IMPORTED_DLL* pe_parse_imports(PE* pe) { int64_t offset; + int parsed_imports = 0; // Number of parsed DLLs int num_imports = 0; // Number of imported DLLs int num_function_imports = 0; // Total number of functions imported @@ -1101,8 +1149,10 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe) imports = (PIMAGE_IMPORT_DESCRIPTOR) (pe->data + offset); while (struct_fits_in_pe(pe, imports, IMAGE_IMPORT_DESCRIPTOR) && - yr_le32toh(imports->Name) != 0 && num_imports < MAX_PE_IMPORTS) + yr_le32toh(imports->Name) != 0 && parsed_imports < MAX_PE_IMPORTS) { + parsed_imports++; + int64_t offset = pe_rva_to_offset(pe, yr_le32toh(imports->Name)); if (offset >= 0) @@ -1111,7 +1161,13 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe) char* dll_name = (char*) (pe->data + offset); - if (!pe_valid_dll_name(dll_name, pe->data_size - (size_t) offset)) + if (!pe_valid_dll_name( + dll_name, + yr_min( + // DLL names longer than MAX_IMPORT_DLL_NAME_LENGTH + // are considered invalid. 
+ pe->data_size - (size_t) offset, + MAX_IMPORT_DLL_NAME_LENGTH))) { imports++; continue; @@ -1127,7 +1183,6 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe) if (functions != NULL) { imported_dll->name = yr_strdup(dll_name); - ; imported_dll->functions = functions; imported_dll->next = NULL; @@ -1138,6 +1193,7 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe) tail->next = imported_dll; tail = imported_dll; + num_imports++; } else { @@ -1146,7 +1202,6 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe) } } - num_imports++; imports++; } @@ -1604,6 +1659,12 @@ static void pe_parse_exports(PE* pe) yr_set_integer( ordinal_base + i, pe->object, "export_details[%i].ordinal", exp_sz); + yr_set_integer( + yr_le32toh(function_addrs[i]), + pe->object, + "export_details[%i].rva", + exp_sz); + // Don't check for a failure here since some packers make this an invalid // value. offset = pe_rva_to_offset(pe, yr_le32toh(function_addrs[i])); @@ -1696,19 +1757,17 @@ void _process_authenticode( if (!auth_array || !auth_array->count) return; - /* If any signature will be valid -> file is correctly signed */ bool signature_valid = false; for (size_t i = 0; i < auth_array->count; ++i) { const Authenticode* authenticode = auth_array->signatures[i]; + bool verified = authenticode->verify_flags == AUTHENTICODE_VFY_VALID; - signature_valid = authenticode->verify_flags == AUTHENTICODE_VFY_VALID - ? true - : false; + /* If any signature is valid -> file is correctly signed */ + signature_valid |= verified; - yr_set_integer( - signature_valid, pe->object, "signatures[%i].verified", *sig_count); + yr_set_integer(verified, pe->object, "signatures[%i].verified", *sig_count); yr_set_string( authenticode->digest_alg, @@ -1896,6 +1955,8 @@ static void pe_parse_certificates(PE* pe) // Default to 0 signatures until we know otherwise. yr_set_integer(0, pe->object, "number_of_signatures"); + // Default to not signed until we know otherwise. 
+ yr_set_integer(0, pe->object, "is_signed"); PIMAGE_DATA_DIRECTORY directory = pe_get_directory_entry( pe, IMAGE_DIRECTORY_ENTRY_SECURITY); @@ -2670,7 +2731,7 @@ define_function(imphash) // If extension is 'ocx', 'sys' or 'dll', chop it. - char* ext = strstr(dll->name, "."); + char* ext = strrchr(dll->name, '.'); if (ext && (strncasecmp(ext, ".ocx", 4) == 0 || strncasecmp(ext, ".sys", 4) == 0 || @@ -3603,6 +3664,10 @@ begin_declarations declare_integer("IMAGE_DEBUG_TYPE_MPX"); declare_integer("IMAGE_DEBUG_TYPE_REPRO"); + declare_integer("IMPORT_DELAYED"); + declare_integer("IMPORT_STANDARD"); + declare_integer("IMPORT_ANY"); + declare_integer("is_pe"); declare_integer("machine"); declare_integer("number_of_sections"); @@ -3711,10 +3776,6 @@ begin_declarations declare_function("imphash", "", "s", imphash); #endif - declare_integer("IMPORT_DELAYED"); - declare_integer("IMPORT_STANDARD"); - declare_integer("IMPORT_ANY"); - declare_function("section_index", "s", "i", section_index_name); declare_function("section_index", "i", "i", section_index_addr); declare_function("exports", "s", "i", exports); @@ -3754,6 +3815,7 @@ begin_declarations declare_string("name"); declare_string("forward_name"); declare_integer("ordinal"); + declare_integer("rva"); end_struct_array("export_details") begin_struct_array("import_details") @@ -3884,10 +3946,17 @@ end_declarations int module_initialize(YR_MODULE* module) { -#if defined(HAVE_LIBCRYPTO) +#if defined(HAVE_LIBCRYPTO) && !defined(BORINGSSL) // Initialize OpenSSL global objects for the auth library before any - // multithreaded environment as it is not thread-safe - initialize_authenticode_parser(); + // multithreaded environment as it is not thread-safe. This can + // only be called once per process. 
+ static bool s_initialized = false; + + if (!s_initialized) + { + s_initialized = true; + initialize_authenticode_parser(); + } #endif return ERROR_SUCCESS; } @@ -4249,7 +4318,7 @@ int module_load( foreach_memory_block(iterator, block) { - block_data = block->fetch_data(block); + block_data = yr_fetch_block_data(block); if (block_data == NULL) continue; diff --git a/src/libyara/notebook.c b/src/libyara/notebook.c index 4fe4b1f..3680df0 100644 --- a/src/libyara/notebook.c +++ b/src/libyara/notebook.c @@ -64,15 +64,18 @@ struct YR_NOTEBOOK_PAGE // Pointer to next page. YR_NOTEBOOK_PAGE* next; // Page's data. - uint8_t data[0]; + // + // This field must be 8-byte aligned to guarantee that all notebooks + // allocations are 8-byte aligned. + YR_ALIGN(8) uint8_t data[0]; }; //////////////////////////////////////////////////////////////////////////////// -// Creates a new notebook. The notebook initially has a single page of the -// specified size, but more pages are created if needed. +// Creates a new notebook. The notebook initially has a single page of size +// min_page_size, but more pages will be created as needed. // // Args: -// page_size: Size of each page in the notebook. +// min_page_size: The minimum size of each page in the notebook. // notebook: Address of a pointer to the newly created notebook. 
// // Returns: diff --git a/src/libyara/object.c b/src/libyara/object.c index 76d2531..f8cf685 100644 --- a/src/libyara/object.c +++ b/src/libyara/object.c @@ -655,7 +655,7 @@ int yr_object_structure_set_member(YR_OBJECT* object, YR_OBJECT* member) return ERROR_SUCCESS; } -int yr_object_array_length(YR_OBJECT* object) +YR_API int yr_object_array_length(YR_OBJECT* object) { YR_OBJECT_ARRAY* array; @@ -668,7 +668,8 @@ int yr_object_array_length(YR_OBJECT* object) return array->items->length; } -YR_OBJECT* yr_object_array_get_item(YR_OBJECT* object, int flags, int index) +YR_API YR_OBJECT* yr_object_array_get_item(YR_OBJECT* object, int flags, + int index) { YR_OBJECT* result = NULL; YR_OBJECT_ARRAY* array; diff --git a/src/libyara/parser.c b/src/libyara/parser.c index f536e42..2433e52 100644 --- a/src/libyara/parser.c +++ b/src/libyara/parser.c @@ -216,6 +216,7 @@ int yr_parser_emit_pushes_for_strings( string->flags |= STRING_FLAGS_REFERENCED; string->flags &= ~STRING_FLAGS_FIXED_OFFSET; + string->flags &= ~STRING_FLAGS_SINGLE_MATCH; matching++; } } @@ -747,23 +748,37 @@ int yr_parser_reduce_string_declaration( if (modifier.flags & STRING_FLAGS_HEXADECIMAL) result = yr_re_parse_hex(str->c_string, &re_ast, &re_error); else if (modifier.flags & STRING_FLAGS_REGEXP) - result = yr_re_parse(str->c_string, &re_ast, &re_error); + { + int flags = RE_PARSER_FLAG_NONE; + if (compiler->strict_escape) + flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES; + result = yr_re_parse(str->c_string, &re_ast, &re_error, flags); + } else result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error); if (result != ERROR_SUCCESS) { - snprintf( - message, - sizeof(message), - "invalid %s \"%s\": %s", - (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? 
"hex string" - : "regular expression", - identifier, - re_error.message); - - yr_compiler_set_error_extra_info(compiler, message); - goto _exit; + if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + yywarning( + yyscanner, + "unknown escape sequence"); + } + else + { + snprintf( + message, + sizeof(message), + "invalid %s \"%s\": %s", + (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string" + : "regular expression", + identifier, + re_error.message); + + yr_compiler_set_error_extra_info(compiler, message); + goto _exit; + } } if (re_ast->flags & RE_FLAGS_FAST_REGEXP) @@ -1070,13 +1085,26 @@ int yr_parser_reduce_rule_declaration_phase_2( // Only the heading fragment in a chain of strings (the one with // chained_to == NULL) must be referenced. All other fragments // are never marked as referenced. + // + // Any string identifier that starts with '_' can be unreferenced. Anonymous + // strings must always be referenced. - if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL) + if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL && + (STRING_IS_ANONYMOUS(string) || + (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_'))) { yr_compiler_set_error_extra_info( compiler, string->identifier) return ERROR_UNREFERENCED_STRING; } + // If a string is unreferenced we need to unset the FIXED_OFFSET flag so + // that it will match anywhere. + if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL && + STRING_IS_FIXED_OFFSET(string)) + { + string->flags &= ~STRING_FLAGS_FIXED_OFFSET; + } + strings_in_rule++; if (strings_in_rule > max_strings_per_rule) @@ -1120,7 +1148,7 @@ int yr_parser_reduce_string_identifier( YR_STRING* string; YR_COMPILER* compiler = yyget_extra(yyscanner); - if (strcmp(identifier, "$") == 0) // is an anonymous string ? + if (strcmp(identifier, "$") == 0) // is an anonymous string ? { if (compiler->loop_for_of_var_index >= 0) // inside a loop ? 
{ diff --git a/src/libyara/proc/linux.c b/src/libyara/proc/linux.c index 709cf0b..72f2ab4 100644 --- a/src/libyara/proc/linux.c +++ b/src/libyara/proc/linux.c @@ -97,7 +97,7 @@ int _yr_process_attach(int pid, YR_PROC_ITERATOR_CTX* context) snprintf(buffer, sizeof(buffer), "/proc/%u/pagemap", pid); proc_info->pagemap_fd = open(buffer, O_RDONLY); - if (proc_info->mem_fd == -1) + if (proc_info->pagemap_fd == -1) goto err; context->proc_info = proc_info; @@ -164,16 +164,10 @@ YR_API const uint8_t* yr_process_fetch_memory_block_data(YR_MEMORY_BLOCK* block) !(proc_info->map_dmaj == 0 && proc_info->map_dmin == 0)) { struct stat st; - fd = open(proc_info->map_path, O_RDONLY); - if (fd < 0) - { - fd = -1; // File does not exist. - } - else if (fstat(fd, &st) < 0) + if (stat(proc_info->map_path, &st) < 0) { // Why should stat fail after file open? Treat like missing. - close(fd); fd = -1; } else if ( @@ -182,22 +176,36 @@ YR_API const uint8_t* yr_process_fetch_memory_block_data(YR_MEMORY_BLOCK* block) (st.st_ino != proc_info->map_ino)) { // Wrong file, may have been replaced. Treat like missing. - close(fd); fd = -1; } else if (st.st_size < proc_info->map_offset + block->size) { // Mapping extends past end of file. Treat like missing. - close(fd); fd = -1; } else if ((st.st_mode & S_IFMT) != S_IFREG) { // Correct filesystem object, but not a regular file. Treat like // uninitialized mapping. - close(fd); fd = -2; } + else + { + fd = open(proc_info->map_path, O_RDONLY); + // Double-check against race conditions + struct stat st2; + if (fstat(fd, &st2) < 0) + { + close(fd); + fd = -1; + } + else if ((st.st_dev != st2.st_dev) || (st.st_ino != st2.st_ino)) + { + // File has been changed from under us, so ignore. 
+ close(fd); + fd = -1; + } + } } if (fd >= 0) @@ -210,8 +218,16 @@ YR_API const uint8_t* yr_process_fetch_memory_block_data(YR_MEMORY_BLOCK* block) fd, proc_info->map_offset); close(fd); + if (context->buffer == MAP_FAILED) + { + // Notify the code below that we couldn't read from the file + // fallback to pread() from the process + fd = -1; + } + context->buffer_size = block->size; } - else + + if (fd < 0) { context->buffer = mmap( NULL, @@ -220,23 +236,20 @@ YR_API const uint8_t* yr_process_fetch_memory_block_data(YR_MEMORY_BLOCK* block) MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - } - - if (context->buffer != NULL) - { + if (context->buffer == MAP_FAILED) + { + context->buffer = NULL; + context->buffer_size = 0; + goto _exit; + } context->buffer_size = block->size; } - else - { - context->buffer_size = 0; - goto _exit; - } // If mapping can't be accessed through the filesystem, read everything from // target process VM. if (fd == -1) { - if (pread( + if (pread64( proc_info->mem_fd, (void*) context->buffer, block->size, @@ -252,7 +265,7 @@ YR_API const uint8_t* yr_process_fetch_memory_block_data(YR_MEMORY_BLOCK* block) { goto _exit; } - if (pread( + if (pread64( proc_info->pagemap_fd, pagemap, sizeof(uint64_t) * block->size / page_size, @@ -271,7 +284,7 @@ YR_API const uint8_t* yr_process_fetch_memory_block_data(YR_MEMORY_BLOCK* block) // swap-backed and if it differs from our mapping. uint8_t buffer[page_size]; - if (pread( + if (pread64( proc_info->mem_fd, buffer, page_size, diff --git a/src/libyara/proc/mach.c b/src/libyara/proc/mach.c index 8ed3f56..b345667 100644 --- a/src/libyara/proc/mach.c +++ b/src/libyara/proc/mach.c @@ -147,7 +147,17 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( if (kr == KERN_SUCCESS) { - size_t chunk_size = size - (size_t) (current_begin - address); + size_t chunk_size; + + if (current_begin < address) { + // current_begin is outside of any region, and the next region was + // returned, so advance to it. 
+ current_begin = address; + chunk_size = size; + } else { + // address <= current_begin, compute the size for the current chunk. + chunk_size = size - (size_t) (current_begin - address); + } if (((uint64_t) chunk_size) > max_process_memory_chunk) { diff --git a/src/libyara/re.c b/src/libyara/re.c index c78ceb2..4edf441 100644 --- a/src/libyara/re.c +++ b/src/libyara/re.c @@ -214,9 +214,13 @@ void yr_re_ast_destroy(RE_AST* re_ast) // Parses a regexp but don't emit its code. A further call to // yr_re_ast_emit_code is required to get the code. // -int yr_re_parse(const char* re_string, RE_AST** re_ast, RE_ERROR* error) +int yr_re_parse( + const char* re_string, + RE_AST** re_ast, + RE_ERROR* error, + int flags) { - return yr_parse_re_string(re_string, re_ast, error); + return yr_parse_re_string(re_string, re_ast, error, flags); } //////////////////////////////////////////////////////////////////////////////// @@ -235,14 +239,18 @@ int yr_re_parse_hex(const char* hex_string, RE_AST** re_ast, RE_ERROR* error) int yr_re_compile( const char* re_string, int flags, + int parser_flags, YR_ARENA* arena, YR_ARENA_REF* ref, RE_ERROR* error) { RE_AST* re_ast; RE _re; + int result; - FAIL_ON_ERROR(yr_re_parse(re_string, &re_ast, error)); + result = yr_re_parse(re_string, &re_ast, error, parser_flags); + if (result != ERROR_UNKNOWN_ESCAPE_SEQUENCE) + FAIL_ON_ERROR(result); _re.flags = flags; @@ -255,7 +263,7 @@ int yr_re_compile( yr_re_ast_destroy(re_ast); - return ERROR_SUCCESS; + return result; } //////////////////////////////////////////////////////////////////////////////// @@ -1721,6 +1729,9 @@ int yr_re_exec( int kill; int action; + bool prev_is_word_char = false; + bool input_is_word_char = false; + #define ACTION_NONE 0 #define ACTION_CONTINUE 1 #define ACTION_KILL 2 @@ -1936,27 +1947,30 @@ int yr_re_exec( case RE_OPCODE_WORD_BOUNDARY: case RE_OPCODE_NON_WORD_BOUNDARY: - - if (bytes_matched == 0 && input_backwards_size < character_size) + if (input - input_incr + 
character_size <= input_data + input_forwards_size && + input - input_incr >= input_data - input_backwards_size) { - match = true; + prev_is_word_char = _yr_re_is_word_char( + input - input_incr, character_size); } - else if (bytes_matched >= max_bytes_matched) + else { - match = true; + prev_is_word_char = false; + } + + if (input + character_size <= input_data + input_forwards_size && + input >= input_data - input_backwards_size) + { + input_is_word_char = _yr_re_is_word_char(input, character_size); } else { - assert(input < input_data + input_forwards_size); - assert(input >= input_data - input_backwards_size); - - assert(input - input_incr < input_data + input_forwards_size); - assert(input - input_incr >= input_data - input_backwards_size); - - match = _yr_re_is_word_char(input, character_size) != - _yr_re_is_word_char(input - input_incr, character_size); + input_is_word_char = false; } + match = (prev_is_word_char && !input_is_word_char) || + (!prev_is_word_char && input_is_word_char); + if (*ip == RE_OPCODE_NON_WORD_BOUNDARY) match = !match; diff --git a/src/libyara/re_lexer.c b/src/libyara/re_lexer.c index d465ab1..2755986 100644 --- a/src/libyara/re_lexer.c +++ b/src/libyara/re_lexer.c @@ -1,6 +1,6 @@ -#line 2 "re_lexer.c" +#line 1 "libyara/re_lexer.c" -#line 4 "re_lexer.c" +#line 3 "libyara/re_lexer.c" #define YY_INT_ALIGNED short int @@ -702,7 +702,7 @@ static const flex_int32_t yy_rule_can_match_eol[30] = #define yymore() yymore_used_but_not_detected #define YY_MORE_ADJ 0 #define YY_RESTORE_YY_MORE_OFFSET -#line 1 "re_lexer.l" +#line 1 "libyara/re_lexer.l" /* Copyright (c) 2013. The YARA Authors. All Rights Reserved. @@ -732,7 +732,7 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Lexical analyzer for regular expressions */ -#line 33 "re_lexer.l" +#line 33 "libyara/re_lexer.l" /* Disable warnings for unused functions in this file. 
@@ -759,6 +759,7 @@ with noyywrap then we can remove this pragma. #include #include #include +#include #ifdef _WIN32 @@ -785,16 +786,18 @@ static uint8_t space_chars[] = { int escaped_char_value( char* text, - uint8_t* value); + uint8_t* value, + bool strict_escape); int read_escaped_char( yyscan_t yyscanner, - uint8_t* escaped_char); + uint8_t* escaped_char, + bool strict_escape); -#line 795 "re_lexer.c" +#line 797 "libyara/re_lexer.c" #define YY_NO_UNISTD_H 1 -#line 798 "re_lexer.c" +#line 800 "libyara/re_lexer.c" #define INITIAL 0 #define char_class 1 @@ -1067,10 +1070,10 @@ YY_DECL } { -#line 111 "re_lexer.l" +#line 114 "libyara/re_lexer.l" -#line 1074 "re_lexer.c" +#line 1076 "libyara/re_lexer.c" while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ { @@ -1137,7 +1140,7 @@ YY_DECL case 1: YY_RULE_SETUP -#line 113 "re_lexer.l" +#line 116 "libyara/re_lexer.l" { // Examples: {3,8} {0,5} {,5} {7,} @@ -1166,12 +1169,6 @@ YY_RULE_SETUP yyterminate(); } - if (hi_bound == 0 && lo_bound == 0) - { - yyerror(yyscanner, lex_env, "bad repeat interval"); - yyterminate(); - } - yylval->range = (hi_bound << 16) | lo_bound; return _RANGE_; @@ -1179,7 +1176,7 @@ YY_RULE_SETUP YY_BREAK case 2: YY_RULE_SETUP -#line 153 "re_lexer.l" +#line 150 "libyara/re_lexer.l" { // Example: {10} @@ -1195,12 +1192,6 @@ YY_RULE_SETUP yyterminate(); } - if (value == 0) - { - yyerror(yyscanner, lex_env, "bad repeat interval"); - yyterminate(); - } - yylval->range = (value << 16) | value; return _RANGE_; @@ -1208,7 +1199,7 @@ YY_RULE_SETUP YY_BREAK case 3: YY_RULE_SETUP -#line 180 "re_lexer.l" +#line 171 "libyara/re_lexer.l" { // Start of a negated character class. Example: [^abcd] @@ -1220,7 +1211,7 @@ YY_RULE_SETUP YY_BREAK case 4: YY_RULE_SETUP -#line 189 "re_lexer.l" +#line 180 "libyara/re_lexer.l" { // Start of character negated class containing a ]. 
@@ -1235,7 +1226,7 @@ YY_RULE_SETUP YY_BREAK case 5: YY_RULE_SETUP -#line 202 "re_lexer.l" +#line 193 "libyara/re_lexer.l" { // Start of character class containing a ]. @@ -1250,7 +1241,7 @@ YY_RULE_SETUP YY_BREAK case 6: YY_RULE_SETUP -#line 215 "re_lexer.l" +#line 206 "libyara/re_lexer.l" { // Start of character class. Example: [abcd] @@ -1263,7 +1254,7 @@ YY_RULE_SETUP case 7: /* rule 7 can match eol */ YY_RULE_SETUP -#line 225 "re_lexer.l" +#line 216 "libyara/re_lexer.l" { // Any non-special character is passed as a CHAR token to the scanner. @@ -1274,63 +1265,63 @@ YY_RULE_SETUP YY_BREAK case 8: YY_RULE_SETUP -#line 234 "re_lexer.l" +#line 225 "libyara/re_lexer.l" { return _WORD_CHAR_; } YY_BREAK case 9: YY_RULE_SETUP -#line 239 "re_lexer.l" +#line 230 "libyara/re_lexer.l" { return _NON_WORD_CHAR_; } YY_BREAK case 10: YY_RULE_SETUP -#line 244 "re_lexer.l" +#line 235 "libyara/re_lexer.l" { return _SPACE_; } YY_BREAK case 11: YY_RULE_SETUP -#line 249 "re_lexer.l" +#line 240 "libyara/re_lexer.l" { return _NON_SPACE_; } YY_BREAK case 12: YY_RULE_SETUP -#line 254 "re_lexer.l" +#line 245 "libyara/re_lexer.l" { return _DIGIT_; } YY_BREAK case 13: YY_RULE_SETUP -#line 259 "re_lexer.l" +#line 250 "libyara/re_lexer.l" { return _NON_DIGIT_; } YY_BREAK case 14: YY_RULE_SETUP -#line 264 "re_lexer.l" +#line 255 "libyara/re_lexer.l" { return _WORD_BOUNDARY_; } YY_BREAK case 15: YY_RULE_SETUP -#line 268 "re_lexer.l" +#line 259 "libyara/re_lexer.l" { return _NON_WORD_BOUNDARY_; } YY_BREAK case 16: YY_RULE_SETUP -#line 273 "re_lexer.l" +#line 264 "libyara/re_lexer.l" { yyerror(yyscanner, lex_env, "backreferences are not allowed"); @@ -1339,16 +1330,24 @@ YY_RULE_SETUP YY_BREAK case 17: YY_RULE_SETUP -#line 280 "re_lexer.l" +#line 271 "libyara/re_lexer.l" { uint8_t c; + int return_code; - if (read_escaped_char(yyscanner, &c)) + return_code = read_escaped_char(yyscanner, &c, LEX_ENV->strict_escape); + if (return_code == VALID_ESCAPE_SEQUENCE) { yylval->integer = c; return _CHAR_; 
} + else if (return_code == UNKNOWN_ESCAPE_SEQUENCE) + { + yywarning(yyscanner, lex_env, "unknown escape sequence"); + yylval->integer = c; + return _CHAR_; + } else { yyerror(yyscanner, lex_env, "illegal escape sequence"); @@ -1358,7 +1357,7 @@ YY_RULE_SETUP YY_BREAK case 18: YY_RULE_SETUP -#line 297 "re_lexer.l" +#line 296 "libyara/re_lexer.l" { // End of character class. @@ -1374,7 +1373,7 @@ YY_RULE_SETUP case 19: /* rule 19 can match eol */ YY_RULE_SETUP -#line 311 "re_lexer.l" +#line 310 "libyara/re_lexer.l" { // A range inside a character class. The regexp is... @@ -1404,7 +1403,7 @@ YY_RULE_SETUP if (start == '\\') { - if (!escaped_char_value(yytext, &start)) + if (!escaped_char_value(yytext, &start, LEX_ENV->strict_escape)) { yyerror(yyscanner, lex_env, "illegal escape sequence"); yyterminate(); @@ -1418,7 +1417,7 @@ YY_RULE_SETUP if (end == '\\') { - if (!read_escaped_char(yyscanner, &end)) + if (!read_escaped_char(yyscanner, &end, LEX_ENV->strict_escape)) { yyerror(yyscanner, lex_env, "illegal escape sequence"); yyterminate(); @@ -1439,7 +1438,7 @@ YY_RULE_SETUP YY_BREAK case 20: YY_RULE_SETUP -#line 374 "re_lexer.l" +#line 373 "libyara/re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1448,7 +1447,7 @@ YY_RULE_SETUP YY_BREAK case 21: YY_RULE_SETUP -#line 381 "re_lexer.l" +#line 380 "libyara/re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1457,7 +1456,7 @@ YY_RULE_SETUP YY_BREAK case 22: YY_RULE_SETUP -#line 388 "re_lexer.l" +#line 387 "libyara/re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1466,7 +1465,7 @@ YY_RULE_SETUP YY_BREAK case 23: YY_RULE_SETUP -#line 395 "re_lexer.l" +#line 394 "libyara/re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1475,7 +1474,7 @@ YY_RULE_SETUP YY_BREAK case 24: YY_RULE_SETUP -#line 402 "re_lexer.l" +#line 401 "libyara/re_lexer.l" { for (char c = '0'; c <= '9'; c++) @@ -1484,7 +1483,7 @@ YY_RULE_SETUP YY_BREAK case 25: YY_RULE_SETUP -#line 409 "re_lexer.l" +#line 408 "libyara/re_lexer.l" { for (int i = 0; i < 32; i++) @@ 
-1504,15 +1503,22 @@ YY_RULE_SETUP YY_BREAK case 26: YY_RULE_SETUP -#line 427 "re_lexer.l" +#line 426 "libyara/re_lexer.l" { uint8_t c; + int return_code; - if (read_escaped_char(yyscanner, &c)) + return_code = read_escaped_char(yyscanner, &c, LEX_ENV->strict_escape); + if (return_code == VALID_ESCAPE_SEQUENCE) { LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8; } + else if (return_code == UNKNOWN_ESCAPE_SEQUENCE) + { + yywarning(yyscanner, lex_env, "unknown escape sequence"); + LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8; + } else { yyerror(yyscanner, lex_env, "illegal escape sequence"); @@ -1522,7 +1528,7 @@ YY_RULE_SETUP YY_BREAK case 27: YY_RULE_SETUP -#line 443 "re_lexer.l" +#line 449 "libyara/re_lexer.l" { if (yytext[0] >= 32 && yytext[0] < 127) @@ -1540,7 +1546,7 @@ YY_RULE_SETUP } YY_BREAK case YY_STATE_EOF(char_class): -#line 460 "re_lexer.l" +#line 466 "libyara/re_lexer.l" { // End of regexp reached while scanning a character class. @@ -1551,7 +1557,7 @@ case YY_STATE_EOF(char_class): YY_BREAK case 28: YY_RULE_SETUP -#line 469 "re_lexer.l" +#line 475 "libyara/re_lexer.l" { if (yytext[0] >= 32 && yytext[0] < 127) @@ -1566,7 +1572,7 @@ YY_RULE_SETUP } YY_BREAK case YY_STATE_EOF(INITIAL): -#line 483 "re_lexer.l" +#line 489 "libyara/re_lexer.l" { yyterminate(); @@ -1574,10 +1580,10 @@ case YY_STATE_EOF(INITIAL): YY_BREAK case 29: YY_RULE_SETUP -#line 488 "re_lexer.l" +#line 494 "libyara/re_lexer.l" ECHO; YY_BREAK -#line 1581 "re_lexer.c" +#line 1586 "libyara/re_lexer.c" case YY_END_OF_BUFFER: { @@ -2726,12 +2732,13 @@ void yyfree (void * ptr , yyscan_t yyscanner) #define YYTABLES_NAME "yytables" -#line 488 "re_lexer.l" +#line 494 "libyara/re_lexer.l" int escaped_char_value( char* text, - uint8_t* value) + uint8_t* value, + bool strict_escape) { unsigned int hex_value; char hex[3]; @@ -2770,11 +2777,49 @@ int escaped_char_value( *value = '\a'; break; + // Support metacharacters in escape sequences + case '\\': + case '^': + case '$': + case '.': + case '|': + 
case '(': + case ')': + case '[': + case ']': + + // Support other special characters that are used in rules and need to be escaped + case '*': + case '+': + case '?': + case '"': + case '\'': + case '-': + case '{': + case '}': + case '#': + case ':': + case '_': + case '=': + case '/': + case '!': + case ',': + case '@': + case '<': + case '>': + case '~': + case '&': + case '%': + *value = text[1]; + break; default: *value = text[1]; + if (strict_escape) + return UNKNOWN_ESCAPE_SEQUENCE; + return VALID_ESCAPE_SEQUENCE; } - return 1; + return VALID_ESCAPE_SEQUENCE; } @@ -2787,7 +2832,8 @@ int escaped_char_value( int read_escaped_char( yyscan_t yyscanner, - uint8_t* escaped_char) + uint8_t* escaped_char, + bool strict_escape) { char text[4] = {0, 0, 0, 0}; @@ -2810,7 +2856,7 @@ int read_escaped_char( return 0; } - return escaped_char_value(text, escaped_char); + return escaped_char_value(text, escaped_char, strict_escape); } @@ -2841,7 +2887,7 @@ void yyerror( // subsequent errors like "syntax error, unexpected $end" caused by // early parser termination. 
- if (lex_env->last_error == ERROR_SUCCESS) + if (lex_env->last_error == ERROR_SUCCESS || lex_env->last_error == ERROR_UNKNOWN_ESCAPE_SEQUENCE) { lex_env->last_error = ERROR_INVALID_REGULAR_EXPRESSION; @@ -2852,17 +2898,40 @@ void yyerror( } } +void yywarning( + yyscan_t yyscanner, + RE_LEX_ENVIRONMENT* lex_env, + const char *error_message) +{ + // Do not overwrite Errors + // print out warning only if there is not any other error beforehand + + if (lex_env->last_error == ERROR_SUCCESS) + { + lex_env->last_error = ERROR_UNKNOWN_ESCAPE_SEQUENCE; + + strlcpy( + lex_env->last_error_message, + error_message, + sizeof(lex_env->last_error_message)); + } +} int yr_parse_re_string( const char* re_string, RE_AST** re_ast, - RE_ERROR* error) + RE_ERROR* error, + int flags) { yyscan_t yyscanner; jmp_buf recovery_trampoline; RE_LEX_ENVIRONMENT lex_env; lex_env.last_error = ERROR_SUCCESS; + if (flags & RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES) + lex_env.strict_escape = true; + else + lex_env.strict_escape = false; lex_env.last_error_message[0] = '\0'; yr_thread_storage_set_value( @@ -2890,8 +2959,11 @@ int yr_parse_re_string( if (lex_env.last_error != ERROR_SUCCESS) { - yr_re_ast_destroy(*re_ast); - *re_ast = NULL; + if (lex_env.last_error != ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + yr_re_ast_destroy(*re_ast); + *re_ast = NULL; + } strlcpy( error->message, diff --git a/src/libyara/re_lexer.l b/src/libyara/re_lexer.l index 6cdc30b..21bef61 100644 --- a/src/libyara/re_lexer.l +++ b/src/libyara/re_lexer.l @@ -56,6 +56,7 @@ with noyywrap then we can remove this pragma. 
#include #include #include +#include #ifdef _WIN32 @@ -82,11 +83,13 @@ static uint8_t space_chars[] = { int escaped_char_value( char* text, - uint8_t* value); + uint8_t* value, + bool strict_escape); int read_escaped_char( yyscan_t yyscanner, - uint8_t* escaped_char); + uint8_t* escaped_char, + bool strict_escape); %} @@ -138,12 +141,6 @@ hex_digit [0-9a-fA-F] yyterminate(); } - if (hi_bound == 0 && lo_bound == 0) - { - yyerror(yyscanner, lex_env, "bad repeat interval"); - yyterminate(); - } - yylval->range = (hi_bound << 16) | lo_bound; return _RANGE_; @@ -165,12 +162,6 @@ hex_digit [0-9a-fA-F] yyterminate(); } - if (value == 0) - { - yyerror(yyscanner, lex_env, "bad repeat interval"); - yyterminate(); - } - yylval->range = (value << 16) | value; return _RANGE_; @@ -280,12 +271,20 @@ hex_digit [0-9a-fA-F] \\ { uint8_t c; + int return_code; - if (read_escaped_char(yyscanner, &c)) + return_code = read_escaped_char(yyscanner, &c, LEX_ENV->strict_escape); + if (return_code == VALID_ESCAPE_SEQUENCE) { yylval->integer = c; return _CHAR_; } + else if (return_code == UNKNOWN_ESCAPE_SEQUENCE) + { + yywarning(yyscanner, lex_env, "unknown escape sequence"); + yylval->integer = c; + return _CHAR_; + } else { yyerror(yyscanner, lex_env, "illegal escape sequence"); @@ -337,7 +336,7 @@ hex_digit [0-9a-fA-F] if (start == '\\') { - if (!escaped_char_value(yytext, &start)) + if (!escaped_char_value(yytext, &start, LEX_ENV->strict_escape)) { yyerror(yyscanner, lex_env, "illegal escape sequence"); yyterminate(); @@ -351,7 +350,7 @@ hex_digit [0-9a-fA-F] if (end == '\\') { - if (!read_escaped_char(yyscanner, &end)) + if (!read_escaped_char(yyscanner, &end, LEX_ENV->strict_escape)) { yyerror(yyscanner, lex_env, "illegal escape sequence"); yyterminate(); @@ -427,11 +426,18 @@ hex_digit [0-9a-fA-F] \\ { uint8_t c; + int return_code; - if (read_escaped_char(yyscanner, &c)) + return_code = read_escaped_char(yyscanner, &c, LEX_ENV->strict_escape); + if (return_code == VALID_ESCAPE_SEQUENCE) 
{ LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8; } + else if (return_code == UNKNOWN_ESCAPE_SEQUENCE) + { + yywarning(yyscanner, lex_env, "unknown escape sequence"); + LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8; + } else { yyerror(yyscanner, lex_env, "illegal escape sequence"); @@ -489,7 +495,8 @@ hex_digit [0-9a-fA-F] int escaped_char_value( char* text, - uint8_t* value) + uint8_t* value, + bool strict_escape) { unsigned int hex_value; char hex[3]; @@ -528,11 +535,49 @@ int escaped_char_value( *value = '\a'; break; + // Support metacharacters in escape sequences + case '\\': + case '^': + case '$': + case '.': + case '|': + case '(': + case ')': + case '[': + case ']': + + // Support other special characters that are used in rules and need to be escaped + case '*': + case '+': + case '?': + case '"': + case '\'': + case '-': + case '{': + case '}': + case '#': + case ':': + case '_': + case '=': + case '/': + case '!': + case ',': + case '@': + case '<': + case '>': + case '~': + case '&': + case '%': + *value = text[1]; + break; default: *value = text[1]; + if (strict_escape) + return UNKNOWN_ESCAPE_SEQUENCE; + return VALID_ESCAPE_SEQUENCE; } - return 1; + return VALID_ESCAPE_SEQUENCE; } @@ -545,7 +590,8 @@ int escaped_char_value( int read_escaped_char( yyscan_t yyscanner, - uint8_t* escaped_char) + uint8_t* escaped_char, + bool strict_escape) { char text[4] = {0, 0, 0, 0}; @@ -568,7 +614,7 @@ int read_escaped_char( return 0; } - return escaped_char_value(text, escaped_char); + return escaped_char_value(text, escaped_char, strict_escape); } @@ -599,7 +645,7 @@ void yyerror( // subsequent errors like "syntax error, unexpected $end" caused by // early parser termination. 
- if (lex_env->last_error == ERROR_SUCCESS) + if (lex_env->last_error == ERROR_SUCCESS || lex_env->last_error == ERROR_UNKNOWN_ESCAPE_SEQUENCE) { lex_env->last_error = ERROR_INVALID_REGULAR_EXPRESSION; @@ -610,17 +656,40 @@ void yyerror( } } +void yywarning( + yyscan_t yyscanner, + RE_LEX_ENVIRONMENT* lex_env, + const char *error_message) +{ + // Do not overwrite Errors + // print out warning only if there is not any other error beforehand + + if (lex_env->last_error == ERROR_SUCCESS) + { + lex_env->last_error = ERROR_UNKNOWN_ESCAPE_SEQUENCE; + + strlcpy( + lex_env->last_error_message, + error_message, + sizeof(lex_env->last_error_message)); + } +} int yr_parse_re_string( const char* re_string, RE_AST** re_ast, - RE_ERROR* error) + RE_ERROR* error, + int flags) { yyscan_t yyscanner; jmp_buf recovery_trampoline; RE_LEX_ENVIRONMENT lex_env; lex_env.last_error = ERROR_SUCCESS; + if (flags & RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES) + lex_env.strict_escape = true; + else + lex_env.strict_escape = false; lex_env.last_error_message[0] = '\0'; yr_thread_storage_set_value( @@ -648,8 +717,11 @@ int yr_parse_re_string( if (lex_env.last_error != ERROR_SUCCESS) { - yr_re_ast_destroy(*re_ast); - *re_ast = NULL; + if (lex_env.last_error != ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + yr_re_ast_destroy(*re_ast); + *re_ast = NULL; + } strlcpy( error->message, diff --git a/src/libyara/rules.c b/src/libyara/rules.c index 63bfa10..5a55d91 100644 --- a/src/libyara/rules.c +++ b/src/libyara/rules.c @@ -336,6 +336,15 @@ int yr_rules_from_arena(YR_ARENA* arena, YR_RULES** rules) if (new_rules == NULL) return ERROR_INSUFFICIENT_MEMORY; + new_rules->no_required_strings = (YR_BITMASK*) yr_calloc( + sizeof(YR_BITMASK), YR_BITMASK_SIZE(summary->num_rules)); + + if (new_rules->no_required_strings == NULL) + { + yr_free(new_rules); + return ERROR_INSUFFICIENT_MEMORY; + } + // Now YR_RULES relies on this arena, let's increment the arena's // reference count so that if the original owner of the 
arena calls // yr_arena_destroy the arena is not destroyed. @@ -364,6 +373,15 @@ int yr_rules_from_arena(YR_ARENA* arena, YR_RULES** rules) new_rules->code_start = yr_arena_get_ptr(arena, YR_CODE_SECTION, 0); + // If a rule has no required_strings, this means that the condition might + // evaluate to true without any matching strings, and we therefore have to + // mark it as "to be evaluated" from the beginning. + for (int i = 0; i < new_rules->num_rules; i++) + { + if (new_rules->rules_table[i].required_strings == 0) + yr_bitmask_set(new_rules->no_required_strings, i); + } + *rules = new_rules; return ERROR_SUCCESS; @@ -524,6 +542,7 @@ YR_API int yr_rules_destroy(YR_RULES* rules) external++; } + yr_free(rules->no_required_strings); yr_arena_release(rules->arena); yr_free(rules); diff --git a/src/libyara/scan.c b/src/libyara/scan.c index c96a625..5e4c8dc 100644 --- a/src/libyara/scan.c +++ b/src/libyara/scan.c @@ -558,8 +558,8 @@ static int _yr_scan_verify_chained_string_match( _yr_scan_remove_match_from_list( match, &context->unconfirmed_matches[string->idx]); - match->match_length = - (int32_t) (match_offset - match->offset + match_length); + match->match_length = (int32_t) (match_offset - match->offset + + match_length); match->data_length = yr_min( match->match_length, (int32_t) max_match_data); @@ -575,6 +575,10 @@ static int _yr_scan_verify_chained_string_match( match_data - match_offset + match->offset, match->data_length); + // Once a string is found, the rule containing that string is + // required to be evaluated. 
+ yr_bitmask_set(context->required_eval, string->rule_idx); + FAIL_ON_ERROR(_yr_scan_add_match_to_list( match, &context->matches[string->idx], false)); } @@ -750,6 +754,8 @@ static int _yr_scan_match_callback( new_match->is_private = STRING_IS_PRIVATE(string); new_match->xor_key = callback_args->xor_key; + yr_bitmask_set(callback_args->context->required_eval, string->rule_idx); + FAIL_ON_ERROR(_yr_scan_add_match_to_list( new_match, &callback_args->context->matches[string->idx], @@ -796,8 +802,6 @@ static int _yr_scan_verify_re_match( CALLBACK_ARGS callback_args; RE_EXEC_FUNC exec; - int forward_matches = -1; - int backward_matches = -1; int flags = 0; if (STRING_IS_GREEDY_REGEXP(ac_match->string)) @@ -814,7 +818,21 @@ static int _yr_scan_verify_re_match( else exec = yr_re_exec; - if (STRING_IS_ASCII(ac_match->string) || STRING_IS_BASE64(ac_match->string) || + callback_args.string = ac_match->string; + callback_args.context = context; + callback_args.data = data; + callback_args.data_size = data_size; + callback_args.data_base = data_base; + callback_args.forward_matches = -1; + callback_args.full_word = STRING_IS_FULL_WORD(ac_match->string); + // xor modifier is not valid for RE but set it so we don't leak stack values. + callback_args.xor_key = 0; + + if (STRING_IS_ASCII(ac_match->string) || + // The base64 and base64wide are not supported in regexps, but strings + // with these modifiers are converted to a regexp with three + // alternatives. 
+ STRING_IS_BASE64(ac_match->string) || STRING_IS_BASE64_WIDE(ac_match->string)) { FAIL_ON_ERROR(exec( @@ -826,59 +844,59 @@ static int _yr_scan_verify_re_match( flags, NULL, NULL, - &forward_matches)); + &callback_args.forward_matches)); + + if (callback_args.forward_matches != -1 && ac_match->backward_code != NULL) + { + FAIL_ON_ERROR(exec( + context, + ac_match->backward_code, + data + offset, + data_size - offset, + offset, + flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE, + _yr_scan_match_callback, + (void*) &callback_args, + NULL)); + } + else if (callback_args.forward_matches >= 0) + { + FAIL_ON_ERROR( + _yr_scan_match_callback(data + offset, 0, flags, &callback_args)); + } } - if ((forward_matches == -1) && (STRING_IS_WIDE(ac_match->string) && - !(STRING_IS_BASE64(ac_match->string) || - STRING_IS_BASE64_WIDE(ac_match->string)))) + if (STRING_IS_WIDE(ac_match->string)) { - flags |= RE_FLAGS_WIDE; FAIL_ON_ERROR(exec( context, ac_match->forward_code, data + offset, data_size - offset, offset, - flags, + flags | RE_FLAGS_WIDE, NULL, NULL, - &forward_matches)); - } - - if (forward_matches == -1) - return ERROR_SUCCESS; + &callback_args.forward_matches)); - if (forward_matches == 0 && ac_match->backward_code == NULL) - return ERROR_SUCCESS; - - callback_args.string = ac_match->string; - callback_args.context = context; - callback_args.data = data; - callback_args.data_size = data_size; - callback_args.data_base = data_base; - callback_args.forward_matches = forward_matches; - callback_args.full_word = STRING_IS_FULL_WORD(ac_match->string); - // xor modifier is not valid for RE but set it so we don't leak stack values. 
- callback_args.xor_key = 0; - - if (ac_match->backward_code != NULL) - { - FAIL_ON_ERROR(exec( - context, - ac_match->backward_code, - data + offset, - data_size - offset, - offset, - flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE, - _yr_scan_match_callback, - (void*) &callback_args, - &backward_matches)); - } - else - { - FAIL_ON_ERROR( - _yr_scan_match_callback(data + offset, 0, flags, &callback_args)); + if (callback_args.forward_matches != -1 && ac_match->backward_code != NULL) + { + FAIL_ON_ERROR(exec( + context, + ac_match->backward_code, + data + offset, + data_size - offset, + offset, + flags | RE_FLAGS_WIDE | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE, + _yr_scan_match_callback, + (void*) &callback_args, + NULL)); + } + else if (callback_args.forward_matches >= 0) + { + FAIL_ON_ERROR( + _yr_scan_match_callback(data + offset, 0, flags, &callback_args)); + } } return ERROR_SUCCESS; diff --git a/src/libyara/scanner.c b/src/libyara/scanner.c index 5051d76..3667561 100644 --- a/src/libyara/scanner.c +++ b/src/libyara/scanner.c @@ -68,6 +68,8 @@ static int _yr_scanner_scan_mem_block( size_t i = 0; uint32_t state = YR_AC_ROOT_STATE; uint16_t index; + YR_STRING* report_string = NULL; + YR_RULE* rule = NULL; while (i < block->size) { @@ -104,6 +106,14 @@ static int _yr_scanner_scan_mem_block( match = &rules->ac_match_pool[match_table[state] - 1]; + if (scanner->matches->count >= YR_SLOW_STRING_MATCHES) + { + report_string = match->string; + rule = report_string + ? 
&scanner->rules->rules_table[report_string->rule_idx] + : NULL; + } + while (match != NULL) { if (match->backtrack <= i) @@ -162,6 +172,24 @@ static int _yr_scanner_scan_mem_block( } } + if (rule != NULL && scanner->matches->count >= YR_SLOW_STRING_MATCHES && + scanner->matches->count < YR_MAX_STRING_MATCHES) + { + if (rule != NULL && report_string != NULL) + { + result = scanner->callback( + scanner, + CALLBACK_MSG_TOO_SLOW_SCANNING, + (void*) report_string, + scanner->user_data); + if (result != CALLBACK_CONTINUE) + { + result = ERROR_TOO_SLOW_SCANNING; + goto _exit; + } + } + } + _exit: YR_DEBUG_FPRINTF( @@ -184,6 +212,11 @@ static void _yr_scanner_clean_matches(YR_SCANNER* scanner) 0, sizeof(YR_BITMASK) * YR_BITMASK_SIZE(scanner->rules->num_rules)); + memset( + scanner->required_eval, + 0, + sizeof(YR_BITMASK) * YR_BITMASK_SIZE(scanner->rules->num_rules)); + memset( scanner->ns_unsatisfied_flags, 0, @@ -230,6 +263,9 @@ YR_API int yr_scanner_create(YR_RULES* rules, YR_SCANNER** scanner) new_scanner->rule_matches_flags = (YR_BITMASK*) yr_calloc( sizeof(YR_BITMASK), YR_BITMASK_SIZE(rules->num_rules)); + new_scanner->required_eval = (YR_BITMASK*) yr_calloc( + sizeof(YR_BITMASK), YR_BITMASK_SIZE(rules->num_rules)); + new_scanner->ns_unsatisfied_flags = (YR_BITMASK*) yr_calloc( sizeof(YR_BITMASK), YR_BITMASK_SIZE(rules->num_namespaces)); @@ -243,10 +279,11 @@ YR_API int yr_scanner_create(YR_RULES* rules, YR_SCANNER** scanner) rules->num_strings, sizeof(YR_MATCHES)); if (new_scanner->rule_matches_flags == NULL || + new_scanner->required_eval == NULL || new_scanner->ns_unsatisfied_flags == NULL || new_scanner->strings_temp_disabled == NULL || - new_scanner->matches == NULL || // - new_scanner->unconfirmed_matches == NULL) + (new_scanner->matches == NULL && rules->num_strings > 0) || + (new_scanner->unconfirmed_matches == NULL && rules->num_strings > 0)) { yr_scanner_destroy(new_scanner); return ERROR_INSUFFICIENT_MEMORY; @@ -256,7 +293,7 @@ YR_API int 
yr_scanner_create(YR_RULES* rules, YR_SCANNER** scanner) new_scanner->profiling_info = yr_calloc( rules->num_rules, sizeof(YR_PROFILING_INFO)); - if (new_scanner->profiling_info == NULL) + if (new_scanner->profiling_info == NULL && rules->num_rules > 0) { yr_scanner_destroy(new_scanner); return ERROR_INSUFFICIENT_MEMORY; @@ -329,6 +366,7 @@ YR_API void yr_scanner_destroy(YR_SCANNER* scanner) yr_free(scanner->rule_matches_flags); yr_free(scanner->ns_unsatisfied_flags); + yr_free(scanner->required_eval); yr_free(scanner->strings_temp_disabled); yr_free(scanner->matches); yr_free(scanner->unconfirmed_matches); @@ -469,26 +507,33 @@ YR_API int yr_scanner_scan_mem_blocks( if (result != ERROR_SUCCESS) goto _exit; + // Every rule that doesn't require a matching string must be evaluated + // regardless of whether a string matched or not. + memcpy( + scanner->required_eval, + scanner->rules->no_required_strings, + sizeof(YR_BITMASK) * YR_BITMASK_SIZE(rules->num_rules)); + yr_stopwatch_start(&scanner->stopwatch); block = iterator->first(iterator); } - while (block != NULL) - { - const uint8_t* data = block->fetch_data(block); + YR_TRYCATCH( + !(scanner->flags & SCAN_FLAGS_NO_TRYCATCH), + { + while (block != NULL) + { + const uint8_t* data = yr_fetch_block_data(block); - // fetch_data may fail and return NULL. - if (data == NULL) - { - block = iterator->next(iterator); - continue; - } + // fetch_data may fail and return NULL. 
+ if (data == NULL) + { + block = iterator->next(iterator); + continue; + } - if (scanner->entry_point == YR_UNDEFINED) - { - YR_TRYCATCH( - !(scanner->flags & SCAN_FLAGS_NO_TRYCATCH), + if (scanner->entry_point == YR_UNDEFINED) { if (scanner->flags & SCAN_FLAGS_PROCESS_MEMORY) scanner->entry_point = yr_get_entry_point_address( @@ -496,20 +541,19 @@ YR_API int yr_scanner_scan_mem_blocks( else scanner->entry_point = yr_get_entry_point_offset( data, block->size); - }, - {}); - } - - YR_TRYCATCH( - !(scanner->flags & SCAN_FLAGS_NO_TRYCATCH), - { result = _yr_scanner_scan_mem_block(scanner, data, block); }, - { result = ERROR_COULD_NOT_MAP_FILE; }); - - if (result != ERROR_SUCCESS) - goto _exit; + } + result = _yr_scanner_scan_mem_block(scanner, data, block); + if (result != ERROR_SUCCESS) + { + break; + } + block = iterator->next(iterator); + } + }, + { result = ERROR_COULD_NOT_MAP_FILE; }); - block = iterator->next(iterator); - } + if (result != ERROR_SUCCESS) + goto _exit; result = iterator->last_error; @@ -640,6 +684,24 @@ static const uint8_t* _yr_fetch_block_data(YR_MEMORY_BLOCK* block) return (const uint8_t*) block->context; } +YR_API const uint8_t* yr_fetch_block_data(YR_MEMORY_BLOCK* block) +{ + const uint8_t* data = block->fetch_data(block); + if (data == NULL) + { + return NULL; + } + jumpinfo* info = (jumpinfo*) yr_thread_storage_get_value( + &yr_trycatch_trampoline_tls); + if (info == NULL) // Not called from YR_TRYCATCH + { + return data; + } + info->memfault_from = (void*) data; + info->memfault_to = (void*) (data + block->size); + return data; +} + YR_API int yr_scanner_scan_mem( YR_SCANNER* scanner, const uint8_t* buffer, @@ -655,6 +717,7 @@ YR_API int yr_scanner_scan_mem( YR_MEMORY_BLOCK block; YR_MEMORY_BLOCK_ITERATOR iterator; + int result; block.size = buffer_size; block.base = 0; @@ -667,7 +730,23 @@ YR_API int yr_scanner_scan_mem( iterator.file_size = _yr_get_file_size; iterator.last_error = ERROR_SUCCESS; - int result = 
yr_scanner_scan_mem_blocks(scanner, &iterator); + // Detect cases where every byte of input is checked for match and input size + // is bigger than 0.2 MB + if (scanner->rules->ac_match_table[YR_AC_ROOT_STATE] != 0 && + buffer_size > YR_FILE_SIZE_THRESHOLD) + { + YR_STRING* report_string = + scanner->rules->ac_match_pool[YR_AC_ROOT_STATE].string; + result = scanner->callback( + scanner, + CALLBACK_MSG_TOO_SLOW_SCANNING, + (void*) report_string, + scanner->user_data); + if (result != CALLBACK_CONTINUE) + return ERROR_TOO_SLOW_SCANNING; + } + + result = yr_scanner_scan_mem_blocks(scanner, &iterator); YR_DEBUG_FPRINTF( 2,