Skip to content

Commit

Permalink
Merge pull request #380 from bshifter/logmsg-check-program
Browse files Browse the repository at this point in the history
check program name for rfc3164
  • Loading branch information
MrAnno authored Nov 18, 2024
2 parents 0b99654 + 6fbb002 commit e152a6c
Show file tree
Hide file tree
Showing 13 changed files with 151 additions and 0 deletions.
4 changes: 4 additions & 0 deletions lib/cfg-grammar.y
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ main_location_print (FILE *yyo, YYLTYPE const * const yylocp)
%token KW_BAD_HOSTNAME 10094
%token KW_LOG_LEVEL 10095
%token KW_IDLE_TIMEOUT 10096
%token KW_CHECK_PROGRAM 10097

%token KW_KEEP_TIMESTAMP 10100

Expand Down Expand Up @@ -1102,6 +1103,7 @@ options_item
| KW_CHAIN_HOSTNAMES '(' yesno ')' { configuration->chain_hostnames = $3; }
| KW_KEEP_HOSTNAME '(' yesno ')' { configuration->keep_hostname = $3; }
| KW_CHECK_HOSTNAME '(' yesno ')' { configuration->check_hostname = $3; }
| KW_CHECK_PROGRAM '(' yesno ')' { configuration->check_program = $3; }
| KW_BAD_HOSTNAME '(' string ')' { cfg_bad_hostname_set(configuration, $3); free($3); }
| KW_TIME_REOPEN '(' positive_integer ')' { configuration->time_reopen = $3; }
| KW_TIME_REAP '(' nonnegative_integer ')' { configuration->time_reap = $3; }
Expand Down Expand Up @@ -1485,6 +1487,7 @@ source_reader_option
/* NOTE: plugins need to set "last_reader_options" in order to incorporate this rule in their grammar */

: KW_CHECK_HOSTNAME '(' yesno ')' { last_reader_options->check_hostname = $3; }
| KW_CHECK_PROGRAM '(' yesno ')' { last_reader_options->check_program = $3; }
| KW_FLAGS '(' source_reader_option_flags ')'
| KW_LOG_FETCH_LIMIT '(' positive_integer ')' { last_reader_options->fetch_limit = $3; }
| KW_FORMAT '(' string ')' { last_reader_options->parse_options.format = g_strdup($3); free($3); }
Expand All @@ -1496,6 +1499,7 @@ source_reader_option
/*
 * List of flags for a source reader: zero or more entries, matched
 * right-recursively (the empty alternative terminates the list).
 *
 * A plain string is passed to log_reader_options_process_flag() and a failure
 * is reported through CHECK_ERROR as "Unknown flag"; the string is freed
 * afterwards.  The KW_CHECK_HOSTNAME / KW_CHECK_PROGRAM alternatives pass the
 * fixed names "check-hostname" / "check-program" instead, and the return
 * value of the call is ignored for them.
 *
 * NOTE(review): the keyword alternatives presumably exist because these words
 * are lexed as keyword tokens rather than strings -- confirm against the lexer.
 */
source_reader_option_flags
: string source_reader_option_flags { CHECK_ERROR(log_reader_options_process_flag(last_reader_options, $1), @1, "Unknown flag \"%s\"", $1); free($1); }
| KW_CHECK_HOSTNAME source_reader_option_flags { log_reader_options_process_flag(last_reader_options, "check-hostname"); }
| KW_CHECK_PROGRAM source_reader_option_flags { log_reader_options_process_flag(last_reader_options, "check-program"); }
|
;

Expand Down
1 change: 1 addition & 0 deletions lib/cfg-parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ static CfgLexerKeyword main_keywords[] =
{ "normalize_hostnames", KW_NORMALIZE_HOSTNAMES },
{ "keep_hostname", KW_KEEP_HOSTNAME },
{ "check_hostname", KW_CHECK_HOSTNAME },
{ "check_program", KW_CHECK_PROGRAM },
{ "bad_hostname", KW_BAD_HOSTNAME },
{ "custom_domain", KW_CUSTOM_DOMAIN },
{ "keep_timestamp", KW_KEEP_TIMESTAMP },
Expand Down
1 change: 1 addition & 0 deletions lib/cfg.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ struct _GlobalConfig
gboolean chain_hostnames;
gboolean keep_hostname;
gboolean check_hostname;
gboolean check_program;
gboolean bad_hostname_compiled;
regex_t bad_hostname;
gchar *bad_hostname_re;
Expand Down
1 change: 1 addition & 0 deletions lib/logmsg/logmsg.c
Original file line number Diff line number Diff line change
Expand Up @@ -2030,6 +2030,7 @@ log_msg_tags_init(void)
log_tags_register_predefined_tag("syslog.invalid_hostname", LM_T_SYSLOG_INVALID_HOSTNAME);
log_tags_register_predefined_tag("syslog.unexpected_framing", LM_T_SYSLOG_UNEXPECTED_FRAMING);
log_tags_register_predefined_tag("syslog.rfc3164_missing_header", LM_T_SYSLOG_RFC3164_MISSING_HEADER);
log_tags_register_predefined_tag("syslog.rfc3164_invalid_program", LM_T_SYSLOG_RFC_3164_INVALID_PROGRAM);

log_tags_register_predefined_tag("syslog.rfc5424_missing_hostname", LM_T_SYSLOG_RFC5424_MISSING_HOSTNAME);
log_tags_register_predefined_tag("syslog.rfc5424_missing_app_name", LM_T_SYSLOG_RFC5424_MISSING_APP_NAME);
Expand Down
3 changes: 3 additions & 0 deletions lib/logmsg/logmsg.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ enum
LM_T_SYSLOG_RFC5424_MISSING_MESSAGE,
/* message field missing */
LM_T_SYSLOG_MISSING_MESSAGE,
/* invalid program name */
LM_T_SYSLOG_RFC_3164_INVALID_PROGRAM,

LM_T_PREDEFINED_MAX,
};

Expand Down
5 changes: 5 additions & 0 deletions lib/logreader.c
Original file line number Diff line number Diff line change
Expand Up @@ -852,6 +852,11 @@ log_reader_options_init(LogReaderOptions *options, GlobalConfig *cfg, const gcha
options->parse_options.flags |= LP_ASSUME_UTF8;
if (cfg->threaded)
options->flags |= LR_THREADED;
if (options->check_program == -1)
options->check_program = cfg->check_program;
if (options->check_program)
options->parse_options.flags |= LP_CHECK_PROGRAM;

options->initialized = TRUE;
}

Expand Down
1 change: 1 addition & 0 deletions lib/logreader.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ typedef struct _LogReaderOptions
gint fetch_limit;
const gchar *group_name;
gboolean check_hostname;
gboolean check_program;
} LogReaderOptions;

typedef struct _LogReader LogReader;
Expand Down
1 change: 1 addition & 0 deletions lib/msg-format.c
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ CfgFlagHandler msg_format_flag_handlers[] =
{ "no-rfc3164-fallback", CFH_SET, offsetof(MsgFormatOptions, flags), LP_NO_RFC3164_FALLBACK },
{ "piggyback-errors", CFH_SET, offsetof(MsgFormatOptions, flags), LP_PIGGYBACK_ERRORS },
{ "no-piggyback-errors", CFH_CLEAR, offsetof(MsgFormatOptions, flags), LP_PIGGYBACK_ERRORS },
{ "check-program", CFH_SET, offsetof(MsgFormatOptions, flags), LP_CHECK_PROGRAM },
{ NULL },
};

Expand Down
1 change: 1 addition & 0 deletions lib/msg-format.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ enum
LP_NO_HEADER = 0x2000,
LP_NO_RFC3164_FALLBACK = 0x4000,
LP_PIGGYBACK_ERRORS = 0x8000,
LP_CHECK_PROGRAM = 0x10000,
};

typedef struct _MsgFormatHandler MsgFormatHandler;
Expand Down
32 changes: 32 additions & 0 deletions modules/syslogformat/syslog-format.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,24 @@ _syslog_format_parse_version(LogMessage *msg, const guchar **data, gint *length)
return TRUE;
}

/* Punctuation accepted in a program name in addition to ASCII letters and digits. */
static const gchar program_name_allowed_specials[] = ".-_()/";
/* Number of characters in the table above, excluding the terminating NUL. */
static const gsize program_name_allowed_spacial_chars_len = G_N_ELEMENTS(program_name_allowed_specials) - 1;

/*
 * Decide whether a single byte may appear in a program name.
 *
 * @ch:        the byte to classify
 * @has_alpha: set to TRUE when @ch is an ASCII letter; never reset here, so
 *             the caller can accumulate "at least one letter seen" over a
 *             whole name
 *
 * Returns TRUE for ASCII letters, ASCII digits and the characters listed in
 * program_name_allowed_specials, FALSE for everything else (including NUL).
 *
 * The ranges are compared explicitly instead of calling isalpha()/isdigit():
 * those are locale dependent and may classify bytes above 0x7F as letters in
 * single-byte locales, which would let non-ASCII bytes through.
 */
static inline gboolean
_validate_program_char(const guchar ch, gboolean *has_alpha)
{
  if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
    {
      *has_alpha = TRUE;
      return TRUE;
    }

  if (ch >= '0' && ch <= '9')
    return TRUE;

  /* the length excludes the terminating NUL, so a NUL byte is never matched */
  if (memchr(program_name_allowed_specials, ch, program_name_allowed_spacial_chars_len))
    return TRUE;

  return FALSE;
}

static void
_syslog_format_parse_legacy_program_name(LogMessage *msg, const guchar **data, gint *length, guint flags)
{
Expand All @@ -336,10 +354,24 @@ _syslog_format_parse_legacy_program_name(LogMessage *msg, const guchar **data, g
src = *data;
left = *length;
prog_start = src;
gboolean has_alpha_char = FALSE;

while (left && *src != ' ' && *src != '[' && *src != ':')
{
if (G_UNLIKELY(flags & LP_CHECK_PROGRAM) && !_validate_program_char(*src, &has_alpha_char))
{
log_msg_set_tag_by_id(msg, LM_T_SYSLOG_RFC_3164_INVALID_PROGRAM);
return;
}
_skip_char(&src, &left);
}

if (G_UNLIKELY(flags & LP_CHECK_PROGRAM) && !has_alpha_char)
{
log_msg_set_tag_by_id(msg, LM_T_SYSLOG_RFC_3164_INVALID_PROGRAM);
return;
}

log_msg_set_value(msg, LM_V_PROGRAM, (gchar *) prog_start, src - prog_start);
if (left > 0 && *src == '[')
{
Expand Down
1 change: 1 addition & 0 deletions modules/syslogformat/syslog-parser-grammar.ym
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ parser_syslog_opt
/*
 * List of flags for the syslog parser: zero or more entries, matched
 * right-recursively (the empty alternative terminates the list).
 *
 * Each flag is applied to the parse_options of last_parser (cast to
 * SyslogParser) via msg_format_options_process_flag().  A plain string that
 * the call does not accept is reported through CHECK_ERROR as "Unknown flag";
 * the string is freed afterwards.  The KW_CHECK_HOSTNAME / KW_CHECK_PROGRAM
 * alternatives pass the fixed names "check-hostname" / "check-program" and
 * ignore the return value.
 *
 * NOTE(review): the keyword alternatives presumably exist because these words
 * are lexed as keyword tokens rather than strings -- confirm against the lexer.
 */
parser_syslog_opt_flags
: string parser_syslog_opt_flags { CHECK_ERROR(msg_format_options_process_flag(&((SyslogParser *) last_parser)->parse_options, $1), @1, "Unknown flag %s", $1); free($1); }
| KW_CHECK_HOSTNAME parser_syslog_opt_flags { msg_format_options_process_flag(&((SyslogParser *) last_parser)->parse_options, "check-hostname"); }
| KW_CHECK_PROGRAM parser_syslog_opt_flags { msg_format_options_process_flag(&((SyslogParser *) last_parser)->parse_options, "check-program"); }
|
;

Expand Down
86 changes: 86 additions & 0 deletions modules/syslogformat/tests/test_syslog_format.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,92 @@ Test(syslog_format, rfc3164_error_invalid_pri)
log_msg_unref(msg);
}

/*
 * With LP_CHECK_PROGRAM set, a program name made up of letters, digits and
 * every allowed punctuation character is extracted as PROGRAM and the message
 * is not tagged as having an invalid program.
 */
Test(syslog_format, rfc3164_check_program_valid_name)
{
  gsize error_pos;
  const gchar *input =
    "<189> Feb 3 12:34:56 host program_0123456789-abcdefghijklmnopqrstuvwxyz(ABCDEFGHIJKLMNOPQRSTUVWXYZ).valid/chars[pid]: message";
  LogMessage *logmsg = log_msg_new_empty();

  parse_options.flags |= LP_CHECK_PROGRAM;

  cr_assert(syslog_format_handler(&parse_options, logmsg, (const guchar *) input, strlen(input), &error_pos));

  /* header fields */
  assert_log_message_value_by_name(logmsg, "HOST", "host");
  assert_log_message_value_by_name(logmsg, "PROGRAM",
                                   "program_0123456789-abcdefghijklmnopqrstuvwxyz(ABCDEFGHIJKLMNOPQRSTUVWXYZ).valid/chars");
  assert_log_message_value_by_name(logmsg, "PID", "pid");

  /* payload and format */
  assert_log_message_value_by_name(logmsg, "MSG", "message");
  assert_log_message_value_by_name(logmsg, "MSGFORMAT", "syslog:rfc3164");

  /* a valid name must not be flagged */
  cr_assert(!log_msg_is_tag_by_name(logmsg, "syslog.rfc3164_invalid_program"));

  log_msg_unref(logmsg);
}

/*
 * With LP_CHECK_PROGRAM set, a program field consisting only of digits and a
 * dot is not accepted: PROGRAM and PID stay empty, the text remains in MSG and
 * the invalid-program tag is set.
 */
Test(syslog_format, rfc3164_check_program_decimal_number)
{
  gsize error_pos;
  const gchar *input = "<189> Feb 3 12:34:56 host 323235243.2354[pid]: message";
  LogMessage *logmsg = log_msg_new_empty();

  parse_options.flags |= LP_CHECK_PROGRAM;

  cr_assert(syslog_format_handler(&parse_options, logmsg, (const guchar *) input, strlen(input), &error_pos));

  /* header fields: the rejected name yields no PROGRAM/PID */
  assert_log_message_value_by_name(logmsg, "HOST", "host");
  assert_log_message_value_by_name(logmsg, "PROGRAM", "");
  assert_log_message_value_by_name(logmsg, "PID", "");

  /* the rejected name stays at the front of the payload */
  assert_log_message_value_by_name(logmsg, "MSG", "323235243.2354[pid]: message");
  assert_log_message_value_by_name(logmsg, "MSGFORMAT", "syslog:rfc3164");

  assert_log_message_has_tag(logmsg, "syslog.rfc3164_invalid_program");

  log_msg_unref(logmsg);
}

/*
 * With LP_CHECK_PROGRAM set, a program field containing a character outside
 * the allowed set ('!') is not accepted: PROGRAM and PID stay empty, the text
 * remains in MSG and the invalid-program tag is set.
 */
Test(syslog_format, rfc3164_check_program_invalid_character)
{
  gsize error_pos;
  const gchar *input = "<189> Feb 3 12:34:56 host program![pid]: message";
  LogMessage *logmsg = log_msg_new_empty();

  parse_options.flags |= LP_CHECK_PROGRAM;

  cr_assert(syslog_format_handler(&parse_options, logmsg, (const guchar *) input, strlen(input), &error_pos));

  /* header fields: the rejected name yields no PROGRAM/PID */
  assert_log_message_value_by_name(logmsg, "HOST", "host");
  assert_log_message_value_by_name(logmsg, "PROGRAM", "");
  assert_log_message_value_by_name(logmsg, "PID", "");

  /* the rejected name stays at the front of the payload */
  assert_log_message_value_by_name(logmsg, "MSG", "program![pid]: message");
  assert_log_message_value_by_name(logmsg, "MSGFORMAT", "syslog:rfc3164");

  assert_log_message_has_tag(logmsg, "syslog.rfc3164_invalid_program");

  log_msg_unref(logmsg);
}

/*
 * With LP_CHECK_PROGRAM set, an IP-address-like program field (digits and dots
 * only) is not accepted: PROGRAM and PID stay empty, the text remains in MSG
 * and the invalid-program tag is set.
 */
Test(syslog_format, rfc3164_check_program_ip_address)
{
  gsize error_pos;
  const gchar *input = "<189> Feb 3 12:34:56 host 127.0.0.1[pid]: message";
  LogMessage *logmsg = log_msg_new_empty();

  parse_options.flags |= LP_CHECK_PROGRAM;

  cr_assert(syslog_format_handler(&parse_options, logmsg, (const guchar *) input, strlen(input), &error_pos));

  /* header fields: the rejected name yields no PROGRAM/PID */
  assert_log_message_value_by_name(logmsg, "HOST", "host");
  assert_log_message_value_by_name(logmsg, "PROGRAM", "");
  assert_log_message_value_by_name(logmsg, "PID", "");

  /* the rejected name stays at the front of the payload */
  assert_log_message_value_by_name(logmsg, "MSG", "127.0.0.1[pid]: message");
  assert_log_message_value_by_name(logmsg, "MSGFORMAT", "syslog:rfc3164");

  assert_log_message_has_tag(logmsg, "syslog.rfc3164_invalid_program");

  log_msg_unref(logmsg);
}

Test(syslog_format, rfc3164_error_missing_timestamp)
{
/* incorrect pri value */
Expand Down
14 changes: 14 additions & 0 deletions news/feature-380.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
`check-program`: Introduced as a flag for global or source options.

By default, this flag is set to false. Enabling the check-program flag triggers `program` name validation for `RFC3164` messages. Valid `program` names must adhere to the following criteria:

- Contain only these characters: `[a-zA-Z0-9_/().-]`
- Include at least one alphabetical character.

If a `program` name fails validation, it will be considered part of the log message.


Example:

```
source { network(flags(check-hostname, check-program)); };
```

0 comments on commit e152a6c

Please sign in to comment.