MINOR: log: add +json encoding option

In this patch, we add the "+json" log format option that can be set
globally or per log format node.

What it does is that it sets the LOG_OPT_ENCODE_JSON flag for the
current context which is provided to all lf_* log building functions.

This way, all lf_* functions are now aware of this option and try to
comply with the JSON specification when the option is set.

If the option is set globally, then sess_build_logline() will produce a
map-like object with key=val pairs for named logformat nodes.
(logformat nodes that don't have a name are simply ignored).

Example:
  log-format "%{+json}o %[int(4)] test %(named_field)[str(ok)]"

Will produce:
  {"named_field": "ok"}

If the option isn't set globally, but on a specific node instead, then
only the value will be encoded according to JSON specification.

Example:
  log-format "{ \"manual_key\": %(named_field){+json}[bool(true)] }"

Will produce:
  {"manual_key": true}

When the option is set, +E option will be ignored, and partial numerical
values (ie: because of logasap) will be encoded as-is.
This commit is contained in:
Aurelien DARRAGON 2024-04-22 14:40:04 +02:00
parent b7c3d8c87c
commit 3f7c8387c0
3 changed files with 274 additions and 15 deletions

View File

@ -25747,6 +25747,11 @@ Flags are :
rings and binary-capable log endpoints.
This option can only be set globally (with %o), it will be ignored
if set on an individual node's options.
* json: automatically encode value in JSON format
(when set globally, only named format variables are considered)
Incomplete numerical values (e.g.: '%B' when logasap is used),
which are normally prefixed with '+' without encoding, will be
encoded as-is. Also, '+E' option will be ignored.
Example:
@ -25755,6 +25760,8 @@ Flags are :
log-format-sd %{+Q,+E}o\ [exampleSDID@1234\ header=%[capture.req.hdr(0)]]
log-format "%{+json}o %(request)r %(custom_expr)[str(custom)]"
Please refer to the table below for currently defined variables :
+---+------+------------------------------------------------------+---------+

View File

@ -48,6 +48,10 @@
#define LOG_OPT_ESC 0x00000040
#define LOG_OPT_MERGE_SPACES 0x00000080
#define LOG_OPT_BIN 0x00000100
/* unused: 0x00000200 ... 0x00000800 */
/* encoding types: each one owns a bit located within the LOG_OPT_ENCODE mask */
#define LOG_OPT_ENCODE_JSON 0x00001000
/* unused encode: 0x00002000 */
/* mask covering every encoding type bit (0x00001000 | 0x00002000): testing
 * options against it tells whether any encoding type is set
 */
#define LOG_OPT_ENCODE 0x00003000
/* Fields that need to be extracted from the incoming connection or request for

278
src/log.c
View File

@ -97,6 +97,7 @@ static const struct log_fmt_st log_formats[LOG_FORMATS] = {
*/
long no_escape_map[(256/8) / sizeof(long)];
long rfc5424_escape_map[(256/8) / sizeof(long)];
long json_escape_map[(256/8) / sizeof(long)];
long hdr_encode_map[(256/8) / sizeof(long)];
long url_encode_map[(256/8) / sizeof(long)];
long http_encode_map[(256/8) / sizeof(long)];
@ -328,6 +329,7 @@ struct logformat_tag_args tag_args_list[] = {
{ "X", LOG_OPT_HEXA },
{ "E", LOG_OPT_ESC },
{ "bin", LOG_OPT_BIN },
{ "json", LOG_OPT_ENCODE_JSON },
{ 0, 0 }
};
@ -380,7 +382,13 @@ int parse_logformat_tag_args(char *args, struct logformat_node *node, char **err
for (i = 0; sp && tag_args_list[i].name; i++) {
if (strcmp(sp, tag_args_list[i].name) == 0) {
if (flags == 1) {
node->options |= tag_args_list[i].mask;
/* Ensure we don't mix encoding types, existing
* encoding type prevails over new ones
*/
if (node->options & LOG_OPT_ENCODE)
node->options |= (tag_args_list[i].mask & ~LOG_OPT_ENCODE);
else
node->options |= tag_args_list[i].mask;
break;
} else if (flags == 2) {
node->options &= ~tag_args_list[i].mask;
@ -1751,12 +1759,28 @@ static inline void lf_buildctx_prepare(struct lf_buildctx *ctx,
* globally
*
* Also, ignore LOG_OPT_BIN since it is a global-only option
*
* Finally, ensure we don't mix encoding types, global setting
* prevails over per-node one.
*/
ctx->options |= (node->options & ~LOG_OPT_BIN);
if (g_options & LOG_OPT_ENCODE)
ctx->options |= (node->options & ~(LOG_OPT_BIN | LOG_OPT_ENCODE));
else
ctx->options |= (node->options & ~LOG_OPT_BIN);
/* consider node's typecast setting */
ctx->typecast = node->typecast;
}
/* encoding is incompatible with HTTP option, so it is ignored
* if HTTP option is set
*/
if (ctx->options & LOG_OPT_HTTP)
ctx->options &= ~LOG_OPT_ENCODE;
/* when encoding is set, ignore +E option */
if (ctx->options & LOG_OPT_ENCODE)
ctx->options &= ~LOG_OPT_ESC;
}
/* helper function for _lf_encode_bytes() to escape a single byte
@ -1825,6 +1849,36 @@ static inline char *_lf_rfc5424_escape_byte(char *start, char *stop,
return start;
}
/* Per-byte encoder used by _lf_encode_bytes() when JSON encoding is set.
 *
 * The byte pointed to by <byte> is handled as follows:
 *   - if it is flagged in <map>, it is handed over to _lf_escape_byte()
 *     together with <escape>, and the result of that call is returned
 *   - else if it is flagged in json_escape_map, it is written prefixed
 *     with '\'
 *   - else it is copied verbatim
 *
 * The function assumes that at least 1 byte is available for writing.
 * <ctx> is part of the common encoder prototype and is not used here.
 *
 * Returns the position located right after the last written byte, or
 * NULL if there is not enough room before <stop> for the '\' prefixed
 * form.
 */
static inline char *_lf_json_escape_byte(char *start, char *stop,
                                         const char *byte,
                                         const char escape, const long *map,
                                         struct lf_buildctx *ctx)
{
	const unsigned char c = (unsigned char)(*byte);

	/* bytes listed in the caller's map take precedence over JSON rules */
	if (ha_bit_test(c, map))
		return _lf_escape_byte(start, stop, *byte, escape);

	if (ha_bit_test(c, json_escape_map)) {
		/* two bytes are about to be written: prefix + byte */
		if (start + 2 >= stop)
			return NULL;
		*start++ = '\\';
	}

	*start++ = *byte;
	return start;
}
/*
* helper for lf_encode_{string,chunk}:
* encode the input bytes, input <bytes> is processed until <bytes_stop>
@ -1835,6 +1889,9 @@ static inline char *_lf_rfc5424_escape_byte(char *start, char *stop,
* characters with '\' as prefix. The same prefix should not be used as
* <escape>.
*
* When using json encoding, string will be escaped according to
* json escape map
*
* Return the address of the \0 character, or NULL on error
*/
static char *_lf_encode_bytes(char *start, char *stop,
@ -1848,7 +1905,9 @@ static char *_lf_encode_bytes(char *start, char *stop,
const char escape, const long *map,
struct lf_buildctx *ctx);
if (ctx->options & LOG_OPT_ESC)
if (ctx->options & LOG_OPT_ENCODE_JSON)
encode_byte = _lf_json_escape_byte;
else if (ctx->options & LOG_OPT_ESC)
encode_byte = _lf_rfc5424_escape_byte;
else
encode_byte = _lf_map_escape_byte;
@ -1912,19 +1971,29 @@ static char *lf_encode_chunk(char *start, char *stop,
* Write a raw string in the log string
* Take care of escape option
*
* When using json encoding, string will be escaped according
* to json escape map
*
* Return the address of the \0 character, or NULL on error
*/
static inline char *_lf_text_len(char *dst, const char *src,
size_t len, size_t size, struct lf_buildctx *ctx)
{
const long *escape_map = NULL;
if (ctx->options & LOG_OPT_ENCODE_JSON)
escape_map = json_escape_map;
else if (ctx->options & LOG_OPT_ESC)
escape_map = rfc5424_escape_map;
if (src && len) {
/* escape_string and strlcpy2 will both try to add terminating NULL-byte
* to dst
*/
if (ctx->options & LOG_OPT_ESC) {
if (escape_map) {
char *ret;
ret = escape_string(dst, dst + size, '\\', rfc5424_escape_map, src, src + len);
ret = escape_string(dst, dst + size, '\\', escape_map, src, src + len);
if (ret == NULL)
return NULL;
len = ret - dst;
@ -1979,13 +2048,13 @@ static inline char *_lf_quotetext_len(char *dst, const char *src,
/*
* Write a string in the log string
* Take care of quote, mandatory and escape options
* Take care of quote, mandatory, escape and encoding options
*
* Return the address of the \0 character, or NULL on error
*/
static char *lf_text_len(char *dst, const char *src, size_t len, size_t size, struct lf_buildctx *ctx)
{
if ((ctx->options & LOG_OPT_QUOTE))
if ((ctx->options & (LOG_OPT_QUOTE | LOG_OPT_ENCODE_JSON)))
return _lf_quotetext_len(dst, src, len, size, ctx);
else if (src && len)
return _lf_text_len(dst, src, len, size, ctx);
@ -2007,6 +2076,9 @@ static char *lf_text_len(char *dst, const char *src, size_t len, size_t size, st
*/
static char *lf_rawtext_len(char *dst, const char *src, size_t len, size_t size, struct lf_buildctx *ctx)
{
	const int json_enc = !!(ctx->options & LOG_OPT_ENCODE_JSON);

	/* with JSON encoding, a value which is not emitted from within an
	 * already started text section goes through _lf_quotetext_len();
	 * every other case is handed to _lf_text_len()
	 */
	if (json_enc && !ctx->in_text)
		return _lf_quotetext_len(dst, src, len, size, ctx);

	return _lf_text_len(dst, src, len, size, ctx);
}
@ -2075,6 +2147,69 @@ static char *lf_ip(char *dst, const struct sockaddr *sockaddr, size_t size, stru
return ret;
}
/* Logformat expr wrapper writing the boolean <value> into <dst>, which is
 * <size> bytes long, according to the encoding set in <ctx>.
 *
 * With JSON encoding, the literal true (non-zero <value>) or false (zero
 * <value>) is written, followed by a trailing NUL.
 *
 * Returns the address of the trailing NUL on success, or NULL if the output
 * did not fit in <size> bytes, if snprintf() failed, or if no supported
 * encoding is set in <ctx>.
 */
static char *lf_bool_encode(char *dst, size_t size, uint8_t value,
                            struct lf_buildctx *ctx)
{
	int written;

	if (!(ctx->options & LOG_OPT_ENCODE_JSON))
		return NULL; /* not supported */

	/* encode as a regular bool value */
	if (value)
		written = snprintf(dst, size, "true");
	else
		written = snprintf(dst, size, "false");

	/* negative: output error, >= size: truncated output */
	if (written < 0 || (size_t)written >= size)
		return NULL;

	return dst + written;
}
/* Logformat expr wrapper writing the integer <value> into <dst>, which is
 * <size> bytes long, according to the encoding and typecast settings found
 * in <ctx>:
 *   - with a bool typecast, the value is reduced to 0 or 1 and passed to
 *     lf_bool_encode()
 *   - with JSON encoding and a string typecast, the base10 number is
 *     written between double quotes
 *   - with JSON encoding and any other typecast, the bare base10 number
 *     is written
 *
 * Returns the address of the trailing NUL on success, or NULL if the output
 * did not fit in <size> bytes, if snprintf() failed, or if no supported
 * encoding is set in <ctx>.
 */
static char *lf_int_encode(char *dst, size_t size, int64_t value,
                           struct lf_buildctx *ctx)
{
	int written;

	/* either true or false */
	if (ctx->typecast == SMP_T_BOOL)
		return lf_bool_encode(dst, size, !!value, ctx);

	if (!(ctx->options & LOG_OPT_ENCODE_JSON))
		return NULL; /* not supported */

	if (ctx->typecast == SMP_T_STR) {
		/* string number (base10 with "quotes"): may be useful to
		 * work around the limited resolution of JS number types
		 * for instance
		 */
		written = snprintf(dst, size, "\"%lld\"", (long long int)value);
	}
	else {
		/* regular int64 number (base10) */
		written = snprintf(dst, size, "%lld", (long long int)value);
	}

	/* negative: output error, >= size: truncated output */
	if (written < 0 || (size_t)written >= size)
		return NULL;

	return dst + written;
}
enum lf_int_hdl {
LF_INT_LTOA = 0,
LF_INT_LLTOA,
@ -2084,12 +2219,15 @@ enum lf_int_hdl {
/*
* Logformat expr wrapper to write an integer, uses <dft_hdl> to know
* how to encode the value by default
* how to encode the value by default (if no encoding is used)
*/
static inline char *lf_int(char *dst, size_t size, int64_t value,
struct lf_buildctx *ctx,
enum lf_int_hdl dft_hdl)
{
if (ctx->options & LOG_OPT_ENCODE)
return lf_int_encode(dst, size, value, ctx);
switch (dft_hdl) {
case LF_INT_LTOA:
return ltoa_o(value, dst, size);
@ -2800,10 +2938,11 @@ const char sess_set_cookie[8] = "NPDIRU67"; /* No set-cookie, Set-cookie found a
*/
#define LOG_VARTEXT_START() do { \
ctx.in_text = 1; \
/* put the text within quotes if quoting is \
* enabled \
/* put the text within quotes if JSON encoding \
* is used or quoting is enabled \
*/ \
if (ctx.options & LOG_OPT_QUOTE) { \
if (ctx.options & \
(LOG_OPT_QUOTE | LOG_OPT_ENCODE_JSON)) { \
LOGCHAR('"'); \
} \
} while (0)
@ -2817,9 +2956,11 @@ const char sess_set_cookie[8] = "NPDIRU67"; /* No set-cookie, Set-cookie found a
if (!ctx.in_text) \
break; \
ctx.in_text = 0; \
/* add the ending quote if quoting is enabled \
/* add the ending quote if JSON encoding is \
* used or quoting is enabled \
*/ \
if (ctx.options & LOG_OPT_QUOTE) { \
if (ctx.options & \
(LOG_OPT_QUOTE | LOG_OPT_ENCODE_JSON)) { \
LOGCHAR('"'); \
} \
} while (0)
@ -2829,20 +2970,32 @@ const char sess_set_cookie[8] = "NPDIRU67"; /* No set-cookie, Set-cookie found a
* should be considered as optional metadata instead.
*/
#define LOGMETACHAR(chr) do { \
/* metadata characters are dropped as soon as an \
 * encoding option is set: the break statement leaves \
 * the do/while(0) before LOGCHAR() is reached \
 */ \
if (ctx.options & LOG_OPT_ENCODE) \
break; \
LOGCHAR(chr); \
} while (0)
/* indicate the start of a string array: emits the opening '[' through
 * LOGCHAR() when JSON encoding is set in ctx.options, nothing otherwise
 */
#define LOG_STRARRAY_START() do { \
if (ctx.options & LOG_OPT_ENCODE_JSON) \
LOGCHAR('['); \
} while (0)
/* indicate that a new element is added to the string array */
#define LOG_STRARRAY_NEXT() do { \
LOGCHAR(' '); \
if (ctx.options & LOG_OPT_ENCODE_JSON) { \
LOGCHAR(','); \
LOGCHAR(' '); \
} \
else \
LOGCHAR(' '); \
} while (0)
/* indicate the end of a string array: emits the closing ']' through
 * LOGCHAR() when JSON encoding is set in ctx.options, nothing otherwise
 */
#define LOG_STRARRAY_END() do { \
if (ctx.options & LOG_OPT_ENCODE_JSON) \
LOGCHAR(']'); \
} while (0)
/* Initializes some log data at boot */
@ -2867,6 +3020,15 @@ static void init_log()
tmp++;
}
/* Initialize the escape map for JSON strings : '"\' */
memset(json_escape_map, 0, sizeof(json_escape_map));
tmp = "\"\\";
while (*tmp) {
ha_bit_set(*tmp, json_escape_map);
tmp++;
}
/* initialize the log header encoding map : '{|}"#' should be encoded with
* '#' as prefix, as well as non-printable characters ( <32 or >= 127 ).
* URL encoding only requires '"', '#' to be encoded as well as non-
@ -3106,6 +3268,7 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t
struct ist path;
struct http_uri_parser parser;
int g_options = lf_expr->nodes.options; /* global */
int first_node = 1;
/* FIXME: let's limit ourselves to frontend logging for now. */
@ -3191,18 +3354,57 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t
lf_buildctx_prepare(&ctx, g_options, NULL);
/* fill logbuffer */
if (lf_expr_isempty(lf_expr))
if (!(ctx.options & LOG_OPT_ENCODE) && lf_expr_isempty(lf_expr))
return 0;
if (ctx.options & LOG_OPT_ENCODE_JSON)
LOGCHAR('{');
list_for_each_entry(tmp, list_format, list) {
#ifdef USE_OPENSSL
struct connection *conn;
#endif
const struct sockaddr_storage *addr;
const char *src = NULL;
const char *value_beg = NULL;
struct sample *key;
const struct buffer empty = { };
if (ctx.options & LOG_OPT_ENCODE) {
/* only consider global ctx for key encoding */
lf_buildctx_prepare(&ctx, g_options, NULL);
/* types that cannot be named such as text or separator are ignored
* when encoding is set
*/
if (tmp->type != LOG_FMT_EXPR && tmp->type != LOG_FMT_TAG)
goto next_fmt;
if (!tmp->name)
goto next_fmt; /* cannot represent anonymous field, ignore */
if (!first_node) {
if (ctx.options & LOG_OPT_ENCODE_JSON) {
LOGCHAR(',');
LOGCHAR(' ');
}
}
if (ctx.options & LOG_OPT_ENCODE_JSON) {
LOGCHAR('"');
iret = strlcpy2(tmplog, tmp->name, dst + maxsize - tmplog);
if (iret == 0)
goto out;
tmplog += iret;
LOGCHAR('"');
LOGCHAR(':');
LOGCHAR(' ');
}
first_node = 0;
}
value_beg = tmplog;
/* get the chance to consider per-node options (if not already
* set globally) for printing the value
*/
@ -3250,6 +3452,25 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t
type = SMP_T_BIN;
}
/* if encoding is set, try to preserve output type
* with respect to typecast settings
* (ie: str, sint, bool)
*/
if (ctx.options & LOG_OPT_ENCODE) {
if (ctx.typecast == SMP_T_STR ||
ctx.typecast == SMP_T_SINT ||
ctx.typecast == SMP_T_BOOL) {
/* enforce type */
type = ctx.typecast;
}
else if (key &&
(key->data.type == SMP_T_SINT ||
key->data.type == SMP_T_BOOL)) {
/* preserve type */
type = key->data.type;
}
}
if (key && !sample_convert(key, type))
key = NULL;
@ -3262,6 +3483,12 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t
0, no_escape_map,
&key->data.u.str,
&ctx);
else if (key && type == SMP_T_SINT)
ret = lf_int_encode(tmplog, dst + maxsize - tmplog,
key->data.u.sint, &ctx);
else if (key && type == SMP_T_BOOL)
ret = lf_bool_encode(tmplog, dst + maxsize - tmplog,
key->data.u.sint, &ctx);
else
ret = lf_text_len(tmplog,
key ? key->data.u.str.area : NULL,
@ -4236,12 +4463,33 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t
if (tmp->type != LOG_FMT_SEPARATOR)
last_isspace = 0; // not a separator, hence not a space
if (value_beg == tmplog) {
/* handle the case where no data was generated for the value after
* the key was already announced
*/
if (ctx.options & LOG_OPT_ENCODE_JSON) {
/* for JSON, we simply output 'null' */
iret = snprintf(tmplog, dst + maxsize - tmplog, "null");
if (iret < 0 || iret >= dst + maxsize - tmplog)
goto out;
tmplog += iret;
}
}
/* if variable text was started for the current node data, we need
* to end it
*/
LOG_VARTEXT_END();
}
/* back to global ctx (some encoding types may need to output
* ending closure)
*/
lf_buildctx_prepare(&ctx, g_options, NULL);
if (ctx.options & LOG_OPT_ENCODE_JSON)
LOGCHAR('}');
out:
/* *tmplog is an unused character */
*tmplog = '\0';