From d1975e2fc7d6d65b009cb1802b75cc9bb42d73fe Mon Sep 17 00:00:00 2001 From: Amaury Denoyelle Date: Thu, 8 Dec 2022 16:53:58 +0100 Subject: [PATCH] MINOR: http: extract content-length parsing from H2 Extract function h2_parse_cont_len_header() in the generic HTTP module. This allows to reuse it for all HTTP/x parsers. The function is now available as http_parse_cont_len_header(). Most notably, this will be reused in the next bugfix for the H3 parser. This is necessary to check that content-length header match the length of DATA frames. Thus, it must be backported to 2.6. (cherry picked from commit 15f3cc4b389d1e92f7d537a2321ad027cf3b5a15) Signed-off-by: Christopher Faulet (cherry picked from commit 76d3becee5c10aacabb5cb26b6776c00ca5b9ae6) Signed-off-by: Christopher Faulet --- include/haproxy/http.h | 2 ++ src/h2.c | 80 ++++------------------------------------- src/http.c | 82 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 74 deletions(-) diff --git a/include/haproxy/http.h b/include/haproxy/http.h index ee2f92208..f597ee4cd 100644 --- a/include/haproxy/http.h +++ b/include/haproxy/http.h @@ -42,6 +42,8 @@ int http_validate_scheme(const struct ist schm); struct ist http_parse_scheme(struct http_uri_parser *parser); struct ist http_parse_authority(struct http_uri_parser *parser, int no_userinfo); struct ist http_parse_path(struct http_uri_parser *parser); +int http_parse_cont_len_header(struct ist *value, unsigned long long *body_len, + int not_first); int http_header_match2(const char *hdr, const char *end, const char *name, int len); char *http_find_hdr_value_end(char *s, const char *e); diff --git a/src/h2.c b/src/h2.c index 7163a4427..f794262ee 100644 --- a/src/h2.c +++ b/src/h2.c @@ -66,78 +66,6 @@ static int has_forbidden_char(const struct ist ist, const char *start) return 0; } -/* Parse the Content-Length header field of an HTTP/2 request. 
The function - * checks all possible occurrences of a comma-delimited value, and verifies - * if any of them doesn't match a previous value. It returns <0 if a value - * differs, 0 if the whole header can be dropped (i.e. already known), or >0 - * if the value can be indexed (first one). In the last case, the value might - * be adjusted and the caller must only add the updated value. - */ -int h2_parse_cont_len_header(unsigned int *msgf, struct ist *value, unsigned long long *body_len) -{ - char *e, *n; - unsigned long long cl; - int not_first = !!(*msgf & H2_MSGF_BODY_CL); - struct ist word; - - word.ptr = value->ptr - 1; // -1 for next loop's pre-increment - e = value->ptr + value->len; - - while (++word.ptr < e) { - /* skip leading delimiter and blanks */ - if (unlikely(HTTP_IS_LWS(*word.ptr))) - continue; - - /* digits only now */ - for (cl = 0, n = word.ptr; n < e; n++) { - unsigned int c = *n - '0'; - if (unlikely(c > 9)) { - /* non-digit */ - if (unlikely(n == word.ptr)) // spaces only - goto fail; - break; - } - if (unlikely(cl > ULLONG_MAX / 10ULL)) - goto fail; /* multiply overflow */ - cl = cl * 10ULL; - if (unlikely(cl + c < cl)) - goto fail; /* addition overflow */ - cl = cl + c; - } - - /* keep a copy of the exact cleaned value */ - word.len = n - word.ptr; - - /* skip trailing LWS till next comma or EOL */ - for (; n < e; n++) { - if (!HTTP_IS_LWS(*n)) { - if (unlikely(*n != ',')) - goto fail; - break; - } - } - - /* if duplicate, must be equal */ - if (*msgf & H2_MSGF_BODY_CL && cl != *body_len) - goto fail; - - /* OK, store this result as the one to be indexed */ - *msgf |= H2_MSGF_BODY_CL; - *body_len = cl; - *value = word; - word.ptr = n; - } - /* here we've reached the end with a single value or a series of - * identical values, all matching previous series if any. The last - * parsed value was sent back into <value>.
We just have to decide - * if this occurrence has to be indexed (it's the first one) or - * silently skipped (it's not the first one) - */ - return !not_first; - fail: - return -1; -} - /* Prepare the request line into <htx> from pseudo headers stored in <phdr>. * <fields> indicates what was found so far. This should be called once at the * detection of the first general header field or at the end of the request if @@ -479,10 +407,12 @@ int h2_make_htx_request(struct http_hdr *list, struct htx *htx, unsigned int *ms } if (isteq(list[idx].n, ist("content-length"))) { - ret = h2_parse_cont_len_header(msgf, &list[idx].v, body_len); + ret = http_parse_cont_len_header(&list[idx].v, body_len, + *msgf & H2_MSGF_BODY_CL); if (ret < 0) goto fail; + *msgf |= H2_MSGF_BODY_CL; sl_flags |= HTX_SL_F_CLEN; if (ret == 0) continue; // skip this duplicate @@ -742,10 +672,12 @@ int h2_make_htx_response(struct http_hdr *list, struct htx *htx, unsigned int *m } if (isteq(list[idx].n, ist("content-length"))) { - ret = h2_parse_cont_len_header(msgf, &list[idx].v, body_len); + ret = http_parse_cont_len_header(&list[idx].v, body_len, + *msgf & H2_MSGF_BODY_CL); if (ret < 0) goto fail; + *msgf |= H2_MSGF_BODY_CL; sl_flags |= HTX_SL_F_CLEN; if (ret == 0) continue; // skip this duplicate diff --git a/src/http.c b/src/http.c index fbb995e44..71094d0ab 100644 --- a/src/http.c +++ b/src/http.c @@ -677,6 +677,88 @@ struct ist http_parse_path(struct http_uri_parser *parser) return IST_NULL; } +/* Parse Content-Length header field of an HTTP request. The function + * checks all possible occurrences of a comma-delimited value, and verifies if + * any of them doesn't match a previous value. <value> is sanitized on return + * to contain a single value if several identical values were found. + * + * <body_len> must be a valid pointer and is used to return the parsed length + * unless values differ. Also if <not_first> is true, <body_len> is assumed to + * point to previously parsed value and which must be equal to the new length.
+ * This is useful if an HTTP message contains several Content-Length headers. + * + * Returns <0 if a value differs, 0 if the whole header can be dropped (i.e. + * already known), or >0 if the value can be indexed (first one). In the last + * case, the value might be adjusted and the caller must only add the updated + * value. + */ +int http_parse_cont_len_header(struct ist *value, unsigned long long *body_len, + int not_first) +{ + char *e, *n; + unsigned long long cl; + struct ist word; + int check_prev = not_first; + + word.ptr = value->ptr - 1; // -1 for next loop's pre-increment + e = value->ptr + value->len; + + while (++word.ptr < e) { + /* skip leading delimiter and blanks */ + if (unlikely(HTTP_IS_LWS(*word.ptr))) + continue; + + /* digits only now */ + for (cl = 0, n = word.ptr; n < e; n++) { + unsigned int c = *n - '0'; + if (unlikely(c > 9)) { + /* non-digit */ + if (unlikely(n == word.ptr)) // spaces only + goto fail; + break; + } + if (unlikely(cl > ULLONG_MAX / 10ULL)) + goto fail; /* multiply overflow */ + cl = cl * 10ULL; + if (unlikely(cl + c < cl)) + goto fail; /* addition overflow */ + cl = cl + c; + } + + /* keep a copy of the exact cleaned value */ + word.len = n - word.ptr; + + /* skip trailing LWS till next comma or EOL */ + for (; n < e; n++) { + if (!HTTP_IS_LWS(*n)) { + if (unlikely(*n != ',')) + goto fail; + break; + } + } + + /* if duplicate, must be equal */ + if (check_prev && cl != *body_len) + goto fail; + + /* OK, store this result as the one to be indexed */ + *body_len = cl; + *value = word; + word.ptr = n; + check_prev = 1; + } + + /* here we've reached the end with a single value or a series of + * identical values, all matching previous series if any. The last + * parsed value was sent back into <value>.
We just have to decide + * if this occurrence has to be indexed (it's the first one) or + * silently skipped (it's not the first one) + */ + return !not_first; + fail: + return -1; +} + /* * Checks if <hdr> is exactly <name> for <len> chars, and ends with a colon. * If so, returns the position of the first non-space character relative to