MINOR: http: extract content-length parsing from H2

Extract the function h2_parse_cont_len_header() into the generic HTTP module. This allows it to be reused by all HTTP/x parsers. The function is now available as http_parse_cont_len_header(). Most notably, it will be reused in the next bugfix for the H3 parser, where it is needed to check that the content-length header matches the total length of the DATA frames. Thus, it must be backported to 2.6. (cherry picked from commit 15f3cc4b389d1e92f7d537a2321ad027cf3b5a15) Signed-off-by: Christopher Faulet <cfaulet@haproxy.com> (cherry picked from commit 76d3becee5c10aacabb5cb26b6776c00ca5b9ae6) Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
2022-12-08 16:53:58 +01:00 · 2022-12-08 16:53:58 +01:00 · d1975e2fc7
commit d1975e2fc7
parent 1e4d26aee5
3 changed files with 90 additions and 74 deletions
--- a/include/haproxy/http.h
+++ b/include/haproxy/http.h
@ -42,6 +42,8 @@ int http_validate_scheme(const struct ist schm);
 struct ist http_parse_scheme(struct http_uri_parser *parser);
 struct ist http_parse_authority(struct http_uri_parser *parser, int no_userinfo);
 struct ist http_parse_path(struct http_uri_parser *parser);
 /* Parses the comma-delimited Content-Length header value in <value> and
 * reports the length through <body_len>. A non-zero <not_first> tells the
 * parser that <body_len> already holds a previously parsed length which the
 * new value(s) must equal. Returns <0 if the header is rejected, 0 if the
 * header is a duplicate that can be dropped, or >0 if the (possibly
 * rewritten) <value> is the first occurrence and can be indexed.
 */
 int http_parse_cont_len_header(struct ist *value, unsigned long long *body_len,
                                int not_first);
 /* Checks whether <hdr> is exactly <name> for <len> chars and ends with a
 * colon; <end> bounds the header text.
 */
 int http_header_match2(const char *hdr, const char *end,
                        const char *name, int len);
 char *http_find_hdr_value_end(char *s, const char *e);
--- a/src/h2.c
+++ b/src/h2.c
@ -66,78 +66,6 @@ static int has_forbidden_char(const struct ist ist, const char *start)
 	return 0;
 }
 /* Parse the Content-Length header field of an HTTP/2 request. The function
 * checks all possible occurrences of a comma-delimited value, and verifies
 * if any of them doesn't match a previous value.
 *
 * <msgf> carries the H2_MSGF_BODY_CL flag: when it is already set on entry,
 * <body_len> is assumed to hold the previously parsed length, which every new
 * value must equal. The flag is set as soon as one value has been accepted.
 * <value> is rewritten to point to the last accepted value, stripped of any
 * surrounding blanks.
 *
 * An empty header value, a value made only of blanks, and an empty list
 * element (e.g. a trailing comma) are all rejected: accepting them would
 * report success without ever having parsed a length.
 *
 * It returns <0 if a value is invalid or differs, 0 if the whole header can
 * be dropped (i.e. already known), or >0 if the value can be indexed (first
 * one). In the last case, the value might be adjusted and the caller must
 * only add the updated value.
 */
 int h2_parse_cont_len_header(unsigned int *msgf, struct ist *value, unsigned long long *body_len)
 {
 	char *e, *n;
 	unsigned long long cl;
 	int not_first = !!(*msgf & H2_MSGF_BODY_CL);
 	struct ist word;
 	word.ptr = value->ptr;
 	e = value->ptr + value->len;
 	while (1) {
 		/* nothing left to parse where a value is expected: the header
 		 * is empty, blank, or ends with an empty list element.
 		 */
 		if (word.ptr >= e)
 			goto fail;
 		/* skip leading blanks */
 		if (unlikely(HTTP_IS_LWS(*word.ptr))) {
 			word.ptr++;
 			continue;
 		}
 		/* digits only now */
 		for (cl = 0, n = word.ptr; n < e; n++) {
 			/* any non-digit byte ends up above 9 once unsigned */
 			unsigned int c = *n - '0';
 			if (unlikely(c > 9)) {
 				/* non-digit */
 				if (unlikely(n == word.ptr)) // no digit at all
 					goto fail;
 				break;
 			}
 			if (unlikely(cl > ULLONG_MAX / 10ULL))
 				goto fail; /* multiply overflow */
 			cl = cl * 10ULL;
 			if (unlikely(cl + c < cl))
 				goto fail; /* addition overflow */
 			cl = cl + c;
 		}
 		/* keep a copy of the exact cleaned value */
 		word.len = n - word.ptr;
 		/* skip trailing LWS till next comma or EOL */
 		for (; n < e; n++) {
 			if (!HTTP_IS_LWS(*n)) {
 				if (unlikely(*n != ','))
 					goto fail;
 				break;
 			}
 		}
 		/* if duplicate, must be equal */
 		if (*msgf & H2_MSGF_BODY_CL && cl != *body_len)
 			goto fail;
 		/* OK, store this result as the one to be indexed */
 		*msgf |= H2_MSGF_BODY_CL;
 		*body_len = cl;
 		*value = word;
 		/* either the end was reached and we're done, or <n> points to
 		 * a comma which must be followed by another value.
 		 */
 		if (n == e)
 			break;
 		word.ptr = n + 1;
 	}
 	/* here we've reached the end with a single value or a series of
 	 * identical values, all matching previous series if any. The last
 	 * parsed value was sent back into <value>. We just have to decide
 	 * if this occurrence has to be indexed (it's the first one) or
 	 * silently skipped (it's not the first one)
 	 */
 	return !not_first;
 fail:
 	return -1;
 }
 /* Prepare the request line into <htx> from pseudo headers stored in <phdr[]>.
 * <fields> indicates what was found so far. This should be called once at the
 * detection of the first general header field or at the end of the request if
@ -479,10 +407,12 @@ int h2_make_htx_request(struct http_hdr *list, struct htx *htx, unsigned int *ms
 		}
 		if (isteq(list[idx].n, ist("content-length"))) {
-			ret = h2_parse_cont_len_header(msgf, &list[idx].v, body_len);
+			ret = http_parse_cont_len_header(&list[idx].v, body_len,
 			                                 *msgf & H2_MSGF_BODY_CL);
 			if (ret < 0)
 				goto fail;
 			*msgf |= H2_MSGF_BODY_CL;
 			sl_flags |= HTX_SL_F_CLEN;
 			if (ret == 0)
 				continue; // skip this duplicate
@ -742,10 +672,12 @@ int h2_make_htx_response(struct http_hdr *list, struct htx *htx, unsigned int *m
 		}
 		if (isteq(list[idx].n, ist("content-length"))) {
-			ret = h2_parse_cont_len_header(msgf, &list[idx].v, body_len);
+			ret = http_parse_cont_len_header(&list[idx].v, body_len,
 			                                 *msgf & H2_MSGF_BODY_CL);
 			if (ret < 0)
 				goto fail;
 			*msgf |= H2_MSGF_BODY_CL;
 			sl_flags |= HTX_SL_F_CLEN;
 			if (ret == 0)
 				continue; // skip this duplicate
--- a/src/http.c
+++ b/src/http.c
@ -677,6 +677,88 @@ struct ist http_parse_path(struct http_uri_parser *parser)
 	return IST_NULL;
 }
 /* Parse <value> Content-Length header field of an HTTP request. The function
 * checks all possible occurrences of a comma-delimited value, and verifies if
 * any of them doesn't match a previous value. <value> is sanitized on return
 * to contain a single value if several identical values were found.
 *
 * <body_len> must be a valid pointer and is used to return the parsed length
 * unless values differ. Also if <not_first> is true, <body_len> is assumed to
 * point to previously parsed value and which must be equal to the new length.
 * This is useful if an HTTP message contains several Content-Length headers.
 *
 * An empty header value, a value made only of blanks, and an empty list
 * element (e.g. a trailing comma) are all rejected: accepting them would
 * report success while leaving <body_len> unset.
 *
 * Returns <0 if a value is invalid or differs, 0 if the whole header can be
 * dropped (i.e. already known), or >0 if the value can be indexed (first
 * one). In the last case, the value might be adjusted and the caller must
 * only add the updated value.
 */
 int http_parse_cont_len_header(struct ist *value, unsigned long long *body_len,
                                int not_first)
 {
 	char *e, *n;
 	unsigned long long cl;
 	struct ist word;
 	int check_prev = not_first;
 	word.ptr = value->ptr;
 	e = value->ptr + value->len;
 	while (1) {
 		/* nothing left to parse where a value is expected: the header
 		 * is empty, blank, or ends with an empty list element.
 		 */
 		if (word.ptr >= e)
 			goto fail;
 		/* skip leading blanks */
 		if (unlikely(HTTP_IS_LWS(*word.ptr))) {
 			word.ptr++;
 			continue;
 		}
 		/* digits only now */
 		for (cl = 0, n = word.ptr; n < e; n++) {
 			/* any non-digit byte ends up above 9 once unsigned */
 			unsigned int c = *n - '0';
 			if (unlikely(c > 9)) {
 				/* non-digit */
 				if (unlikely(n == word.ptr)) // no digit at all
 					goto fail;
 				break;
 			}
 			if (unlikely(cl > ULLONG_MAX / 10ULL))
 				goto fail; /* multiply overflow */
 			cl = cl * 10ULL;
 			if (unlikely(cl + c < cl))
 				goto fail; /* addition overflow */
 			cl = cl + c;
 		}
 		/* keep a copy of the exact cleaned value */
 		word.len = n - word.ptr;
 		/* skip trailing LWS till next comma or EOL */
 		for (; n < e; n++) {
 			if (!HTTP_IS_LWS(*n)) {
 				if (unlikely(*n != ','))
 					goto fail;
 				break;
 			}
 		}
 		/* if duplicate, must be equal */
 		if (check_prev && cl != *body_len)
 			goto fail;
 		/* OK, store this result as the one to be indexed */
 		*body_len = cl;
 		*value = word;
 		/* either the end was reached and we're done, or <n> points to
 		 * a comma which must be followed by another value.
 		 */
 		if (n == e)
 			break;
 		word.ptr = n + 1;
 		check_prev = 1;
 	}
 	/* here we've reached the end with a single value or a series of
 	 * identical values, all matching previous series if any. The last
 	 * parsed value was sent back into <value>. We just have to decide
 	 * if this occurrence has to be indexed (it's the first one) or
 	 * silently skipped (it's not the first one)
 	 */
 	return !not_first;
 fail:
 	return -1;
 }
 /*
 * Checks if <hdr> is exactly <name> for <len> chars, and ends with a colon.
 * If so, returns the position of the first non-space character relative to