http_download_file: fixed download of files >= 2GB size

* Use `unsigned long` to store the file size
* Return an error if the file size can not be found
* Avoid loading files >= 4GB on 32-bit systems

Related: #40710
This commit is contained in:
Alexey Sheplyakov 2021-08-08 13:53:45 +04:00
parent e34da5300d
commit 736c89620f
5 changed files with 154 additions and 11 deletions

View File

@ -126,6 +126,15 @@ clean:
.depend: version.h .depend: version.h
$(CPP) $(CFLAGS) -M $(ALLSRC) > .depend $(CPP) $(CFLAGS) -M $(ALLSRC) > .depend
ALL_TESTS := test_parse_content_length
test: $(ALL_TESTS)
@set -e; \
for tst in $(ALL_TESTS); do echo "$$tst"; ./$$tst; done
test_parse_content_length: test_parse_content_length.c url.c
$(CC) $(CFLAGS) -flto $(INIT_DEFS) -o $@ $<
ifeq (.depend,$(wildcard .depend)) ifeq (.depend,$(wildcard .depend))
include .depend include .depend
endif endif

View File

@ -996,7 +996,8 @@ enum return_type http_prepare(void)
do { do {
char location_full[500]; char location_full[500];
char *tmp; char *tmp;
int fd, size; int fd;
unsigned long size;
snprintf(location_full, sizeof(location_full), snprintf(location_full, sizeof(location_full),
"Please enter the name or IP address of the HTTP server, " "Please enter the name or IP address of the HTTP server, "
@ -1032,7 +1033,7 @@ enum return_type http_prepare(void)
return RETURN_ERROR; return RETURN_ERROR;
} }
log_message("HTTP: size of download %d bytes", size); log_message("HTTP: size of download %lu bytes", size);
results = load_ramdisk_fd(fd, size); results = load_ramdisk_fd(fd, size);
close(fd); close(fd);

View File

@ -0,0 +1,70 @@
#include <stdio.h>
#include <stdarg.h>
void log_message(const char *msg, ...) {
va_list args;
va_start(args, msg);
vfprintf(stderr, msg, args);
fprintf(stderr, "\n");
fflush(stderr);
}
#include "url.c" /* yes, include the source file */
static int test_parse_content_length(const char *headers, unsigned long expected, int shouldfail) {
int err;
unsigned long val;
err = parse_content_length(headers, &val);
if (err < 0 && !shouldfail) {
log_message("%s: unexpectedly failed on input '%s'", __func__, headers);
return 2;
}
if (err == 0 && shouldfail) {
log_message("%s: unexpectedly passed on invalid input '%s'", __func__, headers);
return 3;
}
if (!shouldfail && val != expected) {
log_message("%s: expected %lu, got %lu on input '%s'",
__func__, expected, val, headers);
return 5;
}
if (!shouldfail && val == expected) {
log_message("%s: OK: '%s'", __func__, headers);
}
return 0;
}
#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
int main(int argc, char **argv) {
int i, err = 0;
const char *invalid_inputs[] = {
"", /* empty */
"Abyrvalg: foobar", /* no Content-Length: */
"Content-Length: xyz", /* no digits */
"Content-Length: 12xyz", /* invalid characters */
"Content-Length: 12 34", /* stray symbols */
"Content-Length: 123456789012345678901234567890", /* too big */
};
const char *valid_inputs[] = {
"Content-Length: 1234",
"Content-Length: 1234",
"Content-Length: 1234 ",
"Content-Length: 1234 ",
"Content-Length: 1234 \r\n\r\n",
};
for (i = 0; i < ARRAY_SIZE(invalid_inputs); i++) {
err += test_parse_content_length(invalid_inputs[i], 0,
/* shouldfail = */ 1);
}
for (i = 0; i < ARRAY_SIZE(valid_inputs); i++) {
err += test_parse_content_length(valid_inputs[i], 1234,
/* shouldfail = */ 0);
}
return err;
}
/* compile command:
gcc -O2 -g -flto -o test_parse_content_length.c test_parse_content_length.c
*/

79
url.c
View File

@ -24,6 +24,7 @@
#include <sys/types.h> #include <sys/types.h>
#include <netinet/in_systm.h> #include <netinet/in_systm.h>
#include <limits.h>
#include <ctype.h> #include <ctype.h>
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include <fcntl.h>
@ -394,11 +395,70 @@ int ftp_end_data_command(int sock)
return 0; return 0;
} }
static int parse_content_length(const char *headers, unsigned long *size) {
const char *header_content_length = "Content-Length: ";
const char *hdr = NULL, *ptr = NULL, *start = NULL, *end = NULL;
int http_download_file(char * hostname, char * remotename, int * size) hdr = strstr(headers, header_content_length);
if (!hdr) {
log_message("%s: error: Content-Length header not found", __func__);
return -1;
}
start = hdr + strlen(header_content_length);
errno = 0;
*size = strtoul(start, (char **)&end, 10);
/* HTTP headers are pretty standard, hence reject a bogus header
* instead of trying to download file of unknown length.
* Also bail out if the file is too long (i.e. size can't be
* represented by unsigned long).
* As a side effect this prevents 32-bit platforms from
* loading large (>= 4GB) files into RAM
*/
if (errno != 0 && errno != ERANGE) {
log_message("%s: error: invalid Content-Length header '%s', parse error: %s", __func__, hdr, strerror(errno));
return -1;
}
if (ERANGE == errno || *size == ULONG_MAX) {
log_message("%s: error: length '%s' is too big (>= %lu)", __func__, start, ULONG_MAX);
return -1;
}
/* XXX: on non-numerical inputs strtoul returns 0 *without*
* setting errno. Therefore validate the header manually: only
* a sequence of decimal digits surrounded by whitspace is OK.
* Note: endptr points first non-digit/space character or
* end of the string
*/
for (ptr = end; *ptr; ptr++) {
if (!isspace(*ptr) && !isdigit(*ptr)) {
log_message("%s: error: invalid character %c in Content-Length header '%s'", __func__, *ptr, hdr);
return -1;
}
if (isdigit(*ptr)) {
/* Content-Length: 123 456
* *end = '4'
*/
log_message("%s: error: stray data '%s' in Content-Length header: '%s'", __func__, ptr, hdr);
return -1;
}
}
if (*size == 0) {
log_message("%s: length is zero", __func__);
return -1;
}
return 0;
}
int http_download_file(char * hostname, char * remotename, unsigned long * size)
{ {
char * buf; char * buf;
char headers[4096]; char headers[4096];
char *headers_end = NULL; /* points to 1st \r in \r\n\r\n */
char * nextChar = headers; char * nextChar = headers;
int checkedCode; int checkedCode;
struct in_addr serverAddress; struct in_addr serverAddress;
@ -406,7 +466,6 @@ int http_download_file(char * hostname, char * remotename, int * size)
int sock; int sock;
int rc; int rc;
struct sockaddr_in destPort; struct sockaddr_in destPort;
char * header_content_length = "Content-Length: ";
if ((rc = get_host_address(hostname, &serverAddress))) return rc; if ((rc = get_host_address(hostname, &serverAddress))) return rc;
@ -435,7 +494,7 @@ int http_download_file(char * hostname, char * remotename, int * size)
*nextChar = '\0'; *nextChar = '\0';
checkedCode = 0; checkedCode = 0;
while (!strstr(headers, "\r\n\r\n")) { while (!(headers_end = strstr(headers, "\r\n\r\n"))) {
polls.fd = sock; polls.fd = sock;
polls.events = POLLIN; polls.events = POLLIN;
rc = poll(&polls, 1, TIMEOUT_SECS*1000); rc = poll(&polls, 1, TIMEOUT_SECS*1000);
@ -493,10 +552,14 @@ int http_download_file(char * hostname, char * remotename, int * size)
} }
} }
if ((buf = strstr(headers, header_content_length))) *headers_end = '\0'; /* skip \r\n\r\n */
*size = charstar_to_int(buf + strlen(header_content_length)); if (parse_content_length(headers, size) < 0) {
else goto out_err;
*size = 0; }
return sock; return sock;
out_err:
*size = 0; /* unknown/error */
close(sock);
return FTPERR_SERVER_IO_ERROR;
} }

2
url.h
View File

@ -26,7 +26,7 @@ int ftp_open_connection(char * host, char * name, char * password, char * proxy)
int ftp_start_download(int sock, char * remotename, int * size); int ftp_start_download(int sock, char * remotename, int * size);
int ftp_end_data_command(int sock); int ftp_end_data_command(int sock);
int http_download_file(char * hostname, char * remotename, int * size); int http_download_file(char * hostname, char * remotename, unsigned long * size);
#define FTPERR_BAD_SERVER_RESPONSE -1 #define FTPERR_BAD_SERVER_RESPONSE -1