MAJOR: compression: integrate support for libslz
This library is designed to emit a zlib-compatible stream with no memory
usage and to favor resource savings over compression ratio. While zlib
requires 256 kB of RAM per compression context (and can only support 4000
connections per GB of RAM), the stateless compression offered by libslz
does not need to retain buffers between subsequent calls. In theory this
slightly reduces the compression ratio but in practice it does not have
that much of an effect since the zlib window is limited to 32kB.

Libslz is available at:

    http://git.1wt.eu/web?p=libslz.git

It was designed for web compression and provides a lot of savings over
zlib in haproxy. Here are the preliminary results on a single core of a
core2-quad 3.0 GHz in 32-bit for only 300 concurrent sessions visiting the
home page of www.haproxy.org (76 kB) with the default 16kB buffers:

            BW In       BW Out      BW Saved    Ratio   memory VSZ/RSS
    zlib    237 Mbps     92 Mbps    145 Mbps    2.58     84M /  69M
    slz     733 Mbps    380 Mbps    353 Mbps    1.93    5.9M / 4.2M

So while the compression ratio is lower, the bandwidth savings are much
more important due to the significantly lower compression cost, which
allows haproxy to consume even more data from the servers. In the example
above, zlib became the bottleneck at 24% of the output bandwidth. Also the
difference in memory usage is obvious.
More tests run on a single core of a core i5-3320M, with 500 concurrent
users and the default 16kB buffers:

At 100% CPU (no limit):

            BW In       BW Out      BW Saved    Ratio   memory VSZ/RSS   hits/s
    zlib     480 Mbps   188 Mbps    292 Mbps    2.55    130M / 101M        744
    slz     1700 Mbps   810 Mbps    890 Mbps    2.10   23.7M / 9.7M       2382

At 85% CPU (limited):

            BW In       BW Out      BW Saved    Ratio   memory VSZ/RSS   hits/s
    zlib    1240 Mbps   976 Mbps    264 Mbps    1.27    130M / 100M       1738
    slz     1600 Mbps   976 Mbps    624 Mbps    1.64   23.7M / 9.7M       2210

The most important benefit really happens when the CPU usage is limited by
"maxcompcpuusage" or the BW limited by "maxcomprate": in order to preserve
resources, haproxy throttles the compression ratio until usage is within
limits. Since slz is much cheaper, the average compression ratio is much
higher and the input bandwidth is noticeably higher for one Gbps output.

Other tests made with some reference files:

                                BW In       BW Out      BW Saved    Ratio   hits/s
    daniels.html        zlib    1320 Mbps   163 Mbps    1157 Mbps   8.10     1925
                        slz     3600 Mbps   580 Mbps    3020 Mbps   6.20     5300

    tv.com/listing      zlib     980 Mbps   124 Mbps     856 Mbps   7.90      310
                        slz     3300 Mbps   553 Mbps    2747 Mbps   5.97     1100

    jquery.min.js       zlib     430 Mbps   180 Mbps     250 Mbps   2.39      547
                        slz     1470 Mbps   764 Mbps     706 Mbps   1.92     1815

    bootstrap.min.css   zlib     790 Mbps   165 Mbps     625 Mbps   4.79      777
                        slz     2450 Mbps   650 Mbps    1800 Mbps   3.77     2400

So on top of saving a lot of memory, slz is consistently 2.5-3.5 times
faster than zlib and results in providing more savings for a fixed CPU
usage. For links smaller than 100 Mbps, zlib still provides a better
compression ratio, at the expense of a much higher CPU usage.

Larger input files provide slightly higher bandwidth for both libs, at the
expense of a bit more memory usage for zlib (it converges to 256kB per
connection).
This commit is contained in:
parent
7b21877888
commit
418b8c0c41
10
Makefile
10
Makefile
@ -33,6 +33,7 @@
|
||||
# USE_ACCEPT4 : enable use of accept4() on linux. Automatic.
|
||||
# USE_MY_ACCEPT4 : use own implementation of accept4() if glibc < 2.10.
|
||||
# USE_ZLIB : enable zlib library support.
|
||||
# USE_SLZ : enable slz library instead of zlib (pick at most one).
|
||||
# USE_CPU_AFFINITY : enable pinning processes to CPU on Linux. Automatic.
|
||||
# USE_TFO : enable TCP fast open. Supported on Linux >= 3.7.
|
||||
# USE_NS : enable network namespace support. Supported on Linux >= 2.6.24.
|
||||
@ -448,6 +449,15 @@ OPTIONS_CFLAGS += -DUSE_GETADDRINFO
|
||||
BUILD_OPTIONS += $(call ignore_implicit,USE_GETADDRINFO)
|
||||
endif
|
||||
|
||||
ifneq ($(USE_SLZ),)
|
||||
# Use SLZ_INC and SLZ_LIB to force path to slz.h and libslz.{a,so} if needed.
|
||||
SLZ_INC =
|
||||
SLZ_LIB =
|
||||
OPTIONS_CFLAGS += -DUSE_SLZ $(if $(SLZ_INC),-I$(SLZ_INC))
|
||||
BUILD_OPTIONS += $(call ignore_implicit,USE_SLZ)
|
||||
OPTIONS_LDFLAGS += $(if $(SLZ_LIB),-L$(SLZ_LIB)) -lslz
|
||||
endif
|
||||
|
||||
ifneq ($(USE_ZLIB),)
|
||||
# Use ZLIB_INC and ZLIB_LIB to force path to zlib.h and libz.{a,so} if needed.
|
||||
ZLIB_INC =
|
||||
|
4
README
4
README
@ -118,7 +118,9 @@ include additional libs with ADDLIB if needed (in this case for example libdl):
|
||||
|
||||
It is also possible to include native support for ZLIB to benefit from HTTP
|
||||
compression. For this, pass "USE_ZLIB=1" on the "make" command line and ensure
|
||||
that zlib is present on the system.
|
||||
that zlib is present on the system. Alternatively it is possible to use libslz
|
||||
for a faster, memory-less, but slightly less efficient compression, by passing
|
||||
"USE_SLZ=1".
|
||||
|
||||
By default, the DEBUG variable is set to '-g' to enable debug symbols. It is
|
||||
not wise to disable it on uncommon systems, because it's often the only way to
|
||||
|
@ -23,11 +23,11 @@
|
||||
#ifndef _TYPES_COMP_H
|
||||
#define _TYPES_COMP_H
|
||||
|
||||
#ifdef USE_ZLIB
|
||||
|
||||
#if defined(USE_SLZ)
|
||||
#include <slz.h>
|
||||
#elif defined(USE_ZLIB)
|
||||
#include <zlib.h>
|
||||
|
||||
#endif /* USE_ZLIB */
|
||||
#endif
|
||||
|
||||
struct comp {
|
||||
struct comp_algo *algos;
|
||||
@ -36,14 +36,19 @@ struct comp {
|
||||
};
|
||||
|
||||
/* Per-stream compression context. The library-specific members are selected
 * at build time: USE_SLZ takes precedence over USE_ZLIB, and when neither is
 * defined only the current level is kept.
 */
struct comp_ctx {
#if defined(USE_SLZ)
	struct slz_stream strm;
	const void *direct_ptr; /* NULL or pointer to beginning of data */
	int direct_len;         /* length of direct_ptr if not NULL */
	struct buffer *queued;  /* if not NULL, data already queued */
#elif defined(USE_ZLIB)
	z_stream strm; /* zlib stream */
	void *zlib_deflate_state;
	void *zlib_window;
	void *zlib_prev;
	void *zlib_pending_buf;
	void *zlib_head;
#endif
	int cur_lvl; /* compression level currently in use */
};
|
||||
|
||||
|
@ -13,7 +13,9 @@
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef USE_ZLIB
|
||||
#if defined(USE_SLZ)
|
||||
#include <slz.h>
|
||||
#elif defined(USE_ZLIB)
|
||||
/* Note: the crappy zlib and openssl libs both define the "free_func" type.
|
||||
* That's a very clever idea to use such a generic name in general purpose
|
||||
* libraries, really... The zlib one is easier to redefine than openssl's,
|
||||
@ -61,7 +63,17 @@ static int identity_flush(struct comp_ctx *comp_ctx, struct buffer *out);
|
||||
static int identity_finish(struct comp_ctx *comp_ctx, struct buffer *out);
|
||||
static int identity_end(struct comp_ctx **comp_ctx);
|
||||
|
||||
#ifdef USE_ZLIB
|
||||
#if defined(USE_SLZ)
|
||||
|
||||
static int rfc1950_init(struct comp_ctx **comp_ctx, int level);
|
||||
static int rfc1951_init(struct comp_ctx **comp_ctx, int level);
|
||||
static int rfc1952_init(struct comp_ctx **comp_ctx, int level);
|
||||
static int rfc195x_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out);
|
||||
static int rfc195x_flush(struct comp_ctx *comp_ctx, struct buffer *out);
|
||||
static int rfc195x_finish(struct comp_ctx *comp_ctx, struct buffer *out);
|
||||
static int rfc195x_end(struct comp_ctx **comp_ctx);
|
||||
|
||||
#elif defined(USE_ZLIB)
|
||||
|
||||
static int gzip_init(struct comp_ctx **comp_ctx, int level);
|
||||
static int raw_def_init(struct comp_ctx **comp_ctx, int level);
|
||||
@ -77,7 +89,11 @@ static int deflate_end(struct comp_ctx **comp_ctx);
|
||||
const struct comp_algo comp_algos[] =
|
||||
{
|
||||
{ "identity", 8, "identity", 8, identity_init, identity_add_data, identity_flush, identity_finish, identity_end },
|
||||
#ifdef USE_ZLIB
|
||||
#if defined(USE_SLZ)
|
||||
{ "deflate", 7, "deflate", 7, rfc1950_init, rfc195x_add_data, rfc195x_flush, rfc195x_finish, rfc195x_end },
|
||||
{ "raw-deflate", 11, "deflate", 7, rfc1951_init, rfc195x_add_data, rfc195x_flush, rfc195x_finish, rfc195x_end },
|
||||
{ "gzip", 4, "gzip", 4, rfc1952_init, rfc195x_add_data, rfc195x_flush, rfc195x_finish, rfc195x_end },
|
||||
#elif defined(USE_ZLIB)
|
||||
{ "deflate", 7, "deflate", 7, deflate_init, deflate_add_data, deflate_flush, deflate_finish, deflate_end },
|
||||
{ "raw-deflate", 11, "deflate", 7, raw_def_init, deflate_add_data, deflate_flush, deflate_finish, deflate_end },
|
||||
{ "gzip", 4, "gzip", 4, gzip_init, deflate_add_data, deflate_flush, deflate_finish, deflate_end },
|
||||
@ -221,7 +237,7 @@ int http_compression_buffer_end(struct session *s, struct buffer **in, struct bu
|
||||
struct buffer *ib = *in, *ob = *out;
|
||||
char *tail;
|
||||
|
||||
#ifdef USE_ZLIB
|
||||
#if defined(USE_SLZ) || defined(USE_ZLIB)
|
||||
int ret;
|
||||
|
||||
/* flush data here */
|
||||
@ -357,7 +373,11 @@ static inline int init_comp_ctx(struct comp_ctx **comp_ctx)
|
||||
*comp_ctx = pool_alloc2(pool_comp_ctx);
|
||||
if (*comp_ctx == NULL)
|
||||
return -1;
|
||||
#ifdef USE_ZLIB
|
||||
#if defined(USE_SLZ)
|
||||
(*comp_ctx)->direct_ptr = NULL;
|
||||
(*comp_ctx)->direct_len = 0;
|
||||
(*comp_ctx)->queued = NULL;
|
||||
#elif defined(USE_ZLIB)
|
||||
zlib_used_memory += sizeof(struct comp_ctx);
|
||||
|
||||
strm = &(*comp_ctx)->strm;
|
||||
@ -427,11 +447,6 @@ static int identity_finish(struct comp_ctx *comp_ctx, struct buffer *out)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int identity_reset(struct comp_ctx *comp_ctx)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Deinit the algorithm
|
||||
*/
|
||||
@ -441,7 +456,148 @@ static int identity_end(struct comp_ctx **comp_ctx)
|
||||
}
|
||||
|
||||
|
||||
#ifdef USE_ZLIB
|
||||
#ifdef USE_SLZ
|
||||
|
||||
/* SLZ's gzip format (RFC1952). Returns < 0 on error. */
|
||||
static int rfc1952_init(struct comp_ctx **comp_ctx, int level)
|
||||
{
|
||||
if (init_comp_ctx(comp_ctx) < 0)
|
||||
return -1;
|
||||
|
||||
(*comp_ctx)->cur_lvl = !!level;
|
||||
return slz_rfc1952_init(&(*comp_ctx)->strm, !!level);
|
||||
}
|
||||
|
||||
/* SLZ's raw deflate format (RFC1951). Returns < 0 on error. */
|
||||
static int rfc1951_init(struct comp_ctx **comp_ctx, int level)
|
||||
{
|
||||
if (init_comp_ctx(comp_ctx) < 0)
|
||||
return -1;
|
||||
|
||||
(*comp_ctx)->cur_lvl = !!level;
|
||||
return slz_rfc1951_init(&(*comp_ctx)->strm, !!level);
|
||||
}
|
||||
|
||||
/* SLZ's zlib format (RFC1950). Returns < 0 on error. */
|
||||
static int rfc1950_init(struct comp_ctx **comp_ctx, int level)
|
||||
{
|
||||
if (init_comp_ctx(comp_ctx) < 0)
|
||||
return -1;
|
||||
|
||||
(*comp_ctx)->cur_lvl = !!level;
|
||||
return slz_rfc1950_init(&(*comp_ctx)->strm, !!level);
|
||||
}
|
||||
|
||||
/* Return the size of consumed data or -1. The output buffer is unused at this
|
||||
* point, we only keep a reference to the input data or a copy of them if the
|
||||
* reference is already used.
|
||||
*/
|
||||
static int rfc195x_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out)
|
||||
{
|
||||
static struct buffer *tmpbuf = &buf_empty;
|
||||
|
||||
if (in_len <= 0)
|
||||
return 0;
|
||||
|
||||
if (comp_ctx->direct_ptr && !comp_ctx->queued) {
|
||||
/* data already being pointed to, we're in front of fragmented
|
||||
* data and need a buffer now. We reuse the same buffer, as it's
|
||||
* not used out of the scope of a series of add_data()*, end().
|
||||
*/
|
||||
if (unlikely(!tmpbuf->size)) {
|
||||
/* this is the first time we need the compression buffer */
|
||||
if (b_alloc(&tmpbuf) == NULL)
|
||||
return -1; /* no memory */
|
||||
}
|
||||
b_reset(tmpbuf);
|
||||
memcpy(bi_end(tmpbuf), comp_ctx->direct_ptr, comp_ctx->direct_len);
|
||||
tmpbuf->i += comp_ctx->direct_len;
|
||||
comp_ctx->direct_ptr = NULL;
|
||||
comp_ctx->direct_len = 0;
|
||||
comp_ctx->queued = tmpbuf;
|
||||
/* fall through buffer copy */
|
||||
}
|
||||
|
||||
if (comp_ctx->queued) {
|
||||
/* data already pending */
|
||||
memcpy(bi_end(comp_ctx->queued), in_data, in_len);
|
||||
comp_ctx->queued->i += in_len;
|
||||
return in_len;
|
||||
}
|
||||
|
||||
comp_ctx->direct_ptr = in_data;
|
||||
comp_ctx->direct_len = in_len;
|
||||
return in_len;
|
||||
}
|
||||
|
||||
/* Compresses the data accumulated using add_data(), and optionally sends the
|
||||
* format-specific trailer if <finish> is non-null. <out> is expected to have a
|
||||
* large enough free non-wrapping space as verified by http_comp_buffer_init().
|
||||
* The number of bytes emitted is reported.
|
||||
*/
|
||||
static int rfc195x_flush_or_finish(struct comp_ctx *comp_ctx, struct buffer *out, int finish)
|
||||
{
|
||||
struct slz_stream *strm = &comp_ctx->strm;
|
||||
const char *in_ptr;
|
||||
int in_len;
|
||||
int out_len;
|
||||
|
||||
in_ptr = comp_ctx->direct_ptr;
|
||||
in_len = comp_ctx->direct_len;
|
||||
|
||||
if (comp_ctx->queued) {
|
||||
in_ptr = comp_ctx->queued->p;
|
||||
in_len = comp_ctx->queued->i;
|
||||
}
|
||||
|
||||
out_len = out->i;
|
||||
|
||||
if (in_ptr)
|
||||
out->i += slz_encode(strm, bi_end(out), in_ptr, in_len, !finish);
|
||||
|
||||
if (finish)
|
||||
out->i += slz_finish(strm, bi_end(out));
|
||||
|
||||
out_len = out->i - out_len;
|
||||
|
||||
/* very important, we must wipe the data we've just flushed */
|
||||
comp_ctx->direct_len = 0;
|
||||
comp_ctx->direct_ptr = NULL;
|
||||
comp_ctx->queued = NULL;
|
||||
|
||||
/* Verify compression rate limiting and CPU usage */
|
||||
if ((global.comp_rate_lim > 0 && (read_freq_ctr(&global.comp_bps_out) > global.comp_rate_lim)) || /* rate */
|
||||
(idle_pct < compress_min_idle)) { /* idle */
|
||||
if (comp_ctx->cur_lvl > 0)
|
||||
strm->level = --comp_ctx->cur_lvl;
|
||||
}
|
||||
else if (comp_ctx->cur_lvl < global.tune.comp_maxlevel && comp_ctx->cur_lvl < 1) {
|
||||
strm->level = ++comp_ctx->cur_lvl;
|
||||
}
|
||||
|
||||
/* and that's all */
|
||||
return out_len;
|
||||
}
|
||||
|
||||
/* Compresses the pending data into <out> without emitting the trailer.
 * Returns the value reported by rfc195x_flush_or_finish().
 */
static int rfc195x_flush(struct comp_ctx *comp_ctx, struct buffer *out)
{
	const int finish = 0;

	return rfc195x_flush_or_finish(comp_ctx, out, finish);
}
|
||||
|
||||
/* Compresses the pending data into <out> and requests the format-specific
 * trailer. Returns the value reported by rfc195x_flush_or_finish().
 */
static int rfc195x_finish(struct comp_ctx *comp_ctx, struct buffer *out)
{
	const int finish = 1;

	return rfc195x_flush_or_finish(comp_ctx, out, finish);
}
|
||||
|
||||
/* Terminates an SLZ compression context: only the comp_ctx itself has to be
 * released through deinit_comp_ctx(), nothing else was allocated. Always
 * returns 0.
 */
static int rfc195x_end(struct comp_ctx **comp_ctx)
{
	deinit_comp_ctx(comp_ctx);

	return 0;
}
|
||||
|
||||
#elif defined(USE_ZLIB) /* ! USE_SLZ */
|
||||
|
||||
/*
|
||||
* This is a tricky allocation function using the zlib.
|
||||
* This is based on the allocation order in deflateInit2.
|
||||
@ -719,6 +875,10 @@ static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
|
||||
__attribute__((constructor))
|
||||
static void __comp_fetch_init(void)
|
||||
{
|
||||
#ifdef USE_SLZ
|
||||
slz_make_crc_table();
|
||||
slz_prepare_dist_table();
|
||||
#endif
|
||||
acl_register_keywords(&acl_kws);
|
||||
sample_register_fetches(&sample_fetch_keywords);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user