staging/lustre/osc: use global osc_rq_pool to reduce memory usage
The per-osc request pools consume a lot of memory if there are hundreds of OSCs on one client. This will be a critical problem if the client doesn't have sufficient memory for both OSCs and applications. This patch replaces per-osc request pools with a global pool osc_rq_pool. The total memory usage is 5MB by default. And it can be set by a module parameter of OSC: "options osc osc_reqpool_mem_max=POOL_SIZE". The unit of POOL_SIZE is MB. If cl_max_rpcs_in_flight is the same for all OSCs, the memory usage of the OSC pool can be calculated as: Min(POOL_SIZE * 1M, (cl_max_rpcs_in_flight + 2) * OSC number * OST_MAXREQSIZE) Also, this patch changes the allocation logic of OSC write requests. The allocation from osc_rq_pool will only be tried after a normal allocation fails. Signed-off-by: Wu Libin <lwu@ddn.com> Signed-off-by: Wang Shilong <wshilong@ddn.com> Signed-off-by: Li Xi <lixi@ddn.com> Reviewed-on: http://review.whamcloud.com/15422 Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-6770 Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com> Reviewed-by: Andreas Dilger <andreas.dilger@intel.com> Signed-off-by: Oleg Drokin <oleg.drokin@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
ae9c46d122
commit
aefd9d714d
@ -306,8 +306,6 @@ struct obd_import {
|
||||
__u32 imp_msg_magic;
|
||||
__u32 imp_msghdr_flags; /* adjusted based on server capability */
|
||||
|
||||
struct ptlrpc_request_pool *imp_rq_pool; /* emergency request pool */
|
||||
|
||||
struct imp_at imp_at; /* adaptive timeout data */
|
||||
time_t imp_last_reply_time; /* for health check */
|
||||
};
|
||||
|
@ -500,7 +500,7 @@ struct ptlrpc_request_pool {
|
||||
/** Maximum message size that would fit into a request from this pool */
|
||||
int prp_rq_size;
|
||||
/** Function to allocate more requests for this pool */
|
||||
void (*prp_populate)(struct ptlrpc_request_pool *, int);
|
||||
int (*prp_populate)(struct ptlrpc_request_pool *, int);
|
||||
};
|
||||
|
||||
struct lu_context;
|
||||
@ -2381,11 +2381,11 @@ void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
|
||||
struct ptlrpc_request *req);
|
||||
|
||||
void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool);
|
||||
void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq);
|
||||
int ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq);
|
||||
|
||||
struct ptlrpc_request_pool *
|
||||
ptlrpc_init_rq_pool(int, int,
|
||||
void (*populate_pool)(struct ptlrpc_request_pool *, int));
|
||||
int (*populate_pool)(struct ptlrpc_request_pool *, int));
|
||||
|
||||
void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req);
|
||||
struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp,
|
||||
@ -2957,7 +2957,6 @@ void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes);
|
||||
|
||||
/* ptlrpc/llog_client.c */
|
||||
extern struct llog_operations llog_client_ops;
|
||||
|
||||
/** @} net */
|
||||
|
||||
#endif
|
||||
|
@ -626,10 +626,6 @@ static inline void obd_cleanup_client_import(struct obd_device *obd)
|
||||
CDEBUG(D_CONFIG, "%s: client import never connected\n",
|
||||
obd->obd_name);
|
||||
ptlrpc_invalidate_import(imp);
|
||||
if (imp->imp_rq_pool) {
|
||||
ptlrpc_free_rq_pool(imp->imp_rq_pool);
|
||||
imp->imp_rq_pool = NULL;
|
||||
}
|
||||
client_destroy_import(imp);
|
||||
obd->u.cli.cl_import = NULL;
|
||||
}
|
||||
|
@ -1663,7 +1663,6 @@ static void obd_zombie_export_add(struct obd_export *exp)
|
||||
static void obd_zombie_import_add(struct obd_import *imp)
|
||||
{
|
||||
LASSERT(!imp->imp_sec);
|
||||
LASSERT(!imp->imp_rq_pool);
|
||||
spin_lock(&obd_zombie_impexp_lock);
|
||||
LASSERT(list_empty(&imp->imp_zombie_chain));
|
||||
zombies_count++;
|
||||
|
@ -96,9 +96,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
|
||||
struct obd_device *dev = container_of(kobj, struct obd_device,
|
||||
obd_kobj);
|
||||
struct client_obd *cli = &dev->u.cli;
|
||||
struct ptlrpc_request_pool *pool = cli->cl_import->imp_rq_pool;
|
||||
int rc;
|
||||
unsigned long val;
|
||||
int adding, added, req_count;
|
||||
|
||||
rc = kstrtoul(buffer, 10, &val);
|
||||
if (rc)
|
||||
@ -107,8 +107,19 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
|
||||
if (val < 1 || val > OSC_MAX_RIF_MAX)
|
||||
return -ERANGE;
|
||||
|
||||
if (pool && val > cli->cl_max_rpcs_in_flight)
|
||||
pool->prp_populate(pool, val-cli->cl_max_rpcs_in_flight);
|
||||
adding = val - cli->cl_max_rpcs_in_flight;
|
||||
req_count = atomic_read(&osc_pool_req_count);
|
||||
if (adding > 0 && req_count < osc_reqpool_maxreqcount) {
|
||||
/*
|
||||
* There might be some race which will cause over-limit
|
||||
* allocation, but it is fine.
|
||||
*/
|
||||
if (req_count + adding > osc_reqpool_maxreqcount)
|
||||
adding = osc_reqpool_maxreqcount - req_count;
|
||||
|
||||
added = osc_rq_pool->prp_populate(osc_rq_pool, adding);
|
||||
atomic_add(added, &osc_pool_req_count);
|
||||
}
|
||||
|
||||
client_obd_list_lock(&cli->cl_loi_list_lock);
|
||||
cli->cl_max_rpcs_in_flight = val;
|
||||
|
@ -39,6 +39,10 @@
|
||||
|
||||
#define OAP_MAGIC 8675309
|
||||
|
||||
extern atomic_t osc_pool_req_count;
|
||||
extern unsigned int osc_reqpool_maxreqcount;
|
||||
extern struct ptlrpc_request_pool *osc_rq_pool;
|
||||
|
||||
struct lu_env;
|
||||
|
||||
enum async_flags {
|
||||
|
@ -50,9 +50,18 @@
|
||||
#include "../include/lustre_param.h"
|
||||
#include "../include/lustre_fid.h"
|
||||
#include "../include/obd_class.h"
|
||||
#include "../include/obd.h"
|
||||
#include "osc_internal.h"
|
||||
#include "osc_cl_internal.h"
|
||||
|
||||
atomic_t osc_pool_req_count;
|
||||
unsigned int osc_reqpool_maxreqcount;
|
||||
struct ptlrpc_request_pool *osc_rq_pool;
|
||||
|
||||
/* max memory used for request pool, unit is MB */
|
||||
static unsigned int osc_reqpool_mem_max = 5;
|
||||
module_param(osc_reqpool_mem_max, uint, 0444);
|
||||
|
||||
struct osc_brw_async_args {
|
||||
struct obdo *aa_oa;
|
||||
int aa_requested_nob;
|
||||
@ -1268,7 +1277,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
|
||||
if ((cmd & OBD_BRW_WRITE) != 0) {
|
||||
opc = OST_WRITE;
|
||||
req = ptlrpc_request_alloc_pool(cli->cl_import,
|
||||
cli->cl_import->imp_rq_pool,
|
||||
osc_rq_pool,
|
||||
&RQF_OST_BRW_WRITE);
|
||||
} else {
|
||||
opc = OST_READ;
|
||||
@ -3163,6 +3172,9 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
|
||||
struct client_obd *cli = &obd->u.cli;
|
||||
void *handler;
|
||||
int rc;
|
||||
int adding;
|
||||
int added;
|
||||
int req_count;
|
||||
|
||||
rc = ptlrpcd_addref();
|
||||
if (rc)
|
||||
@ -3191,15 +3203,20 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
|
||||
ptlrpc_lprocfs_register_obd(obd);
|
||||
}
|
||||
|
||||
/* We need to allocate a few requests more, because
|
||||
* brw_interpret tries to create new requests before freeing
|
||||
* previous ones, Ideally we want to have 2x max_rpcs_in_flight
|
||||
* reserved, but I'm afraid that might be too much wasted RAM
|
||||
* in fact, so 2 is just my guess and still should work. */
|
||||
cli->cl_import->imp_rq_pool =
|
||||
ptlrpc_init_rq_pool(cli->cl_max_rpcs_in_flight + 2,
|
||||
OST_MAXREQSIZE,
|
||||
ptlrpc_add_rqs_to_pool);
|
||||
/*
|
||||
* We try to control the total number of requests with a upper limit
|
||||
* osc_reqpool_maxreqcount. There might be some race which will cause
|
||||
* over-limit allocation, but it is fine.
|
||||
*/
|
||||
req_count = atomic_read(&osc_pool_req_count);
|
||||
if (req_count < osc_reqpool_maxreqcount) {
|
||||
adding = cli->cl_max_rpcs_in_flight + 2;
|
||||
if (req_count + adding > osc_reqpool_maxreqcount)
|
||||
adding = osc_reqpool_maxreqcount - req_count;
|
||||
|
||||
added = ptlrpc_add_rqs_to_pool(osc_rq_pool, adding);
|
||||
atomic_add(added, &osc_pool_req_count);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
|
||||
ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery);
|
||||
@ -3339,6 +3356,8 @@ extern struct lock_class_key osc_ast_guard_class;
|
||||
static int __init osc_init(void)
|
||||
{
|
||||
struct lprocfs_static_vars lvars = { NULL };
|
||||
unsigned int reqpool_size;
|
||||
unsigned int reqsize;
|
||||
int rc;
|
||||
|
||||
/* print an address of _any_ initialized kernel symbol from this
|
||||
@ -3354,14 +3373,45 @@ static int __init osc_init(void)
|
||||
|
||||
rc = class_register_type(&osc_obd_ops, NULL,
|
||||
LUSTRE_OSC_NAME, &osc_device_type);
|
||||
if (rc) {
|
||||
lu_kmem_fini(osc_caches);
|
||||
return rc;
|
||||
}
|
||||
if (rc)
|
||||
goto out_kmem;
|
||||
|
||||
spin_lock_init(&osc_ast_guard);
|
||||
lockdep_set_class(&osc_ast_guard, &osc_ast_guard_class);
|
||||
|
||||
/* This is obviously too much memory, only prevent overflow here */
|
||||
if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) {
|
||||
rc = -EINVAL;
|
||||
goto out_type;
|
||||
}
|
||||
|
||||
reqpool_size = osc_reqpool_mem_max << 20;
|
||||
|
||||
reqsize = 1;
|
||||
while (reqsize < OST_MAXREQSIZE)
|
||||
reqsize = reqsize << 1;
|
||||
|
||||
/*
|
||||
* We don't enlarge the request count in OSC pool according to
|
||||
* cl_max_rpcs_in_flight. The allocation from the pool will only be
|
||||
* tried after normal allocation failed. So a small OSC pool won't
|
||||
* cause much performance degression in most of cases.
|
||||
*/
|
||||
osc_reqpool_maxreqcount = reqpool_size / reqsize;
|
||||
|
||||
atomic_set(&osc_pool_req_count, 0);
|
||||
osc_rq_pool = ptlrpc_init_rq_pool(0, OST_MAXREQSIZE,
|
||||
ptlrpc_add_rqs_to_pool);
|
||||
|
||||
if (osc_rq_pool)
|
||||
return 0;
|
||||
|
||||
rc = -ENOMEM;
|
||||
|
||||
out_type:
|
||||
class_unregister_type(LUSTRE_OSC_NAME);
|
||||
out_kmem:
|
||||
lu_kmem_fini(osc_caches);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -3369,6 +3419,7 @@ static void /*__exit*/ osc_exit(void)
|
||||
{
|
||||
class_unregister_type(LUSTRE_OSC_NAME);
|
||||
lu_kmem_fini(osc_caches);
|
||||
ptlrpc_free_rq_pool(osc_rq_pool);
|
||||
}
|
||||
|
||||
MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
|
||||
|
@ -446,7 +446,7 @@ EXPORT_SYMBOL(ptlrpc_free_rq_pool);
|
||||
/**
|
||||
* Allocates, initializes and adds \a num_rq requests to the pool \a pool
|
||||
*/
|
||||
void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
|
||||
int ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
|
||||
{
|
||||
int i;
|
||||
int size = 1;
|
||||
@ -468,11 +468,11 @@ void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
|
||||
spin_unlock(&pool->prp_lock);
|
||||
req = ptlrpc_request_cache_alloc(GFP_NOFS);
|
||||
if (!req)
|
||||
return;
|
||||
return i;
|
||||
msg = libcfs_kvzalloc(size, GFP_NOFS);
|
||||
if (!msg) {
|
||||
ptlrpc_request_cache_free(req);
|
||||
return;
|
||||
return i;
|
||||
}
|
||||
req->rq_reqbuf = msg;
|
||||
req->rq_reqbuf_len = size;
|
||||
@ -481,6 +481,7 @@ void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
|
||||
list_add_tail(&req->rq_list, &pool->prp_req_list);
|
||||
}
|
||||
spin_unlock(&pool->prp_lock);
|
||||
return num_rq;
|
||||
}
|
||||
EXPORT_SYMBOL(ptlrpc_add_rqs_to_pool);
|
||||
|
||||
@ -494,7 +495,7 @@ EXPORT_SYMBOL(ptlrpc_add_rqs_to_pool);
|
||||
*/
|
||||
struct ptlrpc_request_pool *
|
||||
ptlrpc_init_rq_pool(int num_rq, int msgsize,
|
||||
void (*populate_pool)(struct ptlrpc_request_pool *, int))
|
||||
int (*populate_pool)(struct ptlrpc_request_pool *, int))
|
||||
{
|
||||
struct ptlrpc_request_pool *pool;
|
||||
|
||||
@ -512,11 +513,6 @@ ptlrpc_init_rq_pool(int num_rq, int msgsize,
|
||||
|
||||
populate_pool(pool, num_rq);
|
||||
|
||||
if (list_empty(&pool->prp_req_list)) {
|
||||
/* have not allocated a single request for the pool */
|
||||
kfree(pool);
|
||||
pool = NULL;
|
||||
}
|
||||
return pool;
|
||||
}
|
||||
EXPORT_SYMBOL(ptlrpc_init_rq_pool);
|
||||
@ -702,11 +698,10 @@ struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp,
|
||||
{
|
||||
struct ptlrpc_request *request = NULL;
|
||||
|
||||
if (pool)
|
||||
request = ptlrpc_prep_req_from_pool(pool);
|
||||
request = ptlrpc_request_cache_alloc(GFP_NOFS);
|
||||
|
||||
if (!request)
|
||||
request = ptlrpc_request_cache_alloc(GFP_NOFS);
|
||||
if (!request && pool)
|
||||
request = ptlrpc_prep_req_from_pool(pool);
|
||||
|
||||
if (request) {
|
||||
LASSERTF((unsigned long)imp > 0x1000, "%p", imp);
|
||||
|
Loading…
x
Reference in New Issue
Block a user