tier/ctr: Solving DB Lock issue due to write contention from db connections

Problem: The DB on the brick is been accessed by CTR, for write and
tier migrator, for read and write. The write from tier migrator is reseting
the heat counters after a cycle. Since we are using sqlite, two connections
trying to write would cause a db lock contention. As a result CTR used to fail
to update the db.

Solution: Using the same db connection of CTR for reseting the heat counters.
1) Introducted a new IPC FOP for CTR
2) After the query do a ipc syncop to the underlying client xlator associated
   to the brick.
3) CTR in brick will catch the IPC FOP and cleat the heat counters.

Change-Id: I53306bfc08dcdba479deb4ccc154896521336150
BUG: 1260730
Signed-off-by: Joseph Fernandes <josferna@redhat.com>
Reviewed-on: http://review.gluster.org/12031
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Tested-by: Dan Lambright <dlambrig@redhat.com>
This commit is contained in:
Joseph Fernandes 2015-08-27 17:23:07 +05:30 committed by Dan Lambright
parent 9efce73fb3
commit 96af474045
12 changed files with 219 additions and 68 deletions

View File

@ -126,6 +126,7 @@ enum _gf_client_pid
enum _gf_xlator_ipc_targets {
GF_IPC_TARGET_CHANGELOG = 0,
GF_IPC_TARGET_CTR = 1
};
typedef enum _gf_boolean gf_boolean_t;

View File

@ -678,6 +678,40 @@ find_recently_changed_files_freq(gfdb_conn_node_t *_conn_node,
}
/*Libgfdb API Function: Clear the heat for all the files
*
* Arguments:
* _conn_node : GFDB Connection node
*
* Returns : if successful return 0 or
* -ve value in case of failure
**/
int
clear_files_heat (gfdb_conn_node_t *_conn_node) {
int ret = 0;
gfdb_db_operations_t *db_operations_t = NULL;
void *gf_db_connection = NULL;
CHECK_CONN_NODE(_conn_node);
db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations;
gf_db_connection = _conn_node->gfdb_connection.gf_db_connection;
if (db_operations_t->clear_files_heat_op) {
ret = db_operations_t->clear_files_heat_op (gf_db_connection);
if (ret) {
gf_msg (GFDB_DATA_STORE, GF_LOG_ERROR, 0,
LG_MSG_FIND_OP_FAILED,
"Clear files heat operation failed!");
}
}
return ret;
}
void get_gfdb_methods (gfdb_methods_t *methods)
{
methods->init_db = init_db;
@ -686,5 +720,4 @@ void get_gfdb_methods (gfdb_methods_t *methods)
methods->find_recently_changed_files = find_recently_changed_files;
methods->find_unchanged_for_time_freq = find_unchanged_for_time_freq;
methods->find_recently_changed_files_freq = find_recently_changed_files_freq;
methods->dbpath = strdup(GFDB_SQL_PARAM_DBPATH);
}

View File

@ -236,6 +236,19 @@ typedef int (*find_recently_changed_files_freq_t) (gfdb_conn_node_t *_conn_node,
int read_freq_thresold,
gf_boolean_t _clear_counters);
/*Libgfdb API Function: Clear the heat for all the files
*
* Arguments:
* _conn_node : GFDB Connection node
*
* Returns : if successful return 0 or
* -ve value in case of failure
**/
int
clear_files_heat (gfdb_conn_node_t *_conn_node);
typedef struct gfdb_methods_s {
init_db_t init_db;
fini_db_t fini_db;
@ -250,4 +263,5 @@ void get_gfdb_methods (gfdb_methods_t *methods);
typedef void (*get_gfdb_methods_t) (gfdb_methods_t *methods);
#endif

View File

@ -649,6 +649,7 @@ typedef int
gf_boolean_t _clear_counters);
typedef int (*gfdb_clear_files_heat_t)(void *db_conn);
/*Data structure holding all the above plugin function pointers*/
@ -664,10 +665,9 @@ typedef struct gfdb_db_operations {
find_unchanged_for_time_freq_op;
gfdb_find_recently_changed_files_freq_t
find_recently_changed_files_freq_op;
gfdb_clear_files_heat_t clear_files_heat_op;
} gfdb_db_operations_t;
/*******************************************************************************
*
* Database connection object: This objected is maitained by libgfdb for each

View File

@ -248,6 +248,8 @@ gf_sqlite3_fill_db_operations(gfdb_db_operations_t *gfdb_db_ops)
gf_sqlite3_find_unchanged_for_time_freq;
gfdb_db_ops->find_recently_changed_files_freq_op =
gf_sqlite3_find_recently_changed_files_freq;
gfdb_db_ops->clear_files_heat_op = gf_sqlite3_clear_files_heat;
}
@ -726,15 +728,6 @@ gf_sqlite3_find_recently_changed_files(void *db_conn,
goto out;
}
/*Clear freq counters of un-selected data*/
ret = gf_sql_clear_counters(sql_conn);
if (ret) {
gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
LG_MSG_CLEAR_COUNTER_FAILED, "Failed clearing"
" counters!");
goto out;
}
ret = 0;
out:
sqlite3_finalize(prep_stmt);
@ -820,15 +813,6 @@ gf_sqlite3_find_unchanged_for_time (void *db_conn,
goto out;
}
/*Clear freq counters of un-selected data*/
ret = gf_sql_clear_counters(sql_conn);
if (ret) {
gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
LG_MSG_CLEAR_COUNTER_FAILED, "Failed clearing"
" counters!");
goto out;
}
ret = 0;
out:
sqlite3_finalize(prep_stmt);
@ -1134,3 +1118,26 @@ out:
sqlite3_finalize(prep_stmt);
return ret;
}
int
gf_sqlite3_clear_files_heat (void *db_conn)
{
int ret = -1;
gf_sql_connection_t *sql_conn = db_conn;
CHECK_SQL_CONN (sql_conn, out);
ret = gf_sql_clear_counters (sql_conn);
if (ret) {
gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
LG_MSG_CLEAR_COUNTER_FAILED, "Failed clearing "
"files heat!");
goto out;
}
ret = 0;
out:
return ret;
}

View File

@ -279,6 +279,8 @@ int gf_sqlite3_find_recently_changed_files_freq (void *db_conn,
int read_freq_cnt,
gf_boolean_t clear_counters);
int gf_sqlite3_clear_files_heat (void *db_conn);
void gf_sqlite3_fill_db_operations (gfdb_db_operations_t *gfdb_db_ops);
#endif

View File

@ -1195,10 +1195,10 @@ gf_sql_clear_counters (gf_sql_connection_t *sql_conn)
CHECK_SQL_CONN (sql_conn, out);
query_str = "BEGIN;UPDATE "
query_str = "UPDATE "
GF_FILE_TABLE
" SET " GF_COL_READ_FREQ_CNTR " = 0 , "
GF_COL_WRITE_FREQ_CNTR " = 0 ;COMMIT;";
GF_COL_WRITE_FREQ_CNTR " = 0 ;";
ret = sqlite3_exec (sql_conn->sqlite3_db_conn, query_str, NULL, NULL,
&sql_strerror);

View File

@ -34,6 +34,7 @@ enum gf_dht_mem_types_ {
gf_dht_mt_container_t,
gf_dht_mt_octx_t,
gf_dht_mt_miginfo_t,
gf_tier_mt_bricklist_t,
gf_dht_mt_end
};
#endif

View File

@ -27,7 +27,7 @@ static void *libhandle;
static gfdb_methods_t gfdb_methods;
#define DB_QUERY_RECORD_SIZE 4096
#define PROMOTION_CYCLE_CNT 4
static int
@ -436,8 +436,7 @@ out:
* It picks up each bricks db and queries for eligible files for migration.
* The list of eligible files are populated in appropriate query files*/
static int
tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value,
void *args) {
tier_process_brick_cbk (brick_list_t *local_brick, void *args) {
int ret = -1;
char *db_path = NULL;
query_cbk_args_t *query_cbk_args = NULL;
@ -456,8 +455,12 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value,
GF_VALIDATE_OR_GOTO (this->name,
gfdb_brick_dict_info->_query_cbk_args, out);
GF_VALIDATE_OR_GOTO (this->name, value, out);
db_path = data_to_str(value);
GF_VALIDATE_OR_GOTO (this->name, local_brick, out);
GF_VALIDATE_OR_GOTO (this->name, local_brick->xlator, out);
GF_VALIDATE_OR_GOTO (this->name, local_brick->brick_db_path, out);
db_path = local_brick->brick_db_path;
/*Preparing DB parameters before init_db i.e getting db connection*/
params_dict = dict_new ();
@ -467,7 +470,7 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value,
"DB Params cannot initialized!");
goto out;
}
SET_DB_PARAM_TO_DICT(this->name, params_dict, gfdb_methods.dbpath,
SET_DB_PARAM_TO_DICT(this->name, params_dict, GFDB_SQL_PARAM_DBPATH,
db_path, ret, out);
/*Get the db connection*/
@ -508,7 +511,7 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value,
write_freq_threshold,
query_cbk_args->defrag->
read_freq_threshold,
_gf_true);
_gf_false);
}
} else {
if (query_cbk_args->defrag->write_freq_threshold == 0 &&
@ -527,7 +530,7 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value,
query_cbk_args->defrag->
write_freq_threshold,
query_cbk_args->defrag->read_freq_threshold,
_gf_true);
_gf_false);
}
}
if (ret) {
@ -535,7 +538,17 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value,
DHT_MSG_LOG_TIER_ERROR,
"FATAL: query from db failed");
goto out;
}
}
/*Clear the heat on the DB entries*/
ret = syncop_ipc (local_brick->xlator, GF_IPC_TARGET_CTR, NULL, NULL);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
DHT_MSG_LOG_TIER_ERROR, "Failed clearing the heat "
"on db %s", local_brick->brick_db_path);
goto out;
}
ret = 0;
out:
if (query_cbk_args && query_cbk_args->queryFILE) {
@ -555,6 +568,7 @@ tier_build_migration_qfile (demotion_args_t *args,
_gfdb_brick_dict_info_t gfdb_brick_dict_info;
gfdb_time_t time_in_past;
int ret = -1;
brick_list_t *local_brick = NULL;
/*
* The first time this function is called, query file will
@ -585,14 +599,18 @@ tier_build_migration_qfile (demotion_args_t *args,
gfdb_brick_dict_info.time_stamp = &time_in_past;
gfdb_brick_dict_info._gfdb_promote = is_promotion;
gfdb_brick_dict_info._query_cbk_args = query_cbk_args;
ret = dict_foreach (args->brick_list, tier_process_brick_cbk,
&gfdb_brick_dict_info);
if (ret) {
gf_msg (args->this->name, GF_LOG_ERROR, 0,
DHT_MSG_BRICK_QUERY_FAILED,
"Brick query failed\n");
goto out;
list_for_each_entry (local_brick, args->brick_list, list) {
ret = tier_process_brick_cbk (local_brick,
&gfdb_brick_dict_info);
if (ret) {
gf_msg (args->this->name, GF_LOG_ERROR, 0,
DHT_MSG_BRICK_QUERY_FAILED,
"Brick query failed\n");
goto out;
}
}
ret = 0;
out:
return ret;
}
@ -697,19 +715,19 @@ out:
}
static int
tier_get_bricklist (xlator_t *xl, dict_t *bricklist)
tier_get_bricklist (xlator_t *xl, struct list_head *local_bricklist_head)
{
xlator_list_t *child = NULL;
char *rv = NULL;
char *rh = NULL;
char localhost[256] = {0};
char *db_path = NULL;
char *brickname = NULL;
char db_name[PATH_MAX] = "";
int ret = 0;
brick_list_t *local_brick = NULL;
GF_VALIDATE_OR_GOTO ("tier", xl, out);
GF_VALIDATE_OR_GOTO ("tier", bricklist, out);
GF_VALIDATE_OR_GOTO ("tier", local_bricklist_head, out);
gethostname (localhost, sizeof (localhost));
@ -724,27 +742,38 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist)
if (gf_is_local_addr (rh)) {
local_brick = GF_CALLOC (1, sizeof(brick_list_t),
gf_tier_mt_bricklist_t);
if (!local_brick) {
goto out;
}
ret = dict_get_str(xl->options, "remote-subvolume",
&rv);
if (ret < 0)
goto out;
brickname = strrchr(rv, '/') + 1;
snprintf(db_name, sizeof(db_name), "%s.db",
brickname);
db_path = GF_CALLOC (PATH_MAX, 1, gf_common_mt_char);
if (!db_path) {
local_brick->brick_db_path =
GF_CALLOC (PATH_MAX, 1, gf_common_mt_char);
if (!local_brick->brick_db_path) {
gf_msg ("tier", GF_LOG_ERROR, 0,
DHT_MSG_LOG_TIER_STATUS,
"Faile. to allocate memory for bricklist");
goto out;
}
sprintf(db_path, "%s/%s/%s", rv,
sprintf(local_brick->brick_db_path, "%s/%s/%s", rv,
GF_HIDDEN_PATH,
db_name);
if (dict_add_dynstr_with_alloc(bricklist, "brick",
db_path))
goto out;
local_brick->xlator = xl;
list_add_tail (&(local_brick->list),
local_bricklist_head);
ret = 0;
goto out;
@ -752,19 +781,48 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist)
}
for (child = xl->children; child; child = child->next) {
ret = tier_get_bricklist(child->xlator, bricklist);
ret = tier_get_bricklist(child->xlator, local_bricklist_head);
if (ret) {
goto out;
}
}
ret = 0;
out:
GF_FREE (db_path);
if (ret) {
if (local_brick) {
GF_FREE (local_brick->brick_db_path);
}
GF_FREE (local_brick);
}
return ret;
}
void
clear_bricklist (struct list_head *brick_list)
{
brick_list_t *local_brick = NULL;
brick_list_t *temp = NULL;
if (list_empty(brick_list)) {
return;
}
list_for_each_entry_safe (local_brick, temp, brick_list, list) {
list_del (&local_brick->list);
GF_FREE (local_brick->brick_db_path);
GF_FREE (local_brick);
}
}
int
tier_start (xlator_t *this, gf_defrag_info_t *defrag)
{
dict_t *bricklist_cold = NULL;
dict_t *bricklist_hot = NULL;
struct list_head bricklist_hot = { 0 };
struct list_head bricklist_cold = { 0 };
dht_conf_t *conf = NULL;
gfdb_time_t current_time;
int freq_promote = 0;
@ -783,16 +841,11 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
conf = this->private;
bricklist_cold = dict_new();
if (!bricklist_cold)
return -1;
INIT_LIST_HEAD ((&bricklist_hot));
INIT_LIST_HEAD ((&bricklist_cold));
bricklist_hot = dict_new();
if (!bricklist_hot)
return -1;
tier_get_bricklist (conf->subvolumes[0], bricklist_cold);
tier_get_bricklist (conf->subvolumes[1], bricklist_hot);
tier_get_bricklist (conf->subvolumes[0], &bricklist_cold);
tier_get_bricklist (conf->subvolumes[1], &bricklist_hot);
gf_msg (this->name, GF_LOG_INFO, 0,
DHT_MSG_LOG_TIER_STATUS, "Begin run tier promote %d"
@ -873,7 +926,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
if (is_demotion_triggered) {
demotion_args.this = this;
demotion_args.brick_list = bricklist_hot;
demotion_args.brick_list = &bricklist_hot;
demotion_args.defrag = defrag;
demotion_args.freq_time = freq_demote;
ret_demotion = pthread_create (&demote_thread,
@ -889,9 +942,9 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
if (is_promotion_triggered) {
promotion_args.this = this;
promotion_args.brick_list = bricklist_cold;
promotion_args.brick_list = &bricklist_cold;
promotion_args.defrag = defrag;
promotion_args.freq_time = freq_promote;
promotion_args.freq_time = freq_promote * PROMOTION_CYCLE_CNT;
ret_promotion = pthread_create (&promote_thread,
NULL, &tier_promote,
&promotion_args);
@ -940,8 +993,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
ret = 0;
out:
dict_unref(bricklist_cold);
dict_unref(bricklist_hot);
clear_bricklist (&bricklist_cold);
clear_bricklist (&bricklist_hot);
return ret;
}

View File

@ -31,6 +31,7 @@
#define TIMER_SECS 3600
#include "gfdb_data_store.h"
#include "gfdb_sqlite3.h"
#include <ctype.h>
#include <sys/stat.h>
@ -56,10 +57,16 @@ typedef struct _gfdb_brick_dict_info {
query_cbk_args_t *_query_cbk_args;
} _gfdb_brick_dict_info_t;
typedef struct brick_list {
xlator_t *xlator;
char *brick_db_path;
struct list_head list;
} brick_list_t;
typedef struct _dm_thread_args {
xlator_t *this;
gf_defrag_info_t *defrag;
dict_t *brick_list;
struct list_head *brick_list;
int freq_time;
int return_value;
} promotion_args_t, demotion_args_t;

View File

@ -1379,6 +1379,36 @@ out:
fd, size, off, flags, xdata);
return 0;
}
/*******************************ctr_ipc****************************************/
/* IPC Call from tier migrator to clear the heat on the DB */
int32_t
ctr_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
{
int ret = -1;
gf_ctr_private_t *_priv = NULL;
GF_ASSERT(this);
_priv = this->private;
GF_ASSERT (_priv);
GF_ASSERT(_priv->_db_conn);
if (op != GF_IPC_TARGET_CTR)
goto wind;
ret = clear_files_heat (_priv->_db_conn);
STACK_UNWIND_STRICT (ipc, frame, ret, 0, NULL);
return 0;
wind:
STACK_WIND (frame, default_ipc_cbk, FIRST_CHILD (this),
FIRST_CHILD (this)->fops->ipc, op, xdata);
return 0;
}
/******************************************************************************/
int
@ -1596,7 +1626,9 @@ struct xlator_fops fops = {
.writev = ctr_writev,
.setattr = ctr_setattr,
/*read fops*/
.readv = ctr_readv
.readv = ctr_readv,
/* IPC call*/
.ipc = ctr_ipc
};
struct xlator_cbks cbks = {

View File

@ -17,6 +17,7 @@
#include "iatt.h"
#include "glusterfs.h"
#include "xlator.h"
#include "defaults.h"
#include "logging.h"
#include "common-utils.h"
#include <time.h>