2017-04-25 17:15:31 +02:00
/* -------------------------------------------------------------------------- */
/* Copyright 2002-2016, OpenNebula Project, OpenNebula Systems */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); you may */
/* not use this file except in compliance with the License. You may obtain */
/* a copy of the License at */
/* */
/* http://www.apache.org/licenses/LICENSE-2.0 */
/* */
/* Unless required by applicable law or agreed to in writing, software */
/* distributed under the License is distributed on an "AS IS" BASIS, */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
/* See the License for the specific language governing permissions and */
/* limitations under the License. */
/* -------------------------------------------------------------------------- */
# ifndef RAFT_MANAGER_H_
# define RAFT_MANAGER_H_
# include "ActionManager.h"
2017-04-27 01:03:44 +02:00
# include "ReplicaManager.h"
# include "ReplicaRequest.h"
2017-04-25 17:15:31 +02:00
/**
 *  Entry point of the RaftManager event loop thread. It is declared with C
 *  linkage so it can be handed to C thread APIs, and it is a friend of
 *  RaftManager.
 *    @param arg opaque thread argument (presumably the RaftManager instance;
 *    confirm against the definition)
 *    @return thread exit value
 */
extern "C" void * raft_manager_loop(void *arg);
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
class RaftManager : public ActionListener
{
public :
2017-04-27 01:03:44 +02:00
/**
* State of this server
*/
enum State {
SOLO = 0 ,
CANDIDATE = 1 ,
FOLLOWER = 2 ,
LEADER = 3
} ;
2017-04-28 19:35:57 +02:00
RaftManager ( bool solo ) : term ( 0 ) , num_servers ( 0 ) , commit ( 0 )
2017-04-27 01:03:44 +02:00
{
2017-04-27 02:16:13 +02:00
pthread_mutex_init ( & mutex , 0 ) ;
2017-04-27 01:03:44 +02:00
if ( solo )
{
state = SOLO ;
}
else
{
state = FOLLOWER ;
}
am . addListener ( this ) ;
} ;
2017-04-25 17:15:31 +02:00
virtual ~ RaftManager ( ) { } ;
2017-04-27 01:03:44 +02:00
// -------------------------------------------------------------------------
2017-04-28 22:23:32 +02:00
// Raft associated actions (synchronous)
2017-04-27 01:03:44 +02:00
// -------------------------------------------------------------------------
2017-04-25 17:15:31 +02:00
/**
2017-04-28 22:23:32 +02:00
* Follower successfully replicated a log entry :
* - Increment next entry to send to follower
* - Update match entry on follower
* - Evaluate majority to apply changes to DB
2017-04-25 17:15:31 +02:00
*/
2017-04-28 22:23:32 +02:00
void replicate_success ( unsigned int follower_id ) ;
2017-04-25 17:15:31 +02:00
2017-04-27 01:03:44 +02:00
/**
2017-04-28 22:23:32 +02:00
* Follower failed to replicate a log entry because an inconsistency was
* detected ( same index , different term ) :
* - Decrease follower next_index
* - Retry ( do not wait for replica events )
2017-04-27 01:03:44 +02:00
*/
2017-04-28 22:23:32 +02:00
void replicate_failure ( unsigned int follower_id ) ;
2017-04-27 01:03:44 +02:00
/**
* Triggers a REPLICATE event , it will notify the replica threads to
* send the log to the followers
*/
2017-04-28 22:23:32 +02:00
void replicate_log ( ReplicaRequest * rr ) ;
2017-04-27 01:03:44 +02:00
/**
* Makes this server leader , and start replica threads
*/
2017-04-28 22:23:32 +02:00
void leader ( unsigned int term ) ;
2017-04-27 01:03:44 +02:00
/**
* Makes this server follower . Stop associated replication facilities
*/
2017-04-28 22:23:32 +02:00
void follower ( unsigned int term ) ;
2017-04-27 01:03:44 +02:00
/**
* Finalizes the Raft Consensus Manager
*/
2017-04-25 17:15:31 +02:00
void finalize ( )
{
am . finalize ( ) ;
}
2017-04-27 01:03:44 +02:00
/**
* Starts the Raft Consensus Manager
*/
2017-04-25 17:15:31 +02:00
int start ( ) ;
pthread_t get_thread_id ( ) const
{
return raft_thread ;
} ;
2017-04-27 01:03:44 +02:00
// -------------------------------------------------------------------------
// Raft state query functions
// -------------------------------------------------------------------------
unsigned int get_term ( )
{
unsigned int _term ;
pthread_mutex_lock ( & mutex ) ;
_term = term ;
pthread_mutex_unlock ( & mutex ) ;
return _term ;
}
unsigned int get_commit ( )
{
unsigned int _commit ;
pthread_mutex_lock ( & mutex ) ;
_commit = commit ;
pthread_mutex_unlock ( & mutex ) ;
return _commit ;
}
2017-04-27 12:42:09 +02:00
/**
* @ param leader_commit index sent by leader in a replicate xml - rpc call
* @ param index of the last record inserted in the database
* @ return the updated commit index
*/
unsigned int update_commit ( unsigned int leader_commit , unsigned int index )
{
unsigned int _commit ;
2017-04-27 01:03:44 +02:00
pthread_mutex_lock ( & mutex ) ;
2017-04-27 12:42:09 +02:00
if ( leader_commit > commit )
{
if ( index < leader_commit )
{
commit = index ;
}
else
{
commit = leader_commit ;
}
}
_commit = commit ;
2017-04-27 01:03:44 +02:00
pthread_mutex_unlock ( & mutex ) ;
2017-04-27 12:42:09 +02:00
return _commit ;
}
2017-04-27 01:03:44 +02:00
/**
2017-04-27 12:42:09 +02:00
* @ return true if the server is the leader of the zone , runs in solo mode
* or is a follower
2017-04-27 01:03:44 +02:00
*/
2017-04-27 12:42:09 +02:00
bool is_leader ( )
2017-04-27 01:03:44 +02:00
{
2017-04-27 12:42:09 +02:00
return test_state ( LEADER ) ;
}
2017-04-27 01:03:44 +02:00
2017-04-27 12:42:09 +02:00
bool is_follower ( )
{
return test_state ( FOLLOWER ) ;
2017-04-27 01:03:44 +02:00
}
/**
* Get next index to send to the follower
* @ param follower server id
* @ return - 1 on failure , the next index if success
*/
int get_next_index ( unsigned int follower_id )
{
std : : map < unsigned int , unsigned int > : : iterator it ;
unsigned int _index = - 1 ;
pthread_mutex_lock ( & mutex ) ;
it = next . find ( follower_id ) ;
if ( it ! = next . end ( ) )
{
_index = it - > second ;
}
pthread_mutex_unlock ( & mutex ) ;
return _index ;
}
2017-04-25 17:15:31 +02:00
private :
friend void * raft_manager_loop ( void * arg ) ;
2017-04-27 01:03:44 +02:00
/**
* Thread id of the main event loop
*/
2017-04-25 17:15:31 +02:00
pthread_t raft_thread ;
2017-04-27 01:03:44 +02:00
pthread_mutex_t mutex ;
2017-04-25 17:15:31 +02:00
/**
* Event engine for the RaftManager
*/
ActionManager am ;
2017-04-27 01:03:44 +02:00
/**
* Clients waiting for a log replication
*/
std : : map < unsigned int , ReplicaRequest * > requests ;
// -------------------------------------------------------------------------
// Raft state
// -------------------------------------------------------------------------
/**
* Server state
*/
State state ;
/**
* Current term
*/
unsigned int term ;
2017-04-28 19:35:57 +02:00
/**
* Number of servers in zone
*/
unsigned int num_servers ;
2017-04-27 01:03:44 +02:00
//--------------------------------------------------------------------------
// Volatile log index variables
// - commit, highest log known to be committed
2017-04-27 11:12:30 +02:00
// - applied, highest log applied to DB (in LogDB)
2017-04-27 01:03:44 +02:00
//
//---------------------------- LEADER VARIABLES ----------------------------
//
// - next, next log to send to each follower <follower, next>
// - match, highest log replicated in this server <follower, match>
// -------------------------------------------------------------------------
ReplicaManager replica_manager ;
unsigned int commit ;
std : : map < unsigned int , unsigned int > next ;
std : : map < unsigned int , unsigned int > match ;
2017-04-25 17:15:31 +02:00
// -------------------------------------------------------------------------
// Action Listener interface
// -------------------------------------------------------------------------
/**
* Event dispatcher function
*/
void user_action ( const ActionRequest & ar ) ;
/**
* Termination function
*/
void finalize_action ( const ActionRequest & ar ) ;
2017-04-27 12:42:09 +02:00
/**
* @ param s the state to check
* @ return true if the server states matches the provided one
*/
bool test_state ( State s )
{
bool _is_state ;
pthread_mutex_lock ( & mutex ) ;
_is_state = state = = s ;
pthread_mutex_unlock ( & mutex ) ;
return _is_state ;
}
2017-04-25 17:15:31 +02:00
} ;
# endif /*RAFT_MANAGER_H_*/