/*
 * auditd-plugin-clickhouse is an auditd plugin for sending auditd data
 * to ClickHouse DB.
 *
 * Copyright (C) 2019-2020 Aleksei Nikiforov <darktemplar@basealt.ru>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
 */
#include <signal.h>
#include <poll.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>

#include <algorithm>
#include <string>
#include <map>
#include <list>
#include <tuple>
#include <functional>
#include <stdexcept>
#include <fstream>
#include <sstream>
#include <vector>
#include <set>
#include <thread>
#include <mutex>
#include <chrono>
#include <utility>
#include <memory>
#include <regex>

#include <auparse.h>
#include <libaudit.h>

#include <clickhouse-cpp/client.h>

#include <boost/scope_exit.hpp>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/ini_parser.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <boost/optional.hpp>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>

#include "auditd-datatypes.hpp"
#include "logging.hpp"
#include "utils.hpp"
int runpipes[2] = { -1, -1 };
volatile bool running = true;

int write_thread_control_pipes[2] = { -1, -1 };
int write_thread_data_pipes[2] = { -1, -1 };
std::mutex g_audit_records_list_mutex;
std::list<std::shared_ptr<AuditRecord> > g_audit_records_list;

// Strict weak ordering for shared_ptr elements: compares the pointed-to
// values when both pointers are non-null, falls back to pointer comparison
// otherwise.
template <typename T>
struct SharedPointerLess
{
	constexpr bool operator()(const std::shared_ptr<T> &lhs, const std::shared_ptr<T> &rhs) const
	{
		return (lhs && rhs) ? (*lhs < *rhs) : (lhs.get() < rhs.get());
	}
};
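// Aggregated state passed to the auparse callback: field-name-to-type maps
// loaded from the datatypes description, a factory map for record field
// objects, and either a direct ClickHouse connection or the buffering
// parameters for the writer thread.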
struct CallbackData
{
	std::map<std::string, std::string> datatypes_map;
	std::list<std::tuple<std::string, std::string, std::string> > datatype_regexps_map;
	std::map<std::string, std::function<std::shared_ptr<AbstractRecordField>(const std::string &name)> > type_creation_map;
	std::set<std::string> all_fields_set;
	clickhouse::Client *clickhouse_client;
	std::string table_name;

	int write_timeout;
	int write_count_limit;

	std::string data_directory;

	CallbackData()
		: clickhouse_client(nullptr),
		  write_timeout(-1),
		  write_count_limit(-1)
	{
	}
};
static void stop_running()
{
	running = false;

	if (runpipes[1] != -1)
	{
		write(runpipes[1], "1", 1);
	}
}

static void term_handler(int sig)
{
	stop_running();
}
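// Creates one empty ClickHouse column per known audit field (plus the fixed
// record_time/record_milli/record_serial/record_node columns), so that every
// insert covers the full table schema even when a record lacks some fields.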
void initialize_data_block(
	std::map<std::string, clickhouse::ColumnRef> &data,
	const std::map<std::string, std::string> &datatypes_map,
	const std::list<std::tuple<std::string, std::string, std::string> > &datatype_regexps_map,
	const std::map<std::string, std::function<std::shared_ptr<AbstractRecordField>(const std::string &name)> > &generators)
{
	data["record_time"] = std::make_shared<clickhouse::ColumnDateTime>();
	data["record_milli"] = std::make_shared<clickhouse::ColumnUInt64>();
	data["record_serial"] = std::make_shared<clickhouse::ColumnUInt64>();
	data["record_node"] = std::make_shared<clickhouse::ColumnString>();

	for (auto iter = datatypes_map.begin(); iter != datatypes_map.end(); ++iter)
	{
		auto factory_iter = generators.find(iter->second);
		if (factory_iter != generators.end())
		{
			auto columns = factory_iter->second(sanitize_column_name(iter->first))->generateColumnsAndNames();
			for (auto column_iter = columns.begin(); column_iter != columns.end(); ++column_iter)
			{
				data[column_iter->name] = column_iter->value;
			}
		}
		else
		{
			Logger::write("Couldn't find registered type name for record with type \"%s\"", iter->second.c_str());
		}
	}

	for (auto iter = datatype_regexps_map.begin(); iter != datatype_regexps_map.end(); ++iter)
	{
		auto factory_iter = generators.find(std::get<1>(*iter));
		if (factory_iter != generators.end())
		{
			auto columns = factory_iter->second(sanitize_column_name(std::get<2>(*iter)))->generateColumnsAndNames();
			for (auto column_iter = columns.begin(); column_iter != columns.end(); ++column_iter)
			{
				data[column_iter->name] = column_iter->value;
			}
		}
		else
		{
			Logger::write("Couldn't find registered type name for record with type \"%s\"", std::get<2>(*iter).c_str());
		}
	}

	// also add "unknown_field"
	{
		auto columns = InterpretedStringArrayRecordField::createRecord(sanitize_column_name("unknown_field"))->generateColumnsAndNames();
		for (auto column_iter = columns.begin(); column_iter != columns.end(); ++column_iter)
		{
			data[column_iter->name] = column_iter->value;
		}
	}
}
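// Appends one row to the prepared column set: fixed metadata first, then every
// field of the record; finally verifies that all columns stayed the same
// length, since a mismatched block can't be inserted.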
void generate_clickhouse_columns_from_audit_records(
	std::map<std::string, clickhouse::ColumnRef> &data,
	const AuditRecord &record)
{
	ensure_not_null(data["record_time"]->As<clickhouse::ColumnDateTime>())->Append(record.time);
	ensure_not_null(data["record_milli"]->As<clickhouse::ColumnUInt64>())->Append(record.milliseconds);
	ensure_not_null(data["record_serial"]->As<clickhouse::ColumnUInt64>())->Append(record.serial);
	ensure_not_null(data["record_node"]->As<clickhouse::ColumnString>())->Append(record.node);

	for (auto iter = record.fields.begin(); iter != record.fields.end(); ++iter)
	{
		auto columns = iter->second->generateColumnsAndNames();
		iter->second->addToColumn(columns);

		for (auto column_iter = columns.begin(); column_iter != columns.end(); ++column_iter)
		{
			data[sanitize_column_name(column_iter->name)]->Append(column_iter->value);
		}
	}

	// now check that all columns have the same size
	{
		auto iter = data.begin();
		size_t count = iter->second->Size();

		for (++iter; iter != data.end(); ++iter)
		{
			if (count != iter->second->Size())
			{
				std::stringstream errorstr;
				errorstr << "Columns size doesn't match, expected " << count << ", got " << iter->second->Size();
				throw std::runtime_error(errorstr.str());
			}
		}
	}
}
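// Moves the assembled named columns into a clickhouse::Block so it can be
// passed to Client::Insert().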
void add_audit_data_to_clickhouse_block(
	const std::map<std::string, clickhouse::ColumnRef> &data,
	clickhouse::Block &block)
{
	for (auto iter = data.begin(); iter != data.end(); ++iter)
	{
		block.AppendColumn(iter->first, iter->second);
	}
}
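// auparse callback, invoked for every complete event fed into the parser.
// For each record in the event it maps audit field names to database columns
// (exact match first, then the regexp list), optionally saves the record as
// JSON for crash recovery, and either inserts it into ClickHouse directly or
// queues it for the writer thread.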
void auparse_callback(auparse_state_t *au, auparse_cb_event_t cb_event_type, void *user_data)
{
	CallbackData *callback_data = reinterpret_cast<CallbackData*>(user_data);
	if ((!callback_data) || (auparse_first_record(au) <= 0))
	{
		return;
	}

	size_t record = 1;

	for (;;)
	{
		try
		{
			if (cb_event_type == AUPARSE_CB_EVENT_READY)
			{
				std::shared_ptr<AuditRecord> audit_record = std::make_shared<AuditRecord>();

				audit_record->time = auparse_get_time(au);
				audit_record->milliseconds = auparse_get_milli(au);
				audit_record->serial = auparse_get_serial(au);
				audit_record->node = string_or_null_and_free(auparse_get_node(au));

				if (auparse_first_field(au) > 0)
				{
					do
					{
						const std::string field_name = string_or_null(auparse_get_field_name(au));

						if (field_name != "node") // skip node since it's already processed
						{
							auparse_type_t field_type = static_cast<auparse_type_t>(auparse_get_field_type(au));
							std::string database_type;
							std::string database_name;

							// first search for this field name in the datatypes map;
							// if it's not found there, check each element of the datatypes regexp container
							{
								auto iter = callback_data->datatypes_map.find(field_name);
								if (iter != callback_data->datatypes_map.end())
								{
									database_type = iter->second;
									database_name = iter->first;
								}
								else
								{
									for (auto regexp_iter = callback_data->datatype_regexps_map.begin(); regexp_iter != callback_data->datatype_regexps_map.end(); ++regexp_iter)
									{
										std::regex audit_name_regex(std::get<0>(*regexp_iter));
										if (std::regex_match(field_name, audit_name_regex))
										{
											database_type = std::get<1>(*regexp_iter);
											database_name = std::get<2>(*regexp_iter);
											break;
										}
									}
								}
							}

							if (database_type.empty() || database_name.empty())
							{
								Logger::write("Couldn't find matching database entry for field with name \"%s\" and type \"%s\", putting it into \"unknown_field\" field", field_name.c_str(), field_type_to_string(field_type).c_str());

								database_type = "string_array";
								database_name = "unknown_field";
							}
							else if (!check_field_type(field_type, database_type, field_name))
							{
								Logger::write("Warning: expected datatype doesn't match database datatype for field \"%s\": expected \"%s\", actual \"%s\"",
									field_name.c_str(), database_type.c_str(), field_type_to_string(field_type).c_str());
							}

							std::shared_ptr<AbstractRecordField> data_ptr;

							// If the field is already present in the audit record, reuse it
							// and update its value, otherwise create a new one and register it
							{
								auto data_iter = audit_record->fields.find(database_name);
								if (data_iter != audit_record->fields.end())
								{
									data_ptr = data_iter->second;
								}
								else
								{
									auto iter = callback_data->type_creation_map.find(database_type);
									if (iter != callback_data->type_creation_map.end())
									{
										data_ptr = iter->second(database_name);
									}
									else
									{
										Logger::write("Warning: no creator function found for data type \"%s\", using \"string\" as fallback", database_type.c_str());
										data_ptr = InterpretedStringRecordField::createRecord(database_name);
									}

									audit_record->fields[database_name] = data_ptr;
								}
							}

							data_ptr->addOrUpdateValue(au);
						}
					} while (auparse_next_field(au) > 0);
				}

				// first add all missing fields, keep data empty
				{
					auto missing_fields = callback_data->all_fields_set;

					for (auto iter = audit_record->fields.begin(); iter != audit_record->fields.end(); ++iter)
					{
						missing_fields.erase(iter->first);
					}

					for (auto iter = missing_fields.begin(); iter != missing_fields.end(); ++iter)
					{
						std::string type_name;

						auto type_iter = callback_data->datatypes_map.find(*iter);
						if (type_iter != callback_data->datatypes_map.end())
						{
							type_name = type_iter->second;
						}
						else
						{
							for (auto regex_type_iter = callback_data->datatype_regexps_map.begin(); regex_type_iter != callback_data->datatype_regexps_map.end(); ++regex_type_iter)
							{
								if (*iter == std::get<2>(*regex_type_iter))
								{
									type_name = std::get<1>(*regex_type_iter);
									break;
								}
							}
						}

						if (type_name.empty() && (*iter == "unknown_field"))
						{
							type_name = "string_array";
						}

						if (!type_name.empty())
						{
							auto factory_iter = callback_data->type_creation_map.find(type_name);
							if (factory_iter != callback_data->type_creation_map.end())
							{
								audit_record->fields[*iter] = factory_iter->second(*iter);
							}
						}
						else
						{
							Logger::write("Couldn't find registered type name for record with name \"%s\"", iter->c_str());
							continue;
						}
					}
				}

				if (not callback_data->data_directory.empty())
				{
					audit_record->filename = callback_data->data_directory + boost::filesystem::path::separator + generate_name_for_audit_record(*audit_record);

					boost::property_tree::write_json(audit_record->filename, audit_record->toPtree());
				}

				if (callback_data->clickhouse_client && (!callback_data->table_name.empty()))
				{
					// send data directly
					clickhouse::Block block;
					std::map<std::string, clickhouse::ColumnRef> data;

					initialize_data_block(data, callback_data->datatypes_map, callback_data->datatype_regexps_map, callback_data->type_creation_map);
					generate_clickhouse_columns_from_audit_records(data, *audit_record);
					add_audit_data_to_clickhouse_block(data, block);

					callback_data->clickhouse_client->Insert(callback_data->table_name, block);

					// Data written, remove it
					if (not audit_record->filename.empty())
					{
						boost::filesystem::remove(audit_record->filename);
					}
				}
				else
				{
					// send data to buffering thread and notify it if necessary
					size_t current_count = 0;

					{ // lock
						std::lock_guard<std::mutex> audit_records_list_lock(g_audit_records_list_mutex);
						g_audit_records_list.push_front(audit_record);
						current_count = g_audit_records_list.size();
					} // unlock

					if ((callback_data->write_count_limit > 0) && (current_count % static_cast<size_t>(callback_data->write_count_limit) == 0))
					{
						if (write_thread_data_pipes[1] != -1)
						{
							write(write_thread_data_pipes[1], "1", 1);
						}
					}
				}
			}
		}
		catch (const std::exception &e)
		{
			Logger::write("Caught exception while processing audit record %zu/%zu: %s", record, (size_t) auparse_get_num_records(au), e.what());
		}
		catch (...)
		{
			Logger::write("Caught unknown exception while processing audit record %zu/%zu", record, (size_t) auparse_get_num_records(au));
		}

		if (auparse_get_num_records(au) > record)
		{
			++record;
			auparse_next_record(au);
		}
		else
		{
			break;
		}
	}
}
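// Writer thread: sleeps in poll() until the configured timeout expires, the
// data pipe signals that write_count_limit records have accumulated, or the
// control pipe requests shutdown. It then flushes the shared record list into
// ClickHouse in one block and removes the corresponding recovery files.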
void writer_thread_function(clickhouse::Client &client, const std::string &table_name, int timeout, int count_limit, const std::string &data_directory)
{
	struct pollfd pollfds[2];
	pollfds[0].fd = write_thread_control_pipes[0];
	pollfds[1].fd = write_thread_data_pipes[0];

	auto current_time = std::chrono::steady_clock::now();
	auto last_time = current_time;

	bool thread_keep_running = true;

	while (thread_keep_running)
	{
		pollfds[0].events = POLLIN | POLLHUP;
		pollfds[0].revents = 0;
		pollfds[1].events = POLLIN | POLLHUP;
		pollfds[1].revents = 0;

		int res = poll(pollfds, sizeof(pollfds) / sizeof(pollfds[0]), timeout);
		current_time = std::chrono::steady_clock::now();

		bool write_data = false;
		bool poll_timeout = false;

		if (res < 0)
		{
			if (errno != EINTR)
			{
				// error occurred, finish running
				Logger::write("Poll returned error: %d", errno);
				stop_running();
				thread_keep_running = false;
			}
		}
		else if (res == 0)
		{
			// timeout
			write_data = true;
			poll_timeout = true;
		}
		else if (pollfds[0].revents & POLLIN)
		{
			// input data on the control pipe means a shutdown request
			thread_keep_running = false;
		}
		else if ((pollfds[0].revents & POLLHUP) || (pollfds[0].revents & POLLERR))
		{
			// pipe closed too early, an error
			Logger::write("Read returned pollhup or pollerr");
			stop_running();
			thread_keep_running = false;
		}
		else if (pollfds[1].revents & POLLIN)
		{
			// data was received and the record count exceeded the limit, so write it
			{
				char c;
				read(pollfds[1].fd, &c, sizeof(c));
			}

			write_data = true;
		}
		else if ((pollfds[1].revents & POLLHUP) || (pollfds[1].revents & POLLERR))
		{
			// pipe closed too early, an error
			Logger::write("Read returned pollhup or pollerr");
			stop_running();
			thread_keep_running = false;
		}

		if (write_data || (!thread_keep_running))
		{
			try
			{
				// write all saved-up data and log about it
				std::list<std::shared_ptr<AuditRecord> > local_audit_records_list;

				{ // lock
					std::lock_guard<std::mutex> audit_records_list_lock(g_audit_records_list_mutex);
					local_audit_records_list = g_audit_records_list;
				} // unlock

				bool write_and_log = (poll_timeout
					|| (!thread_keep_running)
					|| ((count_limit > 0) && (local_audit_records_list.size() >= static_cast<size_t>(count_limit))));

				if (write_and_log && (!local_audit_records_list.empty()))
				{
					Logger::write("Writer thread: preparing to write %zu elements", local_audit_records_list.size());

					clickhouse::Block block;
					std::map<std::string, clickhouse::ColumnRef> data;

					initialize_data_block(data, get_datatypes_map(), get_datatype_regexps_map(), get_type_creation_map());

					for (auto iter = local_audit_records_list.rbegin(); iter != local_audit_records_list.rend(); ++iter)
					{
						generate_clickhouse_columns_from_audit_records(data, **iter);
					}

					add_audit_data_to_clickhouse_block(data, block);
					client.Insert(table_name, block);

					if (not data_directory.empty())
					{
						for (auto iter = local_audit_records_list.rbegin(); iter != local_audit_records_list.rend(); ++iter)
						{
							try
							{
								boost::filesystem::remove((*iter)->filename);
							}
							catch (const std::exception &e)
							{
								Logger::write("Writer thread: caught exception while trying to remove data file \"%s\": %s", (*iter)->filename.c_str(), e.what());
							}
							catch (...)
							{
								Logger::write("Writer thread: caught unknown exception while trying to remove data file \"%s\"", (*iter)->filename.c_str());
							}
						}
					}

					// successfully wrote data, now remove all written data
					{ // lock
						std::lock_guard<std::mutex> audit_records_list_lock(g_audit_records_list_mutex);
						g_audit_records_list.resize(g_audit_records_list.size() - local_audit_records_list.size());
					} // unlock
				}

				if (write_and_log)
				{
					Logger::write("Wrote %zu records in last %zu seconds",
						local_audit_records_list.size(),
						static_cast<size_t>(std::chrono::duration_cast<std::chrono::seconds>(current_time - last_time).count()));
				}
			}
			catch (const std::exception &e)
			{
				Logger::write("Writer thread: caught exception: %s", e.what());
			}
			catch (...)
			{
				Logger::write("Writer thread: caught unknown exception");
			}

			last_time = current_time;
		}
	}
}
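// Maps an audit datatype to the ClickHouse column definition(s) for one field.
// A "string" field becomes a single Nullable(String) column, while "integer"
// and "string_array" fields expand into two columns each; e.g. (illustrative,
// for a hypothetical field named "uid" of type "integer"):
//   uid_IntValue Nullable(Int64), uid_InterpretedValue LowCardinality(Nullable(String))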
std::string construct_clickhouse_datatype_string(const std::string &name, const std::string &audit_type)
{
	static const std::map<std::string, std::vector<std::string> > audit_table_map = {
		{ "string", { " Nullable(String)" } },
		{ "integer", { "_IntValue Nullable(Int64)", "_InterpretedValue LowCardinality(Nullable(String))" } },
		{ "string_array", { "_Name Array(String)", "_Value Array(String)" } }
	};

	std::vector<std::string> clickhouse_types;

	auto iter = audit_table_map.find(audit_type);
	if (iter != audit_table_map.end())
	{
		clickhouse_types = iter->second;
	}
	else
	{
		// Fallback to string
		Logger::write("Warning: unknown database type for record name \"%s\"", name.c_str());
		clickhouse_types.push_back(" Nullable(String)");
	}

	std::stringstream result;

	result << sanitize_column_name(name) << clickhouse_types[0];

	for (size_t i = 1; i < clickhouse_types.size(); ++i)
	{
		result << ", " << sanitize_column_name(name) << clickhouse_types[i];
	}

	return result.str();
}
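// Plugin entry point: auditd starts this binary with a single config file
// argument and streams audit events to it on stdin.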
int main(int argc, char **argv)
{
	if (argc != 2)
	{
		Logger::write("Error: USAGE: %s config", argv[0]);
		return -1;
	}

	try
	{
		clickhouse::ClientOptions client_options;
		std::string table_name = "AuditData";
		std::string datatypes_filename;
		std::string data_directory;

		int write_timeout = -1;
		int write_count_limit = -1;
		bool use_writer_thread = false;

		if (pipe(runpipes) < 0)
		{
			throw std::runtime_error("Failed to create pipes");
		}

		BOOST_SCOPE_EXIT(&runpipes)
		{
			close(runpipes[0]);
			close(runpipes[1]);
			runpipes[0] = -1;
			runpipes[1] = -1;
		} BOOST_SCOPE_EXIT_END;

		{
			struct sigaction sa;
			sa.sa_flags = 0;
			sigemptyset(&sa.sa_mask);
			sa.sa_handler = term_handler;
			if (sigaction(SIGTERM, &sa, NULL) < 0)
			{
				throw std::runtime_error("Failed to set sigterm handler");
			}
		}

		if (pipe(write_thread_control_pipes) < 0)
		{
			throw std::runtime_error("Failed to create pipes");
		}

		BOOST_SCOPE_EXIT(&write_thread_control_pipes)
		{
			close(write_thread_control_pipes[0]);
			close(write_thread_control_pipes[1]);
			write_thread_control_pipes[0] = -1;
			write_thread_control_pipes[1] = -1;
		} BOOST_SCOPE_EXIT_END;

		if (pipe(write_thread_data_pipes) < 0)
		{
			throw std::runtime_error("Failed to create pipes");
		}

		BOOST_SCOPE_EXIT(&write_thread_data_pipes)
		{
			close(write_thread_data_pipes[0]);
			close(write_thread_data_pipes[1]);
			write_thread_data_pipes[0] = -1;
			write_thread_data_pipes[1] = -1;
		} BOOST_SCOPE_EXIT_END;
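		/* A minimal example config (values are illustrative, keys match the
		 * optional_set() calls below):
		 *
		 *   [General]
		 *   DatatypesDescriptionFile = /etc/audisp/datatypes.conf
		 *   WriteTimeout = 10
		 *   WriteCountLimit = 1000
		 *   DataDirectory = /var/lib/auditd-plugin-clickhouse
		 *
		 *   [Connection]
		 *   TableName = AuditData
		 *   Hostname = localhost
		 *   Port = 9000
		 *
		 *   [Logging]
		 *   Logfile = /var/log/auditd-plugin-clickhouse.log
		 */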
		/* First read config */
		{
			boost::property_tree::ptree clickhouse_config_tree;
			std::ifstream stream(argv[1]);
			boost::property_tree::read_ini(stream, clickhouse_config_tree);

			auto general = clickhouse_config_tree.get_child_optional("General");
			if (general)
			{
				optional_set(datatypes_filename, *general, "DatatypesDescriptionFile");
				optional_set(write_timeout, *general, "WriteTimeout");
				optional_set(write_count_limit, *general, "WriteCountLimit");
				optional_set(data_directory, *general, "DataDirectory");

				if (!data_directory.empty())
				{
					try
					{
						if (boost::filesystem::create_directory(boost::filesystem::path(data_directory)))
						{
							// if the directory was just created, set permissions
							boost::filesystem::permissions(boost::filesystem::path(data_directory), boost::filesystem::owner_all);
						}
					}
					catch (const std::exception &exc)
					{
						Logger::write("Caught exception while creating data directory: %s", exc.what());
					}
					catch (...)
					{
						Logger::write("Caught unknown exception while creating data directory");
					}
				}

				// WriteTimeout is configured in seconds, poll() expects milliseconds
				write_timeout = (write_timeout > 0) ? (write_timeout * 1000) : -1;
			}

			auto client_connection = clickhouse_config_tree.get_child_optional("Connection");
			if (client_connection)
			{
				optional_set(table_name, *client_connection, "TableName", &is_valid_table_name);
				optional_set(client_options.host, *client_connection, "Hostname");
				optional_set(client_options.port, *client_connection, "Port");
				optional_set(client_options.default_database, *client_connection, "DefaultDatabase");
				optional_set(client_options.user, *client_connection, "Username");
				optional_set(client_options.password, *client_connection, "Password");
				optional_set(client_options.ping_before_query, *client_connection, "PingBeforeQuery");
				optional_set(client_options.send_retries, *client_connection, "SendRetries");

				{
					auto element = client_connection->get_child_optional("RetryTimeout");
					if (element)
					{
						client_options.retry_timeout = std::chrono::seconds(element->get_value<int>());
					}
				}

				{
					auto element = client_connection->get_child_optional("CompressionMethod");
					if (element)
					{
						const auto value = element->get_value<std::string>();
						const std::map<std::string, clickhouse::CompressionMethod> compression_method_map =
						{
							{ "None", clickhouse::CompressionMethod::None },
							{ "LZ4", clickhouse::CompressionMethod::LZ4 },
						};

						auto iter = compression_method_map.find(value);
						if (iter != compression_method_map.end())
						{
							client_options.compression_method = iter->second;
						}
						else
						{
							client_options.compression_method = clickhouse::CompressionMethod::None;
						}
					}
				}
			}

			auto logging = clickhouse_config_tree.get_child_optional("Logging");
			if (logging)
			{
				std::string logfilename;
				optional_set(logfilename, *logging, "Logfile");
				if (!logfilename.empty())
				{
					Logger::open(logfilename.c_str());
				}
			}
		}
		Logger::initialize();
		Logger::dump();

		read_datatypes_map(datatypes_filename);

		// If no limits are set, don't use the special writer thread
		use_writer_thread = ((write_timeout > 0) || (write_count_limit > 0));

		/* Now connect to clickhouse */
		clickhouse::Client client(client_options);

		CallbackData callback_data;
		callback_data.datatypes_map = get_datatypes_map();
		callback_data.datatype_regexps_map = get_datatype_regexps_map();
		callback_data.type_creation_map = get_type_creation_map();

		if (!use_writer_thread)
		{
			callback_data.clickhouse_client = &client;
			callback_data.table_name = table_name;
		}

		callback_data.write_timeout = write_timeout;
		callback_data.write_count_limit = write_count_limit;

		callback_data.data_directory = data_directory;
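		// Build the table schema from the datatypes description: fixed metadata
		// columns plus the columns produced by construct_clickhouse_datatype_string()
		// for every known field, e.g. (illustrative):
		//   CREATE TABLE IF NOT EXISTS AuditData (record_time DateTime, ...,
		//     unknown_field_Name Array(String), unknown_field_Value Array(String))
		//   ENGINE = MergeTree ORDER BY (record_time, record_milli, record_serial, record_node)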
		{
			std::stringstream str;
			str << "CREATE TABLE IF NOT EXISTS " << table_name << " (record_time DateTime, record_milli UInt64, record_serial UInt64, record_node String";

			for (auto iter = callback_data.datatypes_map.begin(); iter != callback_data.datatypes_map.end(); ++iter)
			{
				str << ", " << construct_clickhouse_datatype_string(iter->first, iter->second);
				callback_data.all_fields_set.insert(iter->first);
			}

			for (auto iter = callback_data.datatype_regexps_map.begin(); iter != callback_data.datatype_regexps_map.end(); ++iter)
			{
				str << ", " << construct_clickhouse_datatype_string(std::get<2>(*iter), std::get<1>(*iter));
				callback_data.all_fields_set.insert(std::get<2>(*iter));
			}

			str << ", " << construct_clickhouse_datatype_string("unknown_field", "string_array");
			callback_data.all_fields_set.insert("unknown_field");

			str << ") ENGINE = MergeTree ORDER BY (record_time, record_milli, record_serial, record_node)";

			client.Execute(str.str());
		}
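		// Crash recovery: records saved as JSON by a previous run are loaded
		// back, ordered via SharedPointerLess, and re-sent the same way as
		// fresh records.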
		if (not data_directory.empty())
		{
			// read all previously saved data, send it to the writer thread or write it right here,
			// then remove the written files
			std::set<std::shared_ptr<AuditRecord>, SharedPointerLess<AuditRecord> > audit_records_set;

			for (auto iter = boost::filesystem::directory_iterator(data_directory); iter != boost::filesystem::directory_iterator(); ++iter)
			{
				Logger::write("Reading saved file: %s", iter->path().c_str());

				try
				{
					boost::property_tree::ptree file_data;
					boost::property_tree::read_json(iter->path().native(), file_data);

					auto audit_record = AuditRecord::fromPtree(file_data);
					audit_record->filename = iter->path().native();
					audit_records_set.insert(audit_record);
				}
				catch (const std::exception &exc)
				{
					Logger::write("Error while reading saved file \"%s\": %s", iter->path().c_str(), exc.what());
				}
				catch (...)
				{
					Logger::write("Unknown error while reading saved file \"%s\"", iter->path().c_str());
				}
			}

			if (use_writer_thread)
			{
				// send data to buffering thread and notify it if necessary
				size_t current_count = 0;

				{ // lock
					std::lock_guard<std::mutex> audit_records_list_lock(g_audit_records_list_mutex);

					for (auto iter = audit_records_set.begin(); iter != audit_records_set.end(); ++iter)
					{
						g_audit_records_list.push_front(*iter);
					}

					// remember the list size, so the notification below can fire
					current_count = g_audit_records_list.size();
				} // unlock

				if ((write_count_limit > 0) && (current_count >= static_cast<size_t>(write_count_limit)))
				{
					if (write_thread_data_pipes[1] != -1)
					{
						write(write_thread_data_pipes[1], "1", 1);
					}
				}

				Logger::write("Sent all saved data to writer thread");
			}
			else
			{
				std::map<std::string, clickhouse::ColumnRef> data_map;
				clickhouse::Block block;

				initialize_data_block(data_map, get_datatypes_map(), get_datatype_regexps_map(), get_type_creation_map());

				for (auto iter = audit_records_set.begin(); iter != audit_records_set.end(); ++iter)
				{
					generate_clickhouse_columns_from_audit_records(data_map, **iter);
				}

				add_audit_data_to_clickhouse_block(data_map, block);
				client.Insert(table_name, block);

				for (auto iter = audit_records_set.begin(); iter != audit_records_set.end(); ++iter)
				{
					try
					{
						boost::filesystem::remove((*iter)->filename);
					}
					catch (const std::exception &e)
					{
						Logger::write("Caught exception while trying to remove data file \"%s\": %s", (*iter)->filename.c_str(), e.what());
					}
					catch (...)
					{
						Logger::write("Caught unknown exception while trying to remove data file \"%s\"", (*iter)->filename.c_str());
					}
				}
			}
		}
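		// The writer thread is held in a unique_ptr whose deleter signals the
		// control pipe and joins the thread, so shutdown is handled even when
		// leaving this scope via an exception.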
		std::unique_ptr<std::thread, void (*)(std::thread*)> writer_thread(
			use_writer_thread ? new std::thread(
				writer_thread_function,
				std::ref(client),
				std::cref(table_name),
				write_timeout,
				write_count_limit,
				std::cref(data_directory)
			) : nullptr,
			[] (std::thread *thread) {
				if (thread)
				{
					if (thread->joinable())
					{
						if (write_thread_control_pipes[1] != -1)
						{
							write(write_thread_control_pipes[1], "1", 1);
						}

						thread->join();
					}

					delete thread;
				}
			});
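		// The parser runs in feed mode: raw bytes from stdin are pushed into it
		// and auparse_callback fires once a complete event has been assembled.
		// The deleter flushes any partially buffered event before destroying it.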
		std::unique_ptr<auparse_state_t, void (*)(auparse_state_t*)> au(auparse_init(AUSOURCE_FEED, 0), [] (auparse_state_t *obj) {
			if (obj)
			{
				auparse_flush_feed(obj);
				auparse_destroy(obj);
			}
		});

		if (!au)
		{
			throw std::runtime_error("Failed to initialize audit");
		}

		auparse_add_callback(au.get(), auparse_callback, &callback_data, NULL);
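		// Main loop: block in poll() on stdin (audit data from auditd) and on
		// runpipes[0], which the SIGTERM handler uses to request shutdown.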
		char data[MAX_AUDIT_MESSAGE_LENGTH];
		struct pollfd pollfds[2];
		pollfds[0].fd = STDIN_FILENO;
		pollfds[1].fd = runpipes[0];

		while (running)
		{
			pollfds[0].events = POLLIN | POLLHUP;
			pollfds[0].revents = 0;
			pollfds[1].events = POLLIN | POLLHUP;
			pollfds[1].revents = 0;

			int res = poll(pollfds, sizeof(pollfds) / sizeof(pollfds[0]), -1);
			if (res < 0)
			{
				if (errno != EINTR)
				{
					// error occurred, finish running
					Logger::write("Poll returned error: %d", errno);
					running = false;
				}
			}
			else if (res == 0)
			{
				// timeout, do nothing
			}
			else if (pollfds[0].revents & POLLIN)
			{
				ssize_t readsize = read(STDIN_FILENO, data, sizeof(data));
				if (readsize > 0)
				{
					auparse_feed(au.get(), data, readsize);

					// assume that data wasn't split, and flush it;
					// otherwise libauparse keeps increasing its memory footprint
					auparse_flush_feed(au.get());
				}
				else
				{
					// error occurred, finish running
					Logger::write("Read returned error: %d", errno);
					running = false;
				}
			}
			else if ((pollfds[0].revents & POLLHUP) || (pollfds[0].revents & POLLERR))
			{
				// stdin closed, no more data, finish running
				running = false;
			}
		}
	}
	catch (const std::exception &e)
	{
		Logger::write("Caught exception: %s", e.what());
		return -1;
	}
	catch (...)
	{
		Logger::write("Caught unknown exception");
		return -1;
	}

	return 0;
}