2017-05-11 09:02:01 +02:00
package PVE::API2::Replication ;
use warnings ;
use strict ;
use PVE::JSONSchema qw( get_standard_option ) ;
use PVE::RPCEnvironment ;
2022-04-27 09:49:11 +02:00
use PVE::Format qw( render_timestamp ) ;
2017-05-15 12:28:33 +02:00
use PVE::ProcFSTools ;
2022-04-27 09:49:11 +02:00
2017-05-11 09:02:01 +02:00
use PVE::ReplicationConfig ;
2017-06-12 06:59:21 +02:00
use PVE::ReplicationState ;
2017-05-11 09:02:01 +02:00
use PVE::Replication ;
2017-06-09 09:45:44 +02:00
use PVE::QemuConfig ;
use PVE::QemuServer ;
use PVE::LXC::Config ;
use PVE::LXC ;
2023-08-03 14:16:55 +02:00
use PVE::Notify ;
2017-05-11 09:02:01 +02:00
use PVE::RESTHandler ;
use base qw( PVE::RESTHandler ) ;
2017-06-27 14:55:44 +02:00
# Path of the lock file under which replication runs are executed; both
# run_single_job() and run_jobs() pass it to PVE::Tools::lock_file().
our $ pvesr_lock_path = "/var/lock/pvesr.lck" ;
2017-06-09 09:45:44 +02:00
2017-06-26 16:34:33 +02:00
# Translate a guest type string ('qemu' or 'lxc') into the name of the config
# class used for that kind of guest. Any other type is treated as an internal
# error and makes the call die.
our $lookup_guest_class = sub {
    my ($vmtype) = @_;

    return 'PVE::QemuConfig' if $vmtype eq 'qemu';
    return 'PVE::LXC::Config' if $vmtype eq 'lxc';

    die "unknown guest type '$vmtype' - internal error";
};
# passing $now is useful for regression testing
# Run a single replication job right away while holding the replication lock.
#
# $jobid   - ID of the job in the replication configuration
# $now     - optional timestamp; defaults to the current time when undefined
# $logfunc - handed through to PVE::Replication::run_replication()
#
# Dies if the job is unknown, not of type 'local' or disabled, if its guest is
# unknown or not located on this node, or if the job targets this node itself.
sub run_single_job {
    my ($jobid, $now, $logfunc) = @_;

    my $local_node = PVE::INotify::nodename();

    my $locked_work = sub {
        $now //= time();

        my $replication_cfg = PVE::ReplicationConfig->new();
        my $jobcfg = $replication_cfg->{ids}->{$jobid};

        # sanity checks on the job itself
        die "no such job '$jobid'\n" if !$jobcfg;
        die "internal error - not implemented" if $jobcfg->{type} ne 'local';
        die "job '$jobid' is disabled\n" if $jobcfg->{disable};

        # sanity checks on the guest the job belongs to
        my $vms = PVE::Cluster::get_vmlist();
        my $vmid = $jobcfg->{guest};
        my $guest = $vms->{ids}->{$vmid};

        die "no such guest '$vmid'\n" if !$guest;
        die "guest '$vmid' is not on local node\n" if $guest->{node} ne $local_node;
        die "unable to sync to local node\n" if $jobcfg->{target} eq $local_node;

        my $guest_class = $lookup_guest_class->($guest->{type});

        # the same timestamp is used for both time arguments of the run
        PVE::Replication::run_replication($guest_class, $jobcfg, $now, $now, $logfunc);
    };

    # wait up to 60 seconds for the lock; anything left in $@ afterwards is re-raised
    my $lock_result = PVE::Tools::lock_file($pvesr_lock_path, 60, $locked_work);
    die $@ if $@;
}
2022-04-27 09:49:11 +02:00
# TODO: the two subs below should probably be part of the general job framework/plugin system
2022-04-27 09:48:27 +02:00
# Decide whether a notification should be sent for the given number of
# consecutive failures. Returns a true value if one should be sent.
my sub _should_mail_at_failcount {
    my ($fail_count) = @_;

    # avoid spam during migration (bug #4111): when failing to obtain the guest's migration lock,
    # fail_count will be 0
    return 0 if $fail_count == 0;

    # the first few failures are always reported for better visibility of the issue
    return 1 if $fail_count <= 3;

    # a failing job is re-tried every half hour, so 48 failures correspond to one day; keep
    # doubling that threshold to report after 1, 2, 4, 8, etc. days only
    my $threshold = 48;
    $threshold *= 2 while $threshold < $fail_count;

    return $threshold == $fail_count;
}
2023-08-03 14:16:55 +02:00
2022-04-27 09:49:11 +02:00
# Log a failed replication job and, if requested, send an error notification.
#
# $job  - job configuration hash; id, next_sync, schedule and target are read
# $err  - the error raised by the replication run
# $mail - when false, only the warning is logged and nothing is sent
#
# Whether a notification is actually sent is decided by
# _should_mail_at_failcount(). A failure while sending the notification is
# only logged (with the job ID for context) and never propagated.
my sub _handle_job_err {
    my ($job, $err, $mail) = @_;

    warn "$job->{id}: got unexpected replication job error - $err";
    return if !$mail;

    my $state = PVE::ReplicationState::read_state();
    my $jobstate = PVE::ReplicationState::extract_job_state($state, $job);
    # a missing counter is handled like "no failure recorded" instead of tripping undef warnings
    my $fail_count = $jobstate->{fail_count} // 0;

    return if !_should_mail_at_failcount($fail_count);

    # not yet updated, so $job->next_sync here is actually the current one.
    # NOTE: Copied from PVE::ReplicationState::job_status()
    my $next_sync = $job->{next_sync} + 60 * ($fail_count <= 3 ? 5 * $fail_count : 30);

    # The replication job is run every 15 mins if no schedule is set.
    my $schedule = $job->{schedule} // '*/15';

    my $template_data = {
        "failure-count" => $fail_count,
        "last-sync" => $jobstate->{last_sync},
        "next-sync" => $next_sync,
        "job-id" => $job->{id},
        "job-target" => $job->{target},
        "job-schedule" => $schedule,
        "error" => $err,
    };

    my $metadata_fields = {
        type => "replication",
        "job-id" => $job->{id},
        # Hostname (without domain part)
        hostname => PVE::INotify::nodename(),
    };

    eval {
        PVE::Notify::error(
            "replication",
            $template_data,
            $metadata_fields,
        );
    };
    # name the job and what failed; the bare ": $@" used before gave no context in the log
    warn "$job->{id}: failed to send replication error notification - $@" if $@;
}
2022-04-27 09:48:27 +02:00
# passing $now and $verbose is useful for regression testing
# Run all pending replication jobs, one after the other, while holding the
# replication lock.
#
# $now     - optional fixed timestamp; the current time is used when undefined
# $logfunc - handed through to PVE::Replication::run_replication()
# $verbose - handed through to PVE::Replication::run_replication()
# $mail    - handed to _handle_job_err() for every job whose run dies
sub run_jobs {
    my ($now, $logfunc, $verbose, $mail) = @_;

    # determined once, before waiting for the lock, and shared by all jobs of this call
    my $iteration = $now // time();

    my $locked_work = sub {
        my $start_time = $now // time();

        PVE::ReplicationState::purge_old_states();

        while (1) {
            my $jobcfg = PVE::ReplicationState::get_next_job($iteration, $start_time);
            last if !$jobcfg;

            my $guest_class = $lookup_guest_class->($jobcfg->{vmtype});

            # a failing job must not keep the remaining jobs from running
            eval {
                PVE::Replication::run_replication(
                    $guest_class, $jobcfg, $iteration, $start_time, $logfunc, $verbose);
            };
            my $err = $@;
            _handle_job_err($jobcfg, $err, $mail) if $err;

            # refresh the start time before asking for the next job
            $start_time = $now // time();
        }
    };

    # wait up to 60 seconds for the lock; anything left in $@ afterwards is re-raised
    my $lock_result = PVE::Tools::lock_file($pvesr_lock_path, 60, $locked_work);
    die $@ if $@;
}
2017-06-08 07:08:33 +02:00
# Turn a job entry carrying a nested 'state' hash into the flat structure
# returned by the status API calls.
#
# The passed $jobcfg hash is modified in place and is also the return value:
# 'state' is removed, 'id' is set to $jobid, the defined state fields are copied
# to the top level, and 'pid' is only set while that process is still running.
my $extract_job_status = sub {
    my ($jobcfg, $jobid) = @_;

    # Note: $jobcfg itself gets modified
    my $state = delete $jobcfg->{state};

    $jobcfg->{id} = $jobid;

    for my $field (qw(last_sync last_try fail_count error duration)) {
        my $value = $state->{$field};
        $jobcfg->{$field} = $value if defined($value);
    }

    # report the worker PID only if the recorded process (pid + start time) is still alive
    if (
        $state->{pid}
        && $state->{ptime}
        && PVE::ProcFSTools::check_process_running($state->{pid}, $state->{ptime})
    ) {
        $jobcfg->{pid} = $state->{pid};
    }

    return $jobcfg;
};
2017-05-11 09:02:01 +02:00
# GET '' - list the status of the replication jobs on this node, optionally
# restricted to a single guest. Only jobs of guests the calling user holds
# VM.Audit on are included in the result.
__PACKAGE__->register_method({
    name => 'status',
    path => '',
    method => 'GET',
    description => "List status of all replication jobs on this node.",
    permissions => {
        description => "Requires the VM.Audit permission on /vms/<vmid>.",
        user => 'all',
    },
    protected => 1,
    proxyto => 'node',
    parameters => {
        additionalProperties => 0,
        properties => {
            node => get_standard_option('pve-node'),
            guest => get_standard_option('pve-vmid', {
                optional => 1,
                description => "Only list replication jobs for this guest.",
            }),
        },
    },
    returns => {
        type => 'array',
        items => {
            type => "object",
            properties => {
                id => { type => 'string' },
            },
        },
        links => [ { rel => 'child', href => "{id}" } ],
    },
    code => sub {
        my ($param) = @_;

        my $rpcenv = PVE::RPCEnvironment::get();
        my $authuser = $rpcenv->get_user();

        my $jobs = PVE::ReplicationState::job_status(1);

        my $res = [];
        foreach my $id (sort keys %$jobs) {
            my $data = $extract_job_status->($jobs->{$id}, $id);
            my $guest = $data->{guest};

            next if defined($param->{guest}) && $guest != $param->{guest};

            # Pass $noerr (4th argument): this is a filter, so a guest the user may not audit
            # has to be skipped. Without it PVE::RPCEnvironment::check() raises a permission
            # error and a single such guest would fail the whole listing.
            next if !$rpcenv->check($authuser, "/vms/$guest", ['VM.Audit'], 1);

            push @$res, $data;
        }

        return $res;
    }});
# GET '{id}' - directory index listing the sub-paths available below a
# replication job.
__PACKAGE__->register_method({
    name => 'index',
    path => '{id}',
    method => 'GET',
    permissions => { user => 'all' },
    description => "Directory index.",
    parameters => {
        additionalProperties => 0,
        properties => {
            id => get_standard_option('pve-replication-id'),
            node => get_standard_option('pve-node'),
        },
    },
    returns => {
        type => 'array',
        items => {
            type => "object",
            properties => {},
        },
        links => [ { rel => 'child', href => "{name}" } ],
    },
    code => sub {
        my ($param) = @_;

        # one entry per child path, in the order they are presented
        my @children = map { +{ name => $_ } } qw(schedule_now log status);

        return \@children;
    }});
# GET '{id}/status' - return the status of a single replication job.
# Dies if the job does not exist; raises a permission error if the calling
# user lacks VM.Audit on the guest the job belongs to.
__PACKAGE__->register_method({
    name => 'job_status',
    path => '{id}/status',
    method => 'GET',
    description => "Get replication job status.",
    permissions => {
        description => "Requires the VM.Audit permission on /vms/<vmid>.",
        user => 'all',
    },
    protected => 1,
    proxyto => 'node',
    parameters => {
        additionalProperties => 0,
        properties => {
            id => get_standard_option('pve-replication-id'),
            node => get_standard_option('pve-node'),
        },
    },
    returns => {
        type => "object",
        properties => {},
    },
    code => sub {
        my ($param) = @_;

        my $rpcenv = PVE::RPCEnvironment::get();
        my $authuser = $rpcenv->get_user();

        my $jobs = PVE::ReplicationState::job_status(1);
        my $jobid = $param->{id};
        my $jobcfg = $jobs->{$jobid};

        die "no such replication job '$jobid'\n" if !defined($jobcfg);

        my $data = $extract_job_status->($jobcfg, $jobid);
        my $guest = $data->{guest};

        # raise_perm_exc is not imported into this package, so it is called fully qualified.
        # check() gets $noerr (4th argument) so that this statement decides about the error;
        # path and privilege are passed on to keep the failure message specific.
        # NOTE(review): relies on PVE::Exception being loaded by one of the PVE modules used
        # above - confirm, or add 'use PVE::Exception' to the imports of this file.
        PVE::Exception::raise_perm_exc("/vms/$guest, VM.Audit")
            if !$rpcenv->check($authuser, "/vms/$guest", ['VM.Audit'], 1);

        return $data;
    }});
# GET '{id}/log' - read (a window of) the log file of a replication job.
# Access is granted with VM.Audit on the job's guest or with Sys.Audit on the
# node. The total line count is attached to the result as the 'total' attribute.
__PACKAGE__->register_method({
    name => 'read_job_log',
    path => '{id}/log',
    method => 'GET',
    permissions => {
        description => "Requires the VM.Audit permission on /vms/<vmid>, or 'Sys.Audit' on '/nodes/<node>'",
        user => 'all',
    },
    protected => 1,
    description => "Read replication job log.",
    proxyto => 'node',
    parameters => {
        additionalProperties => 0,
        properties => {
            id => get_standard_option('pve-replication-id'),
            node => get_standard_option('pve-node'),
            start => {
                type => 'integer',
                minimum => 0,
                optional => 1,
            },
            limit => {
                type => 'integer',
                minimum => 0,
                optional => 1,
            },
        },
    },
    returns => {
        type => 'array',
        items => {
            type => "object",
            properties => {
                n => {
                    description => "Line number",
                    type => 'integer',
                },
                t => {
                    description => "Line text",
                    type => 'string',
                },
            },
        },
    },
    code => sub {
        my ($param) = @_;

        my $rpcenv = PVE::RPCEnvironment::get();
        my $authuser = $rpcenv->get_user();

        my $jobid = $param->{id};
        my $filename = PVE::ReplicationState::job_logfile_name($jobid);

        my $cfg = PVE::ReplicationConfig->new();
        my $data = $cfg->{ids}->{$jobid};

        die "no such replication job '$jobid'\n" if !defined($data);

        my $node = $param->{node};
        my $vmid = $data->{guest};

        # Both checks get $noerr (4th argument): without it a failed VM.Audit check raises
        # right away and the documented Sys.Audit alternative is never evaluated.
        my $may_read = $rpcenv->check($authuser, "/vms/$vmid", ['VM.Audit'], 1)
            || $rpcenv->check($authuser, "/nodes/$node", ['Sys.Audit'], 1);

        # raise_perm_exc is not imported into this package, so it is called fully qualified.
        # NOTE(review): relies on PVE::Exception being loaded by one of the PVE modules used
        # above - confirm, or add 'use PVE::Exception' to the imports of this file.
        PVE::Exception::raise_perm_exc() if !$may_read;

        my ($count, $lines) =
            PVE::Tools::dump_logfile($filename, $param->{start}, $param->{limit});

        $rpcenv->set_result_attrib('total', $count);

        return $lines;
    }});
2017-06-28 06:55:16 +02:00
# POST '{id}/schedule_now' - ask for a replication job to be started as soon
# as possible. Dies if no job with the given ID is configured; otherwise the
# result of PVE::ReplicationState::schedule_job_now() is returned.
__PACKAGE__->register_method({
    name => 'schedule_now',
    path => '{id}/schedule_now',
    method => 'POST',
    description => "Schedule replication job to start as soon as possible.",
    proxyto => 'node',
    protected => 1,
    permissions => {
        check => [ 'perm', '/storage', ['Datastore.Allocate'] ],
    },
    parameters => {
        additionalProperties => 0,
        properties => {
            id => get_standard_option('pve-replication-id'),
            node => get_standard_option('pve-node'),
        },
    },
    returns => {
        type => 'string',
    },
    code => sub {
        my ($param) = @_;

        my $jobid = $param->{id};

        my $replication_cfg = PVE::ReplicationConfig->new();
        my $jobcfg = $replication_cfg->{ids}->{$jobid};

        defined($jobcfg) or die "no such replication job '$jobid'\n";

        return PVE::ReplicationState::schedule_job_now($jobcfg);
    }});
2017-05-11 09:02:01 +02:00
1 ;