5
0
mirror of git://git.proxmox.com/git/pve-ha-manager.git synced 2025-01-18 10:03:53 +03:00

tests: simulate adding running services to HA with rebalance-on-start

Split out from Fiona's original series, to better show what actually
changes with her fix.

Currently, a newly added service that's already running is shut down,
offline migrated and started again if rebalance selects a new node
for it. This is unexpected and should be fixed. Encode the current
behavior as a test now, still showing the undesired behavior, and fix
it in the next commit.

Originally-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
This commit is contained in:
Thomas Lamprecht 2023-06-06 19:02:40 +02:00
parent c0dbab3c32
commit c1aaa05b85
8 changed files with 95 additions and 0 deletions

View File

@ -0,0 +1,3 @@
Test how adding new services behaves with ha-rebalance-on-start.
Expect that already running services are not affected, but services that
are not yet running may be moved to another node before being started.

View File

@ -0,0 +1,9 @@
[
[ "power node1 on", "power node2 on", "power node3 on"],
[ "service vm:100 add node2 started 1" ],
[ "service vm:101 add node2 started 0" ],
[ "service vm:102 add node2 started 1" ],
[ "service vm:103 add node2 started 0" ],
[ "service vm:104 add node2 stopped 0" ],
[ "service vm:105 add node2 stopped 0" ]
]

View File

@ -0,0 +1,7 @@
{
"crs": {
"ha": "static",
"ha-rebalance-on-start": 1
}
}

View File

@ -0,0 +1,5 @@
{
"node1": { "power": "off", "network": "off", "cpus": 40, "memory": 384000000000 },
"node2": { "power": "off", "network": "off", "cpus": 32, "memory": 256000000000 },
"node3": { "power": "off", "network": "off", "cpus": 32, "memory": 256000000000 }
}

View File

@ -0,0 +1,68 @@
info 0 hardware: starting simulation
info 20 cmdlist: execute power node1 on
info 20 node1/crm: status change startup => wait_for_quorum
info 20 node1/lrm: status change startup => wait_for_agent_lock
info 20 cmdlist: execute power node2 on
info 20 node2/crm: status change startup => wait_for_quorum
info 20 node2/lrm: status change startup => wait_for_agent_lock
info 20 cmdlist: execute power node3 on
info 20 node3/crm: status change startup => wait_for_quorum
info 20 node3/lrm: status change startup => wait_for_agent_lock
info 120 cmdlist: execute service vm:100 add node2 started 1
info 120 node1/crm: got lock 'ha_manager_lock'
info 120 node1/crm: status change wait_for_quorum => master
info 120 node1/crm: using scheduler mode 'static'
info 120 node1/crm: node 'node1': state changed from 'unknown' => 'online'
info 120 node1/crm: node 'node2': state changed from 'unknown' => 'online'
info 120 node1/crm: node 'node3': state changed from 'unknown' => 'online'
info 120 node1/crm: adding new service 'vm:100' on node 'node2'
info 120 node1/crm: service vm:100: re-balance selected new node node1 for startup
info 120 node1/crm: service 'vm:100': state changed from 'request_start' to 'request_start_balance' (node = node2, target = node1)
info 122 node2/crm: status change wait_for_quorum => slave
info 123 node2/lrm: got lock 'ha_agent_node2_lock'
info 123 node2/lrm: status change wait_for_agent_lock => active
info 123 node2/lrm: service vm:100 - start relocate to node 'node1'
info 123 node2/lrm: stopping service vm:100 (relocate)
info 123 node2/lrm: service status vm:100 stopped
info 123 node2/lrm: service vm:100 - end relocate to node 'node1'
info 124 node3/crm: status change wait_for_quorum => slave
info 140 node1/crm: service 'vm:100': state changed from 'request_start_balance' to 'started' (node = node1)
info 141 node1/lrm: got lock 'ha_agent_node1_lock'
info 141 node1/lrm: status change wait_for_agent_lock => active
info 141 node1/lrm: starting service vm:100
info 141 node1/lrm: service status vm:100 started
info 220 cmdlist: execute service vm:101 add node2 started 0
info 220 node1/crm: adding new service 'vm:101' on node 'node2'
info 220 node1/crm: service vm:101: re-balance selected current node node2 for startup
info 220 node1/crm: service 'vm:101': state changed from 'request_start' to 'started' (node = node2)
info 223 node2/lrm: starting service vm:101
info 223 node2/lrm: service status vm:101 started
info 320 cmdlist: execute service vm:102 add node2 started 1
info 320 node1/crm: adding new service 'vm:102' on node 'node2'
info 320 node1/crm: service vm:102: re-balance selected new node node3 for startup
info 320 node1/crm: service 'vm:102': state changed from 'request_start' to 'request_start_balance' (node = node2, target = node3)
info 323 node2/lrm: service vm:102 - start relocate to node 'node3'
info 323 node2/lrm: stopping service vm:102 (relocate)
info 323 node2/lrm: service status vm:102 stopped
info 323 node2/lrm: service vm:102 - end relocate to node 'node3'
info 340 node1/crm: service 'vm:102': state changed from 'request_start_balance' to 'started' (node = node3)
info 345 node3/lrm: got lock 'ha_agent_node3_lock'
info 345 node3/lrm: status change wait_for_agent_lock => active
info 345 node3/lrm: starting service vm:102
info 345 node3/lrm: service status vm:102 started
info 420 cmdlist: execute service vm:103 add node2 started 0
info 420 node1/crm: adding new service 'vm:103' on node 'node2'
info 420 node1/crm: service vm:103: re-balance selected new node node1 for startup
info 420 node1/crm: service 'vm:103': state changed from 'request_start' to 'request_start_balance' (node = node2, target = node1)
info 423 node2/lrm: service vm:103 - start relocate to node 'node1'
info 423 node2/lrm: service vm:103 - end relocate to node 'node1'
info 440 node1/crm: service 'vm:103': state changed from 'request_start_balance' to 'started' (node = node1)
info 441 node1/lrm: starting service vm:103
info 441 node1/lrm: service status vm:103 started
info 520 cmdlist: execute service vm:104 add node2 stopped 0
info 520 node1/crm: adding new service 'vm:104' on node 'node2'
info 540 node1/crm: service 'vm:104': state changed from 'request_stop' to 'stopped'
info 620 cmdlist: execute service vm:105 add node2 stopped 0
info 620 node1/crm: adding new service 'vm:105' on node 'node2'
info 640 node1/crm: service 'vm:105': state changed from 'request_stop' to 'stopped'
info 1220 hardware: exit simulation - done

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
{}