vfio/mlx5: Improve the target side flow to reduce downtime
Improve the target side flow to reduce downtime as of below. - Support reading an optional record which includes the expected stop_copy size. - Once the source sends this record data, which expects to be sent as part of the pre_copy flow, prepare the data buffers that may be large enough to hold the final stop_copy data. The above reduces the migration downtime as the relevant stuff that is needed to load the image data is prepared ahead as part of pre_copy. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://lore.kernel.org/r/20230124144955.139901-4-yishaih@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
parent
b04e2e86e9
commit
f4f0c25e5d
@ -27,6 +27,8 @@ enum mlx5_vf_migf_state {
|
||||
enum mlx5_vf_load_state {
|
||||
MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
|
||||
MLX5_VF_LOAD_STATE_READ_HEADER,
|
||||
MLX5_VF_LOAD_STATE_PREP_HEADER_DATA,
|
||||
MLX5_VF_LOAD_STATE_READ_HEADER_DATA,
|
||||
MLX5_VF_LOAD_STATE_PREP_IMAGE,
|
||||
MLX5_VF_LOAD_STATE_READ_IMAGE,
|
||||
MLX5_VF_LOAD_STATE_LOAD_IMAGE,
|
||||
@ -59,7 +61,6 @@ struct mlx5_vhca_data_buffer {
|
||||
loff_t start_pos;
|
||||
u64 length;
|
||||
u64 allocated_length;
|
||||
u64 header_image_size;
|
||||
u32 mkey;
|
||||
enum dma_data_direction dma_dir;
|
||||
u8 dmaed:1;
|
||||
@ -89,6 +90,9 @@ struct mlx5_vf_migration_file {
|
||||
enum mlx5_vf_load_state load_state;
|
||||
u32 pdn;
|
||||
loff_t max_pos;
|
||||
u64 record_size;
|
||||
u32 record_tag;
|
||||
u64 stop_copy_prep_size;
|
||||
u64 pre_copy_initial_bytes;
|
||||
struct mlx5_vhca_data_buffer *buf;
|
||||
struct mlx5_vhca_data_buffer *buf_header;
|
||||
|
@ -703,6 +703,56 @@ mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
mlx5vf_resume_read_header_data(struct mlx5_vf_migration_file *migf,
|
||||
struct mlx5_vhca_data_buffer *vhca_buf,
|
||||
const char __user **buf, size_t *len,
|
||||
loff_t *pos, ssize_t *done)
|
||||
{
|
||||
size_t copy_len, to_copy;
|
||||
size_t required_data;
|
||||
u8 *to_buff;
|
||||
int ret;
|
||||
|
||||
required_data = migf->record_size - vhca_buf->length;
|
||||
to_copy = min_t(size_t, *len, required_data);
|
||||
copy_len = to_copy;
|
||||
while (to_copy) {
|
||||
ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, pos,
|
||||
done);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
*len -= copy_len;
|
||||
if (vhca_buf->length == migf->record_size) {
|
||||
switch (migf->record_tag) {
|
||||
case MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE:
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
page = mlx5vf_get_migration_page(vhca_buf, 0);
|
||||
if (!page)
|
||||
return -EINVAL;
|
||||
to_buff = kmap_local_page(page);
|
||||
migf->stop_copy_prep_size = min_t(u64,
|
||||
le64_to_cpup((__le64 *)to_buff), MAX_LOAD_SIZE);
|
||||
kunmap_local(to_buff);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
/* Optional tag */
|
||||
break;
|
||||
}
|
||||
|
||||
migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
|
||||
migf->max_pos += migf->record_size;
|
||||
vhca_buf->length = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf,
|
||||
struct mlx5_vhca_data_buffer *vhca_buf,
|
||||
@ -733,23 +783,38 @@ mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf,
|
||||
*len -= copy_len;
|
||||
vhca_buf->length += copy_len;
|
||||
if (vhca_buf->length == sizeof(struct mlx5_vf_migration_header)) {
|
||||
u64 flags;
|
||||
u64 record_size;
|
||||
u32 flags;
|
||||
|
||||
vhca_buf->header_image_size = le64_to_cpup((__le64 *)to_buff);
|
||||
if (vhca_buf->header_image_size > MAX_LOAD_SIZE) {
|
||||
record_size = le64_to_cpup((__le64 *)to_buff);
|
||||
if (record_size > MAX_LOAD_SIZE) {
|
||||
ret = -ENOMEM;
|
||||
goto end;
|
||||
}
|
||||
|
||||
flags = le64_to_cpup((__le64 *)(to_buff +
|
||||
migf->record_size = record_size;
|
||||
flags = le32_to_cpup((__le32 *)(to_buff +
|
||||
offsetof(struct mlx5_vf_migration_header, flags)));
|
||||
if (flags) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto end;
|
||||
migf->record_tag = le32_to_cpup((__le32 *)(to_buff +
|
||||
offsetof(struct mlx5_vf_migration_header, tag)));
|
||||
switch (migf->record_tag) {
|
||||
case MLX5_MIGF_HEADER_TAG_FW_DATA:
|
||||
migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE;
|
||||
break;
|
||||
case MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE:
|
||||
migf->load_state = MLX5_VF_LOAD_STATE_PREP_HEADER_DATA;
|
||||
break;
|
||||
default:
|
||||
if (!(flags & MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL)) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto end;
|
||||
}
|
||||
/* We may read and skip this optional record data */
|
||||
migf->load_state = MLX5_VF_LOAD_STATE_PREP_HEADER_DATA;
|
||||
}
|
||||
|
||||
migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE;
|
||||
migf->max_pos += vhca_buf->length;
|
||||
vhca_buf->length = 0;
|
||||
*has_work = true;
|
||||
}
|
||||
end:
|
||||
@ -793,9 +858,34 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
break;
|
||||
case MLX5_VF_LOAD_STATE_PREP_HEADER_DATA:
|
||||
if (vhca_buf_header->allocated_length < migf->record_size) {
|
||||
mlx5vf_free_data_buffer(vhca_buf_header);
|
||||
|
||||
migf->buf_header = mlx5vf_alloc_data_buffer(migf,
|
||||
migf->record_size, DMA_NONE);
|
||||
if (IS_ERR(migf->buf_header)) {
|
||||
ret = PTR_ERR(migf->buf_header);
|
||||
migf->buf_header = NULL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
vhca_buf_header = migf->buf_header;
|
||||
}
|
||||
|
||||
vhca_buf_header->start_pos = migf->max_pos;
|
||||
migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER_DATA;
|
||||
break;
|
||||
case MLX5_VF_LOAD_STATE_READ_HEADER_DATA:
|
||||
ret = mlx5vf_resume_read_header_data(migf, vhca_buf_header,
|
||||
&buf, &len, pos, &done);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
break;
|
||||
case MLX5_VF_LOAD_STATE_PREP_IMAGE:
|
||||
{
|
||||
u64 size = vhca_buf_header->header_image_size;
|
||||
u64 size = max(migf->record_size,
|
||||
migf->stop_copy_prep_size);
|
||||
|
||||
if (vhca_buf->allocated_length < size) {
|
||||
mlx5vf_free_data_buffer(vhca_buf);
|
||||
@ -824,7 +914,7 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
|
||||
break;
|
||||
case MLX5_VF_LOAD_STATE_READ_IMAGE:
|
||||
ret = mlx5vf_resume_read_image(migf, vhca_buf,
|
||||
vhca_buf_header->header_image_size,
|
||||
migf->record_size,
|
||||
&buf, &len, pos, &done, &has_work);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
@ -837,7 +927,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
|
||||
|
||||
/* prep header buf for next image */
|
||||
vhca_buf_header->length = 0;
|
||||
vhca_buf_header->header_image_size = 0;
|
||||
/* prep data buf for next image */
|
||||
vhca_buf->length = 0;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user