2023-01-25 21:00:44 +01:00
/* SPDX-License-Identifier: GPL-2.0-only */
2005-04-16 15:20:36 -07:00
/*
* Copyright ( C ) 2003 Sistina Software .
* Copyright ( C ) 2004 Red Hat , Inc . All rights reserved .
*
* Module Author : Heinz Mauelshagen
*
* This file is released under the GPL .
*
* Path - Selector registration .
*/
# ifndef DM_PATH_SELECTOR_H
# define DM_PATH_SELECTOR_H
# include <linux/device-mapper.h>
# include "dm-mpath.h"
/*
 * Choosing which path a given io is sent down is hidden behind the
 * path selector abstraction declared in this header.
 */
struct path_selector_type;

/*
 * One path selector instance: a pointer to the selector type it was
 * built from, plus an untyped pointer for per-instance state.
 */
struct path_selector {
	/* Selector type (callbacks and metadata) behind this instance. */
	struct path_selector_type *type;
	/*
	 * Opaque pointer; NOTE(review): presumably owned and interpreted
	 * only by the selector implementation - confirm against a selector.
	 */
	void *context;
};
dm mpath: provide high-resolution timer to HST for bio-based
The precision loss of reading IO start_time with jiffies_to_nsecs
instead of using a high resolution timer degrades HST path prediction
for BIO-based mpath on high load workloads.
Below, I show the utilization percentage of a 10 disk multipath with
asymmetrical disk access cost, while being exercised by a randwrite FIO
benchmark with high submission queue depth (depth=64). It is possible
to see that the HST path selection degrades heavily for high-iops in
BIO-mpath, underutilizing the slower paths way beyond expected. This
seems to be caused by the start_time truncation, which makes some IO
seem much slower than it actually is. In this scenario ST outperforms
HST for bio-mpath, but not for mq-mpath, which already uses ktime_get_ns().
The third column shows utilization with this patch applied. It is easy
to see that now HST prediction is much closer to the ideal distribution
(calculated considering the real cost of each path).
| | ST | HST (orig) | HST(ktime) | Best |
| sdd | 0.17 | 0.20 | 0.17 | 0.18 |
| sde | 0.17 | 0.20 | 0.17 | 0.18 |
| sdf | 0.17 | 0.20 | 0.17 | 0.18 |
| sdg | 0.06 | 0.00 | 0.06 | 0.04 |
| sdh | 0.03 | 0.00 | 0.03 | 0.02 |
| sdi | 0.03 | 0.00 | 0.03 | 0.02 |
| sdj | 0.02 | 0.00 | 0.01 | 0.01 |
| sdk | 0.02 | 0.00 | 0.01 | 0.01 |
| sdl | 0.17 | 0.20 | 0.17 | 0.18 |
| sdm | 0.17 | 0.20 | 0.17 | 0.18 |
This issue was originally discussed [1] when we first merged HST, and
this patch was left as a low hanging fruit to be solved later.
Regarding the implementation, as suggested by Mike in that mail thread,
in order to avoid the overhead of ktime_get_ns for other selectors, this
patch adds a flag for the selector code to request the high-resolution
timer.
I tested this using the same benchmark used in the original HST submission.
Full test and benchmark scripts are available here:
https://people.collabora.com/~krisman/HST-BIO-MPATH/
[1] https://lore.kernel.org/lkml/85tv0am9de.fsf@collabora.com/T/
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
[snitzer: cleaned up various implementation details]
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
2022-04-27 12:57:10 -04:00
/*
 * DM_PS_USE_HR_TIMER: path selector feature flag.
 *
 * A path selector that sets this flag asks BIO-based mpath to record the
 * IO start time with a high resolution timer (ktime_get_ns). Some path
 * selectors (e.g. HST) choose paths better with the more precise
 * timestamp; the price is slightly more overhead on every BIO
 * submission, which for some benchmarks is usually paid back by the
 * improved path selection.
 *
 * Request-based mpath is unaffected by this flag: it already uses a
 * higher precision timer by default.
 *
 * dm_ps_use_hr_timer(type) evaluates to non-zero when the flag is set in
 * type->features and to zero otherwise.
 */
#define DM_PS_USE_HR_TIMER		0x00000001
#define dm_ps_use_hr_timer(type)	(DM_PS_USE_HR_TIMER & (type)->features)
2005-04-16 15:20:36 -07:00
/*
 * Information about a path selector type : its identity , its feature
 * flags , the argument counts it reports , and the table of callbacks
 * that implement the selector .
 *
 * This is the object passed to dm_register_path_selector ( ) and
 * dm_unregister_path_selector ( ) and returned by dm_get_path_selector ( ) .
 */
struct path_selector_type {
/*
 * Name of this selector type .
 * NOTE ( review ) : presumably the key matched by
 * dm_get_path_selector ( name ) - confirm in dm-path-selector.c .
 */
char * name ;
/*
 * Module that provides this selector type .
 * NOTE ( review ) : presumably used to pin the module while the type is
 * in use - confirm in dm-path-selector.c .
 */
struct module * module ;
dm mpath: provide high-resolution timer to HST for bio-based
The precision loss of reading IO start_time with jiffies_to_nsecs
instead of using a high resolution timer degrades HST path prediction
for BIO-based mpath on high load workloads.
Below, I show the utilization percentage of a 10 disk multipath with
asymmetrical disk access cost, while being exercised by a randwrite FIO
benchmark with high submission queue depth (depth=64). It is possible
to see that the HST path selection degrades heavily for high-iops in
BIO-mpath, underutilizing the slower paths way beyond expected. This
seems to be caused by the start_time truncation, which makes some IO to
seem much slower than it actually is. In this scenario ST outperforms
HST for bio-mpath, but not for mq-mpath, which already uses ktime_get_ns().
The third column shows utilization with this patch applied. It is easy
to see that now HST prediction is much closer to the ideal distribution
(calculated considering the real cost of each path).
| | ST | HST (orig) | HST(ktime) | Best |
| sdd | 0.17 | 0.20 | 0.17 | 0.18 |
| sde | 0.17 | 0.20 | 0.17 | 0.18 |
| sdf | 0.17 | 0.20 | 0.17 | 0.18 |
| sdg | 0.06 | 0.00 | 0.06 | 0.04 |
| sdh | 0.03 | 0.00 | 0.03 | 0.02 |
| sdi | 0.03 | 0.00 | 0.03 | 0.02 |
| sdj | 0.02 | 0.00 | 0.01 | 0.01 |
| sdk | 0.02 | 0.00 | 0.01 | 0.01 |
| sdl | 0.17 | 0.20 | 0.17 | 0.18 |
| sdm | 0.17 | 0.20 | 0.17 | 0.18 |
This issue was originally discussed [1] when we first merged HST, and
this patch was left as a low hanging fruit to be solved later.
Regarding the implementation, as suggested by Mike in that mail thread,
in order to avoid the overhead of ktime_get_ns for other selectors, this
patch adds a flag for the selector code to request the high-resolution
timer.
I tested this using the same benchmark used in the original HST submission.
Full test and benchmark scripts are available here:
https://people.collabora.com/~krisman/HST-BIO-MPATH/
[1] https://lore.kernel.org/lkml/85tv0am9de.fsf@collabora.com/T/
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
[snitzer: cleaned up various implementation details]
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
2022-04-27 12:57:10 -04:00
/*
 * Bitmask of DM_PS_* feature flags ; dm_ps_use_hr_timer ( ) tests it
 * against DM_PS_USE_HR_TIMER .
 */
unsigned int features ;
2005-04-16 15:20:36 -07:00
/*
 * Argument counts reported by this selector type .
 * NOTE ( review ) : presumably the number of per-path arguments emitted
 * for table and info status output respectively - confirm against the
 * status ( ) implementations and dm-mpath.c .
 */
unsigned int table_args ;
unsigned int info_args ;
/*
* Constructs a path selector object , takes custom arguments
*/
2023-02-01 22:31:43 +01:00
int ( * create ) ( struct path_selector * ps , unsigned int argc , char * * argv ) ;
/*
 * Counterpart of create ( ) for a path selector object .
 * NOTE ( review ) : presumably releases whatever create ( ) set up in
 * ps - confirm against a selector implementation .
 */
void ( * destroy ) ( struct path_selector * ps ) ;
2005-04-16 15:20:36 -07:00
/*
* Add an opaque path object , along with some selector specific
* path args ( eg , path priority ) .
*/
2023-02-01 22:31:43 +01:00
int ( * add_path ) ( struct path_selector * ps , struct dm_path * path ,
int argc , char * * argv , char * * error ) ;
2005-04-16 15:20:36 -07:00
/*
* Chooses a path for this io , if no paths are available then
* NULL will be returned .
*/
2023-02-01 22:31:43 +01:00
struct dm_path * ( * select_path ) ( struct path_selector * ps , size_t nr_bytes ) ;
2005-04-16 15:20:36 -07:00
/*
* Notify the selector that a path has failed .
*/
2023-02-01 22:31:43 +01:00
void ( * fail_path ) ( struct path_selector * ps , struct dm_path * p ) ;
2005-04-16 15:20:36 -07:00
/*
* Ask selector to reinstate a path .
*/
2023-02-01 22:31:43 +01:00
int ( * reinstate_path ) ( struct path_selector * ps , struct dm_path * p ) ;
2005-04-16 15:20:36 -07:00
/*
* Table content based on parameters added in ps_add_path_fn
* or path selector status
*/
2023-02-01 22:31:43 +01:00
int ( * status ) ( struct path_selector * ps , struct dm_path * path ,
status_type_t type , char * result , unsigned int maxlen ) ;
2005-04-16 15:20:36 -07:00
2023-02-01 22:31:43 +01:00
/*
 * Per - io hooks taking the selector , the path and the io size in
 * bytes ; end_io ( ) additionally receives a u64 start_time .
 * NOTE ( review ) : presumably called when an io is issued on / completes
 * on the path , with start_time in nanoseconds ( see the
 * DM_PS_USE_HR_TIMER comment ) - confirm against dm-mpath.c .
 */
int ( * start_io ) ( struct path_selector * ps , struct dm_path * path ,
size_t nr_bytes ) ;
int ( * end_io ) ( struct path_selector * ps , struct dm_path * path ,
size_t nr_bytes , u64 start_time ) ;
2005-04-16 15:20:36 -07:00
} ;
/*
 * Register a path selector type.
 * NOTE(review): the meaning of the int result is not visible in this
 * header; presumably 0 on success and a negative errno on failure.
 */
int dm_register_path_selector(struct path_selector_type *type);

/*
 * Unregister a path selector type.
 * NOTE(review): return convention presumably matches
 * dm_register_path_selector() - confirm in dm-path-selector.c.
 */
int dm_unregister_path_selector(struct path_selector_type *type);

/*
 * Look up a registered path selector type by name and return it.
 * NOTE(review): the result for an unknown name is not visible here
 * (presumably NULL) - confirm in dm-path-selector.c.
 */
struct path_selector_type *dm_get_path_selector(const char *name);

/* Release a path selector type (see dm_get_path_selector()). */
void dm_put_path_selector(struct path_selector_type *pst);
# endif