blkcg: add tools/cgroup/iocost_coef_gen.py

Add a script which can be used to generate device-specific iocost
linear model coefficients.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Tejun Heo 2019-08-28 15:06:00 -07:00 committed by Jens Axboe
parent 6954ff185e
commit 8504dea783
3 changed files with 184 additions and 0 deletions
Documentation/admin-guide
block
tools/cgroup

View File

@ -1529,6 +1529,9 @@ IO Interface Files
The IO cost model isn't expected to be accurate in absolute
sense and is scaled to the device behavior dynamically.
If needed, tools/cgroup/iocost_coef_gen.py can be used to
generate device-specific coefficients.
io.weight
A read-write flat-keyed file which exists on non-root cgroups.
The default is "default 100".

View File

@ -46,6 +46,9 @@
* If needed, tools/cgroup/iocost_coef_gen.py can be used to generate
* device-specific coefficients.
*
* If needed, tools/cgroup/iocost_coef_gen.py can be used to generate
* device-specific coefficients.
*
* 2. Control Strategy
*
* The device virtual time (vtime) is used as the primary control metric.

View File

@ -0,0 +1,178 @@
#!/usr/bin/env python3
#
# Copyright (C) 2019 Tejun Heo <tj@kernel.org>
# Copyright (C) 2019 Andy Newell <newella@fb.com>
# Copyright (C) 2019 Facebook
# Command-line help text; passed to argparse as the parser description.
desc = """
Generate linear IO cost model coefficients used by the blk-iocost
controller. If the target raw testdev is specified, destructive tests
are performed against the whole device; otherwise, on
./iocost-coef-fio.testfile. The result can be written directly to
/sys/fs/cgroup/io.cost.model.
On high performance devices, --numjobs > 1 is needed to achieve
saturation.
See Documentation/admin-guide/cgroup-v2.rst and block/blk-iocost.c
for more details.
"""
import argparse
import re
import json
import glob
import os
import sys
import atexit
import shutil
import tempfile
import subprocess
# Command-line interface.  RawTextHelpFormatter keeps the line breaks of the
# description text as written.
parser = argparse.ArgumentParser(description=desc,
                                 formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('--testdev', metavar='DEV',
                    help='Raw block device to use for testing, ignores --testfile-size-gb')
parser.add_argument('--testfile-size-gb', type=float, metavar='GIGABYTES', default=16,
                    help='Testfile size in gigabytes (default: %(default)s)')
parser.add_argument('--duration', type=int, metavar='SECONDS', default=120,
                    help='Individual test run duration in seconds (default: %(default)s)')
parser.add_argument('--seqio-block-mb', metavar='MEGABYTES', type=int, default=128,
                    help='Sequential test block size in megabytes (default: %(default)s)')
parser.add_argument('--seq-depth', type=int, metavar='DEPTH', default=64,
                    help='Sequential test queue depth (default: %(default)s)')
parser.add_argument('--rand-depth', type=int, metavar='DEPTH', default=64,
                    help='Random test queue depth (default: %(default)s)')
parser.add_argument('--numjobs', type=int, metavar='JOBS', default=1,
                    help='Number of parallel fio jobs to run (default: %(default)s)')
parser.add_argument('--quiet', action='store_true',
                    help='Suppress progress output')
parser.add_argument('--verbose', action='store_true',
                    help='Print the fio commands being run (no effect with --quiet)')
def info(msg):
    """Print @msg unless --quiet was given."""
    if args.quiet:
        return
    print(msg)
def dbg(msg):
    """Print @msg only when --verbose is set and --quiet is not."""
    if args.quiet or not args.verbose:
        return
    print(msg)
# determine ('DEVNAME', 'MAJ:MIN') for @path
def dir_to_dev(path):
    """Return ('DEVNAME', 'MAJ:MIN') of the whole block device backing @path.

    The mount source of @path is looked up with findmnt.  If that name is
    found below some /sys/block/<disk>/ directory (i.e. it is a partition),
    the parent disk's name is used instead.

    Raises:
        subprocess.CalledProcessError: findmnt exited with a non-zero status.
        RuntimeError: findmnt succeeded but reported no source.
        OSError: findmnt could not be executed or /dev/DEVNAME cannot be
            stat'ed.
    """
    # find the block device @path is on; argv is passed as a list so that
    # @path is never interpreted by a shell
    output = subprocess.run(['findmnt', '-nvo', 'SOURCE', '-T', str(path)],
                            stdout=subprocess.PIPE, check=True).stdout
    devname = os.path.basename(output.decode('utf-8').strip())
    if not devname:
        # An empty name would turn the glob below into '/sys/block/*/', which
        # matches every device directory and silently picks an unrelated disk.
        raise RuntimeError(f'Unable to determine the block device for {path}')

    # partition -> whole device
    parents = glob.glob('/sys/block/*/' + devname)
    if parents:
        devname = os.path.basename(os.path.dirname(parents[0]))

    rdev = os.stat(f'/dev/{devname}').st_rdev
    return (devname, f'{os.major(rdev)}:{os.minor(rdev)}')
def create_testfile(path, size):
    """Make sure @path is a regular file of exactly @size bytes.

    Nothing is done when such a file already exists.  Otherwise the file is
    recreated from scratch and filled with @size bytes read from
    /dev/urandom, piped through pv into dd.
    """
    already_there = os.path.isfile(path) and os.stat(path).st_size == size
    if already_there:
        return

    info(f'Creating testfile {path}')

    # start over with a fresh, empty file
    for prep_cmd in (f'rm -f {path}', f'touch {path}'):
        subprocess.check_call(prep_cmd, shell=True)

    # best effort: the exit status of chattr is deliberately not checked
    subprocess.call(f'chattr +C {path}', shell=True)

    pv_quiet = "-q" if args.quiet else ""
    fill_cmd = (
        f'pv -s {size} -pr /dev/urandom {pv_quiet} | '
        f'dd of={path} count={size} '
        f'iflag=count_bytes,fullblock oflag=direct bs=16M status=none'
    )
    subprocess.check_call(fill_cmd, shell=True)
def run_fio(testfile, duration, iotype, iodepth, blocksize, jobs):
    """Run one time-based fio test against @testfile and return its bandwidth.

    Args:
        testfile: file or device path passed to fio as --filename.
        duration: run time in seconds; rounded to an integer for --runtime.
        iotype: value for fio's --readwrite option.
        iodepth: value for fio's --iodepth option.
        blocksize: value for fio's --blocksize option.
        jobs: value for fio's --numjobs option.

    Returns:
        The sum of the read and write 'bw_bytes' values over all jobs in
        fio's JSON output.

    Raises:
        subprocess.CalledProcessError: fio exited with a non-zero status.
    """
    eta = 'never' if args.quiet else 'always'

    # The context manager guarantees the temporary result file is closed and
    # removed even when fio fails or its output cannot be parsed.
    with tempfile.NamedTemporaryFile() as outfile:
        # argv is passed as a list so that @testfile is never interpreted by
        # a shell
        cmd = ['fio', '--direct=1', '--ioengine=libaio', '--name=coef',
               f'--filename={testfile}', f'--runtime={round(duration)}',
               f'--readwrite={iotype}', f'--iodepth={iodepth}',
               f'--blocksize={blocksize}',
               f'--eta={eta}', '--output-format', 'json',
               f'--output={outfile.name}',
               '--time_based', f'--numjobs={jobs}']
        cmdline = ' '.join(cmd)
        # dbg() itself checks --verbose, no extra guard needed here
        dbg(f'Running {cmdline}')
        subprocess.check_call(cmd)

        # fio writes its result to the file by name; read it back the same way
        with open(outfile.name, 'r') as f:
            d = json.load(f)

    return sum(j['read']['bw_bytes'] + j['write']['bw_bytes'] for j in d['jobs'])
def restore_elevator_nomerges():
    """Write the saved elevator and nomerges values back to their files.

    Reads the module-level elevator_path/elevator and nomerges_path/nomerges
    and writes each saved value to its path, elevator first.
    """
    info(f'Restoring elevator to {elevator} and nomerges to {nomerges}')
    for target_path, saved_value in ((elevator_path, elevator),
                                     (nomerges_path, nomerges)):
        with open(target_path, 'w') as f:
            f.write(saved_value)
args = parser.parse_args()

# Report every missing external command before giving up, not only the first.
missing = [cmd for cmd in ('findmnt', 'pv', 'dd', 'fio') if not shutil.which(cmd)]
for cmd in missing:
    print(f'Required command "{cmd}" is missing', file=sys.stderr)
if missing:
    sys.exit(1)

if args.testdev:
    # Accept a bare device name ("sda") as well as a path.  Symlinks such as
    # /dev/mapper/* or /dev/disk/by-id/* are resolved first so that the
    # basename is the device name found under /dev and /sys/block.
    testdev = args.testdev
    if not os.path.exists(testdev):
        testdev = f'/dev/{os.path.basename(testdev)}'
    devname = os.path.basename(os.path.realpath(testdev))
    rdev = os.stat(f'/dev/{devname}').st_rdev
    devno = f'{os.major(rdev)}:{os.minor(rdev)}'
    testfile = f'/dev/{devname}'
    info(f'Test target: {devname}({devno})')
else:
    # No raw device given: test against a file in the current directory.
    devname, devno = dir_to_dev('.')
    testfile = 'iocost-coef-fio.testfile'
    testfile_size = int(args.testfile_size_gb * 2 ** 30)
    create_testfile(testfile, testfile_size)
    info(f'Test target: {testfile} on {devname}({devno})')

elevator_path = f'/sys/block/{devname}/queue/scheduler'
nomerges_path = f'/sys/block/{devname}/queue/nomerges'

# Remember the current settings.  The active scheduler is the one shown in
# square brackets in the scheduler file.
with open(elevator_path, 'r') as f:
    elevator = re.sub(r'.*\[(.*)\].*', r'\1', f.read().strip())
with open(nomerges_path, 'r') as f:
    nomerges = f.read().strip()

info('Temporarily disabling elevator and merges')
# Registered before the settings are changed so that they are restored even
# if one of the test runs below terminates the script.
atexit.register(restore_elevator_nomerges)
with open(elevator_path, 'w') as f:
    f.write('none')
with open(nomerges_path, 'w') as f:
    f.write('1')

# Sequential bandwidth tests use large blocks at queue depth 1; the IOPS tests
# use 4k blocks, so bytes/sec divided by 4096 gives IOs/sec.
seq_blocksize = args.seqio_block_mb * (2 ** 20)

info('Determining rbps...')
rbps = run_fio(testfile, args.duration, 'read',
               1, seq_blocksize, args.numjobs)
info(f'\nrbps={rbps}, determining rseqiops...')
rseqiops = round(run_fio(testfile, args.duration, 'read',
                         args.seq_depth, 4096, args.numjobs) / 4096)
info(f'\nrseqiops={rseqiops}, determining rrandiops...')
rrandiops = round(run_fio(testfile, args.duration, 'randread',
                          args.rand_depth, 4096, args.numjobs) / 4096)
info(f'\nrrandiops={rrandiops}, determining wbps...')
wbps = run_fio(testfile, args.duration, 'write',
               1, seq_blocksize, args.numjobs)
info(f'\nwbps={wbps}, determining wseqiops...')
wseqiops = round(run_fio(testfile, args.duration, 'write',
                         args.seq_depth, 4096, args.numjobs) / 4096)
info(f'\nwseqiops={wseqiops}, determining wrandiops...')
wrandiops = round(run_fio(testfile, args.duration, 'randwrite',
                          args.rand_depth, 4096, args.numjobs) / 4096)
info(f'\nwrandiops={wrandiops}')

# Restore right away and drop the exit hook so it does not run a second time.
restore_elevator_nomerges()
atexit.unregister(restore_elevator_nomerges)
info('')

# The result line; per the help text it can be written directly to
# /sys/fs/cgroup/io.cost.model.
print(f'{devno} rbps={rbps} rseqiops={rseqiops} rrandiops={rrandiops} '
      f'wbps={wbps} wseqiops={wseqiops} wrandiops={wrandiops}')