From 025de7b6a6dd44e78d0d45b4f3b4f104ed54b0fd Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:03 -0300 Subject: [PATCH 1/6] selftests: tc-testing: cap parallel tdc to 4 cores We have observed a lot of lock contention and test instability when running with >8 cores. Enough to actually make the tests run slower than with fewer cores. Cap the maximum cores of parallel tdc to 4 which showed in testing to be a reasonable number for efficiency and stability in different kernel config scenarios. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-2-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index a6718192aff3..f764b43f112b 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -1017,6 +1017,7 @@ def main(): parser = pm.call_add_args(parser) (args, remaining) = parser.parse_known_args() args.NAMES = NAMES + args.mp = min(args.mp, 4) pm.set_args(args) check_default_settings(args, remaining, pm) if args.verbose > 2: From 50a5988a7a540fb1ad4e620e1bbf11cc646e3dc7 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:04 -0300 Subject: [PATCH 2/6] selftests: tc-testing: move back to per test ns setup Surprisingly in kernel configs with most of the debug knobs turned on, pre-allocating the test resources makes tdc run much slower overall than when allocating resources on a per test basis. As these knobs are used in kselftests in downstream CIs, let's go back to the old way of doing things to avoid kselftests timeouts. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202311161129.3b45ed53-oliver.sang@intel.com Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-3-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/plugin-lib/nsPlugin.py | 68 +++++++------------ 1 file changed, 25 insertions(+), 43 deletions(-) diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 62974bd3a4a5..2b8cbfdf1083 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -17,44 +17,6 @@ except ImportError: netlink = False print("!!! Consider installing pyroute2 !!!") -def prepare_suite(obj, test): - original = obj.args.NAMES - - if 'skip' in test and test['skip'] == 'yes': - return - - if 'nsPlugin' not in test['plugins']: - return - - shadow = {} - shadow['IP'] = original['IP'] - shadow['TC'] = original['TC'] - shadow['NS'] = '{}-{}'.format(original['NS'], test['random']) - shadow['DEV0'] = '{}id{}'.format(original['DEV0'], test['id']) - shadow['DEV1'] = '{}id{}'.format(original['DEV1'], test['id']) - shadow['DUMMY'] = '{}id{}'.format(original['DUMMY'], test['id']) - shadow['DEV2'] = original['DEV2'] - obj.args.NAMES = shadow - - if netlink == True: - obj._nl_ns_create() - else: - obj._ns_create() - - # Make sure the netns is visible in the fs - while True: - obj._proc_check() - try: - ns = obj.args.NAMES['NS'] - f = open('/run/netns/{}'.format(ns)) - f.close() - break - except: - time.sleep(0.1) - continue - - obj.args.NAMES = original - class SubPlugin(TdcPlugin): def __init__(self): self.sub_class = 'ns/SubPlugin' @@ -65,19 +27,39 @@ class SubPlugin(TdcPlugin): super().pre_suite(testcount, testlist) - print("Setting up namespaces and devices...") + def prepare_test(self, test): + if 'skip' in test and test['skip'] == 'yes': + return - with Pool(self.args.mp) as p: - it = zip(cycle([self]), testlist) - p.starmap(prepare_suite, it) + if 'nsPlugin' not in test['plugins']: + return - def pre_case(self, caseinfo, test_skip): + if netlink == True: + self._nl_ns_create() + else: + self._ns_create() + + # Make sure the netns is visible in the fs + while True: + self._proc_check() + try: + ns = self.args.NAMES['NS'] + f = open('/run/netns/{}'.format(ns)) + f.close() + break + except: + time.sleep(0.1) + continue + + def pre_case(self, test, test_skip): if self.args.verbose: print('{}.pre_case'.format(self.sub_class)) if test_skip: return + self.prepare_test(test) + def post_case(self): if self.args.verbose: print('{}.post_case'.format(self.sub_class)) From 3d5026fc5adbc796a0547fcef19d997786e0bb31 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:05 -0300 Subject: [PATCH 3/6] selftests: tc-testing: use netns delete from pyroute2 When pyroute2 is available, use the native netns delete routine instead of calling iproute2 to do it. As forks are expensive with some kernel configs, minimize its usage to avoid kselftests timeouts. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-4-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 2b8cbfdf1083..920dcbedc395 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -64,7 +64,10 @@ class SubPlugin(TdcPlugin): if self.args.verbose: print('{}.post_case'.format(self.sub_class)) - self._ns_destroy() + if netlink == True: + self._nl_ns_destroy() + else: + self._ns_destroy() def post_suite(self, index): if self.args.verbose: @@ -174,6 +177,10 @@ class SubPlugin(TdcPlugin): ''' self._exec_cmd_batched('pre', self._ns_create_cmds()) + def _nl_ns_destroy(self): + ns = self.args.NAMES['NS'] + netns.remove(ns) + def _ns_destroy_cmd(self): return self._replace_keywords('netns delete {}'.format(self.args.NAMES['NS'])) From 3f2d94a4ff489ebbc6b66cc33f9775cb33c00533 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:06 -0300 Subject: [PATCH 4/6] selftests: tc-testing: leverage -all in suite ns teardown Instead of listing lingering ns pinned files and delete them one by one, leverage '-all' from iproute2 to do it in a single process fork. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-5-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 920dcbedc395..7b674befceec 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -74,13 +74,12 @@ class SubPlugin(TdcPlugin): print('{}.post_suite'.format(self.sub_class)) # Make sure we don't leak resources - for f in os.listdir('/run/netns/'): - cmd = self._replace_keywords("$IP netns del {}".format(f)) + cmd = "$IP -a netns del" - if self.args.verbose > 3: - print('_exec_cmd: command "{}"'.format(cmd)) + if self.args.verbose > 3: + print('_exec_cmd: command "{}"'.format(cmd)) - subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) def adjust_command(self, stage, command): super().adjust_command(stage, command) From 4b480cfb1066a8394017697ff4a58a970641e9b7 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:07 -0300 Subject: [PATCH 5/6] selftests: tc-testing: timeout on unbounded loops In the spirit of failing early, timeout on unbounded loops that take longer than 20 ticks to complete. Such loops are to ensure that objects created are already visible so tests can proceed without any issues. If a test setup takes more than 20 ticks to see an object, there's definetely something wrong. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-6-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../selftests/tc-testing/plugin-lib/nsPlugin.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 7b674befceec..65c8f3f983b9 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -40,7 +40,10 @@ class SubPlugin(TdcPlugin): self._ns_create() # Make sure the netns is visible in the fs + ticks = 20 while True: + if ticks == 0: + raise TimeoutError self._proc_check() try: ns = self.args.NAMES['NS'] @@ -49,6 +52,7 @@ class SubPlugin(TdcPlugin): break except: time.sleep(0.1) + ticks -= 1 continue def pre_case(self, test, test_skip): @@ -127,7 +131,10 @@ class SubPlugin(TdcPlugin): with IPRoute() as ip: ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'}) ip.link('add', ifname=dummy, kind='dummy') + ticks = 20 while True: + if ticks == 0: + raise TimeoutError try: dev1_idx = ip.link_lookup(ifname=dev1)[0] dummy_idx = ip.link_lookup(ifname=dummy)[0] @@ -136,17 +143,22 @@ class SubPlugin(TdcPlugin): break except: time.sleep(0.1) + ticks -= 1 continue netns.popns() with IPRoute() as ip: + ticks = 20 while True: + if ticks == 0: + raise TimeoutError try: dev0_idx = ip.link_lookup(ifname=dev0)[0] ip.link('set', index=dev0_idx, state='up') break except: time.sleep(0.1) + ticks -= 1 continue def _ns_create_cmds(self): From 4968afa0143dbff8a84b5ce3c302dd7d0bdcfa48 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:08 -0300 Subject: [PATCH 6/6] selftests: tc-testing: report number of workers in use Report the number of workers in use to process the test batches. Since the number is now subject to a limit, avoid users getting confused. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-7-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index f764b43f112b..669ec89ebfe1 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -616,7 +616,7 @@ def test_runner_mp(pm, args, alltests): batches.insert(0, serial) print("Executing {} tests in parallel and {} in serial".format(len(parallel), len(serial))) - print("Using {} batches".format(len(batches))) + print("Using {} batches and {} workers".format(len(batches), args.mp)) # We can't pickle these objects so workaround them global mp_pm