From 53d860952c8215cf9ae1ea33409c8cb71ad6ad3d Mon Sep 17 00:00:00 2001 From: Jens Thoms Toerring Date: Sun, 31 May 2020 11:53:00 +0200 Subject: [PATCH 001/462] regmap: fix alignment issue The assembly and disassembly of data to be sent to or received from a device invoke functions regmap_format_XX() and regmap_parse_XX() that extract or insert data items from or into a buffer, using assignments. In some cases the functions are called with a buffer pointer with an odd address. On architectures with strict alignment requirements this can result in a kernel crash. The assignments have been replaced by functions that take alignment into account. Signed-off-by: Jens Thoms Toerring Link: https://lore.kernel.org/r/20200531095300.GA27570@toerring.de Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 100 ++++++++++++++++------------------- 1 file changed, 46 insertions(+), 54 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index c472f624382d..22d125da4a7d 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -17,6 +17,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "trace.h" @@ -249,22 +250,20 @@ static void regmap_format_8(void *buf, unsigned int val, unsigned int shift) static void regmap_format_16_be(void *buf, unsigned int val, unsigned int shift) { - __be16 *b = buf; - - b[0] = cpu_to_be16(val << shift); + put_unaligned_be16(val << shift, buf); } static void regmap_format_16_le(void *buf, unsigned int val, unsigned int shift) { - __le16 *b = buf; - - b[0] = cpu_to_le16(val << shift); + put_unaligned_le16(val << shift, buf); } static void regmap_format_16_native(void *buf, unsigned int val, unsigned int shift) { - *(u16 *)buf = val << shift; + u16 v = val << shift; + + memcpy(buf, &v, sizeof(v)); } static void regmap_format_24(void *buf, unsigned int val, unsigned int shift) @@ -280,43 +279,39 @@ static void regmap_format_24(void *buf, unsigned int val, unsigned int shift) static void regmap_format_32_be(void *buf, unsigned int val, unsigned int shift) { - __be32 *b = buf; - - b[0] = cpu_to_be32(val << shift); + put_unaligned_be32(val << shift, buf); } static void regmap_format_32_le(void *buf, unsigned int val, unsigned int shift) { - __le32 *b = buf; - - b[0] = cpu_to_le32(val << shift); + put_unaligned_le32(val << shift, buf); } static void regmap_format_32_native(void *buf, unsigned int val, unsigned int shift) { - *(u32 *)buf = val << shift; + u32 v = val << shift; + + memcpy(buf, &v, sizeof(v)); } #ifdef CONFIG_64BIT static void regmap_format_64_be(void *buf, unsigned int val, unsigned int shift) { - __be64 *b = buf; - - b[0] = cpu_to_be64((u64)val << shift); + put_unaligned_be64((u64) val << shift, buf); } static void regmap_format_64_le(void *buf, unsigned int val, unsigned int shift) { - __le64 *b = buf; - - b[0] = cpu_to_le64((u64)val << shift); + put_unaligned_le64((u64) val << shift, buf); } static void regmap_format_64_native(void *buf, unsigned int val, unsigned int shift) { - *(u64 *)buf = (u64)val << shift; + u64 v = (u64) val << shift; + + memcpy(buf, &v, sizeof(v)); } #endif @@ -333,35 +328,34 @@ static unsigned int regmap_parse_8(const void *buf) static unsigned int regmap_parse_16_be(const void *buf) { - const __be16 *b = buf; - - return be16_to_cpu(b[0]); + return get_unaligned_be16(buf); } static unsigned int regmap_parse_16_le(const void *buf) { - const __le16 *b = buf; - - return le16_to_cpu(b[0]); + return get_unaligned_le16(buf); } static void regmap_parse_16_be_inplace(void *buf) { - __be16 *b = buf; + u16 v = get_unaligned_be16(buf); - b[0] = be16_to_cpu(b[0]); + memcpy(buf, &v, sizeof(v)); } static void regmap_parse_16_le_inplace(void *buf) { - __le16 *b = buf; + u16 v = get_unaligned_le16(buf); - b[0] = le16_to_cpu(b[0]); + memcpy(buf, &v, sizeof(v)); } static unsigned int regmap_parse_16_native(const void *buf) { - return *(u16 *)buf; + u16 v; + + memcpy(&v, buf, sizeof(v)); + return v; } static unsigned int regmap_parse_24(const void *buf) @@ -376,69 +370,67 @@ static unsigned int regmap_parse_24(const void *buf) static unsigned int regmap_parse_32_be(const void *buf) { - const __be32 *b = buf; - - return be32_to_cpu(b[0]); + return get_unaligned_be32(buf); } static unsigned int regmap_parse_32_le(const void *buf) { - const __le32 *b = buf; - - return le32_to_cpu(b[0]); + return get_unaligned_le32(buf); } static void regmap_parse_32_be_inplace(void *buf) { - __be32 *b = buf; + u32 v = get_unaligned_be32(buf); - b[0] = be32_to_cpu(b[0]); + memcpy(buf, &v, sizeof(v)); } static void regmap_parse_32_le_inplace(void *buf) { - __le32 *b = buf; + u32 v = get_unaligned_le32(buf); - b[0] = le32_to_cpu(b[0]); + memcpy(buf, &v, sizeof(v)); } static unsigned int regmap_parse_32_native(const void *buf) { - return *(u32 *)buf; + u32 v; + + memcpy(&v, buf, sizeof(v)); + return v; } #ifdef CONFIG_64BIT static unsigned int regmap_parse_64_be(const void *buf) { - const __be64 *b = buf; - - return be64_to_cpu(b[0]); + return get_unaligned_be64(buf); } static unsigned int regmap_parse_64_le(const void *buf) { - const __le64 *b = buf; - - return le64_to_cpu(b[0]); + return get_unaligned_le64(buf); } static void regmap_parse_64_be_inplace(void *buf) { - __be64 *b = buf; + u64 v = get_unaligned_be64(buf); - b[0] = be64_to_cpu(b[0]); + memcpy(buf, &v, sizeof(v)); } static void regmap_parse_64_le_inplace(void *buf) { - __le64 *b = buf; + u64 v = get_unaligned_le64(buf); - b[0] = le64_to_cpu(b[0]); + memcpy(buf, &v, sizeof(v)); } static unsigned int regmap_parse_64_native(const void *buf) { - return *(u64 *)buf; + u64 v; + + memcpy(&v, buf, sizeof(v)); + return v; } #endif From 8bdd79dae1ff5397351b95e249abcae126572617 Mon Sep 17 00:00:00 2001 From: Lingling Xu Date: Tue, 2 Jun 2020 16:24:15 +0800 Subject: [PATCH 002/462] spi: sprd: switch the sequence of setting WDG_LOAD_LOW and _HIGH The watchdog counter consists of WDG_LOAD_LOW and WDG_LOAD_HIGH, which would be loaded to watchdog counter once writing WDG_LOAD_LOW. Fixes: ac1775012058 ("spi: sprd: Add the support of restarting the system") Signed-off-by: Lingling Xu Signed-off-by: Chunyan Zhang Link: https://lore.kernel.org/r/20200602082415.5848-1-zhang.lyra@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-sprd-adi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-sprd-adi.c b/drivers/spi/spi-sprd-adi.c index 88e6543648cb..bd23c4689b46 100644 --- a/drivers/spi/spi-sprd-adi.c +++ b/drivers/spi/spi-sprd-adi.c @@ -389,9 +389,9 @@ static int sprd_adi_restart_handler(struct notifier_block *this, sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_CTRL, val); /* Load the watchdog timeout value, 50ms is always enough. */ + sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_LOAD_HIGH, 0); sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_LOAD_LOW, WDG_LOAD_VAL & WDG_LOAD_MASK); - sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_LOAD_HIGH, 0); /* Start the watchdog to reset system */ sprd_adi_read(sadi, sadi->slave_pbase + REG_WDG_CTRL, &val); From 94579ac3f6d0820adc83b5dc5358ead0158101e9 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Mon, 1 Jun 2020 16:39:37 -0500 Subject: [PATCH 003/462] xfrm: Fix double ESP trailer insertion in IPsec crypto offload. During IPsec performance testing, we see bad ICMP checksum. The error packet has duplicated ESP trailer due to double validate_xmit_xfrm calls. The first call is from ip_output, but the packet cannot be sent because netif_xmit_frozen_or_stopped is true and the packet gets dev_requeue_skb. The second call is from NET_TX softirq. However after the first call, the packet already has the ESP trailer. Fix by marking the skb with XFRM_XMIT bit after the packet is handled by validate_xmit_xfrm to avoid duplicate ESP trailer insertion. Fixes: f6e27114a60a ("net: Add a xfrm validate function to validate_xmit_skb") Signed-off-by: Huy Nguyen Reviewed-by: Boris Pismenny Reviewed-by: Raed Salem Reviewed-by: Saeed Mahameed Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + net/xfrm/xfrm_device.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 094fe682f5d7..c7d213c9f9d8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1008,6 +1008,7 @@ struct xfrm_offload { #define XFRM_GRO 32 #define XFRM_ESP_NO_TRAILER 64 #define XFRM_DEV_RESUME 128 +#define XFRM_XMIT 256 __u32 status; #define CRYPTO_SUCCESS 1 diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index f50d1f97cf8e..626096bd0d29 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -108,7 +108,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur struct xfrm_offload *xo = xfrm_offload(skb); struct sec_path *sp; - if (!xo) + if (!xo || (xo->flags & XFRM_XMIT)) return skb; if (!(features & NETIF_F_HW_ESP)) @@ -129,6 +129,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } + xo->flags |= XFRM_XMIT; + if (skb_is_gso(skb)) { struct net_device *dev = skb->dev; From eea1238867205b9e48a67c1a63219529a73c46fd Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 4 Jun 2020 22:06:43 -0500 Subject: [PATCH 004/462] sata_rcar: handle pm_runtime_get_sync failure cases Calling pm_runtime_get_sync increments the counter even in case of failure, causing incorrect ref count. Call pm_runtime_put if pm_runtime_get_sync fails. Signed-off-by: Navid Emamdoost Signed-off-by: Jens Axboe --- drivers/ata/sata_rcar.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 980aacdbcf3b..141ac600b64c 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -907,7 +907,7 @@ static int sata_rcar_probe(struct platform_device *pdev) pm_runtime_enable(dev); ret = pm_runtime_get_sync(dev); if (ret < 0) - goto err_pm_disable; + goto err_pm_put; host = ata_host_alloc(dev, 1); if (!host) { @@ -937,7 +937,6 @@ static int sata_rcar_probe(struct platform_device *pdev) err_pm_put: pm_runtime_put(dev); -err_pm_disable: pm_runtime_disable(dev); return ret; } @@ -991,8 +990,10 @@ static int sata_rcar_resume(struct device *dev) int ret; ret = pm_runtime_get_sync(dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put(dev); return ret; + } if (priv->type == RCAR_GEN3_SATA) { sata_rcar_init_module(priv); @@ -1017,8 +1018,10 @@ static int sata_rcar_restore(struct device *dev) int ret; ret = pm_runtime_get_sync(dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put(dev); return ret; + } sata_rcar_setup_port(host); From f650ef61e040bcb175dd8762164b00a5d627f20e Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Fri, 5 Jun 2020 09:41:49 +0800 Subject: [PATCH 005/462] ata/libata: Fix usage of page address by page_address in ata_scsi_mode_select_xlat function BUG: KASAN: use-after-free in ata_scsi_mode_select_xlat+0x10bd/0x10f0 drivers/ata/libata-scsi.c:4045 Read of size 1 at addr ffff88803b8cd003 by task syz-executor.6/12621 CPU: 1 PID: 12621 Comm: syz-executor.6 Not tainted 4.19.95 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xac/0xee lib/dump_stack.c:118 print_address_description+0x60/0x223 mm/kasan/report.c:253 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report mm/kasan/report.c:409 [inline] kasan_report.cold+0xae/0x2d8 mm/kasan/report.c:393 ata_scsi_mode_select_xlat+0x10bd/0x10f0 drivers/ata/libata-scsi.c:4045 ata_scsi_translate+0x2da/0x680 drivers/ata/libata-scsi.c:2035 __ata_scsi_queuecmd drivers/ata/libata-scsi.c:4360 [inline] ata_scsi_queuecmd+0x2e4/0x790 drivers/ata/libata-scsi.c:4409 scsi_dispatch_cmd+0x2ee/0x6c0 drivers/scsi/scsi_lib.c:1867 scsi_queue_rq+0xfd7/0x1990 drivers/scsi/scsi_lib.c:2170 blk_mq_dispatch_rq_list+0x1e1/0x19a0 block/blk-mq.c:1186 blk_mq_do_dispatch_sched+0x147/0x3d0 block/blk-mq-sched.c:108 blk_mq_sched_dispatch_requests+0x427/0x680 block/blk-mq-sched.c:204 __blk_mq_run_hw_queue+0xbc/0x200 block/blk-mq.c:1308 __blk_mq_delay_run_hw_queue+0x3c0/0x460 block/blk-mq.c:1376 blk_mq_run_hw_queue+0x152/0x310 block/blk-mq.c:1413 blk_mq_sched_insert_request+0x337/0x6c0 block/blk-mq-sched.c:397 blk_execute_rq_nowait+0x124/0x320 block/blk-exec.c:64 blk_execute_rq+0xc5/0x112 block/blk-exec.c:101 sg_scsi_ioctl+0x3b0/0x6a0 block/scsi_ioctl.c:507 sg_ioctl+0xd37/0x23f0 drivers/scsi/sg.c:1106 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:501 [inline] do_vfs_ioctl+0xae6/0x1030 fs/ioctl.c:688 ksys_ioctl+0x76/0xa0 fs/ioctl.c:705 __do_sys_ioctl fs/ioctl.c:712 [inline] __se_sys_ioctl fs/ioctl.c:710 [inline] __x64_sys_ioctl+0x6f/0xb0 fs/ioctl.c:710 do_syscall_64+0xa0/0x2e0 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45c479 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fb0e9602c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fb0e96036d4 RCX: 000000000045c479 RDX: 0000000020000040 RSI: 0000000000000001 RDI: 0000000000000003 RBP: 000000000076bfc0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 000000000000046d R14: 00000000004c6e1a R15: 000000000076bfcc Allocated by task 12577: set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc mm/kasan/kasan.c:553 [inline] kasan_kmalloc+0xbf/0xe0 mm/kasan/kasan.c:531 __kmalloc+0xf3/0x1e0 mm/slub.c:3749 kmalloc include/linux/slab.h:520 [inline] load_elf_phdrs+0x118/0x1b0 fs/binfmt_elf.c:441 load_elf_binary+0x2de/0x4610 fs/binfmt_elf.c:737 search_binary_handler fs/exec.c:1654 [inline] search_binary_handler+0x15c/0x4e0 fs/exec.c:1632 exec_binprm fs/exec.c:1696 [inline] __do_execve_file.isra.0+0xf52/0x1a90 fs/exec.c:1820 do_execveat_common fs/exec.c:1866 [inline] do_execve fs/exec.c:1883 [inline] __do_sys_execve fs/exec.c:1964 [inline] __se_sys_execve fs/exec.c:1959 [inline] __x64_sys_execve+0x8a/0xb0 fs/exec.c:1959 do_syscall_64+0xa0/0x2e0 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 12577: set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x129/0x170 mm/kasan/kasan.c:521 slab_free_hook mm/slub.c:1370 [inline] slab_free_freelist_hook mm/slub.c:1397 [inline] slab_free mm/slub.c:2952 [inline] kfree+0x8b/0x1a0 mm/slub.c:3904 load_elf_binary+0x1be7/0x4610 fs/binfmt_elf.c:1118 search_binary_handler fs/exec.c:1654 [inline] search_binary_handler+0x15c/0x4e0 fs/exec.c:1632 exec_binprm fs/exec.c:1696 [inline] __do_execve_file.isra.0+0xf52/0x1a90 fs/exec.c:1820 do_execveat_common fs/exec.c:1866 [inline] do_execve fs/exec.c:1883 [inline] __do_sys_execve fs/exec.c:1964 [inline] __se_sys_execve fs/exec.c:1959 [inline] __x64_sys_execve+0x8a/0xb0 fs/exec.c:1959 do_syscall_64+0xa0/0x2e0 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff88803b8ccf00 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 259 bytes inside of 512-byte region [ffff88803b8ccf00, ffff88803b8cd100) The buggy address belongs to the page: page:ffffea0000ee3300 count:1 mapcount:0 mapping:ffff88806cc03080 index:0xffff88803b8cc780 compound_mapcount: 0 flags: 0x100000000008100(slab|head) raw: 0100000000008100 ffffea0001104080 0000000200000002 ffff88806cc03080 raw: ffff88803b8cc780 00000000800c000b 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88803b8ccf00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88803b8ccf80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88803b8cd000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88803b8cd080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88803b8cd100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc You can refer to "https://www.lkml.org/lkml/2019/1/17/474" reproduce this error. The exception code is "bd_len = p[3];", "p" value is ffff88803b8cd000 which belongs to the cache kmalloc-512 of size 512. The "page_address(sg_page(scsi_sglist(scmd)))" maybe from sg_scsi_ioctl function "buffer" which allocated by kzalloc, so "buffer" may not page aligned. This also looks completely buggy on highmem systems and really needs to use a kmap_atomic. --Christoph Hellwig To address above bugs, Paolo Bonzini advise to simpler to just make a char array of size CACHE_MPAGE_LEN+8+8+4-2(or just 64 to make it easy), use sg_copy_to_buffer to copy from the sglist into the buffer, and workthere. Signed-off-by: Ye Bin Signed-off-by: Jens Axboe --- drivers/ata/libata-scsi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 435781a16875..46336084b1a9 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3684,12 +3684,13 @@ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; const u8 *cdb = scmd->cmnd; - const u8 *p; u8 pg, spg; unsigned six_byte, pg_len, hdr_len, bd_len; int len; u16 fp = (u16)-1; u8 bp = 0xff; + u8 buffer[64]; + const u8 *p = buffer; VPRINTK("ENTER\n"); @@ -3723,12 +3724,14 @@ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc) if (!scsi_sg_count(scmd) || scsi_sglist(scmd)->length < len) goto invalid_param_len; - p = page_address(sg_page(scsi_sglist(scmd))); - /* Move past header and block descriptors. */ if (len < hdr_len) goto invalid_param_len; + if (!sg_copy_to_buffer(scsi_sglist(scmd), scsi_sg_count(scmd), + buffer, sizeof(buffer))) + goto invalid_param_len; + if (six_byte) bd_len = p[3]; else From a4902d914e508f3691fa7ef885a76d2b7e735805 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 11 May 2020 13:00:15 +1000 Subject: [PATCH 006/462] xfrm: merge fixup for "remove output_finish indirection from xfrm_state_afinfo" Signed-off-by: Stephen Rothwell Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index e4c23f69f69f..a7ab19353313 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -574,16 +574,12 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) switch (x->outer_mode.family) { case AF_INET: memset(IPCB(skb), 0, sizeof(*IPCB(skb))); -#ifdef CONFIG_NETFILTER IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; -#endif break; case AF_INET6: memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); -#ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; -#endif break; } From e680a4098f6404191e0e438a9758715b0bff6d7f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Sun, 7 Jun 2020 11:34:21 +0200 Subject: [PATCH 007/462] regmap: fix the kerneldoc for regmap_test_bits() The kerneldoc comment for regmap_test_bits() says that it returns -1 on regmap_read() failure. This is not true - it will propagate the error code returned by regmap_read(). Fix it. Fixes: aa2ff9dbaedd ("regmap: provide helpers for simple bit operations") Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20200607093421.22209-1-brgl@bgdev.pl Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 22d125da4a7d..183ec789c3fd 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2936,8 +2936,9 @@ EXPORT_SYMBOL_GPL(regmap_update_bits_base); * @reg: Register to read from * @bits: Bits to test * - * Returns -1 if the underlying regmap_read() fails, 0 if at least one of the - * tested bits is not set and 1 if all tested bits are set. + * Returns 0 if at least one of the tested bits is not set, 1 if all tested + * bits are set and a negative error number if the underlying regmap_read() + * fails. */ int regmap_test_bits(struct regmap *map, unsigned int reg, unsigned int bits) { From 9ec8ade812240f3ef553b5e8bfc74deec3f14339 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 8 Jun 2020 12:00:49 +0200 Subject: [PATCH 008/462] spi: spidev_test: Use %u to format unsigned numbers Consistently use %u to format unsigned numbers. For "bits" this doesn't matter that much, as it is "uint8_t". However, "speed" is "uint32_t", so in case people use "-s -1" to force the maximum, they would see: max speed: -1 Hz (4294967 KHz) While at it, use "k" (kilo) instead of "K" (kelvin) in "kHz". Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200608100049.30648-1-geert+renesas@glider.be Signed-off-by: Mark Brown --- tools/spi/spidev_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c index eec23fa693bd..56ea053f72f2 100644 --- a/tools/spi/spidev_test.c +++ b/tools/spi/spidev_test.c @@ -461,8 +461,8 @@ int main(int argc, char *argv[]) pabort("can't get max speed hz"); printf("spi mode: 0x%x\n", mode); - printf("bits per word: %d\n", bits); - printf("max speed: %d Hz (%d KHz)\n", speed, speed/1000); + printf("bits per word: %u\n", bits); + printf("max speed: %u Hz (%u kHz)\n", speed, speed/1000); if (input_tx) transfer_escaped_string(fd, input_tx); From e0fe70051f12c25c4afb04cb10ca8648c6e761cf Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 8 Jun 2020 11:59:34 +0200 Subject: [PATCH 009/462] spi: rspi: Use requested instead of maximum bit rate Currently, the RSPI driver always tries to use the maximum configured bit rate for communicating with a slave device, even if the transfer(s) in the current message specify a lower rate. Use the mininum rate specified in the message instead. Rename rspi_data.max_speed_hz accordingly. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200608095940.30516-3-geert+renesas@glider.be Signed-off-by: Mark Brown --- drivers/spi/spi-rspi.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index 06192c9ea813..cbc2387d450c 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -179,7 +179,7 @@ struct rspi_data { void __iomem *addr; - u32 max_speed_hz; + u32 speed_hz; struct spi_controller *ctlr; struct platform_device *pdev; wait_queue_head_t wait; @@ -258,8 +258,7 @@ static int rspi_set_config_register(struct rspi_data *rspi, int access_size) rspi_write8(rspi, rspi->sppcr, RSPI_SPPCR); /* Sets transfer bit rate */ - spbr = DIV_ROUND_UP(clk_get_rate(rspi->clk), - 2 * rspi->max_speed_hz) - 1; + spbr = DIV_ROUND_UP(clk_get_rate(rspi->clk), 2 * rspi->speed_hz) - 1; rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR); /* Disable dummy transmission, set 16-bit word access, 1 frame */ @@ -299,14 +298,14 @@ static int rspi_rz_set_config_register(struct rspi_data *rspi, int access_size) clksrc = clk_get_rate(rspi->clk); while (div < 3) { - if (rspi->max_speed_hz >= clksrc/4) /* 4=(CLK/2)/2 */ + if (rspi->speed_hz >= clksrc/4) /* 4=(CLK/2)/2 */ break; div++; clksrc /= 2; } /* Sets transfer bit rate */ - spbr = DIV_ROUND_UP(clksrc, 2 * rspi->max_speed_hz) - 1; + spbr = DIV_ROUND_UP(clksrc, 2 * rspi->speed_hz) - 1; rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR); rspi->spcmd |= div << 2; @@ -341,7 +340,7 @@ static int qspi_set_config_register(struct rspi_data *rspi, int access_size) rspi_write8(rspi, rspi->sppcr, RSPI_SPPCR); /* Sets transfer bit rate */ - spbr = DIV_ROUND_UP(clk_get_rate(rspi->clk), 2 * rspi->max_speed_hz); + spbr = DIV_ROUND_UP(clk_get_rate(rspi->clk), 2 * rspi->speed_hz); rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR); /* Disable dummy transmission, set byte access */ @@ -949,9 +948,24 @@ static int rspi_prepare_message(struct spi_controller *ctlr, { struct rspi_data *rspi = spi_controller_get_devdata(ctlr); struct spi_device *spi = msg->spi; + const struct spi_transfer *xfer; int ret; - rspi->max_speed_hz = spi->max_speed_hz; + /* + * As the Bit Rate Register must not be changed while the device is + * active, all transfers in a message must use the same bit rate. + * In theory, the sequencer could be enabled, and each Command Register + * could divide the base bit rate by a different value. + * However, most RSPI variants do not have Transfer Data Length + * Multiplier Setting Registers, so each sequence step would be limited + * to a single word, making this feature unsuitable for large + * transfers, which would gain most from it. + */ + rspi->speed_hz = spi->max_speed_hz; + list_for_each_entry(xfer, &msg->transfers, transfer_list) { + if (xfer->speed_hz < rspi->speed_hz) + rspi->speed_hz = xfer->speed_hz; + } rspi->spcmd = SPCMD_SSLKP; if (spi->mode & SPI_CPOL) From 3ee06a6d532f75f20528ff4d2c473cda36c484fe Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 8 Jun 2020 15:22:17 +0200 Subject: [PATCH 010/462] dma-pool: fix too large DMA pools on medium memory size systems On systems with at least 32 MiB, but less than 32 GiB of RAM, the DMA memory pools are much larger than intended (e.g. 2 MiB instead of 128 KiB on a 256 MiB system). Fix this by correcting the calculation of the number of GiBs of RAM in the system. Invert the order of the min/max operations, to keep on calculating in pages until the last step, which aids readability. Fixes: 1d659236fb43c4d2 ("dma-pool: scale the default DMA coherent pool size with memory capacity") Signed-off-by: Geert Uytterhoeven Acked-by: David Rientjes Signed-off-by: Christoph Hellwig --- kernel/dma/pool.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 35bb51c31fff..8cfa01243ed2 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -175,10 +175,9 @@ static int __init dma_atomic_pool_init(void) * sizes to 128KB per 1GB of memory, min 128KB, max MAX_ORDER-1. */ if (!atomic_pool_size) { - atomic_pool_size = max(totalram_pages() >> PAGE_SHIFT, 1UL) * - SZ_128K; - atomic_pool_size = min_t(size_t, atomic_pool_size, - 1 << (PAGE_SHIFT + MAX_ORDER-1)); + unsigned long pages = totalram_pages() / (SZ_1G / SZ_128K); + pages = min_t(unsigned long, pages, MAX_ORDER_NR_PAGES); + atomic_pool_size = max_t(size_t, pages << PAGE_SHIFT, SZ_128K); } INIT_WORK(&atomic_pool_work, atomic_pool_work_fn); From 6c3c184fc420a2d8233d0e0f977e7df8051c5edb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 5 Jun 2020 13:36:38 -0300 Subject: [PATCH 011/462] tools headers API: Update faccessat2 affected files Update the copies of files affected by: c8ffd8bcdd28 ("vfs: add faccessat2 syscall") To address this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fcntl.h' differs from latest version at 'include/uapi/linux/fcntl.h' diff -u tools/include/uapi/linux/fcntl.h include/uapi/linux/fcntl.h Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Which results in 'perf trace' gaining support for the 'faccessat2' syscall, now one can use: # perf trace -e faccessat2 And have system wide tracing of this syscall. And this also will include it; # perf trace -e faccess* Together with the other variants. How it affects building/usage (on an x86_64 system): $ cp /tmp/build/perf/arch/x86/include/generated/asm/syscalls_64.c /tmp/syscalls_64.c.before $ [root@five ~]# perf trace -e faccessat2 event syntax error: 'faccessat2' \___ parser error Run 'perf list' for a list of valid events Usage: perf trace [] [] or: perf trace [] -- [] or: perf trace record [] [] or: perf trace record [] -- [] -e, --event event/syscall selector. use 'perf list' to list available events [root@five ~]# $ cp arch/x86/entry/syscalls/syscall_64.tbl tools/perf/arch/x86/entry/syscalls/syscall_64.tbl $ git diff diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 37b844f839bc..78847b32e137 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -359,6 +359,7 @@ 435 common clone3 sys_clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 # # x32-specific system call numbers start at 512 to avoid cache impact $ $ make -C tools/perf O=/tmp/build/perf/ install-bin CC /tmp/build/perf/util/syscalltbl.o LD /tmp/build/perf/util/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/perf [root@five ~]# perf trace -e faccessat2 ^C[root@five ~]# Cc: Miklos Szeredi Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 4 +++- tools/include/uapi/linux/fcntl.h | 10 ++++++++++ tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 3a3201e4618e..f4a01305d9a6 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -855,9 +855,11 @@ __SYSCALL(__NR_clone3, sys_clone3) __SYSCALL(__NR_openat2, sys_openat2) #define __NR_pidfd_getfd 438 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) +#define __NR_faccessat2 439 +__SYSCALL(__NR_faccessat2, sys_faccessat2) #undef __NR_syscalls -#define __NR_syscalls 439 +#define __NR_syscalls 440 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index ca88b7bce553..2f86b2ad6d7e 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -84,10 +84,20 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +/* + * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is + * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to + * unlinkat. The two functions do completely different things and therefore, + * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to + * faccessat would be undefined behavior and thus treating it equivalent to + * AT_EACCESS is valid undefined behavior. + */ #define AT_FDCWD -100 /* Special value used to indicate openat should use the current working directory. */ #define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#define AT_EACCESS 0x200 /* Test access permitted for + effective IDs, not real IDs. */ #define AT_REMOVEDIR 0x200 /* Remove directory instead of unlinking file. */ #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 37b844f839bc..78847b32e137 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -359,6 +359,7 @@ 435 common clone3 sys_clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 # # x32-specific system call numbers start at 512 to avoid cache impact From 7e579f3a074c9348389ea0cf0491264e1ffbc585 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Jun 2020 11:23:33 -0300 Subject: [PATCH 012/462] tools arch x86 uapi: Synch asm/unistd.h with the kernel sources To pick up the change in: 700d3a5a664d ("x86/syscalls: Revert "x86/syscalls: Make __X32_SYSCALL_BIT be unsigned long"") That doesn't trigger any changes in tooling and silences this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/unistd.h' differs from latest version at 'arch/x86/include/uapi/asm/unistd.h' diff -u tools/arch/x86/include/uapi/asm/unistd.h arch/x86/include/uapi/asm/unistd.h Cc: Adrian Hunter Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/unistd.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/uapi/asm/unistd.h b/tools/arch/x86/include/uapi/asm/unistd.h index 30d7d04d72d6..be5e2e747f50 100644 --- a/tools/arch/x86/include/uapi/asm/unistd.h +++ b/tools/arch/x86/include/uapi/asm/unistd.h @@ -2,7 +2,14 @@ #ifndef _UAPI_ASM_X86_UNISTD_H #define _UAPI_ASM_X86_UNISTD_H -/* x32 syscall flag bit */ +/* + * x32 syscall flag bit. Some user programs expect syscall NR macros + * and __X32_SYSCALL_BIT to have type int, even though syscall numbers + * are, for practical purposes, unsigned long. + * + * Fortunately, expressions like (nr & ~__X32_SYSCALL_BIT) do the right + * thing regardless. + */ #define __X32_SYSCALL_BIT 0x40000000 #ifndef __KERNEL__ From 93dc627f48018df6739f09fec9141bce356cf193 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Jun 2020 11:32:48 -0300 Subject: [PATCH 013/462] tools headers uapi: Sync linux/stat.h with the kernel sources To pick the changes from: 80340fe3605c ("statx: add mount_root") fa2fcf4f1df1 ("statx: add mount ID") 581701b7efd6 ("uapi: deprecate STATX_ALL") 712b2698e4c0 ("fs/stat: Define DAX statx attribute") These add some constants that will have to be manually added in a followup cset, at some point this should move to the shell based automated way. This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/stat.h' differs from latest version at 'include/uapi/linux/stat.h' diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h Cc: Adrian Hunter Cc: Darrick J. Wong Cc: Ira Weiny Cc: Jiri Olsa Cc: Miklos Szeredi Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/stat.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index d1192783139a..82cc58fe9368 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -123,7 +123,10 @@ struct statx { __u32 stx_dev_major; /* ID of device containing file [uncond] */ __u32 stx_dev_minor; /* 0x90 */ - __u64 __spare2[14]; /* Spare space for future expansion */ + __u64 stx_mnt_id; + __u64 __spare2; + /* 0xa0 */ + __u64 __spare3[12]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -148,6 +151,7 @@ struct statx { #define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -177,7 +181,9 @@ struct statx { #define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ #define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ +#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ +#define STATX_ATTR_DAX 0x00002000 /* [I] File is DAX */ #endif /* _UAPI_LINUX_STAT_H */ From 5d33cbfedb51f73283f5f995b27e4aecdd614abc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Jun 2020 11:38:11 -0300 Subject: [PATCH 014/462] perf beauty: Add support to STATX_MNT_ID in the 'statx' syscall 'mask' argument Introduced in: fa2fcf4f1df1 ("statx: add mount ID") Cc: Adrian Hunter Cc: Jiri Olsa Cc: Miklos Szeredi Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/statx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c index 811cc0eeb2d5..110f0c609d84 100644 --- a/tools/perf/trace/beauty/statx.c +++ b/tools/perf/trace/beauty/statx.c @@ -65,6 +65,7 @@ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_a P_FLAG(SIZE); P_FLAG(BLOCKS); P_FLAG(BTIME); + P_FLAG(MNT_ID); #undef P_FLAG From d8e1ef67722bba39a229a289844ac135998a4066 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Jun 2020 11:44:32 -0300 Subject: [PATCH 015/462] tools headers UAPI: Sync linux/fscrypt.h with the kernel sources To pick the changes from: e3b1078bedd3 ("fscrypt: add support for IV_INO_LBLK_32 policies") That don't trigger any changes in tooling. This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fscrypt.h' differs from latest version at 'include/uapi/linux/fscrypt.h' diff -u tools/include/uapi/linux/fscrypt.h include/uapi/linux/fscrypt.h Cc: Adrian Hunter Cc: Eric Biggers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fscrypt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index a10e3cdc2839..7875709ccfeb 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -19,7 +19,8 @@ #define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03 #define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08 -#define FSCRYPT_POLICY_FLAGS_VALID 0x0F +#define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32 0x10 +#define FSCRYPT_POLICY_FLAGS_VALID 0x1F /* Encryption algorithms */ #define FSCRYPT_MODE_AES_256_XTS 1 From 377cb673cf35ec22878975f7eec0373378d52d3b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Jun 2020 11:53:23 -0300 Subject: [PATCH 016/462] tools headers UAPI: Sync drm/i915_drm.h with the kernel sources To pick the change in: 4ef10fe05ba0 ("drm/i915/perf: add new open param to configure polling of OA buffer") 11ecbdddf2f8 ("drm/i915/perf: introduce global sseu pinning") That don't result in any changes in tooling, just silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Cc: Adrian Hunter Cc: Jiri Olsa Cc: Lionel Landwerlin Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 2813e579b480..14b67cd6b54b 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -1969,6 +1969,30 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_HOLD_PREEMPTION, + /** + * Specifying this pins all contexts to the specified SSEU power + * configuration for the duration of the recording. + * + * This parameter's value is a pointer to a struct + * drm_i915_gem_context_param_sseu. + * + * This property is available in perf revision 4. + */ + DRM_I915_PERF_PROP_GLOBAL_SSEU, + + /** + * This optional parameter specifies the timer interval in nanoseconds + * at which the i915 driver will check the OA buffer for available data. + * Minimum allowed value is 100 microseconds. A default value is used by + * the driver if this parameter is not specified. Note that larger timer + * values will reduce cpu consumption during OA perf captures. However, + * excessively large values would potentially result in OA buffer + * overwrites as captures reach end of the OA buffer. + * + * This property is available in perf revision 5. + */ + DRM_I915_PERF_PROP_POLL_OA_PERIOD, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; From dd76c302955de405ea589e36c574f6f10eca99b6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Jun 2020 12:28:16 -0300 Subject: [PATCH 017/462] tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: f97f5a56f597 ("x86/kvm/hyper-v: Add support for synthetic debugger interface") 850448f35aaf ("KVM: nVMX: Fix VMX preemption timer migration") 2c4c41325540 ("KVM: x86: Print symbolic names of VMX VM-Exit flags in traces") cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE") f7d31e65368a ("x86/kvm/hyper-v: Explicitly align hcall param for kvm_hyperv_exit") 72de5fa4c161 ("KVM: x86: announce KVM_FEATURE_ASYNC_PF_INT") acd05785e48c ("kvm: add capability for halt polling") 3ecad8c2c1ff ("docs: fix broken references for ReST files that moved around") That do not result in any change in tooling, as the additions are not being used in any table generator. This silences these perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h' diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h' diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Cc: Adrian Hunter Cc: David Matlack Cc: Jiri Olsa Cc: Jon Doron Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Peter Shier Cc: Sean Christopherson Cc: Vitaly Kuznetsov Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 21 +++++++++++++++++++-- tools/arch/x86/include/uapi/asm/vmx.h | 3 +++ tools/include/uapi/linux/kvm.h | 14 ++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 43e24903812c..17c5a038f42d 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -385,33 +385,48 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) #define KVM_STATE_NESTED_FORMAT_VMX 0 -#define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */ +#define KVM_STATE_NESTED_FORMAT_SVM 1 #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 #define KVM_STATE_NESTED_EVMCS 0x00000004 #define KVM_STATE_NESTED_MTF_PENDING 0x00000008 +#define KVM_STATE_NESTED_GIF_SET 0x00000100 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 #define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000 +#define KVM_STATE_NESTED_SVM_VMCB_SIZE 0x1000 + +#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; - __u64 preemption_timer_deadline; }; struct kvm_vmx_nested_state_hdr { + __u32 flags; __u64 vmxon_pa; __u64 vmcs12_pa; + __u64 preemption_timer_deadline; struct { __u16 flags; } smm; }; +struct kvm_svm_nested_state_data { + /* Save area only used if KVM_STATE_NESTED_RUN_PENDING. */ + __u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE]; +}; + +struct kvm_svm_nested_state_hdr { + __u64 vmcb_pa; +}; + /* for KVM_CAP_NESTED_STATE */ struct kvm_nested_state { __u16 flags; @@ -420,6 +435,7 @@ struct kvm_nested_state { union { struct kvm_vmx_nested_state_hdr vmx; + struct kvm_svm_nested_state_hdr svm; /* Pad the header to 128 bytes. */ __u8 pad[120]; @@ -432,6 +448,7 @@ struct kvm_nested_state { */ union { struct kvm_vmx_nested_state_data vmx[0]; + struct kvm_svm_nested_state_data svm[0]; } data; }; diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index e95b72ec19bc..b8ff9e8ac0d5 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -150,6 +150,9 @@ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ { EXIT_REASON_TPAUSE, "TPAUSE" } +#define VMX_EXIT_REASON_FLAGS \ + { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } + #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index fdd632c833b4..4fdf30316582 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -188,10 +188,13 @@ struct kvm_s390_cmma_log { struct kvm_hyperv_exit { #define KVM_EXIT_HYPERV_SYNIC 1 #define KVM_EXIT_HYPERV_HCALL 2 +#define KVM_EXIT_HYPERV_SYNDBG 3 __u32 type; + __u32 pad1; union { struct { __u32 msr; + __u32 pad2; __u64 control; __u64 evt_page; __u64 msg_page; @@ -201,6 +204,15 @@ struct kvm_hyperv_exit { __u64 result; __u64 params[2]; } hcall; + struct { + __u32 msr; + __u32 pad2; + __u64 control; + __u64 status; + __u64 send_page; + __u64 recv_page; + __u64 pending_page; + } syndbg; } u; }; @@ -1017,6 +1029,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_VCPU_RESETS 179 #define KVM_CAP_S390_PROTECTED 180 #define KVM_CAP_PPC_SECURE_GUEST 181 +#define KVM_CAP_HALT_POLL 182 +#define KVM_CAP_ASYNC_PF_INT 183 #ifdef KVM_CAP_IRQ_ROUTING From 11b6e5482e178055ec1f2444b55f2518713809d1 Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Mon, 8 Jun 2020 13:18:17 -0300 Subject: [PATCH 018/462] perf report: Fix NULL pointer dereference in hists__fprintf_nr_sample_events() The 'evname' variable can be NULL, as it is checked a few lines back, check it before using. Fixes: 9e207ddfa207 ("perf report: Show call graph from reference events") Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/ Signed-off-by: Gaurav Singh --- tools/perf/builtin-report.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b63b3fb2de70..5f1d2a878fad 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -478,8 +478,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report if (rep->time_str) ret += fprintf(fp, " (time slices: %s)", rep->time_str); - if (symbol_conf.show_ref_callgraph && - strstr(evname, "call-graph=no")) { + if (symbol_conf.show_ref_callgraph && evname && strstr(evname, "call-graph=no")) { ret += fprintf(fp, ", show reference callgraph"); } From c0c652fc705de75f4ba52e93053acc1ed3933e74 Mon Sep 17 00:00:00 2001 From: Hongbo Yao Date: Fri, 5 Jun 2020 17:17:40 +0800 Subject: [PATCH 019/462] perf stat: Fix NULL pointer dereference If config->aggr_map is NULL and config->aggr_get_id is not NULL, the function print_aggr() will still calling arrg_update_shadow(), which can result in accessing the invalid pointer. Fixes: 088519f318be ("perf stat: Move the display functions to stat-display.c") Signed-off-by: Hongbo Yao Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Wei Li Link: https://lore.kernel.org/lkml/20200608163625.GC3073@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 3c6976f7574c..57d0706e1330 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -668,7 +668,7 @@ static void print_aggr(struct perf_stat_config *config, int s; bool first; - if (!(config->aggr_map || config->aggr_get_id)) + if (!config->aggr_map || !config->aggr_get_id) return; aggr_update_shadow(config, evlist); @@ -1169,7 +1169,7 @@ static void print_percore(struct perf_stat_config *config, int s; bool first = true; - if (!(config->aggr_map || config->aggr_get_id)) + if (!config->aggr_map || !config->aggr_get_id) return; if (config->percore_show_thread) From 9256c3031eb9beafa3957c61093104c3c75a6148 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Tue, 9 Jun 2020 10:10:18 +0200 Subject: [PATCH 020/462] perf probe: Fix user attribute access in kprobes Issue: # perf probe -a 'do_sched_setscheduler pid policy param->sched_priority@user' did not work before. Fix: Make: # perf probe -a 'do_sched_setscheduler pid policy param->sched_priority@user' output equivalent to ftrace: # echo 'p:probe/do_sched_setscheduler _text+517384 pid=%r2:s32 policy=%r3:s32 sched_priority=+u0(%r4):s32' > /sys/kernel/debug/tracing/kprobe_events Other: 1. Right now, __match_glob() does not handle [u]. For now, use *u]. 2. @user attribute was introduced in commit 1e032f7cfa14 ("perf-probe: Add user memory access attribute support") Test: 1. perf probe -a 'do_sched_setscheduler pid policy param->sched_priority@user' 2 ./perf script sched 305669 [000] 1614458.838675: perf_bpf_probe:func: (2904e508) pid=261614 policy=2 sched_priority=1 3. cat /sys/kernel/debug/tracing/trace <...>-309956 [006] .... 1616098.093957: 0: prio: 1 Committer testing: Before: # perf probe -a 'do_sched_setscheduler pid policy param->sched_priority@user' param(type:sched_param) has no member sched_priority@user. Error: Failed to add events. # pahole sched_param struct sched_param { int sched_priority; /* 0 4 */ /* size: 4, cachelines: 1, members: 1 */ /* last cacheline: 4 bytes */ }; # After: # perf probe -a 'do_sched_setscheduler pid policy param->sched_priority@user' Added new event: probe:do_sched_setscheduler (on do_sched_setscheduler with pid policy sched_priority=param->sched_priority) You can now use it in all perf tools, such as: perf record -e probe:do_sched_setscheduler -aR sleep 1 # cat /sys/kernel/debug/tracing/kprobe_events p:probe/do_sched_setscheduler _text+1113792 pid=%di:s32 policy=%si:s32 sched_priority=+u0(%dx):s32 # Fixes: 1e032f7cfa14 ("perf-probe: Add user memory access attribute support") Signed-off-by: Sumanth Korikkar Reviewed-by: Thomas Richter Acked-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Ilya Leoshkevich Cc: Jiri Olsa Cc: Steven Rostedt (VMware) Cc: bpf@vger.kernel.org LPU-Reference: 20200609081019.60234-2-sumanthk@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 7 +++++-- tools/perf/util/probe-file.c | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a08f373d3305..df713a5d1e26 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1575,7 +1575,7 @@ static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg) } tmp = strchr(str, '@'); - if (tmp && tmp != str && strcmp(tmp + 1, "user")) { /* user attr */ + if (tmp && tmp != str && !strcmp(tmp + 1, "user")) { /* user attr */ if (!user_access_is_supported()) { semantic_error("ftrace does not support user access\n"); return -EINVAL; @@ -1995,7 +1995,10 @@ static int __synthesize_probe_trace_arg_ref(struct probe_trace_arg_ref *ref, if (depth < 0) return depth; } - err = strbuf_addf(buf, "%+ld(", ref->offset); + if (ref->user_access) + err = strbuf_addf(buf, "%s%ld(", "+u", ref->offset); + else + err = strbuf_addf(buf, "%+ld(", ref->offset); return (err < 0) ? err : depth; } diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 8c852948513e..064b63a6a3f3 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -1044,7 +1044,7 @@ static struct { DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"), DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"), DEFINE_TYPE(FTRACE_README_UPROBE_REF_CTR, "*ref_ctr_offset*"), - DEFINE_TYPE(FTRACE_README_USER_ACCESS, "*[u]*"), + DEFINE_TYPE(FTRACE_README_USER_ACCESS, "*u]*"), DEFINE_TYPE(FTRACE_README_MULTIPROBE_EVENT, "*Create/append/*"), DEFINE_TYPE(FTRACE_README_IMMEDIATE_VALUE, "*\\imm-value,*"), }; From d38c692f160ea437585cb1ad1dc8f81c8898b46e Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Tue, 9 Jun 2020 10:10:19 +0200 Subject: [PATCH 021/462] perf bpf: Fix bpf prologue generation Issue: bpf_probe_read() is no longer available for architecture which has overlapping address space. Hence bpf prologue generation fails Fix: Use bpf_probe_read_kernel for kernel member access. For user attribute access in kprobes, use bpf_probe_read_user. Other: @user attribute was introduced in commit 1e032f7cfa14 ("perf-probe: Add user memory access attribute support") Test: 1. ulimit -l 128 ; ./perf record -e tests/bpf_sched_setscheduler.c 2. cat tests/bpf_sched_setscheduler.c static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = (void *) 6; static int (*bpf_probe_read_user)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 112; static int (*bpf_probe_read_kernel)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 113; SEC("func=do_sched_setscheduler pid policy param->sched_priority@user") int bpf_func__setscheduler(void *ctx, int err, pid_t pid, int policy, int param) { char fmt[] = "prio: %ld"; bpf_trace_printk(fmt, sizeof(fmt), param); return 1; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; 3. ./perf script sched 305669 [000] 1614458.838675: perf_bpf_probe:func: (2904e508) pid=261614 policy=2 sched_priority=1 4. cat /sys/kernel/debug/tracing/trace <...>-309956 [006] .... 1616098.093957: 0: prio: 1 Committer testing: I had to add some missing headers in the bpf_sched_setscheduler.c test proggie, then instead of using record+script I used 'perf trace' to drive everything in one go: # cat bpf_sched_setscheduler.c #include #include static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = (void *) 6; static int (*bpf_probe_read_user)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 112; static int (*bpf_probe_read_kernel)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 113; SEC("func=do_sched_setscheduler pid policy param->sched_priority@user") int bpf_func__setscheduler(void *ctx, int err, pid_t pid, int policy, int param) { char fmt[] = "prio: %ld"; bpf_trace_printk(fmt, sizeof(fmt), param); return 1; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; # # # perf trace -e bpf_sched_setscheduler.c chrt -f 42 sleep 1 0.000 chrt/80125 perf_bpf_probe:func(__probe_ip: -1676607808, policy: 1, sched_priority: 42) # And even with backtraces :-) # perf trace -e bpf_sched_setscheduler.c/max-stack=8/ chrt -f 42 sleep 1 0.000 chrt/79805 perf_bpf_probe:func(__probe_ip: -1676607808, policy: 1, sched_priority: 42) do_sched_setscheduler ([kernel.kallsyms]) __x64_sys_sched_setscheduler ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64 ([kernel.kallsyms]) __GI___sched_setscheduler (/usr/lib64/libc-2.30.so) # Signed-off-by: Sumanth Korikkar Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Ilya Leoshkevich Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: bpf@vger.kernel.org LPU-Reference: 20200609081019.60234-3-sumanthk@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-prologue.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c index b020a8678eb9..9887ae09242d 100644 --- a/tools/perf/util/bpf-prologue.c +++ b/tools/perf/util/bpf-prologue.c @@ -142,7 +142,8 @@ static int gen_read_mem(struct bpf_insn_pos *pos, int src_base_addr_reg, int dst_addr_reg, - long offset) + long offset, + int probeid) { /* mov arg3, src_base_addr_reg */ if (src_base_addr_reg != BPF_REG_ARG3) @@ -159,7 +160,7 @@ gen_read_mem(struct bpf_insn_pos *pos, ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos); /* Call probe_read */ - ins(BPF_EMIT_CALL(BPF_FUNC_probe_read), pos); + ins(BPF_EMIT_CALL(probeid), pos); /* * Error processing: if read fail, goto error code, * will be relocated. Target should be the start of @@ -241,7 +242,7 @@ static int gen_prologue_slowpath(struct bpf_insn_pos *pos, struct probe_trace_arg *args, int nargs) { - int err, i; + int err, i, probeid; for (i = 0; i < nargs; i++) { struct probe_trace_arg *arg = &args[i]; @@ -276,11 +277,16 @@ gen_prologue_slowpath(struct bpf_insn_pos *pos, stack_offset), pos); ref = arg->ref; + probeid = BPF_FUNC_probe_read_kernel; while (ref) { pr_debug("prologue: arg %d: offset %ld\n", i, ref->offset); + + if (ref->user_access) + probeid = BPF_FUNC_probe_read_user; + err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7, - ref->offset); + ref->offset, probeid); if (err) { pr_err("prologue: failed to generate probe_read function call\n"); goto errout; From c2412fae3f01725615b0de472095a9e16ed30ca9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 8 Jun 2020 22:36:09 -0700 Subject: [PATCH 022/462] perf parse-events: Fix an incompatible pointer Arrays are pointer types and don't need their address taking. Fixes: 8255718f4bed (perf pmu: Expand PMU events by prefix match) Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200609053610.206588-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index c4ca932d092d..d4e076c9c2ab 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -349,7 +349,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc struct list_head *list; char pmu_name[128]; - snprintf(&pmu_name, 128, "%s-%s", $1, $3); + snprintf(pmu_name, sizeof(pmu_name), "%s-%s", $1, $3); free($1); free($3); if (parse_events_multi_pmu_add(_parse_state, pmu_name, &list) < 0) From ffaecd7d1f88a955f0b321749de8c0158f973afd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 8 Jun 2020 22:36:10 -0700 Subject: [PATCH 023/462] perf parse-events: Fix an old style declaration Fixes: a26e47162d76 (perf tools: Move ALLOC_LIST into a function) Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200609053610.206588-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index d4e076c9c2ab..acef87d9af58 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -26,7 +26,7 @@ do { \ YYABORT; \ } while (0) -static struct list_head* alloc_list() +static struct list_head* alloc_list(void) { struct list_head *list; From a575357b6435c6dd988b8194c59eec83f6e5d5af Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 9 Jun 2020 18:55:27 +0200 Subject: [PATCH 024/462] spi: dt-bindings: amlogic, meson-gx-spicc: Fix schema for meson-g12a spi@13000: clock-names: Additional items are not allowed ('pclk' was unexpected) spi@13000: clock-names: ['core', 'pclk'] is too long spi@13000: clocks: [[2, 23], [2, 258]] is too long spi@15000: clock-names: Additional items are not allowed ('pclk' was unexpected) spi@15000: clock-names: ['core', 'pclk'] is too long spi@15000: clocks: [[2, 29], [2, 261]] is too long Conditional schema properties don't overwrite others. Instead of restrictions have to be validated. So general clock amount is 1-2 and depending on the actual device type limit the mount to 1 or 2. Signed-off-by: Alexander Stein Link: https://lore.kernel.org/r/20200609165527.55183-1-alexander.stein@mailbox.org Signed-off-by: Mark Brown --- .../bindings/spi/amlogic,meson-gx-spicc.yaml | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/amlogic,meson-gx-spicc.yaml b/Documentation/devicetree/bindings/spi/amlogic,meson-gx-spicc.yaml index 9147df29022a..38efb50081e3 100644 --- a/Documentation/devicetree/bindings/spi/amlogic,meson-gx-spicc.yaml +++ b/Documentation/devicetree/bindings/spi/amlogic,meson-gx-spicc.yaml @@ -34,12 +34,15 @@ properties: maxItems: 1 clocks: - maxItems: 1 + minItems: 1 + maxItems: 2 + items: + - description: controller register bus clock + - description: baud rate generator and delay control clock clock-names: - description: input clock for the baud rate generator - items: - - const: core + minItems: 1 + maxItems: 2 if: properties: @@ -51,17 +54,22 @@ if: then: properties: clocks: - contains: - items: - - description: controller register bus clock - - description: baud rate generator and delay control clock + minItems: 2 clock-names: - minItems: 2 items: - const: core - const: pclk +else: + properties: + clocks: + maxItems: 1 + + clock-names: + items: + - const: core + required: - compatible - reg From 85d0f9ad8212d655ce71e9034d95eacce63538f3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 9 Jun 2020 16:43:43 -0700 Subject: [PATCH 025/462] perf pmu: Remove unused declaration This avoids multiple declarations if the flex header is included. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200609234344.3795-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 85e0c7f2515c..f971d9aa4570 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -86,7 +86,6 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, struct perf_pmu_info *info); struct list_head *perf_pmu__alias(struct perf_pmu *pmu, struct list_head *head_terms); -int perf_pmu_wrap(void); void perf_pmu_error(struct list_head *list, char *name, char const *msg); int perf_pmu__new_format(struct list_head *list, char *name, From 81921a828b94ce2816932c19a5ec74d302972833 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 5 Jun 2020 20:37:43 +0300 Subject: [PATCH 026/462] drm/amd/display: Use kvfree() to free coeff in build_regamma() Use kvfree() instead of kfree() to free coeff in build_regamma() because the memory is allocated with kvzalloc(). Fixes: e752058b8671 ("drm/amd/display: Optimize gamma calculations") Cc: stable@vger.kernel.org Signed-off-by: Denis Efremov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 9431b48aecb4..56bb1f9f77ce 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -843,7 +843,7 @@ static bool build_regamma(struct pwl_float_data_ex *rgb_regamma, pow_buffer_ptr = -1; // reset back to no optimize ret = true; release: - kfree(coeff); + kvfree(coeff); return ret; } From 43a562774fceba867e8eebba977d7d42f8a2eac7 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 5 Jun 2020 20:37:44 +0300 Subject: [PATCH 027/462] drm/amd/display: Use kfree() to free rgb_user in calculate_user_regamma_ramp() Use kfree() instead of kvfree() to free rgb_user in calculate_user_regamma_ramp() because the memory is allocated with kcalloc(). Signed-off-by: Denis Efremov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 56bb1f9f77ce..bcfe34ef8c28 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1777,7 +1777,7 @@ bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf, kfree(rgb_regamma); rgb_regamma_alloc_fail: - kvfree(rgb_user); + kfree(rgb_user); rgb_user_alloc_fail: return ret; } From 65de50969a77509452ae590e9449b70a22b923bb Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 10 Jun 2020 14:57:13 -0700 Subject: [PATCH 028/462] selinux: fix double free Clang's static analysis tool reports these double free memory errors. security/selinux/ss/services.c:2987:4: warning: Attempt to free released memory [unix.Malloc] kfree(bnames[i]); ^~~~~~~~~~~~~~~~ security/selinux/ss/services.c:2990:2: warning: Attempt to free released memory [unix.Malloc] kfree(bvalues); ^~~~~~~~~~~~~~ So improve the security_get_bools error handling by freeing these variables and setting their return pointers to NULL and the return len to 0 Cc: stable@vger.kernel.org Signed-off-by: Tom Rix Acked-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/ss/services.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 313919bd42f8..ef0afd878bfc 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2888,8 +2888,12 @@ err: if (*names) { for (i = 0; i < *len; i++) kfree((*names)[i]); + kfree(*names); } kfree(*values); + *len = 0; + *names = NULL; + *values = NULL; goto out; } From 4008b29eb47433c15ed3f242ee0132ba27dbdb67 Mon Sep 17 00:00:00 2001 From: Steve Lee Date: Thu, 11 Jun 2020 18:48:00 +0900 Subject: [PATCH 029/462] ASoC: max98390: Update regmap readable reg and volatile Update max98390_readable_register and max98390_volatile_reg Signed-off-by: Steve Lee Link: https://lore.kernel.org/r/20200611094800.18422-1-steves.lee@maximintegrated.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98390.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/max98390.c b/sound/soc/codecs/max98390.c index 0d63ebfbff2f..e6613b52bd78 100644 --- a/sound/soc/codecs/max98390.c +++ b/sound/soc/codecs/max98390.c @@ -700,8 +700,8 @@ static bool max98390_readable_register(struct device *dev, unsigned int reg) case MAX98390_IRQ_CTRL ... MAX98390_WDOG_CTRL: case MAX98390_MEAS_ADC_THERM_WARN_THRESH ... MAX98390_BROWNOUT_INFINITE_HOLD: - case MAX98390_BROWNOUT_LVL_HOLD ... THERMAL_COILTEMP_RD_BACK_BYTE0: - case DSMIG_DEBUZZER_THRESHOLD ... MAX98390_R24FF_REV_ID: + case MAX98390_BROWNOUT_LVL_HOLD ... DSMIG_DEBUZZER_THRESHOLD: + case DSM_VOL_ENA ... MAX98390_R24FF_REV_ID: return true; default: return false; @@ -717,7 +717,7 @@ static bool max98390_volatile_reg(struct device *dev, unsigned int reg) case MAX98390_BROWNOUT_LOWEST_STATUS: case MAX98390_ENV_TRACK_BOOST_VOUT_READ: case DSM_STBASS_HPF_B0_BYTE0 ... DSM_DEBUZZER_ATTACK_TIME_BYTE2: - case THERMAL_RDC_RD_BACK_BYTE1 ... THERMAL_COILTEMP_RD_BACK_BYTE0: + case THERMAL_RDC_RD_BACK_BYTE1 ... DSMIG_DEBUZZER_THRESHOLD: case DSM_THERMAL_GAIN ... DSM_WBDRC_GAIN: return true; default: From 6476b60f32866be49d05e2e0163f337374c55b06 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 11 Jun 2020 13:41:53 +0100 Subject: [PATCH 030/462] ASoC: q6asm: handle EOS correctly Successful send of EOS command does not indicate that EOS is actually finished, correct event to wait EOS is finished is EOS_RENDERED event. EOS_RENDERED means that the DSP has finished processing all the buffers for that particular session and stream. This patch fixes EOS handling! Fixes: 68fd8480bb7b ("ASoC: qdsp6: q6asm: Add support to audio stream apis") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20200611124159.20742-3-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6asm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6asm.c b/sound/soc/qcom/qdsp6/q6asm.c index 0e0e8f7a460a..ae4b2cabdf2d 100644 --- a/sound/soc/qcom/qdsp6/q6asm.c +++ b/sound/soc/qcom/qdsp6/q6asm.c @@ -25,6 +25,7 @@ #define ASM_STREAM_CMD_FLUSH 0x00010BCE #define ASM_SESSION_CMD_PAUSE 0x00010BD3 #define ASM_DATA_CMD_EOS 0x00010BDB +#define ASM_DATA_EVENT_RENDERED_EOS 0x00010C1C #define ASM_NULL_POPP_TOPOLOGY 0x00010C68 #define ASM_STREAM_CMD_FLUSH_READBUFS 0x00010C09 #define ASM_STREAM_CMD_SET_ENCDEC_PARAM 0x00010C10 @@ -622,9 +623,6 @@ static int32_t q6asm_stream_callback(struct apr_device *adev, case ASM_SESSION_CMD_SUSPEND: client_event = ASM_CLIENT_EVENT_CMD_SUSPEND_DONE; break; - case ASM_DATA_CMD_EOS: - client_event = ASM_CLIENT_EVENT_CMD_EOS_DONE; - break; case ASM_STREAM_CMD_FLUSH: client_event = ASM_CLIENT_EVENT_CMD_FLUSH_DONE; break; @@ -727,6 +725,9 @@ static int32_t q6asm_stream_callback(struct apr_device *adev, spin_unlock_irqrestore(&ac->lock, flags); } + break; + case ASM_DATA_EVENT_RENDERED_EOS: + client_event = ASM_CLIENT_EVENT_CMD_EOS_DONE; break; } From bd2077915bfebf6965480753f9dd6a8319d06d14 Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Thu, 11 Jun 2020 20:56:51 +0800 Subject: [PATCH 031/462] spi: tools: Make default_tx/rx and input_tx static Fix the following sparse warning: ./spidev_test.c:50:9: warning: symbol 'default_tx' was not declared. Should it be static? ./spidev_test.c:59:9: warning: symbol 'default_rx' was not declared. Should it be static? ./spidev_test.c:60:6: warning: symbol 'input_tx' was not declared. Should it be static? Signed-off-by: Qing Zhang Link: https://lore.kernel.org/r/1591880212-13479-1-git-send-email-zhangqing@loongson.cn Signed-off-by: Mark Brown --- tools/spi/spidev_test.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c index 56ea053f72f2..83844f8b862a 100644 --- a/tools/spi/spidev_test.c +++ b/tools/spi/spidev_test.c @@ -47,7 +47,7 @@ static int transfer_size; static int iterations; static int interval = 5; /* interval in seconds for showing transfer rate */ -uint8_t default_tx[] = { +static uint8_t default_tx[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x40, 0x00, 0x00, 0x00, 0x00, 0x95, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, @@ -56,8 +56,8 @@ uint8_t default_tx[] = { 0xF0, 0x0D, }; -uint8_t default_rx[ARRAY_SIZE(default_tx)] = {0, }; -char *input_tx; +static uint8_t default_rx[ARRAY_SIZE(default_tx)] = {0, }; +static char *input_tx; static void hex_dump(const void *src, size_t length, size_t line_size, char *prefix) From 7bb64402a092136fb2fda995b0e0304b43c163c5 Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Thu, 11 Jun 2020 20:56:52 +0800 Subject: [PATCH 032/462] spi: tools: Add macro definitions to fix build errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SPI_TX_OCTAL and SPI_RX_OCTAL to fix the following build errors: CC spidev_test.o spidev_test.c: In function ‘transfer’: spidev_test.c:131:13: error: ‘SPI_TX_OCTAL’ undeclared (first use in this function) if (mode & SPI_TX_OCTAL) ^ spidev_test.c:131:13: note: each undeclared identifier is reported only once for each function it appears in spidev_test.c:137:13: error: ‘SPI_RX_OCTAL’ undeclared (first use in this function) if (mode & SPI_RX_OCTAL) ^ spidev_test.c: In function ‘parse_opts’: spidev_test.c:290:12: error: ‘SPI_TX_OCTAL’ undeclared (first use in this function) mode |= SPI_TX_OCTAL; ^ spidev_test.c:308:12: error: ‘SPI_RX_OCTAL’ undeclared (first use in this function) mode |= SPI_RX_OCTAL; ^ LD spidev_test-in.o ld: cannot find spidev_test.o: No such file or directory Additionally, maybe SPI_CS_WORD and SPI_3WIRE_HIZ will be used in the future, so add them too. Fixes: 896fa735084e ("spi: spidev_test: Add support for Octal mode data transfers") Signed-off-by: Qing Zhang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/1591880212-13479-2-git-send-email-zhangqing@loongson.cn Signed-off-by: Mark Brown --- include/uapi/linux/spi/spidev.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/linux/spi/spidev.h b/include/uapi/linux/spi/spidev.h index ee0f2460bff6..9390615d52f0 100644 --- a/include/uapi/linux/spi/spidev.h +++ b/include/uapi/linux/spi/spidev.h @@ -48,6 +48,10 @@ #define SPI_TX_QUAD 0x200 #define SPI_RX_DUAL 0x400 #define SPI_RX_QUAD 0x800 +#define SPI_CS_WORD 0x1000 +#define SPI_TX_OCTAL 0x2000 +#define SPI_RX_OCTAL 0x4000 +#define SPI_3WIRE_HIZ 0x8000 /*---------------------------------------------------------------------------*/ From 03fe7aaf0c3d40ef7feff2bdc7180c146989586a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 10 Jun 2020 17:41:57 +0200 Subject: [PATCH 033/462] spi: spi-fsl-dspi: Free DMA memory with matching function Driver allocates DMA memory with dma_alloc_coherent() but frees it with dma_unmap_single(). This causes DMA warning during system shutdown (with DMA debugging) on Toradex Colibri VF50 module: WARNING: CPU: 0 PID: 1 at ../kernel/dma/debug.c:1036 check_unmap+0x3fc/0xb04 DMA-API: fsl-edma 40098000.dma-controller: device driver frees DMA memory with wrong function [device address=0x0000000087040000] [size=8 bytes] [mapped as coherent] [unmapped as single] Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree) (unwind_backtrace) from [<8010bb34>] (show_stack+0x10/0x14) (show_stack) from [<8011ced8>] (__warn+0xf0/0x108) (__warn) from [<8011cf64>] (warn_slowpath_fmt+0x74/0xb8) (warn_slowpath_fmt) from [<8017d170>] (check_unmap+0x3fc/0xb04) (check_unmap) from [<8017d900>] (debug_dma_unmap_page+0x88/0x90) (debug_dma_unmap_page) from [<80601d68>] (dspi_release_dma+0x88/0x110) (dspi_release_dma) from [<80601e4c>] (dspi_shutdown+0x5c/0x80) (dspi_shutdown) from [<805845f8>] (device_shutdown+0x17c/0x220) (device_shutdown) from [<80143ef8>] (kernel_restart+0xc/0x50) (kernel_restart) from [<801441cc>] (__do_sys_reboot+0x18c/0x210) (__do_sys_reboot) from [<80100060>] (ret_fast_syscall+0x0/0x28) DMA-API: Mapped at: dma_alloc_attrs+0xa4/0x130 dspi_probe+0x568/0x7b4 platform_drv_probe+0x6c/0xa4 really_probe+0x208/0x348 driver_probe_device+0x5c/0xb4 Fixes: 90ba37033cb9 ("spi: spi-fsl-dspi: Add DMA support for Vybrid") Signed-off-by: Krzysztof Kozlowski Acked-by: Vladimir Oltean Cc: Link: https://lore.kernel.org/r/1591803717-11218-1-git-send-email-krzk@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index a35faced0456..58190c94561f 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -588,14 +588,14 @@ static void dspi_release_dma(struct fsl_dspi *dspi) return; if (dma->chan_tx) { - dma_unmap_single(dma->chan_tx->device->dev, dma->tx_dma_phys, - dma_bufsize, DMA_TO_DEVICE); + dma_free_coherent(dma->chan_tx->device->dev, dma_bufsize, + dma->tx_dma_buf, dma->tx_dma_phys); dma_release_channel(dma->chan_tx); } if (dma->chan_rx) { - dma_unmap_single(dma->chan_rx->device->dev, dma->rx_dma_phys, - dma_bufsize, DMA_FROM_DEVICE); + dma_free_coherent(dma->chan_rx->device->dev, dma_bufsize, + dma->rx_dma_buf, dma->rx_dma_phys); dma_release_channel(dma->chan_rx); } } From 790243d3bf78f9830a3b2ffbca1ed0f528295d48 Mon Sep 17 00:00:00 2001 From: Sandeep Raghuraman Date: Thu, 11 Jun 2020 01:36:26 +0530 Subject: [PATCH 034/462] drm/amdgpu: Replace invalid device ID with a valid device ID Initializes Powertune data for a specific Hawaii card by fixing what looks like a typo in the code. The device ID 66B1 is not a supported device ID for this driver, and is not mentioned elsewhere. 67B1 is a valid device ID, and is a Hawaii Pro GPU. I have tested on my R9 390 which has device ID 67B1, and it works fine without problems. Signed-off-by: Sandeep Raghuraman Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 85e5b1ed22c2..56923a96b450 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -239,7 +239,7 @@ static void ci_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) switch (dev_id) { case 0x67BA: - case 0x66B1: + case 0x67B1: smu_data->power_tune_defaults = &defaults_hawaii_pro; break; case 0x67B8: From 9ec051bf4470ee20505c3c1bca9dc441944de4df Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 29 Apr 2020 10:53:02 -0400 Subject: [PATCH 035/462] drm/amd/display: Rework dsc to isolate FPU operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we want to use float point operation on Linux we need to use within special kernel protection (`kernel_fpu_{begin,end}()`.), otherwise the kernel can clobber userspace FPU register state. For detecting these issues we use a tool named objtool (with -Ffa flags) to highlight the FPU problems, all warnings can be summed up as follows: ./tools/objtool/objtool check -Ffa drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.o [..] dc/dsc/rc_calc.o: warning: objtool: get_qp_set()+0x2f8: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc.o: warning: objtool: dsc_roundf()+0x5: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc.o: warning: objtool: dsc_ceil()+0x5: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc.o: warning: objtool: get_ofs_set()+0x3eb: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc.o: warning: objtool: calc_rc_params()+0x3c: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/dc_dsc.o: warning: objtool: get_dsc_bandwidth_range.isra.0()+0x8d: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/dc_dsc.o: warning: objtool: setup_dsc_config()+0x2ef: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc_dpi.o: warning: objtool:copy_pps_fields()+0xbb: FPU instruction outside of kernel_fpu_{begin,end}() [..] dc/dsc/rc_calc_dpi.o: warning: objtool: dscc_compute_dsc_parameters()+0x7b: FPU instruction outside of kernel_fpu_{begin,end}() This commit fixes the above issues by rework DSC as described: 1. Isolate all FPU operations in a single file; 2. Use FPU flags only in the file that handles FPU operations; 3. Isolate all functions that require float point operation in static functions; 4. Add a mid-layer function that does not use any float point operation, and that could be safely invoked in other parts of the code. 5. Keep float point operation under DC_FP_{START/END} macro. CC: Christian König CC: Alexander Deucher CC: Peter Zijlstra CC: Tony Cheng CC: Harry Wentland Signed-off-by: Rodrigo Siqueira Reviewed-by: Mikita Lipski Acked-by: Qingqing Zhuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dsc/Makefile | 2 - drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 18 +-- drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c | 151 +++++++++++++++++- drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h | 5 +- .../gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c | 27 +--- 5 files changed, 153 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index 3f66868df171..ea29cf95d470 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -28,8 +28,6 @@ endif endif CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc_dpi.o := $(dsc_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dsc/dc_dsc.o := $(dsc_ccflags) DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 0ea6662a1563..0c7f247bb7de 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -22,10 +22,12 @@ * Author: AMD */ +#include #include "dc_hw_types.h" #include "dsc.h" #include #include "dc.h" +#include "rc_calc.h" /* This module's internal functions */ @@ -304,22 +306,6 @@ static inline uint32_t dsc_div_by_10_round_up(uint32_t value) return (value + 9) / 10; } -static inline uint32_t calc_dsc_bpp_x16(uint32_t stream_bandwidth_kbps, uint32_t pix_clk_100hz, uint32_t bpp_increment_div) -{ - uint32_t dsc_target_bpp_x16; - float f_dsc_target_bpp; - float f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f; - uint32_t precision = bpp_increment_div; // bpp_increment_div is actually precision - - f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz; - - // Round down to the nearest precision stop to bring it into DSC spec range - dsc_target_bpp_x16 = (uint32_t)(f_dsc_target_bpp * precision); - dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision; - - return dsc_target_bpp_x16; -} - /* Get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range, and timing's pixel clock * and uncompressed bandwidth. */ diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c index 03ae15946c6d..667afbc260f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c @@ -23,6 +23,7 @@ * Authors: AMD * */ +#include #include "os_types.h" #include "rc_calc.h" @@ -40,7 +41,8 @@ break -void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum max_min max_min, float bpp) +static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, + enum max_min max_min, float bpp) { int mode = MODE_SELECT(444, 422, 420); int sel = table_hash(mode, bpc, max_min); @@ -85,7 +87,7 @@ void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum ma memcpy(qps, table[index].qps, sizeof(qp_set)); } -double dsc_roundf(double num) +static double dsc_roundf(double num) { if (num < 0.0) num = num - 0.5; @@ -95,7 +97,7 @@ double dsc_roundf(double num) return (int)(num); } -double dsc_ceil(double num) +static double dsc_ceil(double num) { double retval = (int)num; @@ -105,7 +107,7 @@ double dsc_ceil(double num) return (int)retval; } -void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp) +static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp) { int *p = ofs; @@ -160,7 +162,7 @@ void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp) } } -int median3(int a, int b, int c) +static int median3(int a, int b, int c) { if (a > b) swap(a, b); @@ -172,13 +174,25 @@ int median3(int a, int b, int c) return b; } -void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version) +static void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm, + enum bits_per_comp bpc, u8 drm_bpp, + bool is_navite_422_or_420, + int slice_width, int slice_height, + int minor_version) { + float bpp; float bpp_group; float initial_xmit_delay_factor; int padding_pixels; int i; + bpp = ((float)drm_bpp / 16.0); + /* in native_422 or native_420 modes, the bits_per_pixel is double the + * target bpp (the latter is what calc_rc_params expects) + */ + if (is_navite_422_or_420) + bpp /= 2.0; + rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); @@ -251,3 +265,128 @@ void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_com rc->rc_buf_thresh[13] = 8064; } +static u32 _do_bytes_per_pixel_calc(int slice_width, u8 drm_bpp, + bool is_navite_422_or_420) +{ + float bpp; + u32 bytes_per_pixel; + double d_bytes_per_pixel; + + bpp = ((float)drm_bpp / 16.0); + d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width; + // TODO: Make sure the formula for calculating this is precise (ceiling + // vs. floor, and at what point they should be applied) + if (is_navite_422_or_420) + d_bytes_per_pixel /= 2; + + bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000); + + return bytes_per_pixel; +} + +static u32 _do_calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, + u32 bpp_increment_div) +{ + u32 dsc_target_bpp_x16; + float f_dsc_target_bpp; + float f_stream_bandwidth_100bps; + // bpp_increment_div is actually precision + u32 precision = bpp_increment_div; + + f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f; + f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz; + + // Round down to the nearest precision stop to bring it into DSC spec + // range + dsc_target_bpp_x16 = (u32)(f_dsc_target_bpp * precision); + dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision; + + return dsc_target_bpp_x16; +} + +/** + * calc_rc_params - reads the user's cmdline mode + * @rc: DC internal DSC parameters + * @pps: DRM struct with all required DSC values + * + * This function expects a drm_dsc_config data struct with all the required DSC + * values previously filled out by our driver and based on this information it + * computes some of the DSC values. + * + * @note This calculation requires float point operation, most of it executes + * under kernel_fpu_{begin,end}. + */ +void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps) +{ + enum colour_mode mode; + enum bits_per_comp bpc; + bool is_navite_422_or_420; + u8 drm_bpp = pps->bits_per_pixel; + int slice_width = pps->slice_width; + int slice_height = pps->slice_height; + + mode = pps->convert_rgb ? CM_RGB : (pps->simple_422 ? CM_444 : + (pps->native_422 ? CM_422 : + pps->native_420 ? CM_420 : CM_444)); + bpc = (pps->bits_per_component == 8) ? BPC_8 : (pps->bits_per_component == 10) + ? BPC_10 : BPC_12; + + is_navite_422_or_420 = pps->native_422 || pps->native_420; + + DC_FP_START(); + _do_calc_rc_params(rc, mode, bpc, drm_bpp, is_navite_422_or_420, + slice_width, slice_height, + pps->dsc_version_minor); + DC_FP_END(); +} + +/** + * calc_dsc_bytes_per_pixel - calculate bytes per pixel + * @pps: DRM struct with all required DSC values + * + * Based on the information inside drm_dsc_config, this function calculates the + * total of bytes per pixel. + * + * @note This calculation requires float point operation, most of it executes + * under kernel_fpu_{begin,end}. + * + * Return: + * Return the number of bytes per pixel + */ +u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps) + +{ + u32 ret; + u8 drm_bpp = pps->bits_per_pixel; + int slice_width = pps->slice_width; + bool is_navite_422_or_420 = pps->native_422 || pps->native_420; + + DC_FP_START(); + ret = _do_bytes_per_pixel_calc(slice_width, drm_bpp, + is_navite_422_or_420); + DC_FP_END(); + return ret; +} + +/** + * calc_dsc_bpp_x16 - retrieve the dsc bits per pixel + * @stream_bandwidth_kbps: + * @pix_clk_100hz: + * @bpp_increment_div: + * + * Calculate the total of bits per pixel for DSC configuration. + * + * @note This calculation requires float point operation, most of it executes + * under kernel_fpu_{begin,end}. + */ +u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, + u32 bpp_increment_div) +{ + u32 dsc_bpp; + + DC_FP_START(); + dsc_bpp = _do_calc_dsc_bpp_x16(stream_bandwidth_kbps, pix_clk_100hz, + bpp_increment_div); + DC_FP_END(); + return dsc_bpp; +} diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h index b6b1f09c2009..21723fa6561e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h @@ -77,7 +77,10 @@ struct qp_entry { typedef struct qp_entry qp_table[]; -void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version); +void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps); +u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps); +u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, + u32 bpp_increment_div); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c index 1f6e63b71456..ef830aded5b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c @@ -27,8 +27,6 @@ #include "dscc_types.h" #include "rc_calc.h" -double dsc_ceil(double num); - static void copy_pps_fields(struct drm_dsc_config *to, const struct drm_dsc_config *from) { to->line_buf_depth = from->line_buf_depth; @@ -100,34 +98,13 @@ static void copy_rc_to_cfg(struct drm_dsc_config *dsc_cfg, const struct rc_param int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, struct dsc_parameters *dsc_params) { - enum colour_mode mode = pps->convert_rgb ? CM_RGB : - (pps->simple_422 ? CM_444 : - (pps->native_422 ? CM_422 : - pps->native_420 ? CM_420 : CM_444)); - enum bits_per_comp bpc = (pps->bits_per_component == 8) ? BPC_8 : - (pps->bits_per_component == 10) ? BPC_10 : BPC_12; - float bpp = ((float) pps->bits_per_pixel / 16.0); - int slice_width = pps->slice_width; - int slice_height = pps->slice_height; int ret; struct rc_params rc; struct drm_dsc_config dsc_cfg; - double d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width; + dsc_params->bytes_per_pixel = calc_dsc_bytes_per_pixel(pps); - // TODO: Make sure the formula for calculating this is precise (ceiling vs. floor, and at what point they should be applied) - if (pps->native_422 || pps->native_420) - d_bytes_per_pixel /= 2; - - dsc_params->bytes_per_pixel = (uint32_t)dsc_ceil(d_bytes_per_pixel * 0x10000000); - - /* in native_422 or native_420 modes, the bits_per_pixel is double the target bpp - * (the latter is what calc_rc_params expects) - */ - if (pps->native_422 || pps->native_420) - bpp /= 2.0; - - calc_rc_params(&rc, mode, bpc, bpp, slice_width, slice_height, pps->dsc_version_minor); + calc_rc_params(&rc, pps); dsc_params->pps = *pps; dsc_params->pps.initial_scale_value = 8 * rc.rc_model_size / (rc.rc_model_size - rc.initial_fullness_offset); From adb36a8203831e40494a92095dacd566b2ad4a69 Mon Sep 17 00:00:00 2001 From: Aaron Plattner Date: Thu, 11 Jun 2020 11:08:45 -0700 Subject: [PATCH 036/462] ALSA: hda: Add NVIDIA codec IDs 9a & 9d through a0 to patch table These IDs are for upcoming NVIDIA chips with audio functions that are largely similar to the existing ones. Signed-off-by: Aaron Plattner Cc: Link: https://lore.kernel.org/r/20200611180845.39942-1-aplattner@nvidia.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index fbd7cc6026d8..e2b21ef5d7d1 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -4145,6 +4145,11 @@ HDA_CODEC_ENTRY(0x10de0095, "GPU 95 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0097, "GPU 97 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0098, "GPU 98 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0099, "GPU 99 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009a, "GPU 9a HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009d, "GPU 9d HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009e, "GPU 9e HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009f, "GPU 9f HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a0, "GPU a0 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x10de8067, "MCP67/68 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP", patch_via_hdmi), From e7585db1b0b5b4e4eb1967bb1472df308f7ffcbf Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Fri, 12 Jun 2020 12:18:07 +0100 Subject: [PATCH 037/462] ALSA: usb-audio: Add implicit feedback quirk for SSL2+. This uses the same quirk as the Motu M2 and M4 to ensure the driver uses the audio interface's clock. Tested on an SSL2+. Signed-off-by: Laurence Tratt Cc: Link: https://lore.kernel.org/r/20200612111807.dgnig6rwhmsl2bod@overdrive.tratt.net Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 8a05dcb1344f..84c0ae431936 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -367,6 +367,7 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, ifnum = 0; goto add_sync_ep_from_ifnum; case USB_ID(0x07fd, 0x0008): /* MOTU M Series */ + case USB_ID(0x31e9, 0x0002): /* Solid State Logic SSL2+ */ ep = 0x81; ifnum = 2; goto add_sync_ep_from_ifnum; From 6fbea6b6a838f9aa941fe53a3637fd8d8aab1eba Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 12 Jun 2020 15:37:48 +0800 Subject: [PATCH 038/462] ASoC: soc-card: export snd_soc_lookup_component_nolocked snd_soc_lookup_component_nolocked can be used for the DPCM case that Front-End needs to get the unused platform component but added by Back-End cpu dai driver. If the component is gotten, then we can get the dma chan created by Back-End component and reused it in Front-End. Signed-off-by: Shengjiu Wang Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/55f6e0d76f67a517b9a44136d790ff2a06b5caa8.1591947428.git.shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- include/sound/soc.h | 2 ++ sound/soc/soc-core.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index 74868436ac79..565612a8d690 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -444,6 +444,8 @@ int devm_snd_soc_register_component(struct device *dev, const struct snd_soc_component_driver *component_driver, struct snd_soc_dai_driver *dai_drv, int num_dai); void snd_soc_unregister_component(struct device *dev); +struct snd_soc_component *snd_soc_lookup_component_nolocked(struct device *dev, + const char *driver_name); struct snd_soc_component *snd_soc_lookup_component(struct device *dev, const char *driver_name); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 7b387202c5db..0f30f5aabaa8 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -310,7 +310,7 @@ struct snd_soc_component *snd_soc_rtdcom_lookup(struct snd_soc_pcm_runtime *rtd, } EXPORT_SYMBOL_GPL(snd_soc_rtdcom_lookup); -static struct snd_soc_component +struct snd_soc_component *snd_soc_lookup_component_nolocked(struct device *dev, const char *driver_name) { struct snd_soc_component *component; @@ -329,6 +329,7 @@ static struct snd_soc_component return found_component; } +EXPORT_SYMBOL_GPL(snd_soc_lookup_component_nolocked); struct snd_soc_component *snd_soc_lookup_component(struct device *dev, const char *driver_name) From a9a21e1eafc94b79502cab8272b392f7f63ef7bb Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 12 Jun 2020 15:37:49 +0800 Subject: [PATCH 039/462] ASoC: dmaengine_pcm: export soc_component_to_pcm In DPCM case, Front-End needs to get the dma chan which has been requested by Back-End and reuse it. Signed-off-by: Shengjiu Wang Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/429c6ae1f3c5b47eb893f475d531d71cdcfe34c0.1591947428.git.shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- include/sound/dmaengine_pcm.h | 11 +++++++++++ sound/soc/soc-generic-dmaengine-pcm.c | 12 ------------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h index b65220685920..8c5e38180fb0 100644 --- a/include/sound/dmaengine_pcm.h +++ b/include/sound/dmaengine_pcm.h @@ -161,4 +161,15 @@ int snd_dmaengine_pcm_prepare_slave_config(struct snd_pcm_substream *substream, #define SND_DMAENGINE_PCM_DRV_NAME "snd_dmaengine_pcm" +struct dmaengine_pcm { + struct dma_chan *chan[SNDRV_PCM_STREAM_LAST + 1]; + const struct snd_dmaengine_pcm_config *config; + struct snd_soc_component component; + unsigned int flags; +}; + +static inline struct dmaengine_pcm *soc_component_to_pcm(struct snd_soc_component *p) +{ + return container_of(p, struct dmaengine_pcm, component); +} #endif diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index f728309a0833..80a4e71f2d95 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -21,18 +21,6 @@ */ #define SND_DMAENGINE_PCM_FLAG_NO_RESIDUE BIT(31) -struct dmaengine_pcm { - struct dma_chan *chan[SNDRV_PCM_STREAM_LAST + 1]; - const struct snd_dmaengine_pcm_config *config; - struct snd_soc_component component; - unsigned int flags; -}; - -static struct dmaengine_pcm *soc_component_to_pcm(struct snd_soc_component *p) -{ - return container_of(p, struct dmaengine_pcm, component); -} - static struct device *dmaengine_dma_dev(struct dmaengine_pcm *pcm, struct snd_pcm_substream *substream) { From 706e2c8811585f42612b6cff218ab3adbe63a4ee Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 12 Jun 2020 15:37:50 +0800 Subject: [PATCH 040/462] ASoC: fsl_asrc_dma: Reuse the dma channel if available in Back-End The dma channel has been requested by Back-End cpu dai driver already. If fsl_asrc_dma requests dma chan with same dma:tx symlink, then there will be below warning with SDMA. [ 48.174236] fsl-esai-dai 2024000.esai: Cannot create DMA dma:tx symlink So if we can reuse the dma channel of Back-End, then the issue can be fixed. In order to get the dma channel which is already requested in Back-End. we use the exported two functions (snd_soc_lookup_component_nolocked and soc_component_to_pcm). If we can get the dma channel, then reuse it, if can't, then request a new one. Signed-off-by: Shengjiu Wang Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/3a79f0442cb4930c633cf72145cfe95a45b9c78e.1591947428.git.shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_asrc_dma.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c index d6a3fc5f87e5..d88e6343e0a2 100644 --- a/sound/soc/fsl/fsl_asrc_dma.c +++ b/sound/soc/fsl/fsl_asrc_dma.c @@ -135,6 +135,8 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component, struct snd_dmaengine_dai_dma_data *dma_params_be = NULL; struct snd_pcm_runtime *runtime = substream->runtime; struct fsl_asrc_pair *pair = runtime->private_data; + struct dma_chan *tmp_chan = NULL, *be_chan = NULL; + struct snd_soc_component *component_be = NULL; struct fsl_asrc *asrc = pair->asrc; struct dma_slave_config config_fe, config_be; enum asrc_pair_index index = pair->index; @@ -142,7 +144,6 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component, int stream = substream->stream; struct imx_dma_data *tmp_data; struct snd_soc_dpcm *dpcm; - struct dma_chan *tmp_chan; struct device *dev_be; u8 dir = tx ? OUT : IN; dma_cap_mask_t mask; @@ -197,18 +198,30 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component, dma_cap_set(DMA_SLAVE, mask); dma_cap_set(DMA_CYCLIC, mask); + /* + * The Back-End device might have already requested a DMA channel, + * so try to reuse it first, and then request a new one upon NULL. + */ + component_be = snd_soc_lookup_component_nolocked(dev_be, SND_DMAENGINE_PCM_DRV_NAME); + if (component_be) { + be_chan = soc_component_to_pcm(component_be)->chan[substream->stream]; + tmp_chan = be_chan; + } + if (!tmp_chan) + tmp_chan = dma_request_slave_channel(dev_be, tx ? "tx" : "rx"); + /* * An EDMA DEV_TO_DEV channel is fixed and bound with DMA event of each * peripheral, unlike SDMA channel that is allocated dynamically. So no - * need to configure dma_request and dma_request2, but get dma_chan via - * dma_request_slave_channel directly with dma name of Front-End device + * need to configure dma_request and dma_request2, but get dma_chan of + * Back-End device directly via dma_request_slave_channel. */ if (!asrc->use_edma) { /* Get DMA request of Back-End */ - tmp_chan = dma_request_slave_channel(dev_be, tx ? "tx" : "rx"); tmp_data = tmp_chan->private; pair->dma_data.dma_request = tmp_data->dma_request; - dma_release_channel(tmp_chan); + if (!be_chan) + dma_release_channel(tmp_chan); /* Get DMA request of Front-End */ tmp_chan = asrc->get_dma_channel(pair, dir); @@ -221,6 +234,8 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component, pair->dma_chan[dir] = dma_request_channel(mask, filter, &pair->dma_data); } else { + if (!be_chan) + dma_release_channel(tmp_chan); pair->dma_chan[dir] = asrc->get_dma_channel(pair, dir); } From b287a6d9723c601dd947f1c27d4cc0192e384a5a Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 12 Jun 2020 15:37:51 +0800 Subject: [PATCH 041/462] ASoC: fsl_asrc_dma: Fix data copying speed issue with EDMA With EDMA, there is two dma channels can be used for dev_to_dev, one is from ASRC, one is from another peripheral (ESAI or SAI). If we select the dma channel of ASRC, there is an issue for ideal ratio case, the speed of copy data is faster than sample frequency, because ASRC output data is very fast in ideal ratio mode. So it is reasonable to use the dma channel of Back-End peripheral. then copying speed of DMA is controlled by data consumption speed in the peripheral FIFO, Signed-off-by: Shengjiu Wang Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/424ed6c249bafcbe30791c9de0352821c5ea67e2.1591947428.git.shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_asrc_common.h | 2 ++ sound/soc/fsl/fsl_asrc_dma.c | 26 +++++++++++++++----------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/sound/soc/fsl/fsl_asrc_common.h b/sound/soc/fsl/fsl_asrc_common.h index 77665b15c8db..7e1c13ca37f1 100644 --- a/sound/soc/fsl/fsl_asrc_common.h +++ b/sound/soc/fsl/fsl_asrc_common.h @@ -32,6 +32,7 @@ enum asrc_pair_index { * @dma_chan: inputer and output DMA channels * @dma_data: private dma data * @pos: hardware pointer position + * @req_dma_chan: flag to release dev_to_dev chan * @private: pair private area */ struct fsl_asrc_pair { @@ -45,6 +46,7 @@ struct fsl_asrc_pair { struct dma_chan *dma_chan[2]; struct imx_dma_data dma_data; unsigned int pos; + bool req_dma_chan; void *private; }; diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c index d88e6343e0a2..5f01a58f422a 100644 --- a/sound/soc/fsl/fsl_asrc_dma.c +++ b/sound/soc/fsl/fsl_asrc_dma.c @@ -233,11 +233,11 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component, pair->dma_chan[dir] = dma_request_channel(mask, filter, &pair->dma_data); + pair->req_dma_chan = true; } else { - if (!be_chan) - dma_release_channel(tmp_chan); - pair->dma_chan[dir] = - asrc->get_dma_channel(pair, dir); + pair->dma_chan[dir] = tmp_chan; + /* Do not flag to release if we are reusing the Back-End one */ + pair->req_dma_chan = !be_chan; } if (!pair->dma_chan[dir]) { @@ -276,7 +276,8 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component, ret = dmaengine_slave_config(pair->dma_chan[dir], &config_be); if (ret) { dev_err(dev, "failed to config DMA channel for Back-End\n"); - dma_release_channel(pair->dma_chan[dir]); + if (pair->req_dma_chan) + dma_release_channel(pair->dma_chan[dir]); return ret; } @@ -288,19 +289,22 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component, static int fsl_asrc_dma_hw_free(struct snd_soc_component *component, struct snd_pcm_substream *substream) { + bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; struct snd_pcm_runtime *runtime = substream->runtime; struct fsl_asrc_pair *pair = runtime->private_data; + u8 dir = tx ? OUT : IN; snd_pcm_set_runtime_buffer(substream, NULL); - if (pair->dma_chan[IN]) - dma_release_channel(pair->dma_chan[IN]); + if (pair->dma_chan[!dir]) + dma_release_channel(pair->dma_chan[!dir]); - if (pair->dma_chan[OUT]) - dma_release_channel(pair->dma_chan[OUT]); + /* release dev_to_dev chan if we aren't reusing the Back-End one */ + if (pair->dma_chan[dir] && pair->req_dma_chan) + dma_release_channel(pair->dma_chan[dir]); - pair->dma_chan[IN] = NULL; - pair->dma_chan[OUT] = NULL; + pair->dma_chan[!dir] = NULL; + pair->dma_chan[dir] = NULL; return 0; } From d7442ba13d62de9afc4e57344a676f9f4623dcaa Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Fri, 12 Jun 2020 12:50:33 +0200 Subject: [PATCH 042/462] regulator: da9063: fix LDO9 suspend and warning. Commit 99f75ce66619 ("regulator: da9063: fix suspend") converted the regulators to use a common (corrected) suspend bit setting but one of regulators (LDO9) slipped through the crack. This means that the original problem was not fixed for LDO9 and also leads to a warning found by the test robot. da9063-regulator.c:515:3: warning: initialized field overwritten Fix this by converting that regulator too like the others. Fixes: 99f75ce66619 ("regulator: da9063: fix suspend") Reported-by: kernel test robot Signed-off-by: Martin Fuzzey Link: https://lore.kernel.org/r/1591959073-16792-1-git-send-email-martin.fuzzey@flowbird.group Signed-off-by: Mark Brown --- drivers/regulator/da9063-regulator.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/regulator/da9063-regulator.c b/drivers/regulator/da9063-regulator.c index e1d6c8f6d40b..fe65b5acaf28 100644 --- a/drivers/regulator/da9063-regulator.c +++ b/drivers/regulator/da9063-regulator.c @@ -512,7 +512,6 @@ static const struct da9063_regulator_info da9063_regulator_info[] = { }, { DA9063_LDO(DA9063, LDO9, 950, 50, 3600), - .suspend = BFIELD(DA9063_REG_LDO9_CONT, DA9063_VLDO9_SEL), }, { DA9063_LDO(DA9063, LDO11, 900, 50, 3600), From 92919679d31721381e3c376488477efe49b915f3 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 12 Jun 2020 12:23:17 +0300 Subject: [PATCH 043/462] regulator: Fix pickable ranges mapping Pickable ranges mapping function never used range min selector. Thus existing drivers broke when proper linear_ranges functionality was taken in use. Fix this for now just by ignoring the minimum selector. Fixes: 60ab7f4153b6 ("regulator: use linear_ranges helper") Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/20200612090225.GA3243@localhost.localdomain Signed-off-by: Mark Brown --- drivers/regulator/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c index e970e9d2f8be..e4bb09bbd3fa 100644 --- a/drivers/regulator/helpers.c +++ b/drivers/regulator/helpers.c @@ -486,7 +486,7 @@ int regulator_map_voltage_pickable_linear_range(struct regulator_dev *rdev, continue; } - ret = selector + sel; + ret = selector + sel - range->min_sel; voltage = rdev->desc->ops->list_voltage(rdev, ret); From 6c58f25e6938c073198af8b1e1832f83f8f0df33 Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Thu, 11 Jun 2020 18:32:35 +0000 Subject: [PATCH 044/462] riscv/atomic: Fix sign extension for RV64I The argument passed to cmpxchg is not guaranteed to be sign extended, but lr.w sign extends on RV64I. This makes cmpxchg fail on clang built kernels when __old is negative. To fix this, we just cast __old to long which sign extends on RV64I. With this fix, clang built RISC-V kernels now boot. Link: https://github.com/ClangBuiltLinux/linux/issues/867 Signed-off-by: Nathan Huckleberry Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cmpxchg.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index d969bab4a26b..262e5bbb2776 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -179,7 +179,7 @@ " bnez %1, 0b\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -224,7 +224,7 @@ RISCV_ACQUIRE_BARRIER \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -270,7 +270,7 @@ " bnez %1, 0b\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -316,7 +316,7 @@ " fence rw, rw\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ From be23e837333a914df3f24bf0b32e87b0331ab8d1 Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Mon, 15 Jun 2020 00:53:30 +0800 Subject: [PATCH 045/462] bcache: fix potential deadlock problem in btree_gc_coalesce coccicheck reports: drivers/md//bcache/btree.c:1538:1-7: preceding lock on line 1417 In btree_gc_coalesce func, if the coalescing process fails, we will goto to out_nocoalesce tag directly without releasing new_nodes[i]->write_lock. Then, it will cause a deadlock when trying to acquire new_nodes[i]-> write_lock for freeing new_nodes[i] before return. btree_gc_coalesce func details as follows: if alloc new_nodes[i] fails: goto out_nocoalesce; // obtain new_nodes[i]->write_lock mutex_lock(&new_nodes[i]->write_lock) // main coalescing process for (i = nodes - 1; i > 0; --i) [snipped] if coalescing process fails: // Here, directly goto out_nocoalesce // tag will cause a deadlock goto out_nocoalesce; [snipped] // release new_nodes[i]->write_lock mutex_unlock(&new_nodes[i]->write_lock) // coalesing succ, return return; out_nocoalesce: btree_node_free(new_nodes[i]) // free new_nodes[i] // obtain new_nodes[i]->write_lock mutex_lock(&new_nodes[i]->write_lock); // set flag for reuse clear_bit(BTREE_NODE_dirty, &ew_nodes[i]->flags); // release new_nodes[i]->write_lock mutex_unlock(&new_nodes[i]->write_lock); To fix the problem, we add a new tag 'out_unlock_nocoalesce' for releasing new_nodes[i]->write_lock before out_nocoalesce tag. If coalescing process fails, we will go to out_unlock_nocoalesce tag for releasing new_nodes[i]->write_lock before free new_nodes[i] in out_nocoalesce tag. (Coly Li helps to clean up commit log format.) Fixes: 2a285686c109816 ("bcache: btree locking rework") Signed-off-by: Zhiqiang Liu Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 39de94edd73a..6548a601edf0 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1389,7 +1389,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, if (__set_blocks(n1, n1->keys + n2->keys, block_bytes(b->c)) > btree_blocks(new_nodes[i])) - goto out_nocoalesce; + goto out_unlock_nocoalesce; keys = n2->keys; /* Take the key of the node we're getting rid of */ @@ -1418,7 +1418,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, if (__bch_keylist_realloc(&keylist, bkey_u64s(&new_nodes[i]->key))) - goto out_nocoalesce; + goto out_unlock_nocoalesce; bch_btree_node_write(new_nodes[i], &cl); bch_keylist_add(&keylist, &new_nodes[i]->key); @@ -1464,6 +1464,10 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, /* Invalidated our iterator */ return -EINTR; +out_unlock_nocoalesce: + for (i = 0; i < nodes; i++) + mutex_unlock(&new_nodes[i]->write_lock); + out_nocoalesce: closure_sync(&cl); From dcacbc1242c71e18fa9d2eadc5647e115c9c627d Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 15 Jun 2020 00:53:31 +0800 Subject: [PATCH 046/462] bcache: check and adjust logical block size for backing devices It's possible for a block driver to set logical block size to a value greater than page size incorrectly; e.g. bcache takes the value from the superblock, set by the user w/ make-bcache. This causes a BUG/NULL pointer dereference in the path: __blkdev_get() -> set_init_blocksize() // set i_blkbits based on ... -> bdev_logical_block_size() -> queue_logical_block_size() // ... this value -> bdev_disk_changed() ... -> blkdev_readpage() -> block_read_full_page() -> create_page_buffers() // size = 1 << i_blkbits -> create_empty_buffers() // give size/take pointer -> alloc_page_buffers() // return NULL .. BUG! Because alloc_page_buffers() is called with size > PAGE_SIZE, thus it initializes head = NULL, skips the loop, return head; then create_empty_buffers() gets (and uses) the NULL pointer. This has been around longer than commit ad6bf88a6c19 ("block: fix an integer overflow in logical block size"); however, it increased the range of values that can trigger the issue. Previously only 8k/16k/32k (on x86/4k page size) would do it, as greater values overflow unsigned short to zero, and queue_ logical_block_size() would then use the default of 512. Now the range with unsigned int is much larger, and users w/ the 512k value, which happened to be zero'ed previously and work fine, started to hit this issue -- as the zero is gone, and queue_logical_block_size() does return 512k (>PAGE_SIZE.) Fix this by checking the bcache device's logical block size, and if it's greater than page size, fallback to the backing/ cached device's logical page size. This doesn't affect cache devices as those are still checked for block/page size in read_super(); only the backing/cached devices are not. Apparently it's a regression from commit 2903381fce71 ("bcache: Take data offset from the bdev superblock."), moving the check into BCACHE_SB_VERSION_CDEV only. Now that we have superblocks of backing devices out there with this larger value, we cannot refuse to load them (i.e., have a similar check in _BDEV.) Ideally perhaps bcache should use all values from the backing device (physical/logical/io_min block size)? But for now just fix the problematic case. Test-case: # IMG=/root/disk.img # dd if=/dev/zero of=$IMG bs=1 count=0 seek=1G # DEV=$(losetup --find --show $IMG) # make-bcache --bdev $DEV --block 8k < see dmesg > Before: # uname -r 5.7.0-rc7 [ 55.944046] BUG: kernel NULL pointer dereference, address: 0000000000000000 ... [ 55.949742] CPU: 3 PID: 610 Comm: bcache-register Not tainted 5.7.0-rc7 #4 ... [ 55.952281] RIP: 0010:create_empty_buffers+0x1a/0x100 ... [ 55.966434] Call Trace: [ 55.967021] create_page_buffers+0x48/0x50 [ 55.967834] block_read_full_page+0x49/0x380 [ 55.972181] do_read_cache_page+0x494/0x610 [ 55.974780] read_part_sector+0x2d/0xaa [ 55.975558] read_lba+0x10e/0x1e0 [ 55.977904] efi_partition+0x120/0x5a6 [ 55.980227] blk_add_partitions+0x161/0x390 [ 55.982177] bdev_disk_changed+0x61/0xd0 [ 55.982961] __blkdev_get+0x350/0x490 [ 55.983715] __device_add_disk+0x318/0x480 [ 55.984539] bch_cached_dev_run+0xc5/0x270 [ 55.986010] register_bcache.cold+0x122/0x179 [ 55.987628] kernfs_fop_write+0xbc/0x1a0 [ 55.988416] vfs_write+0xb1/0x1a0 [ 55.989134] ksys_write+0x5a/0xd0 [ 55.989825] do_syscall_64+0x43/0x140 [ 55.990563] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 55.991519] RIP: 0033:0x7f7d60ba3154 ... After: # uname -r 5.7.0.bcachelbspgsz [ 31.672460] bcache: bcache_device_init() bcache0: sb/logical block size (8192) greater than page size (4096) falling back to device logical block size (512) [ 31.675133] bcache: register_bdev() registered backing device loop0 # grep ^ /sys/block/bcache0/queue/*_block_size /sys/block/bcache0/queue/logical_block_size:512 /sys/block/bcache0/queue/physical_block_size:8192 Reported-by: Ryan Finnie Reported-by: Sebastian Marsching Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f9975c22bf7e..904ea9bbb5c4 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -819,7 +819,8 @@ static void bcache_device_free(struct bcache_device *d) } static int bcache_device_init(struct bcache_device *d, unsigned int block_size, - sector_t sectors, make_request_fn make_request_fn) + sector_t sectors, make_request_fn make_request_fn, + struct block_device *cached_bdev) { struct request_queue *q; const size_t max_stripes = min_t(size_t, INT_MAX, @@ -885,6 +886,21 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, q->limits.io_min = block_size; q->limits.logical_block_size = block_size; q->limits.physical_block_size = block_size; + + if (q->limits.logical_block_size > PAGE_SIZE && cached_bdev) { + /* + * This should only happen with BCACHE_SB_VERSION_BDEV. + * Block/page size is checked for BCACHE_SB_VERSION_CDEV. + */ + pr_info("%s: sb/logical block size (%u) greater than page size " + "(%lu) falling back to device logical block size (%u)", + d->disk->disk_name, q->limits.logical_block_size, + PAGE_SIZE, bdev_logical_block_size(cached_bdev)); + + /* This also adjusts physical block size/min io size if needed */ + blk_queue_logical_block_size(q, bdev_logical_block_size(cached_bdev)); + } + blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue); blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue); @@ -1340,7 +1356,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) ret = bcache_device_init(&dc->disk, block_size, dc->bdev->bd_part->nr_sects - dc->sb.data_offset, - cached_dev_make_request); + cached_dev_make_request, dc->bdev); if (ret) return ret; @@ -1453,7 +1469,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) kobject_init(&d->kobj, &bch_flash_dev_ktype); if (bcache_device_init(d, block_bytes(c), u->sectors, - flash_dev_make_request)) + flash_dev_make_request, NULL)) goto err; bcache_device_attach(d, c, u - c->uuids); From ee4a36f414617aaae01f93133e828363e7e9dd17 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 15 Jun 2020 00:53:32 +0800 Subject: [PATCH 047/462] bcache: use delayed kworker fo asynchronous devices registration This patch changes the asynchronous registration kworker to a delayed kworker. There is probability queue_work() queues the async registration kworker to the same CPU (even though very little), then the process which writing sysfs interface to reigster bcache device may won't return immeidately. queue_delayed_work() in this patch will delay 10 jiffies before insert the kworker to run queue, which makes sure the registering process may always returns to user space in time. Fixes: 9e23ccf8f0a22 ("bcache: asynchronous devices registration") Signed-off-by: Coly Li Cc: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 904ea9bbb5c4..a6e79083010a 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -2380,7 +2381,7 @@ static bool bch_is_open(struct block_device *bdev) } struct async_reg_args { - struct work_struct reg_work; + struct delayed_work reg_work; char *path; struct cache_sb *sb; struct cache_sb_disk *sb_disk; @@ -2391,7 +2392,7 @@ static void register_bdev_worker(struct work_struct *work) { int fail = false; struct async_reg_args *args = - container_of(work, struct async_reg_args, reg_work); + container_of(work, struct async_reg_args, reg_work.work); struct cached_dev *dc; dc = kzalloc(sizeof(*dc), GFP_KERNEL); @@ -2421,7 +2422,7 @@ static void register_cache_worker(struct work_struct *work) { int fail = false; struct async_reg_args *args = - container_of(work, struct async_reg_args, reg_work); + container_of(work, struct async_reg_args, reg_work.work); struct cache *ca; ca = kzalloc(sizeof(*ca), GFP_KERNEL); @@ -2449,11 +2450,12 @@ out: static void register_device_aync(struct async_reg_args *args) { if (SB_IS_BDEV(args->sb)) - INIT_WORK(&args->reg_work, register_bdev_worker); + INIT_DELAYED_WORK(&args->reg_work, register_bdev_worker); else - INIT_WORK(&args->reg_work, register_cache_worker); + INIT_DELAYED_WORK(&args->reg_work, register_cache_worker); - queue_work(system_wq, &args->reg_work); + /* 10 jiffies is enough for a delay */ + queue_delayed_work(system_wq, &args->reg_work, 10); } static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, From 4b25bbf52a55b58f03473333eafe61c0d46125aa Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 15 Jun 2020 00:53:33 +0800 Subject: [PATCH 048/462] bcache: pr_info() format clean up in bcache_device_init() scripts/checkpatch.pl reports following warning for patch ("bcache: check and adjust logical block size for backing devices"), WARNING: quoted string split across lines #146: FILE: drivers/md/bcache/super.c:896: + pr_info("%s: sb/logical block size (%u) greater than page size " + "(%lu) falling back to device logical block size (%u)", There are two things to fix up, - The kernel message print should be in a single line. - pr_info() won't automatically add new line since v5.8, a '\n' should be added. This patch just does the above cleanup in bcache_device_init(). Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a6e79083010a..2014016f9a60 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -893,8 +893,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, * This should only happen with BCACHE_SB_VERSION_BDEV. * Block/page size is checked for BCACHE_SB_VERSION_CDEV. */ - pr_info("%s: sb/logical block size (%u) greater than page size " - "(%lu) falling back to device logical block size (%u)", + pr_info("%s: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n", d->disk->disk_name, q->limits.logical_block_size, PAGE_SIZE, bdev_logical_block_size(cached_bdev)); From 7b16994437c7359832dd51d66c5c387995a91438 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Thu, 11 Jun 2020 18:03:39 -0400 Subject: [PATCH 049/462] Makefile: Improve compressed debug info support detection Commit 10e68b02c861 ("Makefile: support compressed debug info") added support for compressed debug sections. Support is detected by checking - does the compiler support -gz=zlib - does the assembler support --compressed-debug-sections=zlib - does the linker support --compressed-debug-sections=zlib However, the gcc driver's support for this option is somewhat convoluted. The driver's builtin specs are set based on the version of binutils that it was configured with. It reports an error if the configure-time linker/assembler (i.e., not necessarily the actual assembler that will be run) do not support the option, but only if the assembler (or linker) is actually invoked when -gz=zlib is passed. The cc-option check in scripts/Kconfig.include does not invoke the assembler, so the gcc driver reports success even if it does not support the option being passed to the assembler. Because the as-option check passes the option directly to the assembler via -Wa,--compressed-debug-sections=zlib, the gcc driver does not see this option and will never report an error. Combined with an installed version of binutils that is more recent than the one the compiler was built with, it is possible for all three tests to succeed, yet an actual compilation with -gz=zlib to fail. Moreover, it is unnecessary to explicitly pass --compressed-debug-sections=zlib to the assembler via -Wa, since the driver will do that automatically when it supports -gz=zlib. Convert the as-option to just -gz=zlib, simplifying it as well as performing a better test of the gcc driver's capabilities. Reported-by: kernel test robot Signed-off-by: Arvind Sankar Reviewed-by: Nick Desaulniers Signed-off-by: Masahiro Yamada --- Makefile | 2 +- lib/Kconfig.debug | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index ae5d8220f431..29abe44ada91 100644 --- a/Makefile +++ b/Makefile @@ -828,7 +828,7 @@ endif ifdef CONFIG_DEBUG_INFO_COMPRESSED DEBUG_CFLAGS += -gz=zlib -KBUILD_AFLAGS += -Wa,--compress-debug-sections=zlib +KBUILD_AFLAGS += -gz=zlib KBUILD_LDFLAGS += --compress-debug-sections=zlib endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d74ac0fd6b2d..96999d4d2dda 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -229,7 +229,7 @@ config DEBUG_INFO_COMPRESSED bool "Compressed debugging information" depends on DEBUG_INFO depends on $(cc-option,-gz=zlib) - depends on $(as-option,-Wa$(comma)--compress-debug-sections=zlib) + depends on $(as-option,-gz=zlib) depends on $(ld-option,--compress-debug-sections=zlib) help Compress the debug information using zlib. Requires GCC 5.0+ or Clang From 7d4e3919592593594296944ae7da1a207706e8c5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 10 Jun 2020 09:14:35 -0700 Subject: [PATCH 050/462] esp, ah: consolidate the crypto algorithm selections Instead of duplicating the algorithm selections between INET_AH and INET6_AH and between INET_ESP and INET6_ESP, create new tristates XFRM_AH and XFRM_ESP that do the algorithm selections, and make these be selected by the corresponding INET* options. Suggested-by: Herbert Xu Acked-by: Herbert Xu Cc: Corentin Labbe Cc: Greg Kroah-Hartman Cc: Steffen Klassert Signed-off-by: Eric Biggers Signed-off-by: Steffen Klassert --- net/ipv4/Kconfig | 16 ++-------------- net/ipv6/Kconfig | 16 ++-------------- net/xfrm/Kconfig | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+), 28 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 23ba5045e3d3..39a7a21744dc 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -340,11 +340,7 @@ config NET_FOU_IP_TUNNELS config INET_AH tristate "IP: AH transformation" - select XFRM_ALGO - select CRYPTO - select CRYPTO_HMAC - select CRYPTO_MD5 - select CRYPTO_SHA1 + select XFRM_AH ---help--- Support for IPsec AH. @@ -352,15 +348,7 @@ config INET_AH config INET_ESP tristate "IP: ESP transformation" - select XFRM_ALGO - select CRYPTO - select CRYPTO_AUTHENC - select CRYPTO_HMAC - select CRYPTO_MD5 - select CRYPTO_CBC - select CRYPTO_SHA1 - select CRYPTO_DES - select CRYPTO_ECHAINIV + select XFRM_ESP ---help--- Support for IPsec ESP. diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 4f03aece2980..70313f16319d 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -49,11 +49,7 @@ config IPV6_OPTIMISTIC_DAD config INET6_AH tristate "IPv6: AH transformation" - select XFRM_ALGO - select CRYPTO - select CRYPTO_HMAC - select CRYPTO_MD5 - select CRYPTO_SHA1 + select XFRM_AH ---help--- Support for IPsec AH. @@ -61,15 +57,7 @@ config INET6_AH config INET6_ESP tristate "IPv6: ESP transformation" - select XFRM_ALGO - select CRYPTO - select CRYPTO_AUTHENC - select CRYPTO_HMAC - select CRYPTO_MD5 - select CRYPTO_CBC - select CRYPTO_SHA1 - select CRYPTO_DES - select CRYPTO_ECHAINIV + select XFRM_ESP ---help--- Support for IPsec ESP. diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index b7fd9c838416..169c22140709 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -67,6 +67,26 @@ config XFRM_STATISTICS If unsure, say N. +config XFRM_AH + tristate + select XFRM_ALGO + select CRYPTO + select CRYPTO_HMAC + select CRYPTO_MD5 + select CRYPTO_SHA1 + +config XFRM_ESP + tristate + select XFRM_ALGO + select CRYPTO + select CRYPTO_AUTHENC + select CRYPTO_HMAC + select CRYPTO_MD5 + select CRYPTO_CBC + select CRYPTO_SHA1 + select CRYPTO_DES + select CRYPTO_ECHAINIV + config XFRM_IPCOMP tristate select XFRM_ALGO From 37ea0f18fb19e0646c166037043232915cd9e995 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 10 Jun 2020 09:14:36 -0700 Subject: [PATCH 051/462] esp: select CRYPTO_SEQIV Commit f23efcbcc523 ("crypto: ctr - no longer needs CRYPTO_SEQIV") made CRYPTO_CTR stop selecting CRYPTO_SEQIV. This breaks IPsec for most users since GCM and several other encryption algorithms require "seqiv" -- and RFC 8221 lists AES-GCM as "MUST" be implemented. Just make XFRM_ESP select CRYPTO_SEQIV. Fixes: f23efcbcc523 ("crypto: ctr - no longer needs CRYPTO_SEQIV") Acked-by: Herbert Xu Cc: Corentin Labbe Cc: Greg Kroah-Hartman Cc: Steffen Klassert Signed-off-by: Eric Biggers Signed-off-by: Steffen Klassert --- net/xfrm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 169c22140709..b2ff8df2c836 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -86,6 +86,7 @@ config XFRM_ESP select CRYPTO_SHA1 select CRYPTO_DES select CRYPTO_ECHAINIV + select CRYPTO_SEQIV config XFRM_IPCOMP tristate From be01369859b8aa07346e497381bb46d377da0d8c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 10 Jun 2020 09:14:37 -0700 Subject: [PATCH 052/462] esp, ah: modernize the crypto algorithm selections The crypto algorithms selected by the ESP and AH kconfig options are out-of-date with the guidance of RFC 8221, which lists the legacy algorithms MD5 and DES as "MUST NOT" be implemented, and some more modern algorithms like AES-GCM and HMAC-SHA256 as "MUST" be implemented. But the options select the legacy algorithms, not the modern ones. Therefore, modify these options to select the MUST algorithms -- and *only* the MUST algorithms. Also improve the help text. Note that other algorithms may still be explicitly enabled in the kconfig, and the choice of which to actually use is still controlled by userspace. This change only modifies the list of algorithms for which kernel support is guaranteed to be present. Suggested-by: Herbert Xu Suggested-by: Steffen Klassert Acked-by: Herbert Xu Cc: Corentin Labbe Cc: Greg Kroah-Hartman Signed-off-by: Eric Biggers Signed-off-by: Steffen Klassert --- net/ipv4/Kconfig | 18 ++++++++++++++++-- net/ipv6/Kconfig | 18 ++++++++++++++++-- net/xfrm/Kconfig | 15 +++++++++------ 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 39a7a21744dc..dc9dfaef77e5 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -342,7 +342,14 @@ config INET_AH tristate "IP: AH transformation" select XFRM_AH ---help--- - Support for IPsec AH. + Support for IPsec AH (Authentication Header). + + AH can be used with various authentication algorithms. Besides + enabling AH support itself, this option enables the generic + implementations of the algorithms that RFC 8221 lists as MUST be + implemented. If you need any other algorithms, you'll need to enable + them in the crypto API. You should also enable accelerated + implementations of any needed algorithms when available. If unsure, say Y. @@ -350,7 +357,14 @@ config INET_ESP tristate "IP: ESP transformation" select XFRM_ESP ---help--- - Support for IPsec ESP. + Support for IPsec ESP (Encapsulating Security Payload). + + ESP can be used with various encryption and authentication algorithms. + Besides enabling ESP support itself, this option enables the generic + implementations of the algorithms that RFC 8221 lists as MUST be + implemented. If you need any other algorithms, you'll need to enable + them in the crypto API. You should also enable accelerated + implementations of any needed algorithms when available. If unsure, say Y. diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 70313f16319d..414a68b16869 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -51,7 +51,14 @@ config INET6_AH tristate "IPv6: AH transformation" select XFRM_AH ---help--- - Support for IPsec AH. + Support for IPsec AH (Authentication Header). + + AH can be used with various authentication algorithms. Besides + enabling AH support itself, this option enables the generic + implementations of the algorithms that RFC 8221 lists as MUST be + implemented. If you need any other algorithms, you'll need to enable + them in the crypto API. You should also enable accelerated + implementations of any needed algorithms when available. If unsure, say Y. @@ -59,7 +66,14 @@ config INET6_ESP tristate "IPv6: ESP transformation" select XFRM_ESP ---help--- - Support for IPsec ESP. + Support for IPsec ESP (Encapsulating Security Payload). + + ESP can be used with various encryption and authentication algorithms. + Besides enabling ESP support itself, this option enables the generic + implementations of the algorithms that RFC 8221 lists as MUST be + implemented. If you need any other algorithms, you'll need to enable + them in the crypto API. You should also enable accelerated + implementations of any needed algorithms when available. If unsure, say Y. diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index b2ff8df2c836..e77ba529229c 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -67,26 +67,29 @@ config XFRM_STATISTICS If unsure, say N. +# This option selects XFRM_ALGO along with the AH authentication algorithms that +# RFC 8221 lists as MUST be implemented. config XFRM_AH tristate select XFRM_ALGO select CRYPTO select CRYPTO_HMAC - select CRYPTO_MD5 - select CRYPTO_SHA1 + select CRYPTO_SHA256 +# This option selects XFRM_ALGO along with the ESP encryption and authentication +# algorithms that RFC 8221 lists as MUST be implemented. config XFRM_ESP tristate select XFRM_ALGO select CRYPTO + select CRYPTO_AES select CRYPTO_AUTHENC - select CRYPTO_HMAC - select CRYPTO_MD5 select CRYPTO_CBC - select CRYPTO_SHA1 - select CRYPTO_DES select CRYPTO_ECHAINIV + select CRYPTO_GCM + select CRYPTO_HMAC select CRYPTO_SEQIV + select CRYPTO_SHA256 config XFRM_IPCOMP tristate From dbed452a078d56bc7f1abecc3edd6a75e8e4484e Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 11 Jun 2020 00:25:57 -0700 Subject: [PATCH 053/462] dma-pool: decouple DMA_REMAP from DMA_COHERENT_POOL DMA_REMAP is an unnecessary requirement for AMD SEV, which requires DMA_COHERENT_POOL, so avoid selecting it when it is otherwise unnecessary. The only other requirement for DMA coherent pools is DMA_DIRECT_REMAP, so ensure that properly selects the config option when needed. Fixes: 82fef0ad811f ("x86/mm: unencrypted non-blocking DMA allocations use coherent pools") Reported-by: Alex Xu (Hello71) Signed-off-by: David Rientjes Tested-by: Alex Xu (Hello71) Signed-off-by: Christoph Hellwig --- kernel/dma/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index d006668c0027..a0ce3c1494fd 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -73,18 +73,18 @@ config SWIOTLB config DMA_NONCOHERENT_MMAP bool +config DMA_COHERENT_POOL + bool + config DMA_REMAP + bool depends on MMU select GENERIC_ALLOCATOR select DMA_NONCOHERENT_MMAP - bool - -config DMA_COHERENT_POOL - bool - select DMA_REMAP config DMA_DIRECT_REMAP bool + select DMA_REMAP select DMA_COHERENT_POOL config DMA_CMA From c9808bbfed3cfc911ecb60fe8e80c0c27876c657 Mon Sep 17 00:00:00 2001 From: "Yick W. Tse" Date: Sat, 13 Jun 2020 11:40:06 +0000 Subject: [PATCH 054/462] ALSA: usb-audio: add quirk for Denon DCD-1500RE fix error "clock source 41 is not valid, cannot use" [] New USB device found, idVendor=154e, idProduct=1002, bcdDevice= 1.00 [] New USB device strings: Mfr=1, Product=2, SerialNumber=0 [] Product: DCD-1500RE [] Manufacturer: D & M Holdings Inc. [] [] clock source 41 is not valid, cannot use [] usbcore: registered new interface driver snd-usb-audio Signed-off-by: Yick W. Tse Cc: Link: https://lore.kernel.org/r/1373857985.210365.1592048406997@mail.yahoo.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index bca0179a0ef8..c495e720e2f1 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1532,6 +1532,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) static bool is_itf_usb_dsd_dac(unsigned int id) { switch (id) { + case USB_ID(0x154e, 0x1002): /* Denon DCD-1500RE */ case USB_ID(0x154e, 0x1003): /* Denon DA-300USB */ case USB_ID(0x154e, 0x3005): /* Marantz HD-DAC1 */ case USB_ID(0x154e, 0x3006): /* Marantz SA-14S1 */ From 8abf41dcd1bcdda0d09905fb59d18f45c035c752 Mon Sep 17 00:00:00 2001 From: Christopher Swenson Date: Sun, 14 Jun 2020 17:11:47 -0700 Subject: [PATCH 055/462] ALSA: usb-audio: Set 48 kHz rate for Rodecaster Like the Line6 devices, the Rode Rodecaster Pro does not support UAC2_CS_RANGE and only supports a sample rate of 48 kHz. Tested against a Rode Rodecaster Pro. Tested-by: Christopher Swenson Signed-off-by: Christopher Swenson Cc: Link: https://lore.kernel.org/r/ebdb9e72-9649-0b5e-b9b9-d757dbf26927@swenson.io Signed-off-by: Takashi Iwai --- sound/usb/format.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/usb/format.c b/sound/usb/format.c index 5ffb457cc88c..1b28d01d1f4c 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -394,8 +394,9 @@ skip_rate: return nr_rates; } -/* Line6 Helix series don't support the UAC2_CS_RANGE usb function - * call. Return a static table of known clock rates. +/* Line6 Helix series and the Rode Rodecaster Pro don't support the + * UAC2_CS_RANGE usb function call. Return a static table of known + * clock rates. */ static int line6_parse_audio_format_rates_quirk(struct snd_usb_audio *chip, struct audioformat *fp) @@ -408,6 +409,7 @@ static int line6_parse_audio_format_rates_quirk(struct snd_usb_audio *chip, case USB_ID(0x0e41, 0x4248): /* Line6 Helix >= fw 2.82 */ case USB_ID(0x0e41, 0x4249): /* Line6 Helix Rack >= fw 2.82 */ case USB_ID(0x0e41, 0x424a): /* Line6 Helix LT >= fw 2.82 */ + case USB_ID(0x19f7, 0x0011): /* Rode Rodecaster Pro */ return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000); } From 95459261c99f1621d90bc628c2a48e60b7cf9a88 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 28 May 2020 15:21:04 +0800 Subject: [PATCH 056/462] hwrng: ks-sa - Fix runtime PM imbalance on error pm_runtime_get_sync() increments the runtime PM usage counter even the call returns an error code. Thus a pairing decrement is needed on the error handling path to keep the counter balanced. Signed-off-by: Dinghao Liu Reviewed-by: Alexander Sverdlin Signed-off-by: Herbert Xu --- drivers/char/hw_random/ks-sa-rng.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/hw_random/ks-sa-rng.c b/drivers/char/hw_random/ks-sa-rng.c index e2330e757f1f..001617033d6a 100644 --- a/drivers/char/hw_random/ks-sa-rng.c +++ b/drivers/char/hw_random/ks-sa-rng.c @@ -244,6 +244,7 @@ static int ks_sa_rng_probe(struct platform_device *pdev) ret = pm_runtime_get_sync(dev); if (ret < 0) { dev_err(dev, "Failed to enable SA power-domain\n"); + pm_runtime_put_noidle(dev); pm_runtime_disable(dev); return ret; } From 7cf81954705b7e5b057f7dc39a7ded54422ab6e1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 29 May 2020 14:54:43 +1000 Subject: [PATCH 057/462] crypto: algif_skcipher - Cap recv SG list at ctx->used Somewhere along the line the cap on the SG list length for receive was lost. This patch restores it and removes the subsequent test which is now redundant. Fixes: 2d97591ef43d ("crypto: af_alg - consolidation of...") Cc: Signed-off-by: Herbert Xu Reviewed-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/algif_skcipher.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index e2c8ab408bed..4c3bdffe0c3a 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -74,14 +74,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, return PTR_ERR(areq); /* convert iovecs of output buffers into RX SGL */ - err = af_alg_get_rsgl(sk, msg, flags, areq, -1, &len); + err = af_alg_get_rsgl(sk, msg, flags, areq, ctx->used, &len); if (err) goto free; - /* Process only as much RX buffers for which we have TX data */ - if (len > ctx->used) - len = ctx->used; - /* * If more buffers are to be expected to be processed, process only * full block size buffers. From c61e5644c69775ae9d54b86018fca238aca64a9b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 4 Jun 2020 17:37:50 +1000 Subject: [PATCH 058/462] crypto: hisilicon - Cap block size at 2^31 The function hisi_acc_create_sg_pool may allocate a block of memory of size PAGE_SIZE * 2^(MAX_ORDER - 1). This value may exceed 2^31 on ia64, which would overflow the u32. This patch caps it at 2^31. Reported-by: kernel test robot Fixes: d8ac7b85236b ("crypto: hisilicon - fix large sgl memory...") Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sgl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c index 0e8c7e324fb4..725a739800b0 100644 --- a/drivers/crypto/hisilicon/sgl.c +++ b/drivers/crypto/hisilicon/sgl.c @@ -66,7 +66,8 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, sgl_size = sizeof(struct acc_hw_sge) * sge_nr + sizeof(struct hisi_acc_hw_sgl); - block_size = PAGE_SIZE * (1 << (MAX_ORDER - 1)); + block_size = 1 << (PAGE_SHIFT + MAX_ORDER <= 32 ? + PAGE_SHIFT + MAX_ORDER - 1 : 31); sgl_num_per_block = block_size / sgl_size; block_num = count / sgl_num_per_block; remain_sgl = count % sgl_num_per_block; From 376bd28d03c97b4d53f5797d5f9b3522c8bd4d3d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 4 Jun 2020 18:09:41 +1000 Subject: [PATCH 059/462] crypto: ccp - Fix sparse warnings in sev-dev This patch fixes a bunch of sparse warnings in sev-dev where the __user marking is incorrectly handled. Reported-by: kbuild test robot Fixes: 7360e4b14350 ("crypto: ccp: Implement SEV_PEK_CERT_IMPORT...") Fixes: e799035609e1 ("crypto: ccp: Implement SEV_PEK_CSR ioctl...") Fixes: 76a2b524a4b1 ("crypto: ccp: Implement SEV_PDH_CERT_EXPORT...") Fixes: d6112ea0cb34 ("crypto: ccp - introduce SEV_GET_ID2 command") Signed-off-by: Herbert Xu Reviewed-by: Brijesh Singh Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 23 ++++++++++++++++------- include/linux/psp-sev.h | 2 +- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index a2426334be61..476113e12489 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -376,6 +376,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable) struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pek_csr input; struct sev_data_pek_csr *data; + void __user *input_address; void *blob = NULL; int ret; @@ -394,6 +395,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable) goto cmd; /* allocate a physically contiguous buffer to store the CSR blob */ + input_address = (void __user *)input.address; if (input.length > SEV_FW_BLOB_MAX_SIZE) { ret = -EFAULT; goto e_free; @@ -426,7 +428,7 @@ cmd: } if (blob) { - if (copy_to_user((void __user *)input.address, blob, input.length)) + if (copy_to_user(input_address, blob, input.length)) ret = -EFAULT; } @@ -437,7 +439,7 @@ e_free: return ret; } -void *psp_copy_user_blob(u64 __user uaddr, u32 len) +void *psp_copy_user_blob(u64 uaddr, u32 len) { if (!uaddr || !len) return ERR_PTR(-EINVAL); @@ -446,7 +448,7 @@ void *psp_copy_user_blob(u64 __user uaddr, u32 len) if (len > SEV_FW_BLOB_MAX_SIZE) return ERR_PTR(-EINVAL); - return memdup_user((void __user *)(uintptr_t)uaddr, len); + return memdup_user((void __user *)uaddr, len); } EXPORT_SYMBOL_GPL(psp_copy_user_blob); @@ -621,6 +623,7 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp) { struct sev_user_data_get_id2 input; struct sev_data_get_id *data; + void __user *input_address; void *id_blob = NULL; int ret; @@ -631,6 +634,8 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp) if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) return -EFAULT; + input_address = (void __user *)input.address; + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -660,8 +665,7 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp) } if (id_blob) { - if (copy_to_user((void __user *)input.address, - id_blob, data->len)) { + if (copy_to_user(input_address, id_blob, data->len)) { ret = -EFAULT; goto e_free; } @@ -720,6 +724,8 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable) struct sev_user_data_pdh_cert_export input; void *pdh_blob = NULL, *cert_blob = NULL; struct sev_data_pdh_cert_export *data; + void __user *input_cert_chain_address; + void __user *input_pdh_cert_address; int ret; /* If platform is not in INIT state then transition it to INIT. */ @@ -745,6 +751,9 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable) !input.cert_chain_address) goto cmd; + input_pdh_cert_address = (void __user *)input.pdh_cert_address; + input_cert_chain_address = (void __user *)input.cert_chain_address; + /* Allocate a physically contiguous buffer to store the PDH blob. */ if (input.pdh_cert_len > SEV_FW_BLOB_MAX_SIZE) { ret = -EFAULT; @@ -788,7 +797,7 @@ cmd: } if (pdh_blob) { - if (copy_to_user((void __user *)input.pdh_cert_address, + if (copy_to_user(input_pdh_cert_address, pdh_blob, input.pdh_cert_len)) { ret = -EFAULT; goto e_free_cert; @@ -796,7 +805,7 @@ cmd: } if (cert_blob) { - if (copy_to_user((void __user *)input.cert_chain_address, + if (copy_to_user(input_cert_chain_address, cert_blob, input.cert_chain_len)) ret = -EFAULT; } diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 7fbc8679145c..49d155cd2dfe 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -597,7 +597,7 @@ int sev_guest_df_flush(int *error); */ int sev_guest_decommission(struct sev_data_decommission *data, int *error); -void *psp_copy_user_blob(u64 __user uaddr, u32 len); +void *psp_copy_user_blob(u64 uaddr, u32 len); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ From 24c7bf089453f6a1eed4c77ed0604bf4bb8cfb03 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Thu, 4 Jun 2020 12:39:47 +0200 Subject: [PATCH 060/462] crypto: caam - fix typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix CAAM related typos. Signed-off-by: Heinrich Schuchardt Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/Kconfig | 2 +- drivers/crypto/caam/ctrl.c | 18 +++++++++--------- drivers/crypto/caam/desc.h | 4 ++-- drivers/crypto/caam/pdb.h | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index a62f228be6da..bc35aa0ec07a 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -147,7 +147,7 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API select HW_RANDOM help Selecting this will register the SEC4 hardware rng to - the hw_random API for suppying the kernel entropy pool. + the hw_random API for supplying the kernel entropy pool. endif # CRYPTO_DEV_FSL_CAAM_JR diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 4fcdd262e581..f3d20b7645e0 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -54,7 +54,7 @@ static void build_instantiation_desc(u32 *desc, int handle, int do_sk) /* * load 1 to clear written reg: - * resets the done interrrupt and returns the RNG to idle. + * resets the done interrupt and returns the RNG to idle. */ append_load_imm_u32(desc, 1, LDST_SRCDST_WORD_CLRW); @@ -156,7 +156,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, DESC_DER_DECO_STAT_SHIFT; /* - * If an error occured in the descriptor, then + * If an error occurred in the descriptor, then * the DECO status field will be set to 0x0D */ if (deco_state == DECO_STAT_HOST_ERR) @@ -264,7 +264,7 @@ static void devm_deinstantiate_rng(void *data) * - -ENODEV if DECO0 couldn't be acquired * - -EAGAIN if an error occurred when executing the descriptor * f.i. there was a RNG hardware error due to not "good enough" - * entropy being aquired. + * entropy being acquired. */ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, int gen_sk) @@ -733,8 +733,8 @@ static int caam_probe(struct platform_device *pdev) handle_imx6_err005766(&ctrl->mcr); /* - * Read the Compile Time paramters and SCFGR to determine - * if Virtualization is enabled for this platform + * Read the Compile Time parameters and SCFGR to determine + * if virtualization is enabled for this platform */ scfgr = rd_reg32(&ctrl->scfgr); @@ -863,9 +863,9 @@ static int caam_probe(struct platform_device *pdev) } /* * if instantiate_rng(...) fails, the loop will rerun - * and the kick_trng(...) function will modfiy the + * and the kick_trng(...) function will modify the * upper and lower limits of the entropy sampling - * interval, leading to a sucessful initialization of + * interval, leading to a successful initialization of * the RNG. */ ret = instantiate_rng(dev, inst_handles, @@ -882,8 +882,8 @@ static int caam_probe(struct platform_device *pdev) return ret; } /* - * Set handles init'ed by this module as the complement of the - * already initialized ones + * Set handles initialized by this module as the complement of + * the already initialized ones */ ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_MASK; diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index e796d3cb9be8..e13470901586 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -18,7 +18,7 @@ */ #define SEC4_SG_LEN_EXT 0x80000000 /* Entry points to table */ -#define SEC4_SG_LEN_FIN 0x40000000 /* Last ent in table */ +#define SEC4_SG_LEN_FIN 0x40000000 /* Last entry in table */ #define SEC4_SG_BPID_MASK 0x000000ff #define SEC4_SG_BPID_SHIFT 16 #define SEC4_SG_LEN_MASK 0x3fffffff /* Excludes EXT and FINAL */ @@ -113,7 +113,7 @@ */ #define HDR_REVERSE 0x00000800 -/* Propogate DNR property to SharedDesc */ +/* Propagate DNR property to SharedDesc */ #define HDR_PROP_DNR 0x00000800 /* JobDesc/SharedDesc share property */ diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h index 68c1fd5dee5d..8ccc22075043 100644 --- a/drivers/crypto/caam/pdb.h +++ b/drivers/crypto/caam/pdb.h @@ -453,7 +453,7 @@ struct srtp_decap_pdb { #define DSA_PDB_N_MASK 0x7f struct dsa_sign_pdb { - u32 sgf_ln; /* Use DSA_PDB_ defintions per above */ + u32 sgf_ln; /* Use DSA_PDB_ definitions per above */ u8 *q; u8 *r; u8 *g; /* or Gx,y */ From 77251e41f89a813b4090f5199442f217bbf11297 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 4 Jun 2020 11:52:53 -0700 Subject: [PATCH 061/462] crypto: algboss - don't wait during notifier callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a crypto template needs to be instantiated, CRYPTO_MSG_ALG_REQUEST is sent to crypto_chain. cryptomgr_schedule_probe() handles this by starting a thread to instantiate the template, then waiting for this thread to complete via crypto_larval::completion. This can deadlock because instantiating the template may require loading modules, and this (apparently depending on userspace) may need to wait for the crc-t10dif module (lib/crc-t10dif.c) to be loaded. But crc-t10dif's module_init function uses crypto_register_notifier() and therefore takes crypto_chain.rwsem for write. That can't proceed until the notifier callback has finished, as it holds this semaphore for read. Fix this by removing the wait on crypto_larval::completion from within cryptomgr_schedule_probe(). It's actually unnecessary because crypto_alg_mod_lookup() calls crypto_larval_wait() itself after sending CRYPTO_MSG_ALG_REQUEST. This only actually became a problem in v4.20 due to commit b76377543b73 ("crc-t10dif: Pick better transform if one becomes available"), but the unnecessary wait was much older. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=207159 Reported-by: Mike Gerow Fixes: 398710379f51 ("crypto: algapi - Move larval completion into algboss") Cc: # v3.6+ Cc: Martin K. Petersen Signed-off-by: Eric Biggers Reported-by: Kai Lüke Signed-off-by: Herbert Xu --- crypto/algboss.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/crypto/algboss.c b/crypto/algboss.c index 535f1f87e6c1..5ebccbd6b74e 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -178,8 +178,6 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval) if (IS_ERR(thread)) goto err_put_larval; - wait_for_completion_interruptible(&larval->completion); - return NOTIFY_STOP; err_put_larval: From 1f5b07f5dd1748a6f9363fb1a76d599c74af8231 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Jun 2020 14:03:39 +0300 Subject: [PATCH 062/462] crypto: marvell/octeontx - Fix a potential NULL dereference Smatch reports that: drivers/crypto/marvell/octeontx/otx_cptvf_algs.c:132 otx_cpt_aead_callback() warn: variable dereferenced before check 'cpt_info' (see line 121) This function is called from process_pending_queue() as: drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c 599 /* 600 * Call callback after current pending entry has been 601 * processed, we don't do it if the callback pointer is 602 * invalid. 603 */ 604 if (callback) 605 callback(res_code, areq, cpt_info); It does appear to me that "cpt_info" can be NULL so this could lead to a NULL dereference. Fixes: 10b4f09491bf ("crypto: marvell - add the Virtual Function driver for CPT") Signed-off-by: Dan Carpenter Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx/otx_cptvf_algs.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c index 60e744f680d3..1e0a1d70ebd3 100644 --- a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c @@ -118,6 +118,9 @@ static void otx_cpt_aead_callback(int status, void *arg1, void *arg2) struct otx_cpt_req_info *cpt_req; struct pci_dev *pdev; + if (!cpt_info) + goto complete; + cpt_req = cpt_info->req; if (!status) { /* @@ -129,10 +132,10 @@ static void otx_cpt_aead_callback(int status, void *arg1, void *arg2) !cpt_req->is_enc) status = validate_hmac_cipher_null(cpt_req); } - if (cpt_info) { - pdev = cpt_info->pdev; - do_request_cleanup(pdev, cpt_info); - } + pdev = cpt_info->pdev; + do_request_cleanup(pdev, cpt_info); + +complete: if (areq) areq->complete(areq, status); } From 819966c06b759022e9932f328284314d9272b9f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BCller?= Date: Sun, 7 Jun 2020 15:20:26 +0200 Subject: [PATCH 063/462] crypto: drbg - always try to free Jitter RNG instance The Jitter RNG is unconditionally allocated as a seed source follwoing the patch 97f2650e5040. Thus, the instance must always be deallocated. Reported-by: syzbot+2e635807decef724a1fa@syzkaller.appspotmail.com Fixes: 97f2650e5040 ("crypto: drbg - always seeded with SP800-90B ...") Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/drbg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index 37526eb8c5d5..8d80d93cab97 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1631,10 +1631,12 @@ static int drbg_uninstantiate(struct drbg_state *drbg) if (drbg->random_ready.func) { del_random_ready_callback(&drbg->random_ready); cancel_work_sync(&drbg->seed_work); - crypto_free_rng(drbg->jent); - drbg->jent = NULL; } + if (!IS_ERR_OR_NULL(drbg->jent)) + crypto_free_rng(drbg->jent); + drbg->jent = NULL; + if (drbg->d_ops) drbg->d_ops->crypto_fini(drbg); drbg_dealloc_state(drbg); From 0ae705f3d2b22d9d762f67fd49aa6c290987c6a3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 15 Jun 2020 14:56:55 +0800 Subject: [PATCH 064/462] KVM: MIPS: Fix a build error for !CPU_LOONGSON64 During the KVM merging progress, a CONFIG_CPU_LOONGSON64 guard in commit 7f2a83f1c2a941ebfee53 ("KVM: MIPS: Add CPUCFG emulation for Loongson-3") is missing by accident. So add it to avoid building error. Fixes: 7f2a83f1c2a941ebfee53 ("KVM: MIPS: Add CPUCFG emulation for Loongson-3") Reported-by: kernel test robot Signed-off-by: Huacai Chen Message-Id: <1592204215-28704-1-git-send-email-chenhc@lemote.com> Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 521bd5891e84..666d3350b4ac 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -67,7 +67,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { VCPU_STAT("vz_ghfc", vz_ghfc_exits), VCPU_STAT("vz_gpa", vz_gpa_exits), VCPU_STAT("vz_resvd", vz_resvd_exits), +#ifdef CONFIG_CPU_LOONGSON64 VCPU_STAT("vz_cpucfg", vz_cpucfg_exits), +#endif #endif VCPU_STAT("halt_successful_poll", halt_successful_poll), VCPU_STAT("halt_attempted_poll", halt_attempted_poll), From 8497376707be6d2ab30f6d6662f42ab0c15d2ba1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 14:20:46 +0100 Subject: [PATCH 065/462] drm/i915/gt: Incorporate the virtual engine into timeslicing It was quite the oversight to only factor in the normal queue to decide the timeslicing switch priority. By leaving out the next virtual request from the priority decision, we would not timeslice the current engine if there was an available virtual request. Testcase: igt/gem_exec_balancer/sliced Fixes: 3df2deed411e ("drm/i915/execlists: Enable timeslice on partial virtual engine dequeue") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200519132046.22443-3-chris@chris-wilson.co.uk (cherry picked from commit 6ad249ba59badc7ff157d4db1f835748f0e2c9b6) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_lrc.c | 30 +++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 87e6c5bdd2dc..0b58e6c1e235 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1895,7 +1895,8 @@ static void defer_active(struct intel_engine_cs *engine) static bool need_timeslice(const struct intel_engine_cs *engine, - const struct i915_request *rq) + const struct i915_request *rq, + const struct rb_node *rb) { int hint; @@ -1903,6 +1904,24 @@ need_timeslice(const struct intel_engine_cs *engine, return false; hint = engine->execlists.queue_priority_hint; + + if (rb) { + const struct virtual_engine *ve = + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + const struct intel_engine_cs *inflight = + intel_context_inflight(&ve->context); + + if (!inflight || inflight == engine) { + struct i915_request *next; + + rcu_read_lock(); + next = READ_ONCE(ve->request); + if (next) + hint = max(hint, rq_prio(next)); + rcu_read_unlock(); + } + } + if (!list_is_last(&rq->sched.link, &engine->active.requests)) hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); @@ -1977,10 +1996,9 @@ static void set_timeslice(struct intel_engine_cs *engine) set_timer_ms(&engine->execlists.timer, duration); } -static void start_timeslice(struct intel_engine_cs *engine) +static void start_timeslice(struct intel_engine_cs *engine, int prio) { struct intel_engine_execlists *execlists = &engine->execlists; - const int prio = queue_prio(execlists); unsigned long duration; if (!intel_engine_has_timeslices(engine)) @@ -2140,7 +2158,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) __unwind_incomplete_requests(engine); last = NULL; - } else if (need_timeslice(engine, last) && + } else if (need_timeslice(engine, last, rb) && timeslice_expired(execlists, last)) { if (i915_request_completed(last)) { tasklet_hi_schedule(&execlists->tasklet); @@ -2188,7 +2206,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ - start_timeslice(engine); + start_timeslice(engine, queue_prio(execlists)); return; } } @@ -2223,7 +2241,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); - start_timeslice(engine); + start_timeslice(engine, rq_prio(rq)); return; /* leave this for another sibling */ } From 54a9adc4608144c7a17530822246f3cfa9b15d76 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 May 2020 16:05:08 +0200 Subject: [PATCH 066/462] drm/i915/pmu: avoid an maybe-uninitialized warning Conditional spinlocks make it hard for gcc and for lockdep to follow the code flow. This one causes a warning with at least gcc-9 and higher: In file included from include/linux/irq.h:14, from drivers/gpu/drm/i915/i915_pmu.c:7: drivers/gpu/drm/i915/i915_pmu.c: In function 'i915_sample': include/linux/spinlock.h:289:3: error: 'flags' may be used uninitialized in this function [-Werror=maybe-uninitialized] 289 | _raw_spin_unlock_irqrestore(lock, flags); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/i915_pmu.c:288:17: note: 'flags' was declared here 288 | unsigned long flags; | ^~~~~ Split out the part between the locks into a separate function for readability and to let the compiler figure out what the logic actually is. Fixes: d79e1bd676f0 ("drm/i915/pmu: Only use exclusive mmio access for gen7") Signed-off-by: Arnd Bergmann Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200527140526.1458215-1-arnd@arndb.de (cherry picked from commit 6ec81b82732e2b4a5ac0853fd33919ff1ca94238) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_pmu.c | 84 ++++++++++++++++----------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index e991a707bdb7..962ded9ce73f 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -269,12 +269,48 @@ static bool exclusive_mmio_access(const struct drm_i915_private *i915) return IS_GEN(i915, 7); } +static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns) +{ + struct intel_engine_pmu *pmu = &engine->pmu; + bool busy; + u32 val; + + val = ENGINE_READ_FW(engine, RING_CTL); + if (val == 0) /* powerwell off => engine idle */ + return; + + if (val & RING_WAIT) + add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); + if (val & RING_WAIT_SEMAPHORE) + add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); + + /* No need to sample when busy stats are supported. */ + if (intel_engine_supports_stats(engine)) + return; + + /* + * While waiting on a semaphore or event, MI_MODE reports the + * ring as idle. However, previously using the seqno, and with + * execlists sampling, we account for the ring waiting as the + * engine being busy. Therefore, we record the sample as being + * busy if either waiting or !idle. + */ + busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); + if (!busy) { + val = ENGINE_READ_FW(engine, RING_MI_MODE); + busy = !(val & MODE_IDLE); + } + if (busy) + add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); +} + static void engines_sample(struct intel_gt *gt, unsigned int period_ns) { struct drm_i915_private *i915 = gt->i915; struct intel_engine_cs *engine; enum intel_engine_id id; + unsigned long flags; if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0) return; @@ -283,53 +319,17 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) return; for_each_engine(engine, gt, id) { - struct intel_engine_pmu *pmu = &engine->pmu; - spinlock_t *mmio_lock; - unsigned long flags; - bool busy; - u32 val; - if (!intel_engine_pm_get_if_awake(engine)) continue; - mmio_lock = NULL; - if (exclusive_mmio_access(i915)) - mmio_lock = &engine->uncore->lock; - - if (unlikely(mmio_lock)) - spin_lock_irqsave(mmio_lock, flags); - - val = ENGINE_READ_FW(engine, RING_CTL); - if (val == 0) /* powerwell off => engine idle */ - goto skip; - - if (val & RING_WAIT) - add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); - if (val & RING_WAIT_SEMAPHORE) - add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); - - /* No need to sample when busy stats are supported. */ - if (intel_engine_supports_stats(engine)) - goto skip; - - /* - * While waiting on a semaphore or event, MI_MODE reports the - * ring as idle. However, previously using the seqno, and with - * execlists sampling, we account for the ring waiting as the - * engine being busy. Therefore, we record the sample as being - * busy if either waiting or !idle. - */ - busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); - if (!busy) { - val = ENGINE_READ_FW(engine, RING_MI_MODE); - busy = !(val & MODE_IDLE); + if (exclusive_mmio_access(i915)) { + spin_lock_irqsave(&engine->uncore->lock, flags); + engine_sample(engine, period_ns); + spin_unlock_irqrestore(&engine->uncore->lock, flags); + } else { + engine_sample(engine, period_ns); } - if (busy) - add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); -skip: - if (unlikely(mmio_lock)) - spin_unlock_irqrestore(mmio_lock, flags); intel_engine_pm_put_async(engine); } } From 70cac501b576710b426f3e9f67fcd0a0558df72c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 May 2020 16:05:09 +0200 Subject: [PATCH 067/462] drm/i915: work around false-positive maybe-uninitialized warning gcc-9 gets confused by the code flow in check_dirty_whitelist: drivers/gpu/drm/i915/gt/selftest_workarounds.c: In function 'check_dirty_whitelist': drivers/gpu/drm/i915/gt/selftest_workarounds.c:492:17: error: 'rsvd' may be used uninitialized in this function [-Werror=maybe-uninitialized] I could not figure out a good way to do this in a way that gcc understands better, so initialize the variable to zero, as last resort. Fixes: aee20aaed887 ("drm/i915: Implement read-only support in whitelist selftest") Signed-off-by: Arnd Bergmann Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200527140526.1458215-2-arnd@arndb.de (cherry picked from commit cc649a9eafc1ef5c40db023084cb94422d08aa84) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 5ed323254ee1..32785463ec9e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -623,6 +623,8 @@ err_request: err = -EINVAL; goto out_unpin; } + } else { + rsvd = 0; } expect = results[0]; From 751c263bb74fd36b5fc2589d36abc75042336444 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 15 Jun 2020 12:19:39 +0200 Subject: [PATCH 068/462] arm64: remove TEXT_OFFSET randomization TEXT_OFFSET was recently changed to 0x0, in preparation for its removal at a later stage, and a warning is emitted into the kernel log when the bootloader appears to have failed to take the TEXT_OFFSET image header value into account. Ironically, this warning itself fails to take TEXT_OFFSET into account, and compares the kernel image's alignment modulo 2M against a hardcoded value of 0x0, and so the warning will trigger spuriously when TEXT_OFFSET randomization is enabled. Given the intent to get rid of TEXT_OFFSET entirely, let's fix this oversight by just removing support for TEXT_OFFSET randomization. Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200615101939.634391-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig.debug | 15 --------------- arch/arm64/Makefile | 6 ------ 2 files changed, 21 deletions(-) diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index cdf7ec0b975e..265c4461031f 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -8,21 +8,6 @@ config PID_IN_CONTEXTIDR instructions during context switch. Say Y here only if you are planning to use hardware trace tools with this kernel. -config ARM64_RANDOMIZE_TEXT_OFFSET - bool "Randomize TEXT_OFFSET at build time" - help - Say Y here if you want the image load offset (AKA TEXT_OFFSET) - of the kernel to be randomized at build-time. When selected, - this option will cause TEXT_OFFSET to be randomized upon any - build of the kernel, and the offset will be reflected in the - text_offset field of the resulting Image. This can be used to - fuzz-test bootloaders which respect text_offset. - - This option is intended for bootloader and/or kernel testing - only. Bootloaders must make no assumptions regarding the value - of TEXT_OFFSET and platforms must not require a specific - value. - config DEBUG_EFI depends on EFI && DEBUG_INFO bool "UEFI debugging" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 76359cfb328a..a0d94d063fa8 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -121,13 +121,7 @@ endif head-y := arch/arm64/kernel/head.o # The byte offset of the kernel image in RAM from the start of RAM. -ifeq ($(CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET), y) -TEXT_OFFSET := $(shell awk "BEGIN {srand(); printf \"0x%06x\n\", \ - int(2 * 1024 * 1024 / (2 ^ $(CONFIG_ARM64_PAGE_SHIFT)) * \ - rand()) * (2 ^ $(CONFIG_ARM64_PAGE_SHIFT))}") -else TEXT_OFFSET := 0x0 -endif ifeq ($(CONFIG_KASAN_SW_TAGS), y) KASAN_SHADOW_SCALE_SHIFT := 4 From 9ba6a9efa4a4c721d0115089bf422f82b8a59e45 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 10 Jun 2020 18:03:09 +0100 Subject: [PATCH 069/462] docs/arm64: Fix typo'd #define in sve.rst sve.rst describes a flag PR_SVE_SET_VL_INHERIT for the PR_SVE_SET_VL prctl, but there is no flag of this name. The flag is shared between the _GET and _SET calls, so the _SET prefix was dropped, giving the name PR_SVE_VL_INHERIT in the headers. Fix it. Signed-off-by: Dave Martin Link: https://lore.kernel.org/r/1591808590-20210-2-git-send-email-Dave.Martin@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/sve.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/arm64/sve.rst b/Documentation/arm64/sve.rst index 5689c74c8082..bfd55f468258 100644 --- a/Documentation/arm64/sve.rst +++ b/Documentation/arm64/sve.rst @@ -186,7 +186,7 @@ prctl(PR_SVE_SET_VL, unsigned long arg) flags: - PR_SVE_SET_VL_INHERIT + PR_SVE_VL_INHERIT Inherit the current vector length across execve(). Otherwise, the vector length is reset to the system default at execve(). (See @@ -247,7 +247,7 @@ prctl(PR_SVE_GET_VL) The following flag may be OR-ed into the result: - PR_SVE_SET_VL_INHERIT + PR_SVE_VL_INHERIT Vector length will be inherited across execve(). @@ -393,7 +393,7 @@ The regset data starts with struct user_sve_header, containing: * At every execve() call, the new vector length of the new process is set to the system default vector length, unless - * PR_SVE_SET_VL_INHERIT (or equivalently SVE_PT_VL_INHERIT) is set for the + * PR_SVE_VL_INHERIT (or equivalently SVE_PT_VL_INHERIT) is set for the calling thread, or * a deferred vector length change is pending, established via the From a6e2c226c3d51fd93636320e47cabc8a8f0824c5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 24 May 2020 15:08:19 +0530 Subject: [PATCH 070/462] powerpc: Fix kernel crash in show_instructions() w/DEBUG_VIRTUAL With CONFIG_DEBUG_VIRTUAL=y, we can hit a BUG() if we take a hard lockup watchdog interrupt when in OPAL mode. This happens in show_instructions() if the kernel takes the watchdog NMI IPI, or any other interrupt, with MSR_IR == 0. show_instructions() updates the variable pc in the loop and the second iteration will result in BUG(). We hit the BUG_ON due the below check in __va() #define __va(x) ({ VIRTUAL_BUG_ON((unsigned long)(x) >= PAGE_OFFSET); (void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); }) Fix it by moving the check out of the loop. Also update nip so that the nip == pc check still matches. Fixes: 4dd7554a6456 ("powerpc/64: Add VIRTUAL_BUG_ON checks for __va and __pa addresses") Signed-off-by: Aneesh Kumar K.V [mpe: Use IS_ENABLED(), massage change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200524093822.423487-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/kernel/process.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7bb7faf84490..a2f1f0e70a4b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1252,29 +1252,31 @@ struct task_struct *__switch_to(struct task_struct *prev, static void show_instructions(struct pt_regs *regs) { int i; + unsigned long nip = regs->nip; unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); printk("Instruction dump:"); + /* + * If we were executing with the MMU off for instructions, adjust pc + * rather than printing XXXXXXXX. + */ + if (!IS_ENABLED(CONFIG_BOOKE) && !(regs->msr & MSR_IR)) { + pc = (unsigned long)phys_to_virt(pc); + nip = (unsigned long)phys_to_virt(regs->nip); + } + for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; if (!(i % 8)) pr_cont("\n"); -#if !defined(CONFIG_BOOKE) - /* If executing with the IMMU off, adjust pc rather - * than print XXXXXXXX. - */ - if (!(regs->msr & MSR_IR)) - pc = (unsigned long)phys_to_virt(pc); -#endif - if (!__kernel_text_address(pc) || probe_kernel_address((const void *)pc, instr)) { pr_cont("XXXXXXXX "); } else { - if (regs->nip == pc) + if (nip == pc) pr_cont("<%08x> ", instr); else pr_cont("%08x ", instr); From 0fae253af563cf5d1f5dc651d520c3eafd74f183 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 12 Jun 2020 15:59:37 -0500 Subject: [PATCH 071/462] ASoC: soc-devres: add devm_snd_soc_register_dai() The registration of DAIs may be done at two distinct times, once during a component registration and later when loading a topology. Since devm_ managed resources are freed in the reverse order they were allocated, when a component starts unregistering DAIs by walking through the DAI list, the memory allocated for the topology-registered DAIs was freed already, which leads to 100% reproducible KASAN use-after-free reports. This patch suggests a new devm_ function to force the DAI list to be updated prior to freeing the memory chunks referenced by the list pointers. Suggested-by: Bard Liao Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Kai Vehmanen BugLink: https://github.com/thesofproject/linux/issues/2186 Link: https://lore.kernel.org/r/20200612205938.26415-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc.h | 4 ++++ sound/soc/soc-devres.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/include/sound/soc.h b/include/sound/soc.h index 565612a8d690..fddab504c227 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1363,6 +1363,10 @@ void snd_soc_remove_pcm_runtime(struct snd_soc_card *card, struct snd_soc_dai *snd_soc_register_dai(struct snd_soc_component *component, struct snd_soc_dai_driver *dai_drv, bool legacy_dai_naming); +struct snd_soc_dai *devm_snd_soc_register_dai(struct device *dev, + struct snd_soc_component *component, + struct snd_soc_dai_driver *dai_drv, + bool legacy_dai_naming); void snd_soc_unregister_dai(struct snd_soc_dai *dai); struct snd_soc_dai *snd_soc_find_dai( diff --git a/sound/soc/soc-devres.c b/sound/soc/soc-devres.c index a9ea172a66a7..11e5d7962370 100644 --- a/sound/soc/soc-devres.c +++ b/sound/soc/soc-devres.c @@ -9,6 +9,43 @@ #include #include +static void devm_dai_release(struct device *dev, void *res) +{ + snd_soc_unregister_dai(*(struct snd_soc_dai **)res); +} + +/** + * devm_snd_soc_register_dai - resource-managed dai registration + * @dev: Device used to manage component + * @component: The component the DAIs are registered for + * @dai_drv: DAI driver to use for the DAI + * @legacy_dai_naming: if %true, use legacy single-name format; + * if %false, use multiple-name format; + */ +struct snd_soc_dai *devm_snd_soc_register_dai(struct device *dev, + struct snd_soc_component *component, + struct snd_soc_dai_driver *dai_drv, + bool legacy_dai_naming) +{ + struct snd_soc_dai **ptr; + struct snd_soc_dai *dai; + + ptr = devres_alloc(devm_dai_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + dai = snd_soc_register_dai(component, dai_drv, legacy_dai_naming); + if (dai) { + *ptr = dai; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return dai; +} +EXPORT_SYMBOL_GPL(devm_snd_soc_register_dai); + static void devm_component_release(struct device *dev, void *res) { snd_soc_unregister_component(*(struct device **)res); From 6ae4902f2f3400503f9b78e87e8371e4ffde1e0c Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 12 Jun 2020 15:59:38 -0500 Subject: [PATCH 072/462] ASoC: soc-topology: use devm_snd_soc_register_dai() Use devm_ to avoid use-after-free KASAN reports and simplify error handling. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Kai Vehmanen BugLink: https://github.com/thesofproject/linux/issues/2186 Link: https://lore.kernel.org/r/20200612205938.26415-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 9e89633676b7..43e5745b06aa 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1851,7 +1851,7 @@ static int soc_tplg_dai_create(struct soc_tplg *tplg, list_add(&dai_drv->dobj.list, &tplg->comp->dobj_list); /* register the DAI to the component */ - dai = snd_soc_register_dai(tplg->comp, dai_drv, false); + dai = devm_snd_soc_register_dai(tplg->comp->dev, tplg->comp, dai_drv, false); if (!dai) return -ENOMEM; @@ -1859,7 +1859,6 @@ static int soc_tplg_dai_create(struct soc_tplg *tplg, ret = snd_soc_dapm_new_dai_widgets(dapm, dai); if (ret != 0) { dev_err(dai->dev, "Failed to create DAI widgets %d\n", ret); - snd_soc_unregister_dai(dai); return ret; } From b95273f1272398a9f7145de37703f1930244e465 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 15 Apr 2020 11:37:09 -0400 Subject: [PATCH 073/462] kvm/svm: disable KCSAN for svm_vcpu_run() For some reasons, running a simple qemu-kvm command with KCSAN will reset AMD hosts. It turns out svm_vcpu_run() could not be instrumented. Disable it for now. # /usr/libexec/qemu-kvm -name ubuntu-18.04-server-cloudimg -cpu host -smp 2 -m 2G -hda ubuntu-18.04-server-cloudimg.qcow2 === console output === Kernel 5.6.0-next-20200408+ on an x86_64 hp-dl385g10-05 login: <...host reset...> HPE ProLiant System BIOS A40 v1.20 (03/09/2018) (C) Copyright 1982-2018 Hewlett Packard Enterprise Development LP Early system initialization, please wait... Signed-off-by: Qian Cai Message-Id: <20200415153709.1559-1-cai@lca.pw> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8ccfa4197d9c..c0da4dd78ac5 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3344,7 +3344,7 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); -static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) +static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { fastpath_t exit_fastpath; struct vcpu_svm *svm = to_svm(vcpu); From 2ef5612391f0a7a631c42a8afc867095b49a1992 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 4 Jun 2020 15:39:02 +0100 Subject: [PATCH 074/462] RDMA/mlx5: Remove duplicated assignment to resp.response_length The assignment to resp.response_length is never read since it is being updated again on the next statement. The assignment is redundant so removed it. Fixes: a645a89d9a78 ("RDMA/mlx5: Return ECE DC support") Link: https://lore.kernel.org/r/20200604143902.56021-1-colin.king@canonical.com Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 81bf6b975e0e..d61ca85033de 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4162,8 +4162,6 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (udata->outlen < min_resp_len) return -EINVAL; - resp.response_length = min_resp_len; - /* * If we don't have enough space for the ECE options, * simply indicate it with resp.response_length. From 19ab0f005b165146ea4a93f71e9cb5e71de9c0ce Mon Sep 17 00:00:00 2001 From: "derek.fang" Date: Fri, 12 Jun 2020 13:15:25 +0800 Subject: [PATCH 075/462] ASoC: rt5682: Let dai clks be registered whether mclk exists or not According to ideal rt5682 CCF, the root clk is mclk. But in some platforms, mclk is not exported to CCF. In this condition, rt5682_register_dai_clks will not be called. This patch lets dai clks could be registered whether mclk exists or not. Signed-off-by: derek.fang Link: https://lore.kernel.org/r/1591938925-1070-5-git-send-email-derek.fang@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index d3245123101d..3e9d2c6c51f9 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -2829,12 +2829,13 @@ static int rt5682_probe(struct snd_soc_component *component) return ret; } rt5682->mclk = NULL; - } else { - /* Register CCF DAI clock control */ - ret = rt5682_register_dai_clks(component); - if (ret) - return ret; } + + /* Register CCF DAI clock control */ + ret = rt5682_register_dai_clks(component); + if (ret) + return ret; + /* Initial setup for CCF */ rt5682->lrck[RT5682_AIF1] = CLK_48; #endif From 96bf62f018f40cb5d4e4bed95e50fd990a2354af Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 12 Jun 2020 15:35:07 -0500 Subject: [PATCH 076/462] ASoC: soc-pcm: fix checks for multi-cpu FE dailinks soc_dpcm_fe_runtime_update() is called for all dailinks, and we want to first discard all back-ends, then deal with front-ends. The existing code first reports an error with multi-cpu front-ends, and that check needs to be moved after we know that we are dealing with a front-end. Fixes: 6e1276a5e613d ('ASoC: Return error if the function does not support multi-cpu') Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao BugLink: https://github.com/thesofproject/linux/issues/1970 Link: https://lore.kernel.org/r/20200612203507.25621-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 2c114b4542ce..c517064f5391 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2630,15 +2630,15 @@ static int soc_dpcm_fe_runtime_update(struct snd_soc_pcm_runtime *fe, int new) int count, paths; int ret; + if (!fe->dai_link->dynamic) + return 0; + if (fe->num_cpus > 1) { dev_err(fe->dev, "%s doesn't support Multi CPU yet\n", __func__); return -EINVAL; } - if (!fe->dai_link->dynamic) - return 0; - /* only check active links */ if (!snd_soc_dai_active(asoc_rtd_to_cpu(fe, 0))) return 0; From 4a95737440d426e93441d49d11abf4c6526d4666 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 12 Jun 2020 13:37:10 +0100 Subject: [PATCH 077/462] ASoc: q6afe: add support to get port direction This patch adds support to q6afe_is_rx_port() to get direction of DSP BE dai port, this is useful for setting dailink directions correctly. Fixes: c25e295cd77b (ASoC: qcom: Add support to parse common audio device nodes) Reported-by: John Stultz Signed-off-by: Srinivas Kandagatla Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20200612123711.29130-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6afe.c | 8 ++++++++ sound/soc/qcom/qdsp6/q6afe.h | 1 + 2 files changed, 9 insertions(+) diff --git a/sound/soc/qcom/qdsp6/q6afe.c b/sound/soc/qcom/qdsp6/q6afe.c index e0945f7a58c8..0ce4eb60f984 100644 --- a/sound/soc/qcom/qdsp6/q6afe.c +++ b/sound/soc/qcom/qdsp6/q6afe.c @@ -800,6 +800,14 @@ int q6afe_get_port_id(int index) } EXPORT_SYMBOL_GPL(q6afe_get_port_id); +int q6afe_is_rx_port(int index) +{ + if (index < 0 || index >= AFE_PORT_MAX) + return -EINVAL; + + return port_maps[index].is_rx; +} +EXPORT_SYMBOL_GPL(q6afe_is_rx_port); static int afe_apr_send_pkt(struct q6afe *afe, struct apr_pkt *pkt, struct q6afe_port *port) { diff --git a/sound/soc/qcom/qdsp6/q6afe.h b/sound/soc/qcom/qdsp6/q6afe.h index c7ed5422baff..1a0f80a14afe 100644 --- a/sound/soc/qcom/qdsp6/q6afe.h +++ b/sound/soc/qcom/qdsp6/q6afe.h @@ -198,6 +198,7 @@ int q6afe_port_start(struct q6afe_port *port); int q6afe_port_stop(struct q6afe_port *port); void q6afe_port_put(struct q6afe_port *port); int q6afe_get_port_id(int index); +int q6afe_is_rx_port(int index); void q6afe_hdmi_port_prepare(struct q6afe_port *port, struct q6afe_hdmi_cfg *cfg); void q6afe_slim_port_prepare(struct q6afe_port *port, From a2120089251f1fe221305e88df99af16f940e236 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 12 Jun 2020 13:37:11 +0100 Subject: [PATCH 078/462] ASoC: qcom: common: set correct directions for dailinks Currently both FE and BE dai-links are configured bi-directional, However the DSP BE dais are only single directional, so set the directions as supported by the BE dais. Fixes: c25e295cd77b (ASoC: qcom: Add support to parse common audio device nodes) Reported-by: John Stultz Signed-off-by: Srinivas Kandagatla Tested-by: John Stultz Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20200612123711.29130-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/common.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sound/soc/qcom/common.c b/sound/soc/qcom/common.c index 6c20bdd850f3..8ada4ecba847 100644 --- a/sound/soc/qcom/common.c +++ b/sound/soc/qcom/common.c @@ -4,6 +4,7 @@ #include #include "common.h" +#include "qdsp6/q6afe.h" int qcom_snd_parse_of(struct snd_soc_card *card) { @@ -101,6 +102,15 @@ int qcom_snd_parse_of(struct snd_soc_card *card) } link->no_pcm = 1; link->ignore_pmdown_time = 1; + + if (q6afe_is_rx_port(link->id)) { + link->dpcm_playback = 1; + link->dpcm_capture = 0; + } else { + link->dpcm_playback = 0; + link->dpcm_capture = 1; + } + } else { dlc = devm_kzalloc(dev, sizeof(*dlc), GFP_KERNEL); if (!dlc) @@ -113,12 +123,12 @@ int qcom_snd_parse_of(struct snd_soc_card *card) link->codecs->dai_name = "snd-soc-dummy-dai"; link->codecs->name = "snd-soc-dummy"; link->dynamic = 1; + link->dpcm_playback = 1; + link->dpcm_capture = 1; } link->ignore_suspend = 1; link->nonatomic = 1; - link->dpcm_playback = 1; - link->dpcm_capture = 1; link->stream_name = link->name; link++; From e74a1e7eaed95f2c6422e7cf9ed70154f17a6db3 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Mon, 15 Jun 2020 11:24:32 +0800 Subject: [PATCH 079/462] ASoC: rt1015: Update rt1015 default register value according to spec modification. Update rt1015 default register value according to spec modification. Signed-off-by: Jack Yu Link: https://lore.kernel.org/r/20200615032433.31061-1-jack.yu@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt1015.c | 124 +++++++++++++++++++++++--------------- sound/soc/codecs/rt1015.h | 15 ++++- 2 files changed, 89 insertions(+), 50 deletions(-) diff --git a/sound/soc/codecs/rt1015.c b/sound/soc/codecs/rt1015.c index 67e2e944d21b..2cccb310fa96 100644 --- a/sound/soc/codecs/rt1015.c +++ b/sound/soc/codecs/rt1015.c @@ -34,30 +34,32 @@ static const struct reg_default rt1015_reg[] = { { 0x0000, 0x0000 }, { 0x0004, 0xa000 }, { 0x0006, 0x0003 }, - { 0x000a, 0x0802 }, - { 0x000c, 0x0020 }, + { 0x000a, 0x081e }, + { 0x000c, 0x0006 }, { 0x000e, 0x0000 }, { 0x0010, 0x0000 }, { 0x0012, 0x0000 }, + { 0x0014, 0x0000 }, + { 0x0016, 0x0000 }, + { 0x0018, 0x0000 }, { 0x0020, 0x8000 }, - { 0x0022, 0x471b }, - { 0x006a, 0x0000 }, - { 0x006c, 0x4020 }, + { 0x0022, 0x8043 }, { 0x0076, 0x0000 }, { 0x0078, 0x0000 }, - { 0x007a, 0x0000 }, + { 0x007a, 0x0002 }, { 0x007c, 0x10ec }, { 0x007d, 0x1015 }, { 0x00f0, 0x5000 }, - { 0x00f2, 0x0774 }, - { 0x00f3, 0x8400 }, + { 0x00f2, 0x004c }, + { 0x00f3, 0xecfe }, { 0x00f4, 0x0000 }, + { 0x00f6, 0x0400 }, { 0x0100, 0x0028 }, { 0x0102, 0xff02 }, - { 0x0104, 0x8232 }, + { 0x0104, 0xa213 }, { 0x0106, 0x200c }, - { 0x010c, 0x002f }, - { 0x010e, 0xc000 }, + { 0x010c, 0x0000 }, + { 0x010e, 0x0058 }, { 0x0111, 0x0200 }, { 0x0112, 0x0400 }, { 0x0114, 0x0022 }, @@ -65,38 +67,46 @@ static const struct reg_default rt1015_reg[] = { { 0x0118, 0x0000 }, { 0x011a, 0x0123 }, { 0x011c, 0x4567 }, - { 0x0300, 0xdddd }, - { 0x0302, 0x0000 }, - { 0x0311, 0x9330 }, - { 0x0313, 0x0000 }, - { 0x0314, 0x0000 }, + { 0x0300, 0x203d }, + { 0x0302, 0x001e }, + { 0x0311, 0x0000 }, + { 0x0313, 0x6014 }, + { 0x0314, 0x00a2 }, { 0x031a, 0x00a0 }, { 0x031c, 0x001f }, { 0x031d, 0xffff }, { 0x031e, 0x0000 }, { 0x031f, 0x0000 }, + { 0x0320, 0x0000 }, { 0x0321, 0x0000 }, - { 0x0322, 0x0000 }, - { 0x0328, 0x0000 }, - { 0x0329, 0x0000 }, - { 0x032a, 0x0000 }, - { 0x032b, 0x0000 }, - { 0x032c, 0x0000 }, - { 0x032d, 0x0000 }, - { 0x032e, 0x030e }, - { 0x0330, 0x0080 }, + { 0x0322, 0xd7df }, + { 0x0328, 0x10b2 }, + { 0x0329, 0x0175 }, + { 0x032a, 0x36ad }, + { 0x032b, 0x7e55 }, + { 0x032c, 0x0520 }, + { 0x032d, 0xaa00 }, + { 0x032e, 0x570e }, + { 0x0330, 0xe180 }, { 0x0332, 0x0034 }, - { 0x0334, 0x0000 }, - { 0x0336, 0x0000 }, + { 0x0334, 0x0001 }, + { 0x0336, 0x0010 }, + { 0x0338, 0x0000 }, + { 0x04fa, 0x0030 }, + { 0x04fc, 0x35c8 }, + { 0x04fe, 0x0800 }, + { 0x0500, 0x0400 }, + { 0x0502, 0x1000 }, + { 0x0504, 0x0000 }, { 0x0506, 0x04ff }, - { 0x0508, 0x0030 }, - { 0x050a, 0x0018 }, - { 0x0519, 0x307f }, - { 0x051a, 0xffff }, - { 0x051b, 0x4000 }, + { 0x0508, 0x0010 }, + { 0x050a, 0x001a }, + { 0x0519, 0x1c68 }, + { 0x051a, 0x0ccc }, + { 0x051b, 0x0666 }, { 0x051d, 0x0000 }, { 0x051f, 0x0000 }, - { 0x0536, 0x1000 }, + { 0x0536, 0x061c }, { 0x0538, 0x0000 }, { 0x053a, 0x0000 }, { 0x053c, 0x0000 }, @@ -110,19 +120,18 @@ static const struct reg_default rt1015_reg[] = { { 0x0544, 0x0000 }, { 0x0568, 0x0000 }, { 0x056a, 0x0000 }, - { 0x1000, 0x0000 }, - { 0x1002, 0x6505 }, + { 0x1000, 0x0040 }, + { 0x1002, 0x5405 }, { 0x1006, 0x5515 }, - { 0x1007, 0x003f }, - { 0x1009, 0x770f }, - { 0x100a, 0x01ff }, - { 0x100c, 0x0000 }, + { 0x1007, 0x05f7 }, + { 0x1009, 0x0b0a }, + { 0x100a, 0x00ef }, { 0x100d, 0x0003 }, { 0x1010, 0xa433 }, { 0x1020, 0x0000 }, - { 0x1200, 0x3d02 }, - { 0x1202, 0x0813 }, - { 0x1204, 0x0211 }, + { 0x1200, 0x5a01 }, + { 0x1202, 0x6524 }, + { 0x1204, 0x1f00 }, { 0x1206, 0x0000 }, { 0x1208, 0x0000 }, { 0x120a, 0x0000 }, @@ -130,16 +139,16 @@ static const struct reg_default rt1015_reg[] = { { 0x120e, 0x0000 }, { 0x1210, 0x0000 }, { 0x1212, 0x0000 }, - { 0x1300, 0x0701 }, - { 0x1302, 0x12f9 }, - { 0x1304, 0x3405 }, + { 0x1300, 0x10a1 }, + { 0x1302, 0x12ff }, + { 0x1304, 0x0400 }, { 0x1305, 0x0844 }, - { 0x1306, 0x1611 }, + { 0x1306, 0x4611 }, { 0x1308, 0x555e }, { 0x130a, 0x0000 }, - { 0x130c, 0x2400}, - { 0x130e, 0x7700 }, - { 0x130f, 0x0000 }, + { 0x130c, 0x2000 }, + { 0x130e, 0x0100 }, + { 0x130f, 0x0001 }, { 0x1310, 0x0000 }, { 0x1312, 0x0000 }, { 0x1314, 0x0000 }, @@ -209,6 +218,9 @@ static bool rt1015_volatile_register(struct device *dev, unsigned int reg) case RT1015_DC_CALIB_CLSD7: case RT1015_DC_CALIB_CLSD8: case RT1015_S_BST_TIMING_INTER1: + case RT1015_OSCK_STA: + case RT1015_MONO_DYNA_CTRL1: + case RT1015_MONO_DYNA_CTRL5: return true; default: @@ -224,6 +236,12 @@ static bool rt1015_readable_register(struct device *dev, unsigned int reg) case RT1015_CLK3: case RT1015_PLL1: case RT1015_PLL2: + case RT1015_DUM_RW1: + case RT1015_DUM_RW2: + case RT1015_DUM_RW3: + case RT1015_DUM_RW4: + case RT1015_DUM_RW5: + case RT1015_DUM_RW6: case RT1015_CLK_DET: case RT1015_SIL_DET: case RT1015_CUSTOMER_ID: @@ -235,6 +253,7 @@ static bool rt1015_readable_register(struct device *dev, unsigned int reg) case RT1015_PAD_DRV2: case RT1015_GAT_BOOST: case RT1015_PRO_ALT: + case RT1015_OSCK_STA: case RT1015_MAN_I2C: case RT1015_DAC1: case RT1015_DAC2: @@ -272,6 +291,13 @@ static bool rt1015_readable_register(struct device *dev, unsigned int reg) case RT1015_SMART_BST_CTRL2: case RT1015_ANA_CTRL1: case RT1015_ANA_CTRL2: + case RT1015_PWR_STATE_CTRL: + case RT1015_MONO_DYNA_CTRL: + case RT1015_MONO_DYNA_CTRL1: + case RT1015_MONO_DYNA_CTRL2: + case RT1015_MONO_DYNA_CTRL3: + case RT1015_MONO_DYNA_CTRL4: + case RT1015_MONO_DYNA_CTRL5: case RT1015_SPK_VOL: case RT1015_SHORT_DETTOP1: case RT1015_SHORT_DETTOP2: diff --git a/sound/soc/codecs/rt1015.h b/sound/soc/codecs/rt1015.h index 6fbe802082c4..8169962935a5 100644 --- a/sound/soc/codecs/rt1015.h +++ b/sound/soc/codecs/rt1015.h @@ -21,6 +21,12 @@ #define RT1015_CLK3 0x0006 #define RT1015_PLL1 0x000a #define RT1015_PLL2 0x000c +#define RT1015_DUM_RW1 0x000e +#define RT1015_DUM_RW2 0x0010 +#define RT1015_DUM_RW3 0x0012 +#define RT1015_DUM_RW4 0x0014 +#define RT1015_DUM_RW5 0x0016 +#define RT1015_DUM_RW6 0x0018 #define RT1015_CLK_DET 0x0020 #define RT1015_SIL_DET 0x0022 #define RT1015_CUSTOMER_ID 0x0076 @@ -32,6 +38,7 @@ #define RT1015_PAD_DRV2 0x00f2 #define RT1015_GAT_BOOST 0x00f3 #define RT1015_PRO_ALT 0x00f4 +#define RT1015_OSCK_STA 0x00f6 #define RT1015_MAN_I2C 0x0100 #define RT1015_DAC1 0x0102 #define RT1015_DAC2 0x0104 @@ -70,7 +77,13 @@ #define RT1015_ANA_CTRL1 0x0334 #define RT1015_ANA_CTRL2 0x0336 #define RT1015_PWR_STATE_CTRL 0x0338 -#define RT1015_SPK_VOL 0x0506 +#define RT1015_MONO_DYNA_CTRL 0x04fa +#define RT1015_MONO_DYNA_CTRL1 0x04fc +#define RT1015_MONO_DYNA_CTRL2 0x04fe +#define RT1015_MONO_DYNA_CTRL3 0x0500 +#define RT1015_MONO_DYNA_CTRL4 0x0502 +#define RT1015_MONO_DYNA_CTRL5 0x0504 +#define RT1015_SPK_VOL 0x0506 #define RT1015_SHORT_DETTOP1 0x0508 #define RT1015_SHORT_DETTOP2 0x050a #define RT1015_SPK_DC_DETECT1 0x0519 From 40e2c465894e5b79b49f55d9574dbcda4ac0f08f Mon Sep 17 00:00:00 2001 From: Brent Lu Date: Fri, 12 Jun 2020 18:50:48 +0800 Subject: [PATCH 080/462] ASoC: SOF: Intel: hda: Clear RIRB status before reading WP Port commit 6d011d5057ff ("ALSA: hda: Clear RIRB status before reading WP") from legacy HDA driver to fix the get response timeout issue. Current SOF driver does not suffer from this issue because sync write is enabled in hda_init. The issue will come back if the sync write is disabled for some reason. Signed-off-by: Brent Lu Reviewed-by: Takashi Iwai Reviewed-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/1591959048-15813-1-git-send-email-brent.lu@intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-stream.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/soc/sof/intel/hda-stream.c b/sound/soc/sof/intel/hda-stream.c index 7f65dcc95811..1bda14c3590c 100644 --- a/sound/soc/sof/intel/hda-stream.c +++ b/sound/soc/sof/intel/hda-stream.c @@ -653,11 +653,16 @@ irqreturn_t hda_dsp_stream_threaded_handler(int irq, void *context) if (status & AZX_INT_CTRL_EN) { rirb_status = snd_hdac_chip_readb(bus, RIRBSTS); if (rirb_status & RIRB_INT_MASK) { + /* + * Clearing the interrupt status here ensures + * that no interrupt gets masked after the RIRB + * wp is read in snd_hdac_bus_update_rirb. + */ + snd_hdac_chip_writeb(bus, RIRBSTS, + RIRB_INT_MASK); active = true; if (rirb_status & RIRB_INT_RESPONSE) snd_hdac_bus_update_rirb(bus); - snd_hdac_chip_writeb(bus, RIRBSTS, - RIRB_INT_MASK); } } #endif From a8a5e383cf41c3852621f8cdfb9141c77b004cdd Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 15 Jun 2020 17:12:23 +0800 Subject: [PATCH 081/462] blk-mq: Remove redundant 'return' statement The blk_mq_all_tag_iter() is a void function, thus remove the redundant 'return' statement in this function. Signed-off-by: Baolin Wang Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 44f3d0967cb4..ae722f8b13fb 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -376,7 +376,7 @@ static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags, void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv) { - return __blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS); + __blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS); } /** From 59960b9deb5354e4cdb0b6ed3a3b653a2b4eb602 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 15 Jun 2020 16:36:30 +0300 Subject: [PATCH 082/462] io_uring: fix lazy work init Don't leave garbage in req.work before punting async on -EAGAIN in io_iopoll_queue(). [ 140.922099] general protection fault, probably for non-canonical address 0xdead000000000100: 0000 [#1] PREEMPT SMP PTI ... [ 140.922105] RIP: 0010:io_worker_handle_work+0x1db/0x480 ... [ 140.922114] Call Trace: [ 140.922118] ? __next_timer_interrupt+0xe0/0xe0 [ 140.922119] io_wqe_worker+0x2a9/0x360 [ 140.922121] ? _raw_spin_unlock_irqrestore+0x24/0x40 [ 140.922124] kthread+0x12c/0x170 [ 140.922125] ? io_worker_handle_work+0x480/0x480 [ 140.922126] ? kthread_park+0x90/0x90 [ 140.922127] ret_from_fork+0x22/0x30 Fixes: 7cdaf587de7c ("io_uring: avoid whole io_wq_work copy for requests completed inline") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 155f3d830ddb..c04b20bf2803 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1087,6 +1087,7 @@ static inline void io_prep_async_work(struct io_kiocb *req, req->work.flags |= IO_WQ_WORK_UNBOUND; } + io_req_init_async(req); io_req_work_grab_env(req, def); *link = io_prep_linked_timeout(req); From 6f1cf5257acc6e6242ddf2f52bc7912aed77b79f Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Mon, 15 Jun 2020 05:54:08 +0800 Subject: [PATCH 083/462] regualtor: pfuze100: correct sw1a/sw2 on pfuze3000 PFUZE100_SWB_REG is not proper for sw1a/sw2, because enable_mask/enable_reg is not correct. On PFUZE3000, sw1a/sw2 should be the same as sw1a/sw2 on pfuze100 except that voltages are not linear, so add new PFUZE3000_SW_REG and pfuze3000_sw_regulator_ops which like the non-linear PFUZE100_SW_REG and pfuze100_sw_regulator_ops. Fixes: 1dced996ee70 ("regulator: pfuze100: update voltage setting for pfuze3000 sw1a") Reported-by: Christophe Meynard Signed-off-by: Robin Gong Link: https://lore.kernel.org/r/1592171648-8752-1-git-send-email-yibin.gong@nxp.com Signed-off-by: Mark Brown --- drivers/regulator/pfuze100-regulator.c | 60 +++++++++++++++++--------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index 689537927f6f..4c8e8b472287 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -209,6 +209,19 @@ static const struct regulator_ops pfuze100_swb_regulator_ops = { }; +static const struct regulator_ops pfuze3000_sw_regulator_ops = { + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .list_voltage = regulator_list_voltage_table, + .map_voltage = regulator_map_voltage_ascend, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .set_ramp_delay = pfuze100_set_ramp_delay, + +}; + #define PFUZE100_FIXED_REG(_chip, _name, base, voltage) \ [_chip ## _ ## _name] = { \ .desc = { \ @@ -318,23 +331,28 @@ static const struct regulator_ops pfuze100_swb_regulator_ops = { .stby_mask = 0x20, \ } - -#define PFUZE3000_SW2_REG(_chip, _name, base, min, max, step) { \ - .desc = { \ - .name = #_name,\ - .n_voltages = ((max) - (min)) / (step) + 1, \ - .ops = &pfuze100_sw_regulator_ops, \ - .type = REGULATOR_VOLTAGE, \ - .id = _chip ## _ ## _name, \ - .owner = THIS_MODULE, \ - .min_uV = (min), \ - .uV_step = (step), \ - .vsel_reg = (base) + PFUZE100_VOL_OFFSET, \ - .vsel_mask = 0x7, \ - }, \ - .stby_reg = (base) + PFUZE100_STANDBY_OFFSET, \ - .stby_mask = 0x7, \ -} +/* No linar case for the some switches of PFUZE3000 */ +#define PFUZE3000_SW_REG(_chip, _name, base, mask, voltages) \ + [_chip ## _ ## _name] = { \ + .desc = { \ + .name = #_name, \ + .n_voltages = ARRAY_SIZE(voltages), \ + .ops = &pfuze3000_sw_regulator_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = _chip ## _ ## _name, \ + .owner = THIS_MODULE, \ + .volt_table = voltages, \ + .vsel_reg = (base) + PFUZE100_VOL_OFFSET, \ + .vsel_mask = (mask), \ + .enable_reg = (base) + PFUZE100_MODE_OFFSET, \ + .enable_mask = 0xf, \ + .enable_val = 0x8, \ + .enable_time = 500, \ + }, \ + .stby_reg = (base) + PFUZE100_STANDBY_OFFSET, \ + .stby_mask = (mask), \ + .sw_reg = true, \ + } #define PFUZE3000_SW3_REG(_chip, _name, base, min, max, step) { \ .desc = { \ @@ -391,9 +409,9 @@ static struct pfuze_regulator pfuze200_regulators[] = { }; static struct pfuze_regulator pfuze3000_regulators[] = { - PFUZE100_SWB_REG(PFUZE3000, SW1A, PFUZE100_SW1ABVOL, 0x1f, pfuze3000_sw1a), + PFUZE3000_SW_REG(PFUZE3000, SW1A, PFUZE100_SW1ABVOL, 0x1f, pfuze3000_sw1a), PFUZE100_SW_REG(PFUZE3000, SW1B, PFUZE100_SW1CVOL, 700000, 1475000, 25000), - PFUZE100_SWB_REG(PFUZE3000, SW2, PFUZE100_SW2VOL, 0x7, pfuze3000_sw2lo), + PFUZE3000_SW_REG(PFUZE3000, SW2, PFUZE100_SW2VOL, 0x7, pfuze3000_sw2lo), PFUZE3000_SW3_REG(PFUZE3000, SW3, PFUZE100_SW3AVOL, 900000, 1650000, 50000), PFUZE100_SWB_REG(PFUZE3000, SWBST, PFUZE100_SWBSTCON1, 0x3, pfuze100_swbst), PFUZE100_SWB_REG(PFUZE3000, VSNVS, PFUZE100_VSNVSVOL, 0x7, pfuze100_vsnvs), @@ -407,8 +425,8 @@ static struct pfuze_regulator pfuze3000_regulators[] = { }; static struct pfuze_regulator pfuze3001_regulators[] = { - PFUZE100_SWB_REG(PFUZE3001, SW1, PFUZE100_SW1ABVOL, 0x1f, pfuze3000_sw1a), - PFUZE100_SWB_REG(PFUZE3001, SW2, PFUZE100_SW2VOL, 0x7, pfuze3000_sw2lo), + PFUZE3000_SW_REG(PFUZE3001, SW1, PFUZE100_SW1ABVOL, 0x1f, pfuze3000_sw1a), + PFUZE3000_SW_REG(PFUZE3001, SW2, PFUZE100_SW2VOL, 0x7, pfuze3000_sw2lo), PFUZE3000_SW3_REG(PFUZE3001, SW3, PFUZE100_SW3AVOL, 900000, 1650000, 50000), PFUZE100_SWB_REG(PFUZE3001, VSNVS, PFUZE100_VSNVSVOL, 0x7, pfuze100_vsnvs), PFUZE100_VGEN_REG(PFUZE3001, VLDO1, PFUZE100_VGEN1VOL, 1800000, 3300000, 100000), From f4c2665e33f48904f2766d644df33fb3fd54b5ec Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 15 Jun 2020 10:24:02 +0300 Subject: [PATCH 084/462] io-wq: reorder cancellation pending -> running Go all over all pending lists and cancel works there, and only then try to match running requests. No functional changes here, just a preparation for bulk cancellation. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io-wq.c | 54 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 0b65a912b036..03c7e37548c2 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -927,19 +927,14 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data) return ret; } -static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe, - struct io_cb_cancel_data *match) +static bool io_wqe_cancel_pending_work(struct io_wqe *wqe, + struct io_cb_cancel_data *match) { struct io_wq_work_node *node, *prev; struct io_wq_work *work; unsigned long flags; bool found = false; - /* - * First check pending list, if we're lucky we can just remove it - * from there. CANCEL_OK means that the work is returned as-new, - * no completion will be posted for it. - */ spin_lock_irqsave(&wqe->lock, flags); wq_list_for_each(node, prev, &wqe->work_list) { work = container_of(node, struct io_wq_work, list); @@ -952,21 +947,20 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe, } spin_unlock_irqrestore(&wqe->lock, flags); - if (found) { + if (found) io_run_cancel(work, wqe); - return IO_WQ_CANCEL_OK; - } + return found; +} + +static bool io_wqe_cancel_running_work(struct io_wqe *wqe, + struct io_cb_cancel_data *match) +{ + bool found; - /* - * Now check if a free (going busy) or busy worker has the work - * currently running. If we find it there, we'll return CANCEL_RUNNING - * as an indication that we attempt to signal cancellation. The - * completion will run normally in this case. - */ rcu_read_lock(); found = io_wq_for_each_worker(wqe, io_wq_worker_cancel, match); rcu_read_unlock(); - return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND; + return found; } enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, @@ -976,18 +970,34 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, .fn = cancel, .data = data, }; - enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND; int node; + /* + * First check pending list, if we're lucky we can just remove it + * from there. CANCEL_OK means that the work is returned as-new, + * no completion will be posted for it. + */ for_each_node(node) { struct io_wqe *wqe = wq->wqes[node]; - ret = io_wqe_cancel_work(wqe, &match); - if (ret != IO_WQ_CANCEL_NOTFOUND) - break; + if (io_wqe_cancel_pending_work(wqe, &match)) + return IO_WQ_CANCEL_OK; } - return ret; + /* + * Now check if a free (going busy) or busy worker has the work + * currently running. If we find it there, we'll return CANCEL_RUNNING + * as an indication that we attempt to signal cancellation. The + * completion will run normally in this case. + */ + for_each_node(node) { + struct io_wqe *wqe = wq->wqes[node]; + + if (io_wqe_cancel_running_work(wqe, &match)) + return IO_WQ_CANCEL_RUNNING; + } + + return IO_WQ_CANCEL_NOTFOUND; } static bool io_wq_io_cb_cancel_data(struct io_wq_work *work, void *data) From 4f26bda1522c35d2701fc219368c7101c17005c1 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 15 Jun 2020 10:24:03 +0300 Subject: [PATCH 085/462] io-wq: add an option to cancel all matched reqs This adds support for cancelling all io-wq works matching a predicate. It isn't used yet, so no change in observable behaviour. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io-wq.c | 60 +++++++++++++++++++++++++++++---------------------- fs/io-wq.h | 2 +- fs/io_uring.c | 2 +- 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 03c7e37548c2..e290202d6f64 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -903,13 +903,15 @@ void io_wq_cancel_all(struct io_wq *wq) struct io_cb_cancel_data { work_cancel_fn *fn; void *data; + int nr_running; + int nr_pending; + bool cancel_all; }; static bool io_wq_worker_cancel(struct io_worker *worker, void *data) { struct io_cb_cancel_data *match = data; unsigned long flags; - bool ret = false; /* * Hold the lock to avoid ->cur_work going out of scope, caller @@ -920,55 +922,55 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data) !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL) && match->fn(worker->cur_work, match->data)) { send_sig(SIGINT, worker->task, 1); - ret = true; + match->nr_running++; } spin_unlock_irqrestore(&worker->lock, flags); - return ret; + return match->nr_running && !match->cancel_all; } -static bool io_wqe_cancel_pending_work(struct io_wqe *wqe, +static void io_wqe_cancel_pending_work(struct io_wqe *wqe, struct io_cb_cancel_data *match) { struct io_wq_work_node *node, *prev; struct io_wq_work *work; unsigned long flags; - bool found = false; +retry: spin_lock_irqsave(&wqe->lock, flags); wq_list_for_each(node, prev, &wqe->work_list) { work = container_of(node, struct io_wq_work, list); + if (!match->fn(work, match->data)) + continue; - if (match->fn(work, match->data)) { - wq_list_del(&wqe->work_list, node, prev); - found = true; - break; - } + wq_list_del(&wqe->work_list, node, prev); + spin_unlock_irqrestore(&wqe->lock, flags); + io_run_cancel(work, wqe); + match->nr_pending++; + if (!match->cancel_all) + return; + + /* not safe to continue after unlock */ + goto retry; } spin_unlock_irqrestore(&wqe->lock, flags); - - if (found) - io_run_cancel(work, wqe); - return found; } -static bool io_wqe_cancel_running_work(struct io_wqe *wqe, +static void io_wqe_cancel_running_work(struct io_wqe *wqe, struct io_cb_cancel_data *match) { - bool found; - rcu_read_lock(); - found = io_wq_for_each_worker(wqe, io_wq_worker_cancel, match); + io_wq_for_each_worker(wqe, io_wq_worker_cancel, match); rcu_read_unlock(); - return found; } enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, - void *data) + void *data, bool cancel_all) { struct io_cb_cancel_data match = { - .fn = cancel, - .data = data, + .fn = cancel, + .data = data, + .cancel_all = cancel_all, }; int node; @@ -980,7 +982,8 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, for_each_node(node) { struct io_wqe *wqe = wq->wqes[node]; - if (io_wqe_cancel_pending_work(wqe, &match)) + io_wqe_cancel_pending_work(wqe, &match); + if (match.nr_pending && !match.cancel_all) return IO_WQ_CANCEL_OK; } @@ -993,10 +996,15 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, for_each_node(node) { struct io_wqe *wqe = wq->wqes[node]; - if (io_wqe_cancel_running_work(wqe, &match)) + io_wqe_cancel_running_work(wqe, &match); + if (match.nr_running && !match.cancel_all) return IO_WQ_CANCEL_RUNNING; } + if (match.nr_running) + return IO_WQ_CANCEL_RUNNING; + if (match.nr_pending) + return IO_WQ_CANCEL_OK; return IO_WQ_CANCEL_NOTFOUND; } @@ -1007,7 +1015,7 @@ static bool io_wq_io_cb_cancel_data(struct io_wq_work *work, void *data) enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork) { - return io_wq_cancel_cb(wq, io_wq_io_cb_cancel_data, (void *)cwork); + return io_wq_cancel_cb(wq, io_wq_io_cb_cancel_data, (void *)cwork, false); } static bool io_wq_pid_match(struct io_wq_work *work, void *data) @@ -1021,7 +1029,7 @@ enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid) { void *data = (void *) (unsigned long) pid; - return io_wq_cancel_cb(wq, io_wq_pid_match, data); + return io_wq_cancel_cb(wq, io_wq_pid_match, data, false); } struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) diff --git a/fs/io-wq.h b/fs/io-wq.h index 8e138fa88b9f..7d5bd431c5e3 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -130,7 +130,7 @@ enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid); typedef bool (work_cancel_fn)(struct io_wq_work *, void *); enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, - void *data); + void *data, bool cancel_all); struct task_struct *io_wq_get_task(struct io_wq *wq); diff --git a/fs/io_uring.c b/fs/io_uring.c index c04b20bf2803..94bd88556c1e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4773,7 +4773,7 @@ static int io_async_cancel_one(struct io_ring_ctx *ctx, void *sqe_addr) enum io_wq_cancel cancel_ret; int ret = 0; - cancel_ret = io_wq_cancel_cb(ctx->io_wq, io_cancel_cb, sqe_addr); + cancel_ret = io_wq_cancel_cb(ctx->io_wq, io_cancel_cb, sqe_addr, false); switch (cancel_ret) { case IO_WQ_CANCEL_OK: ret = 0; From 44e728b8aae0bb6d4229129083974f9dea43f50b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 15 Jun 2020 10:24:04 +0300 Subject: [PATCH 086/462] io_uring: cancel all task's requests on exit If a process is going away, io_uring_flush() will cancel only 1 request with a matching pid. Cancel all of them Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io-wq.c | 14 -------------- fs/io-wq.h | 1 - fs/io_uring.c | 14 ++++++++++++-- 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index e290202d6f64..47c5f3aeb460 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -1018,20 +1018,6 @@ enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork) return io_wq_cancel_cb(wq, io_wq_io_cb_cancel_data, (void *)cwork, false); } -static bool io_wq_pid_match(struct io_wq_work *work, void *data) -{ - pid_t pid = (pid_t) (unsigned long) data; - - return work->task_pid == pid; -} - -enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid) -{ - void *data = (void *) (unsigned long) pid; - - return io_wq_cancel_cb(wq, io_wq_pid_match, data, false); -} - struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) { int ret = -ENOMEM, node; diff --git a/fs/io-wq.h b/fs/io-wq.h index 7d5bd431c5e3..b72538fe5afd 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -125,7 +125,6 @@ static inline bool io_wq_is_hashed(struct io_wq_work *work) void io_wq_cancel_all(struct io_wq *wq); enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork); -enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid); typedef bool (work_cancel_fn)(struct io_wq_work *, void *); diff --git a/fs/io_uring.c b/fs/io_uring.c index 94bd88556c1e..ad5128a40c14 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7424,6 +7424,13 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, } } +static bool io_cancel_pid_cb(struct io_wq_work *work, void *data) +{ + pid_t pid = (pid_t) (unsigned long) data; + + return work->task_pid == pid; +} + static int io_uring_flush(struct file *file, void *data) { struct io_ring_ctx *ctx = file->private_data; @@ -7433,8 +7440,11 @@ static int io_uring_flush(struct file *file, void *data) /* * If the task is going away, cancel work it may have pending */ - if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) - io_wq_cancel_pid(ctx->io_wq, task_pid_vnr(current)); + if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) { + void *data = (void *) (unsigned long)task_pid_vnr(current); + + io_wq_cancel_cb(ctx->io_wq, io_cancel_pid_cb, data, true); + } return 0; } From 67c4d9e693e3bb7fb968af24e3584f821a78ba56 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 15 Jun 2020 10:24:05 +0300 Subject: [PATCH 087/462] io_uring: batch cancel in io_uring_cancel_files() Instead of waiting for each request one by one, first try to cancel all of them in a batched manner, and then go over inflight_list/etc to reap leftovers. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index ad5128a40c14..b6bcd5a7f4bc 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7366,9 +7366,22 @@ static int io_uring_release(struct inode *inode, struct file *file) return 0; } +static bool io_wq_files_match(struct io_wq_work *work, void *data) +{ + struct files_struct *files = data; + + return work->files == files; +} + static void io_uring_cancel_files(struct io_ring_ctx *ctx, struct files_struct *files) { + if (list_empty_careful(&ctx->inflight_list)) + return; + + /* cancel all at once, should be faster than doing it one by one*/ + io_wq_cancel_cb(ctx->io_wq, io_wq_files_match, files, true); + while (!list_empty_careful(&ctx->inflight_list)) { struct io_kiocb *cancel_req = NULL, *req; DEFINE_WAIT(wait); From 4dd2824d6d5914949b5fe589538bc2622d84c5dd Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 15 Jun 2020 10:33:13 +0300 Subject: [PATCH 088/462] io_uring: lazy get task There will be multiple places where req->task is used, so refcount-pin it lazily with introduced *io_{get,put}_req_task(). We need to always have valid ->task for cancellation reasons, but don't care about pinning it in some cases. That's why it sets req->task in io_req_init() and implements get/put laziness with a flag. This also removes using @current from polling io_arm_poll_handler(), etc., but doesn't change observable behaviour. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b6bcd5a7f4bc..5f946eb8b740 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -541,6 +541,7 @@ enum { REQ_F_NO_FILE_TABLE_BIT, REQ_F_QUEUE_TIMEOUT_BIT, REQ_F_WORK_INITIALIZED_BIT, + REQ_F_TASK_PINNED_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -598,6 +599,8 @@ enum { REQ_F_QUEUE_TIMEOUT = BIT(REQ_F_QUEUE_TIMEOUT_BIT), /* io_wq_work is initialized */ REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT), + /* req->task is refcounted */ + REQ_F_TASK_PINNED = BIT(REQ_F_TASK_PINNED_BIT), }; struct async_poll { @@ -910,6 +913,21 @@ struct sock *io_uring_get_socket(struct file *file) } EXPORT_SYMBOL(io_uring_get_socket); +static void io_get_req_task(struct io_kiocb *req) +{ + if (req->flags & REQ_F_TASK_PINNED) + return; + get_task_struct(req->task); + req->flags |= REQ_F_TASK_PINNED; +} + +/* not idempotent -- it doesn't clear REQ_F_TASK_PINNED */ +static void __io_put_req_task(struct io_kiocb *req) +{ + if (req->flags & REQ_F_TASK_PINNED) + put_task_struct(req->task); +} + static void io_file_put_work(struct work_struct *work); /* @@ -1399,9 +1417,7 @@ static void __io_req_aux_free(struct io_kiocb *req) kfree(req->io); if (req->file) io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); - if (req->task) - put_task_struct(req->task); - + __io_put_req_task(req); io_req_work_drop_env(req); } @@ -4367,8 +4383,7 @@ static bool io_arm_poll_handler(struct io_kiocb *req) memcpy(&apoll->work, &req->work, sizeof(req->work)); had_io = req->io != NULL; - get_task_struct(current); - req->task = current; + io_get_req_task(req); req->apoll = apoll; INIT_HLIST_NODE(&req->hash_node); @@ -4556,8 +4571,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe events = READ_ONCE(sqe->poll_events); poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP; - get_task_struct(current); - req->task = current; + io_get_req_task(req); return 0; } @@ -5818,7 +5832,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->flags = 0; /* one is dropped after submission, the other at completion */ refcount_set(&req->refs, 2); - req->task = NULL; + req->task = current; req->result = 0; if (unlikely(req->opcode >= IORING_OP_LAST)) From 801dd57bd1d8c2c253f43635a3045bfa32a810b3 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 15 Jun 2020 10:33:14 +0300 Subject: [PATCH 089/462] io_uring: cancel by ->task not pid For an exiting process it tries to cancel all its inflight requests. Use req->task to match such instead of work.pid. We always have req->task set, and it will be valid because we're matching only current exiting task. Also, remove work.pid and everything related, it's useless now. Reported-by: Eric W. Biederman Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io-wq.h | 1 - fs/io_uring.c | 16 ++++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/fs/io-wq.h b/fs/io-wq.h index b72538fe5afd..071f1a997800 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -90,7 +90,6 @@ struct io_wq_work { const struct cred *creds; struct fs_struct *fs; unsigned flags; - pid_t task_pid; }; static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5f946eb8b740..e17df662c191 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1063,8 +1063,6 @@ static inline void io_req_work_grab_env(struct io_kiocb *req, } spin_unlock(¤t->fs->lock); } - if (!req->work.task_pid) - req->work.task_pid = task_pid_vnr(current); } static inline void io_req_work_drop_env(struct io_kiocb *req) @@ -7451,11 +7449,12 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, } } -static bool io_cancel_pid_cb(struct io_wq_work *work, void *data) +static bool io_cancel_task_cb(struct io_wq_work *work, void *data) { - pid_t pid = (pid_t) (unsigned long) data; + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + struct task_struct *task = data; - return work->task_pid == pid; + return req->task == task; } static int io_uring_flush(struct file *file, void *data) @@ -7467,11 +7466,8 @@ static int io_uring_flush(struct file *file, void *data) /* * If the task is going away, cancel work it may have pending */ - if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) { - void *data = (void *) (unsigned long)task_pid_vnr(current); - - io_wq_cancel_cb(ctx->io_wq, io_cancel_pid_cb, data, true); - } + if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) + io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, current, true); return 0; } From 27784a256c2a453d891c0aaff84c3ac3f2e8a5a0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 13 Jun 2020 09:37:54 +0200 Subject: [PATCH 090/462] spi: uapi: spidev: Use TABs for alignment The UAPI uses TABs for alignment. Convert the recently introduced spaces to TABs to restore consistency. Fixes: 7bb64402a092136 ("spi: tools: Add macro definitions to fix build errors") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200613073755.15906-1-geert+renesas@glider.be Signed-off-by: Mark Brown --- include/uapi/linux/spi/spidev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/spi/spidev.h b/include/uapi/linux/spi/spidev.h index 9390615d52f0..d56427c0b3e0 100644 --- a/include/uapi/linux/spi/spidev.h +++ b/include/uapi/linux/spi/spidev.h @@ -48,10 +48,10 @@ #define SPI_TX_QUAD 0x200 #define SPI_RX_DUAL 0x400 #define SPI_RX_QUAD 0x800 -#define SPI_CS_WORD 0x1000 -#define SPI_TX_OCTAL 0x2000 -#define SPI_RX_OCTAL 0x4000 -#define SPI_3WIRE_HIZ 0x8000 +#define SPI_CS_WORD 0x1000 +#define SPI_TX_OCTAL 0x2000 +#define SPI_RX_OCTAL 0x4000 +#define SPI_3WIRE_HIZ 0x8000 /*---------------------------------------------------------------------------*/ From 43708c0ab7c9861cf3fb8664ab327f8c7b1bada0 Mon Sep 17 00:00:00 2001 From: Flavio Suligoi Date: Tue, 9 Jun 2020 18:38:53 +0200 Subject: [PATCH 091/462] tools: testing: ftrace: trigger: fix spelling mistake Fix typo: "tigger" --> "trigger" Signed-off-by: Flavio Suligoi Acked-by: Steven Rostedt (VMware) Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc | 2 +- .../selftests/ftrace/test.d/trigger/trigger-stacktrace.tc | 2 +- .../ftrace/test.d/trigger/trigger-trace-marker-hist.tc | 2 +- .../ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc index 177e8d4c4744..bf6374e1f6ae 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc @@ -23,7 +23,7 @@ if [ ! -f events/sched/sched_process_fork/hist ]; then exit_unsupported fi -echo "Test histogram basic tigger" +echo "Test histogram basic trigger" echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc index 398c05c4d2a7..6248e6b40704 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc @@ -23,7 +23,7 @@ if [ -z "$FEATURE" ]; then exit_unsupported fi -echo "Test stacktrace tigger" +echo "Test stacktrace trigger" echo 0 > trace echo 0 > options/stacktrace echo 'stacktrace' > events/sched/sched_process_fork/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc index ab6bedb25736..01fdfd50b4be 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc @@ -28,7 +28,7 @@ if [ ! -f events/ftrace/print/hist ]; then exit_unsupported fi -echo "Test histogram trace_marker tigger" +echo "Test histogram trace_marker trigger" echo 'hist:keys=common_pid' > events/ftrace/print/trigger for i in `seq 1 10` ; do echo "hello" > trace_marker; done diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc index df246e505af7..a7fef298e476 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc @@ -46,7 +46,7 @@ test_trace() { done } -echo "Test snapshot trace_marker tigger" +echo "Test snapshot trace_marker trigger" echo 'snapshot' > events/ftrace/print/trigger From 1e570f512cbdc5e9e401ba640d9827985c1bea1e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 10 Jun 2020 18:03:10 +0100 Subject: [PATCH 092/462] arm64/sve: Eliminate data races on sve_default_vl sve_default_vl can be modified via the /proc/sys/abi/sve_default_vl sysctl concurrently with use, and modified concurrently by multiple threads. Adding a lock for this seems overkill, and I don't want to think any more than necessary, so just define wrappers using READ_ONCE()/ WRITE_ONCE(). This will avoid the possibility of torn accesses and repeated loads and stores. There's no evidence yet that this is going wrong in practice: this is just hygiene. For generic sysctl users, it would be better to build this kind of thing into the sysctl common code somehow. Reported-by: Will Deacon Signed-off-by: Dave Martin Link: https://lore.kernel.org/r/1591808590-20210-3-git-send-email-Dave.Martin@arm.com [will: move set_sve_default_vl() inside #ifdef to squash allnoconfig warning] Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 35cb5e66c504..d9eee9194511 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -119,10 +120,20 @@ struct fpsimd_last_state_struct { static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); /* Default VL for tasks that don't set it explicitly: */ -static int sve_default_vl = -1; +static int __sve_default_vl = -1; + +static int get_sve_default_vl(void) +{ + return READ_ONCE(__sve_default_vl); +} #ifdef CONFIG_ARM64_SVE +static void set_sve_default_vl(int val) +{ + WRITE_ONCE(__sve_default_vl, val); +} + /* Maximum supported vector length across all CPUs (initially poisoned) */ int __ro_after_init sve_max_vl = SVE_VL_MIN; int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN; @@ -344,7 +355,7 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; - int vl = sve_default_vl; + int vl = get_sve_default_vl(); struct ctl_table tmp_table = { .data = &vl, .maxlen = sizeof(vl), @@ -361,7 +372,7 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write, if (!sve_vl_valid(vl)) return -EINVAL; - sve_default_vl = find_supported_vector_length(vl); + set_sve_default_vl(find_supported_vector_length(vl)); return 0; } @@ -868,7 +879,7 @@ void __init sve_setup(void) * For the default VL, pick the maximum supported value <= 64. * VL == 64 is guaranteed not to grow the signal frame. */ - sve_default_vl = find_supported_vector_length(64); + set_sve_default_vl(find_supported_vector_length(64)); bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX); @@ -889,7 +900,7 @@ void __init sve_setup(void) pr_info("SVE: maximum available vector length %u bytes per vector\n", sve_max_vl); pr_info("SVE: default vector length %u bytes per vector\n", - sve_default_vl); + get_sve_default_vl()); /* KVM decides whether to support mismatched systems. Just warn here: */ if (sve_max_virtualisable_vl < sve_max_vl) @@ -1029,13 +1040,13 @@ void fpsimd_flush_thread(void) * vector length configured: no kernel task can become a user * task without an exec and hence a call to this function. * By the time the first call to this function is made, all - * early hardware probing is complete, so sve_default_vl + * early hardware probing is complete, so __sve_default_vl * should be valid. * If a bug causes this to go wrong, we make some noise and * try to fudge thread.sve_vl to a safe value here. */ vl = current->thread.sve_vl_onexec ? - current->thread.sve_vl_onexec : sve_default_vl; + current->thread.sve_vl_onexec : get_sve_default_vl(); if (WARN_ON(!sve_vl_valid(vl))) vl = SVE_VL_MIN; From 413d3ea6b775d77b2057f13a9af75875eb066156 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 15 Jun 2020 12:23:16 +0100 Subject: [PATCH 093/462] arm64: traps: Dump registers prior to panic() in bad_mode() When panicing due to an unknown/unhandled exception at EL1, dump the registers of the faulting context so that it's easier to figure out what went wrong. In particular, this makes it a lot easier to debug in-kernel BTI failures since it pretty-prints PSTATE.BTYPE in the crash log. Cc: Mark Brown Cc: Catalin Marinas Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200615113458.2884-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 50cc30acf106..24f2af70ac2e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -813,6 +813,7 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); + __show_regs(regs); local_daif_mask(); panic("bad mode"); } From 8dd4daa04278d7437641962ed53b843c0b0ec4a9 Mon Sep 17 00:00:00 2001 From: Shyam Thombre Date: Wed, 10 Jun 2020 16:39:44 +0530 Subject: [PATCH 094/462] arm64: mm: reset address tag set by kasan sw tagging KASAN sw tagging sets a random tag of 8 bits in the top byte of the pointer returned by the memory allocating functions. So for the functions unaware of this change, the top 8 bits of the address must be reset which is done by the function arch_kasan_reset_tag(). Signed-off-by: Shyam Thombre Link: https://lore.kernel.org/r/1591787384-5823-1-git-send-email-sthombre@codeaurora.org Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 990929c8837e..1df25f26571d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -723,6 +723,7 @@ int kern_addr_valid(unsigned long addr) pmd_t *pmdp, pmd; pte_t *ptep, pte; + addr = arch_kasan_reset_tag(addr); if ((((long)addr) >> VA_BITS) != -1UL) return 0; From 88c200d929c969408779dbae4c4fad32bc510373 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 8 Jun 2020 18:45:18 -0700 Subject: [PATCH 095/462] KVM: VMX: Add helpers to identify interrupt type from intr_info Add is_intr_type() and is_intr_type_n() to consolidate the boilerplate code for querying a specific type of interrupt given an encoded value from VMCS.VM_{ENTER,EXIT}_INTR_INFO, with and without an associated vector respectively. Signed-off-by: Sean Christopherson Message-Id: <20200609014518.26756-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmcs.h | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 5c0ff80b85c0..7a3675fddec2 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -72,11 +72,24 @@ struct loaded_vmcs { struct vmcs_controls_shadow controls_shadow; }; +static inline bool is_intr_type(u32 intr_info, u32 type) +{ + const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK; + + return (intr_info & mask) == (INTR_INFO_VALID_MASK | type); +} + +static inline bool is_intr_type_n(u32 intr_info, u32 type, u8 vector) +{ + const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK | + INTR_INFO_VECTOR_MASK; + + return (intr_info & mask) == (INTR_INFO_VALID_MASK | type | vector); +} + static inline bool is_exception_n(u32 intr_info, u8 vector) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | - INTR_INFO_VALID_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK); + return is_intr_type_n(intr_info, INTR_TYPE_HARD_EXCEPTION, vector); } static inline bool is_debug(u32 intr_info) @@ -106,28 +119,23 @@ static inline bool is_gp_fault(u32 intr_info) static inline bool is_machine_check(u32 intr_info) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | - INTR_INFO_VALID_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); + return is_exception_n(intr_info, MC_VECTOR); } /* Undocumented: icebp/int1 */ static inline bool is_icebp(u32 intr_info) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) - == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK); + return is_intr_type(intr_info, INTR_TYPE_PRIV_SW_EXCEPTION); } static inline bool is_nmi(u32 intr_info) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) - == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK); + return is_intr_type(intr_info, INTR_TYPE_NMI_INTR); } static inline bool is_external_intr(u32 intr_info) { - return (intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) - == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR); + return is_intr_type(intr_info, INTR_TYPE_EXT_INTR); } enum vmcs_field_width { From 4f5747cf8e5947479a27a3597829e45d6d8d73e0 Mon Sep 17 00:00:00 2001 From: Tom Seewald Date: Thu, 4 Jun 2020 21:30:12 -0500 Subject: [PATCH 096/462] RDMA/mlx5: Fix -Wformat warning in check_ucmd_data() Variables of type size_t should use %zu rather than %lu [1]. The variables "inlen", "ucmd", "last", and "size" are all size_t, so use the correct format specifiers. [1] https://www.kernel.org/doc/html/latest/core-api/printk-formats.html Fixes: e383085c2425 ("RDMA/mlx5: Set ECE options during QP create") Link: https://lore.kernel.org/r/20200605023012.9527-1-tseewald@gmail.com Signed-off-by: Tom Seewald Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d61ca85033de..dbe82cdb8d2c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2907,7 +2907,7 @@ static int check_ucmd_data(struct mlx5_ib_dev *dev, if (!ret) mlx5_ib_dbg( dev, - "udata is not cleared, inlen = %lu, ucmd = %lu, last = %lu, size = %lu\n", + "udata is not cleared, inlen = %zu, ucmd = %zu, last = %zu, size = %zu\n", udata->inlen, params->ucmd_size, last, size); return ret ? 0 : -EINVAL; } From 0dc63bbee0fa6da20283ae6e22e99c6fde25ed8e Mon Sep 17 00:00:00 2001 From: Kieran Bingham Date: Tue, 9 Jun 2020 13:45:55 +0100 Subject: [PATCH 097/462] RDMA/hfi1: Fix trivial mis-spelling of 'descriptor' The word 'descriptor' is misspelled throughout the tree. Fix it up accordingly: decriptors -> descriptors Link: https://lore.kernel.org/r/20200609124610.3445662-3-kieran.bingham+renesas@ideasonboard.com Link: https://lore.kernel.org/r/20200609124610.3445662-12-kieran.bingham+renesas@ideasonboard.com Signed-off-by: Kieran Bingham Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/iowait.h | 2 +- drivers/infiniband/hw/hfi1/ipoib_tx.c | 2 +- drivers/infiniband/hw/hfi1/verbs_txreq.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index 07847cb72169..d580aa17ae37 100644 --- a/drivers/infiniband/hw/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h @@ -399,7 +399,7 @@ static inline void iowait_get_priority(struct iowait *w) * @wait_head: the wait queue * * This function is called to insert an iowait struct into a - * wait queue after a resource (eg, sdma decriptor or pio + * wait queue after a resource (eg, sdma descriptor or pio * buffer) is run out. */ static inline void iowait_queue(bool pkts_sent, struct iowait *w, diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index 883cb9d48022..175290c56db9 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -364,7 +364,7 @@ static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev, if (unlikely(!tx)) return ERR_PTR(-ENOMEM); - /* so that we can test if the sdma decriptors are there */ + /* so that we can test if the sdma descriptors are there */ tx->txreq.num_desc = 0; tx->priv = priv; tx->txq = txp->txq; diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index bfa6e081cb56..d2d526c5a756 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -91,7 +91,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, tx->mr = NULL; tx->sde = priv->s_sde; tx->psc = priv->s_sendcontext; - /* so that we can test if the sdma decriptors are there */ + /* so that we can test if the sdma descriptors are there */ tx->txreq.num_desc = 0; /* Set the header type */ tx->phdr.hdr.hdr_type = priv->hdr_type; From 6769b275a313c76ddcd7d94c632032326db5f759 Mon Sep 17 00:00:00 2001 From: Tom Seewald Date: Wed, 10 Jun 2020 12:47:17 -0500 Subject: [PATCH 098/462] RDMA/siw: Fix pointer-to-int-cast warning in siw_rx_pbl() The variable buf_addr is type dma_addr_t, which may not be the same size as a pointer. To ensure it is the correct size, cast to a uintptr_t. Fixes: c536277e0db1 ("RDMA/siw: Fix 64/32bit pointer inconsistency") Link: https://lore.kernel.org/r/20200610174717.15932-1-tseewald@gmail.com Signed-off-by: Tom Seewald Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_qp_rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 650520244ed0..7271d705f4b0 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -139,7 +139,8 @@ static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx, break; bytes = min(bytes, len); - if (siw_rx_kva(srx, (void *)buf_addr, bytes) == bytes) { + if (siw_rx_kva(srx, (void *)(uintptr_t)buf_addr, bytes) == + bytes) { copied += bytes; offset += bytes; len -= bytes; From 5bcc066c05909d2af5c954911309f02312b0b02e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 15 Jun 2020 12:22:53 -0700 Subject: [PATCH 099/462] trace/events/block.h: drop kernel-doc for dropped function parameter Fix kernel-doc warning: the parameter was removed, so also remove the kernel-doc notation for it. ../include/trace/events/block.h:278: warning: Excess function parameter 'error' description in 'trace_block_bio_complete' Fixes: d24de76af836 ("block: remove the error argument to the block_bio_complete tracepoint") Signed-off-by: Randy Dunlap Cc: Christoph Hellwig Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/trace/events/block.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 1257f26bb887..93b114226af8 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -254,7 +254,6 @@ TRACE_EVENT(block_bio_bounce, * block_bio_complete - completed all work on the block operation * @q: queue holding the block operation * @bio: block operation completed - * @error: io error value * * This tracepoint indicates there is no further work to do on this * block IO operation @bio. From b0659d8a950d424e57cc0a67afc4740ee561224e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 Jun 2020 14:49:26 -0700 Subject: [PATCH 100/462] bpf: Fix definition of bpf_ringbuf_output() helper in UAPI comments Fix definition of bpf_ringbuf_output() in UAPI header comments, which is used to generate libbpf's bpf_helper_defs.h header. Return value is a number (error code), not a pointer. Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200615214926.3638836-1-andriin@fb.com --- include/uapi/linux/bpf.h | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 19684813faae..974a71342aea 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3168,7 +3168,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) + * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 19684813faae..974a71342aea 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3168,7 +3168,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) + * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of From c34a06c56df7c513cd19f591b26fe7d814c778cc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 Jun 2020 15:53:55 -0700 Subject: [PATCH 101/462] tools/bpftool: Add ringbuf map to a list of known map types Add symbolic name "ringbuf" to map to BPF_MAP_TYPE_RINGBUF. Without this, users will see "type 27" instead of "ringbuf" in `map show` output. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200615225355.366256-1-andriin@fb.com --- tools/bpf/bpftool/map.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index c5fac8068ba1..99109a6afe17 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -49,6 +49,7 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_STACK] = "stack", [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_MAP_TYPE_RINGBUF] = "ringbuf", }; const size_t map_type_name_size = ARRAY_SIZE(map_type_name); From 901e34905ac5e402fd690a1fdf51fc7f070bdc6e Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:02 +0530 Subject: [PATCH 102/462] powerpc: Document details on H_SCM_HEALTH hcall Add documentation to 'papr_hcalls.rst' describing the bitmap flags that are returned from H_SCM_HEALTH hcall as per the PAPR-SCM specification. Signed-off-by: Vaibhav Jain Acked-by: Ira Weiny Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-2-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- Documentation/powerpc/papr_hcalls.rst | 46 ++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst index 3493631a60f8..48fcf1255a33 100644 --- a/Documentation/powerpc/papr_hcalls.rst +++ b/Documentation/powerpc/papr_hcalls.rst @@ -220,13 +220,51 @@ from the LPAR memory. **H_SCM_HEALTH** | Input: drcIndex -| Out: *health-bitmap, health-bit-valid-bitmap* +| Out: *health-bitmap (r4), health-bit-valid-bitmap (r5)* | Return Value: *H_Success, H_Parameter, H_Hardware* Given a DRC Index return the info on predictive failure and overall health of -the NVDIMM. The asserted bits in the health-bitmap indicate a single predictive -failure and health-bit-valid-bitmap indicate which bits in health-bitmap are -valid. +the PMEM device. The asserted bits in the health-bitmap indicate one or more states +(described in table below) of the PMEM device and health-bit-valid-bitmap indicate +which bits in health-bitmap are valid. The bits are reported in +reverse bit ordering for example a value of 0xC400000000000000 +indicates bits 0, 1, and 5 are valid. + +Health Bitmap Flags: + ++------+-----------------------------------------------------------------------+ +| Bit | Definition | ++======+=======================================================================+ +| 00 | PMEM device is unable to persist memory contents. | +| | If the system is powered down, nothing will be saved. | ++------+-----------------------------------------------------------------------+ +| 01 | PMEM device failed to persist memory contents. Either contents were | +| | not saved successfully on power down or were not restored properly on | +| | power up. | ++------+-----------------------------------------------------------------------+ +| 02 | PMEM device contents are persisted from previous IPL. The data from | +| | the last boot were successfully restored. | ++------+-----------------------------------------------------------------------+ +| 03 | PMEM device contents are not persisted from previous IPL. There was no| +| | data to restore from the last boot. | ++------+-----------------------------------------------------------------------+ +| 04 | PMEM device memory life remaining is critically low | ++------+-----------------------------------------------------------------------+ +| 05 | PMEM device will be garded off next IPL due to failure | ++------+-----------------------------------------------------------------------+ +| 06 | PMEM device contents cannot persist due to current platform health | +| | status. A hardware failure may prevent data from being saved or | +| | restored. | ++------+-----------------------------------------------------------------------+ +| 07 | PMEM device is unable to persist memory contents in certain conditions| ++------+-----------------------------------------------------------------------+ +| 08 | PMEM device is encrypted | ++------+-----------------------------------------------------------------------+ +| 09 | PMEM device has successfully completed a requested erase or secure | +| | erase procedure. | ++------+-----------------------------------------------------------------------+ +|10:63 | Reserved / Unused | ++------+-----------------------------------------------------------------------+ **H_SCM_PERFORMANCE_STATS** From 97c02c723bcef11da2f46facdde7c34e72ec8a1f Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:03 +0530 Subject: [PATCH 103/462] seq_buf: Export seq_buf_printf 'seq_buf' provides a very useful abstraction for writing to a string buffer without needing to worry about it over-flowing. However even though the API has been stable for couple of years now its still not exported to kernel loadable modules limiting its usage. Hence this patch proposes update to 'seq_buf.c' to mark seq_buf_printf() which is part of the seq_buf API to be exported to kernel loadable GPL modules. This symbol will be used in later parts of this patch-set to simplify content creation for a sysfs attribute. Signed-off-by: Vaibhav Jain Acked-by: Steven Rostedt (VMware) Cc: Piotr Maziarz Cc: Cezary Rojewski Cc: Christoph Hellwig Cc: Steven Rostedt Cc: Borislav Petkov Link: https://lore.kernel.org/r/20200615124407.32596-3-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- lib/seq_buf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 4e865d42ab03..707453f5d58e 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -91,6 +91,7 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) return ret; } +EXPORT_SYMBOL_GPL(seq_buf_printf); #ifdef CONFIG_BINARY_PRINTF /** From b791abf3201d724ac372c2ba1fa6e90d192e1dbf Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:04 +0530 Subject: [PATCH 104/462] powerpc/papr_scm: Fetch nvdimm health information from PHYP Implement support for fetching nvdimm health information via H_SCM_HEALTH hcall as documented in Ref[1]. The hcall returns a pair of 64-bit bitmap, bitwise-and of which is then stored in 'struct papr_scm_priv' and subsequently partially exposed to user-space via newly introduced dimm specific attribute 'papr/flags'. Since the hcall is costly, the health information is cached and only re-queried, 60s after the previous successful hcall. The patch also adds a documentation text describing flags reported by the the new sysfs attribute 'papr/flags' is also introduced at Documentation/ABI/testing/sysfs-bus-papr-pmem. [1] commit 58b278f568f0 ("powerpc: Provide initial documentation for PAPR hcalls") Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-4-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- Documentation/ABI/testing/sysfs-bus-papr-pmem | 27 +++ arch/powerpc/platforms/pseries/papr_scm.c | 168 +++++++++++++++++- 2 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-bus-papr-pmem diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem new file mode 100644 index 000000000000..5b10d036a8d4 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem @@ -0,0 +1,27 @@ +What: /sys/bus/nd/devices/nmemX/papr/flags +Date: Apr, 2020 +KernelVersion: v5.8 +Contact: linuxppc-dev , linux-nvdimm@lists.01.org, +Description: + (RO) Report flags indicating various states of a + papr-pmem NVDIMM device. Each flag maps to a one or + more bits set in the dimm-health-bitmap retrieved in + response to H_SCM_HEALTH hcall. The details of the bit + flags returned in response to this hcall is available + at 'Documentation/powerpc/papr_hcalls.rst' . Below are + the flags reported in this sysfs file: + + * "not_armed" : Indicates that NVDIMM contents will not + survive a power cycle. + * "flush_fail" : Indicates that NVDIMM contents + couldn't be flushed during last + shut-down event. + * "restore_fail": Indicates that NVDIMM contents + couldn't be restored during NVDIMM + initialization. + * "encrypted" : NVDIMM contents are encrypted. + * "smart_notify": There is health event for the NVDIMM. + * "scrubbed" : Indicating that contents of the + NVDIMM have been scrubbed. + * "locked" : Indicating that NVDIMM contents cant + be modified until next power cycle. diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f35592423380..0c091622b15e 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -22,6 +23,44 @@ (1ul << ND_CMD_GET_CONFIG_DATA) | \ (1ul << ND_CMD_SET_CONFIG_DATA)) +/* DIMM health bitmap bitmap indicators */ +/* SCM device is unable to persist memory contents */ +#define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) +/* SCM device failed to persist memory contents */ +#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) +/* SCM device contents are persisted from previous IPL */ +#define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2)) +/* SCM device contents are not persisted from previous IPL */ +#define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) +/* SCM device memory life remaining is critically low */ +#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) +/* SCM device will be garded off next IPL due to failure */ +#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) +/* SCM contents cannot persist due to current platform health status */ +#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) +/* SCM device is unable to persist memory contents in certain conditions */ +#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7)) +/* SCM device is encrypted */ +#define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8)) +/* SCM device has been scrubbed and locked */ +#define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9)) + +/* Bits status indicators for health bitmap indicating unarmed dimm */ +#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +/* Bits status indicators for health bitmap indicating unflushed dimm */ +#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) + +/* Bits status indicators for health bitmap indicating unrestored dimm */ +#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) + +/* Bit status indicators for smart event notification */ +#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ + PAPR_PMEM_HEALTH_FATAL | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +/* private struct associated with each region */ struct papr_scm_priv { struct platform_device *pdev; struct device_node *dn; @@ -39,6 +78,15 @@ struct papr_scm_priv { struct resource res; struct nd_region *region; struct nd_interleave_set nd_set; + + /* Protect dimm health data from concurrent read/writes */ + struct mutex health_mutex; + + /* Last time the health information of the dimm was updated */ + unsigned long lasthealth_jiffies; + + /* Health information for the dimm */ + u64 health_bitmap; }; static int drc_pmem_bind(struct papr_scm_priv *p) @@ -144,6 +192,61 @@ err_out: return drc_pmem_bind(p); } +/* + * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the + * health information. + */ +static int __drc_pmem_query_health(struct papr_scm_priv *p) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + long rc; + + /* issue the hcall */ + rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); + if (rc != H_SUCCESS) { + dev_err(&p->pdev->dev, + "Failed to query health information, Err:%ld\n", rc); + return -ENXIO; + } + + p->lasthealth_jiffies = jiffies; + p->health_bitmap = ret[0] & ret[1]; + + dev_dbg(&p->pdev->dev, + "Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n", + ret[0], ret[1]); + + return 0; +} + +/* Min interval in seconds for assuming stable dimm health */ +#define MIN_HEALTH_QUERY_INTERVAL 60 + +/* Query cached health info and if needed call drc_pmem_query_health */ +static int drc_pmem_query_health(struct papr_scm_priv *p) +{ + unsigned long cache_timeout; + int rc; + + /* Protect concurrent modifications to papr_scm_priv */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + return rc; + + /* Jiffies offset for which the health data is assumed to be same */ + cache_timeout = p->lasthealth_jiffies + + msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000); + + /* Fetch new health info is its older than MIN_HEALTH_QUERY_INTERVAL */ + if (time_after(jiffies, cache_timeout)) + rc = __drc_pmem_query_health(p); + else + /* Assume cached health data is valid */ + rc = 0; + + mutex_unlock(&p->health_mutex); + return rc; +} static int papr_scm_meta_get(struct papr_scm_priv *p, struct nd_cmd_get_config_data_hdr *hdr) @@ -286,6 +389,64 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + struct seq_buf s; + u64 health; + int rc; + + rc = drc_pmem_query_health(p); + if (rc) + return rc; + + /* Copy health_bitmap locally, check masks & update out buffer */ + health = READ_ONCE(p->health_bitmap); + + seq_buf_init(&s, buf, PAGE_SIZE); + if (health & PAPR_PMEM_UNARMED_MASK) + seq_buf_printf(&s, "not_armed "); + + if (health & PAPR_PMEM_BAD_SHUTDOWN_MASK) + seq_buf_printf(&s, "flush_fail "); + + if (health & PAPR_PMEM_BAD_RESTORE_MASK) + seq_buf_printf(&s, "restore_fail "); + + if (health & PAPR_PMEM_ENCRYPTED) + seq_buf_printf(&s, "encrypted "); + + if (health & PAPR_PMEM_SMART_EVENT_MASK) + seq_buf_printf(&s, "smart_notify "); + + if (health & PAPR_PMEM_SCRUBBED_AND_LOCKED) + seq_buf_printf(&s, "scrubbed locked "); + + if (seq_buf_used(&s)) + seq_buf_printf(&s, "\n"); + + return seq_buf_used(&s); +} +DEVICE_ATTR_RO(flags); + +/* papr_scm specific dimm attributes */ +static struct attribute *papr_nd_attributes[] = { + &dev_attr_flags.attr, + NULL, +}; + +static struct attribute_group papr_nd_attribute_group = { + .name = "papr", + .attrs = papr_nd_attributes, +}; + +static const struct attribute_group *papr_nd_attr_groups[] = { + &papr_nd_attribute_group, + NULL, +}; + static int papr_scm_nvdimm_init(struct papr_scm_priv *p) { struct device *dev = &p->pdev->dev; @@ -312,8 +473,8 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) dimm_flags = 0; set_bit(NDD_LABELING, &dimm_flags); - p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags, - PAPR_SCM_DIMM_CMD_MASK, 0, NULL); + p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups, + dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); if (!p->nvdimm) { dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn); goto err; @@ -399,6 +560,9 @@ static int papr_scm_probe(struct platform_device *pdev) if (!p) return -ENOMEM; + /* Initialize the dimm mutex */ + mutex_init(&p->health_mutex); + /* optional DT properties */ of_property_read_u32(dn, "ibm,metadata-size", &metadata_size); From b5f38f09e1558c20265a2976b0337bab143a66c7 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:05 +0530 Subject: [PATCH 105/462] powerpc/papr_scm: Improve error logging and handling papr_scm_ndctl() Since papr_scm_ndctl() can be called from outside papr_scm, its exposed to the possibility of receiving NULL as value of 'cmd_rc' argument. This patch updates papr_scm_ndctl() to protect against such possibility by assigning it pointer to a local variable in case cmd_rc == NULL. Finally the patch also updates the 'default' add a debug log unknown 'cmd' values. Signed-off-by: Vaibhav Jain Reviewed-by: Ira Weiny Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-5-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/platforms/pseries/papr_scm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 0c091622b15e..692ad3d79826 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -355,11 +355,16 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, { struct nd_cmd_get_config_size *get_size_hdr; struct papr_scm_priv *p; + int rc; /* Only dimm-specific calls are supported atm */ if (!nvdimm) return -EINVAL; + /* Use a local variable in case cmd_rc pointer is NULL */ + if (!cmd_rc) + cmd_rc = &rc; + p = nvdimm_provider_data(nvdimm); switch (cmd) { @@ -381,6 +386,7 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, break; default: + dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); return -EINVAL; } From f517f7925b7b453cb83be06c268ba057b78e4792 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:06 +0530 Subject: [PATCH 106/462] ndctl/papr_scm,uapi: Add support for PAPR nvdimm specific methods Introduce support for PAPR NVDIMM Specific Methods (PDSM) in papr_scm module and add the command family NVDIMM_FAMILY_PAPR to the white list of NVDIMM command sets. Also advertise support for ND_CMD_CALL for the nvdimm command mask and implement necessary scaffolding in the module to handle ND_CMD_CALL ioctl and PDSM requests that we receive. The layout of the PDSM request as we expect from libnvdimm/libndctl is described in newly introduced uapi header 'papr_pdsm.h' which defines a 'struct nd_pkg_pdsm' and a maximal union named 'nd_pdsm_payload'. These new structs together with 'struct nd_cmd_pkg' for a pdsm envelop thats sent by libndctl to libnvdimm and serviced by papr_scm in 'papr_scm_service_pdsm()'. The PDSM request is communicated by member 'struct nd_cmd_pkg.nd_command' together with other information on the pdsm payload (size-in, size-out). The patch also introduces 'struct pdsm_cmd_desc' instances of which are stored in an array __pdsm_cmd_descriptors[] indexed with PDSM cmd and corresponding access function pdsm_cmd_desc() is introduced. 'struct pdsm_cdm_desc' holds the service function for a given PDSM and corresponding payload in/out sizes. A new function papr_scm_service_pdsm() is introduced and is called from papr_scm_ndctl() in case of a PDSM request is received via ND_CMD_CALL command from libnvdimm. The function performs validation on the PDSM payload based on info present in corresponding PDSM descriptor and if valid calls the 'struct pdcm_cmd_desc.service' function to service the PDSM. Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-6-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/include/uapi/asm/papr_pdsm.h | 95 +++++++++++ arch/powerpc/platforms/pseries/papr_scm.c | 193 +++++++++++++++++++++- include/uapi/linux/ndctl.h | 1 + 3 files changed, 285 insertions(+), 4 deletions(-) create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h new file mode 100644 index 000000000000..28115152aa4e --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl + * + * (C) Copyright IBM 2020 + * + * Author: Vaibhav Jain + */ + +#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_ +#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_ + +#include +#include + +/* + * PDSM Envelope: + * + * The ioctl ND_CMD_CALL exchange data between user-space and kernel via + * envelope which consists of 2 headers sections and payload sections as + * illustrated below: + * +-----------------+---------------+---------------------------+ + * | 64-Bytes | 8-Bytes | Max 184-Bytes | + * +-----------------+---------------+---------------------------+ + * | ND-HEADER | PDSM-HEADER | PDSM-PAYLOAD | + * +-----------------+---------------+---------------------------+ + * | nd_family | | | + * | nd_size_out | cmd_status | | + * | nd_size_in | reserved | nd_pdsm_payload | + * | nd_command | payload --> | | + * | nd_fw_size | | | + * | nd_payload ---> | | | + * +---------------+-----------------+---------------------------+ + * + * ND Header: + * This is the generic libnvdimm header described as 'struct nd_cmd_pkg' + * which is interpreted by libnvdimm before passed on to papr_scm. Important + * member fields used are: + * 'nd_family' : (In) NVDIMM_FAMILY_PAPR_SCM + * 'nd_size_in' : (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0) + * 'nd_size_out' : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD + * 'nd_command' : (In) One of PAPR_PDSM_XXX + * 'nd_fw_size' : (Out) PDSM-HEADER + size of actual payload returned + * + * PDSM Header: + * This is papr-scm specific header that precedes the payload. This is defined + * as nd_cmd_pdsm_pkg. Following fields aare available in this header: + * + * 'cmd_status' : (Out) Errors if any encountered while servicing PDSM. + * 'reserved' : Not used, reserved for future and should be set to 0. + * 'payload' : A union of all the possible payload structs + * + * PDSM Payload: + * + * The layout of the PDSM Payload is defined by various structs shared between + * papr_scm and libndctl so that contents of payload can be interpreted. As such + * its defined as a union of all possible payload structs as + * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command' + * appropriate member of the union is accessed. + */ + +/* Max payload size that we can handle */ +#define ND_PDSM_PAYLOAD_MAX_SIZE 184 + +/* Max payload size that we can handle */ +#define ND_PDSM_HDR_SIZE \ + (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE) + +/* + * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel + * via 'nd_cmd_pkg.nd_command' member of the ioctl struct + */ +enum papr_pdsm { + PAPR_PDSM_MIN = 0x0, + PAPR_PDSM_MAX, +}; + +/* Maximal union that can hold all possible payload types */ +union nd_pdsm_payload { + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; +} __packed; + +/* + * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm + * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command' + * that should always precede this struct when sent to papr_scm via CMD_CALL + * interface. + */ +struct nd_pkg_pdsm { + __s32 cmd_status; /* Out: Sub-cmd status returned back */ + __u16 reserved[2]; /* Ignored and to be set as '0' */ + union nd_pdsm_payload payload; +} __packed; + +#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */ diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 692ad3d79826..d3bbf9940ba4 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -15,13 +15,15 @@ #include #include +#include #define BIND_ANY_ADDR (~0ul) #define PAPR_SCM_DIMM_CMD_MASK \ ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ (1ul << ND_CMD_GET_CONFIG_DATA) | \ - (1ul << ND_CMD_SET_CONFIG_DATA)) + (1ul << ND_CMD_SET_CONFIG_DATA) | \ + (1ul << ND_CMD_CALL)) /* DIMM health bitmap bitmap indicators */ /* SCM device is unable to persist memory contents */ @@ -349,17 +351,195 @@ static int papr_scm_meta_set(struct papr_scm_priv *p, return 0; } +/* + * Do a sanity checks on the inputs args to dimm-control function and return + * '0' if valid. Validation of PDSM payloads happens later in + * papr_scm_service_pdsm. + */ +static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len) +{ + unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK; + struct nd_cmd_pkg *nd_cmd; + struct papr_scm_priv *p; + enum papr_pdsm pdsm; + + /* Only dimm-specific calls are supported atm */ + if (!nvdimm) + return -EINVAL; + + /* get the provider data from struct nvdimm */ + p = nvdimm_provider_data(nvdimm); + + if (!test_bit(cmd, &cmd_mask)) { + dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd); + return -EINVAL; + } + + /* For CMD_CALL verify pdsm request */ + if (cmd == ND_CMD_CALL) { + /* Verify the envelope and envelop size */ + if (!buf || + buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n", + buf_len); + return -EINVAL; + } + + /* Verify that the nd_cmd_pkg.nd_family is correct */ + nd_cmd = (struct nd_cmd_pkg *)buf; + + if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) { + dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n", + nd_cmd->nd_family); + return -EINVAL; + } + + pdsm = (enum papr_pdsm)nd_cmd->nd_command; + + /* Verify if the pdsm command is valid */ + if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", + pdsm); + return -EINVAL; + } + + /* Have enough space to hold returned 'nd_pkg_pdsm' header */ + if (nd_cmd->nd_size_out < ND_PDSM_HDR_SIZE) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n", + pdsm); + return -EINVAL; + } + } + + /* Let the command be further processed */ + return 0; +} + +/* + * 'struct pdsm_cmd_desc' + * Identifies supported PDSMs' expected length of in/out payloads + * and pdsm service function. + * + * size_in : Size of input payload if any in the PDSM request. + * size_out : Size of output payload if any in the PDSM request. + * service : Service function for the PDSM request. Return semantics: + * rc < 0 : Error servicing PDSM and rc indicates the error. + * rc >=0 : Serviced successfully and 'rc' indicate number of + * bytes written to payload. + */ +struct pdsm_cmd_desc { + u32 size_in; + u32 size_out; + int (*service)(struct papr_scm_priv *dimm, + union nd_pdsm_payload *payload); +}; + +/* Holds all supported PDSMs' command descriptors */ +static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { + [PAPR_PDSM_MIN] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, + /* New PDSM command descriptors to be added below */ + + /* Empty */ + [PAPR_PDSM_MAX] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, +}; + +/* Given a valid pdsm cmd return its command descriptor else return NULL */ +static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd) +{ + if (cmd >= 0 || cmd < ARRAY_SIZE(__pdsm_cmd_descriptors)) + return &__pdsm_cmd_descriptors[cmd]; + + return NULL; +} + +/* + * For a given pdsm request call an appropriate service function. + * Returns errors if any while handling the pdsm command package. + */ +static int papr_scm_service_pdsm(struct papr_scm_priv *p, + struct nd_cmd_pkg *pkg) +{ + /* Get the PDSM header and PDSM command */ + struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload; + enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command; + const struct pdsm_cmd_desc *pdsc; + int rc; + + /* Fetch corresponding pdsm descriptor for validation and servicing */ + pdsc = pdsm_cmd_desc(pdsm); + + /* Validate pdsm descriptor */ + /* Ensure that reserved fields are 0 */ + if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n", + pdsm); + return -EINVAL; + } + + /* If pdsm expects some input, then ensure that the size_in matches */ + if (pdsc->size_in && + pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n", + pdsm, pkg->nd_size_in); + return -EINVAL; + } + + /* If pdsm wants to return data, then ensure that size_out matches */ + if (pdsc->size_out && + pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n", + pdsm, pkg->nd_size_out); + return -EINVAL; + } + + /* Service the pdsm */ + if (pdsc->service) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm); + + rc = pdsc->service(p, &pdsm_pkg->payload); + + if (rc < 0) { + /* error encountered while servicing pdsm */ + pdsm_pkg->cmd_status = rc; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } else { + /* pdsm serviced and 'rc' bytes written to payload */ + pdsm_pkg->cmd_status = 0; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc; + } + } else { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n", + pdsm); + pdsm_pkg->cmd_status = -ENOENT; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } + + return pdsm_pkg->cmd_status; +} + static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { struct nd_cmd_get_config_size *get_size_hdr; + struct nd_cmd_pkg *call_pkg = NULL; struct papr_scm_priv *p; int rc; - /* Only dimm-specific calls are supported atm */ - if (!nvdimm) - return -EINVAL; + rc = is_cmd_valid(nvdimm, cmd, buf, buf_len); + if (rc) { + pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc); + return rc; + } /* Use a local variable in case cmd_rc pointer is NULL */ if (!cmd_rc) @@ -385,6 +565,11 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, *cmd_rc = papr_scm_meta_set(p, buf); break; + case ND_CMD_CALL: + call_pkg = (struct nd_cmd_pkg *)buf; + *cmd_rc = papr_scm_service_pdsm(p, call_pkg); + break; + default: dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); return -EINVAL; diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index de5d90212409..0e09dc5cec19 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -244,6 +244,7 @@ struct nd_cmd_pkg { #define NVDIMM_FAMILY_HPE2 2 #define NVDIMM_FAMILY_MSFT 3 #define NVDIMM_FAMILY_HYPERV 4 +#define NVDIMM_FAMILY_PAPR 5 #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ struct nd_cmd_pkg) From d35f18b554be015b6fa89fad6447c6fce8e6ad66 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:07 +0530 Subject: [PATCH 107/462] powerpc/papr_scm: Implement support for PAPR_PDSM_HEALTH This patch implements support for PDSM request 'PAPR_PDSM_HEALTH' that returns a newly introduced 'struct nd_papr_pdsm_health' instance containing dimm health information back to user space in response to ND_CMD_CALL. This functionality is implemented in newly introduced papr_pdsm_health() that queries the nvdimm health information and then copies this information to the package payload whose layout is defined by 'struct nd_papr_pdsm_health'. Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-7-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/include/uapi/asm/papr_pdsm.h | 37 ++++++++++++++++ arch/powerpc/platforms/pseries/papr_scm.c | 51 +++++++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h index 28115152aa4e..9ccecc1d6840 100644 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -66,17 +66,54 @@ #define ND_PDSM_HDR_SIZE \ (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE) +/* Various nvdimm health indicators */ +#define PAPR_PDSM_DIMM_HEALTHY 0 +#define PAPR_PDSM_DIMM_UNHEALTHY 1 +#define PAPR_PDSM_DIMM_CRITICAL 2 +#define PAPR_PDSM_DIMM_FATAL 3 + +/* + * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH + * Various flags indicate the health status of the dimm. + * + * extension_flags : Any extension fields present in the struct. + * dimm_unarmed : Dimm not armed. So contents wont persist. + * dimm_bad_shutdown : Previous shutdown did not persist contents. + * dimm_bad_restore : Contents from previous shutdown werent restored. + * dimm_scrubbed : Contents of the dimm have been scrubbed. + * dimm_locked : Contents of the dimm cant be modified until CEC reboot + * dimm_encrypted : Contents of dimm are encrypted. + * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX + */ +struct nd_papr_pdsm_health { + union { + struct { + __u32 extension_flags; + __u8 dimm_unarmed; + __u8 dimm_bad_shutdown; + __u8 dimm_bad_restore; + __u8 dimm_scrubbed; + __u8 dimm_locked; + __u8 dimm_encrypted; + __u16 dimm_health; + }; + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; + }; +}; + /* * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel * via 'nd_cmd_pkg.nd_command' member of the ioctl struct */ enum papr_pdsm { PAPR_PDSM_MIN = 0x0, + PAPR_PDSM_HEALTH, PAPR_PDSM_MAX, }; /* Maximal union that can hold all possible payload types */ union nd_pdsm_payload { + struct nd_papr_pdsm_health health; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; } __packed; diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index d3bbf9940ba4..9c569078a09f 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -416,6 +416,52 @@ static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, return 0; } +/* Fetch the DIMM health info and populate it in provided package. */ +static int papr_pdsm_health(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc; + + /* Ensure dimm health mutex is taken preventing concurrent access */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + goto out; + + /* Always fetch upto date dimm health data ignoring cached values */ + rc = __drc_pmem_query_health(p); + if (rc) { + mutex_unlock(&p->health_mutex); + goto out; + } + + /* update health struct with various flags derived from health bitmap */ + payload->health = (struct nd_papr_pdsm_health) { + .extension_flags = 0, + .dimm_unarmed = !!(p->health_bitmap & PAPR_PMEM_UNARMED_MASK), + .dimm_bad_shutdown = !!(p->health_bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK), + .dimm_bad_restore = !!(p->health_bitmap & PAPR_PMEM_BAD_RESTORE_MASK), + .dimm_scrubbed = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), + .dimm_locked = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), + .dimm_encrypted = !!(p->health_bitmap & PAPR_PMEM_ENCRYPTED), + .dimm_health = PAPR_PDSM_DIMM_HEALTHY, + }; + + /* Update field dimm_health based on health_bitmap flags */ + if (p->health_bitmap & PAPR_PMEM_HEALTH_FATAL) + payload->health.dimm_health = PAPR_PDSM_DIMM_FATAL; + else if (p->health_bitmap & PAPR_PMEM_HEALTH_CRITICAL) + payload->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL; + else if (p->health_bitmap & PAPR_PMEM_HEALTH_UNHEALTHY) + payload->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY; + + /* struct populated hence can release the mutex now */ + mutex_unlock(&p->health_mutex); + rc = sizeof(struct nd_papr_pdsm_health); + +out: + return rc; +} + /* * 'struct pdsm_cmd_desc' * Identifies supported PDSMs' expected length of in/out payloads @@ -444,6 +490,11 @@ static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { }, /* New PDSM command descriptors to be added below */ + [PAPR_PDSM_HEALTH] = { + .size_in = 0, + .size_out = sizeof(struct nd_papr_pdsm_health), + .service = papr_pdsm_health, + }, /* Empty */ [PAPR_PDSM_MAX] = { .size_in = 0, From a1e17eb03e69bb61bd1b1a14610436b7b9be12d9 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Fri, 22 May 2020 12:59:29 +0800 Subject: [PATCH 108/462] scsi: ufs-bsg: Fix runtime PM imbalance on error When ufs_bsg_alloc_desc_buffer() returns an error code, a pairing runtime PM usage counter decrement is needed to keep the counter balanced. Link: https://lore.kernel.org/r/20200522045932.31795-1-dinghao.liu@zju.edu.cn Fixes: 74e5e468b664 (scsi: ufs-bsg: Wake the device before sending raw upiu commands) Reviewed-by: Avri Altman Signed-off-by: Dinghao Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs_bsg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs_bsg.c b/drivers/scsi/ufs/ufs_bsg.c index 53dd87628cbe..516a7f573942 100644 --- a/drivers/scsi/ufs/ufs_bsg.c +++ b/drivers/scsi/ufs/ufs_bsg.c @@ -106,8 +106,10 @@ static int ufs_bsg_request(struct bsg_job *job) desc_op = bsg_request->upiu_req.qr.opcode; ret = ufs_bsg_alloc_desc_buffer(hba, job, &desc_buff, &desc_len, desc_op); - if (ret) + if (ret) { + pm_runtime_put_sync(hba->dev); goto out; + } /* fall through */ case UPIU_TRANSACTION_NOP_OUT: From 0bdcfa182506526fbe4e088ff9ca86a31b81828d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 15 Jun 2020 16:12:47 +1000 Subject: [PATCH 109/462] powerpc/64s: Fix KVM interrupt using wrong save area The CTR register reload in the KVM interrupt path used the wrong save area for SLB (and NMI) interrupts. Fixes: 9600f261acaa ("powerpc/64s/exception: Move KVM test to common code") Cc: stable@vger.kernel.org # v5.7+ Reported-by: Christian Zigotzky Signed-off-by: Nicholas Piggin Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200615061247.1310763-1-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e70ebb5c318c..fa080694e581 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -270,7 +270,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .endif - ld r10,PACA_EXGEN+EX_CTR(r13) + ld r10,IAREA+EX_CTR(r13) mtctr r10 BEGIN_FTR_SECTION ld r10,IAREA+EX_PPR(r13) @@ -298,7 +298,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .if IKVM_SKIP 89: mtocrf 0x80,r9 - ld r10,PACA_EXGEN+EX_CTR(r13) + ld r10,IAREA+EX_CTR(r13) mtctr r10 ld r9,IAREA+EX_R9(r13) ld r10,IAREA+EX_R10(r13) From 7bb7ee8704fea9fec9eea53322b8464c3ed70a3d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 15 Jun 2020 08:46:23 +0200 Subject: [PATCH 110/462] scsi: libata: Provide an ata_scsi_dma_need_drain stub for !CONFIG_ATA SAS drivers can be compiled with ata support disabled. Provide a stub so that the drivers don't have to ifdef around wiring up ata_scsi_dma_need_drain. Link: https://lore.kernel.org/r/20200615064624.37317-2-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/linux/libata.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/libata.h b/include/linux/libata.h index af832852e620..042e584daca7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1092,7 +1092,11 @@ extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd, #define ATA_SCSI_COMPAT_IOCTL /* empty */ #endif extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd); +#if IS_ENABLED(CONFIG_ATA) bool ata_scsi_dma_need_drain(struct request *rq); +#else +#define ata_scsi_dma_need_drain NULL +#endif extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, unsigned int cmd, void __user *arg); extern bool ata_link_online(struct ata_link *link); From b8f1d1e05817f5e5f7517911b55ea13d2c0438a0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 15 Jun 2020 08:46:24 +0200 Subject: [PATCH 111/462] scsi: Wire up ata_scsi_dma_need_drain for SAS HBA drivers We need ata_scsi_dma_need_drain for all drivers wired up to drive ATAPI devices through libata. That also includes the SAS HBA drivers in addition to native libata HBA drivers. Link: https://lore.kernel.org/r/20200615064624.37317-3-hch@lst.de Fixes: cc97923a5bcc ("block: move dma drain handling to scsi") Reported-by: Michael Ellerman Tested-by: Michael Ellerman Acked-by: Jack Wang Acked-by: John Garry Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/aic94xx/aic94xx_init.c | 1 + drivers/scsi/hisi_sas/hisi_sas_v1_hw.c | 1 + drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 1 + drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 1 + drivers/scsi/ipr.c | 1 + drivers/scsi/isci/init.c | 1 + drivers/scsi/mvsas/mv_init.c | 1 + drivers/scsi/pm8001/pm8001_init.c | 1 + 8 files changed, 8 insertions(+) diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index d022407e5645..bef47f38dd0d 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -40,6 +40,7 @@ static struct scsi_host_template aic94xx_sht = { /* .name is initialized */ .name = "aic94xx", .queuecommand = sas_queuecommand, + .dma_need_drain = ata_scsi_dma_need_drain, .target_alloc = sas_target_alloc, .slave_configure = sas_slave_configure, .scan_finished = asd_scan_finished, diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c index 2e1718f9ade2..09a7669dad4c 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c @@ -1756,6 +1756,7 @@ static struct scsi_host_template sht_v1_hw = { .proc_name = DRV_NAME, .module = THIS_MODULE, .queuecommand = sas_queuecommand, + .dma_need_drain = ata_scsi_dma_need_drain, .target_alloc = sas_target_alloc, .slave_configure = hisi_sas_slave_configure, .scan_finished = hisi_sas_scan_finished, diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index e7e7849a4c14..968d38702353 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -3532,6 +3532,7 @@ static struct scsi_host_template sht_v2_hw = { .proc_name = DRV_NAME, .module = THIS_MODULE, .queuecommand = sas_queuecommand, + .dma_need_drain = ata_scsi_dma_need_drain, .target_alloc = sas_target_alloc, .slave_configure = hisi_sas_slave_configure, .scan_finished = hisi_sas_scan_finished, diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 3e6b78a1f993..55e2321a65bc 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -3075,6 +3075,7 @@ static struct scsi_host_template sht_v3_hw = { .proc_name = DRV_NAME, .module = THIS_MODULE, .queuecommand = sas_queuecommand, + .dma_need_drain = ata_scsi_dma_need_drain, .target_alloc = sas_target_alloc, .slave_configure = hisi_sas_slave_configure, .scan_finished = hisi_sas_scan_finished, diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 7d77997d26d4..7d86f4ca266c 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6731,6 +6731,7 @@ static struct scsi_host_template driver_template = { .compat_ioctl = ipr_ioctl, #endif .queuecommand = ipr_queuecommand, + .dma_need_drain = ata_scsi_dma_need_drain, .eh_abort_handler = ipr_eh_abort, .eh_device_reset_handler = ipr_eh_dev_reset, .eh_host_reset_handler = ipr_eh_host_reset, diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index 974c3b9116d5..085e285f427d 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -153,6 +153,7 @@ static struct scsi_host_template isci_sht = { .name = DRV_NAME, .proc_name = DRV_NAME, .queuecommand = sas_queuecommand, + .dma_need_drain = ata_scsi_dma_need_drain, .target_alloc = sas_target_alloc, .slave_configure = sas_slave_configure, .scan_finished = isci_host_scan_finished, diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index 5973eed94938..b0de3bdb01db 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -33,6 +33,7 @@ static struct scsi_host_template mvs_sht = { .module = THIS_MODULE, .name = DRV_NAME, .queuecommand = sas_queuecommand, + .dma_need_drain = ata_scsi_dma_need_drain, .target_alloc = sas_target_alloc, .slave_configure = sas_slave_configure, .scan_finished = mvs_scan_finished, diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index a8f5344fdfda..9e99262a2b9d 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -87,6 +87,7 @@ static struct scsi_host_template pm8001_sht = { .module = THIS_MODULE, .name = DRV_NAME, .queuecommand = sas_queuecommand, + .dma_need_drain = ata_scsi_dma_need_drain, .target_alloc = sas_target_alloc, .slave_configure = sas_slave_configure, .scan_finished = pm8001_scan_finished, From e9c15badbb7b20ccdbadf5da14e0a68fbad51015 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 15 Jun 2020 13:13:58 +0100 Subject: [PATCH 112/462] fs: Do not check if there is a fsnotify watcher on pseudo inodes The kernel uses internal mounts created by kern_mount() and populated with files with no lookup path by alloc_file_pseudo() for a variety of reasons. An example of such a mount is for anonymous pipes. For pipes, every vfs_write() regardless of filesystem, calls fsnotify_modify() to notify of any changes which incurs a small amount of overhead in fsnotify even when there are no watchers. It can also trigger for reads and readv and writev, it was simply vfs_write() that was noticed first. A patch is pending that reduces, but does not eliminate, the overhead of fsnotify but for files that cannot be looked up via a path, even that small overhead is unnecessary. The user API for all notification subsystems (inotify, fanotify, ...) is based on the pathname and a dirfd and proc entries appear to be the only visible representation of the files. Proc does not have the same pathname as the internal entry and the proc inode is not the same as the internal inode so even if fanotify is used on a file under /proc/XX/fd, no useful events are notified. This patch changes alloc_file_pseudo() to always opt out of fsnotify by setting FMODE_NONOTIFY flag so that no check is made for fsnotify watchers on pseudo files. This should be safe as the underlying helper for the dentry is d_alloc_pseudo() which explicitly states that no lookups are ever performed meaning that fanotify should have nothing useful to attach to. The test motivating this was "perf bench sched messaging --pipe". On a single-socket machine using threads the difference of the patch was as follows. 5.7.0 5.7.0 vanilla nofsnotify-v1r1 Amean 1 1.3837 ( 0.00%) 1.3547 ( 2.10%) Amean 3 3.7360 ( 0.00%) 3.6543 ( 2.19%) Amean 5 5.8130 ( 0.00%) 5.7233 * 1.54%* Amean 7 8.1490 ( 0.00%) 7.9730 * 2.16%* Amean 12 14.6843 ( 0.00%) 14.1820 ( 3.42%) Amean 18 21.8840 ( 0.00%) 21.7460 ( 0.63%) Amean 24 28.8697 ( 0.00%) 29.1680 ( -1.03%) Amean 30 36.0787 ( 0.00%) 35.2640 * 2.26%* Amean 32 38.0527 ( 0.00%) 38.1223 ( -0.18%) The difference is small but in some cases it's outside the noise so while marginal, there is still some small benefit to ignoring fsnotify for files allocated via alloc_file_pseudo() in some cases. Link: https://lore.kernel.org/r/20200615121358.GF3183@techsingularity.net Signed-off-by: Mel Gorman Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/file_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/file_table.c b/fs/file_table.c index 656647f9575a..65603502fed6 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -230,7 +230,7 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt, d_set_d_op(path.dentry, &anon_ops); path.mnt = mntget(mnt); d_instantiate(path.dentry, inode); - file = alloc_file(&path, flags, fops); + file = alloc_file(&path, flags | FMODE_NONOTIFY, fops); if (IS_ERR(file)) { ihold(inode); path_put(&path); From 5e50311556c9f409a85740e3cb4c4511e7e27da0 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 4 Jun 2020 03:28:17 +0300 Subject: [PATCH 113/462] pinctrl: qcom: spmi-gpio: fix warning about irq chip reusage Fix the following warnings caused by reusage of the same irq_chip instance for all spmi-gpio gpio_irq_chip instances. Instead embed irq_chip into pmic_gpio_state struct. gpio gpiochip2: (c440000.qcom,spmi:pmic@2:gpio@c000): detected irqchip that is shared with multiple gpiochips: please fix the driver. gpio gpiochip3: (c440000.qcom,spmi:pmic@4:gpio@c000): detected irqchip that is shared with multiple gpiochips: please fix the driver. gpio gpiochip4: (c440000.qcom,spmi:pmic@a:gpio@c000): detected irqchip that is shared with multiple gpiochips: please fix the driver. Signed-off-by: Dmitry Baryshkov Acked-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200604002817.667160-1-dmitry.baryshkov@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-spmi-gpio.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index fe0be8a6ebb7..092a48e4dff5 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -170,6 +170,7 @@ struct pmic_gpio_state { struct regmap *map; struct pinctrl_dev *ctrl; struct gpio_chip chip; + struct irq_chip irq; }; static const struct pinconf_generic_params pmic_gpio_bindings[] = { @@ -917,16 +918,6 @@ static int pmic_gpio_populate(struct pmic_gpio_state *state, return 0; } -static struct irq_chip pmic_gpio_irq_chip = { - .name = "spmi-gpio", - .irq_ack = irq_chip_ack_parent, - .irq_mask = irq_chip_mask_parent, - .irq_unmask = irq_chip_unmask_parent, - .irq_set_type = irq_chip_set_type_parent, - .irq_set_wake = irq_chip_set_wake_parent, - .flags = IRQCHIP_MASK_ON_SUSPEND, -}; - static int pmic_gpio_domain_translate(struct irq_domain *domain, struct irq_fwspec *fwspec, unsigned long *hwirq, @@ -1053,8 +1044,16 @@ static int pmic_gpio_probe(struct platform_device *pdev) if (!parent_domain) return -ENXIO; + state->irq.name = "spmi-gpio", + state->irq.irq_ack = irq_chip_ack_parent, + state->irq.irq_mask = irq_chip_mask_parent, + state->irq.irq_unmask = irq_chip_unmask_parent, + state->irq.irq_set_type = irq_chip_set_type_parent, + state->irq.irq_set_wake = irq_chip_set_wake_parent, + state->irq.flags = IRQCHIP_MASK_ON_SUSPEND, + girq = &state->chip.irq; - girq->chip = &pmic_gpio_irq_chip; + girq->chip = &state->irq; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_level_irq; girq->fwnode = of_node_to_fwnode(state->dev->of_node); From 782b6b69847f34dda330530493ea62b7de3fd06a Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Thu, 4 Jun 2020 23:19:35 +0530 Subject: [PATCH 114/462] pinctrl: tegra: Use noirq suspend/resume callbacks Use noirq suspend/resume callbacks as other drivers which implement noirq suspend/resume callbacks (Ex:- PCIe) depend on pinctrl driver to configure the signals used by their respective devices in the noirq phase. Signed-off-by: Vidya Sagar Reviewed-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20200604174935.26560-1-vidyas@nvidia.com Signed-off-by: Linus Walleij --- drivers/pinctrl/tegra/pinctrl-tegra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/tegra/pinctrl-tegra.c b/drivers/pinctrl/tegra/pinctrl-tegra.c index 21661f6490d6..195cfe557511 100644 --- a/drivers/pinctrl/tegra/pinctrl-tegra.c +++ b/drivers/pinctrl/tegra/pinctrl-tegra.c @@ -731,8 +731,8 @@ static int tegra_pinctrl_resume(struct device *dev) } const struct dev_pm_ops tegra_pinctrl_pm = { - .suspend = &tegra_pinctrl_suspend, - .resume = &tegra_pinctrl_resume + .suspend_noirq = &tegra_pinctrl_suspend, + .resume_noirq = &tegra_pinctrl_resume }; static bool tegra_pinctrl_gpio_node_has_range(struct tegra_pmx *pmx) From 3e5b8f87994e1269a7a9ea1862df163b110932eb Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Mon, 8 Jun 2020 09:02:53 +0800 Subject: [PATCH 115/462] pinctrl: mcp23s08: Split to three parts: fix ptr_ret.cocci warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/pinctrl/pinctrl-mcp23s08_spi.c:129:1-3: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci Fixes: 0f04a81784fe ("pinctrl: mcp23s08: Split to three parts: core, I²C, SPI") Signed-off-by: kernel test robot Acked-by: Andy Shevchenko CC: Andy Shevchenko Link: https://lore.kernel.org/r/20200608010253.GA79576@44f7ab9e8d59 Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-mcp23s08_spi.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-mcp23s08_spi.c b/drivers/pinctrl/pinctrl-mcp23s08_spi.c index e06fb885fd2b..1f47a661b0a7 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08_spi.c +++ b/drivers/pinctrl/pinctrl-mcp23s08_spi.c @@ -126,10 +126,7 @@ static int mcp23s08_spi_regmap_init(struct mcp23s08 *mcp, struct device *dev, copy->name = name; mcp->regmap = devm_regmap_init(dev, &mcp23sxx_spi_regmap, mcp, copy); - if (IS_ERR(mcp->regmap)) - return PTR_ERR(mcp->regmap); - - return 0; + return PTR_ERR_OR_ZERO(mcp->regmap); } static int mcp23s08_probe(struct spi_device *spi) From 3ffbe35321f4671c924a47bb50ca8087f87b2791 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 07:31:14 +0100 Subject: [PATCH 116/462] drm/i915/selftests: Restore to default heartbeat Since we temporarily disable the heartbeat and restore back to the default value, we can use the stored defaults on the engine and avoid using a local. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200519063123.20673-3-chris@chris-wilson.co.uk (cherry picked from commit 3a230a554dbbc6cd5016cf1b56ee77cfcd48c7d8) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 25 +++---- drivers/gpu/drm/i915/gt/selftest_lrc.c | 67 +++++++------------ drivers/gpu/drm/i915/gt/selftest_rps.c | 69 ++++++++------------ drivers/gpu/drm/i915/gt/selftest_timeline.c | 15 ++--- 4 files changed, 67 insertions(+), 109 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 2b2efff6e19d..4aa4cc917d8b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -310,22 +310,20 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq) 1000)); } -static void engine_heartbeat_disable(struct intel_engine_cs *engine, - unsigned long *saved) +static void engine_heartbeat_disable(struct intel_engine_cs *engine) { - *saved = engine->props.heartbeat_interval_ms; engine->props.heartbeat_interval_ms = 0; intel_engine_pm_get(engine); intel_engine_park_heartbeat(engine); } -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) +static void engine_heartbeat_enable(struct intel_engine_cs *engine) { intel_engine_pm_put(engine); - engine->props.heartbeat_interval_ms = saved; + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; } static int igt_hang_sanitycheck(void *arg) @@ -473,7 +471,6 @@ static int igt_reset_nop_engine(void *arg) for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count, count; struct intel_context *ce; - unsigned long heartbeat; IGT_TIMEOUT(end_time); int err; @@ -485,7 +482,7 @@ static int igt_reset_nop_engine(void *arg) reset_engine_count = i915_reset_engine_count(global, engine); count = 0; - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { int i; @@ -529,7 +526,7 @@ static int igt_reset_nop_engine(void *arg) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); pr_info("%s(%s): %d resets\n", __func__, engine->name, count); @@ -564,7 +561,6 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; - unsigned long heartbeat; IGT_TIMEOUT(end_time); if (active && !intel_engine_can_store_dword(engine)) @@ -580,7 +576,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) reset_count = i915_reset_count(global); reset_engine_count = i915_reset_engine_count(global, engine); - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { if (active) { @@ -632,7 +628,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); if (err) break; @@ -789,7 +785,6 @@ static int __igt_reset_engines(struct intel_gt *gt, struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; - unsigned long heartbeat; IGT_TIMEOUT(end_time); if (flags & TEST_ACTIVE && @@ -832,7 +827,7 @@ static int __igt_reset_engines(struct intel_gt *gt, yield(); /* start all threads before we begin */ - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { struct i915_request *rq = NULL; @@ -906,7 +901,7 @@ static int __igt_reset_engines(struct intel_gt *gt, } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 824f99c4cc7c..ba97d1099e27 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -51,22 +51,20 @@ static struct i915_vma *create_scratch(struct intel_gt *gt) return vma; } -static void engine_heartbeat_disable(struct intel_engine_cs *engine, - unsigned long *saved) +static void engine_heartbeat_disable(struct intel_engine_cs *engine) { - *saved = engine->props.heartbeat_interval_ms; engine->props.heartbeat_interval_ms = 0; intel_engine_pm_get(engine); intel_engine_park_heartbeat(engine); } -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) +static void engine_heartbeat_enable(struct intel_engine_cs *engine) { intel_engine_pm_put(engine); - engine->props.heartbeat_interval_ms = saved; + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; } static bool is_active(struct i915_request *rq) @@ -224,7 +222,6 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) struct intel_context *ce[2] = {}; struct i915_request *rq[2]; struct igt_live_test t; - unsigned long saved; int n; if (prio && !intel_engine_has_preemption(engine)) @@ -237,7 +234,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) err = -EIO; break; } - engine_heartbeat_disable(engine, &saved); + engine_heartbeat_disable(engine); for (n = 0; n < ARRAY_SIZE(ce); n++) { struct intel_context *tmp; @@ -345,7 +342,7 @@ err_ce: intel_context_put(ce[n]); } - engine_heartbeat_enable(engine, saved); + engine_heartbeat_enable(engine); if (igt_live_test_end(&t)) err = -EIO; if (err) @@ -466,7 +463,6 @@ static int live_hold_reset(void *arg) for_each_engine(engine, gt, id) { struct intel_context *ce; - unsigned long heartbeat; struct i915_request *rq; ce = intel_context_create(engine); @@ -475,7 +471,7 @@ static int live_hold_reset(void *arg) break; } - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); if (IS_ERR(rq)) { @@ -535,7 +531,7 @@ static int live_hold_reset(void *arg) i915_request_put(rq); out: - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); intel_context_put(ce); if (err) break; @@ -580,10 +576,9 @@ static int live_error_interrupt(void *arg) for_each_engine(engine, gt, id) { const struct error_phase *p; - unsigned long heartbeat; int err = 0; - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); for (p = phases; p->error[0] != GOOD; p++) { struct i915_request *client[ARRAY_SIZE(phases->error)]; @@ -682,7 +677,7 @@ out: } } - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); if (err) { intel_gt_set_wedged(gt); return err; @@ -895,16 +890,14 @@ static int live_timeslice_preempt(void *arg) enum intel_engine_id id; for_each_engine(engine, gt, id) { - unsigned long saved; - if (!intel_engine_has_preemption(engine)) continue; memset(vaddr, 0, PAGE_SIZE); - engine_heartbeat_disable(engine, &saved); + engine_heartbeat_disable(engine); err = slice_semaphore_queue(engine, vma, count); - engine_heartbeat_enable(engine, saved); + engine_heartbeat_enable(engine); if (err) goto err_pin; @@ -1009,7 +1002,6 @@ static int live_timeslice_rewind(void *arg) enum { X = 1, Z, Y }; struct i915_request *rq[3] = {}; struct intel_context *ce; - unsigned long heartbeat; unsigned long timeslice; int i, err = 0; u32 *slot; @@ -1028,7 +1020,7 @@ static int live_timeslice_rewind(void *arg) * Expect execution/evaluation order XZY */ - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); timeslice = xchg(&engine->props.timeslice_duration_ms, 1); slot = memset32(engine->status_page.addr + 1000, 0, 4); @@ -1122,7 +1114,7 @@ err: wmb(); engine->props.timeslice_duration_ms = timeslice; - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); for (i = 0; i < 3; i++) i915_request_put(rq[i]); if (igt_flush_test(gt->i915)) @@ -1202,12 +1194,11 @@ static int live_timeslice_queue(void *arg) .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), }; struct i915_request *rq, *nop; - unsigned long saved; if (!intel_engine_has_preemption(engine)) continue; - engine_heartbeat_disable(engine, &saved); + engine_heartbeat_disable(engine); memset(vaddr, 0, PAGE_SIZE); /* ELSP[0]: semaphore wait */ @@ -1284,7 +1275,7 @@ static int live_timeslice_queue(void *arg) err_rq: i915_request_put(rq); err_heartbeat: - engine_heartbeat_enable(engine, saved); + engine_heartbeat_enable(engine); if (err) break; } @@ -4153,7 +4144,6 @@ static int reset_virtual_engine(struct intel_gt *gt, { struct intel_engine_cs *engine; struct intel_context *ve; - unsigned long *heartbeat; struct igt_spinner spin; struct i915_request *rq; unsigned int n; @@ -4165,15 +4155,9 @@ static int reset_virtual_engine(struct intel_gt *gt, * descendents are not executed while the capture is in progress. */ - heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL); - if (!heartbeat) + if (igt_spinner_init(&spin, gt)) return -ENOMEM; - if (igt_spinner_init(&spin, gt)) { - err = -ENOMEM; - goto out_free; - } - ve = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); @@ -4181,7 +4165,7 @@ static int reset_virtual_engine(struct intel_gt *gt, } for (n = 0; n < nsibling; n++) - engine_heartbeat_disable(siblings[n], &heartbeat[n]); + engine_heartbeat_disable(siblings[n]); rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); if (IS_ERR(rq)) { @@ -4252,13 +4236,11 @@ out_rq: i915_request_put(rq); out_heartbeat: for (n = 0; n < nsibling; n++) - engine_heartbeat_enable(siblings[n], heartbeat[n]); + engine_heartbeat_enable(siblings[n]); intel_context_put(ve); out_spin: igt_spinner_fini(&spin); -out_free: - kfree(heartbeat); return err; } @@ -4932,9 +4914,7 @@ static int live_lrc_gpr(void *arg) return PTR_ERR(scratch); for_each_engine(engine, gt, id) { - unsigned long heartbeat; - - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); err = __live_lrc_gpr(engine, scratch, false); if (err) @@ -4945,7 +4925,7 @@ static int live_lrc_gpr(void *arg) goto err; err: - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -5092,10 +5072,9 @@ static int live_lrc_timestamp(void *arg) */ for_each_engine(data.engine, gt, id) { - unsigned long heartbeat; int i, err = 0; - engine_heartbeat_disable(data.engine, &heartbeat); + engine_heartbeat_disable(data.engine); for (i = 0; i < ARRAY_SIZE(data.ce); i++) { struct intel_context *tmp; @@ -5128,7 +5107,7 @@ static int live_lrc_timestamp(void *arg) } err: - engine_heartbeat_enable(data.engine, heartbeat); + engine_heartbeat_enable(data.engine); for (i = 0; i < ARRAY_SIZE(data.ce); i++) { if (!data.ce[i]) break; diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index 6275d69aa9cc..5049c3dd08a6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -20,24 +20,20 @@ /* Try to isolate the impact of cstates from determing frequency response */ #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */ -static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine) +static void engine_heartbeat_disable(struct intel_engine_cs *engine) { - unsigned long old; - - old = fetch_and_zero(&engine->props.heartbeat_interval_ms); + engine->props.heartbeat_interval_ms = 0; intel_engine_pm_get(engine); intel_engine_park_heartbeat(engine); - - return old; } -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) +static void engine_heartbeat_enable(struct intel_engine_cs *engine) { intel_engine_pm_put(engine); - engine->props.heartbeat_interval_ms = saved; + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; } static void dummy_rps_work(struct work_struct *wrk) @@ -246,7 +242,6 @@ int live_rps_clock_interval(void *arg) intel_gt_check_clock_frequency(gt); for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; u32 cycles; u64 dt; @@ -254,13 +249,13 @@ int live_rps_clock_interval(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, MI_NOOP); if (IS_ERR(rq)) { - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); err = PTR_ERR(rq); break; } @@ -271,7 +266,7 @@ int live_rps_clock_interval(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -327,7 +322,7 @@ int live_rps_clock_interval(void *arg) intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (err == 0) { u64 time = intel_gt_pm_interval_to_ns(gt, cycles); @@ -405,7 +400,6 @@ int live_rps_control(void *arg) intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; ktime_t min_dt, max_dt; int f, limit; @@ -414,7 +408,7 @@ int live_rps_control(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, @@ -430,7 +424,7 @@ int live_rps_control(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -440,7 +434,7 @@ int live_rps_control(void *arg) pr_err("%s: could not set minimum frequency [%x], only %x!\n", engine->name, rps->min_freq, read_cagf(rps)); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); show_pstate_limits(rps); err = -EINVAL; break; @@ -457,7 +451,7 @@ int live_rps_control(void *arg) pr_err("%s: could not restore minimum frequency [%x], only %x!\n", engine->name, rps->min_freq, read_cagf(rps)); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); show_pstate_limits(rps); err = -EINVAL; break; @@ -472,7 +466,7 @@ int live_rps_control(void *arg) min_dt = ktime_sub(ktime_get(), min_dt); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n", engine->name, @@ -635,7 +629,6 @@ int live_rps_frequency_cs(void *arg) rps->work.func = dummy_rps_work; for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; struct i915_vma *vma; u32 *cancel, *cntr; @@ -644,14 +637,14 @@ int live_rps_frequency_cs(void *arg) int freq; } min, max; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); vma = create_spin_counter(engine, engine->kernel_context->vm, false, &cancel, &cntr); if (IS_ERR(vma)) { err = PTR_ERR(vma); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); break; } @@ -732,7 +725,7 @@ err_vma: i915_vma_unpin(vma); i915_vma_put(vma); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -778,7 +771,6 @@ int live_rps_frequency_srm(void *arg) rps->work.func = dummy_rps_work; for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; struct i915_vma *vma; u32 *cancel, *cntr; @@ -787,14 +779,14 @@ int live_rps_frequency_srm(void *arg) int freq; } min, max; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); vma = create_spin_counter(engine, engine->kernel_context->vm, true, &cancel, &cntr); if (IS_ERR(vma)) { err = PTR_ERR(vma); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); break; } @@ -874,7 +866,7 @@ err_vma: i915_vma_unpin(vma); i915_vma_put(vma); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -1066,16 +1058,14 @@ int live_rps_interrupt(void *arg) for_each_engine(engine, gt, id) { /* Keep the engine busy with a spinner; expect an UP! */ if (pm_events & GEN6_PM_RP_UP_THRESHOLD) { - unsigned long saved_heartbeat; - intel_gt_pm_wait_for_idle(engine->gt); GEM_BUG_ON(intel_rps_is_active(rps)); - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); err = __rps_up_interrupt(rps, engine, &spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (err) goto out; @@ -1084,15 +1074,13 @@ int live_rps_interrupt(void *arg) /* Keep the engine awake but idle and check for DOWN */ if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) { - unsigned long saved_heartbeat; - - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); intel_rc6_disable(>->rc6); err = __rps_down_interrupt(rps, engine); intel_rc6_enable(>->rc6); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); if (err) goto out; } @@ -1168,7 +1156,6 @@ int live_rps_power(void *arg) rps->work.func = dummy_rps_work; for_each_engine(engine, gt, id) { - unsigned long saved_heartbeat; struct i915_request *rq; struct { u64 power; @@ -1178,13 +1165,13 @@ int live_rps_power(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - saved_heartbeat = engine_heartbeat_disable(engine); + engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, MI_NOOP); if (IS_ERR(rq)) { - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); err = PTR_ERR(rq); break; } @@ -1195,7 +1182,7 @@ int live_rps_power(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -1208,7 +1195,7 @@ int live_rps_power(void *arg) min.power = measure_power_at(rps, &min.freq); igt_spinner_end(&spin); - engine_heartbeat_enable(engine, saved_heartbeat); + engine_heartbeat_enable(engine); pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n", engine->name, diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index c2578a0f2f14..ef1c35073dc0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -751,22 +751,20 @@ out_free: return err; } -static void engine_heartbeat_disable(struct intel_engine_cs *engine, - unsigned long *saved) +static void engine_heartbeat_disable(struct intel_engine_cs *engine) { - *saved = engine->props.heartbeat_interval_ms; engine->props.heartbeat_interval_ms = 0; intel_engine_pm_get(engine); intel_engine_park_heartbeat(engine); } -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) +static void engine_heartbeat_enable(struct intel_engine_cs *engine) { intel_engine_pm_put(engine); - engine->props.heartbeat_interval_ms = saved; + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; } static int live_hwsp_rollover_kernel(void *arg) @@ -785,10 +783,9 @@ static int live_hwsp_rollover_kernel(void *arg) struct intel_context *ce = engine->kernel_context; struct intel_timeline *tl = ce->timeline; struct i915_request *rq[3] = {}; - unsigned long heartbeat; int i; - engine_heartbeat_disable(engine, &heartbeat); + engine_heartbeat_disable(engine); if (intel_gt_wait_for_idle(gt, HZ / 2)) { err = -EIO; goto out; @@ -839,7 +836,7 @@ static int live_hwsp_rollover_kernel(void *arg) out: for (i = 0; i < ARRAY_SIZE(rq); i++) i915_request_put(rq[i]); - engine_heartbeat_enable(engine, heartbeat); + engine_heartbeat_enable(engine); if (err) break; } From 4178b5a60cefd4721095cce974af7c142f2a6331 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 May 2020 17:24:18 +0100 Subject: [PATCH 117/462] drm/i915/gt: Prevent timeslicing into unpreemptable requests We have a I915_REQUEST_NOPREEMPT flag that we set when we must prevent the HW from preempting during the course of this request. We need to honour this flag and protect the HW even if we have a heartbeat request, or other maximum priority barrier, pending. As such, restrict the timeslicing check to avoid preempting into the topmost priority band, leaving the unpreemptable requests in blissful peace running uninterrupted on the HW. v2: Set the I915_PRIORITY_BARRIER to be less than I915_PRIORITY_UNPREEMPTABLE so that we never submit a request (heartbeat or barrier) that can legitimately preempt the current non-premptable request. Fixes: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200527162418.24755-1-chris@chris-wilson.co.uk (cherry picked from commit b72f02d78e4f257761ed003444ae52083f962076) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + drivers/gpu/drm/i915/gt/selftest_lrc.c | 118 ++++++++++++++++++++- drivers/gpu/drm/i915/i915_priolist_types.h | 2 +- 3 files changed, 119 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 0b58e6c1e235..1858331e423e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1925,6 +1925,7 @@ need_timeslice(const struct intel_engine_cs *engine, if (!list_is_last(&rq->sched.link, &engine->active.requests)) hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); + GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE); return hint >= effective_prio(rq); } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index ba97d1099e27..924bc01ef526 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -823,7 +823,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer, } } - err = release_queue(outer, vma, n, INT_MAX); + err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER); if (err) goto out; @@ -1289,6 +1289,121 @@ err_obj: return err; } +static int live_timeslice_nopreempt(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * We should not timeslice into a request that is marked with + * I915_REQUEST_NOPREEMPT. + */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + struct i915_request *rq; + unsigned long timeslice; + + if (!intel_engine_has_preemption(engine)) + continue; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + engine_heartbeat_disable(engine); + timeslice = xchg(&engine->props.timeslice_duration_ms, 1); + + /* Create an unpreemptible spinner */ + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_heartbeat; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + i915_request_put(rq); + err = -ETIME; + goto out_spin; + } + + set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); + i915_request_put(rq); + + /* Followed by a maximum priority barrier (heartbeat) */ + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(rq); + goto out_spin; + } + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_spin; + } + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_get(rq); + i915_request_add(rq); + + /* + * Wait until the barrier is in ELSP, and we know timeslicing + * will have been activated. + */ + if (wait_for_submit(engine, rq, HZ / 2)) { + i915_request_put(rq); + err = -ETIME; + goto out_spin; + } + + /* + * Since the ELSP[0] request is unpreemptible, it should not + * allow the maximum priority barrier through. Wait long + * enough to see if it is timesliced in by mistake. + */ + if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) { + pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", + engine->name); + err = -EINVAL; + } + i915_request_put(rq); + +out_spin: + igt_spinner_end(&spin); +out_heartbeat: + xchg(&engine->props.timeslice_duration_ms, timeslice); + engine_heartbeat_enable(engine); + if (err) + break; + + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + } + + igt_spinner_fini(&spin); + return err; +} + static int live_busywait_preempt(void *arg) { struct intel_gt *gt = arg; @@ -4296,6 +4411,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_timeslice_preempt), SUBTEST(live_timeslice_rewind), SUBTEST(live_timeslice_queue), + SUBTEST(live_timeslice_nopreempt), SUBTEST(live_busywait_preempt), SUBTEST(live_preempt), SUBTEST(live_late_preempt), diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 5003a71113cb..8aa7866ec6b6 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -42,7 +42,7 @@ enum { * active request. */ #define I915_PRIORITY_UNPREEMPTABLE INT_MAX -#define I915_PRIORITY_BARRIER INT_MAX +#define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1) struct i915_priolist { struct list_head requests[I915_PRIORITY_COUNT]; From a43555ac908c604f45ed98628805aec9355b9737 Mon Sep 17 00:00:00 2001 From: Khaled Almahallawy Date: Mon, 8 Jun 2020 13:45:37 -0700 Subject: [PATCH 118/462] drm/i915/tc: fix the reset of ln0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting ln0 similar to ln1 Fixes: 3b51be4e4061b ("drm/i915/tc: Update DP_MODE programming") Cc: # v5.5+ Signed-off-by: Khaled Almahallawy Reviewed-by: José Roberto de Souza Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20200608204537.28468-1-khaled.almahallawy@intel.com (cherry picked from commit 4f72a8ee819d57d7329d88f487a2fc9b45153177) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index aa22465bb56e..f53de9664903 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2987,7 +2987,7 @@ icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port, ln1 = intel_de_read(dev_priv, MG_DP_MODE(1, tc_port)); } - ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X1_MODE); + ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); /* DPPATC */ From 8ab3a3812aa90e488813e719308ffd807b865624 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Jun 2020 16:17:23 +0100 Subject: [PATCH 119/462] drm/i915/gt: Incrementally check for rewinding In commit 5ba32c7be81e ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL"), we placed the check for rewinding a context on actually submitting the next request in that context. This was so that we only had to check once, and could do so with precision avoiding as many forced restores as possible. For example, to ensure that we can resubmit the same request a couple of times, we include a small wa_tail such that on the next submission, the ring->tail will appear to move forwards when resubmitting the same request. This is very common as it will happen for every lite-restore to fill the second port after a context switch. However, intel_ring_direction() is limited in precision to movements of upto half the ring size. The consequence being that if we tried to unwind many requests, we could exceed half the ring and flip the sense of the direction, so missing a force restore. As no request can be greater than half the ring (i.e. 2048 bytes in the smallest case), we can check for rollback incrementally. As we check against the tail that would be submitted, we do not lose any sensitivity and allow lite restores for the simple case. We still need to double check upon submitting the context, to allow for multiple preemptions and resubmissions. Fixes: 5ba32c7be81e ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: # v5.4+ Reviewed-by: Bruce Chang Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200609151723.12971-1-chris@chris-wilson.co.uk (cherry picked from commit e36ba817fa966f81fb1c8d16f3721b5a644b2fa9) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 21 +++- drivers/gpu/drm/i915/gt/intel_ring.c | 4 + drivers/gpu/drm/i915/gt/selftest_mocs.c | 18 ++- drivers/gpu/drm/i915/gt/selftest_ring.c | 110 ++++++++++++++++++ .../drm/i915/selftests/i915_mock_selftests.h | 1 + 6 files changed, 154 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/selftest_ring.c diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index da5b61085257..8691eb61e185 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -646,7 +646,7 @@ static int engine_setup_common(struct intel_engine_cs *engine) struct measure_breadcrumb { struct i915_request rq; struct intel_ring ring; - u32 cs[1024]; + u32 cs[2048]; }; static int measure_breadcrumb_dw(struct intel_context *ce) @@ -668,6 +668,8 @@ static int measure_breadcrumb_dw(struct intel_context *ce) frame->ring.vaddr = frame->cs; frame->ring.size = sizeof(frame->cs); + frame->ring.wrap = + BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size); frame->ring.effective_size = frame->ring.size; intel_ring_update_space(&frame->ring); frame->rq.ring = &frame->ring; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 1858331e423e..7c3d8ef4a47c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1134,6 +1134,13 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + /* Check in case we rollback so far we wrap [size/2] */ + if (intel_ring_direction(rq->ring, + intel_ring_wrap(rq->ring, + rq->tail), + rq->ring->tail) > 0) + rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; + active = rq; } else { struct intel_engine_cs *owner = rq->context->engine; @@ -1498,8 +1505,9 @@ static u64 execlists_update_context(struct i915_request *rq) * HW has a tendency to ignore us rewinding the TAIL to the end of * an earlier request. */ + GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail); + prev = rq->ring->tail; tail = intel_ring_set_tail(rq->ring, rq->tail); - prev = ce->lrc_reg_state[CTX_RING_TAIL]; if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) desc |= CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state[CTX_RING_TAIL] = tail; @@ -4758,6 +4766,14 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode) return 0; } +static void assert_request_valid(struct i915_request *rq) +{ + struct intel_ring *ring __maybe_unused = rq->ring; + + /* Can we unwind this request without appearing to go forwards? */ + GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0); +} + /* * Reserve space for 2 NOOPs at the end of each request to be * used as a workaround for not being allowed to do lite @@ -4770,6 +4786,9 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) *cs++ = MI_NOOP; request->wa_tail = intel_ring_offset(request, cs); + /* Check that entire request is less than half the ring */ + assert_request_valid(request); + return cs; } diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 8cda1b7e17ba..bdb324167ef3 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -315,3 +315,7 @@ int intel_ring_cacheline_align(struct i915_request *rq) GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_ring.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index 8831ffee2061..63f87d8608c3 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -18,6 +18,20 @@ struct live_mocs { void *vaddr; }; +static struct intel_context *mocs_context_create(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return ce; + + /* We build large requests to read the registers from the ring */ + ce->ring = __intel_context_ring_size(SZ_16K); + + return ce; +} + static int request_add_sync(struct i915_request *rq, int err) { i915_request_get(rq); @@ -301,7 +315,7 @@ static int live_mocs_clean(void *arg) for_each_engine(engine, gt, id) { struct intel_context *ce; - ce = intel_context_create(engine); + ce = mocs_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); break; @@ -395,7 +409,7 @@ static int live_mocs_reset(void *arg) for_each_engine(engine, gt, id) { struct intel_context *ce; - ce = intel_context_create(engine); + ce = mocs_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); break; diff --git a/drivers/gpu/drm/i915/gt/selftest_ring.c b/drivers/gpu/drm/i915/gt/selftest_ring.c new file mode 100644 index 000000000000..2a8c534dc125 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_ring.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2020 Intel Corporation + */ + +static struct intel_ring *mock_ring(unsigned long sz) +{ + struct intel_ring *ring; + + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); + if (!ring) + return NULL; + + kref_init(&ring->ref); + ring->size = sz; + ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(sz); + ring->effective_size = sz; + ring->vaddr = (void *)(ring + 1); + atomic_set(&ring->pin_count, 1); + + intel_ring_update_space(ring); + + return ring; +} + +static void mock_ring_free(struct intel_ring *ring) +{ + kfree(ring); +} + +static int check_ring_direction(struct intel_ring *ring, + u32 next, u32 prev, + int expected) +{ + int result; + + result = intel_ring_direction(ring, next, prev); + if (result < 0) + result = -1; + else if (result > 0) + result = 1; + + if (result != expected) { + pr_err("intel_ring_direction(%u, %u):%d != %d\n", + next, prev, result, expected); + return -EINVAL; + } + + return 0; +} + +static int check_ring_step(struct intel_ring *ring, u32 x, u32 step) +{ + u32 prev = x, next = intel_ring_wrap(ring, x + step); + int err = 0; + + err |= check_ring_direction(ring, next, next, 0); + err |= check_ring_direction(ring, prev, prev, 0); + err |= check_ring_direction(ring, next, prev, 1); + err |= check_ring_direction(ring, prev, next, -1); + + return err; +} + +static int check_ring_offset(struct intel_ring *ring, u32 x, u32 step) +{ + int err = 0; + + err |= check_ring_step(ring, x, step); + err |= check_ring_step(ring, intel_ring_wrap(ring, x + 1), step); + err |= check_ring_step(ring, intel_ring_wrap(ring, x - 1), step); + + return err; +} + +static int igt_ring_direction(void *dummy) +{ + struct intel_ring *ring; + unsigned int half = 2048; + int step, err = 0; + + ring = mock_ring(2 * half); + if (!ring) + return -ENOMEM; + + GEM_BUG_ON(ring->size != 2 * half); + + /* Precision of wrap detection is limited to ring->size / 2 */ + for (step = 1; step < half; step <<= 1) { + err |= check_ring_offset(ring, 0, step); + err |= check_ring_offset(ring, half, step); + } + err |= check_ring_step(ring, 0, half - 64); + + /* And check unwrapped handling for good measure */ + err |= check_ring_offset(ring, 0, 2 * half + 64); + err |= check_ring_offset(ring, 3 * half, 1); + + mock_ring_free(ring); + return err; +} + +int intel_ring_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_ring_direction), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 6a2be7d0dd95..6090ce35226b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -21,6 +21,7 @@ selftest(fence, i915_sw_fence_mock_selftests) selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) +selftest(ring, intel_ring_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests) selftest(timelines, intel_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) From 898e4e57adaeeefcfcf23f5d83360e513b69172f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 10 Jun 2020 01:06:16 +0300 Subject: [PATCH 120/462] drm/i915/icl: Disable DIP on MST ports with the transcoder clock still on According to BSpec the Data Island Packet should be disabled after disabling the transcoder, but before the transcoder clock select is set to none. On an ICL RVP, daisy-chained MST config not following this leads to a hang with the following MCE when disabling the output: [ 870.948739] mce: [Hardware Error]: CPU 0: Machine Check Exception: 5 Bank 6: ba00000011000402 [ 871.019212] mce: [Hardware Error]: RIP !INEXACT! 10: {poll_idle+0x92/0xb0} [ 871.019212] mce: [Hardware Error]: TSC 135a261fe61 [ 871.019212] mce: [Hardware Error]: PROCESSOR 0:706e5 TIME 1591739604 SOCKET 0 APIC 0 microcode 20 [ 871.019212] mce: [Hardware Error]: Run the above through 'mcelog --ascii' [ 871.019212] mce: [Hardware Error]: Machine check: Processor context corrupt [ 871.019212] Kernel panic - not syncing: Fatal machine check [ 871.019212] Kernel Offset: disabled Bspec: 4287 Fixes: fa37a213275c ("drm/i915: Stop sending DP SDPs on ddi disable") Cc: Gwan-gyeong Mun Cc: Uma Shankar Signed-off-by: Imre Deak Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20200609220616.6015-1-imre.deak@intel.com (cherry picked from commit c980216dd224c52b5c70172753c209b653d84958) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_ddi.c | 4 +++- drivers/gpu/drm/i915/display/intel_dp_mst.c | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index f53de9664903..f92c5ed3f5d2 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3472,7 +3472,9 @@ static void intel_ddi_post_disable_dp(struct intel_atomic_state *state, INTEL_OUTPUT_DP_MST); enum phy phy = intel_port_to_phy(dev_priv, encoder->port); - intel_dp_set_infoframes(encoder, false, old_crtc_state, old_conn_state); + if (!is_mst) + intel_dp_set_infoframes(encoder, false, + old_crtc_state, old_conn_state); /* * Power down sink before disabling the port, otherwise we end diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index d18b406f2a7d..f29e51ce489c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -397,6 +397,14 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state, */ drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, false); + + /* + * BSpec 4287: disable DIP after the transcoder is disabled and before + * the transcoder clock select is set to none. + */ + if (last_mst_stream) + intel_dp_set_infoframes(&intel_dig_port->base, false, + old_crtc_state, NULL); /* * From TGL spec: "If multi-stream slave transcoder: Configure * Transcoder Clock Select to direct no clock to the transcoder" From ef50fa9bd17d13d0611e39e13b37bbd3e1ea50bf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 10:30:15 +0100 Subject: [PATCH 121/462] drm/i915/gt: Move hsw GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. v2: Leave HSW_SCRATCH to set an explicit value, not or in our disable bit. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2011 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611093015.11370-1-chris@chris-wilson.co.uk (cherry picked from commit f93ec5fb563779bda4501890b1854526de58e0f1) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 48 +++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 39 +---------------- 2 files changed, 50 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 90a2b9e399b0..b5d0fbe3b211 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -178,6 +178,12 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) wa_write_masked_or(wal, reg, set, set); } +static void +wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) +{ + wa_write_masked_or(wal, reg, clr, 0); +} + static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { @@ -686,6 +692,46 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* L3 caching of data atomics doesn't work -- disable it. */ + wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); + + wa_add(wal, + HSW_ROW_CHICKEN3, 0, + _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), + 0 /* XXX does this reg exist? */); + + /* WaVSRefCountFullforceMissDisable:hsw */ + wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME); + + wa_masked_dis(wal, + CACHE_MODE_0_GEN7, + /* WaDisable_RenderCache_OperationalFlush:hsw */ + RC_OP_FLUSH_ENABLE | + /* enable HiZ Raw Stall Optimization */ + HIZ_RAW_STALL_OPT_DISABLE); + + /* WaDisable4x2SubspanOptimization:hsw */ + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, GEN7_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + + /* WaSampleCChickenBitEnable:hsw */ + wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); +} + static void gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -963,6 +1009,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) bxt_gt_workarounds_init(i915, wal); else if (IS_SKYLAKE(i915)) skl_gt_workarounds_init(i915, wal); + else if (IS_HASWELL(i915)) + hsw_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 696491d71a1d..0b7a4c5f179d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7230,45 +7230,10 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) { - /* L3 caching of data atomics doesn't work -- disable it. */ - I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); - I915_WRITE(HSW_ROW_CHICKEN3, - _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); - /* This is required by WaCatErrorRejectionIssue:hsw */ I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, - I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - - /* WaVSRefCountFullforceMissDisable:hsw */ - I915_WRITE(GEN7_FF_THREAD_MODE, - I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); - - /* WaDisable_RenderCache_OperationalFlush:hsw */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* enable HiZ Raw Stall Optimization */ - I915_WRITE(CACHE_MODE_0_GEN7, - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); - - /* WaDisable4x2SubspanOptimization:hsw */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - - /* WaSampleCChickenBitEnable:hsw */ - I915_WRITE(HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE)); + I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | + GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); /* WaSwitchSolVfFArbitrationPriority:hsw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); From 7237b190add0794bd95979018a23eda698f2705d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:36 +0100 Subject: [PATCH 122/462] drm/i915/gt: Move ivb GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-2-chris@chris-wilson.co.uk (cherry picked from commit 19f1f627b33385a2f0855cbc7d33d86d7f4a1e78) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 62 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 48 ---------------- 3 files changed, 63 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b5d0fbe3b211..d9b72679ca82 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -692,6 +692,66 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* WaDisableEarlyCull:ivb */ + wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); + + /* WaDisablePSDDualDispatchEnable:ivb */ + if (IS_IVB_GT1(i915)) + wa_masked_en(wal, + GEN7_HALF_SLICE_CHICKEN1, + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + + /* WaDisable_RenderCache_OperationalFlush:ivb */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); + + /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ + wa_masked_dis(wal, + GEN7_COMMON_SLICE_CHICKEN1, + GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); + + /* WaApplyL3ControlAndL3ChickenMode:ivb */ + wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL); + wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE); + + /* WaForceL3Serialization:ivb */ + wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); + + /* + * WaVSThreadDispatchOverride:ivb,vlv + * + * This actually overrides the dispatch + * mode for all thread types. + */ + wa_write_masked_or(wal, GEN7_FF_THREAD_MODE, + GEN7_FF_SCHED_MASK, + GEN7_FF_TS_SCHED_HW | + GEN7_FF_VS_SCHED_HW | + GEN7_FF_DS_SCHED_HW); + + if (0) { /* causes HiZ corruption on ivb:gt1 */ + /* enable HiZ Raw Stall Optimization */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + } + + /* WaDisable4x2SubspanOptimization:ivb */ + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, GEN7_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); +} + static void hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -1011,6 +1071,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) skl_gt_workarounds_init(i915, wal); else if (IS_HASWELL(i915)) hsw_gt_workarounds_init(i915, wal); + else if (IS_IVYBRIDGE(i915)) + ivb_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7717581350bd..06cd1d28a176 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7896,7 +7896,7 @@ enum { /* GEN7 chicken */ #define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010) - #define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1 << 10) | (1 << 26)) + #define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC (1 << 10) #define GEN9_RHWO_OPTIMIZATION_DISABLE (1 << 14) #define COMMON_SLICE_CHICKEN2 _MMIO(0x7014) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0b7a4c5f179d..7236644af40b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7247,32 +7247,11 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); - /* WaDisableEarlyCull:ivb */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); - /* WaDisableBackToBackFlipFix:ivb */ I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); - /* WaDisablePSDDualDispatchEnable:ivb */ - if (IS_IVB_GT1(dev_priv)) - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); - - /* WaDisable_RenderCache_OperationalFlush:ivb */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ - I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, - GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); - - /* WaApplyL3ControlAndL3ChickenMode:ivb */ - I915_WRITE(GEN7_L3CNTLREG1, - GEN7_WA_FOR_GEN7_L3_CONTROL); - I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, - GEN7_WA_L3_CHICKEN_MODE); if (IS_IVB_GT1(dev_priv)) I915_WRITE(GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); @@ -7284,10 +7263,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); } - /* WaForceL3Serialization:ivb */ - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & - ~L3SQ_URB_READ_CAM_MATCH_DISABLE); - /* * According to the spec, bit 13 (RCZUNIT) must be set on IVB. * This implements the WaDisableRCZUnitClockGating:ivb workaround. @@ -7302,29 +7277,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) g4x_disable_trickle_feed(dev_priv); - gen7_setup_fixed_func_scheduler(dev_priv); - - if (0) { /* causes HiZ corruption on ivb:gt1 */ - /* enable HiZ Raw Stall Optimization */ - I915_WRITE(CACHE_MODE_0_GEN7, - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); - } - - /* WaDisable4x2SubspanOptimization:ivb */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); snpcr &= ~GEN6_MBC_SNPCR_MASK; snpcr |= GEN6_MBC_SNPCR_MED; From 695a2b11649e99bbf15d278042247042c42b8728 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:37 +0100 Subject: [PATCH 123/462] drm/i915/gt: Move vlv GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-3-chris@chris-wilson.co.uk (cherry picked from commit 7331c356b6d2d8a01422cacab27478a1dba9fa2a) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 59 ++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 61 --------------------- 2 files changed, 59 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index d9b72679ca82..9923ff1a3982 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -752,6 +752,63 @@ ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GEN6_WIZ_HASHING_16x4); } +static void +vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* WaDisableEarlyCull:vlv */ + wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); + + /* WaPsdDispatchEnable:vlv */ + /* WaDisablePSDDualDispatchEnable:vlv */ + wa_masked_en(wal, + GEN7_HALF_SLICE_CHICKEN1, + GEN7_MAX_PS_THREAD_DEP | + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + + /* WaDisable_RenderCache_OperationalFlush:vlv */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); + + /* WaForceL3Serialization:vlv */ + wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); + + /* + * WaVSThreadDispatchOverride:ivb,vlv + * + * This actually overrides the dispatch + * mode for all thread types. + */ + wa_write_masked_or(wal, + GEN7_FF_THREAD_MODE, + GEN7_FF_SCHED_MASK, + GEN7_FF_TS_SCHED_HW | + GEN7_FF_VS_SCHED_HW | + GEN7_FF_DS_SCHED_HW); + + /* + * BSpec says this must be set, even though + * WaDisable4x2SubspanOptimization isn't listed for VLV. + */ + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, GEN7_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + + /* + * WaIncreaseL3CreditsForVLVB0:vlv + * This is the hardware default actually. + */ + wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); +} + static void hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -1071,6 +1128,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) skl_gt_workarounds_init(i915, wal); else if (IS_HASWELL(i915)) hsw_gt_workarounds_init(i915, wal); + else if (IS_VALLEYVIEW(i915)) + vlv_gt_workarounds_init(i915, wal); else if (IS_IVYBRIDGE(i915)) ivb_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7236644af40b..cea7923c9cd6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6986,24 +6986,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) gen6_check_mch_setup(dev_priv); } -static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) -{ - u32 reg = I915_READ(GEN7_FF_THREAD_MODE); - - /* - * WaVSThreadDispatchOverride:ivb,vlv - * - * This actually overrides the dispatch - * mode for all thread types. - */ - reg &= ~GEN7_FF_SCHED_MASK; - reg |= GEN7_FF_TS_SCHED_HW; - reg |= GEN7_FF_VS_SCHED_HW; - reg |= GEN7_FF_DS_SCHED_HW; - - I915_WRITE(GEN7_FF_THREAD_MODE, reg); -} - static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) { /* @@ -7290,28 +7272,11 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) { - /* WaDisableEarlyCull:vlv */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); - /* WaDisableBackToBackFlipFix:vlv */ I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); - /* WaPsdDispatchEnable:vlv */ - /* WaDisablePSDDualDispatchEnable:vlv */ - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); - - /* WaDisable_RenderCache_OperationalFlush:vlv */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* WaForceL3Serialization:vlv */ - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & - ~L3SQ_URB_READ_CAM_MATCH_DISABLE); - /* WaDisableDopClockGating:vlv */ I915_WRITE(GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); @@ -7321,8 +7286,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - gen7_setup_fixed_func_scheduler(dev_priv); - /* * According to the spec, bit 13 (RCZUNIT) must be set on IVB. * This implements the WaDisableRCZUnitClockGating:vlv workaround. @@ -7336,30 +7299,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN7_UCGCTL4, I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); - /* - * BSpec says this must be set, even though - * WaDisable4x2SubspanOptimization isn't listed for VLV. - */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - - /* - * WaIncreaseL3CreditsForVLVB0:vlv - * This is the hardware default actually. - */ - I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); - /* * WaDisableVLVClockGating_VBIIssue:vlv * Disable clock gating on th GCFG unit to prevent a delay From fd2599bda5a989c3332f4956fd7760ec32bd51ee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:38 +0100 Subject: [PATCH 124/462] drm/i915/gt: Move snb GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-4-chris@chris-wilson.co.uk (cherry picked from commit c3b93a943f2c9ee4a106db100a2fc3b2f126bfc5) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 41 +++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 33 ----------------- 2 files changed, 41 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 9923ff1a3982..c2d57f65b147 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -692,6 +692,45 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ + wa_masked_en(wal, + _3D_CHICKEN, + _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB); + + /* WaDisable_RenderCache_OperationalFlush:snb */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, + GEN6_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + + wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB); + + wa_masked_en(wal, + _3D_CHICKEN3, + /* WaStripsFansDisableFastClipPerformanceFix:snb */ + _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL | + /* + * Bspec says: + * "This bit must be set if 3DSTATE_CLIP clip mode is set + * to normal and 3DSTATE_SF number of SF output attributes + * is more than 16." + */ + _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH); +} + static void ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -1132,6 +1171,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) vlv_gt_workarounds_init(i915, wal); else if (IS_IVYBRIDGE(i915)) ivb_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 6)) + snb_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cea7923c9cd6..5db0ebe5eee0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6902,27 +6902,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_ELPIN_409_SELECT); - /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ - I915_WRITE(_3D_CHICKEN, - _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); - - /* WaDisable_RenderCache_OperationalFlush:snb */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* - * BSpec recoomends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN6_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); - I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | GEN6_BLBUNIT_CLOCK_GATE_DISABLE | @@ -6945,18 +6924,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | GEN6_RCCUNIT_CLOCK_GATE_DISABLE); - /* WaStripsFansDisableFastClipPerformanceFix:snb */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); - - /* - * Bspec says: - * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and - * 3DSTATE_SF number of SF output attributes is more than 16." - */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); - /* * According to the spec the following bits should be * set in order to enable memory self-refresh and fbc: From eacf21040aa97fd1b3c6bb201bfd43820e1c49be Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:39 +0100 Subject: [PATCH 125/462] drm/i915/gt: Move ilk GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-5-chris@chris-wilson.co.uk (cherry picked from commit 806a45c0838d253e306a6384057e851b65d11099) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 14 ++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 10 ---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index c2d57f65b147..5ccfe36c2978 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -692,6 +692,18 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); + + /* WaDisableRenderCachePipelinedFlush:ilk */ + wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); + + /* WaDisable_RenderCache_OperationalFlush:ilk */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); +} + static void snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -1173,6 +1185,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) ivb_gt_workarounds_init(i915, wal); else if (IS_GEN(i915, 6)) snb_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 5)) + ilk_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5db0ebe5eee0..7ebe4fa3a162 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6830,16 +6830,6 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(ILK_DISPLAY_CHICKEN2, I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_ELPIN_409_SELECT); - I915_WRITE(_3D_CHICKEN2, - _3D_CHICKEN2_WM_READ_PIPELINED << 16 | - _3D_CHICKEN2_WM_READ_PIPELINED); - - /* WaDisableRenderCachePipelinedFlush:ilk */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:ilk */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); g4x_disable_trickle_feed(dev_priv); From 27582a9c917940bc71c0df0b8e022cbde8d735d2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jun 2020 09:01:40 +0100 Subject: [PATCH 126/462] drm/i915/gt: Move gen4 GT workarounds from init_clock_gating to workarounds Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and that they are included in our verification that the workarounds are applied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200611080140.30228-6-chris@chris-wilson.co.uk (cherry picked from commit 2bcefd0d263ab4a72f0d61921ae6b0dc81606551) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 29 ++++++++++++++++----- drivers/gpu/drm/i915/intel_pm.c | 15 ----------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 5ccfe36c2978..85d2bef51524 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -692,16 +692,29 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } +static void +gen4_gt_workarounds_init(struct drm_i915_private *i915, + struct i915_wa_list *wal) +{ + /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); +} + +static void +g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + gen4_gt_workarounds_init(i915, wal); + + /* WaDisableRenderCachePipelinedFlush:g4x,ilk */ + wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); +} + static void ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { + g4x_gt_workarounds_init(i915, wal); + wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); - - /* WaDisableRenderCachePipelinedFlush:ilk */ - wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); - - /* WaDisable_RenderCache_OperationalFlush:ilk */ - wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); } static void @@ -1187,6 +1200,10 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) snb_gt_workarounds_init(i915, wal); else if (IS_GEN(i915, 5)) ilk_gt_workarounds_init(i915, wal); + else if (IS_G4X(i915)) + g4x_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 4)) + gen4_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7ebe4fa3a162..07f663cd2d1c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7308,13 +7308,6 @@ static void g4x_init_clock_gating(struct drm_i915_private *dev_priv) dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; I915_WRITE(DSPCLK_GATE_D, dspclk_gate); - /* WaDisableRenderCachePipelinedFlush */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:g4x */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - g4x_disable_trickle_feed(dev_priv); } @@ -7330,11 +7323,6 @@ static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(uncore, MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:gen4 */ - intel_uncore_write(uncore, - CACHE_MODE_0, - _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } static void i965g_init_clock_gating(struct drm_i915_private *dev_priv) @@ -7347,9 +7335,6 @@ static void i965g_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(RENCLK_GATE_D2, 0); I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:gen4 */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } static void gen3_init_clock_gating(struct drm_i915_private *dev_priv) From a3005c2edf7e8c3478880db1ca84028a2b6819bb Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 12 Jun 2020 15:17:31 +0300 Subject: [PATCH 127/462] drm/i915/icl+: Fix hotplug interrupt disabling after storm detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atm, hotplug interrupts on TypeC ports are left enabled after detecting an interrupt storm, fix this. Reported-by: Kunal Joshi References: https://gitlab.freedesktop.org/drm/intel/-/issues/351 Bugzilla: https://gitlab.freedesktop.org/drm/intel/-/issues/1964 Cc: Kunal Joshi Cc: stable@vger.kernel.org Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200612121731.19596-1-imre.deak@intel.com (cherry picked from commit 587a87b9d7e94927edcdea018565bc1939381eb1) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4dc601dffc08..284cf078135a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3125,6 +3125,7 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) val = I915_READ(GEN11_DE_HPD_IMR); val &= ~hotplug_irqs; + val |= ~enabled_irqs & hotplug_irqs; I915_WRITE(GEN11_DE_HPD_IMR, val); POSTING_READ(GEN11_DE_HPD_IMR); From 8e68c6340d5833077b3753eabedab40755571383 Mon Sep 17 00:00:00 2001 From: Vandita Kulkarni Date: Fri, 12 Jun 2020 13:52:37 +0530 Subject: [PATCH 128/462] drm/i915/display: Fix the encoder type check For all ddi, encoder->type holds output type as ddi, assigning it to individual o/p types is no more valid. Fixes: 362bfb995b78 ("drm/i915/tgl: Add DKL PHY vswing table for HDMI") v2: Rebase, no functional change. Signed-off-by: Vandita Kulkarni Reviewed-by: Uma Shankar Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20200612082237.11886-1-vandita.kulkarni@intel.com (cherry picked from commit 94641eb6c69682884abbecf22fe5b7c185af6a06) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_ddi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index f92c5ed3f5d2..0575a1eea2a1 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2579,14 +2579,14 @@ static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, static void tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, - u32 level) + u32 level, enum intel_output_type type) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port); const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; - if (encoder->type == INTEL_OUTPUT_HDMI) { + if (type == INTEL_OUTPUT_HDMI) { n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); ddi_translations = tgl_dkl_phy_hdmi_ddi_trans; } else { @@ -2638,7 +2638,7 @@ static void tgl_ddi_vswing_sequence(struct intel_encoder *encoder, if (intel_phy_is_combo(dev_priv, phy)) icl_combo_phy_ddi_vswing_sequence(encoder, level, type); else - tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level); + tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level, type); } static u32 translate_signal_level(struct intel_dp *intel_dp, int signal_levels) From ed1220df6e666500ebf58c4f2fccc681941646fb Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Tue, 16 Jun 2020 10:53:48 +0800 Subject: [PATCH 129/462] ASoC: fsl_ssi: Fix bclk calculation for mono channel For mono channel, SSI will switch to Normal mode. In Normal mode and Network mode, the Word Length Control bits control the word length divider in clock generator, which is different with I2S Master mode (the word length is fixed to 32bit), it should be the value of params_width(hw_params). The condition "slots == 2" is not good for I2S Master mode, because for Network mode and Normal mode, the slots can also be 2. Then we need to use (ssi->i2s_net & SSI_SCR_I2S_MODE_MASK) to check if it is I2S Master mode. So we refine the formula for mono channel, otherwise there will be sound issue for S24_LE. Fixes: b0a7043d5c2c ("ASoC: fsl_ssi: Caculate bit clock rate using slot number and width") Signed-off-by: Shengjiu Wang Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/034eff1435ff6ce300b6c781130cefd9db22ab9a.1592276147.git.shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index bad89b0d129e..1a2fa7f18142 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -678,8 +678,9 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, struct regmap *regs = ssi->regs; u32 pm = 999, div2, psr, stccr, mask, afreq, factor, i; unsigned long clkrate, baudrate, tmprate; - unsigned int slots = params_channels(hw_params); - unsigned int slot_width = 32; + unsigned int channels = params_channels(hw_params); + unsigned int slot_width = params_width(hw_params); + unsigned int slots = 2; u64 sub, savesub = 100000; unsigned int freq; bool baudclk_is_used; @@ -688,10 +689,14 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, /* Override slots and slot_width if being specifically set... */ if (ssi->slots) slots = ssi->slots; - /* ...but keep 32 bits if slots is 2 -- I2S Master mode */ - if (ssi->slot_width && slots != 2) + if (ssi->slot_width) slot_width = ssi->slot_width; + /* ...but force 32 bits for stereo audio using I2S Master Mode */ + if (channels == 2 && + (ssi->i2s_net & SSI_SCR_I2S_MODE_MASK) == SSI_SCR_I2S_MODE_MASTER) + slot_width = 32; + /* Generate bit clock based on the slot number and slot width */ freq = slots * slot_width * params_rate(hw_params); From c119a8a3c395b0850f131728bdddd166d172842f Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 8 May 2020 17:00:21 +0200 Subject: [PATCH 130/462] s390/qdio: fine-tune SLSB update xchg() for a single-byte location assembles to a 4-byte Compare&Swap, wrapped into a non-trivial amount of retry code that deals with concurrent modifications to the unaffected bytes. Change it to a simple byte-store, but preserve the memory ordering semantics that the CS provided. This simplifies the generated code for a hot path, and in theory also allows us to amortize the memory barriers over multiple SLSB updates. CC: Andreas Krebbel Signed-off-by: Julian Wiedmann Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 610c05f59589..bb137f962c3f 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -254,10 +254,17 @@ static inline int set_buf_states(struct qdio_q *q, int bufnr, if (is_qebsm(q)) return qdio_do_sqbs(q, state, bufnr, count); + /* Ensure that all preceding changes to the SBALs are visible: */ + mb(); + for (i = 0; i < count; i++) { - xchg(&q->slsb.val[bufnr], state); + WRITE_ONCE(q->slsb.val[bufnr], state); bufnr = next_buf(bufnr); } + + /* Make our SLSB changes visible: */ + mb(); + return count; } From 664f5f8de825648d1d31f6f5652e3cd117c77b50 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 4 Mar 2020 16:07:34 +0100 Subject: [PATCH 131/462] s390/seccomp: pass syscall arguments via seccomp_data Use __secure_computing() and pass the register data via seccomp_data so secure computing doesn't have to fetch it again. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ptrace.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ce60a459a143..e319482da5f0 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -838,6 +838,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { unsigned long mask = -1UL; + if (is_compat_task()) + mask = 0xffffffff; + /* * The sysc_tracesys code in entry.S stored the system * call number to gprs[2]. @@ -854,17 +857,35 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) return -1; } +#ifdef CONFIG_SECCOMP /* Do the secure computing check after ptrace. */ - if (secure_computing()) { - /* seccomp failures shouldn't expose any additional code. */ - return -1; + if (unlikely(test_thread_flag(TIF_SECCOMP))) { + struct seccomp_data sd; + + if (is_compat_task()) { + sd.instruction_pointer = regs->psw.addr & 0x7fffffff; + sd.arch = AUDIT_ARCH_S390; + } else { + sd.instruction_pointer = regs->psw.addr; + sd.arch = AUDIT_ARCH_S390X; + } + + sd.nr = regs->gprs[2] & 0xffff; + sd.args[0] = regs->orig_gpr2 & mask; + sd.args[1] = regs->gprs[3] & mask; + sd.args[2] = regs->gprs[4] & mask; + sd.args[3] = regs->gprs[5] & mask; + sd.args[4] = regs->gprs[6] & mask; + sd.args[5] = regs->gprs[7] & mask; + + if (__secure_computing(&sd) == -1) + return -1; } +#endif /* CONFIG_SECCOMP */ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->gprs[2]); - if (is_compat_task()) - mask = 0xffffffff; audit_syscall_entry(regs->gprs[2], regs->orig_gpr2 & mask, regs->gprs[3] &mask, regs->gprs[4] &mask, From cd29fa798001075a554b978df3a64e6656c25794 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 6 Mar 2020 13:18:31 +0100 Subject: [PATCH 132/462] s390/ptrace: return -ENOSYS when invalid syscall is supplied The current code returns the syscall number which an invalid syscall number is supplied and tracing is enabled. This makes the strace testsuite fail. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ptrace.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index e319482da5f0..ceb8105a8086 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -837,6 +837,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { unsigned long mask = -1UL; + long ret = -1; if (is_compat_task()) mask = 0xffffffff; @@ -853,8 +854,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * debugger stored an invalid system call number. Skip * the system call and the system call restart handling. */ - clear_pt_regs_flag(regs, PIF_SYSCALL); - return -1; + goto skip; } #ifdef CONFIG_SECCOMP @@ -870,7 +870,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) sd.arch = AUDIT_ARCH_S390X; } - sd.nr = regs->gprs[2] & 0xffff; + sd.nr = regs->int_code & 0xffff; sd.args[0] = regs->orig_gpr2 & mask; sd.args[1] = regs->gprs[3] & mask; sd.args[2] = regs->gprs[4] & mask; @@ -879,19 +879,26 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) sd.args[5] = regs->gprs[7] & mask; if (__secure_computing(&sd) == -1) - return -1; + goto skip; } #endif /* CONFIG_SECCOMP */ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->gprs[2]); + trace_sys_enter(regs, regs->int_code & 0xffff); - audit_syscall_entry(regs->gprs[2], regs->orig_gpr2 & mask, + audit_syscall_entry(regs->int_code & 0xffff, regs->orig_gpr2 & mask, regs->gprs[3] &mask, regs->gprs[4] &mask, regs->gprs[5] &mask); + if ((signed long)regs->gprs[2] >= NR_syscalls) { + regs->gprs[2] = -ENOSYS; + ret = -ENOSYS; + } return regs->gprs[2]; +skip: + clear_pt_regs_flag(regs, PIF_SYSCALL); + return ret; } asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) From 00332c16b1604242a56289ff2b26e283dbad0812 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 6 Mar 2020 13:19:34 +0100 Subject: [PATCH 133/462] s390/ptrace: pass invalid syscall numbers to tracing tracing expects to see invalid syscalls, so pass it through. The syscall path in entry.S checks the syscall number before looking up the handler, so it is still safe. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 2 +- arch/s390/kernel/ptrace.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 50ff6dd0f995..496f74d98473 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -401,9 +401,9 @@ ENTRY(system_call) jnz .Lsysc_nr_ok # svc 0: system call number in %r1 llgfr %r1,%r1 # clear high word in r1 + sth %r1,__PT_INT_CODE+2(%r11) cghi %r1,NR_syscalls jnl .Lsysc_nr_ok - sth %r1,__PT_INT_CODE+2(%r11) slag %r8,%r1,3 .Lsysc_nr_ok: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ceb8105a8086..1fdbb2d19477 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -847,11 +847,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * call number to gprs[2]. */ if (test_thread_flag(TIF_SYSCALL_TRACE) && - (tracehook_report_syscall_entry(regs) || - regs->gprs[2] >= NR_syscalls)) { + tracehook_report_syscall_entry(regs)) { /* - * Tracing decided this syscall should not happen or the - * debugger stored an invalid system call number. Skip + * Tracing decided this syscall should not happen. Skip * the system call and the system call restart handling. */ goto skip; From 873e5a763d604c32988c4a78913a8dab3862d2f9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 9 Mar 2020 16:44:50 +0100 Subject: [PATCH 134/462] s390/ptrace: fix setting syscall number When strace wants to update the syscall number, it sets GPR2 to the desired number and updates the GPR via PTRACE_SETREGSET. It doesn't update regs->int_code which would cause the old syscall executed on syscall restart. As we cannot change the ptrace ABI and don't have a field for the interruption code, check whether the tracee is in a syscall and the last instruction was svc. In that case assume that the tracer wants to update the syscall number and copy the GPR2 value to regs->int_code. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ptrace.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 1fdbb2d19477..3cc15c066298 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -323,6 +323,25 @@ static inline void __poke_user_per(struct task_struct *child, child->thread.per_user.end = data; } +static void fixup_int_code(struct task_struct *child, addr_t data) +{ + struct pt_regs *regs = task_pt_regs(child); + int ilc = regs->int_code >> 16; + u16 insn; + + if (ilc > 6) + return; + + if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16), + &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn)) + return; + + /* double check that tracee stopped on svc instruction */ + if ((insn >> 8) != 0xa) + return; + + regs->int_code = 0x20000 | (data & 0xffff); +} /* * Write a word to the user area of a process at location addr. This * operation does have an additional problem compared to peek_user. @@ -334,7 +353,9 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) struct user *dummy = NULL; addr_t offset; + if (addr < (addr_t) &dummy->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ @@ -352,7 +373,11 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) /* Invalid addressing mode bits */ return -EINVAL; } - *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct user, regs.gprs[2])) + fixup_int_code(child, data); + *(addr_t *)((addr_t) ®s->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { /* @@ -718,6 +743,10 @@ static int __poke_user_compat(struct task_struct *child, regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); } else { + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct compat_user, regs.gprs[2])) + fixup_int_code(child, data); /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } From 4bae85b620dc1f7aa4d2338b923d9d9b394b58c4 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 9 Mar 2020 16:56:53 +0100 Subject: [PATCH 135/462] selftests/seccomp: s390 shares the syscall and return value register s390 cannot set syscall number and reture code at the same time, so set the appropriate flag to indicate it. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- tools/testing/selftests/seccomp/seccomp_bpf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index c0aa46ce14f6..252140a52553 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1615,6 +1615,7 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS s390_regs # define SYSCALL_NUM gprs[2] # define SYSCALL_RET gprs[2] +# define SYSCALL_NUM_RET_SHARE_REG #elif defined(__mips__) # define ARCH_REGS struct pt_regs # define SYSCALL_NUM regs[2] From a87ee11607b853a31c8612d9f47b7fe974953b77 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 21 Apr 2020 11:35:00 +0100 Subject: [PATCH 136/462] s390/qdio: reduce SLSB writes during Input Queue processing Streamline the processing of QDIO Input Queues, and remove some intermittent SLSB updates (no deleting of old ACKs, no redundant transitions through NOT_INIT). Rather than counting ACKs, we now keep track of the whole batch of SBALs that were completed during the current polling cycle. Most completed SBALs stay in their initial state (ie. PRIMED or ERROR), except that the most recent SBAL in each sub-run is ACKed for IRQ reduction. The only logic changes happen in inbound_handle_work(), the other delta is just a renaming of the variables that track the SBAL batch. Note that in particular we don't need to flip the _oldest_ SBAL to an idle state (eg. NOT_INIT or ACKed) as a guard against catching our own tail. Since get_inbound_buffer_frontier() will never scan more than the remaining nr_buf_used SBALs, this scenario just doesn't occur. Signed-off-by: Julian Wiedmann Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio.h | 7 ++--- drivers/s390/cio/qdio_debug.c | 4 +-- drivers/s390/cio/qdio_main.c | 59 +++++++++-------------------------- 3 files changed, 20 insertions(+), 50 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index eb13c479e11d..bb1c8402c67d 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -182,10 +182,9 @@ enum qdio_irq_poll_states { }; struct qdio_input_q { - /* first ACK'ed buffer */ - int ack_start; - /* how many SBALs are acknowledged */ - int ack_count; + /* Batch of SBALs that we processed while polling the queue: */ + unsigned int batch_start; + unsigned int batch_count; /* last time of noticing incoming data */ u64 timestamp; }; diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index 286b044fb027..da95c923d81a 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -110,8 +110,8 @@ static int qstat_show(struct seq_file *m, void *v) seq_printf(m, "nr_used: %d ftc: %d\n", atomic_read(&q->nr_buf_used), q->first_to_check); if (q->is_input_q) { - seq_printf(m, "ack start: %d ack count: %d\n", - q->u.in.ack_start, q->u.in.ack_count); + seq_printf(m, "batch start: %u batch count: %u\n", + q->u.in.batch_start, q->u.in.batch_count); seq_printf(m, "DSCI: %x IRQs disabled: %u\n", *(u8 *)q->irq_ptr->dsci, test_bit(QDIO_IRQ_DISABLED, diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index bb137f962c3f..1aa94683d789 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -400,15 +400,15 @@ int debug_get_buf_state(struct qdio_q *q, unsigned int bufnr, static inline void qdio_stop_polling(struct qdio_q *q) { - if (!q->u.in.ack_count) + if (!q->u.in.batch_count) return; qperf_inc(q, stop_polling); /* show the card that we are not polling anymore */ - set_buf_states(q, q->u.in.ack_start, SLSB_P_INPUT_NOT_INIT, - q->u.in.ack_count); - q->u.in.ack_count = 0; + set_buf_states(q, q->u.in.batch_start, SLSB_P_INPUT_NOT_INIT, + q->u.in.batch_count); + q->u.in.batch_count = 0; } static inline void account_sbals(struct qdio_q *q, unsigned int count) @@ -448,42 +448,13 @@ static void process_buffer_error(struct qdio_q *q, unsigned int start, static inline void inbound_handle_work(struct qdio_q *q, unsigned int start, int count, bool auto_ack) { - int new; + /* ACK the newest SBAL: */ + if (!auto_ack) + set_buf_state(q, add_buf(start, count - 1), SLSB_P_INPUT_ACK); - if (auto_ack) { - if (!q->u.in.ack_count) { - q->u.in.ack_count = count; - q->u.in.ack_start = start; - return; - } - - /* delete the previous ACK's */ - set_buf_states(q, q->u.in.ack_start, SLSB_P_INPUT_NOT_INIT, - q->u.in.ack_count); - q->u.in.ack_count = count; - q->u.in.ack_start = start; - return; - } - - /* - * ACK the newest buffer. The ACK will be removed in qdio_stop_polling - * or by the next inbound run. - */ - new = add_buf(start, count - 1); - set_buf_state(q, new, SLSB_P_INPUT_ACK); - - /* delete the previous ACKs */ - if (q->u.in.ack_count) - set_buf_states(q, q->u.in.ack_start, SLSB_P_INPUT_NOT_INIT, - q->u.in.ack_count); - - q->u.in.ack_count = 1; - q->u.in.ack_start = new; - count--; - if (!count) - return; - /* need to change ALL buffers to get more interrupts */ - set_buf_states(q, start, SLSB_P_INPUT_NOT_INIT, count); + if (!q->u.in.batch_count) + q->u.in.batch_start = start; + q->u.in.batch_count += count; } static int get_inbound_buffer_frontier(struct qdio_q *q, unsigned int start) @@ -1453,12 +1424,12 @@ static int handle_inbound(struct qdio_q *q, unsigned int callflags, qperf_inc(q, inbound_call); - /* If any ACKed SBALs are returned to HW, adjust ACK tracking: */ - overlap = min(count - sub_buf(q->u.in.ack_start, bufnr), - q->u.in.ack_count); + /* If any processed SBALs are returned to HW, adjust our tracking: */ + overlap = min_t(int, count - sub_buf(q->u.in.batch_start, bufnr), + q->u.in.batch_count); if (overlap > 0) { - q->u.in.ack_start = add_buf(q->u.in.ack_start, overlap); - q->u.in.ack_count -= overlap; + q->u.in.batch_start = add_buf(q->u.in.batch_start, overlap); + q->u.in.batch_count -= overlap; } count = set_buf_states(q, bufnr, SLSB_CU_INPUT_EMPTY, count); From ecc28f58e6cc479253bbd9aced013d8d34fef896 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 26 May 2020 11:36:29 +0200 Subject: [PATCH 137/462] s390/virtio: remove unused pm callbacks Support for hibernation on s390 has been recently been removed with commit 394216275c7d ("s390: remove broken hibernate / power management support"), no need to keep unused code around. Link: https://lkml.kernel.org/r/20200526093629.257649-1-cohuck@redhat.com Signed-off-by: Cornelia Huck Reviewed-by: Halil Pasic Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/virtio/virtio_ccw.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 957889a42d2e..5730572b52cd 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -1372,27 +1372,6 @@ static struct ccw_device_id virtio_ids[] = { {}, }; -#ifdef CONFIG_PM_SLEEP -static int virtio_ccw_freeze(struct ccw_device *cdev) -{ - struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev); - - return virtio_device_freeze(&vcdev->vdev); -} - -static int virtio_ccw_restore(struct ccw_device *cdev) -{ - struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev); - int ret; - - ret = virtio_ccw_set_transport_rev(vcdev); - if (ret) - return ret; - - return virtio_device_restore(&vcdev->vdev); -} -#endif - static struct ccw_driver virtio_ccw_driver = { .driver = { .owner = THIS_MODULE, @@ -1405,11 +1384,6 @@ static struct ccw_driver virtio_ccw_driver = { .set_online = virtio_ccw_online, .notify = virtio_ccw_cio_notify, .int_class = IRQIO_VIR, -#ifdef CONFIG_PM_SLEEP - .freeze = virtio_ccw_freeze, - .thaw = virtio_ccw_restore, - .restore = virtio_ccw_restore, -#endif }; static int __init pure_hex(char **cp, unsigned int *val, int min_digit, From 79d6c50227103a9f7df9690b38ff74127f69d3cf Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Tue, 21 Apr 2020 20:35:48 +0800 Subject: [PATCH 138/462] s390/zcrypt: use kzalloc This patch fixes below warning reported by coccicheck drivers/s390/crypto/zcrypt_ep11misc.c:198:8-15: WARNING: kzalloc should be used for cprb, instead of kmalloc/memset Link: https://lkml.kernel.org/r/1587472548-105240-1-git-send-email-zou_wei@huawei.com Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_ep11misc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index 004ce022fc78..3c3d403abe92 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -195,11 +195,10 @@ static inline struct ep11_cprb *alloc_cprb(size_t payload_len) size_t len = sizeof(struct ep11_cprb) + payload_len; struct ep11_cprb *cprb; - cprb = kmalloc(len, GFP_KERNEL); + cprb = kzalloc(len, GFP_KERNEL); if (!cprb) return NULL; - memset(cprb, 0, len); cprb->cprb_len = sizeof(struct ep11_cprb); cprb->cprb_ver_id = 0x04; memcpy(cprb->func_id, "T4", 2); From df8cea2a4bef3088c8570af543835992ce1d327e Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Sat, 9 May 2020 16:56:06 +0800 Subject: [PATCH 139/462] s390/crypto: use scnprintf() instead of snprintf() snprintf() returns the number of bytes that would be written, which may be greater than the the actual length to be written. show() methods should return the number of bytes printed into the buffer. This is the return value of scnprintf(). Link: https://lkml.kernel.org/r/20200509085608.41061-2-chenzhou10@huawei.com Signed-off-by: Chen Zhou Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/crypto/prng.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index d977643fa627..e1ae23911ccd 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -693,7 +693,7 @@ static ssize_t prng_chunksize_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); + return scnprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); } static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL); @@ -712,7 +712,7 @@ static ssize_t prng_counter_show(struct device *dev, counter = prng_data->prngws.byte_counter; mutex_unlock(&prng_data->mutex); - return snprintf(buf, PAGE_SIZE, "%llu\n", counter); + return scnprintf(buf, PAGE_SIZE, "%llu\n", counter); } static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL); @@ -721,7 +721,7 @@ static ssize_t prng_errorflag_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); + return scnprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); } static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL); @@ -731,9 +731,9 @@ static ssize_t prng_mode_show(struct device *dev, char *buf) { if (prng_mode == PRNG_MODE_TDES) - return snprintf(buf, PAGE_SIZE, "TDES\n"); + return scnprintf(buf, PAGE_SIZE, "TDES\n"); else - return snprintf(buf, PAGE_SIZE, "SHA512\n"); + return scnprintf(buf, PAGE_SIZE, "SHA512\n"); } static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL); @@ -756,7 +756,7 @@ static ssize_t prng_reseed_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); + return scnprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); } static ssize_t prng_reseed_limit_store(struct device *dev, struct device_attribute *attr, @@ -787,7 +787,7 @@ static ssize_t prng_strength_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "256\n"); + return scnprintf(buf, PAGE_SIZE, "256\n"); } static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL); From 92fd356514b7505f40ca72b38ef84070e6502a70 Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Sat, 9 May 2020 16:56:07 +0800 Subject: [PATCH 140/462] s390: use scnprintf() in sys_##_prefix##_##_name##_show snprintf() returns the number of bytes that would be written, which may be greater than the the actual length to be written. show() methods should return the number of bytes printed into the buffer. This is the return value of scnprintf(). Link: https://lkml.kernel.org/r/20200509085608.41061-3-chenzhou10@huawei.com Signed-off-by: Chen Zhou Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ipl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index ccea9a245867..90a2a17239b0 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -181,7 +181,7 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, \ char *page) \ { \ - return snprintf(page, PAGE_SIZE, _format, ##args); \ + return scnprintf(page, PAGE_SIZE, _format, ##args); \ } #define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \ From 99448016ac792ac096def056828ab72c21f8582b Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Sat, 9 May 2020 16:56:08 +0800 Subject: [PATCH 141/462] s390/protvirt: use scnprintf() instead of snprintf() snprintf() returns the number of bytes that would be written, which may be greater than the the actual length to be written. uv_query_facilities() should return the number of bytes printed into the buffer. This is the return value of scnprintf(). The other functions are the same. Link: https://lkml.kernel.org/r/20200509085608.41061-4-chenzhou10@huawei.com Signed-off-by: Chen Zhou Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/uv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 66e89b2866d7..c296e5c8dbf9 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -331,7 +331,7 @@ EXPORT_SYMBOL_GPL(arch_make_page_accessible); static ssize_t uv_query_facilities(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", + return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", uv_info.inst_calls_list[0], uv_info.inst_calls_list[1], uv_info.inst_calls_list[2], @@ -344,7 +344,7 @@ static struct kobj_attribute uv_query_facilities_attr = static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%d\n", + return scnprintf(page, PAGE_SIZE, "%d\n", uv_info.max_guest_cpus); } @@ -354,7 +354,7 @@ static struct kobj_attribute uv_query_max_guest_cpus_attr = static ssize_t uv_query_max_guest_vms(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%d\n", + return scnprintf(page, PAGE_SIZE, "%d\n", uv_info.max_num_sec_conf); } @@ -364,7 +364,7 @@ static struct kobj_attribute uv_query_max_guest_vms_attr = static ssize_t uv_query_max_guest_addr(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%lx\n", + return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.max_sec_stor_addr); } From 2b2a25845d534ac6d55086e35c033961fdd83a26 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 2 Jun 2020 12:25:24 -0700 Subject: [PATCH 142/462] s390/vdso: Use $(LD) instead of $(CC) to link vDSO Currently, the VDSO is being linked through $(CC). This does not match how the rest of the kernel links objects, which is through the $(LD) variable. When clang is built in a default configuration, it first attempts to use the target triple's default linker, which is just ld. However, the user can override this through the CLANG_DEFAULT_LINKER cmake define so that clang uses another linker by default, such as LLVM's own linker, ld.lld. This can be useful to get more optimized links across various different projects. However, this is problematic for the s390 vDSO because ld.lld does not have any s390 emulatiom support: https://github.com/llvm/llvm-project/blob/llvmorg-10.0.1-rc1/lld/ELF/Driver.cpp#L132-L150 Thus, if a user is using a toolchain with ld.lld as the default, they will see an error, even if they have specified ld.bfd through the LD make variable: $ make -j"$(nproc)" -s ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- LLVM=1 \ LD=s390x-linux-gnu-ld \ defconfig arch/s390/kernel/vdso64/ ld.lld: error: unknown emulation: elf64_s390 clang-11: error: linker command failed with exit code 1 (use -v to see invocation) Normally, '-fuse-ld=bfd' could be used to get around this; however, this can be fragile, depending on paths and variable naming. The cleaner solution for the kernel is to take advantage of the fact that $(LD) can be invoked directly, which bypasses the heuristics of $(CC) and respects the user's choice. Similar changes have been done for ARM, ARM64, and MIPS. Link: https://lkml.kernel.org/r/20200602192523.32758-1-natechancellor@gmail.com Link: https://github.com/ClangBuiltLinux/linux/issues/1041 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers [heiko.carstens@de.ibm.com: add --build-id flag] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso64/Makefile | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index bec19e7e6e1c..4a66a1cb919b 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -18,8 +18,8 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin -KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - -Wl,--hash-style=both +ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \ + --hash-style=both --build-id -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) @@ -37,8 +37,8 @@ KASAN_SANITIZE := n $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE - $(call if_changed,vdso64ld) +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE + $(call if_changed,ld) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -50,8 +50,6 @@ $(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) # actual build commands -quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< From 478237a595120a18e9b52fd2c57a6e8b7a01e411 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 24 Mar 2020 12:10:27 +0000 Subject: [PATCH 143/462] s390/vdso: fix vDSO clock_getres() clock_getres in the vDSO library has to preserve the same behaviour of posix_get_hrtimer_res(). In particular, posix_get_hrtimer_res() does: sec = 0; ns = hrtimer_resolution; and hrtimer_resolution depends on the enablement of the high resolution timers that can happen either at compile or at run time. Fix the s390 vdso implementation of clock_getres keeping a copy of hrtimer_resolution in vdso data and using that directly. Link: https://lkml.kernel.org/r/20200324121027.21665-1-vincenzo.frascino@arm.com Signed-off-by: Vincenzo Frascino Acked-by: Martin Schwidefsky [heiko.carstens@de.ibm.com: use llgf for proper zero extension] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/vdso.h | 1 + arch/s390/kernel/asm-offsets.c | 2 +- arch/s390/kernel/time.c | 1 + arch/s390/kernel/vdso64/clock_getres.S | 10 +++++----- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 3bcfdeb01395..0cd085cdeb4f 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -36,6 +36,7 @@ struct vdso_data { __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */ __u32 ts_dir; /* TOD steering direction 0x64 */ __u64 ts_end; /* TOD steering end 0x68 */ + __u32 hrtimer_res; /* hrtimer resolution 0x70 */ }; struct vdso_per_cpu_data { diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 165031bd3370..5d8cc1864566 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -76,6 +76,7 @@ int main(void) OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift); OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir); OFFSET(__VDSO_TS_END, vdso_data, ts_end); + OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res); OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); @@ -86,7 +87,6 @@ int main(void) DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); - DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); BLANK(); /* idle data offsets */ diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index f9d070d016e3..b1113b519432 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -301,6 +301,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->tk_mult = tk->tkr_mono.mult; vdso_data->tk_shift = tk->tkr_mono.shift; + vdso_data->hrtimer_res = hrtimer_resolution; smp_wmb(); ++vdso_data->tb_update_count; } diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 081435398e0a..0c79caa32b59 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -17,12 +17,14 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: CFI_STARTPROC - larl %r1,4f + larl %r1,3f + lg %r0,0(%r1) cghi %r2,__CLOCK_REALTIME_COARSE je 0f cghi %r2,__CLOCK_MONOTONIC_COARSE je 0f - larl %r1,3f + larl %r1,_vdso_data + llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1) cghi %r2,__CLOCK_REALTIME je 0f cghi %r2,__CLOCK_MONOTONIC @@ -36,7 +38,6 @@ __kernel_clock_getres: jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ - lg %r0,0(%r1) xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ stg %r0,8(%r3) /* store tp->tv_usec */ 1: lghi %r2,0 @@ -45,6 +46,5 @@ __kernel_clock_getres: svc 0 br %r14 CFI_ENDPROC -3: .quad __CLOCK_REALTIME_RES -4: .quad __CLOCK_COARSE_RES +3: .quad __CLOCK_COARSE_RES .size __kernel_clock_getres,.-__kernel_clock_getres From 64438e1bc0cdbe6d30bcdcb976f935eb3c297adc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 10 Jun 2020 10:36:05 +0200 Subject: [PATCH 144/462] s390/numa: let NODES_SHIFT depend on NEED_MULTIPLE_NODES Qian Cai reported: """ When NUMA=n and nr_node_ids=2, in apply_wqattrs_prepare(), it has, for_each_node(node) { if (wq_calc_node_cpumask(... where it will trigger a booting warning, WARNING: workqueue cpumask: online intersect > possible intersect because it found 2 nodes and wq_numa_possible_cpumask[1] is an empty cpumask. """ Let NODES_SHIFT depend on NEED_MULTIPLE_NODES like it is done on other architectures in order to fix this. Fixes: 701dc81e7412 ("s390/mm: remove fake numa support") Reported-by: Qian Cai Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 194824932a60..c7d7ede6300c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -462,6 +462,7 @@ config NUMA config NODES_SHIFT int + depends on NEED_MULTIPLE_NODES default "1" config SCHED_SMT From 6903cdae9f9f08d61e49c16cbef11c293e33a615 Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Fri, 12 Jun 2020 14:53:27 -0400 Subject: [PATCH 145/462] bpf, xdp, samples: Fix null pointer dereference in *_user code Memset on the pointer right after malloc can cause a NULL pointer deference if it failed to allocate memory. A simple fix is to replace malloc()/memset() pair with a simple call to calloc(). Fixes: 0fca931a6f21 ("samples/bpf: program demonstrating access to xdp_rxq_info") Signed-off-by: Gaurav Singh Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Acked-by: John Fastabend --- samples/bpf/xdp_monitor_user.c | 8 ++------ samples/bpf/xdp_redirect_cpu_user.c | 7 ++----- samples/bpf/xdp_rxq_info_user.c | 13 +++---------- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index dd558cbb2309..ef53b93db573 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -509,11 +509,8 @@ static void *alloc_rec_per_cpu(int record_size) { unsigned int nr_cpus = bpf_num_possible_cpus(); void *array; - size_t size; - size = record_size * nr_cpus; - array = malloc(size); - memset(array, 0, size); + array = calloc(nr_cpus, record_size); if (!array) { fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); exit(EXIT_FAIL_MEM); @@ -528,8 +525,7 @@ static struct stats_record *alloc_stats_record(void) int i; /* Alloc main stats_record structure */ - rec = malloc(sizeof(*rec)); - memset(rec, 0, sizeof(*rec)); + rec = calloc(1, sizeof(*rec)); if (!rec) { fprintf(stderr, "Mem alloc error\n"); exit(EXIT_FAIL_MEM); diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index f3468168982e..f4e755e0dd73 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -207,11 +207,8 @@ static struct datarec *alloc_record_per_cpu(void) { unsigned int nr_cpus = bpf_num_possible_cpus(); struct datarec *array; - size_t size; - size = sizeof(struct datarec) * nr_cpus; - array = malloc(size); - memset(array, 0, size); + array = calloc(nr_cpus, sizeof(struct datarec)); if (!array) { fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); exit(EXIT_FAIL_MEM); @@ -226,11 +223,11 @@ static struct stats_record *alloc_stats_record(void) size = sizeof(*rec) + n_cpus * sizeof(struct record); rec = malloc(size); - memset(rec, 0, size); if (!rec) { fprintf(stderr, "Mem alloc error\n"); exit(EXIT_FAIL_MEM); } + memset(rec, 0, size); rec->rx_cnt.cpu = alloc_record_per_cpu(); rec->redir_err.cpu = alloc_record_per_cpu(); rec->kthread.cpu = alloc_record_per_cpu(); diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index 4fe47502ebed..caa4e7ffcfc7 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -198,11 +198,8 @@ static struct datarec *alloc_record_per_cpu(void) { unsigned int nr_cpus = bpf_num_possible_cpus(); struct datarec *array; - size_t size; - size = sizeof(struct datarec) * nr_cpus; - array = malloc(size); - memset(array, 0, size); + array = calloc(nr_cpus, sizeof(struct datarec)); if (!array) { fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); exit(EXIT_FAIL_MEM); @@ -214,11 +211,8 @@ static struct record *alloc_record_per_rxq(void) { unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; struct record *array; - size_t size; - size = sizeof(struct record) * nr_rxqs; - array = malloc(size); - memset(array, 0, size); + array = calloc(nr_rxqs, sizeof(struct record)); if (!array) { fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs); exit(EXIT_FAIL_MEM); @@ -232,8 +226,7 @@ static struct stats_record *alloc_stats_record(void) struct stats_record *rec; int i; - rec = malloc(sizeof(*rec)); - memset(rec, 0, sizeof(*rec)); + rec = calloc(1, sizeof(struct stats_record)); if (!rec) { fprintf(stderr, "Mem alloc error\n"); exit(EXIT_FAIL_MEM); From a0b03952a797591d4b6d6fa7b9b7872e27783729 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Jun 2020 15:21:50 +0200 Subject: [PATCH 146/462] ALSA: hda/realtek - Add quirk for MSI GE63 laptop MSI GE63 laptop with ALC1220 codec requires the very same quirk (ALC1220_FIXUP_CLEVO_P950) as other MSI devices for the proper sound output. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=208057 Cc: Link: https://lore.kernel.org/r/20200616132150.8778-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6d73f8beadb6..2713560e0c4b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2461,6 +2461,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1462, 0x11f7, "MSI-GE63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1275, "MSI-GL63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), From 22d2cfdffa5bff3566e16cb7320e13ceb814674b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 4 Jun 2020 11:12:34 +0200 Subject: [PATCH 147/462] libceph: move away from global osd_req_flags osd_req_flags is overly general and doesn't suit its only user (read_from_replica option) well: - applying osd_req_flags in account_request() affects all OSD requests, including linger (i.e. watch and notify). However, linger requests should always go to the primary even though some of them are reads (e.g. notify has side effects but it is a read because it doesn't result in mutation on the OSDs). - calls to class methods that are reads are allowed to go to the replica, but most such calls issued for "rbd map" and/or exclusive lock transitions are requested to be resent to the primary via EAGAIN, doubling the latency. Get rid of global osd_req_flags and set read_from_replica flag only on specific OSD requests instead. Fixes: 8ad44d5e0d1e ("libceph: read_from_replica option") Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- drivers/block/rbd.c | 4 +++- include/linux/ceph/libceph.h | 4 ++-- net/ceph/ceph_common.c | 14 ++++++-------- net/ceph/osd_client.c | 7 ++----- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 7420648a1de6..4f61e9209461 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1451,8 +1451,10 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req) static void rbd_osd_format_read(struct ceph_osd_request *osd_req) { struct rbd_obj_request *obj_request = osd_req->r_priv; + struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev; + struct ceph_options *opt = rbd_dev->rbd_client->client->options; - osd_req->r_flags = CEPH_OSD_FLAG_READ; + osd_req->r_flags = CEPH_OSD_FLAG_READ | opt->read_from_replica; osd_req->r_snapid = obj_request->img_request->snap_id; } diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 2247e71beb83..e5ed1c541e7f 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -52,8 +52,7 @@ struct ceph_options { unsigned long osd_idle_ttl; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */ unsigned long osd_request_timeout; /* jiffies */ - - u32 osd_req_flags; /* CEPH_OSD_FLAG_*, applied to each OSD request */ + u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */ /* * any type that can't be simply compared or doesn't need @@ -76,6 +75,7 @@ struct ceph_options { #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0 /* no timeout */ +#define CEPH_READ_FROM_REPLICA_DEFAULT 0 /* read from primary */ #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000) #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index afe0e8184c23..4e7edd707a14 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -332,6 +332,7 @@ struct ceph_options *ceph_alloc_options(void) opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; + opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT; return opt; } EXPORT_SYMBOL(ceph_alloc_options); @@ -490,16 +491,13 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, case Opt_read_from_replica: switch (result.uint_32) { case Opt_read_from_replica_no: - opt->osd_req_flags &= ~(CEPH_OSD_FLAG_BALANCE_READS | - CEPH_OSD_FLAG_LOCALIZE_READS); + opt->read_from_replica = 0; break; case Opt_read_from_replica_balance: - opt->osd_req_flags |= CEPH_OSD_FLAG_BALANCE_READS; - opt->osd_req_flags &= ~CEPH_OSD_FLAG_LOCALIZE_READS; + opt->read_from_replica = CEPH_OSD_FLAG_BALANCE_READS; break; case Opt_read_from_replica_localize: - opt->osd_req_flags |= CEPH_OSD_FLAG_LOCALIZE_READS; - opt->osd_req_flags &= ~CEPH_OSD_FLAG_BALANCE_READS; + opt->read_from_replica = CEPH_OSD_FLAG_LOCALIZE_READS; break; default: BUG(); @@ -613,9 +611,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, } seq_putc(m, ','); } - if (opt->osd_req_flags & CEPH_OSD_FLAG_BALANCE_READS) { + if (opt->read_from_replica == CEPH_OSD_FLAG_BALANCE_READS) { seq_puts(m, "read_from_replica=balance,"); - } else if (opt->osd_req_flags & CEPH_OSD_FLAG_LOCALIZE_READS) { + } else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) { seq_puts(m, "read_from_replica=localize,"); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 4fea3c33af2a..a37d159019a0 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1117,10 +1117,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, truncate_size, truncate_seq); } - req->r_flags = flags; req->r_base_oloc.pool = layout->pool_id; req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns); ceph_oid_printf(&req->r_base_oid, "%llx.%08llx", vino.ino, objnum); + req->r_flags = flags | osdc->client->options->read_from_replica; req->r_snapid = vino.snap; if (flags & CEPH_OSD_FLAG_WRITE) @@ -2431,14 +2431,11 @@ promote: static void account_request(struct ceph_osd_request *req) { - struct ceph_osd_client *osdc = req->r_osdc; - WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK)); WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE))); req->r_flags |= CEPH_OSD_FLAG_ONDISK; - req->r_flags |= osdc->client->options->osd_req_flags; - atomic_inc(&osdc->num_requests); + atomic_inc(&req->r_osdc->num_requests); req->r_start_stamp = jiffies; req->r_start_latency = ktime_get(); From 2f3fead62144002557f322c2a7c15e1255df0653 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 9 Jun 2020 11:57:56 +0200 Subject: [PATCH 148/462] libceph: don't omit recovery_deletes in target_copy() Currently target_copy() is used only for sending linger pings, so this doesn't come up, but generally omitting recovery_deletes can result in unneeded resends (force_resend in calc_target()). Fixes: ae78dd8139ce ("libceph: make RECOVERY_DELETES feature create a new interval") Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- net/ceph/osd_client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index a37d159019a0..8f7fbe861dff 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -445,6 +445,7 @@ static void target_copy(struct ceph_osd_request_target *dest, dest->size = src->size; dest->min_size = src->min_size; dest->sort_bitwise = src->sort_bitwise; + dest->recovery_deletes = src->recovery_deletes; dest->flags = src->flags; dest->paused = src->paused; From 7ed286f3e061ee394782bd9fb4ed96bff0b5a021 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 9 Jun 2020 11:59:08 +0200 Subject: [PATCH 149/462] libceph: don't omit used_replica in target_copy() Currently target_copy() is used only for sending linger pings, so this doesn't come up, but generally omitting used_replica can hang the client as we wouldn't notice the acting set change (legacy_change in calc_target()) or trigger a warning in handle_reply(). Fixes: 117d96a04f00 ("libceph: support for balanced and localized reads") Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- net/ceph/osd_client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 8f7fbe861dff..2db8b44e70c2 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -448,6 +448,7 @@ static void target_copy(struct ceph_osd_request_target *dest, dest->recovery_deletes = src->recovery_deletes; dest->flags = src->flags; + dest->used_replica = src->used_replica; dest->paused = src->paused; dest->epoch = src->epoch; From 76ebbc2736434f210d0fe4a41cc3232b0dae4563 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 3 Jun 2020 11:40:10 +0900 Subject: [PATCH 150/462] selftests/ftrace: Allow ":" in description Allow ":" in the description line. Currently if there is ":" in the test description line, the description is cut at that point, but that was unintended. Signed-off-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/ftracetest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index a4605b5ee66d..d3f6652311ef 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -263,7 +263,7 @@ CASENO=0 testcase() { # testfile CASENO=$((CASENO+1)) - desc=`grep "^#[ \t]*description:" $1 | cut -f2 -d:` + desc=`grep "^#[ \t]*description:" $1 | cut -f2- -d:` prlog -n "[$CASENO]$INSTANCE$desc" } From 1e11b7dbef17bb6315f08771a64d86ab20fc037b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 3 Jun 2020 11:40:19 +0900 Subject: [PATCH 151/462] selftests/ftrace: Return unsupported for the unconfigured features As same as other test cases, return unsupported if kprobe_events or argument access feature are not found. There can be a new arch which does not port those features yet, and an older kernel which doesn't support it. Those can not enable the features. Signed-off-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc | 2 +- .../testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc b/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc index 801ecb63e84c..e95b744b23e4 100644 --- a/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc +++ b/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc @@ -10,7 +10,7 @@ fi if [ ! -f kprobe_events ]; then echo "No kprobe_events file -please build CONFIG_KPROBE_EVENTS" - exit_unresolved; + exit_unsupported; fi echo "Let the module run a little" diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc index 0f60087583d8..b41471f301ab 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc @@ -4,7 +4,7 @@ [ -f kprobe_events ] || exit_unsupported # this is configurable -grep -q '\$arg' README || exit_unresolved # depends on arch +grep -q '\$arg' README || exit_unsupported # depends on arch grep -A10 "fetcharg:" README | grep -q 'ustring' || exit_unsupported grep -A10 "fetcharg:" README | grep -q '\[u\]' || exit_unsupported From fa33e6236f5fa425a3ccf8925010d51fae9f0929 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 3 Jun 2020 11:40:28 +0900 Subject: [PATCH 152/462] selftests/ftrace: Add "requires:" list support Introduce "requires:" list to check required ftrace interface for each test. This will simplify the interface checking code and unify the error message. Another good point is, it can skip the ftrace initializing. Note that this requires list must be written as a shell comment. Signed-off-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/ftracetest | 8 +++++++- tools/testing/selftests/ftrace/test.d/functions | 9 +++++++++ tools/testing/selftests/ftrace/test.d/template | 1 + 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index d3f6652311ef..cdf7940b6610 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -267,6 +267,11 @@ testcase() { # testfile prlog -n "[$CASENO]$INSTANCE$desc" } +checkreq() { # testfile + requires=`grep "^#[ \t]*requires:" $1 | cut -f2- -d:` + check_requires $requires +} + test_on_instance() { # testfile grep -q "^#[ \t]*flags:.*instance" $1 } @@ -356,7 +361,8 @@ trap 'SIG_RESULT=$XFAIL' $SIG_XFAIL __run_test() { # testfile # setup PID and PPID, $$ is not updated. - (cd $TRACING_DIR; read PID _ < /proc/self/stat; set -e; set -x; initialize_ftrace; . $1) + (cd $TRACING_DIR; read PID _ < /proc/self/stat; set -e; set -x; + checkreq $1; initialize_ftrace; . $1) [ $? -ne 0 ] && kill -s $SIG_FAIL $SIG_PID } diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 697c77ef2e2b..5100eb1ada0f 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -113,6 +113,15 @@ initialize_ftrace() { # Reset ftrace to initial-state enable_tracing } +check_requires() { # Check required files + for i in $* ; do + if [ ! -e $i ]; then + echo "Required feature interface $i doesn't exist." + exit_unsupported + fi + done +} + LOCALHOST=127.0.0.1 yield() { diff --git a/tools/testing/selftests/ftrace/test.d/template b/tools/testing/selftests/ftrace/test.d/template index e1a5d14c4eaf..611423c4e75f 100644 --- a/tools/testing/selftests/ftrace/test.d/template +++ b/tools/testing/selftests/ftrace/test.d/template @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: %HERE DESCRIBE WHAT THIS DOES% +# requires: %HERE LIST THE REQUIRED FILES% # you have to add ".tc" extention for your testcase file # Note that all tests are run with "errexit" option. From 034aa9cd698e315c767af1bac3fd1ff8898d2cd7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 15 Jun 2020 16:27:43 +0100 Subject: [PATCH 153/462] arm64: pgtable: Clear the GP bit for non-executable kernel pages Commit cca98e9f8b5e ("mm: enforce that vmap can't map pages executable") introduced 'pgprot_nx(prot)' for arm64 but collided silently with the BTI support during the merge window, which endeavours to clear the GP bit for non-executable kernel mappings in set_memory_nx(). For consistency between the two APIs, clear the GP bit in pgprot_nx(). Acked-by: Mark Rutland Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20200615154642.3579-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6dbd267ab931..758e2d1577d0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -416,7 +416,7 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) #define pgprot_nx(prot) \ - __pgprot_modify(prot, 0, PTE_PXN) + __pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN) /* * Mark the prot value as uncacheable and unbufferable. From 2d3a8e2deddea6c89961c422ec0c5b851e648c14 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 16 Jun 2020 20:16:55 +0800 Subject: [PATCH 154/462] block: Fix use-after-free in blkdev_get() In blkdev_get() we call __blkdev_get() to do some internal jobs and if there is some errors in __blkdev_get(), the bdput() is called which means we have released the refcount of the bdev (actually the refcount of the bdev inode). This means we cannot access bdev after that point. But acctually bdev is still accessed in blkdev_get() after calling __blkdev_get(). This results in use-after-free if the refcount is the last one we released in __blkdev_get(). Let's take a look at the following scenerio: CPU0 CPU1 CPU2 blkdev_open blkdev_open Remove disk bd_acquire blkdev_get __blkdev_get del_gendisk bdev_unhash_inode bd_acquire bdev_get_gendisk bd_forget failed because of unhashed bdput bdput (the last one) bdev_evict_inode access bdev => use after free [ 459.350216] BUG: KASAN: use-after-free in __lock_acquire+0x24c1/0x31b0 [ 459.351190] Read of size 8 at addr ffff88806c815a80 by task syz-executor.0/20132 [ 459.352347] [ 459.352594] CPU: 0 PID: 20132 Comm: syz-executor.0 Not tainted 4.19.90 #2 [ 459.353628] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 459.354947] Call Trace: [ 459.355337] dump_stack+0x111/0x19e [ 459.355879] ? __lock_acquire+0x24c1/0x31b0 [ 459.356523] print_address_description+0x60/0x223 [ 459.357248] ? __lock_acquire+0x24c1/0x31b0 [ 459.357887] kasan_report.cold+0xae/0x2d8 [ 459.358503] __lock_acquire+0x24c1/0x31b0 [ 459.359120] ? _raw_spin_unlock_irq+0x24/0x40 [ 459.359784] ? lockdep_hardirqs_on+0x37b/0x580 [ 459.360465] ? _raw_spin_unlock_irq+0x24/0x40 [ 459.361123] ? finish_task_switch+0x125/0x600 [ 459.361812] ? finish_task_switch+0xee/0x600 [ 459.362471] ? mark_held_locks+0xf0/0xf0 [ 459.363108] ? __schedule+0x96f/0x21d0 [ 459.363716] lock_acquire+0x111/0x320 [ 459.364285] ? blkdev_get+0xce/0xbe0 [ 459.364846] ? blkdev_get+0xce/0xbe0 [ 459.365390] __mutex_lock+0xf9/0x12a0 [ 459.365948] ? blkdev_get+0xce/0xbe0 [ 459.366493] ? bdev_evict_inode+0x1f0/0x1f0 [ 459.367130] ? blkdev_get+0xce/0xbe0 [ 459.367678] ? destroy_inode+0xbc/0x110 [ 459.368261] ? mutex_trylock+0x1a0/0x1a0 [ 459.368867] ? __blkdev_get+0x3e6/0x1280 [ 459.369463] ? bdev_disk_changed+0x1d0/0x1d0 [ 459.370114] ? blkdev_get+0xce/0xbe0 [ 459.370656] blkdev_get+0xce/0xbe0 [ 459.371178] ? find_held_lock+0x2c/0x110 [ 459.371774] ? __blkdev_get+0x1280/0x1280 [ 459.372383] ? lock_downgrade+0x680/0x680 [ 459.373002] ? lock_acquire+0x111/0x320 [ 459.373587] ? bd_acquire+0x21/0x2c0 [ 459.374134] ? do_raw_spin_unlock+0x4f/0x250 [ 459.374780] blkdev_open+0x202/0x290 [ 459.375325] do_dentry_open+0x49e/0x1050 [ 459.375924] ? blkdev_get_by_dev+0x70/0x70 [ 459.376543] ? __x64_sys_fchdir+0x1f0/0x1f0 [ 459.377192] ? inode_permission+0xbe/0x3a0 [ 459.377818] path_openat+0x148c/0x3f50 [ 459.378392] ? kmem_cache_alloc+0xd5/0x280 [ 459.379016] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 459.379802] ? path_lookupat.isra.0+0x900/0x900 [ 459.380489] ? __lock_is_held+0xad/0x140 [ 459.381093] do_filp_open+0x1a1/0x280 [ 459.381654] ? may_open_dev+0xf0/0xf0 [ 459.382214] ? find_held_lock+0x2c/0x110 [ 459.382816] ? lock_downgrade+0x680/0x680 [ 459.383425] ? __lock_is_held+0xad/0x140 [ 459.384024] ? do_raw_spin_unlock+0x4f/0x250 [ 459.384668] ? _raw_spin_unlock+0x1f/0x30 [ 459.385280] ? __alloc_fd+0x448/0x560 [ 459.385841] do_sys_open+0x3c3/0x500 [ 459.386386] ? filp_open+0x70/0x70 [ 459.386911] ? trace_hardirqs_on_thunk+0x1a/0x1c [ 459.387610] ? trace_hardirqs_off_caller+0x55/0x1c0 [ 459.388342] ? do_syscall_64+0x1a/0x520 [ 459.388930] do_syscall_64+0xc3/0x520 [ 459.389490] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 459.390248] RIP: 0033:0x416211 [ 459.390720] Code: 75 14 b8 02 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 04 19 00 00 c3 48 83 ec 08 e8 0a fa ff ff 48 89 04 24 b8 02 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 53 fa ff ff 48 89 d0 48 83 c4 08 48 3d 01 [ 459.393483] RSP: 002b:00007fe45dfe9a60 EFLAGS: 00000293 ORIG_RAX: 0000000000000002 [ 459.394610] RAX: ffffffffffffffda RBX: 00007fe45dfea6d4 RCX: 0000000000416211 [ 459.395678] RDX: 00007fe45dfe9b0a RSI: 0000000000000002 RDI: 00007fe45dfe9b00 [ 459.396758] RBP: 000000000076bf20 R08: 0000000000000000 R09: 000000000000000a [ 459.397930] R10: 0000000000000075 R11: 0000000000000293 R12: 00000000ffffffff [ 459.399022] R13: 0000000000000bd9 R14: 00000000004cdb80 R15: 000000000076bf2c [ 459.400168] [ 459.400430] Allocated by task 20132: [ 459.401038] kasan_kmalloc+0xbf/0xe0 [ 459.401652] kmem_cache_alloc+0xd5/0x280 [ 459.402330] bdev_alloc_inode+0x18/0x40 [ 459.402970] alloc_inode+0x5f/0x180 [ 459.403510] iget5_locked+0x57/0xd0 [ 459.404095] bdget+0x94/0x4e0 [ 459.404607] bd_acquire+0xfa/0x2c0 [ 459.405113] blkdev_open+0x110/0x290 [ 459.405702] do_dentry_open+0x49e/0x1050 [ 459.406340] path_openat+0x148c/0x3f50 [ 459.406926] do_filp_open+0x1a1/0x280 [ 459.407471] do_sys_open+0x3c3/0x500 [ 459.408010] do_syscall_64+0xc3/0x520 [ 459.408572] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 459.409415] [ 459.409679] Freed by task 1262: [ 459.410212] __kasan_slab_free+0x129/0x170 [ 459.410919] kmem_cache_free+0xb2/0x2a0 [ 459.411564] rcu_process_callbacks+0xbb2/0x2320 [ 459.412318] __do_softirq+0x225/0x8ac Fix this by delaying bdput() to the end of blkdev_get() which means we have finished accessing bdev. Fixes: 77ea887e433a ("implement in-kernel gendisk events handling") Reported-by: Hulk Robot Signed-off-by: Jason Yan Tested-by: Sedat Dilek Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Reviewed-by: Dan Carpenter Cc: Christoph Hellwig Cc: Jens Axboe Cc: Ming Lei Cc: Jan Kara Cc: Dan Carpenter Signed-off-by: Jens Axboe --- fs/block_dev.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 47860e589388..08c87db3a92b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1565,10 +1565,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) */ if (!for_part) { ret = devcgroup_inode_permission(bdev->bd_inode, perm); - if (ret != 0) { - bdput(bdev); + if (ret != 0) return ret; - } } restart: @@ -1637,8 +1635,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; BUG_ON(for_part); ret = __blkdev_get(whole, mode, 1); - if (ret) + if (ret) { + bdput(whole); goto out_clear; + } bdev->bd_contains = whole; bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || @@ -1688,7 +1688,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) disk_unblock_events(disk); put_disk_and_module(disk); out: - bdput(bdev); return ret; } @@ -1755,6 +1754,9 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) bdput(whole); } + if (res) + bdput(bdev); + return res; } EXPORT_SYMBOL(blkdev_get); From 3591e90fe1108909f822948bd7628412282f7e9d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 3 Jun 2020 11:40:38 +0900 Subject: [PATCH 155/462] selftests/ftrace: Convert required interface checks into requires list Convert the required tracefs interface checking code with requires: list. Fixed merge conflicts in trigger-hist.tc and trigger-trace-marker-hist.tc Shuah Khan Signed-off-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Signed-off-by: Shuah Khan --- .../ftrace/test.d/00basic/snapshot.tc | 3 +- .../ftrace/test.d/00basic/trace_pipe.tc | 3 +- .../ftrace/test.d/direct/kprobe-direct.tc | 6 +--- .../test.d/dynevent/add_remove_kprobe.tc | 3 +- .../test.d/dynevent/add_remove_synth.tc | 3 +- .../test.d/dynevent/clear_select_events.tc | 6 +--- .../test.d/dynevent/generic_clear_event.tc | 3 +- .../ftrace/test.d/event/event-enable.tc | 6 +--- .../ftrace/test.d/event/event-no-pid.tc | 11 +------ .../ftrace/test.d/event/event-pid.tc | 11 +------ .../ftrace/test.d/event/subsystem-enable.tc | 6 +--- .../ftrace/test.d/event/toplevel-enable.tc | 6 +--- .../test.d/ftrace/fgraph-filter-stack.tc | 7 +---- .../test.d/ftrace/func-filter-notrace-pid.tc | 6 +--- .../ftrace/test.d/ftrace/func-filter-pid.tc | 6 +--- .../ftrace/test.d/ftrace/func_profile_stat.tc | 3 +- .../ftrace/test.d/ftrace/func_profiler.tc | 6 +--- .../ftrace/test.d/ftrace/func_stack_tracer.tc | 6 +--- .../ftrace/test.d/ftrace/tracing-error-log.tc | 12 ++----- .../ftrace/test.d/instances/instance-event.tc | 6 +--- .../ftrace/test.d/instances/instance.tc | 6 +--- .../ftrace/test.d/kprobe/add_and_remove.tc | 3 +- .../ftrace/test.d/kprobe/busy_check.tc | 3 +- .../ftrace/test.d/kprobe/kprobe_args.tc | 3 +- .../ftrace/test.d/kprobe/kprobe_args_comm.tc | 3 +- .../test.d/kprobe/kprobe_args_string.tc | 3 +- .../test.d/kprobe/kprobe_args_symbol.tc | 3 +- .../test.d/kprobe/kprobe_args_syntax.tc | 3 +- .../ftrace/test.d/kprobe/kprobe_args_type.tc | 3 +- .../ftrace/test.d/kprobe/kprobe_args_user.tc | 3 +- .../ftrace/test.d/kprobe/kprobe_eventname.tc | 3 +- .../ftrace/test.d/kprobe/kprobe_ftrace.tc | 2 +- .../ftrace/test.d/kprobe/kprobe_module.tc | 3 +- .../ftrace/test.d/kprobe/kprobe_multiprobe.tc | 3 +- .../test.d/kprobe/kprobe_syntax_errors.tc | 5 +-- .../ftrace/test.d/kprobe/kretprobe_args.tc | 3 +- .../test.d/kprobe/kretprobe_maxactive.tc | 2 +- .../ftrace/test.d/kprobe/multiple_kprobes.tc | 3 +- .../ftrace/test.d/kprobe/probepoint.tc | 3 +- .../selftests/ftrace/test.d/kprobe/profile.tc | 3 +- .../test.d/kprobe/uprobe_syntax_errors.tc | 5 +-- .../inter-event/trigger-action-hist-xfail.tc | 11 +------ .../trigger-field-variable-support.tc | 16 +--------- .../trigger-inter-event-combined-hist.tc | 16 +--------- .../trigger-multi-actions-accept.tc | 16 +--------- .../trigger-onchange-action-hist.tc | 6 +--- .../trigger-onmatch-action-hist.tc | 16 +--------- .../trigger-onmatch-onmax-action-hist.tc | 16 +--------- .../inter-event/trigger-onmax-action-hist.tc | 16 +--------- .../trigger-snapshot-action-hist.tc | 16 +--------- .../trigger-synthetic-event-createremove.tc | 11 +------ .../trigger-synthetic-event-syntax.tc | 11 +------ .../inter-event/trigger-trace-action-hist.tc | 16 +--------- .../test.d/trigger/trigger-eventonoff.tc | 11 +------ .../ftrace/test.d/trigger/trigger-filter.tc | 11 +------ .../ftrace/test.d/trigger/trigger-hist-mod.tc | 16 +--------- .../trigger/trigger-hist-syntax-errors.tc | 18 +---------- .../ftrace/test.d/trigger/trigger-hist.tc | 16 +--------- .../test.d/trigger/trigger-multihist.tc | 16 +--------- .../ftrace/test.d/trigger/trigger-snapshot.tc | 16 +--------- .../test.d/trigger/trigger-stacktrace.tc | 11 +------ .../trigger/trigger-trace-marker-hist.tc | 21 +------------ .../trigger/trigger-trace-marker-snapshot.tc | 21 +------------ .../trigger-trace-marker-synthetic-kernel.tc | 31 +------------------ .../trigger/trigger-trace-marker-synthetic.tc | 26 +--------------- .../test.d/trigger/trigger-traceonoff.tc | 11 +------ 66 files changed, 68 insertions(+), 511 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc index 3b1f45e13a2e..13b4dabcf46e 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc @@ -1,9 +1,8 @@ #!/bin/sh # description: Snapshot and tracing setting +# requires: snapshot # flags: instance -[ ! -f snapshot ] && exit_unsupported - echo "Set tracing off" echo 0 > tracing_on diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc index 5058fbcfd90f..435d07b13407 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc @@ -1,10 +1,9 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: trace_pipe and trace_marker +# requires: trace_marker # flags: instance -[ ! -f trace_marker ] && exit_unsupported - echo "test input 1" > trace_marker : "trace interface never consume the ring buffer" diff --git a/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc b/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc index e95b744b23e4..e52e470a1f8f 100644 --- a/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc +++ b/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Test ftrace direct functions against kprobes +# requires: kprobe_events rmmod ftrace-direct ||: if ! modprobe ftrace-direct ; then @@ -8,11 +9,6 @@ if ! modprobe ftrace-direct ; then exit_unresolved; fi -if [ ! -f kprobe_events ]; then - echo "No kprobe_events file -please build CONFIG_KPROBE_EVENTS" - exit_unsupported; -fi - echo "Let the module run a little" sleep 1 diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc index c6d8387dbbb8..1a8c56b197a8 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Generic dynamic event - add/remove kprobe events - -[ -f dynamic_events ] || exit_unsupported +# requires: dynamic_events grep -q "place: \[:\]" README || exit_unsupported grep -q "place (kretprobe): \[:\]" README || exit_unsupported diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc index 62b77b5941d0..f5018b3afb39 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Generic dynamic event - add/remove synthetic events - -[ -f dynamic_events ] || exit_unsupported +# requires: dynamic_events grep -q "s:\[synthetic/\]" README || exit_unsupported diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc index e0842109cb57..e2484445ddec 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc @@ -1,17 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Generic dynamic event - selective clear (compatibility) - -[ -f dynamic_events ] || exit_unsupported +# requires: dynamic_events kprobe_events synthetic_events grep -q "place: \[:\]" README || exit_unsupported grep -q "place (kretprobe): \[:\]" README || exit_unsupported grep -q "s:\[synthetic/\]" README || exit_unsupported -[ -f synthetic_events ] || exit_unsupported -[ -f kprobe_events ] || exit_unsupported - echo 0 > events/enable echo > dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc index 901922e97878..e56cb60b4e56 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Generic dynamic event - generic clear event - -[ -f dynamic_events ] || exit_unsupported +# requires: dynamic_events grep -q "place: \[:\]" README || exit_unsupported grep -q "place (kretprobe): \[:\]" README || exit_unsupported diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc index dfb0d5122f7b..cfe5bd2d4267 100644 --- a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event tracing - enable/disable with event level files +# requires: set_event events/sched # flags: instance do_reset() { @@ -13,11 +14,6 @@ fail() { #msg exit_fail } -if [ ! -f set_event -o ! -d events/sched ]; then - echo "event tracing is not supported" - exit_unsupported -fi - echo 'sched:sched_switch' > set_event yield diff --git a/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc index f0f366f18d0c..e6eb78f0b954 100644 --- a/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event tracing - restricts events based on pid notrace filtering +# requires: set_event events/sched set_event_pid set_event_notrace_pid # flags: instance do_reset() { @@ -56,16 +57,6 @@ enable_events() { echo 1 > tracing_on } -if [ ! -f set_event -o ! -d events/sched ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f set_event_pid -o ! -f set_event_notrace_pid ]; then - echo "event pid notrace filtering is not supported" - exit_unsupported -fi - echo 0 > options/event-fork do_reset diff --git a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc index f9cb214220b1..7f5f97dffdc3 100644 --- a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event tracing - restricts events based on pid +# requires: set_event set_event_pid events/sched # flags: instance do_reset() { @@ -16,16 +17,6 @@ fail() { #msg exit_fail } -if [ ! -f set_event -o ! -d events/sched ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f set_event_pid ]; then - echo "event pid filtering is not supported" - exit_unsupported -fi - echo 0 > options/event-fork echo 1 > events/sched/sched_switch/enable diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index 83a8c571e93a..b1ede6249866 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event tracing - enable/disable with subsystem level files +# requires: set_event events/sched/enable # flags: instance do_reset() { @@ -13,11 +14,6 @@ fail() { #msg exit_fail } -if [ ! -f set_event -o ! -d events/sched ]; then - echo "event tracing is not supported" - exit_unsupported -fi - echo 'sched:*' > set_event yield diff --git a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc index 84d7bda08d2a..93c10ea42a68 100644 --- a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event tracing - enable/disable with top level files +# requires: available_events set_event events/enable do_reset() { echo > set_event @@ -12,11 +13,6 @@ fail() { #msg exit_fail } -if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then - echo "event tracing is not supported" - exit_unsupported -fi - echo '*:*' > set_event yield diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc index f59853857ad2..8dd22561d5b0 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function graph filters with stack tracer +# requires: stack_trace # Make sure that function graph filtering works, and is not # affected by other tracers enabled (like stack tracer) @@ -37,12 +38,6 @@ fi echo function_graph > current_tracer -if [ ! -f stack_trace ]; then - echo "Stack tracer not configured" - do_reset - exit_unsupported; -fi - echo "Now testing with stack tracer" echo 1 > /proc/sys/kernel/stack_tracer_enabled diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc index 71db68a7975f..799cd1b9c42c 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function pid notrace filters +# requires: set_ftrace_notrace_pid # flags: instance # Make sure that function pid matching filter with notrace works. @@ -10,11 +11,6 @@ if ! grep -q function available_tracers; then exit_unsupported fi -if [ ! -f set_ftrace_notrace_pid ]; then - echo "set_ftrace_notrace_pid not found? Is function tracer not set?" - exit_unsupported -fi - check_filter_file set_ftrace_filter do_function_fork=1 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index d58403c4b7cd..9497031913bb 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function pid filters +# requires: set_ftrace_pid # flags: instance # Make sure that function pid matching filter works. @@ -11,11 +12,6 @@ if ! grep -q function available_tracers; then exit_unsupported fi -if [ ! -f set_ftrace_pid ]; then - echo "set_ftrace_pid not found? Is function tracer not set?" - exit_unsupported -fi - check_filter_file set_ftrace_filter do_function_fork=1 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc index 0d501058aa75..4daeffb02fd8 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function profiling - -[ ! -f function_profile_enabled ] && exit_unsupported +# requires: function_profile_enabled : "Enable function profile" echo 1 > function_profile_enabled diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc index a3dadb6b93b4..817d504f751c 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function profiler with function tracing +# requires: function_profile_enabled # There was a bug after a rewrite of the ftrace infrastructure that # caused the function_profiler not to be able to run with the function @@ -20,11 +21,6 @@ fi check_filter_file set_ftrace_filter -if [ ! -f function_profile_enabled ]; then - echo "function_profile_enabled not found, function profiling enabled?" - exit_unsupported -fi - fail() { # mesg echo $1 exit_fail diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc index 51e9e80bc0e6..b3d78ef6c639 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc @@ -1,13 +1,9 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - Max stack tracer +# requires: stack_trace # Test the basic function of max-stack usage tracing -if [ ! -f stack_trace ]; then - echo "Max stack tracer is not supported - please make CONFIG_STACK_TRACER=y" - exit_unsupported -fi - check_filter_file stack_trace_filter echo > stack_trace_filter diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc b/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc index 23465823532b..6c190620db47 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc @@ -1,21 +1,15 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - test tracing error log support +# event tracing is currently the only ftrace tracer that uses the +# tracing error_log, hence this check +# requires: set_event error_log fail() { #msg echo $1 exit_fail } -# event tracing is currently the only ftrace tracer that uses the -# tracing error_log, hence this check -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -[ -f error_log ] || exit_unsupported - ftrace_errlog_check 'event filter parse error' '((sig >= 10 && sig < 15) || dsig ^== 17) && comm != bash' 'events/signal/signal_generate/filter' exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc index 4fa0f79144f4..0eb47fbb3f44 100644 --- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc +++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc @@ -1,11 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Test creation and deletion of trace instances while setting an event - -if [ ! -d instances ] ; then - echo "no instance directory with this kernel" - exit_unsupported; -fi +# requires: instances fail() { # mesg rmdir foo 2>/dev/null diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance.tc b/tools/testing/selftests/ftrace/test.d/instances/instance.tc index b84651283bf3..607521d2592b 100644 --- a/tools/testing/selftests/ftrace/test.d/instances/instance.tc +++ b/tools/testing/selftests/ftrace/test.d/instances/instance.tc @@ -1,11 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Test creation and deletion of trace instances - -if [ ! -d instances ] ; then - echo "no instance directory with this kernel" - exit_unsupported; -fi +# requires: instances fail() { # mesg rmdir x y z 2>/dev/null diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc index bb1eb5a7c64e..eba858c21815 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe dynamic event - adding and removing - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events echo p:myevent _do_fork > kprobe_events grep myevent kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc index 442c1a8c5edf..d10bf4f05bc8 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe dynamic event - busy event check - -[ -f kprobe_events ] || exit_unsupported +# requires: kprobe_events echo p:myevent _do_fork > kprobe_events test -d events/kprobes/myevent diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc index bcdecf80a8f1..61f2ac441aec 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe dynamic event with arguments - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)' diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc index 15c1f70fcaf9..05aaeed6987f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe event with comm arguments - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index 46e7744f8358..b5fa05443b39 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe event string type argument - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events case `uname -m` in x86_64) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc index 2b6dd33f9076..b8c75a3d003c 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe event symbol argument - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events SYMBOL="linux_proc_banner" diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc index 6f0f19953193..f06c1477f00f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe event argument syntax - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc index 81490ecaaa92..d599d6f544d4 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobes event arguments with types - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc index b41471f301ab..a9b3a788183f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe event user-memory access - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events grep -q '\$arg' README || exit_unsupported # depends on arch grep -A10 "fetcharg:" README | grep -q 'ustring' || exit_unsupported diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc index 3ff236719b6e..1f6981ef7afa 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe event auto/manual naming - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events :;: "Add an event on function without name" ;: diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index df5072815b87..b7f8536438bd 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -1,8 +1,8 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe dynamic event with function tracer +# requires: kprobe_events -[ -f kprobe_events ] || exit_unsupported # this is configurable grep "function" available_tracers || exit_unsupported # this is configurable check_filter_file set_ftrace_filter diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc index d861bd776c5e..7e74ee11edf9 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe dynamic event - probing module - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events rmmod trace-printk ||: if ! modprobe trace-printk ; then diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc index 44494bac86d1..8205d588bed4 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Create/delete multiprobe on kprobe event - -[ -f kprobe_events ] || exit_unsupported +# requires: kprobe_events grep -q "Create/append/" README || exit_unsupported diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index eb0f4ab4e070..b4d834675e59 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -1,10 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe event parser error log check - -[ -f kprobe_events ] || exit_unsupported # this is configurable - -[ -f error_log ] || exit_unsupported +# requires: kprobe_events error_log check_error() { # command-with-error-pos-by-^ ftrace_errlog_check 'trace_kprobe' "$1" 'kprobe_events' diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc index ac9ab4a12e53..523fde6d1aa5 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kretprobe dynamic event with arguments - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events # Add new kretprobe event echo 'r:testprobe2 _do_fork $retval' > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc index 8e05b178519a..0f96a45010d1 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc @@ -1,8 +1,8 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kretprobe dynamic event with maxactive +# requires: kprobe_events -[ -f kprobe_events ] || exit_unsupported # this is configurable grep -q 'r\[maxactive\]' README || exit_unsupported # this is older version # Test if we successfully reject unknown messages diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc index 6e3dbe5f96b7..312d23780096 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Register/unregister many kprobe events - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events # ftrace fentry skip size depends on the machine architecture. # Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc index a902aa0aaabc..624269c8d534 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe events - probe points - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events TARGET_FUNC=tracefs_create_dir diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc index 0384b525cdee..ff6c44adc8a0 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe dynamic event - adding and removing - -[ -f kprobe_events ] || exit_unsupported # this is configurable +# requires: kprobe_events ! grep -q 'myevent' kprobe_profile echo p:myevent _do_fork > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc index 14229d5778a0..7b5b60c3c5a2 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc @@ -1,10 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Uprobe event parser error log check - -[ -f uprobe_events ] || exit_unsupported # this is configurable - -[ -f error_log ] || exit_unsupported +# requires: uprobe_events error_log check_error() { # command-with-error-pos-by-^ ftrace_errlog_check 'trace_uprobe' "$1" 'uprobe_events' diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc index 3f2aee115f6e..07093bbd9816 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc @@ -1,22 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger expected fail actions +# requires: set_event snapshot fail() { #msg echo $1 exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f snapshot ]; then - echo "snapshot is not supported" - exit_unsupported -fi - grep -q "snapshot()" README || exit_unsupported # version issue echo "Test expected snapshot action failure" diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc index e232059a8ab2..41119e0440e9 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc @@ -1,27 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test field variable support +# requires: set_event synthetic_events events/sched/sched_process_fork/hist fail() { #msg echo $1 exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f synthetic_events ]; then - echo "synthetic event is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - echo "Test field variable support" echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc index 07cfcb8157b6..7449a4b8f1f9 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc @@ -1,27 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event combined histogram trigger +# requires: set_event synthetic_events events/sched/sched_process_fork/hist fail() { #msg echo $1 exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f synthetic_events ]; then - echo "synthetic event is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - echo "Test create synthetic event" echo 'waking_latency u64 lat pid_t pid' > synthetic_events diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc index 73e413c2ca26..3ad6e3fd8ac9 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc @@ -1,27 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test multiple actions on hist trigger +# requires: set_event synthetic_events events/sched/sched_process_fork/hist fail() { #msg echo $1 exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f synthetic_events ]; then - echo "synthetic event is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - echo "Test multiple actions on hist trigger" echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events TRIGGER1=events/sched/sched_wakeup/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc index c80007aa9f86..2366a8f8f8f2 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc @@ -1,17 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onchange action +# requires: set_event fail() { #msg echo $1 exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - grep -q "onchange(var)" README || exit_unsupported # version issue echo "Test onchange action" diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc index ebe0ad827f9f..20e39471052e 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc @@ -1,27 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onmatch action +# requires: set_event synthetic_events events/sched/sched_process_fork/hist fail() { #msg echo $1 exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f synthetic_events ]; then - echo "synthetic event is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - echo "Test create synthetic event" echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc index 2a2ef767249e..f4b03ab7c287 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc @@ -1,27 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onmatch-onmax action +# requires: set_event synthetic_events events/sched/sched_process_fork/hist fail() { #msg echo $1 exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f synthetic_events ]; then - echo "synthetic event is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - echo "Test create synthetic event" echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc index 98d73bfb0296..71c9b5911c70 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc @@ -1,27 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onmax action +# requires: set_event synthetic_events events/sched/sched_process_fork/hist fail() { #msg echo $1 exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f synthetic_events ]; then - echo "synthetic event is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - echo "Test create synthetic event" echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc index 01b01b9c4e07..2902a897255f 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc @@ -1,27 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger snapshot action +# requires: set_event snapshot events/sched/sched_process_fork/hist fail() { #msg echo $1 exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - -if [ ! -f snapshot ]; then - echo "snapshot is not supported" - exit_unsupported -fi - grep -q "onchange(var)" README || exit_unsupported # version issue grep -q "snapshot()" README || exit_unsupported # version issue diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc index df44b14724a4..a152b558b40a 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc @@ -1,22 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test synthetic event create remove +# requires: set_event synthetic_events fail() { #msg echo $1 exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f synthetic_events ]; then - echo "synthetic event is not supported" - exit_unsupported -fi - echo "Test create synthetic event" echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc index 88e6c3f43006..59216f3cfb12 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test synthetic_events syntax parser +# requires: set_event synthetic_events do_reset() { reset_trigger @@ -14,16 +15,6 @@ fail() { #msg exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f synthetic_events ]; then - echo "synthetic event is not supported" - exit_unsupported -fi - reset_tracer do_reset diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc index c3baa486aeb4..e2ef5268b7e9 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc @@ -1,27 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger trace action +# requires: set_event synthetic_events events/sched/sched_process_fork/hist fail() { #msg echo $1 exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f synthetic_events ]; then - echo "synthetic event is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - grep -q "trace(" README || exit_unsupported # version issue echo "Test create synthetic event" diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc index eddb51e1fbf7..c226acee74bf 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test event enable/disable trigger +# requires: set_event events/sched/sched_process_fork/trigger # flags: instance fail() { #msg @@ -8,16 +9,6 @@ fail() { #msg exit_fail } -if [ ! -f set_event -o ! -d events/sched ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/trigger ]; then - echo "event trigger is not supported" - exit_unsupported -fi - FEATURE=`grep enable_event events/sched/sched_process_fork/trigger` if [ -z "$FEATURE" ]; then echo "event enable/disable trigger is not supported" diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc index 2dcc2296ebdd..d9a198cb0f81 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test trigger filter +# requires: set_event events/sched/sched_process_fork/trigger # flags: instance fail() { #msg @@ -8,16 +9,6 @@ fail() { #msg exit_fail } -if [ ! -f set_event -o ! -d events/sched ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/trigger ]; then - echo "event trigger is not supported" - exit_unsupported -fi - echo "Test trigger filter" echo 1 > tracing_on echo 'traceoff if child_pid == 0' > events/sched/sched_process_fork/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc index fab4431639d3..4562e13cb26b 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test histogram modifiers +# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist # flags: instance fail() { #msg @@ -8,21 +9,6 @@ fail() { #msg exit_fail } -if [ ! -f set_event -o ! -d events/sched ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/trigger ]; then - echo "event trigger is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - echo "Test histogram with execname modifier" echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc index d44087a2f3d1..52cfe7828e8a 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc @@ -1,23 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test histogram parser errors - -if [ ! -f set_event -o ! -d events/kmem ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f events/kmem/kmalloc/trigger ]; then - echo "event trigger is not supported" - exit_unsupported -fi - -if [ ! -f events/kmem/kmalloc/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - -[ -f error_log ] || exit_unsupported +# requires: set_event events/kmem/kmalloc/trigger events/kmem/kmalloc/hist error_log check_error() { # command-with-error-pos-by-^ ftrace_errlog_check 'hist:kmem:kmalloc' "$1" 'events/kmem/kmalloc/trigger' diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc index bf6374e1f6ae..2950bfbc6fce 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test histogram trigger +# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist # flags: instance fail() { #msg @@ -8,21 +9,6 @@ fail() { #msg exit_fail } -if [ ! -f set_event -o ! -d events/sched ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/trigger ]; then - echo "event trigger is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - echo "Test histogram basic trigger" echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc index 68ff3f45c720..7129b52da947 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test multiple histogram triggers +# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist # flags: instance fail() { #msg @@ -8,21 +9,6 @@ fail() { #msg exit_fail } -if [ ! -f set_event -o ! -d events/sched ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/trigger ]; then - echo "event trigger is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - echo "Test histogram multiple triggers" echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc index ac738500d17f..33f5bdee387f 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc @@ -1,27 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test snapshot-trigger +# requires: set_event events/sched/sched_process_fork/trigger snapshot fail() { #msg echo $1 exit_fail } -if [ ! -f set_event -o ! -d events/sched ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/trigger ]; then - echo "event trigger is not supported" - exit_unsupported -fi - -if [ ! -f snapshot ]; then - echo "snapshot is not supported" - exit_unsupported -fi - FEATURE=`grep snapshot events/sched/sched_process_fork/trigger` if [ -z "$FEATURE" ]; then echo "snapshot trigger is not supported" diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc index 6248e6b40704..320ea9b3c6cd 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc @@ -1,22 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test stacktrace-trigger +# requires: set_event events/sched/sched_process_fork/trigger fail() { #msg echo $1 exit_fail } -if [ ! -f set_event -o ! -d events/sched ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/trigger ]; then - echo "event trigger is not supported" - exit_unsupported -fi - FEATURE=`grep stacktrace events/sched/sched_process_fork/trigger` if [ -z "$FEATURE" ]; then echo "stacktrace trigger is not supported" diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc index 01fdfd50b4be..68f3af9d9910 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: trace_marker trigger - test histogram trigger +# requires: set_event events/ftrace/print/trigger events/ftrace/print/hist # flags: instance fail() { #msg @@ -8,26 +9,6 @@ fail() { #msg exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -d events/ftrace/print ]; then - echo "event trace_marker is not supported" - exit_unsupported -fi - -if [ ! -f events/ftrace/print/trigger ]; then - echo "event trigger is not supported" - exit_unsupported -fi - -if [ ! -f events/ftrace/print/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - echo "Test histogram trace_marker trigger" echo 'hist:keys=common_pid' > events/ftrace/print/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc index a7fef298e476..27da2dba9b9e 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: trace_marker trigger - test snapshot trigger +# requires: set_event snapshot events/ftrace/print/trigger # flags: instance fail() { #msg @@ -8,26 +9,6 @@ fail() { #msg exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f snapshot ]; then - echo "snapshot is not supported" - exit_unsupported -fi - -if [ ! -d events/ftrace/print ]; then - echo "event trace_marker is not supported" - exit_unsupported -fi - -if [ ! -f events/ftrace/print/trigger ]; then - echo "event trigger is not supported" - exit_unsupported -fi - test_trace() { file=$1 x=$2 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc index 18b4d1c2807e..531139f41e94 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: trace_marker trigger - test histogram with synthetic event against kernel event +# requires: set_event synthetic_events events/sched/sched_waking events/ftrace/print/trigger events/ftrace/print/hist # flags: fail() { #msg @@ -8,36 +9,6 @@ fail() { #msg exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f synthetic_events ]; then - echo "synthetic events not supported" - exit_unsupported -fi - -if [ ! -d events/ftrace/print ]; then - echo "event trace_marker is not supported" - exit_unsupported -fi - -if [ ! -d events/sched/sched_waking ]; then - echo "event sched_waking is not supported" - exit_unsupported -fi - -if [ ! -f events/ftrace/print/trigger ]; then - echo "event trigger is not supported" - exit_unsupported -fi - -if [ ! -f events/ftrace/print/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - echo "Test histogram kernel event to trace_marker latency histogram trigger" echo 'latency u64 lat' > synthetic_events diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc index dd262d6d0db6..cc99cbb06d5e 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: trace_marker trigger - test histogram with synthetic event +# requires: set_event synthetic_events events/ftrace/print/trigger events/ftrace/print/hist # flags: fail() { #msg @@ -8,31 +9,6 @@ fail() { #msg exit_fail } -if [ ! -f set_event ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f synthetic_events ]; then - echo "synthetic events not supported" - exit_unsupported -fi - -if [ ! -d events/ftrace/print ]; then - echo "event trace_marker is not supported" - exit_unsupported -fi - -if [ ! -f events/ftrace/print/trigger ]; then - echo "event trigger is not supported" - exit_unsupported -fi - -if [ ! -f events/ftrace/print/hist ]; then - echo "hist trigger is not supported" - exit_unsupported -fi - echo "Test histogram trace_marker to trace_marker latency histogram trigger" echo 'latency u64 lat' > synthetic_events diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc index d5d2dcbc9cab..9ca04678f4da 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc @@ -1,22 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test traceon/off trigger +# requires: set_event events/sched/sched_process_fork/trigger fail() { #msg echo $1 exit_fail } -if [ ! -f set_event -o ! -d events/sched ]; then - echo "event tracing is not supported" - exit_unsupported -fi - -if [ ! -f events/sched/sched_process_fork/trigger ]; then - echo "event trigger is not supported" - exit_unsupported -fi - echo "Test traceoff trigger" echo 1 > tracing_on echo 'traceoff' > events/sched/sched_process_fork/trigger From 74e6072894d6ccd9c950d1c9bce3870596731810 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 3 Jun 2020 11:40:49 +0900 Subject: [PATCH 156/462] selftests/ftrace: Convert check_filter_file() with requires list Since check_filter_file() is basically checking the filter tracefs file, we can convert it into requires list. Signed-off-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Signed-off-by: Shuah Khan --- .../selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc | 4 +--- .../selftests/ftrace/test.d/ftrace/fgraph-filter.tc | 3 +-- .../selftests/ftrace/test.d/ftrace/func-filter-glob.tc | 3 +-- .../ftrace/test.d/ftrace/func-filter-notrace-pid.tc | 4 +--- .../selftests/ftrace/test.d/ftrace/func-filter-pid.tc | 4 +--- .../ftrace/test.d/ftrace/func-filter-stacktrace.tc | 3 +-- .../selftests/ftrace/test.d/ftrace/func_event_triggers.tc | 7 +++---- .../selftests/ftrace/test.d/ftrace/func_mod_trace.tc | 3 +-- .../selftests/ftrace/test.d/ftrace/func_profiler.tc | 4 +--- .../selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc | 6 +++--- .../selftests/ftrace/test.d/ftrace/func_stack_tracer.tc | 4 +--- .../ftrace/test.d/ftrace/func_traceonoff_triggers.tc | 6 +++--- tools/testing/selftests/ftrace/test.d/functions | 7 ------- .../selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc | 4 +--- 14 files changed, 19 insertions(+), 43 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc index 8dd22561d5b0..62af263fa75e 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function graph filters with stack tracer -# requires: stack_trace +# requires: stack_trace set_ftrace_filter # Make sure that function graph filtering works, and is not # affected by other tracers enabled (like stack tracer) @@ -11,8 +11,6 @@ if ! grep -q function_graph available_tracers; then exit_unsupported fi -check_filter_file set_ftrace_filter - do_reset() { if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then echo 0 > /proc/sys/kernel/stack_tracer_enabled diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc index d610f47edd90..56a86a2fc8ba 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function graph filters +# requires: set_ftrace_filter # Make sure that function graph filtering works @@ -9,8 +10,6 @@ if ! grep -q function_graph available_tracers; then exit_unsupported fi -check_filter_file set_ftrace_filter - fail() { # msg echo $1 exit_fail diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc index 28936f434ee5..ac3e4d35f181 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function glob filters +# requires: set_ftrace_filter # Make sure that function glob matching filter works. @@ -9,8 +10,6 @@ if ! grep -q function available_tracers; then exit_unsupported fi -check_filter_file set_ftrace_filter - disable_tracing clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc index 799cd1b9c42c..b7e18d30104a 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function pid notrace filters -# requires: set_ftrace_notrace_pid +# requires: set_ftrace_notrace_pid set_ftrace_filter # flags: instance # Make sure that function pid matching filter with notrace works. @@ -11,8 +11,6 @@ if ! grep -q function available_tracers; then exit_unsupported fi -check_filter_file set_ftrace_filter - do_function_fork=1 if [ ! -f options/function-fork ]; then diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index 9497031913bb..53ec48a92f39 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function pid filters -# requires: set_ftrace_pid +# requires: set_ftrace_pid set_ftrace_filter # flags: instance # Make sure that function pid matching filter works. @@ -12,8 +12,6 @@ if ! grep -q function available_tracers; then exit_unsupported fi -check_filter_file set_ftrace_filter - do_function_fork=1 if [ ! -f options/function-fork ]; then diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc index b2aff786c1a2..0f41e441c203 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc @@ -1,10 +1,9 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - stacktrace filter command +# requires: set_ftrace_filter # flags: instance -check_filter_file set_ftrace_filter - echo _do_fork:stacktrace >> set_ftrace_filter grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc index e9b1fd534e96..3145b0f1835c 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc @@ -3,15 +3,14 @@ # description: ftrace - test for function event triggers # flags: instance # +# The triggers are set within the set_ftrace_filter file +# requires: set_ftrace_filter +# # Ftrace allows to add triggers to functions, such as enabling or disabling # tracing, enabling or disabling trace events, or recording a stack trace # within the ring buffer. # # This test is designed to test event triggers -# - -# The triggers are set within the set_ftrace_filter file -check_filter_file set_ftrace_filter do_reset() { reset_ftrace_filter diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc index 1a4b4a442d33..37c8feb9078b 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc @@ -1,8 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function trace on module - -check_filter_file set_ftrace_filter +# requires: set_ftrace_filter : "mod: allows to filter a non exist function" echo 'non_exist_func:mod:non_exist_module' > set_ftrace_filter diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc index 817d504f751c..8b487daaaf68 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function profiler with function tracing -# requires: function_profile_enabled +# requires: function_profile_enabled set_ftrace_filter # There was a bug after a rewrite of the ftrace infrastructure that # caused the function_profiler not to be able to run with the function @@ -19,8 +19,6 @@ if ! grep -q function_graph available_tracers; then exit_unsupported; fi -check_filter_file set_ftrace_filter - fail() { # mesg echo $1 exit_fail diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc index 70bad441fa7d..e96e279e0533 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -2,6 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 # description: ftrace - test reading of set_ftrace_filter # +# The triggers are set within the set_ftrace_filter file +# requires: set_ftrace_filter +# # The set_ftrace_filter file of ftrace is used to list functions as well as # triggers (probes) attached to functions. The code to read this file is not # straight forward and has had various bugs in the past. This test is designed @@ -9,9 +12,6 @@ # file in various ways (cat vs dd). # -# The triggers are set within the set_ftrace_filter file -check_filter_file set_ftrace_filter - fail() { # mesg echo $1 exit_fail diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc index b3d78ef6c639..61264e422699 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc @@ -1,11 +1,9 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - Max stack tracer -# requires: stack_trace +# requires: stack_trace stack_trace_filter # Test the basic function of max-stack usage tracing -check_filter_file stack_trace_filter - echo > stack_trace_filter echo 0 > stack_max_size echo 1 > /proc/sys/kernel/stack_tracer_enabled diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc index 3ed173f2944a..aee22289536b 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc @@ -3,6 +3,9 @@ # description: ftrace - test for function traceon/off triggers # flags: instance # +# The triggers are set within the set_ftrace_filter file +# requires: set_ftrace_filter +# # Ftrace allows to add triggers to functions, such as enabling or disabling # tracing, enabling or disabling trace events, or recording a stack trace # within the ring buffer. @@ -10,9 +13,6 @@ # This test is designed to test enabling and disabling tracing triggers # -# The triggers are set within the set_ftrace_filter file -check_filter_file set_ftrace_filter - fail() { # mesg echo $1 exit_fail diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 5100eb1ada0f..cd2756c77ed9 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -1,10 +1,3 @@ -check_filter_file() { # check filter file introduced by dynamic ftrace - if [ ! -f "$1" ]; then - echo "$1 not found? Is dynamic ftrace not set?" - exit_unsupported - fi -} - clear_trace() { # reset trace output echo > trace } diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index b7f8536438bd..9a983df5ae17 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -1,12 +1,10 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe dynamic event with function tracer -# requires: kprobe_events +# requires: kprobe_events stack_trace_filter grep "function" available_tracers || exit_unsupported # this is configurable -check_filter_file set_ftrace_filter - # prepare echo nop > current_tracer echo _do_fork > set_ftrace_filter From 305c8388fd0c92f37ab766dbf864b7bea4b66f5f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 3 Jun 2020 11:40:59 +0900 Subject: [PATCH 157/462] selftests/ftrace: Support ":tracer" suffix for requires Add ":tracer" suffix support for the requires list, so that the testcase can list up the required tracer (e.g. function) to the requires list. For example, if the testcase requires function_graph tracer, it can write requires list as below instead of checking available_tracers. # requires: function_graph:tracer Signed-off-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Signed-off-by: Shuah Khan --- .../ftrace/test.d/ftrace/fgraph-filter-stack.tc | 7 +------ .../selftests/ftrace/test.d/ftrace/fgraph-filter.tc | 7 +------ .../selftests/ftrace/test.d/ftrace/func-filter-glob.tc | 7 +------ .../ftrace/test.d/ftrace/func-filter-notrace-pid.tc | 7 +------ .../selftests/ftrace/test.d/ftrace/func-filter-pid.tc | 7 +------ .../selftests/ftrace/test.d/ftrace/func_cpumask.tc | 6 +----- .../selftests/ftrace/test.d/ftrace/func_profiler.tc | 6 +----- tools/testing/selftests/ftrace/test.d/functions | 10 ++++++++-- .../selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc | 4 +--- .../ftrace/test.d/preemptirq/irqsoff_tracer.tc | 4 +--- tools/testing/selftests/ftrace/test.d/template | 3 ++- tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc | 6 +----- .../selftests/ftrace/test.d/tracer/wakeup_rt.tc | 6 +----- 13 files changed, 21 insertions(+), 59 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc index 62af263fa75e..cf3ea42b12b0 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc @@ -1,16 +1,11 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function graph filters with stack tracer -# requires: stack_trace set_ftrace_filter +# requires: stack_trace set_ftrace_filter function_graph:tracer # Make sure that function graph filtering works, and is not # affected by other tracers enabled (like stack tracer) -if ! grep -q function_graph available_tracers; then - echo "no function graph tracer configured" - exit_unsupported -fi - do_reset() { if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then echo 0 > /proc/sys/kernel/stack_tracer_enabled diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc index 56a86a2fc8ba..b3ccdaec2a61 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc @@ -1,15 +1,10 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function graph filters -# requires: set_ftrace_filter +# requires: set_ftrace_filter function_graph:tracer # Make sure that function graph filtering works -if ! grep -q function_graph available_tracers; then - echo "no function graph tracer configured" - exit_unsupported -fi - fail() { # msg echo $1 exit_fail diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc index ac3e4d35f181..4b994b6df5ac 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc @@ -1,15 +1,10 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function glob filters -# requires: set_ftrace_filter +# requires: set_ftrace_filter function:tracer # Make sure that function glob matching filter works. -if ! grep -q function available_tracers; then - echo "no function tracer configured" - exit_unsupported -fi - disable_tracing clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc index b7e18d30104a..acb17ce543d2 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc @@ -1,16 +1,11 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function pid notrace filters -# requires: set_ftrace_notrace_pid set_ftrace_filter +# requires: set_ftrace_notrace_pid set_ftrace_filter function:tracer # flags: instance # Make sure that function pid matching filter with notrace works. -if ! grep -q function available_tracers; then - echo "no function tracer configured" - exit_unsupported -fi - do_function_fork=1 if [ ! -f options/function-fork ]; then diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index 53ec48a92f39..9f0a9687c773 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -1,17 +1,12 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function pid filters -# requires: set_ftrace_pid set_ftrace_filter +# requires: set_ftrace_pid set_ftrace_filter function:tracer # flags: instance # Make sure that function pid matching filter works. # Also test it on an instance directory -if ! grep -q function available_tracers; then - echo "no function tracer configured" - exit_unsupported -fi - do_function_fork=1 if [ ! -f options/function-fork ]; then diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc index 71fa3f49e35e..0c6cf7725110 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function trace with cpumask +# requires: function:tracer if ! which nproc ; then nproc() { @@ -15,11 +16,6 @@ if [ $NP -eq 1 ] ;then exit_unresolved fi -if ! grep -q "function" available_tracers ; then - echo "Function trace is not enabled" - exit_unsupported -fi - ORIG_CPUMASK=`cat tracing_cpumask` do_reset() { diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc index 8b487daaaf68..1dbd766c0cd2 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function profiler with function tracing -# requires: function_profile_enabled set_ftrace_filter +# requires: function_profile_enabled set_ftrace_filter function_graph:tracer # There was a bug after a rewrite of the ftrace infrastructure that # caused the function_profiler not to be able to run with the function @@ -14,10 +14,6 @@ # This test triggers those bugs on those kernels. # # We need function_graph and profiling to to run this test -if ! grep -q function_graph available_tracers; then - echo "no function graph tracer configured" - exit_unsupported; -fi fail() { # mesg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index cd2756c77ed9..35de6bc9613b 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -106,9 +106,15 @@ initialize_ftrace() { # Reset ftrace to initial-state enable_tracing } -check_requires() { # Check required files +check_requires() { # Check required files and tracers for i in $* ; do - if [ ! -e $i ]; then + t=${i%:tracer} + if [ $t != $i ]; then + if ! grep -wq $t available_tracers ; then + echo "Required tracer $t is not configured." + exit_unsupported + fi + elif [ ! -e $i ]; then echo "Required feature interface $i doesn't exist." exit_unsupported fi diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index 9a983df5ae17..81d8b58c03bc 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -1,9 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe dynamic event with function tracer -# requires: kprobe_events stack_trace_filter - -grep "function" available_tracers || exit_unsupported # this is configurable +# requires: kprobe_events stack_trace_filter function:tracer # prepare echo nop > current_tracer diff --git a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc index 2b82c80edf69..22bff122b933 100644 --- a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc +++ b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: test for the preemptirqsoff tracer +# requires: preemptoff:tracer irqsoff:tracer MOD=preemptirq_delay_test @@ -27,9 +28,6 @@ unres() { #msg modprobe $MOD || unres "$MOD module not available" rmmod $MOD -grep -q "preemptoff" available_tracers || unsup "preemptoff tracer not enabled" -grep -q "irqsoff" available_tracers || unsup "irqsoff tracer not enabled" - reset_tracer # Simulate preemptoff section for half a second couple of times diff --git a/tools/testing/selftests/ftrace/test.d/template b/tools/testing/selftests/ftrace/test.d/template index 611423c4e75f..b1c2f1920897 100644 --- a/tools/testing/selftests/ftrace/test.d/template +++ b/tools/testing/selftests/ftrace/test.d/template @@ -1,7 +1,8 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: %HERE DESCRIBE WHAT THIS DOES% -# requires: %HERE LIST THE REQUIRED FILES% +# requires: %HERE LIST THE REQUIRED FILES OR TRACERS% +# The required tracer needs :tracer suffix, e.g. function:tracer # you have to add ".tc" extention for your testcase file # Note that all tests are run with "errexit" option. diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc index b0893d7edda3..11be10e1bf96 100644 --- a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc +++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc @@ -1,17 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Test wakeup tracer +# requires: wakeup:tracer if ! which chrt ; then echo "chrt is not found. This test requires nice command." exit_unresolved fi -if ! grep -wq "wakeup" available_tracers ; then - echo "wakeup tracer is not supported" - exit_unsupported -fi - echo wakeup > current_tracer echo 1 > tracing_on echo 0 > tracing_max_latency diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc index b9b6669a623b..3a77198b3c69 100644 --- a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc +++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc @@ -1,17 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Test wakeup RT tracer +# requires: wakeup_rt:tracer if ! which chrt ; then echo "chrt is not found. This test requires chrt command." exit_unresolved fi -if ! grep -wq "wakeup_rt" available_tracers ; then - echo "wakeup_rt tracer is not supported" - exit_unsupported -fi - echo wakeup_rt > current_tracer echo 1 > tracing_on echo 0 > tracing_max_latency From 1b8eec510ba641418573eacc98a7e9c07726af30 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 3 Jun 2020 11:41:09 +0900 Subject: [PATCH 158/462] selftests/ftrace: Support ":README" suffix for requires Add ":README" suffix support for the requires list, so that the testcase can list up the required string for README file to the requires list. Note that the required string is treated as a fixed string, instead of regular expression. Also, the testcase can specify a string containing spaces with quotes. E.g. # requires: "place: [:]":README Signed-off-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/ftracetest | 3 ++- .../selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc | 5 +---- .../selftests/ftrace/test.d/dynevent/add_remove_synth.tc | 4 +--- .../ftrace/test.d/dynevent/clear_select_events.tc | 7 +------ .../ftrace/test.d/dynevent/generic_clear_event.tc | 7 +------ tools/testing/selftests/ftrace/test.d/functions | 8 +++++++- .../selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc | 4 +--- .../selftests/ftrace/test.d/kprobe/kprobe_args_type.tc | 4 +--- .../selftests/ftrace/test.d/kprobe/kprobe_args_user.tc | 3 +-- .../selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc | 4 +--- .../selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc | 4 +--- tools/testing/selftests/ftrace/test.d/template | 4 +++- .../trigger/inter-event/trigger-action-hist-xfail.tc | 4 +--- .../trigger/inter-event/trigger-onchange-action-hist.tc | 4 +--- .../trigger/inter-event/trigger-snapshot-action-hist.tc | 6 +----- .../trigger/inter-event/trigger-trace-action-hist.tc | 4 +--- 16 files changed, 25 insertions(+), 50 deletions(-) diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index cdf7940b6610..8ec1922e974e 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -269,7 +269,8 @@ testcase() { # testfile checkreq() { # testfile requires=`grep "^#[ \t]*requires:" $1 | cut -f2- -d:` - check_requires $requires + # Use eval to pass quoted-patterns correctly. + eval check_requires "$requires" } test_on_instance() { # testfile diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc index 1a8c56b197a8..68550f97d3c3 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -1,10 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Generic dynamic event - add/remove kprobe events -# requires: dynamic_events - -grep -q "place: \[:\]" README || exit_unsupported -grep -q "place (kretprobe): \[:\]" README || exit_unsupported +# requires: dynamic_events "place: [:]":README "place (kretprobe): [:]":README echo 0 > events/enable echo > dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc index f5018b3afb39..2b94611e1a28 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc @@ -1,9 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Generic dynamic event - add/remove synthetic events -# requires: dynamic_events - -grep -q "s:\[synthetic/\]" README || exit_unsupported +# requires: dynamic_events "s:[synthetic/]":README echo 0 > events/enable echo > dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc index e2484445ddec..c969be9eb7de 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc @@ -1,12 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Generic dynamic event - selective clear (compatibility) -# requires: dynamic_events kprobe_events synthetic_events - -grep -q "place: \[:\]" README || exit_unsupported -grep -q "place (kretprobe): \[:\]" README || exit_unsupported - -grep -q "s:\[synthetic/\]" README || exit_unsupported +# requires: dynamic_events kprobe_events synthetic_events "place: [:]":README "place (kretprobe): [:]":README "s:[synthetic/]":README echo 0 > events/enable echo > dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc index e56cb60b4e56..16d543eaac88 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc @@ -1,12 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Generic dynamic event - generic clear event -# requires: dynamic_events - -grep -q "place: \[:\]" README || exit_unsupported -grep -q "place (kretprobe): \[:\]" README || exit_unsupported - -grep -q "s:\[synthetic/\]" README || exit_unsupported +# requires: dynamic_events "place: [:]":README "place (kretprobe): [:]":README "s:[synthetic/]":README echo 0 > events/enable echo > dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 35de6bc9613b..c5dec55b7d95 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -107,13 +107,19 @@ initialize_ftrace() { # Reset ftrace to initial-state } check_requires() { # Check required files and tracers - for i in $* ; do + for i in "$@" ; do + r=${i%:README} t=${i%:tracer} if [ $t != $i ]; then if ! grep -wq $t available_tracers ; then echo "Required tracer $t is not configured." exit_unsupported fi + elif [ $r != $i ]; then + if ! grep -Fq "$r" README ; then + echo "Required feature pattern \"$r\" is not in README." + exit_unsupported + fi elif [ ! -e $i ]; then echo "Required feature interface $i doesn't exist." exit_unsupported diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc index f06c1477f00f..474ca1a9a088 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc @@ -1,9 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe event argument syntax -# requires: kprobe_events - -grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue +# requires: kprobe_events "x8/16/32/64":README PROBEFUNC="vfs_read" GOODREG= diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc index d599d6f544d4..0610e0b5587c 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -1,9 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobes event arguments with types -# requires: kprobe_events - -grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue +# requires: kprobe_events "x8/16/32/64":README gen_event() { # Bitsize echo "p:testprobe _do_fork \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc index a9b3a788183f..a30a9c07290d 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc @@ -1,9 +1,8 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe event user-memory access -# requires: kprobe_events +# requires: kprobe_events '$arg':README -grep -q '\$arg' README || exit_unsupported # depends on arch grep -A10 "fetcharg:" README | grep -q 'ustring' || exit_unsupported grep -A10 "fetcharg:" README | grep -q '\[u\]' || exit_unsupported diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc index 8205d588bed4..366b7e1b6718 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc @@ -1,9 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Create/delete multiprobe on kprobe event -# requires: kprobe_events - -grep -q "Create/append/" README || exit_unsupported +# requires: kprobe_events "Create/append/":README # Choose 2 symbols for target SYM1=_do_fork diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc index 0f96a45010d1..4f0b268c1233 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc @@ -1,9 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kretprobe dynamic event with maxactive -# requires: kprobe_events - -grep -q 'r\[maxactive\]' README || exit_unsupported # this is older version +# requires: kprobe_events 'r[maxactive]':README # Test if we successfully reject unknown messages if echo 'a:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi diff --git a/tools/testing/selftests/ftrace/test.d/template b/tools/testing/selftests/ftrace/test.d/template index b1c2f1920897..2cd8947edf72 100644 --- a/tools/testing/selftests/ftrace/test.d/template +++ b/tools/testing/selftests/ftrace/test.d/template @@ -1,8 +1,10 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: %HERE DESCRIBE WHAT THIS DOES% -# requires: %HERE LIST THE REQUIRED FILES OR TRACERS% +# requires: %HERE LIST THE REQUIRED FILES, TRACERS OR README-STRINGS% # The required tracer needs :tracer suffix, e.g. function:tracer +# The required README string needs :README suffix, e.g. "x8/16/32/64":README +# and the README string is treated as a fixed-string instead of regexp pattern. # you have to add ".tc" extention for your testcase file # Note that all tests are run with "errexit" option. diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc index 07093bbd9816..1590d6bfb857 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc @@ -1,15 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger expected fail actions -# requires: set_event snapshot +# requires: set_event snapshot "snapshot()":README fail() { #msg echo $1 exit_fail } -grep -q "snapshot()" README || exit_unsupported # version issue - echo "Test expected snapshot action failure" echo 'hist:keys=comm:onmatch(sched.sched_wakeup).snapshot()' >> events/sched/sched_waking/trigger && exit_fail diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc index 2366a8f8f8f2..adaabb873ed4 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc @@ -1,15 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onchange action -# requires: set_event +# requires: set_event "onchange(var)":README fail() { #msg echo $1 exit_fail } -grep -q "onchange(var)" README || exit_unsupported # version issue - echo "Test onchange action" echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio) if comm=="ping"' >> events/sched/sched_waking/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc index 2902a897255f..67fa328b830f 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc @@ -1,17 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger snapshot action -# requires: set_event snapshot events/sched/sched_process_fork/hist +# requires: set_event snapshot events/sched/sched_process_fork/hist "onchange(var)":README "snapshot()":README fail() { #msg echo $1 exit_fail } -grep -q "onchange(var)" README || exit_unsupported # version issue - -grep -q "snapshot()" README || exit_unsupported # version issue - echo "Test snapshot action" echo 1 > events/sched/enable diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc index e2ef5268b7e9..c126d2350a6d 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc @@ -1,15 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger trace action -# requires: set_event synthetic_events events/sched/sched_process_fork/hist +# requires: set_event synthetic_events events/sched/sched_process_fork/hist "trace(":README fail() { #msg echo $1 exit_fail } -grep -q "trace(" README || exit_unsupported # version issue - echo "Test create synthetic event" echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events From 9fecd13202f520f3f25d5b1c313adb740fe19773 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 1 Jun 2020 19:12:06 +0100 Subject: [PATCH 159/462] btrfs: fix a block group ref counter leak after failure to remove block group When removing a block group, if we fail to delete the block group's item from the extent tree, we jump to the 'out' label and end up decrementing the block group's reference count once only (by 1), resulting in a counter leak because the block group at that point was already removed from the block group cache rbtree - so we have to decrement the reference count twice, once for the rbtree and once for our lookup at the start of the function. There is a second bug where if removing the free space tree entries (the call to remove_block_group_free_space()) fails we end up jumping to the 'out_put_group' label but end up decrementing the reference count only once, when we should have done it twice, since we have already removed the block group from the block group cache rbtree. This happens because the reference count decrement for the rbtree reference happens after attempting to remove the free space tree entries, which is far away from the place where we remove the block group from the rbtree. To make things less error prone, decrement the reference count for the rbtree immediately after removing the block group from it. This also eleminates the need for two different exit labels on error, renaming 'out_put_label' to just 'out' and removing the old 'out'. Fixes: f6033c5e333238 ("btrfs: fix block group leak when removing fails") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Nikolay Borisov Reviewed-by: Anand Jain Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 176e8a292fd1..6462dd0b155c 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -940,7 +940,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; - goto out_put_group; + goto out; } /* @@ -978,7 +978,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) { btrfs_add_delayed_iput(inode); - goto out_put_group; + goto out; } clear_nlink(inode); /* One for the block groups ref */ @@ -1001,13 +1001,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); if (ret < 0) - goto out_put_group; + goto out; if (ret > 0) btrfs_release_path(path); if (ret == 0) { ret = btrfs_del_item(trans, tree_root, path); if (ret) - goto out_put_group; + goto out; btrfs_release_path(path); } @@ -1016,6 +1016,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, &fs_info->block_group_cache_tree); RB_CLEAR_NODE(&block_group->cache_node); + /* Once for the block groups rbtree */ + btrfs_put_block_group(block_group); + if (fs_info->first_logical_byte == block_group->start) fs_info->first_logical_byte = (u64)-1; spin_unlock(&fs_info->block_group_cache_lock); @@ -1125,10 +1128,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = remove_block_group_free_space(trans, block_group); if (ret) - goto out_put_group; - - /* Once for the block groups rbtree */ - btrfs_put_block_group(block_group); + goto out; ret = remove_block_group_item(trans, path, block_group); if (ret < 0) @@ -1145,10 +1145,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, free_extent_map(em); } -out_put_group: +out: /* Once for the lookup reference */ btrfs_put_block_group(block_group); -out: if (remove_rsv) btrfs_delayed_refs_rsv_release(fs_info, 1); btrfs_free_path(path); From ffcb9d44572afbaf8fa6dbf5115bff6dab7b299e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 1 Jun 2020 19:12:19 +0100 Subject: [PATCH 160/462] btrfs: fix race between block group removal and block group creation There is a race between block group removal and block group creation when the removal is completed by a task running fitrim or scrub. When this happens we end up failing the block group creation with an error -EEXIST since we attempt to insert a duplicate block group item key in the extent tree. That results in a transaction abort. The race happens like this: 1) Task A is doing a fitrim, and at btrfs_trim_block_group() it freezes block group X with btrfs_freeze_block_group() (until very recently that was named btrfs_get_block_group_trimming()); 2) Task B starts removing block group X, either because it's now unused or due to relocation for example. So at btrfs_remove_block_group(), while holding the chunk mutex and the block group's lock, it sets the 'removed' flag of the block group and it sets the local variable 'remove_em' to false, because the block group is currently frozen (its 'frozen' counter is > 0, until very recently this counter was named 'trimming'); 3) Task B unlocks the block group and the chunk mutex; 4) Task A is done trimming the block group and unfreezes the block group by calling btrfs_unfreeze_block_group() (until very recently this was named btrfs_put_block_group_trimming()). In this function we lock the block group and set the local variable 'cleanup' to true because we were able to decrement the block group's 'frozen' counter down to 0 and the flag 'removed' is set in the block group. Since 'cleanup' is set to true, it locks the chunk mutex and removes the extent mapping representing the block group from the mapping tree; 5) Task C allocates a new block group Y and it picks up the logical address that block group X had as the logical address for Y, because X was the block group with the highest logical address and now the second block group with the highest logical address, the last in the fs mapping tree, ends at an offset corresponding to block group X's logical address (this logical address selection is done at volumes.c:find_next_chunk()). At this point the new block group Y does not have yet its item added to the extent tree (nor the corresponding device extent items and chunk item in the device and chunk trees). The new group Y is added to the list of pending block groups in the transaction handle; 6) Before task B proceeds to removing the block group item for block group X from the extent tree, which has a key matching: (X logical offset, BTRFS_BLOCK_GROUP_ITEM_KEY, length) task C while ending its transaction handle calls btrfs_create_pending_block_groups(), which finds block group Y and tries to insert the block group item for Y into the exten tree, which fails with -EEXIST since logical offset is the same that X had and task B hasn't yet deleted the key from the extent tree. This failure results in a transaction abort, producing a stack like the following: ------------[ cut here ]------------ BTRFS: Transaction aborted (error -17) WARNING: CPU: 2 PID: 19736 at fs/btrfs/block-group.c:2074 btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs] Modules linked in: btrfs blake2b_generic xor raid6_pq (...) CPU: 2 PID: 19736 Comm: fsstress Tainted: G W 5.6.0-rc7-btrfs-next-58 #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_create_pending_block_groups+0x1eb/0x260 [btrfs] Code: ff ff ff 48 8b 55 50 f0 48 (...) RSP: 0018:ffffa4160a1c7d58 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff961581909d98 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffffb3d63990 RDI: 0000000000000001 RBP: ffff9614f3356a58 R08: 0000000000000000 R09: 0000000000000001 R10: ffff9615b65b0040 R11: 0000000000000000 R12: ffff961581909c10 R13: ffff9615b0c32000 R14: ffff9614f3356ab0 R15: ffff9614be779000 FS: 00007f2ce2841e80(0000) GS:ffff9615bae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000555f18780000 CR3: 0000000131d34005 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_start_dirty_block_groups+0x398/0x4e0 [btrfs] btrfs_commit_transaction+0xd0/0xc50 [btrfs] ? btrfs_attach_transaction_barrier+0x1e/0x50 [btrfs] ? __ia32_sys_fdatasync+0x20/0x20 iterate_supers+0xdb/0x180 ksys_sync+0x60/0xb0 __ia32_sys_sync+0xa/0x10 do_syscall_64+0x5c/0x280 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f2ce1d4d5b7 Code: 83 c4 08 48 3d 01 (...) RSP: 002b:00007ffd8b558c58 EFLAGS: 00000202 ORIG_RAX: 00000000000000a2 RAX: ffffffffffffffda RBX: 000000000000002c RCX: 00007f2ce1d4d5b7 RDX: 00000000ffffffff RSI: 00000000186ba07b RDI: 000000000000002c RBP: 0000555f17b9e520 R08: 0000000000000012 R09: 000000000000ce00 R10: 0000000000000078 R11: 0000000000000202 R12: 0000000000000032 R13: 0000000051eb851f R14: 00007ffd8b558cd0 R15: 0000555f1798ec20 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x74f/0x2020 softirqs last enabled at (0): [] copy_process+0x74f/0x2020 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace bd7c03622e0b0a9c ]--- Fix this simply by making btrfs_remove_block_group() remove the block group's item from the extent tree before it flags the block group as removed. Also make the free space deletion from the free space tree before flagging the block group as removed, to avoid a similar race with adding and removing free space entries for the free space tree. Fixes: 04216820fe83d5 ("Btrfs: fix race between fs trimming and block group remove/allocation") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 6462dd0b155c..c037ef514b64 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1092,6 +1092,25 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&block_group->space_info->lock); + /* + * Remove the free space for the block group from the free space tree + * and the block group's item from the extent tree before marking the + * block group as removed. This is to prevent races with tasks that + * freeze and unfreeze a block group, this task and another task + * allocating a new block group - the unfreeze task ends up removing + * the block group's extent map before the task calling this function + * deletes the block group item from the extent tree, allowing for + * another task to attempt to create another block group with the same + * item key (and failing with -EEXIST and a transaction abort). + */ + ret = remove_block_group_free_space(trans, block_group); + if (ret) + goto out; + + ret = remove_block_group_item(trans, path, block_group); + if (ret < 0) + goto out; + mutex_lock(&fs_info->chunk_mutex); spin_lock(&block_group->lock); block_group->removed = 1; @@ -1126,14 +1145,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, mutex_unlock(&fs_info->chunk_mutex); - ret = remove_block_group_free_space(trans, block_group); - if (ret) - goto out; - - ret = remove_block_group_item(trans, path, block_group); - if (ret < 0) - goto out; - if (remove_em) { struct extent_map_tree *em_tree; From 432cd2a10f1c10cead91fe706ff5dc52f06d642a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 8 Jun 2020 13:32:55 +0100 Subject: [PATCH 161/462] btrfs: fix data block group relocation failure due to concurrent scrub When running relocation of a data block group while scrub is running in parallel, it is possible that the relocation will fail and abort the current transaction with an -EINVAL error: [134243.988595] BTRFS info (device sdc): found 14 extents, stage: move data extents [134243.999871] ------------[ cut here ]------------ [134244.000741] BTRFS: Transaction aborted (error -22) [134244.001692] WARNING: CPU: 0 PID: 26954 at fs/btrfs/ctree.c:1071 __btrfs_cow_block+0x6a7/0x790 [btrfs] [134244.003380] Modules linked in: btrfs blake2b_generic xor raid6_pq (...) [134244.012577] CPU: 0 PID: 26954 Comm: btrfs Tainted: G W 5.6.0-rc7-btrfs-next-58 #5 [134244.014162] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 [134244.016184] RIP: 0010:__btrfs_cow_block+0x6a7/0x790 [btrfs] [134244.017151] Code: 48 c7 c7 (...) [134244.020549] RSP: 0018:ffffa41607863888 EFLAGS: 00010286 [134244.021515] RAX: 0000000000000000 RBX: ffff9614bdfe09c8 RCX: 0000000000000000 [134244.022822] RDX: 0000000000000001 RSI: ffffffffb3d63980 RDI: 0000000000000001 [134244.024124] RBP: ffff961589e8c000 R08: 0000000000000000 R09: 0000000000000001 [134244.025424] R10: ffffffffc0ae5955 R11: 0000000000000000 R12: ffff9614bd530d08 [134244.026725] R13: ffff9614ced41b88 R14: ffff9614bdfe2a48 R15: 0000000000000000 [134244.028024] FS: 00007f29b63c08c0(0000) GS:ffff9615ba600000(0000) knlGS:0000000000000000 [134244.029491] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [134244.030560] CR2: 00007f4eb339b000 CR3: 0000000130d6e006 CR4: 00000000003606f0 [134244.031997] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [134244.033153] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [134244.034484] Call Trace: [134244.034984] btrfs_cow_block+0x12b/0x2b0 [btrfs] [134244.035859] do_relocation+0x30b/0x790 [btrfs] [134244.036681] ? do_raw_spin_unlock+0x49/0xc0 [134244.037460] ? _raw_spin_unlock+0x29/0x40 [134244.038235] relocate_tree_blocks+0x37b/0x730 [btrfs] [134244.039245] relocate_block_group+0x388/0x770 [btrfs] [134244.040228] btrfs_relocate_block_group+0x161/0x2e0 [btrfs] [134244.041323] btrfs_relocate_chunk+0x36/0x110 [btrfs] [134244.041345] btrfs_balance+0xc06/0x1860 [btrfs] [134244.043382] ? btrfs_ioctl_balance+0x27c/0x310 [btrfs] [134244.045586] btrfs_ioctl_balance+0x1ed/0x310 [btrfs] [134244.045611] btrfs_ioctl+0x1880/0x3760 [btrfs] [134244.049043] ? do_raw_spin_unlock+0x49/0xc0 [134244.049838] ? _raw_spin_unlock+0x29/0x40 [134244.050587] ? __handle_mm_fault+0x11b3/0x14b0 [134244.051417] ? ksys_ioctl+0x92/0xb0 [134244.052070] ksys_ioctl+0x92/0xb0 [134244.052701] ? trace_hardirqs_off_thunk+0x1a/0x1c [134244.053511] __x64_sys_ioctl+0x16/0x20 [134244.054206] do_syscall_64+0x5c/0x280 [134244.054891] entry_SYSCALL_64_after_hwframe+0x49/0xbe [134244.055819] RIP: 0033:0x7f29b51c9dd7 [134244.056491] Code: 00 00 00 (...) [134244.059767] RSP: 002b:00007ffcccc1dd08 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [134244.061168] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f29b51c9dd7 [134244.062474] RDX: 00007ffcccc1dda0 RSI: 00000000c4009420 RDI: 0000000000000003 [134244.063771] RBP: 0000000000000003 R08: 00005565cea4b000 R09: 0000000000000000 [134244.065032] R10: 0000000000000541 R11: 0000000000000202 R12: 00007ffcccc2060a [134244.066327] R13: 00007ffcccc1dda0 R14: 0000000000000002 R15: 00007ffcccc1dec0 [134244.067626] irq event stamp: 0 [134244.068202] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [134244.069351] hardirqs last disabled at (0): [] copy_process+0x74f/0x2020 [134244.070909] softirqs last enabled at (0): [] copy_process+0x74f/0x2020 [134244.072392] softirqs last disabled at (0): [<0000000000000000>] 0x0 [134244.073432] ---[ end trace bd7c03622e0b0a99 ]--- The -EINVAL error comes from the following chain of function calls: __btrfs_cow_block() <-- aborts the transaction btrfs_reloc_cow_block() replace_file_extents() get_new_location() <-- returns -EINVAL When relocating a data block group, for each allocated extent of the block group, we preallocate another extent (at prealloc_file_extent_cluster()), associated with the data relocation inode, and then dirty all its pages. These preallocated extents have, and must have, the same size that extents from the data block group being relocated have. Later before we start the relocation stage that updates pointers (bytenr field of file extent items) to point to the the new extents, we trigger writeback for the data relocation inode. The expectation is that writeback will write the pages to the previously preallocated extents, that it follows the NOCOW path. That is generally the case, however, if a scrub is running it may have turned the block group that contains those extents into RO mode, in which case writeback falls back to the COW path. However in the COW path instead of allocating exactly one extent with the expected size, the allocator may end up allocating several smaller extents due to free space fragmentation - because we tell it at cow_file_range() that the minimum allocation size can match the filesystem's sector size. This later breaks the relocation's expectation that an extent associated to a file extent item in the data relocation inode has the same size as the respective extent pointed by a file extent item in another tree - in this case the extent to which the relocation inode poins to is smaller, causing relocation.c:get_new_location() to return -EINVAL. For example, if we are relocating a data block group X that has a logical address of X and the block group has an extent allocated at the logical address X + 128KiB with a size of 64KiB: 1) At prealloc_file_extent_cluster() we allocate an extent for the data relocation inode with a size of 64KiB and associate it to the file offset 128KiB (X + 128KiB - X) of the data relocation inode. This preallocated extent was allocated at block group Z; 2) A scrub running in parallel turns block group Z into RO mode and starts scrubing its extents; 3) Relocation triggers writeback for the data relocation inode; 4) When running delalloc (btrfs_run_delalloc_range()), we try first the NOCOW path because the data relocation inode has BTRFS_INODE_PREALLOC set in its flags. However, because block group Z is in RO mode, the NOCOW path (run_delalloc_nocow()) falls back into the COW path, by calling cow_file_range(); 5) At cow_file_range(), in the first iteration of the while loop we call btrfs_reserve_extent() to allocate a 64KiB extent and pass it a minimum allocation size of 4KiB (fs_info->sectorsize). Due to free space fragmentation, btrfs_reserve_extent() ends up allocating two extents of 32KiB each, each one on a different iteration of that while loop; 6) Writeback of the data relocation inode completes; 7) Relocation proceeds and ends up at relocation.c:replace_file_extents(), with a leaf which has a file extent item that points to the data extent from block group X, that has a logical address (bytenr) of X + 128KiB and a size of 64KiB. Then it calls get_new_location(), which does a lookup in the data relocation tree for a file extent item starting at offset 128KiB (X + 128KiB - X) and belonging to the data relocation inode. It finds a corresponding file extent item, however that item points to an extent that has a size of 32KiB, which doesn't match the expected size of 64KiB, resuling in -EINVAL being returned from this function and propagated up to __btrfs_cow_block(), which aborts the current transaction. To fix this make sure that at cow_file_range() when we call the allocator we pass it a minimum allocation size corresponding the desired extent size if the inode belongs to the data relocation tree, otherwise pass it the filesystem's sector size as the minimum allocation size. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 12b5d61f23bb..62c3f4972ff6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -985,6 +985,7 @@ static noinline int cow_file_range(struct inode *inode, u64 num_bytes; unsigned long ram_size; u64 cur_alloc_size = 0; + u64 min_alloc_size; u64 blocksize = fs_info->sectorsize; struct btrfs_key ins; struct extent_map *em; @@ -1035,10 +1036,26 @@ static noinline int cow_file_range(struct inode *inode, btrfs_drop_extent_cache(BTRFS_I(inode), start, start + num_bytes - 1, 0); + /* + * Relocation relies on the relocated extents to have exactly the same + * size as the original extents. Normally writeback for relocation data + * extents follows a NOCOW path because relocation preallocates the + * extents. However, due to an operation such as scrub turning a block + * group to RO mode, it may fallback to COW mode, so we must make sure + * an extent allocated during COW has exactly the requested size and can + * not be split into smaller extents, otherwise relocation breaks and + * fails during the stage where it updates the bytenr of file extent + * items. + */ + if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + min_alloc_size = num_bytes; + else + min_alloc_size = fs_info->sectorsize; + while (num_bytes > 0) { cur_alloc_size = num_bytes; ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, - fs_info->sectorsize, 0, alloc_hint, + min_alloc_size, 0, alloc_hint, &ins, 1, 1); if (ret < 0) goto out_unlock; From 6bd335b469f945f75474c11e3f577f85409f39c3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 8 Jun 2020 13:33:05 +0100 Subject: [PATCH 162/462] btrfs: fix bytes_may_use underflow when running balance and scrub in parallel When balance and scrub are running in parallel it is possible to end up with an underflow of the bytes_may_use counter of the data space_info object, which triggers a warning like the following: [134243.793196] BTRFS info (device sdc): relocating block group 1104150528 flags data [134243.806891] ------------[ cut here ]------------ [134243.807561] WARNING: CPU: 1 PID: 26884 at fs/btrfs/space-info.h:125 btrfs_add_reserved_bytes+0x1da/0x280 [btrfs] [134243.808819] Modules linked in: btrfs blake2b_generic xor (...) [134243.815779] CPU: 1 PID: 26884 Comm: kworker/u8:8 Tainted: G W 5.6.0-rc7-btrfs-next-58 #5 [134243.816944] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 [134243.818389] Workqueue: writeback wb_workfn (flush-btrfs-108483) [134243.819186] RIP: 0010:btrfs_add_reserved_bytes+0x1da/0x280 [btrfs] [134243.819963] Code: 0b f2 85 (...) [134243.822271] RSP: 0018:ffffa4160aae7510 EFLAGS: 00010287 [134243.822929] RAX: 000000000000c000 RBX: ffff96159a8c1000 RCX: 0000000000000000 [134243.823816] RDX: 0000000000008000 RSI: 0000000000000000 RDI: ffff96158067a810 [134243.824742] RBP: ffff96158067a800 R08: 0000000000000001 R09: 0000000000000000 [134243.825636] R10: ffff961501432a40 R11: 0000000000000000 R12: 000000000000c000 [134243.826532] R13: 0000000000000001 R14: ffffffffffff4000 R15: ffff96158067a810 [134243.827432] FS: 0000000000000000(0000) GS:ffff9615baa00000(0000) knlGS:0000000000000000 [134243.828451] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [134243.829184] CR2: 000055bd7e414000 CR3: 00000001077be004 CR4: 00000000003606e0 [134243.830083] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [134243.830975] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [134243.831867] Call Trace: [134243.832211] find_free_extent+0x4a0/0x16c0 [btrfs] [134243.832846] btrfs_reserve_extent+0x91/0x180 [btrfs] [134243.833487] cow_file_range+0x12d/0x490 [btrfs] [134243.834080] fallback_to_cow+0x82/0x1b0 [btrfs] [134243.834689] ? release_extent_buffer+0x121/0x170 [btrfs] [134243.835370] run_delalloc_nocow+0x33f/0xa30 [btrfs] [134243.836032] btrfs_run_delalloc_range+0x1ea/0x6d0 [btrfs] [134243.836725] ? find_lock_delalloc_range+0x221/0x250 [btrfs] [134243.837450] writepage_delalloc+0xe8/0x150 [btrfs] [134243.838059] __extent_writepage+0xe8/0x4c0 [btrfs] [134243.838674] extent_write_cache_pages+0x237/0x530 [btrfs] [134243.839364] extent_writepages+0x44/0xa0 [btrfs] [134243.839946] do_writepages+0x23/0x80 [134243.840401] __writeback_single_inode+0x59/0x700 [134243.841006] writeback_sb_inodes+0x267/0x5f0 [134243.841548] __writeback_inodes_wb+0x87/0xe0 [134243.842091] wb_writeback+0x382/0x590 [134243.842574] ? wb_workfn+0x4a2/0x6c0 [134243.843030] wb_workfn+0x4a2/0x6c0 [134243.843468] process_one_work+0x26d/0x6a0 [134243.843978] worker_thread+0x4f/0x3e0 [134243.844452] ? process_one_work+0x6a0/0x6a0 [134243.844981] kthread+0x103/0x140 [134243.845400] ? kthread_create_worker_on_cpu+0x70/0x70 [134243.846030] ret_from_fork+0x3a/0x50 [134243.846494] irq event stamp: 0 [134243.846892] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [134243.847682] hardirqs last disabled at (0): [] copy_process+0x74f/0x2020 [134243.848687] softirqs last enabled at (0): [] copy_process+0x74f/0x2020 [134243.849913] softirqs last disabled at (0): [<0000000000000000>] 0x0 [134243.850698] ---[ end trace bd7c03622e0b0a96 ]--- [134243.851335] ------------[ cut here ]------------ When relocating a data block group, for each extent allocated in the block group we preallocate another extent with the same size for the data relocation inode (we do it at prealloc_file_extent_cluster()). We reserve space by calling btrfs_check_data_free_space(), which ends up incrementing the data space_info's bytes_may_use counter, and then call btrfs_prealloc_file_range() to allocate the extent, which always decrements the bytes_may_use counter by the same amount. The expectation is that writeback of the data relocation inode always follows a NOCOW path, by writing into the preallocated extents. However, when starting writeback we might end up falling back into the COW path, because the block group that contains the preallocated extent was turned into RO mode by a scrub running in parallel. The COW path then calls the extent allocator which ends up calling btrfs_add_reserved_bytes(), and this function decrements the bytes_may_use counter of the data space_info object by an amount corresponding to the size of the allocated extent, despite we haven't previously incremented it. When the counter currently has a value smaller then the allocated extent we reset the counter to 0 and emit a warning, otherwise we just decrement it and slowly mess up with this counter which is crucial for space reservation, the end result can be granting reserved space to tasks when there isn't really enough free space, and having the tasks fail later in critical places where error handling consists of a transaction abort or hitting a BUG_ON(). Fix this by making sure that if we fallback to the COW path for a data relocation inode, we increment the bytes_may_use counter of the data space_info object. The COW path will then decrement it at btrfs_add_reserved_bytes() on success or through its error handling part by a call to extent_clear_unlock_delalloc() (which ends up calling btrfs_clear_delalloc_extent() that does the decrement operation) in case of an error. Test case btrfs/061 from fstests could sporadically trigger this. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 62c3f4972ff6..62b49d2db928 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1378,6 +1378,8 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page, int *page_started, unsigned long *nr_written) { const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode)); + const bool is_reloc_ino = (BTRFS_I(inode)->root->root_key.objectid == + BTRFS_DATA_RELOC_TREE_OBJECTID); const u64 range_bytes = end + 1 - start; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 range_start = start; @@ -1408,18 +1410,23 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page, * data space info, which we incremented in the step above. * * If we need to fallback to cow and the inode corresponds to a free - * space cache inode, we must also increment bytes_may_use of the data - * space_info for the same reason. Space caches always get a prealloc + * space cache inode or an inode of the data relocation tree, we must + * also increment bytes_may_use of the data space_info for the same + * reason. Space caches and relocated data extents always get a prealloc * extent for them, however scrub or balance may have set the block - * group that contains that extent to RO mode. + * group that contains that extent to RO mode and therefore force COW + * when starting writeback. */ count = count_range_bits(io_tree, &range_start, end, range_bytes, EXTENT_NORESERVE, 0); - if (count > 0 || is_space_ino) { - const u64 bytes = is_space_ino ? range_bytes : count; + if (count > 0 || is_space_ino || is_reloc_ino) { + u64 bytes = count; struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; struct btrfs_space_info *sinfo = fs_info->data_sinfo; + if (is_space_ino || is_reloc_ino) + bytes = range_bytes; + spin_lock(&sinfo->lock); btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes); spin_unlock(&sinfo->lock); From e7a79811d0db136dc2d336b56d54cf1b774ce972 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Jun 2020 10:38:44 +0100 Subject: [PATCH 163/462] btrfs: check if a log root exists before locking the log_mutex on unlink This brings back an optimization that commit e678934cbe5f02 ("btrfs: Remove unnecessary check from join_running_log_trans") removed, but in a different form. So it's almost equivalent to a revert. That commit removed an optimization where we avoid locking a root's log_mutex when there is no log tree created in the current transaction. The affected code path is triggered through unlink operations. That commit was based on the assumption that the optimization was not necessary because we used to have the following checks when the patch was authored: int btrfs_del_dir_entries_in_log(...) { (...) if (dir->logged_trans < trans->transid) return 0; ret = join_running_log_trans(root); (...) } int btrfs_del_inode_ref_in_log(...) { (...) if (inode->logged_trans < trans->transid) return 0; ret = join_running_log_trans(root); (...) } However before that patch was merged, another patch was merged first which replaced those checks because they were buggy. That other patch corresponds to commit 803f0f64d17769 ("Btrfs: fix fsync not persisting dentry deletions due to inode evictions"). The assumption that if the logged_trans field of an inode had a smaller value then the current transaction's generation (transid) meant that the inode was not logged in the current transaction was only correct if the inode was not evicted and reloaded in the current transaction. So the corresponding bug fix changed those checks and replaced them with the following helper function: static bool inode_logged(struct btrfs_trans_handle *trans, struct btrfs_inode *inode) { if (inode->logged_trans == trans->transid) return true; if (inode->last_trans == trans->transid && test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) && !test_bit(BTRFS_FS_LOG_RECOVERING, &trans->fs_info->flags)) return true; return false; } So if we have a subvolume without a log tree in the current transaction (because we had no fsyncs), every time we unlink an inode we can end up trying to lock the log_mutex of the root through join_running_log_trans() twice, once for the inode being unlinked (by btrfs_del_inode_ref_in_log()) and once for the parent directory (with btrfs_del_dir_entries_in_log()). This means if we have several unlink operations happening in parallel for inodes in the same subvolume, and the those inodes and/or their parent inode were changed in the current transaction, we end up having a lot of contention on the log_mutex. The test robots from intel reported a -30.7% performance regression for a REAIM test after commit e678934cbe5f02 ("btrfs: Remove unnecessary check from join_running_log_trans"). So just bring back the optimization to join_running_log_trans() where we check first if a log root exists before trying to lock the log_mutex. This is done by checking for a bit that is set on the root when a log tree is created and removed when a log tree is freed (at transaction commit time). Commit e678934cbe5f02 ("btrfs: Remove unnecessary check from join_running_log_trans") was merged in the 5.4 merge window while commit 803f0f64d17769 ("Btrfs: fix fsync not persisting dentry deletions due to inode evictions") was merged in the 5.3 merge window. But the first commit was actually authored before the second commit (May 23 2019 vs June 19 2019). Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/20200611090233.GL12456@shao2-debian/ Fixes: e678934cbe5f02 ("btrfs: Remove unnecessary check from join_running_log_trans") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/tree-log.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 30ce7039bc27..d404cce8ae40 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1009,6 +1009,8 @@ enum { BTRFS_ROOT_DEAD_RELOC_TREE, /* Mark dead root stored on device whose cleanup needs to be resumed */ BTRFS_ROOT_DEAD_TREE, + /* The root has a log tree. Used only for subvolume roots. */ + BTRFS_ROOT_HAS_LOG_TREE, }; /* diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 920cee312f4e..cd5348f352dd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -169,6 +169,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, if (ret) goto out; + set_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state); clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); root->log_start_pid = current->pid; } @@ -195,6 +196,9 @@ static int join_running_log_trans(struct btrfs_root *root) { int ret = -ENOENT; + if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state)) + return ret; + mutex_lock(&root->log_mutex); if (root->log_root) { ret = 0; @@ -3303,6 +3307,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) if (root->log_root) { free_log_tree(trans, root->log_root); root->log_root = NULL; + clear_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state); } return 0; } From f2cb2f39ccc30fa13d3ac078d461031a63960e5b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Jun 2020 18:46:01 +0100 Subject: [PATCH 164/462] btrfs: fix hang on snapshot creation after RWF_NOWAIT write If we do a successful RWF_NOWAIT write we end up locking the snapshot lock of the inode, through a call to check_can_nocow(), but we never unlock it. This means the next attempt to create a snapshot on the subvolume will hang forever. Trivial reproducer: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ touch /mnt/foobar $ chattr +C /mnt/foobar $ xfs_io -d -c "pwrite -S 0xab 0 64K" /mnt/foobar $ xfs_io -d -c "pwrite -N -V 1 -S 0xfe 0 64K" /mnt/foobar $ btrfs subvolume snapshot -r /mnt /mnt/snap --> hangs Fix this by unlocking the snapshot lock if check_can_nocow() returned success. Fixes: edf064e7c6fec3 ("btrfs: nowait aio support") CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2c14312b05e8..04faa04fccd1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1914,6 +1914,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, inode_unlock(inode); return -EAGAIN; } + /* check_can_nocow() locks the snapshot lock on success */ + btrfs_drew_write_unlock(&root->snapshot_lock); } current->backing_dev_info = inode_to_bdi(inode); From 4b1946284dd6641afdb9457101056d9e6ee6204c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Jun 2020 18:48:58 +0100 Subject: [PATCH 165/462] btrfs: fix failure of RWF_NOWAIT write into prealloc extent beyond eof If we attempt to write to prealloc extent located after eof using a RWF_NOWAIT write, we always fail with -EAGAIN. We do actually check if we have an allocated extent for the write at the start of btrfs_file_write_iter() through a call to check_can_nocow(), but later when we go into the actual direct IO write path we simply return -EAGAIN if the write starts at or beyond EOF. Trivial to reproduce: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ touch /mnt/foo $ chattr +C /mnt/foo $ xfs_io -d -c "pwrite -S 0xab 0 64K" /mnt/foo wrote 65536/65536 bytes at offset 0 64 KiB, 16 ops; 0.0004 sec (135.575 MiB/sec and 34707.1584 ops/sec) $ xfs_io -c "falloc -k 64K 1M" /mnt/foo $ xfs_io -d -c "pwrite -N -V 1 -S 0xfe -b 64K 64K 64K" /mnt/foo pwrite: Resource temporarily unavailable On xfs and ext4 the write succeeds, as expected. Fix this by removing the wrong check at btrfs_direct_IO(). Fixes: edf064e7c6fec3 ("btrfs: nowait aio support") CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 62b49d2db928..cfa863d2d97c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7889,9 +7889,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) dio_data.overwrite = 1; inode_unlock(inode); relock = true; - } else if (iocb->ki_flags & IOCB_NOWAIT) { - ret = -EAGAIN; - goto out; } ret = btrfs_delalloc_reserve_space(inode, &data_reserved, offset, count); From 260a63395f90f67d6ab89e4266af9e3dc34a77e9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Jun 2020 18:49:13 +0100 Subject: [PATCH 166/462] btrfs: fix RWF_NOWAIT write not failling when we need to cow If we attempt to do a RWF_NOWAIT write against a file range for which we can only do NOCOW for a part of it, due to the existence of holes or shared extents for example, we proceed with the write as if it were possible to NOCOW the whole range. Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ touch /mnt/sdj/bar $ chattr +C /mnt/sdj/bar $ xfs_io -d -c "pwrite -S 0xab -b 256K 0 256K" /mnt/bar wrote 262144/262144 bytes at offset 0 256 KiB, 1 ops; 0.0003 sec (694.444 MiB/sec and 2777.7778 ops/sec) $ xfs_io -c "fpunch 64K 64K" /mnt/bar $ sync $ xfs_io -d -c "pwrite -N -V 1 -b 128K -S 0xfe 0 128K" /mnt/bar wrote 131072/131072 bytes at offset 0 128 KiB, 1 ops; 0.0007 sec (160.051 MiB/sec and 1280.4097 ops/sec) This last write should fail with -EAGAIN since the file range from 64K to 128K is a hole. On xfs it fails, as expected, but on ext4 it currently succeeds because apparently it is expensive to check if there are extents allocated for the whole range, but I'll check with the ext4 people. Fix the issue by checking if check_can_nocow() returns a number of NOCOW'able bytes smaller then the requested number of bytes, and if it does return -EAGAIN. Fixes: edf064e7c6fec3 ("btrfs: nowait aio support") CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 04faa04fccd1..6d5d905281c6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1904,18 +1904,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, pos = iocb->ki_pos; count = iov_iter_count(from); if (iocb->ki_flags & IOCB_NOWAIT) { + size_t nocow_bytes = count; + /* * We will allocate space in case nodatacow is not set, * so bail */ if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) || - check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) { + check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes) <= 0) { inode_unlock(inode); return -EAGAIN; } /* check_can_nocow() locks the snapshot lock on success */ btrfs_drew_write_unlock(&root->snapshot_lock); + /* + * There are holes in the range or parts of the range that must + * be COWed (shared extents, RO block groups, etc), so just bail + * out. + */ + if (nocow_bytes < count) { + inode_unlock(inode); + return -EAGAIN; + } } current->backing_dev_info = inode_to_bdi(inode); From 5dbb75ed6900048e146247b6325742d92c892548 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Jun 2020 18:49:39 +0100 Subject: [PATCH 167/462] btrfs: fix RWF_NOWAIT writes blocking on extent locks and waiting for IO A RWF_NOWAIT write is not supposed to wait on filesystem locks that can be held for a long time or for ongoing IO to complete. However when calling check_can_nocow(), if the inode has prealloc extents or has the NOCOW flag set, we can block on extent (file range) locks through the call to btrfs_lock_and_flush_ordered_range(). Such lock can take a significant amount of time to be available. For example, a fiemap task may be running, and iterating through the entire file range checking all extents and doing backref walking to determine if they are shared, or a readpage operation may be in progress. Also at btrfs_lock_and_flush_ordered_range(), called by check_can_nocow(), after locking the file range we wait for any existing ordered extent that is in progress to complete. Another operation that can take a significant amount of time and defeat the purpose of RWF_NOWAIT. So fix this by trying to lock the file range and if it's currently locked return -EAGAIN to user space. If we are able to lock the file range without waiting and there is an ordered extent in the range, return -EAGAIN as well, instead of waiting for it to complete. Finally, don't bother trying to lock the snapshot lock of the root when attempting a RWF_NOWAIT write, as that is only important for buffered writes. Fixes: edf064e7c6fec3 ("btrfs: nowait aio support") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6d5d905281c6..2520605afc25 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1533,7 +1533,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, } static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, - size_t *write_bytes) + size_t *write_bytes, bool nowait) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_root *root = inode->root; @@ -1541,27 +1541,43 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, u64 num_bytes; int ret; - if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) + if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock)) return -EAGAIN; lockstart = round_down(pos, fs_info->sectorsize); lockend = round_up(pos + *write_bytes, fs_info->sectorsize) - 1; - - btrfs_lock_and_flush_ordered_range(inode, lockstart, - lockend, NULL); - num_bytes = lockend - lockstart + 1; + + if (nowait) { + struct btrfs_ordered_extent *ordered; + + if (!try_lock_extent(&inode->io_tree, lockstart, lockend)) + return -EAGAIN; + + ordered = btrfs_lookup_ordered_range(inode, lockstart, + num_bytes); + if (ordered) { + btrfs_put_ordered_extent(ordered); + ret = -EAGAIN; + goto out_unlock; + } + } else { + btrfs_lock_and_flush_ordered_range(inode, lockstart, + lockend, NULL); + } + ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, NULL, NULL, NULL); if (ret <= 0) { ret = 0; - btrfs_drew_write_unlock(&root->snapshot_lock); + if (!nowait) + btrfs_drew_write_unlock(&root->snapshot_lock); } else { *write_bytes = min_t(size_t, *write_bytes , num_bytes - pos + lockstart); } - +out_unlock: unlock_extent(&inode->io_tree, lockstart, lockend); return ret; @@ -1633,7 +1649,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) && check_can_nocow(BTRFS_I(inode), pos, - &write_bytes) > 0) { + &write_bytes, false) > 0) { /* * For nodata cow case, no need to reserve * data space. @@ -1912,12 +1928,11 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, */ if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) || - check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes) <= 0) { + check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes, + true) <= 0) { inode_unlock(inode); return -EAGAIN; } - /* check_can_nocow() locks the snapshot lock on success */ - btrfs_drew_write_unlock(&root->snapshot_lock); /* * There are holes in the range or parts of the range that must * be COWed (shared extents, RO block groups, etc), so just bail From b091f7fede97cc64f7aaad3eeb37965aebee3082 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 16 Jun 2020 11:31:59 -0400 Subject: [PATCH 168/462] btrfs: use kfree() in btrfs_ioctl_get_subvol_info() In btrfs_ioctl_get_subvol_info(), there is a classic case where kzalloc() was incorrectly paired with kzfree(). According to David Sterba, there isn't any sensitive information in the subvol_info that needs to be cleared before freeing. So kzfree() isn't really needed, use kfree() instead. Signed-off-by: Waiman Long Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 168deb8ef68a..e8f7c5f00894 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2692,7 +2692,7 @@ out: btrfs_put_root(root); out_free: btrfs_free_path(path); - kzfree(subvol_info); + kfree(subvol_info); return ret; } From e575fb9e76c8e33440fb859572a8b7d430f053d6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 16 Jun 2020 18:29:11 +0100 Subject: [PATCH 169/462] arm64: sve: Fix build failure when ARM64_SVE=y and SYSCTL=n When I squashed the 'allnoconfig' compiler warning about the set_sve_default_vl() function being defined but not used in commit 1e570f512cbd ("arm64/sve: Eliminate data races on sve_default_vl"), I accidentally broke the build for configs where ARM64_SVE is enabled, but SYSCTL is not. Fix this by only compiling the SVE sysctl support if both CONFIG_SVE=y and CONFIG_SYSCTL=y. Cc: Dave Martin Reported-by: Qian Cai Link: https://lore.kernel.org/r/20200616131808.GA1040@lca.pw Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index d9eee9194511..55c8f3ec6705 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -349,7 +349,7 @@ static unsigned int find_supported_vector_length(unsigned int vl) return sve_vl_from_vq(__bit_to_vq(bit)); } -#ifdef CONFIG_SYSCTL +#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL) static int sve_proc_do_default_vl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -394,9 +394,9 @@ static int __init sve_sysctl_init(void) return 0; } -#else /* ! CONFIG_SYSCTL */ +#else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ static int __init sve_sysctl_init(void) { return 0; } -#endif /* ! CONFIG_SYSCTL */ +#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) From e571d4ee334719727f22cce30c4c74471d4ef68a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Jun 2020 00:33:12 +0200 Subject: [PATCH 170/462] nsproxy: restore EINVAL for non-namespace file descriptor The LTP testsuite reported a regression where users would now see EBADF returned instead of EINVAL when an fd was passed that referred to an open file but the file was not a nsfd. Fix this by continuing to report EINVAL. Reported-by: kernel test robot Cc: Jan Stancek Cc: Cyril Hrubis Link: https://lore.kernel.org/lkml/20200615085836.GR12456@shao2-debian Fixes: 303cc571d107 ("nsproxy: attach to namespaces via pidfds") Signed-off-by: Christian Brauner --- kernel/nsproxy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index b03df67621d0..cd356630a311 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -531,7 +531,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, flags) } else if (!IS_ERR(pidfd_pid(file))) { err = check_setns_flags(flags); } else { - err = -EBADF; + err = -EINVAL; } if (err) goto out; From 86f56395feb2b106b125c47e72192e37da5dd088 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Jun 2020 00:48:54 +0200 Subject: [PATCH 171/462] tests: test for setns() EINVAL regression Verify that setns() reports EINVAL when an fd is passed that refers to an open file but the file is not a file descriptor useable to interact with namespaces. Cc: Jan Stancek Cc: Cyril Hrubis Link: https://lore.kernel.org/lkml/20200615085836.GR12456@shao2-debian Signed-off-by: Christian Brauner --- tools/testing/selftests/pidfd/pidfd.h | 5 +++++ tools/testing/selftests/pidfd/pidfd_getfd_test.c | 5 ----- tools/testing/selftests/pidfd/pidfd_setns_test.c | 12 ++++++++++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index c1921a53dbed..8d728eda783d 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -95,4 +95,9 @@ static inline int sys_pidfd_getfd(int pidfd, int fd, int flags) return syscall(__NR_pidfd_getfd, pidfd, fd, flags); } +static inline int sys_memfd_create(const char *name, unsigned int flags) +{ + return syscall(__NR_memfd_create, name, flags); +} + #endif /* __PIDFD_H */ diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c index 401a7c1d0312..84b65ecccb04 100644 --- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c @@ -34,11 +34,6 @@ static int sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2); } -static int sys_memfd_create(const char *name, unsigned int flags) -{ - return syscall(__NR_memfd_create, name, flags); -} - static int __child(int sk, int memfd) { int ret; diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 133ec5b6cda8..9418108eae13 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -470,4 +470,16 @@ TEST_F(current_nsset, no_foul_play) } } +TEST(setns_einval) +{ + int fd; + + fd = sys_memfd_create("rostock", 0); + EXPECT_GT(fd, 0); + + ASSERT_NE(setns(fd, 0), 0); + EXPECT_EQ(errno, EINVAL); + close(fd); +} + TEST_HARNESS_MAIN From aa449a7965a6172a89d48844c313708962216f1f Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 15 Jun 2020 13:45:48 -0700 Subject: [PATCH 172/462] selinux: fix a double free in cond_read_node()/cond_read_list() Clang static analysis reports this double free error security/selinux/ss/conditional.c:139:2: warning: Attempt to free released memory [unix.Malloc] kfree(node->expr.nodes); ^~~~~~~~~~~~~~~~~~~~~~~ When cond_read_node fails, it calls cond_node_destroy which frees the node but does not poison the entry in the node list. So when it returns to its caller cond_read_list, cond_read_list deletes the partial list. The latest entry in the list will be deleted twice. So instead of freeing the node in cond_read_node, let list freeing in code_read_list handle the freeing the problem node along with all of the earlier nodes. Because cond_read_node no longer does any error handling, the goto's the error case are redundant. Instead just return the error code. Cc: stable@vger.kernel.org Fixes: 60abd3181db2 ("selinux: convert cond_list to array") Signed-off-by: Tom Rix Reviewed-by: Ondrej Mosnacek [PM: subject line tweaks] Signed-off-by: Paul Moore --- security/selinux/ss/conditional.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index 939a74fd8fb4..4867dfc5337a 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c @@ -392,27 +392,19 @@ static int cond_read_node(struct policydb *p, struct cond_node *node, void *fp) rc = next_entry(buf, fp, sizeof(u32) * 2); if (rc) - goto err; + return rc; expr->expr_type = le32_to_cpu(buf[0]); expr->bool = le32_to_cpu(buf[1]); - if (!expr_node_isvalid(p, expr)) { - rc = -EINVAL; - goto err; - } + if (!expr_node_isvalid(p, expr)) + return -EINVAL; } rc = cond_read_av_list(p, fp, &node->true_list, NULL); if (rc) - goto err; - rc = cond_read_av_list(p, fp, &node->false_list, &node->true_list); - if (rc) - goto err; - return 0; -err: - cond_node_destroy(node); - return rc; + return rc; + return cond_read_av_list(p, fp, &node->false_list, &node->true_list); } int cond_read_list(struct policydb *p, void *fp) From f2f02ebd8f3833626642688b2d2c6a7b3c141fa9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 14 Jun 2020 23:43:40 +0900 Subject: [PATCH 173/462] kbuild: improve cc-option to clean up all temporary files When cc-option and friends evaluate compiler flags, the temporary file $$TMP is created as an output object, and automatically cleaned up. The actual file path of $$TMP is ..tmp, here is the process ID of $(shell ...) invoked from cc-option. (Please note $$$$ is the escape sequence of $$). Such garbage files are cleaned up in most cases, but some compiler flags create additional output files. For example, -gsplit-dwarf creates a .dwo file. When CONFIG_DEBUG_INFO_SPLIT=y, you will see a bunch of ..dwo files left in the top of build directories. You may not notice them unless you do 'ls -a', but the garbage files will increase every time you run 'make'. This commit changes the temporary object path to .tmp_/tmp, and removes .tmp_ directory when exiting. Separate build artifacts such as *.dwo will be cleaned up all together because their file paths are usually determined based on the base name of the object. Another example is -ftest-coverage, which outputs the coverage data into .gcno Signed-off-by: Masahiro Yamada --- scripts/Kbuild.include | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 0c3dc983439b..9a15fbf66aa1 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -86,20 +86,21 @@ cc-cross-prefix = $(firstword $(foreach c, $(1), \ $(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c)))) # output directory for tests below -TMPOUT := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/) +TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$ # try-run # Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise) # Exit code chooses option. "$$TMP" serves as a temporary file and is # automatically cleaned up. try-run = $(shell set -e; \ - TMP="$(TMPOUT).$$$$.tmp"; \ - TMPO="$(TMPOUT).$$$$.o"; \ + TMP=$(TMPOUT)/tmp; \ + TMPO=$(TMPOUT)/tmp.o; \ + mkdir -p $(TMPOUT); \ + trap "rm -rf $(TMPOUT)" EXIT; \ if ($(1)) >/dev/null 2>&1; \ then echo "$(2)"; \ else echo "$(3)"; \ - fi; \ - rm -f "$$TMP" "$$TMPO") + fi) # as-option # Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,) From 4ef57b21d6fb49d2b25c47e4cff467a0c2c8b6b7 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 24 Apr 2020 12:30:46 -0700 Subject: [PATCH 174/462] recordmcount: support >64k sections When compiling a kernel with Clang and LTO, we need to run recordmcount on vmlinux.o with a large number of sections, which currently fails as the program doesn't understand extended section indexes. This change adds support for processing binaries with >64k sections. Link: https://lkml.kernel.org/r/20200424193046.160744-1-samitolvanen@google.com Link: https://lore.kernel.org/lkml/CAK7LNARbZhoaA=Nnuw0=gBrkuKbr_4Ng_Ei57uafujZf7Xazgw@mail.gmail.com/ Cc: Kees Cook Reviewed-by: Matt Helsley Signed-off-by: Sami Tolvanen Signed-off-by: Steven Rostedt (VMware) --- scripts/recordmcount.h | 98 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 92 insertions(+), 6 deletions(-) diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 74eab03e31d4..f9b19524da11 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -29,6 +29,11 @@ #undef has_rel_mcount #undef tot_relsize #undef get_mcountsym +#undef find_symtab +#undef get_shnum +#undef set_shnum +#undef get_shstrndx +#undef get_symindex #undef get_sym_str_and_relp #undef do_func #undef Elf_Addr @@ -58,6 +63,11 @@ # define __has_rel_mcount __has64_rel_mcount # define has_rel_mcount has64_rel_mcount # define tot_relsize tot64_relsize +# define find_symtab find_symtab64 +# define get_shnum get_shnum64 +# define set_shnum set_shnum64 +# define get_shstrndx get_shstrndx64 +# define get_symindex get_symindex64 # define get_sym_str_and_relp get_sym_str_and_relp_64 # define do_func do64 # define get_mcountsym get_mcountsym_64 @@ -91,6 +101,11 @@ # define __has_rel_mcount __has32_rel_mcount # define has_rel_mcount has32_rel_mcount # define tot_relsize tot32_relsize +# define find_symtab find_symtab32 +# define get_shnum get_shnum32 +# define set_shnum set_shnum32 +# define get_shstrndx get_shstrndx32 +# define get_symindex get_symindex32 # define get_sym_str_and_relp get_sym_str_and_relp_32 # define do_func do32 # define get_mcountsym get_mcountsym_32 @@ -173,6 +188,67 @@ static int MIPS_is_fake_mcount(Elf_Rel const *rp) return is_fake; } +static unsigned int get_symindex(Elf_Sym const *sym, Elf32_Word const *symtab, + Elf32_Word const *symtab_shndx) +{ + unsigned long offset; + int index; + + if (sym->st_shndx != SHN_XINDEX) + return w2(sym->st_shndx); + + offset = (unsigned long)sym - (unsigned long)symtab; + index = offset / sizeof(*sym); + + return w(symtab_shndx[index]); +} + +static unsigned int get_shnum(Elf_Ehdr const *ehdr, Elf_Shdr const *shdr0) +{ + if (shdr0 && !ehdr->e_shnum) + return w(shdr0->sh_size); + + return w2(ehdr->e_shnum); +} + +static void set_shnum(Elf_Ehdr *ehdr, Elf_Shdr *shdr0, unsigned int new_shnum) +{ + if (new_shnum >= SHN_LORESERVE) { + ehdr->e_shnum = 0; + shdr0->sh_size = w(new_shnum); + } else + ehdr->e_shnum = w2(new_shnum); +} + +static int get_shstrndx(Elf_Ehdr const *ehdr, Elf_Shdr const *shdr0) +{ + if (ehdr->e_shstrndx != SHN_XINDEX) + return w2(ehdr->e_shstrndx); + + return w(shdr0->sh_link); +} + +static void find_symtab(Elf_Ehdr *const ehdr, Elf_Shdr const *shdr0, + unsigned const nhdr, Elf32_Word **symtab, + Elf32_Word **symtab_shndx) +{ + Elf_Shdr const *relhdr; + unsigned k; + + *symtab = NULL; + *symtab_shndx = NULL; + + for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) { + if (relhdr->sh_type == SHT_SYMTAB) + *symtab = (void *)ehdr + relhdr->sh_offset; + else if (relhdr->sh_type == SHT_SYMTAB_SHNDX) + *symtab_shndx = (void *)ehdr + relhdr->sh_offset; + + if (*symtab && *symtab_shndx) + break; + } +} + /* Append the new shstrtab, Elf_Shdr[], __mcount_loc and its relocations. */ static int append_func(Elf_Ehdr *const ehdr, Elf_Shdr *const shstr, @@ -188,10 +264,12 @@ static int append_func(Elf_Ehdr *const ehdr, char const *mc_name = (sizeof(Elf_Rela) == rel_entsize) ? ".rela__mcount_loc" : ".rel__mcount_loc"; - unsigned const old_shnum = w2(ehdr->e_shnum); uint_t const old_shoff = _w(ehdr->e_shoff); uint_t const old_shstr_sh_size = _w(shstr->sh_size); uint_t const old_shstr_sh_offset = _w(shstr->sh_offset); + Elf_Shdr *const shdr0 = (Elf_Shdr *)(old_shoff + (void *)ehdr); + unsigned int const old_shnum = get_shnum(ehdr, shdr0); + unsigned int const new_shnum = 2 + old_shnum; /* {.rel,}__mcount_loc */ uint_t t = 1 + strlen(mc_name) + _w(shstr->sh_size); uint_t new_e_shoff; @@ -201,6 +279,8 @@ static int append_func(Elf_Ehdr *const ehdr, t += (_align & -t); /* word-byte align */ new_e_shoff = t; + set_shnum(ehdr, shdr0, new_shnum); + /* body for new shstrtab */ if (ulseek(sb.st_size, SEEK_SET) < 0) return -1; @@ -255,7 +335,6 @@ static int append_func(Elf_Ehdr *const ehdr, return -1; ehdr->e_shoff = _w(new_e_shoff); - ehdr->e_shnum = w2(2 + w2(ehdr->e_shnum)); /* {.rel,}__mcount_loc */ if (ulseek(0, SEEK_SET) < 0) return -1; if (uwrite(ehdr, sizeof(*ehdr)) < 0) @@ -434,6 +513,8 @@ static int find_secsym_ndx(unsigned const txtndx, uint_t *const recvalp, unsigned int *sym_index, Elf_Shdr const *const symhdr, + Elf32_Word const *symtab, + Elf32_Word const *symtab_shndx, Elf_Ehdr const *const ehdr) { Elf_Sym const *const sym0 = (Elf_Sym const *)(_w(symhdr->sh_offset) @@ -445,7 +526,7 @@ static int find_secsym_ndx(unsigned const txtndx, for (symp = sym0, t = nsym; t; --t, ++symp) { unsigned int const st_bind = ELF_ST_BIND(symp->st_info); - if (txtndx == w2(symp->st_shndx) + if (txtndx == get_symindex(symp, symtab, symtab_shndx) /* avoid STB_WEAK */ && (STB_LOCAL == st_bind || STB_GLOBAL == st_bind)) { /* function symbols on ARM have quirks, avoid them */ @@ -516,21 +597,23 @@ static unsigned tot_relsize(Elf_Shdr const *const shdr0, return totrelsz; } - /* Overall supervision for Elf32 ET_REL file. */ static int do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype) { Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) + (void *)ehdr); - unsigned const nhdr = w2(ehdr->e_shnum); - Elf_Shdr *const shstr = &shdr0[w2(ehdr->e_shstrndx)]; + unsigned const nhdr = get_shnum(ehdr, shdr0); + Elf_Shdr *const shstr = &shdr0[get_shstrndx(ehdr, shdr0)]; char const *const shstrtab = (char const *)(_w(shstr->sh_offset) + (void *)ehdr); Elf_Shdr const *relhdr; unsigned k; + Elf32_Word *symtab; + Elf32_Word *symtab_shndx; + /* Upper bound on space: assume all relevant relocs are for mcount. */ unsigned totrelsz; @@ -561,6 +644,8 @@ static int do_func(Elf_Ehdr *const ehdr, char const *const fname, return -1; } + find_symtab(ehdr, shdr0, nhdr, &symtab, &symtab_shndx); + for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) { char const *const txtname = has_rel_mcount(relhdr, shdr0, shstrtab, fname); @@ -577,6 +662,7 @@ static int do_func(Elf_Ehdr *const ehdr, char const *const fname, result = find_secsym_ndx(w(relhdr->sh_info), txtname, &recval, &recsym, &shdr0[symsec_sh_link], + symtab, symtab_shndx, ehdr); if (result) goto out; From 6743ad432ec92e680cd0d9db86cb17b949cf5a43 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 12 May 2020 17:02:33 +0900 Subject: [PATCH 175/462] kprobes: Suppress the suspicious RCU warning on kprobes Anders reported that the lockdep warns that suspicious RCU list usage in register_kprobe() (detected by CONFIG_PROVE_RCU_LIST.) This is because get_kprobe() access kprobe_table[] by hlist_for_each_entry_rcu() without rcu_read_lock. If we call get_kprobe() from the breakpoint handler context, it is run with preempt disabled, so this is not a problem. But in other cases, instead of rcu_read_lock(), we locks kprobe_mutex so that the kprobe_table[] is not updated. So, current code is safe, but still not good from the view point of RCU. Joel suggested that we can silent that warning by passing lockdep_is_held() to the last argument of hlist_for_each_entry_rcu(). Add lockdep_is_held(&kprobe_mutex) at the end of the hlist_for_each_entry_rcu() to suppress the warning. Link: http://lkml.kernel.org/r/158927055350.27680.10261450713467997503.stgit@devnote2 Reported-by: Anders Roxell Suggested-by: Joel Fernandes Reviewed-by: Joel Fernandes (Google) Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 50cd84f53df0..8b2fd4145ab3 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -326,7 +326,8 @@ struct kprobe *get_kprobe(void *addr) struct kprobe *p; head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; - hlist_for_each_entry_rcu(p, head, hlist) { + hlist_for_each_entry_rcu(p, head, hlist, + lockdep_is_held(&kprobe_mutex)) { if (p->addr == addr) return p; } From 7e6a71d8e60187726e29b13d9e9b23b77026c17a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 12 May 2020 17:02:44 +0900 Subject: [PATCH 176/462] kprobes: Use non RCU traversal APIs on kprobe_tables if possible Current kprobes uses RCU traversal APIs on kprobe_tables even if it is safe because kprobe_mutex is locked. Make those traversals to non-RCU APIs where the kprobe_mutex is locked. Link: http://lkml.kernel.org/r/158927056452.27680.9710575332163005121.stgit@devnote2 Reviewed-by: Joel Fernandes (Google) Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 8b2fd4145ab3..ceb0e273bd69 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -46,6 +46,11 @@ static int kprobes_initialized; +/* kprobe_table can be accessed by + * - Normal hlist traversal and RCU add/del under kprobe_mutex is held. + * Or + * - RCU hlist traversal under disabling preempt (breakpoint handlers) + */ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; @@ -850,7 +855,7 @@ static void optimize_all_kprobes(void) kprobes_allow_optimization = true; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, head, hlist) + hlist_for_each_entry(p, head, hlist) if (!kprobe_disabled(p)) optimize_kprobe(p); } @@ -877,7 +882,7 @@ static void unoptimize_all_kprobes(void) kprobes_allow_optimization = false; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, head, hlist) { + hlist_for_each_entry(p, head, hlist) { if (!kprobe_disabled(p)) unoptimize_kprobe(p, false); } @@ -1500,12 +1505,14 @@ static struct kprobe *__get_valid_kprobe(struct kprobe *p) { struct kprobe *ap, *list_p; + lockdep_assert_held(&kprobe_mutex); + ap = get_kprobe(p->addr); if (unlikely(!ap)) return NULL; if (p != ap) { - list_for_each_entry_rcu(list_p, &ap->list, list) + list_for_each_entry(list_p, &ap->list, list) if (list_p == p) /* kprobe p is a valid probe */ goto valid; @@ -1670,7 +1677,9 @@ static int aggr_kprobe_disabled(struct kprobe *ap) { struct kprobe *kp; - list_for_each_entry_rcu(kp, &ap->list, list) + lockdep_assert_held(&kprobe_mutex); + + list_for_each_entry(kp, &ap->list, list) if (!kprobe_disabled(kp)) /* * There is an active probe on the list. @@ -1749,7 +1758,7 @@ static int __unregister_kprobe_top(struct kprobe *p) else { /* If disabling probe has special handlers, update aggrprobe */ if (p->post_handler && !kprobe_gone(p)) { - list_for_each_entry_rcu(list_p, &ap->list, list) { + list_for_each_entry(list_p, &ap->list, list) { if ((list_p != p) && (list_p->post_handler)) goto noclean; } @@ -2063,13 +2072,15 @@ static void kill_kprobe(struct kprobe *p) { struct kprobe *kp; + lockdep_assert_held(&kprobe_mutex); + p->flags |= KPROBE_FLAG_GONE; if (kprobe_aggrprobe(p)) { /* * If this is an aggr_kprobe, we have to list all the * chained probes and mark them GONE. */ - list_for_each_entry_rcu(kp, &p->list, list) + list_for_each_entry(kp, &p->list, list) kp->flags |= KPROBE_FLAG_GONE; p->post_handler = NULL; kill_optimized_kprobe(p); @@ -2313,7 +2324,7 @@ static int kprobes_module_callback(struct notifier_block *nb, mutex_lock(&kprobe_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, head, hlist) + hlist_for_each_entry(p, head, hlist) if (within_module_init((unsigned long)p->addr, mod) || (checkcore && within_module_core((unsigned long)p->addr, mod))) { @@ -2551,7 +2562,7 @@ static int arm_all_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; /* Arm all kprobes on a best-effort basis */ - hlist_for_each_entry_rcu(p, head, hlist) { + hlist_for_each_entry(p, head, hlist) { if (!kprobe_disabled(p)) { err = arm_kprobe(p); if (err) { @@ -2594,7 +2605,7 @@ static int disarm_all_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; /* Disarm all kprobes on a best-effort basis */ - hlist_for_each_entry_rcu(p, head, hlist) { + hlist_for_each_entry(p, head, hlist) { if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) { err = disarm_kprobe(p, false); if (err) { From 1a0aa991a6274161c95a844c58cfb801d681eb59 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 12 May 2020 17:02:56 +0900 Subject: [PATCH 177/462] kprobes: Fix to protect kick_kprobe_optimizer() by kprobe_mutex In kprobe_optimizer() kick_kprobe_optimizer() is called without kprobe_mutex, but this can race with other caller which is protected by kprobe_mutex. To fix that, expand kprobe_mutex protected area to protect kick_kprobe_optimizer() call. Link: http://lkml.kernel.org/r/158927057586.27680.5036330063955940456.stgit@devnote2 Fixes: cd7ebe2298ff ("kprobes: Use text_poke_smp_batch for optimizing") Cc: Ingo Molnar Cc: "Gustavo A . R . Silva" Cc: Anders Roxell Cc: "Naveen N . Rao" Cc: Anil S Keshavamurthy Cc: David Miller Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Ziqian SUN Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ceb0e273bd69..0e185763578b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -592,11 +592,12 @@ static void kprobe_optimizer(struct work_struct *work) mutex_unlock(&module_mutex); mutex_unlock(&text_mutex); cpus_read_unlock(); - mutex_unlock(&kprobe_mutex); /* Step 5: Kick optimizer again if needed */ if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) kick_kprobe_optimizer(); + + mutex_unlock(&kprobe_mutex); } /* Wait for completing optimization and unoptimization */ From 75ddf64dd276e3fc8906f27549afa229798ad916 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 12 May 2020 17:03:07 +0900 Subject: [PATCH 178/462] kprobes: Remove redundant arch_disarm_kprobe() call Fix to remove redundant arch_disarm_kprobe() call in force_unoptimize_kprobe(). This arch_disarm_kprobe() will be invoked if the kprobe is optimized but disabled, but that means the kprobe (optprobe) is unused (and unoptimized) state. In that case, unoptimize_kprobe() puts it in freeing_list and kprobe_optimizer (do_unoptimize_kprobes()) automatically disarm it. Thus this arch_disarm_kprobe() is redundant. Link: http://lkml.kernel.org/r/158927058719.27680.17183632908465341189.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 0e185763578b..5cb7791c16b3 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -675,8 +675,6 @@ static void force_unoptimize_kprobe(struct optimized_kprobe *op) lockdep_assert_cpus_held(); arch_unoptimize_kprobe(op); op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; - if (kprobe_disabled(&op->kp)) - arch_disarm_kprobe(&op->kp); } /* Unoptimize a kprobe if p is optimized */ From 9b38cc704e844e41d9cf74e647bff1d249512cb3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 12 May 2020 17:03:18 +0900 Subject: [PATCH 179/462] kretprobe: Prevent triggering kretprobe from within kprobe_flush_task Ziqian reported lockup when adding retprobe on _raw_spin_lock_irqsave. My test was also able to trigger lockdep output: ============================================ WARNING: possible recursive locking detected 5.6.0-rc6+ #6 Not tainted -------------------------------------------- sched-messaging/2767 is trying to acquire lock: ffffffff9a492798 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_hash_lock+0x52/0xa0 but task is already holding lock: ffffffff9a491a18 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_trampoline+0x0/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(kretprobe_table_locks[i].lock)); lock(&(kretprobe_table_locks[i].lock)); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by sched-messaging/2767: #0: ffffffff9a491a18 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_trampoline+0x0/0x50 stack backtrace: CPU: 3 PID: 2767 Comm: sched-messaging Not tainted 5.6.0-rc6+ #6 Call Trace: dump_stack+0x96/0xe0 __lock_acquire.cold.57+0x173/0x2b7 ? native_queued_spin_lock_slowpath+0x42b/0x9e0 ? lockdep_hardirqs_on+0x590/0x590 ? __lock_acquire+0xf63/0x4030 lock_acquire+0x15a/0x3d0 ? kretprobe_hash_lock+0x52/0xa0 _raw_spin_lock_irqsave+0x36/0x70 ? kretprobe_hash_lock+0x52/0xa0 kretprobe_hash_lock+0x52/0xa0 trampoline_handler+0xf8/0x940 ? kprobe_fault_handler+0x380/0x380 ? find_held_lock+0x3a/0x1c0 kretprobe_trampoline+0x25/0x50 ? lock_acquired+0x392/0xbc0 ? _raw_spin_lock_irqsave+0x50/0x70 ? __get_valid_kprobe+0x1f0/0x1f0 ? _raw_spin_unlock_irqrestore+0x3b/0x40 ? finish_task_switch+0x4b9/0x6d0 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 The code within the kretprobe handler checks for probe reentrancy, so we won't trigger any _raw_spin_lock_irqsave probe in there. The problem is in outside kprobe_flush_task, where we call: kprobe_flush_task kretprobe_table_lock raw_spin_lock_irqsave _raw_spin_lock_irqsave where _raw_spin_lock_irqsave triggers the kretprobe and installs kretprobe_trampoline handler on _raw_spin_lock_irqsave return. The kretprobe_trampoline handler is then executed with already locked kretprobe_table_locks, and first thing it does is to lock kretprobe_table_locks ;-) the whole lockup path like: kprobe_flush_task kretprobe_table_lock raw_spin_lock_irqsave _raw_spin_lock_irqsave ---> probe triggered, kretprobe_trampoline installed ---> kretprobe_table_locks locked kretprobe_trampoline trampoline_handler kretprobe_hash_lock(current, &head, &flags); <--- deadlock Adding kprobe_busy_begin/end helpers that mark code with fake probe installed to prevent triggering of another kprobe within this code. Using these helpers in kprobe_flush_task, so the probe recursion protection check is hit and the probe is never set to prevent above lockup. Link: http://lkml.kernel.org/r/158927059835.27680.7011202830041561604.stgit@devnote2 Fixes: ef53d9c5e4da ("kprobes: improve kretprobe scalability with hashed locking") Cc: Ingo Molnar Cc: "Gustavo A . R . Silva" Cc: Anders Roxell Cc: "Naveen N . Rao" Cc: Anil S Keshavamurthy Cc: David Miller Cc: Ingo Molnar Cc: Peter Zijlstra Cc: stable@vger.kernel.org Reported-by: "Ziqian SUN (Zamir)" Acked-by: Masami Hiramatsu Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/kprobes/core.c | 16 +++------------- include/linux/kprobes.h | 4 ++++ kernel/kprobes.c | 24 ++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3bafe1bd4dc7..8a5ec10e95dc 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -753,16 +753,11 @@ asm( NOKPROBE_SYMBOL(kretprobe_trampoline); STACK_FRAME_NON_STANDARD(kretprobe_trampoline); -static struct kprobe kretprobe_kprobe = { - .addr = (void *)kretprobe_trampoline, -}; - /* * Called from kretprobe_trampoline */ __used __visible void *trampoline_handler(struct pt_regs *regs) { - struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; @@ -772,16 +767,12 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) void *frame_pointer; bool skipped = false; - preempt_disable(); - /* * Set a dummy kprobe for avoiding kretprobe recursion. * Since kretprobe never run in kprobe handler, kprobe must not * be running at this point. */ - kcb = get_kprobe_ctlblk(); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; + kprobe_busy_begin(); INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -857,7 +848,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) __this_cpu_write(current_kprobe, &ri->rp->kp); ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); + __this_cpu_write(current_kprobe, &kprobe_busy); } recycle_rp_inst(ri, &empty_rp); @@ -873,8 +864,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); - __this_cpu_write(current_kprobe, NULL); - preempt_enable(); + kprobe_busy_end(); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 594265bfd390..05ed663e6c7b 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -350,6 +350,10 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) return this_cpu_ptr(&kprobe_ctlblk); } +extern struct kprobe kprobe_busy; +void kprobe_busy_begin(void); +void kprobe_busy_end(void); + kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset); int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5cb7791c16b3..4a904cc56d68 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1241,6 +1241,26 @@ __releases(hlist_lock) } NOKPROBE_SYMBOL(kretprobe_table_unlock); +struct kprobe kprobe_busy = { + .addr = (void *) get_kprobe, +}; + +void kprobe_busy_begin(void) +{ + struct kprobe_ctlblk *kcb; + + preempt_disable(); + __this_cpu_write(current_kprobe, &kprobe_busy); + kcb = get_kprobe_ctlblk(); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; +} + +void kprobe_busy_end(void) +{ + __this_cpu_write(current_kprobe, NULL); + preempt_enable(); +} + /* * This function is called from finish_task_switch when task tk becomes dead, * so that we can recycle any function-return probe instances associated @@ -1258,6 +1278,8 @@ void kprobe_flush_task(struct task_struct *tk) /* Early boot. kretprobe_table_locks not yet initialized. */ return; + kprobe_busy_begin(); + INIT_HLIST_HEAD(&empty_rp); hash = hash_ptr(tk, KPROBE_HASH_BITS); head = &kretprobe_inst_table[hash]; @@ -1271,6 +1293,8 @@ void kprobe_flush_task(struct task_struct *tk) hlist_del(&ri->hlist); kfree(ri); } + + kprobe_busy_end(); } NOKPROBE_SYMBOL(kprobe_flush_task); From e9b7b1c0c103a623be1a65c39f98719803440871 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 10 Jun 2020 01:12:44 +0000 Subject: [PATCH 180/462] sample-trace-array: Fix sleeping function called from invalid context BUG: sleeping function called from invalid context at kernel/locking/mutex.c:935 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 0, name: swapper/5 1 lock held by swapper/5/0: #0: ffff80001002bd90 (samples/ftrace/sample-trace-array.c:38){+.-.}-{0:0}, at: call_timer_fn+0x8/0x3e0 CPU: 5 PID: 0 Comm: swapper/5 Not tainted 5.7.0+ #8 Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 Call trace: dump_backtrace+0x0/0x1a0 show_stack+0x20/0x30 dump_stack+0xe4/0x150 ___might_sleep+0x160/0x200 __might_sleep+0x58/0x90 __mutex_lock+0x64/0x948 mutex_lock_nested+0x3c/0x58 __ftrace_set_clr_event+0x44/0x88 trace_array_set_clr_event+0x24/0x38 mytimer_handler+0x34/0x40 [sample_trace_array] mutex_lock() will be called in interrupt context, using workqueue to fix it. Link: https://lkml.kernel.org/r/20200610011244.2209486-1-wangkefeng.wang@huawei.com Cc: stable@vger.kernel.org Fixes: 89ed42495ef4 ("tracing: Sample module to demonstrate kernel access to Ftrace instances.") Reviewed-by: Divya Indi Signed-off-by: Kefeng Wang Signed-off-by: Steven Rostedt (VMware) --- samples/ftrace/sample-trace-array.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c index d523450d73eb..9e437f930280 100644 --- a/samples/ftrace/sample-trace-array.c +++ b/samples/ftrace/sample-trace-array.c @@ -6,6 +6,7 @@ #include #include #include +#include /* * Any file that uses trace points, must include the header. @@ -20,6 +21,16 @@ struct trace_array *tr; static void mytimer_handler(struct timer_list *unused); static struct task_struct *simple_tsk; +static void trace_work_fn(struct work_struct *work) +{ + /* + * Disable tracing for event "sample_event". + */ + trace_array_set_clr_event(tr, "sample-subsystem", "sample_event", + false); +} +static DECLARE_WORK(trace_work, trace_work_fn); + /* * mytimer: Timer setup to disable tracing for event "sample_event". This * timer is only for the purposes of the sample module to demonstrate access of @@ -29,11 +40,7 @@ static DEFINE_TIMER(mytimer, mytimer_handler); static void mytimer_handler(struct timer_list *unused) { - /* - * Disable tracing for event "sample_event". - */ - trace_array_set_clr_event(tr, "sample-subsystem", "sample_event", - false); + schedule_work(&trace_work); } static void simple_thread_func(int count) @@ -76,6 +83,7 @@ static int simple_thread(void *arg) simple_thread_func(count++); del_timer(&mytimer); + cancel_work_sync(&trace_work); /* * trace_array_put() decrements the reference counter associated with From 9fbc01cdba66e988122ccdc6094cfd85d9587769 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 9 Jun 2020 13:52:00 +0000 Subject: [PATCH 181/462] sample-trace-array: Remove trace_array 'sample-instance' Remove trace_array 'sample-instance' if kthread_run fails in sample_trace_array_init(). Link: https://lkml.kernel.org/r/20200609135200.2206726-1-wangkefeng.wang@huawei.com Cc: stable@vger.kernel.org Fixes: 89ed42495ef4a ("tracing: Sample module to demonstrate kernel access to Ftrace instances.") Reviewed-by: Divya Indi Signed-off-by: Kefeng Wang Signed-off-by: Steven Rostedt (VMware) --- samples/ftrace/sample-trace-array.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c index 9e437f930280..6aba02a31c96 100644 --- a/samples/ftrace/sample-trace-array.c +++ b/samples/ftrace/sample-trace-array.c @@ -115,8 +115,12 @@ static int __init sample_trace_array_init(void) trace_printk_init_buffers(); simple_tsk = kthread_run(simple_thread, NULL, "sample-instance"); - if (IS_ERR(simple_tsk)) + if (IS_ERR(simple_tsk)) { + trace_array_put(tr); + trace_array_destroy(tr); return -1; + } + return 0; } From 4649079b9de1ad86be9f4c989373adb8235a8485 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 9 Jun 2020 22:00:41 -0400 Subject: [PATCH 182/462] tracing: Make ftrace packed events have align of 1 When using trace-cmd on 5.6-rt for the function graph tracer, the output was corrupted. It gave output like this: funcgraph_entry: func=0xffffffff depth=38982 funcgraph_entry: func=0x1ffffffff depth=16044 funcgraph_exit: func=0xffffffff overrun=0x92539aaf00000000 calltime=0x92539c9900000072 rettime=0x100000072 depth=11084 funcgraph_exit: func=0xffffffff overrun=0x9253946e00000000 calltime=0x92539e2100000072 rettime=0x72 depth=26033702 funcgraph_entry: func=0xffffffff depth=85798 funcgraph_entry: func=0x1ffffffff depth=12044 The reason was because the tracefs/events/ftrace/funcgraph_entry/exit format file was incorrect. The -rt kernel adds more common fields to the trace events. Namely, common_migrate_disable and common_preempt_lazy_count. Each is one byte in size. This changes the alignment of the normal payload. Most events are aligned normally, but the function and function graph events are defined with a "PACKED" macro, that packs their payload. As the offsets displayed in the format files are now calculated by an aligned field, the aligned field for function and function graph events should be 1, not their normal alignment. With aligning of the funcgraph_entry event, the format file has: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:unsigned char common_migrate_disable; offset:8; size:1; signed:0; field:unsigned char common_preempt_lazy_count; offset:9; size:1; signed:0; field:unsigned long func; offset:16; size:8; signed:0; field:int depth; offset:24; size:4; signed:1; But the actual alignment is: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:unsigned char common_migrate_disable; offset:8; size:1; signed:0; field:unsigned char common_preempt_lazy_count; offset:9; size:1; signed:0; field:unsigned long func; offset:12; size:8; signed:0; field:int depth; offset:20; size:4; signed:1; Link: https://lkml.kernel.org/r/20200609220041.2a3b527f@oasis.local.home Cc: stable@vger.kernel.org Fixes: 04ae87a52074e ("ftrace: Rework event_create_dir()") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.h | 3 +++ kernel/trace/trace_entries.h | 14 +++++++------- kernel/trace/trace_export.c | 16 ++++++++++++++++ 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index def769df5bf1..13db4000af3f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -61,6 +61,9 @@ enum trace_type { #undef __field_desc #define __field_desc(type, container, item) +#undef __field_packed +#define __field_packed(type, container, item) + #undef __array #define __array(type, item, size) type item[size]; diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index a523da0dae0a..18c4a58aff79 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -78,8 +78,8 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry, F_STRUCT( __field_struct( struct ftrace_graph_ent, graph_ent ) - __field_desc( unsigned long, graph_ent, func ) - __field_desc( int, graph_ent, depth ) + __field_packed( unsigned long, graph_ent, func ) + __field_packed( int, graph_ent, depth ) ), F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth) @@ -92,11 +92,11 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry, F_STRUCT( __field_struct( struct ftrace_graph_ret, ret ) - __field_desc( unsigned long, ret, func ) - __field_desc( unsigned long, ret, overrun ) - __field_desc( unsigned long long, ret, calltime) - __field_desc( unsigned long long, ret, rettime ) - __field_desc( int, ret, depth ) + __field_packed( unsigned long, ret, func ) + __field_packed( unsigned long, ret, overrun ) + __field_packed( unsigned long long, ret, calltime) + __field_packed( unsigned long long, ret, rettime ) + __field_packed( int, ret, depth ) ), F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d", diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 77ce5a3b6773..70d3d0a09053 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -45,6 +45,9 @@ static int ftrace_event_register(struct trace_event_call *call, #undef __field_desc #define __field_desc(type, container, item) type item; +#undef __field_packed +#define __field_packed(type, container, item) type item; + #undef __array #define __array(type, item, size) type item[size]; @@ -85,6 +88,13 @@ static void __always_unused ____ftrace_check_##name(void) \ .size = sizeof(_type), .align = __alignof__(_type), \ is_signed_type(_type), .filter_type = _filter_type }, + +#undef __field_ext_packed +#define __field_ext_packed(_type, _item, _filter_type) { \ + .type = #_type, .name = #_item, \ + .size = sizeof(_type), .align = 1, \ + is_signed_type(_type), .filter_type = _filter_type }, + #undef __field #define __field(_type, _item) __field_ext(_type, _item, FILTER_OTHER) @@ -94,6 +104,9 @@ static void __always_unused ____ftrace_check_##name(void) \ #undef __field_desc #define __field_desc(_type, _container, _item) __field_ext(_type, _item, FILTER_OTHER) +#undef __field_packed +#define __field_packed(_type, _container, _item) __field_ext_packed(_type, _item, FILTER_OTHER) + #undef __array #define __array(_type, _item, _len) { \ .type = #_type"["__stringify(_len)"]", .name = #_item, \ @@ -129,6 +142,9 @@ static struct trace_event_fields ftrace_event_fields_##name[] = { \ #undef __field_desc #define __field_desc(type, container, item) +#undef __field_packed +#define __field_packed(type, container, item) + #undef __array #define __array(type, item, len) From 48a42f5d138435242529726b8802076a24b6db17 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 10 Jun 2020 11:32:51 +0800 Subject: [PATCH 183/462] trace: Fix typo in allocate_ftrace_ops()'s comment No functional change, just correct the word. Link: https://lkml.kernel.org/r/20200610033251.31713-1-richard.weiyang@linux.alibaba.com Signed-off-by: Wei Yang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_functions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 8a4c8d5c2c98..dd4dff71d89a 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -42,7 +42,7 @@ static int allocate_ftrace_ops(struct trace_array *tr) if (!ops) return -ENOMEM; - /* Currently only the non stack verision is supported */ + /* Currently only the non stack version is supported */ ops->func = function_trace_call; ops->flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_PID; From 3aa8fdc37d16735e8891035becf25b3857d3efe0 Mon Sep 17 00:00:00 2001 From: Vamshi K Sthambamkadi Date: Mon, 15 Jun 2020 20:00:38 +0530 Subject: [PATCH 184/462] tracing/probe: Fix memleak in fetch_op_data operations kmemleak report: [<57dcc2ca>] __kmalloc_track_caller+0x139/0x2b0 [] kstrndup+0x37/0x80 [] parse_probe_arg.isra.7+0x3cc/0x630 [<055bf2ba>] traceprobe_parse_probe_arg+0x2f5/0x810 [<655a7766>] trace_kprobe_create+0x2ca/0x950 [<4fc6a02a>] create_or_delete_trace_kprobe+0xf/0x30 [<6d1c8a52>] trace_run_command+0x67/0x80 [] trace_parse_run_command+0xa7/0x140 [] probes_write+0x10/0x20 [<2027641c>] __vfs_write+0x30/0x1e0 [<6a4aeee1>] vfs_write+0x96/0x1b0 [<3517fb7d>] ksys_write+0x53/0xc0 [] __ia32_sys_write+0x15/0x20 [] do_syscall_32_irqs_on+0x3d/0x260 [] do_fast_syscall_32+0x39/0xb0 [] entry_SYSENTER_32+0xaf/0x102 Post parse_probe_arg(), the FETCH_OP_DATA operation type is overwritten to FETCH_OP_ST_STRING, as a result memory is never freed since traceprobe_free_probe_arg() iterates only over SYMBOL and DATA op types Setup fetch string operation correctly after fetch_op_data operation. Link: https://lkml.kernel.org/r/20200615143034.GA1734@cosmos Cc: stable@vger.kernel.org Fixes: a42e3c4de964 ("tracing/probe: Add immediate string parameter support") Acked-by: Masami Hiramatsu Signed-off-by: Vamshi K Sthambamkadi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index b8a928e925c7..d2867ccc6aca 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -639,8 +639,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, ret = -EINVAL; goto fail; } - if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) || - parg->count) { + if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM || + code->op == FETCH_OP_DATA) || parg->count) { /* * IMM, DATA and COMM is pointing actual address, those * must be kept, and if parg->count != 0, this is an From 69243720c0932b8672e571a873c78bcf3326575a Mon Sep 17 00:00:00 2001 From: YangHui Date: Tue, 16 Jun 2020 11:36:46 +0800 Subject: [PATCH 185/462] tracing: Remove unused event variable in tracing_iter_reset We do not use the event variable, just remove it. Signed-off-by: YangHui Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ec44b0e2a19c..bb62269724d5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3570,7 +3570,6 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos) void tracing_iter_reset(struct trace_iterator *iter, int cpu) { - struct ring_buffer_event *event; struct ring_buffer_iter *buf_iter; unsigned long entries = 0; u64 ts; @@ -3588,7 +3587,7 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) * that a reset never took place on a cpu. This is evident * by the timestamp being before the start of the buffer. */ - while ((event = ring_buffer_iter_peek(buf_iter, &ts))) { + while (ring_buffer_iter_peek(buf_iter, &ts)) { if (ts >= iter->array_buffer->time_start) break; entries++; From 4e264ffd953463cd14c0720eaa9315ac052f5973 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 16 Jun 2020 19:14:08 +0900 Subject: [PATCH 186/462] proc/bootconfig: Fix to use correct quotes for value Fix /proc/bootconfig to select double or single quotes corrctly according to the value. If a bootconfig value includes a double quote character, we must use single-quotes to quote that value. This modifies if() condition and blocks for avoiding double-quote in value check in 2 places. Anyway, since xbc_array_for_each_value() can handle the array which has a single node correctly. Thus, if (vnode && xbc_node_is_array(vnode)) { xbc_array_for_each_value(vnode) /* vnode->next != NULL */ ... } else { snprintf(val); /* val is an empty string if !vnode */ } is equivalent to if (vnode) { xbc_array_for_each_value(vnode) /* vnode->next can be NULL */ ... } else { snprintf(""); /* value is always empty */ } Link: http://lkml.kernel.org/r/159230244786.65555.3763894451251622488.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: c1a3c36017d4 ("proc: bootconfig: Add /proc/bootconfig to show boot config list") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- fs/proc/bootconfig.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c index 9955d75c0585..ad31ec4ad627 100644 --- a/fs/proc/bootconfig.c +++ b/fs/proc/bootconfig.c @@ -26,8 +26,9 @@ static int boot_config_proc_show(struct seq_file *m, void *v) static int __init copy_xbc_key_value_list(char *dst, size_t size) { struct xbc_node *leaf, *vnode; - const char *val; char *key, *end = dst + size; + const char *val; + char q; int ret = 0; key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL); @@ -41,16 +42,20 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size) break; dst += ret; vnode = xbc_node_get_child(leaf); - if (vnode && xbc_node_is_array(vnode)) { + if (vnode) { xbc_array_for_each_value(vnode, val) { - ret = snprintf(dst, rest(dst, end), "\"%s\"%s", - val, vnode->next ? ", " : "\n"); + if (strchr(val, '"')) + q = '\''; + else + q = '"'; + ret = snprintf(dst, rest(dst, end), "%c%s%c%s", + q, val, q, vnode->next ? ", " : "\n"); if (ret < 0) goto out; dst += ret; } } else { - ret = snprintf(dst, rest(dst, end), "\"%s\"\n", val); + ret = snprintf(dst, rest(dst, end), "\"\"\n"); if (ret < 0) break; dst += ret; From 272da3279df191f028fd63d1683e5ecd56fcb13b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 16 Jun 2020 19:14:17 +0900 Subject: [PATCH 187/462] tools/bootconfig: Fix to use correct quotes for value Fix bootconfig tool to select double or single quotes correctly according to the value. If a bootconfig value includes a double quote character, we must use single-quotes to quote that value. Link: http://lkml.kernel.org/r/159230245697.65555.12444299015852932304.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 950313ebf79c ("tools: bootconfig: Add bootconfig command") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 0efaf45f7367..21896a6675fd 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -14,13 +14,18 @@ #include #include -static int xbc_show_array(struct xbc_node *node) +static int xbc_show_value(struct xbc_node *node) { const char *val; + char q; int i = 0; xbc_array_for_each_value(node, val) { - printf("\"%s\"%s", val, node->next ? ", " : ";\n"); + if (strchr(val, '"')) + q = '\''; + else + q = '"'; + printf("%c%s%c%s", q, val, q, node->next ? ", " : ";\n"); i++; } return i; @@ -48,10 +53,7 @@ static void xbc_show_compact_tree(void) continue; } else if (cnode && xbc_node_is_value(cnode)) { printf("%s = ", xbc_node_get_data(node)); - if (cnode->next) - xbc_show_array(cnode); - else - printf("\"%s\";\n", xbc_node_get_data(cnode)); + xbc_show_value(cnode); } else { printf("%s;\n", xbc_node_get_data(node)); } From f91cb5b7476a603068eae31e5b2cc170dd2b9b1b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 16 Jun 2020 19:14:25 +0900 Subject: [PATCH 188/462] tools/bootconfig: Fix to return 0 if succeeded to show the bootconfig Fix bootconfig to return 0 if succeeded to show the bootconfig in initrd. Without this fix, "bootconfig INITRD" command returns !0 even if the command succeeded to show the bootconfig. Link: http://lkml.kernel.org/r/159230246566.65555.11891772258543514487.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 950313ebf79c ("tools: bootconfig: Add bootconfig command") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 21896a6675fd..e0878f5f74b1 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -207,11 +207,13 @@ int show_xbc(const char *path) } ret = load_xbc_from_initrd(fd, &buf); - if (ret < 0) + if (ret < 0) { pr_err("Failed to load a boot config from initrd: %d\n", ret); - else - xbc_show_compact_tree(); - + goto out; + } + xbc_show_compact_tree(); + ret = 0; +out: close(fd); free(buf); From 5414251aa2e23ad50cc769656b05ea196090e792 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 16 Jun 2020 19:14:34 +0900 Subject: [PATCH 189/462] tools/bootconfig: Add testcase for show-command and quotes test Add testcases for the return value of the command to show bootconfig in initrd, and double/single quotes selecting. Link: http://lkml.kernel.org/r/159230247428.65555.2109472942519215104.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/test-bootconfig.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index eff16b77d5eb..3c2ab9e75730 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -55,6 +55,9 @@ echo "Apply command test" xpass $BOOTCONF -a $TEMPCONF $INITRD new_size=$(stat -c %s $INITRD) +echo "Show command test" +xpass $BOOTCONF $INITRD + echo "File size check" xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9 + 12) @@ -114,6 +117,13 @@ xpass grep -q "bar" $OUTFILE xpass grep -q "baz" $OUTFILE xpass grep -q "qux" $OUTFILE +echo "Double/single quotes test" +echo "key = '\"string\"';" > $TEMPCONF +$BOOTCONF -a $TEMPCONF $INITRD +$BOOTCONF $INITRD > $TEMPCONF +cat $TEMPCONF +xpass grep \'\"string\"\' $TEMPCONF + echo "=== expected failure cases ===" for i in samples/bad-* ; do xfail $BOOTCONF -a $i $INITRD From 4d0831e8a029c03f49f434f28b8faef9f0bd403f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 14 Jun 2020 23:43:41 +0900 Subject: [PATCH 190/462] kconfig: unify cc-option and as-option cc-option and as-option are almost the same; both pass the flag to $(CC). The main difference is the cc-option stops before the assemble stage (-S option) whereas as-option stops after (-c option). I chose -S because it is slightly faster, but $(cc-option,-gz=zlib) returns a wrong result (https://lkml.org/lkml/2020/6/9/1529). It has been fixed by commit 7b16994437c7 ("Makefile: Improve compressed debug info support detection"), but the assembler should always be invoked for more reliable compiler option tests. However, you cannot simply replace -S with -c because the following code in lib/Kconfig.debug would break: depends on $(cc-option,-gsplit-dwarf) The combination of -c and -gsplit-dwarf does not accept /dev/null as output. $ cat /dev/null | gcc -gsplit-dwarf -S -x c - -o /dev/null $ echo $? 0 $ cat /dev/null | gcc -gsplit-dwarf -c -x c - -o /dev/null objcopy: Warning: '/dev/null' is not an ordinary file $ echo $? 1 $ cat /dev/null | gcc -gsplit-dwarf -c -x c - -o tmp.o $ echo $? 0 There is another flag that creates an separate file based on the object file path: $ cat /dev/null | gcc -ftest-coverage -c -x c - -o /dev/null :1: error: cannot open /dev/null.gcno So, we cannot use /dev/null to sink the output. Align the cc-option implementation with scripts/Kbuild.include. With -c option used in cc-option, as-option is unneeded. Signed-off-by: Masahiro Yamada Acked-by: Will Deacon --- arch/arm64/Kconfig | 2 +- lib/Kconfig.debug | 1 - scripts/Kconfig.include | 8 +------- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 31380da53689..6eb18f45258e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1564,7 +1564,7 @@ config CC_HAS_SIGN_RETURN_ADDRESS def_bool $(cc-option,-msign-return-address=all) config AS_HAS_PAC - def_bool $(as-option,-Wa$(comma)-march=armv8.3-a) + def_bool $(cc-option,-Wa$(comma)-march=armv8.3-a) config AS_HAS_CFI_NEGATE_RA_STATE def_bool $(as-instr,.cfi_startproc\n.cfi_negate_ra_state\n.cfi_endproc\n) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 96999d4d2dda..9ad9210d70a1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -229,7 +229,6 @@ config DEBUG_INFO_COMPRESSED bool "Compressed debugging information" depends on DEBUG_INFO depends on $(cc-option,-gz=zlib) - depends on $(as-option,-gz=zlib) depends on $(ld-option,--compress-debug-sections=zlib) help Compress the debug information using zlib. Requires GCC 5.0+ or Clang diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include index c264da2b9b30..a5fe72c504ff 100644 --- a/scripts/Kconfig.include +++ b/scripts/Kconfig.include @@ -25,18 +25,12 @@ failure = $(if-success,$(1),n,y) # $(cc-option,) # Return y if the compiler supports , n otherwise -cc-option = $(success,$(CC) -Werror $(CLANG_FLAGS) $(1) -S -x c /dev/null -o /dev/null) +cc-option = $(success,mkdir .tmp_$$$$; trap "rm -rf .tmp_$$$$" EXIT; $(CC) -Werror $(CLANG_FLAGS) $(1) -c -x c /dev/null -o .tmp_$$$$/tmp.o) # $(ld-option,) # Return y if the linker supports , n otherwise ld-option = $(success,$(LD) -v $(1)) -# $(as-option,) -# /dev/zero is used as output instead of /dev/null as some assembler cribs when -# both input and output are same. Also both of them have same write behaviour so -# can be easily substituted. -as-option = $(success, $(CC) $(CLANG_FLAGS) $(1) -c -x assembler /dev/null -o /dev/zero) - # $(as-instr,) # Return y if the assembler supports , n otherwise as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -c -x assembler -o /dev/null -) From 0f50d21ade110e519425a5c1e2cfee7a130d1a0e Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 16 Jun 2020 21:51:32 +0900 Subject: [PATCH 191/462] scripts: Fix typo in headers_install.sh This patch fixes a spelling typo in scripts/headers_install.sh Signed-off-by: Masanari Iida Signed-off-by: Masahiro Yamada --- scripts/headers_install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/headers_install.sh b/scripts/headers_install.sh index 955cf3aedf21..224f51012b6e 100755 --- a/scripts/headers_install.sh +++ b/scripts/headers_install.sh @@ -11,7 +11,7 @@ then echo "asm/inline/volatile keywords." echo echo "INFILE: header file to operate on" - echo "OUTFILE: output file which the processed header is writen to" + echo "OUTFILE: output file which the processed header is written to" exit 1 fi From b19d57d0f3cc6f1022edf94daf1d70506a09e3c2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 8 Jun 2020 20:22:33 -0500 Subject: [PATCH 192/462] overflow.h: Add flex_array_size() helper Add flex_array_size() helper for the calculation of the size, in bytes, of a flexible array member contained within an enclosing structure. Example of usage: struct something { size_t count; struct foo items[]; }; struct something *instance; instance = kmalloc(struct_size(instance, items, count), GFP_KERNEL); instance->count = count; memcpy(instance->items, src, flex_array_size(instance, items, instance->count)); The helper returns SIZE_MAX on overflow instead of wrapping around. Additionally replaces parameter "n" with "count" in struct_size() helper for greater clarity and unification. Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20200609012233.GA3371@embeddedor Signed-off-by: Kees Cook --- include/linux/overflow.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 659045046468..93fcef105061 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -304,16 +304,33 @@ static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) * struct_size() - Calculate size of structure with trailing array. * @p: Pointer to the structure. * @member: Name of the array member. - * @n: Number of elements in the array. + * @count: Number of elements in the array. * * Calculates size of memory needed for structure @p followed by an - * array of @n @member elements. + * array of @count number of @member elements. * * Return: number of bytes needed or SIZE_MAX on overflow. */ -#define struct_size(p, member, n) \ - __ab_c_size(n, \ +#define struct_size(p, member, count) \ + __ab_c_size(count, \ sizeof(*(p)->member) + __must_be_array((p)->member),\ sizeof(*(p))) +/** + * flex_array_size() - Calculate size of a flexible array member + * within an enclosing structure. + * + * @p: Pointer to the structure. + * @member: Name of the flexible array member. + * @count: Number of elements in the array. + * + * Calculates size of a flexible array of @count number of @member + * elements, at the end of structure @p. + * + * Return: number of bytes needed or SIZE_MAX on overflow. + */ +#define flex_array_size(p, member, count) \ + array_size(count, \ + sizeof(*(p)->member) + __must_be_array((p)->member)) + #endif /* __LINUX_OVERFLOW_H */ From ff58bbc7b9704a5869204176f804eff57307fef0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Jun 2020 14:09:21 +0200 Subject: [PATCH 193/462] ALSA: usb-audio: Fix potential use-after-free of streams With the recent full-duplex support of implicit feedback streams, an endpoint can be still running after closing the capture stream as long as the playback stream with the sync-endpoint is running. In such a state, the URBs are still be handled and they may call retire_data_urb callback, which tries to transfer the data from the PCM buffer. Since the PCM stream gets closed, this may lead to use-after-free. This patch adds the proper clearance of the callback at stopping the capture stream for addressing the possible UAF above. Fixes: 10ce77e4817f ("ALSA: usb-audio: Add duplex sound support for USB devices using implicit feedback") Link: https://lore.kernel.org/r/20200616120921.12249-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 84c0ae431936..a777d36c4f5a 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -1787,6 +1787,7 @@ static int snd_usb_substream_capture_trigger(struct snd_pcm_substream *substream return 0; case SNDRV_PCM_TRIGGER_STOP: stop_endpoints(subs); + subs->data_endpoint->retire_data_urb = NULL; subs->running = 0; return 0; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: From b9249cba25a5dce5de87e5404503a5e11832c2dd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 16 Jun 2020 19:03:49 +0100 Subject: [PATCH 194/462] arm64: bti: Require clang >= 10.0.1 for in-kernel BTI support Unfortunately, most versions of clang that support BTI are capable of miscompiling the kernel when converting a switch statement into a jump table. As an example, attempting to spawn a KVM guest results in a panic: [ 56.253312] Kernel panic - not syncing: bad mode [ 56.253834] CPU: 0 PID: 279 Comm: lkvm Not tainted 5.8.0-rc1 #2 [ 56.254225] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 [ 56.254712] Call trace: [ 56.254952] dump_backtrace+0x0/0x1d4 [ 56.255305] show_stack+0x1c/0x28 [ 56.255647] dump_stack+0xc4/0x128 [ 56.255905] panic+0x16c/0x35c [ 56.256146] bad_el0_sync+0x0/0x58 [ 56.256403] el1_sync_handler+0xb4/0xe0 [ 56.256674] el1_sync+0x7c/0x100 [ 56.256928] kvm_vm_ioctl_check_extension_generic+0x74/0x98 [ 56.257286] __arm64_sys_ioctl+0x94/0xcc [ 56.257569] el0_svc_common+0x9c/0x150 [ 56.257836] do_el0_svc+0x84/0x90 [ 56.258083] el0_sync_handler+0xf8/0x298 [ 56.258361] el0_sync+0x158/0x180 This is because the switch in kvm_vm_ioctl_check_extension_generic() is executed as an indirect branch to tail-call through a jump table: ffff800010032dc8: 3869694c ldrb w12, [x10, x9] ffff800010032dcc: 8b0c096b add x11, x11, x12, lsl #2 ffff800010032dd0: d61f0160 br x11 However, where the target case uses the stack, the landing pad is elided due to the presence of a paciasp instruction: ffff800010032e14: d503233f paciasp ffff800010032e18: a9bf7bfd stp x29, x30, [sp, #-16]! ffff800010032e1c: 910003fd mov x29, sp ffff800010032e20: aa0803e0 mov x0, x8 ffff800010032e24: 940017c0 bl ffff800010038d24 ffff800010032e28: 93407c00 sxtw x0, w0 ffff800010032e2c: a8c17bfd ldp x29, x30, [sp], #16 ffff800010032e30: d50323bf autiasp ffff800010032e34: d65f03c0 ret Unfortunately, this results in a fatal exception because paciasp is compatible only with branch-and-link (call) instructions and not simple indirect branches. A fix is being merged into Clang 10.0.1 so that a 'bti j' instruction is emitted as an explicit landing pad in this situation. Make in-kernel BTI depend on that compiler version when building with clang. Cc: Tom Stellard Cc: Daniel Kiss Reviewed-by: Mark Brown Acked-by: Dave Martin Reviewed-by: Nathan Chancellor Acked-by: Nick Desaulniers Link: https://lore.kernel.org/r/20200615105524.GA2694@willie-the-truck Link: https://lore.kernel.org/r/20200616183630.2445-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 31380da53689..4ae2419c14a8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1630,6 +1630,8 @@ config ARM64_BTI_KERNEL depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697 depends on !CC_IS_GCC || GCC_VERSION >= 100100 + # https://reviews.llvm.org/rGb8ae3fdfa579dbf366b1bb1cbfdbf8c51db7fa55 + depends on !CC_IS_CLANG || CLANG_VERSION >= 100001 depends on !(CC_IS_CLANG && GCOV_KERNEL) depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) help From 1b3bcca2085865c1facfbea9baf2f5cde5dc15e4 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 16 Jun 2020 21:50:30 +0800 Subject: [PATCH 195/462] regulator: mt6358: Remove BROKEN dependency The MFD part is merged into v5.8-rc1, thus remove BROKEN dependency. Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20200616135030.1163660-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 8f677f5d79b4..edb1c4f8b496 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -684,7 +684,7 @@ config REGULATOR_MT6323 config REGULATOR_MT6358 tristate "MediaTek MT6358 PMIC" - depends on MFD_MT6397 && BROKEN + depends on MFD_MT6397 help Say y here to select this option to enable the power regulator of MediaTek MT6358 PMIC. From 35700e221b18fa53401e5f315be90af9e0bbcdca Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Tue, 16 Jun 2020 13:30:35 +0200 Subject: [PATCH 196/462] spi: stm32-qspi: Fix error path in case of -EPROBE_DEFER In case of -EPROBE_DEFER, stm32_qspi_release() was called in any case which unregistered driver from pm_runtime framework even if it has not been registered yet to it. This leads to: stm32-qspi 58003000.spi: can't setup spi0.0, status -13 spi_master spi0: spi_device register error /soc/spi@58003000/mx66l51235l@0 spi_master spi0: Failed to create SPI device for /soc/spi@58003000/mx66l51235l@0 stm32-qspi 58003000.spi: can't setup spi0.1, status -13 spi_master spi0: spi_device register error /soc/spi@58003000/mx66l51235l@1 spi_master spi0: Failed to create SPI device for /soc/spi@58003000/mx66l51235l@1 On v5.7 kernel,this issue was not "visible", qspi driver was probed successfully. Fixes: 9d282c17b023 ("spi: stm32-qspi: Add pm_runtime support") Signed-off-by: Patrice Chotard Link: https://lore.kernel.org/r/20200616113035.4514-1-patrice.chotard@st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32-qspi.c | 46 ++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c index 3c44bb2fd9b1..a900962b4336 100644 --- a/drivers/spi/spi-stm32-qspi.c +++ b/drivers/spi/spi-stm32-qspi.c @@ -553,20 +553,6 @@ static const struct spi_controller_mem_ops stm32_qspi_mem_ops = { .exec_op = stm32_qspi_exec_op, }; -static void stm32_qspi_release(struct stm32_qspi *qspi) -{ - pm_runtime_get_sync(qspi->dev); - /* disable qspi */ - writel_relaxed(0, qspi->io_base + QSPI_CR); - stm32_qspi_dma_free(qspi); - mutex_destroy(&qspi->lock); - pm_runtime_put_noidle(qspi->dev); - pm_runtime_disable(qspi->dev); - pm_runtime_set_suspended(qspi->dev); - pm_runtime_dont_use_autosuspend(qspi->dev); - clk_disable_unprepare(qspi->clk); -} - static int stm32_qspi_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -642,7 +628,7 @@ static int stm32_qspi_probe(struct platform_device *pdev) if (IS_ERR(rstc)) { ret = PTR_ERR(rstc); if (ret == -EPROBE_DEFER) - goto err_qspi_release; + goto err_clk_disable; } else { reset_control_assert(rstc); udelay(2); @@ -653,7 +639,7 @@ static int stm32_qspi_probe(struct platform_device *pdev) platform_set_drvdata(pdev, qspi); ret = stm32_qspi_dma_setup(qspi); if (ret) - goto err_qspi_release; + goto err_dma_free; mutex_init(&qspi->lock); @@ -673,15 +659,26 @@ static int stm32_qspi_probe(struct platform_device *pdev) ret = devm_spi_register_master(dev, ctrl); if (ret) - goto err_qspi_release; + goto err_pm_runtime_free; pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; -err_qspi_release: - stm32_qspi_release(qspi); +err_pm_runtime_free: + pm_runtime_get_sync(qspi->dev); + /* disable qspi */ + writel_relaxed(0, qspi->io_base + QSPI_CR); + mutex_destroy(&qspi->lock); + pm_runtime_put_noidle(qspi->dev); + pm_runtime_disable(qspi->dev); + pm_runtime_set_suspended(qspi->dev); + pm_runtime_dont_use_autosuspend(qspi->dev); +err_dma_free: + stm32_qspi_dma_free(qspi); +err_clk_disable: + clk_disable_unprepare(qspi->clk); err_master_put: spi_master_put(qspi->ctrl); @@ -692,7 +689,16 @@ static int stm32_qspi_remove(struct platform_device *pdev) { struct stm32_qspi *qspi = platform_get_drvdata(pdev); - stm32_qspi_release(qspi); + pm_runtime_get_sync(qspi->dev); + /* disable qspi */ + writel_relaxed(0, qspi->io_base + QSPI_CR); + stm32_qspi_dma_free(qspi); + mutex_destroy(&qspi->lock); + pm_runtime_put_noidle(qspi->dev); + pm_runtime_disable(qspi->dev); + pm_runtime_set_suspended(qspi->dev); + pm_runtime_dont_use_autosuspend(qspi->dev); + clk_disable_unprepare(qspi->clk); return 0; } From 687993ccf3b05070598b89fad97410b26d7bc9d2 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 15 Jun 2020 12:22:29 +0300 Subject: [PATCH 197/462] powerpc/8xx: use pmd_off() to access a PMD entry in pte_update() The pte_update() implementation for PPC_8xx unfolds page table from the PGD level to access a PMD entry. Since 8xx has only 2-level page table this can be simplified with pmd_off() shortcut. Replace explicit unfolding with pmd_off() and drop defines of pgd_index() and pgd_offset() that are no longer needed. Signed-off-by: Mike Rapoport Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200615092229.23142-1-rppt@kernel.org --- arch/powerpc/include/asm/nohash/32/pgtable.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b56f14160ae5..5a590ceaec14 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -205,10 +205,6 @@ static inline void pmd_clear(pmd_t *pmdp) *pmdp = __pmd(0); } -/* to find an entry in a page-table-directory */ -#define pgd_index(address) ((address) >> PGDIR_SHIFT) -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) - /* * PTE updates. This function is called whenever an existing * valid PTE is updated. This does -not- include set_pte_at() @@ -230,6 +226,8 @@ static inline void pmd_clear(pmd_t *pmdp) * For other page sizes, we have a single entry in the table. */ #ifdef CONFIG_PPC_8xx +static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr); + static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, unsigned long clr, unsigned long set, int huge) { @@ -237,7 +235,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p pte_basic_t old = pte_val(*p); pte_basic_t new = (old & ~(pte_basic_t)clr) | set; int num, i; - pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, addr), addr), addr), addr); + pmd_t *pmd = pmd_off(mm, addr); if (!huge) num = PAGE_SIZE / SZ_4K; From 1497eea68624f6076bf3eaf66baec3771ea04045 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jun 2020 23:56:16 +1000 Subject: [PATCH 198/462] powerpc/syscalls: Use the number when building SPU syscall table Currently the macro that inserts entries into the SPU syscall table doesn't actually use the "nr" (syscall number) parameter. This does work, but it relies on the exact right number of syscall entries being emitted in order for the syscal numbers to line up with the array entries. If for example we had two entries with the same syscall number we wouldn't get an error, it would just cause all subsequent syscalls to be off by one in the spu_syscall_table. So instead change the macro to assign to the specific entry of the array, meaning any numbering overlap will be caught by the compiler. Signed-off-by: Michael Ellerman Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20200616135617.2937252-1-mpe@ellerman.id.au --- arch/powerpc/platforms/cell/spu_callbacks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index cbee3666da07..abdef9bcf432 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -35,7 +35,7 @@ */ static void *spu_syscall_table[] = { -#define __SYSCALL(nr, entry) entry, +#define __SYSCALL(nr, entry) [nr] = entry, #include #undef __SYSCALL }; From 1b0b283648163dae2a214ca28ed5a99f62a77319 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 5 Jun 2020 16:58:36 +0200 Subject: [PATCH 199/462] blktrace: break out of blktrace setup on concurrent calls We use one blktrace per request_queue, that means one per the entire disk. So we cannot run one blktrace on say /dev/vda and then /dev/vda1, or just two calls on /dev/vda. We check for concurrent setup only at the very end of the blktrace setup though. If we try to run two concurrent blktraces on the same block device the second one will fail, and the first one seems to go on. However when one tries to kill the first one one will see things like this: The kernel will show these: ``` debugfs: File 'dropped' in directory 'nvme1n1' already present! debugfs: File 'msg' in directory 'nvme1n1' already present! debugfs: File 'trace0' in directory 'nvme1n1' already present! `` And userspace just sees this error message for the second call: ``` blktrace /dev/nvme1n1 BLKTRACESETUP(2) /dev/nvme1n1 failed: 5/Input/output error ``` The first userspace process #1 will also claim that the files were taken underneath their nose as well. The files are taken away form the first process given that when the second blktrace fails, it will follow up with a BLKTRACESTOP and BLKTRACETEARDOWN. This means that even if go-happy process #1 is waiting for blktrace data, we *have* been asked to take teardown the blktrace. This can easily be reproduced with break-blktrace [0] run_0005.sh test. Just break out early if we know we're already going to fail, this will prevent trying to create the files all over again, which we know still exist. [0] https://github.com/mcgrof/break-blktrace Signed-off-by: Luis Chamberlain Signed-off-by: Jan Kara Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 5773f0ba7e76..8fd36e827f14 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -3,6 +3,9 @@ * Copyright (C) 2006 Jens Axboe * */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -494,6 +497,16 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, */ strreplace(buts->name, '/', '_'); + /* + * bdev can be NULL, as with scsi-generic, this is a helpful as + * we can be. + */ + if (q->blk_trace) { + pr_warn("Concurrent blktraces are not allowed on %s\n", + buts->name); + return -EBUSY; + } + bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) return -ENOMEM; From c3dbe541ef77754729de5e82be2d6e5d267c6c8c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 5 Jun 2020 16:58:37 +0200 Subject: [PATCH 200/462] blktrace: Avoid sparse warnings when assigning q->blk_trace Mostly for historical reasons, q->blk_trace is assigned through xchg() and cmpxchg() atomic operations. Although this is correct, sparse complains about this because it violates rcu annotations since commit c780e86dd48e ("blktrace: Protect q->blk_trace with RCU") which started to use rcu for accessing q->blk_trace. Furthermore there's no real need for atomic operations anymore since all changes to q->blk_trace happen under q->blk_trace_mutex and since it also makes more sense to check if q->blk_trace is set with the mutex held earlier. So let's just replace xchg() with rcu_replace_pointer() and cmpxchg() with explicit check and rcu_assign_pointer(). This makes the code more efficient and sparse happy. Reported-by: kbuild test robot Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 8fd36e827f14..5ef0484513ec 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -347,7 +347,8 @@ static int __blk_trace_remove(struct request_queue *q) { struct blk_trace *bt; - bt = xchg(&q->blk_trace, NULL); + bt = rcu_replace_pointer(q->blk_trace, NULL, + lockdep_is_held(&q->blk_trace_mutex)); if (!bt) return -EINVAL; @@ -501,7 +502,8 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, * bdev can be NULL, as with scsi-generic, this is a helpful as * we can be. */ - if (q->blk_trace) { + if (rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex))) { pr_warn("Concurrent blktraces are not allowed on %s\n", buts->name); return -EBUSY; @@ -556,10 +558,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, bt->pid = buts->pid; bt->trace_state = Blktrace_setup; - ret = -EBUSY; - if (cmpxchg(&q->blk_trace, NULL, bt)) - goto err; - + rcu_assign_pointer(q->blk_trace, bt); get_probe_ref(); ret = 0; @@ -1642,7 +1641,8 @@ static int blk_trace_remove_queue(struct request_queue *q) { struct blk_trace *bt; - bt = xchg(&q->blk_trace, NULL); + bt = rcu_replace_pointer(q->blk_trace, NULL, + lockdep_is_held(&q->blk_trace_mutex)); if (bt == NULL) return -EINVAL; @@ -1674,10 +1674,7 @@ static int blk_trace_setup_queue(struct request_queue *q, blk_trace_setup_lba(bt, bdev); - ret = -EBUSY; - if (cmpxchg(&q->blk_trace, NULL, bt)) - goto free_bt; - + rcu_assign_pointer(q->blk_trace, bt); get_probe_ref(); return 0; From b2c22910fe5aae10b7e17b0721e63a3edf0c9553 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 17 Jun 2020 18:29:02 +0800 Subject: [PATCH 201/462] ALSA: hda/realtek: Add mute LED and micmute LED support for HP systems There are two more HP systems control mute LED from HDA codec and need to expose micmute led class so SoF can control micmute LED. Add quirks to support them. Signed-off-by: Kai-Heng Feng Cc: Link: https://lore.kernel.org/r/20200617102906.16156-2-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2713560e0c4b..737ef82a75fd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7471,6 +7471,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), + SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877d, "HP", ALC236_FIXUP_HP_MUTE_LED), From 02553b91da5deb63c8562b47529b09b734659af0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 Jun 2020 22:04:30 -0700 Subject: [PATCH 202/462] bpf: bpf_probe_read_kernel_str() has to return amount of data read on success During recent refactorings, bpf_probe_read_kernel_str() started returning 0 on success, instead of amount of data successfully read. This majorly breaks applications relying on bpf_probe_read_kernel_str() and bpf_probe_read_str() and their results. Fix this by returning actual number of bytes read. Fixes: 8d92db5c04d1 ("bpf: rework the compat kernel probe handling") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Reviewed-by: Christoph Hellwig Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200616050432.1902042-1-andriin@fb.com --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e729c9e587a0..a3ac7de98baa 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -241,7 +241,7 @@ bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr) if (unlikely(ret < 0)) goto fail; - return 0; + return ret; fail: memset(dst, 0, size); return ret; From 1c7fb20d6b0acd452cb93d447051eac7724edfa7 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 16 Jun 2020 13:33:03 +0200 Subject: [PATCH 203/462] tools, bpftool: Add ringbuf map type to map command docs Commit c34a06c56df7 ("tools/bpftool: Add ringbuf map to a list of known map types") added the symbolic "ringbuf" name. Document it in the bpftool map command docs and usage as well. Signed-off-by: Tobias Klauser Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200616113303.8123-1-tklauser@distanz.ch --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 2 +- tools/bpf/bpftool/map.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 31101643e57c..70c78faa47ab 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -49,7 +49,7 @@ MAP COMMANDS | | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** -| | **queue** | **stack** | **sk_storage** | **struct_ops** } +| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 99109a6afe17..1d3b60651078 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1591,7 +1591,7 @@ static int do_help(int argc, char **argv) " lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n" " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" - " queue | stack | sk_storage | struct_ops }\n" + " queue | stack | sk_storage | struct_ops | ringbuf }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2]); From 95b2c3ec4cb1689db2389c251d39f64490ba641c Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 17 Jun 2020 16:21:29 +0100 Subject: [PATCH 204/462] regmap: Fix memory leak from regmap_register_patch When a register patch is registered the reg_sequence is copied but the memory allocated is never freed. Add a kfree in regmap_exit to clean it up. Fixes: 22f0d90a3482 ("regmap: Support register patch sets") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20200617152129.19655-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 183ec789c3fd..06a796821e8b 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1349,6 +1349,7 @@ void regmap_exit(struct regmap *map) if (map->hwlock) hwspin_lock_free(map->hwlock); kfree_const(map->name); + kfree(map->patch); kfree(map); } EXPORT_SYMBOL_GPL(regmap_exit); From b13b04d9382113f787e21c9567a4022fef8697b9 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Sat, 13 Jun 2020 01:03:33 +0200 Subject: [PATCH 205/462] perf script: Initialize zstd_data Fixes segmentation fault when trying to interpret zstd-compressed data with perf script: ``` $ perf record -z ls ... [ perf record: Captured and wrote 0,010 MB perf.data, compressed (original 0,001 MB, ratio is 2,190) ] $ memcheck perf script ... ==67911== Invalid read of size 4 ==67911== at 0x5568188: ZSTD_decompressStream (in /usr/lib/libzstd.so.1.4.5) ==67911== by 0x6E726B: zstd_decompress_stream (zstd.c:100) ==67911== by 0x65729C: perf_session__process_compressed_event (session.c:72) ==67911== by 0x6598E8: perf_session__process_user_event (session.c:1583) ==67911== by 0x65BA59: reader__process_events (session.c:2177) ==67911== by 0x65BA59: __perf_session__process_events (session.c:2234) ==67911== by 0x65BA59: perf_session__process_events (session.c:2267) ==67911== by 0x5A7397: __cmd_script (builtin-script.c:2447) ==67911== by 0x5A7397: cmd_script (builtin-script.c:3840) ==67911== by 0x5FE9D2: run_builtin (perf.c:312) ==67911== by 0x711627: handle_internal_command (perf.c:364) ==67911== by 0x711627: run_argv (perf.c:408) ==67911== by 0x711627: main (perf.c:538) ==67911== Address 0x71d8 is not stack'd, malloc'd or (recently) free'd ``` Signed-off-by: Milian Wolff Acked-by: Alexey Budankov Tested-by: Arnaldo Carvalho de Melo LPU-Reference: 20200612230333.72140-1-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 5da243676f12..181d65e5a450 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3837,6 +3837,9 @@ int cmd_script(int argc, const char **argv) if (err) goto out_delete; + if (zstd_init(&(session->zstd_data), 0) < 0) + pr_warning("Decompression initialization failed. Reported data may be incomplete.\n"); + err = __cmd_script(&script); flush_scripting(); From 25ca7e5c0b0315f3b5e6c4d2123e5a821ec0b590 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 17 Jun 2020 10:07:59 -0300 Subject: [PATCH 206/462] tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes in: 7e5b3c267d25 ("x86/speculation: Add Special Register Buffer Data Sampling (SRBDS) mitigation") Addressing these tools/perf build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h With this one will be able to use these new AMD MSRs in filters, by name, e.g.: # perf trace -e msr:* --filter "msr==IA32_MCU_OPT_CTRL" ^C# Using -v we can see how it sets up the tracepoint filters, converting from the string in the filter to the numeric value: # perf trace -v -e msr:* --filter "msr==IA32_MCU_OPT_CTRL" Using CPUID GenuineIntel-6-8E-A 0x123 New filter for msr:read_msr: (msr==0x123) && (common_pid != 335 && common_pid != 30344) 0x123 New filter for msr:write_msr: (msr==0x123) && (common_pid != 335 && common_pid != 30344) 0x123 New filter for msr:rdpmc: (msr==0x123) && (common_pid != 335 && common_pid != 30344) mmap size 528384B ^C# The updating process shows how this affects tooling in more detail: $ diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h --- tools/arch/x86/include/asm/msr-index.h 2020-06-03 10:36:09.959910238 -0300 +++ arch/x86/include/asm/msr-index.h 2020-06-17 10:04:20.235052901 -0300 @@ -128,6 +128,10 @@ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ +/* SRBDS support */ +#define MSR_IA32_MCU_OPT_CTRL 0x00000123 +#define RNGDS_MITG_DIS BIT(0) + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 $ set -o vi $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after --- before 2020-06-17 10:05:49.653114752 -0300 +++ after 2020-06-17 10:06:01.777258731 -0300 @@ -51,6 +51,7 @@ [0x0000011e] = "IA32_BBL_CR_CTL3", [0x00000120] = "IDT_MCR_CTRL", [0x00000122] = "IA32_TSX_CTRL", + [0x00000123] = "IA32_MCU_OPT_CTRL", [0x00000140] = "MISC_FEATURES_ENABLES", [0x00000174] = "IA32_SYSENTER_CS", [0x00000175] = "IA32_SYSENTER_ESP", $ The related change to cpu-features.h affects this: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o This shouldn't be affecting that 'perf bench' entry: $ find tools/perf/ -type f | xargs grep SRBDS $ Cc: Adrian Hunter Cc: Jiri Olsa Cc: Mark Gross Cc: Namhyung Kim Cc: Thomas Gleixner Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 2 ++ tools/arch/x86/include/asm/msr-index.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index db189945e9b0..02dabc9e77b0 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -362,6 +362,7 @@ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */ #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ @@ -407,5 +408,6 @@ #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index ef452b817f44..e8370e64a155 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -128,6 +128,10 @@ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ +/* SRBDS support */ +#define MSR_IA32_MCU_OPT_CTRL 0x00000123 +#define RNGDS_MITG_DIS BIT(0) + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 From f64925c1ebacce7cadc7f2b993b145c3bfd95b57 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 17 Jun 2020 10:16:53 -0300 Subject: [PATCH 207/462] tools include UAPI: Sync linux/vhost.h with the kernel sources To get the changes in: 776f395004d8 ("vhost_vdpa: Support config interrupt in vdpa") Silencing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/vhost.h' differs from latest version at 'include/uapi/linux/vhost.h' diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h This automatically picks the new ioctl introduced in the above patch, making tools such as 'perf trace' aware of them and possibly allowing to use the strings in filters, etc: # perf trace -e ioctl --pid 7951 0.178 ( 0.010 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 0.194 ( 0.010 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 0.209 ( 0.010 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 0.224 (249.413 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 249.660 ( 0.011 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 249.675 ( 0.007 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 249.686 ( 0.007 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 249.697 ( 0.008 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 249.709 ( 0.007 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 249.720 ( 0.007 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 249.730 ( 0.007 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 249.740 ( 0.007 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 249.752 ( 0.007 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 249.762 ( 0.007 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 249.772 ( 0.007 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 249.782 (120.138 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 370.201 ( 0.039 ms): CPU 0/KVM/8023 ioctl(fd: 12, cmd: KVM_IRQ_LINE_STATUS, arg: 0x7f744f9e1420) = 0 370.254 ( 0.052 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 370.575 ( 0.365 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 370.973 ( 0.028 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 371.015 ( 0.037 ms): CPU 0/KVM/8023 ioctl(fd: 14, cmd: KVM_RUN) = 0 371.071 ( 0.009 ms): CPU 0/KVM/8023 ioctl(fd: 12, cmd: KVM_IRQ_LINE_STATUS, arg: 0x7f744f9e14b0) = 0 # Details about the update: $ diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h --- tools/include/uapi/linux/vhost.h 2020-04-16 13:19:12.056763843 -0300 +++ include/uapi/linux/vhost.h 2020-06-17 10:04:20.532056428 -0300 @@ -15,6 +15,8 @@ #include #include +#define VHOST_FILE_UNBIND -1 + /* ioctls */ #define VHOST_VIRTIO 0xAF @@ -140,4 +142,6 @@ /* Get the max ring size. */ #define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16) +/* Set event fd for config interrupt*/ +#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) #endif $ $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > before $ cp include/uapi/linux/vhost.h tools/include/uapi/linux/vhost.h $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > after $ diff -u before after --- before 2020-06-17 10:15:35.123275966 -0300 +++ after 2020-06-17 10:15:51.812482117 -0300 @@ -27,6 +27,7 @@ [0x72] = "VDPA_SET_STATUS", [0x74] = "VDPA_SET_CONFIG", [0x75] = "VDPA_SET_VRING_ENABLE", + [0x77] = "VDPA_SET_CONFIG_CALL", }; static const char *vhost_virtio_ioctl_read_cmds[] = { [0x00] = "GET_FEATURES", $ This causes these parts to get rebuilt: CC /tmp/build/perf/trace/beauty/ioctl.o INSTALL trace_plugins LD /tmp/build/perf/trace/beauty/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/perf Cc: Adrian Hunter Cc: Jiri Olsa Cc: Michael S. Tsirkin Cc: Namhyung Kim Cc: Zhu Lingshan Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/vhost.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index 9fe72e4b1373..0c2349612e77 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -15,6 +15,8 @@ #include #include +#define VHOST_FILE_UNBIND -1 + /* ioctls */ #define VHOST_VIRTIO 0xAF @@ -140,4 +142,6 @@ /* Get the max ring size. */ #define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16) +/* Set event fd for config interrupt*/ +#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) #endif From 0e093c77c5b0dc0d8cbe123a84c944aa8bf0b878 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 17 Jun 2020 10:23:51 -0300 Subject: [PATCH 208/462] tools headers UAPI: Sync linux/fs.h with the kernel sources To pick the changes from: b383a73f2b83 ("fs/ext4: Introduce DAX inode flag") And silence this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fs.h' differs from latest version at 'include/uapi/linux/fs.h' diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h It causes various beautifiers for things like fspick, fsmount, etc (see below) to get rebuilt, but this specific change doesn't make 'perf trace' be capable of decoding anything new, as we still don't decode what comes from ioctls, just its cmds. Details about the update: $ cp include/uapi/linux/fs.h tools/include/uapi/linux/fs.h $ git diff diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 379a612f8f1d..f44eb0a04afd 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -262,6 +262,7 @@ struct fsxattr { #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#define FS_DAX_FL 0x02000000 /* Inode is DAX */ #define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */ #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */ $ m make: Entering directory '/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j8' parallel build INSTALL GTK UI CC /tmp/build/perf/builtin-trace.o DESCEND plugins CC /tmp/build/perf/trace/beauty/fsmount.o CC /tmp/build/perf/trace/beauty/fspick.o CC /tmp/build/perf/trace/beauty/mount_flags.o CC /tmp/build/perf/trace/beauty/move_mount.o CC /tmp/build/perf/trace/beauty/renameat.o CC /tmp/build/perf/trace/beauty/sync_file_range.o INSTALL trace_plugins LD /tmp/build/perf/trace/beauty/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/perf Cc: Adrian Hunter Cc: Ira Weiny Cc: Jiri Olsa Cc: Namhyung Kim Cc: Theodore Ts'o Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 379a612f8f1d..f44eb0a04afd 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -262,6 +262,7 @@ struct fsxattr { #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#define FS_DAX_FL 0x02000000 /* Inode is DAX */ #define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */ #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */ From 3ff2351651a2ecb73ec9d29119793bde190b2850 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 16 Jun 2020 18:35:18 +0800 Subject: [PATCH 209/462] xdp: Handle frame_sz in xdp_convert_zc_to_xdp_frame() In commit 34cc0b338a61 we only handled the frame_sz in convert_to_xdp_frame(). This patch will also handle frame_sz in xdp_convert_zc_to_xdp_frame(). Fixes: 34cc0b338a61 ("xdp: Xdp_frame add member frame_sz and handle in convert_to_xdp_frame") Signed-off-by: Hangbin Liu Signed-off-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200616103518.2963410-1-liuhangbin@gmail.com --- net/core/xdp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/xdp.c b/net/core/xdp.c index 90f44f382115..3c45f99e26d5 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -462,6 +462,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp) xdpf->len = totsize - metasize; xdpf->headroom = 0; xdpf->metasize = metasize; + xdpf->frame_sz = PAGE_SIZE; xdpf->mem.type = MEM_TYPE_PAGE_ORDER0; xsk_buff_free(xdp); From 99c51064fb06146b3d494b745c947e438a10aaa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 16 Jun 2020 16:28:29 +0200 Subject: [PATCH 210/462] devmap: Use bpf_map_area_alloc() for allocating hash buckets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Syzkaller discovered that creating a hash of type devmap_hash with a large number of entries can hit the memory allocator limit for allocating contiguous memory regions. There's really no reason to use kmalloc_array() directly in the devmap code, so just switch it to the existing bpf_map_area_alloc() function that is used elsewhere. Fixes: 6f9d451ab1a3 ("xdp: Add devmap_hash map type for looking up devices by hashed index") Reported-by: Xiumei Mu Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200616142829.114173-1-toke@redhat.com --- kernel/bpf/devmap.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 0cbb72cdaf63..5fdbc776a760 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -86,12 +86,13 @@ static DEFINE_PER_CPU(struct list_head, dev_flush_list); static DEFINE_SPINLOCK(dev_map_lock); static LIST_HEAD(dev_map_list); -static struct hlist_head *dev_map_create_hash(unsigned int entries) +static struct hlist_head *dev_map_create_hash(unsigned int entries, + int numa_node) { int i; struct hlist_head *hash; - hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL); + hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node); if (hash != NULL) for (i = 0; i < entries; i++) INIT_HLIST_HEAD(&hash[i]); @@ -145,7 +146,8 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) return -EINVAL; if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { - dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets); + dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets, + dtab->map.numa_node); if (!dtab->dev_index_head) goto free_charge; @@ -232,7 +234,7 @@ static void dev_map_free(struct bpf_map *map) } } - kfree(dtab->dev_index_head); + bpf_map_area_free(dtab->dev_index_head); } else { for (i = 0; i < dtab->map.max_entries; i++) { struct bpf_dtab_netdev *dev; From d8fe449a9c51a37d844ab607e14e2f5c657d3cf2 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 16 Jun 2020 18:04:14 -0700 Subject: [PATCH 211/462] bpf: Don't return EINVAL from {get,set}sockopt when optlen > PAGE_SIZE Attaching to these hooks can break iptables because its optval is usually quite big, or at least bigger than the current PAGE_SIZE limit. David also mentioned some SCTP options can be big (around 256k). For such optvals we expose only the first PAGE_SIZE bytes to the BPF program. BPF program has two options: 1. Set ctx->optlen to 0 to indicate that the BPF's optval should be ignored and the kernel should use original userspace value. 2. Set ctx->optlen to something that's smaller than the PAGE_SIZE. v5: * use ctx->optlen == 0 with trimmed buffer (Alexei Starovoitov) * update the docs accordingly v4: * use temporary buffer to avoid optval == optval_end == NULL; this removes the corner case in the verifier that might assume non-zero PTR_TO_PACKET/PTR_TO_PACKET_END. v3: * don't increase the limit, bypass the argument v2: * proper comments formatting (Jakub Kicinski) Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Cc: David Laight Link: https://lore.kernel.org/bpf/20200617010416.93086-1-sdf@google.com --- kernel/bpf/cgroup.c | 53 ++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 4d76f16524cc..ac53102e244a 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1276,16 +1276,23 @@ static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp, static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen) { - if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0) + if (unlikely(max_optlen < 0)) return -EINVAL; + if (unlikely(max_optlen > PAGE_SIZE)) { + /* We don't expose optvals that are greater than PAGE_SIZE + * to the BPF program. + */ + max_optlen = PAGE_SIZE; + } + ctx->optval = kzalloc(max_optlen, GFP_USER); if (!ctx->optval) return -ENOMEM; ctx->optval_end = ctx->optval + max_optlen; - return 0; + return max_optlen; } static void sockopt_free_buf(struct bpf_sockopt_kern *ctx) @@ -1319,13 +1326,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, */ max_optlen = max_t(int, 16, *optlen); - ret = sockopt_alloc_buf(&ctx, max_optlen); - if (ret) - return ret; + max_optlen = sockopt_alloc_buf(&ctx, max_optlen); + if (max_optlen < 0) + return max_optlen; ctx.optlen = *optlen; - if (copy_from_user(ctx.optval, optval, *optlen) != 0) { + if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) { ret = -EFAULT; goto out; } @@ -1353,8 +1360,14 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, /* export any potential modifications */ *level = ctx.level; *optname = ctx.optname; - *optlen = ctx.optlen; - *kernel_optval = ctx.optval; + + /* optlen == 0 from BPF indicates that we should + * use original userspace data. + */ + if (ctx.optlen != 0) { + *optlen = ctx.optlen; + *kernel_optval = ctx.optval; + } } out: @@ -1385,12 +1398,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT)) return retval; - ret = sockopt_alloc_buf(&ctx, max_optlen); - if (ret) - return ret; - ctx.optlen = max_optlen; + max_optlen = sockopt_alloc_buf(&ctx, max_optlen); + if (max_optlen < 0) + return max_optlen; + if (!retval) { /* If kernel getsockopt finished successfully, * copy whatever was returned to the user back @@ -1404,10 +1417,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, goto out; } - if (ctx.optlen > max_optlen) - ctx.optlen = max_optlen; - - if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) { + if (copy_from_user(ctx.optval, optval, + min(ctx.optlen, max_optlen)) != 0) { ret = -EFAULT; goto out; } @@ -1436,10 +1447,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, goto out; } - if (copy_to_user(optval, ctx.optval, ctx.optlen) || - put_user(ctx.optlen, optlen)) { - ret = -EFAULT; - goto out; + if (ctx.optlen != 0) { + if (copy_to_user(optval, ctx.optval, ctx.optlen) || + put_user(ctx.optlen, optlen)) { + ret = -EFAULT; + goto out; + } } ret = ctx.retval; From a0cb12b03132befbdb29d835ca330c18792e8134 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 16 Jun 2020 18:04:15 -0700 Subject: [PATCH 212/462] selftests/bpf: Make sure optvals > PAGE_SIZE are bypassed We are relying on the fact, that we can pass > sizeof(int) optvals to the SOL_IP+IP_FREEBIND option (the kernel will take first 4 bytes). In the BPF program we check that we can only touch PAGE_SIZE bytes, but the real optlen is PAGE_SIZE * 2. In both cases, we override it to some predefined value and trim the optlen. Also, let's modify exiting IP_TOS usecase to test optlen=0 case where BPF program just bypasses the data as is. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200617010416.93086-2-sdf@google.com --- .../selftests/bpf/prog_tests/sockopt_sk.c | 46 +++++++++++++--- .../testing/selftests/bpf/progs/sockopt_sk.c | 54 ++++++++++++++++++- 2 files changed, 91 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 2061a6beac0f..5f54c6aec7f0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -13,6 +13,7 @@ static int getsetsockopt(void) char cc[16]; /* TCP_CA_NAME_MAX */ } buf = {}; socklen_t optlen; + char *big_buf = NULL; fd = socket(AF_INET, SOCK_STREAM, 0); if (fd < 0) { @@ -22,24 +23,31 @@ static int getsetsockopt(void) /* IP_TOS - BPF bypass */ - buf.u8[0] = 0x08; - err = setsockopt(fd, SOL_IP, IP_TOS, &buf, 1); + optlen = getpagesize() * 2; + big_buf = calloc(1, optlen); + if (!big_buf) { + log_err("Couldn't allocate two pages"); + goto err; + } + + *(int *)big_buf = 0x08; + err = setsockopt(fd, SOL_IP, IP_TOS, big_buf, optlen); if (err) { log_err("Failed to call setsockopt(IP_TOS)"); goto err; } - buf.u8[0] = 0x00; + memset(big_buf, 0, optlen); optlen = 1; - err = getsockopt(fd, SOL_IP, IP_TOS, &buf, &optlen); + err = getsockopt(fd, SOL_IP, IP_TOS, big_buf, &optlen); if (err) { log_err("Failed to call getsockopt(IP_TOS)"); goto err; } - if (buf.u8[0] != 0x08) { - log_err("Unexpected getsockopt(IP_TOS) buf[0] 0x%02x != 0x08", - buf.u8[0]); + if (*(int *)big_buf != 0x08) { + log_err("Unexpected getsockopt(IP_TOS) optval 0x%x != 0x08", + *(int *)big_buf); goto err; } @@ -78,6 +86,28 @@ static int getsetsockopt(void) goto err; } + /* IP_FREEBIND - BPF can't access optval past PAGE_SIZE */ + + optlen = getpagesize() * 2; + memset(big_buf, 0, optlen); + + err = setsockopt(fd, SOL_IP, IP_FREEBIND, big_buf, optlen); + if (err != 0) { + log_err("Failed to call setsockopt, ret=%d", err); + goto err; + } + + err = getsockopt(fd, SOL_IP, IP_FREEBIND, big_buf, &optlen); + if (err != 0) { + log_err("Failed to call getsockopt, ret=%d", err); + goto err; + } + + if (optlen != 1 || *(__u8 *)big_buf != 0x55) { + log_err("Unexpected IP_FREEBIND getsockopt, optlen=%d, optval=0x%x", + optlen, *(__u8 *)big_buf); + } + /* SO_SNDBUF is overwritten */ buf.u32 = 0x01010101; @@ -124,9 +154,11 @@ static int getsetsockopt(void) goto err; } + free(big_buf); close(fd); return 0; err: + free(big_buf); close(fd); return -1; } diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index d5a5eeb5fb52..712df7b49cb1 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -8,6 +8,10 @@ char _license[] SEC("license") = "GPL"; __u32 _version SEC("version") = 1; +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + #define SOL_CUSTOM 0xdeadbeef struct sockopt_sk { @@ -28,12 +32,14 @@ int _getsockopt(struct bpf_sockopt *ctx) __u8 *optval = ctx->optval; struct sockopt_sk *storage; - if (ctx->level == SOL_IP && ctx->optname == IP_TOS) + if (ctx->level == SOL_IP && ctx->optname == IP_TOS) { /* Not interested in SOL_IP:IP_TOS; * let next BPF program in the cgroup chain or kernel * handle it. */ + ctx->optlen = 0; /* bypass optval>PAGE_SIZE */ return 1; + } if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { /* Not interested in SOL_SOCKET:SO_SNDBUF; @@ -51,6 +57,26 @@ int _getsockopt(struct bpf_sockopt *ctx) return 1; } + if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { + if (optval + 1 > optval_end) + return 0; /* EPERM, bounds check */ + + ctx->retval = 0; /* Reset system call return value to zero */ + + /* Always export 0x55 */ + optval[0] = 0x55; + ctx->optlen = 1; + + /* Userspace buffer is PAGE_SIZE * 2, but BPF + * program can only see the first PAGE_SIZE + * bytes of data. + */ + if (optval_end - optval != PAGE_SIZE) + return 0; /* EPERM, unexpected data size */ + + return 1; + } + if (ctx->level != SOL_CUSTOM) return 0; /* EPERM, deny everything except custom level */ @@ -81,12 +107,14 @@ int _setsockopt(struct bpf_sockopt *ctx) __u8 *optval = ctx->optval; struct sockopt_sk *storage; - if (ctx->level == SOL_IP && ctx->optname == IP_TOS) + if (ctx->level == SOL_IP && ctx->optname == IP_TOS) { /* Not interested in SOL_IP:IP_TOS; * let next BPF program in the cgroup chain or kernel * handle it. */ + ctx->optlen = 0; /* bypass optval>PAGE_SIZE */ return 1; + } if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { /* Overwrite SO_SNDBUF value */ @@ -112,6 +140,28 @@ int _setsockopt(struct bpf_sockopt *ctx) return 1; } + if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { + /* Original optlen is larger than PAGE_SIZE. */ + if (ctx->optlen != PAGE_SIZE * 2) + return 0; /* EPERM, unexpected data size */ + + if (optval + 1 > optval_end) + return 0; /* EPERM, bounds check */ + + /* Make sure we can trim the buffer. */ + optval[0] = 0; + ctx->optlen = 1; + + /* Usepace buffer is PAGE_SIZE * 2, but BPF + * program can only see the first PAGE_SIZE + * bytes of data. + */ + if (optval_end - optval != PAGE_SIZE) + return 0; /* EPERM, unexpected data size */ + + return 1; + } + if (ctx->level != SOL_CUSTOM) return 0; /* EPERM, deny everything except custom level */ From 8030e250d882db174cbcd88273570ffb36a13080 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 16 Jun 2020 18:04:16 -0700 Subject: [PATCH 213/462] bpf: Document optval > PAGE_SIZE behavior for sockopt hooks Extend existing doc with more details about requiring ctx->optlen = 0 for handling optval > PAGE_SIZE. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200617010416.93086-3-sdf@google.com --- Documentation/bpf/prog_cgroup_sockopt.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/bpf/prog_cgroup_sockopt.rst b/Documentation/bpf/prog_cgroup_sockopt.rst index c47d974629ae..172f957204bf 100644 --- a/Documentation/bpf/prog_cgroup_sockopt.rst +++ b/Documentation/bpf/prog_cgroup_sockopt.rst @@ -86,6 +86,20 @@ then the next program in the chain (A) will see those changes, *not* the original input ``setsockopt`` arguments. The potentially modified values will be then passed down to the kernel. +Large optval +============ +When the ``optval`` is greater than the ``PAGE_SIZE``, the BPF program +can access only the first ``PAGE_SIZE`` of that data. So it has to options: + +* Set ``optlen`` to zero, which indicates that the kernel should + use the original buffer from the userspace. Any modifications + done by the BPF program to the ``optval`` are ignored. +* Set ``optlen`` to the value less than ``PAGE_SIZE``, which + indicates that the kernel should use BPF's trimmed ``optval``. + +When the BPF program returns with the ``optlen`` greater than +``PAGE_SIZE``, the userspace will receive ``EFAULT`` errno. + Example ======= From fe557319aa06c23cffc9346000f119547e0f289a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:53 +0200 Subject: [PATCH 214/462] maccess: rename probe_kernel_{read,write} to copy_{from,to}_kernel_nofault Better describe what these functions do. Suggested-by: Linus Torvalds Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/ftrace.c | 3 +- arch/arm/kernel/kgdb.c | 2 +- arch/arm64/kernel/insn.c | 4 +-- arch/csky/kernel/ftrace.c | 5 +-- arch/ia64/kernel/ftrace.c | 6 ++-- arch/mips/kernel/kprobes.c | 6 ++-- arch/nds32/kernel/ftrace.c | 5 +-- arch/parisc/kernel/ftrace.c | 2 +- arch/parisc/kernel/kgdb.c | 4 +-- arch/parisc/lib/memcpy.c | 2 +- arch/powerpc/kernel/module_64.c | 6 ++-- arch/powerpc/kernel/trace/ftrace.c | 4 +-- arch/powerpc/lib/inst.c | 6 ++-- arch/powerpc/perf/core-book3s.c | 3 +- arch/riscv/kernel/ftrace.c | 3 +- arch/riscv/kernel/kgdb.c | 4 +-- arch/riscv/kernel/patch.c | 4 +-- arch/s390/kernel/ftrace.c | 4 +-- arch/sh/kernel/ftrace.c | 6 ++-- arch/um/kernel/maccess.c | 2 +- arch/x86/include/asm/ptrace.h | 4 +-- arch/x86/kernel/dumpstack.c | 2 +- arch/x86/kernel/ftrace.c | 10 +++--- arch/x86/kernel/kgdb.c | 6 ++-- arch/x86/kernel/kprobes/core.c | 5 +-- arch/x86/kernel/kprobes/opt.c | 2 +- arch/x86/kernel/traps.c | 3 +- arch/x86/mm/fault.c | 2 +- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/maccess.c | 4 +-- arch/x86/xen/enlighten_pv.c | 2 +- drivers/char/mem.c | 2 +- drivers/dio/dio.c | 6 ++-- drivers/input/serio/hp_sdc.c | 2 +- drivers/misc/kgdbts.c | 6 ++-- drivers/video/fbdev/hpfb.c | 2 +- fs/proc/kcore.c | 3 +- include/linux/uaccess.h | 13 ++++---- kernel/debug/debug_core.c | 6 ++-- kernel/debug/gdbstub.c | 6 ++-- kernel/debug/kdb/kdb_main.c | 3 +- kernel/debug/kdb/kdb_support.c | 7 +++-- kernel/kthread.c | 2 +- kernel/trace/bpf_trace.c | 4 +-- kernel/trace/trace_kprobe.c | 4 +-- kernel/workqueue.c | 10 +++--- mm/debug.c | 8 ++--- mm/maccess.c | 49 +++++++++++++++--------------- mm/rodata_test.c | 2 +- mm/slub.c | 2 +- 50 files changed, 138 insertions(+), 122 deletions(-) diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 10499d44964a..9a79ef6b1876 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -84,7 +84,8 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old, old = __opcode_to_mem_arm(old); if (validate) { - if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(&replaced, (void *)pc, + MCOUNT_INSN_SIZE)) return -EFAULT; if (replaced != old) diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 6a95b9296640..7bd30c0a4280 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -236,7 +236,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) /* patch_text() only supports int-sized breakpoints */ BUILD_BUG_ON(sizeof(int) != BREAK_INSTR_SIZE); - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 684d871ae38d..a107375005bc 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -135,7 +135,7 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp) int ret; __le32 val; - ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); + ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE); if (!ret) *insnp = le32_to_cpu(val); @@ -151,7 +151,7 @@ static int __kprobes __aarch64_insn_write(void *addr, __le32 insn) raw_spin_lock_irqsave(&patch_lock, flags); waddr = patch_map(addr, FIX_TEXT_POKE0); - ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE); + ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE); patch_unmap(FIX_TEXT_POKE0); raw_spin_unlock_irqrestore(&patch_lock, flags); diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c index 3c425b84e3be..b4a7ec1517ff 100644 --- a/arch/csky/kernel/ftrace.c +++ b/arch/csky/kernel/ftrace.c @@ -72,7 +72,8 @@ static int ftrace_check_current_nop(unsigned long hook) uint16_t olds[7]; unsigned long hook_pos = hook - 2; - if (probe_kernel_read((void *)olds, (void *)hook_pos, sizeof(nops))) + if (copy_from_kernel_nofault((void *)olds, (void *)hook_pos, + sizeof(nops))) return -EFAULT; if (memcmp((void *)nops, (void *)olds, sizeof(nops))) { @@ -97,7 +98,7 @@ static int ftrace_modify_code(unsigned long hook, unsigned long target, make_jbsr(target, hook, call, nolr); - ret = probe_kernel_write((void *)hook_pos, enable ? call : nops, + ret = copy_to_kernel_nofault((void *)hook_pos, enable ? call : nops, sizeof(nops)); if (ret) return -EPERM; diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c index cee411e647ca..b2ab2d58fb30 100644 --- a/arch/ia64/kernel/ftrace.c +++ b/arch/ia64/kernel/ftrace.c @@ -108,7 +108,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, goto skip_check; /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -117,7 +117,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, skip_check: /* replace the text with the new text */ - if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE)) + if (copy_to_kernel_nofault(((void *)ip), new_code, MCOUNT_INSN_SIZE)) return -EPERM; flush_icache_range(ip, ip + MCOUNT_INSN_SIZE); @@ -129,7 +129,7 @@ static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr) unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE]; unsigned long ip = rec->ip; - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; if (rec->flags & FTRACE_FL_CONVERTED) { struct ftrace_call_insn *call_insn, *tmp_call; diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 6cfae2411c04..d043c2f897fc 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -86,9 +86,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) goto out; } - if ((probe_kernel_read(&prev_insn, p->addr - 1, - sizeof(mips_instruction)) == 0) && - insn_has_delayslot(prev_insn)) { + if (copy_from_kernel_nofault(&prev_insn, p->addr - 1, + sizeof(mips_instruction)) == 0 && + insn_has_delayslot(prev_insn)) { pr_notice("Kprobes for branch delayslot are not supported\n"); ret = -EINVAL; goto out; diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 22ab77ea27ad..3763b3f8c3db 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -131,13 +131,14 @@ static int __ftrace_modify_code(unsigned long pc, unsigned long *old_insn, unsigned long orig_insn[3]; if (validate) { - if (probe_kernel_read(orig_insn, (void *)pc, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(orig_insn, (void *)pc, + MCOUNT_INSN_SIZE)) return -EFAULT; if (memcmp(orig_insn, old_insn, MCOUNT_INSN_SIZE)) return -EINVAL; } - if (probe_kernel_write((void *)pc, new_insn, MCOUNT_INSN_SIZE)) + if (copy_to_kernel_nofault((void *)pc, new_insn, MCOUNT_INSN_SIZE)) return -EPERM; return 0; diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index b836fc61a24f..1df0f67ed667 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -172,7 +172,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) ip = (void *)(rec->ip + 4 - size); - ret = probe_kernel_read(insn, ip, size); + ret = copy_from_kernel_nofault(insn, ip, size); if (ret) return ret; diff --git a/arch/parisc/kernel/kgdb.c b/arch/parisc/kernel/kgdb.c index 664278db9b97..c4554ac13eac 100644 --- a/arch/parisc/kernel/kgdb.c +++ b/arch/parisc/kernel/kgdb.c @@ -154,8 +154,8 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { - int ret = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, - BREAK_INSTR_SIZE); + int ret = copy_from_kernel_nofault(bpt->saved_instr, + (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (ret) return ret; diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index 94a9fe2702c2..4b75388190b4 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -57,7 +57,7 @@ void * memcpy(void * dst,const void *src, size_t count) EXPORT_SYMBOL(raw_copy_in_user); EXPORT_SYMBOL(memcpy); -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { if ((unsigned long)unsafe_src < PAGE_SIZE) return false; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index f4c2fa190192..ae2b188365b1 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -756,7 +756,8 @@ int module_trampoline_target(struct module *mod, unsigned long addr, stub = (struct ppc64_stub_entry *)addr; - if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { + if (copy_from_kernel_nofault(&magic, &stub->magic, + sizeof(magic))) { pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; } @@ -766,7 +767,8 @@ int module_trampoline_target(struct module *mod, unsigned long addr, return -EFAULT; } - if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) { + if (copy_from_kernel_nofault(&funcdata, &stub->funcdata, + sizeof(funcdata))) { pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; } diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 5e399628f51a..c1fede6ec934 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -226,7 +226,7 @@ __ftrace_make_nop(struct module *mod, unsigned long ip = rec->ip; unsigned long tramp; - if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure that that this is still a 24bit jump */ @@ -249,7 +249,7 @@ __ftrace_make_nop(struct module *mod, pr_devel("ip:%lx jumps to %lx", ip, tramp); /* Find where the trampoline jumps to */ - if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { + if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) { pr_err("Failed to read %lx\n", tramp); return -EFAULT; } diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index aedfd6e31e53..6c7a20af9fd6 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -33,11 +33,11 @@ int probe_kernel_read_inst(struct ppc_inst *inst, unsigned int val, suffix; int err; - err = probe_kernel_read(&val, src, sizeof(val)); + err = copy_from_kernel_nofault(&val, src, sizeof(val)); if (err) return err; if (get_op(val) == OP_PREFIX) { - err = probe_kernel_read(&suffix, (void *)src + 4, 4); + err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); @@ -64,7 +64,7 @@ int probe_kernel_read_inst(struct ppc_inst *inst, unsigned int val; int err; - err = probe_kernel_read(&val, src, sizeof(val)); + err = copy_from_kernel_nofault(&val, src, sizeof(val)); if (!err) *inst = ppc_inst(val); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 13b9dd5e4a76..efe97ff82557 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -418,7 +418,8 @@ static __u64 power_pmu_bhrb_to(u64 addr) __u64 target; if (is_kernel_addr(addr)) { - if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + if (copy_from_kernel_nofault(&instr, (void *)addr, + sizeof(instr))) return 0; return branch_target((struct ppc_inst *)&instr); diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 08396614d6f4..2ff63d0cbb50 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -38,7 +38,8 @@ static int ftrace_check_current_call(unsigned long hook_pos, * Read the text we want to modify; * return must be -EFAULT on read error */ - if (probe_kernel_read(replaced, (void *)hook_pos, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)hook_pos, + MCOUNT_INSN_SIZE)) return -EFAULT; /* diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index f16ade84a11f..a21fb21883e7 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -153,7 +153,7 @@ int do_single_step(struct pt_regs *regs) stepped_address = addr; /* Replace the op code with the break instruction */ - error = probe_kernel_write((void *)stepped_address, + error = copy_to_kernel_nofault((void *)stepped_address, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); /* Flush and return */ @@ -173,7 +173,7 @@ int do_single_step(struct pt_regs *regs) static void undo_single_step(struct pt_regs *regs) { if (stepped_opcode != 0) { - probe_kernel_write((void *)stepped_address, + copy_to_kernel_nofault((void *)stepped_address, (void *)&stepped_opcode, BREAK_INSTR_SIZE); flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE); diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index d4a64dfed342..3fe7a5296aa5 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -63,7 +63,7 @@ static int patch_insn_write(void *addr, const void *insn, size_t len) waddr = patch_map(addr, FIX_TEXT_POKE0); - ret = probe_kernel_write(waddr, insn, len); + ret = copy_to_kernel_nofault(waddr, insn, len); patch_unmap(FIX_TEXT_POKE0); @@ -76,7 +76,7 @@ NOKPROBE_SYMBOL(patch_insn_write); #else static int patch_insn_write(void *addr, const void *insn, size_t len) { - return probe_kernel_write(addr, insn, len); + return copy_to_kernel_nofault(addr, insn, len); } NOKPROBE_SYMBOL(patch_insn_write); #endif /* CONFIG_MMU */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 44e01dd1e624..b388e87a08bf 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -83,7 +83,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, { struct ftrace_insn orig, new, old; - if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; if (addr == MCOUNT_ADDR) { /* Initial code replacement */ @@ -105,7 +105,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { struct ftrace_insn orig, new, old; - if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; /* Replace nop with an ftrace call. */ ftrace_generate_nop_insn(&orig); diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 1b04270e5460..0646c5961846 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -119,7 +119,7 @@ static void ftrace_mod_code(void) * But if one were to fail, then they all should, and if one were * to succeed, then they all should. */ - mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, + mod_code_status = copy_to_kernel_nofault(mod_code_ip, mod_code_newcode, MCOUNT_INSN_SIZE); /* if we fail, then kill any new writers */ @@ -203,7 +203,7 @@ static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, */ /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -268,7 +268,7 @@ static int ftrace_mod(unsigned long ip, unsigned long old_addr, { unsigned char code[MCOUNT_INSN_SIZE]; - if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(code, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; if (old_addr != __raw_readl((unsigned long *)code)) diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c index e929c0966696..8ccd56813f68 100644 --- a/arch/um/kernel/maccess.c +++ b/arch/um/kernel/maccess.c @@ -7,7 +7,7 @@ #include #include -bool probe_kernel_read_allowed(const void *src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *src, size_t size) { void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE); diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index ebedeab48704..255b2dde2c1b 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -278,7 +278,7 @@ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs } /* To avoid include hell, we can't include uaccess.h */ -extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long copy_from_kernel_nofault(void *dst, const void *src, size_t size); /** * regs_get_kernel_stack_nth() - get Nth entry of the stack @@ -298,7 +298,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, addr = regs_get_kernel_stack_nth_addr(regs, n); if (addr) { - ret = probe_kernel_read(&val, addr, sizeof(val)); + ret = copy_from_kernel_nofault(&val, addr, sizeof(val)); if (!ret) return val; } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 456511b2284e..b037cfa7c0c5 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -106,7 +106,7 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl) bad_ip = user_mode(regs) && __chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX); - if (bad_ip || probe_kernel_read(opcodes, (u8 *)prologue, + if (bad_ip || copy_from_kernel_nofault(opcodes, (u8 *)prologue, OPCODE_BUFSIZE)) { printk("%sCode: Bad RIP value.\n", loglvl); } else { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c84d28e90a58..51504566b3a6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -86,7 +86,7 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code) * sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ - if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { + if (copy_from_kernel_nofault(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { WARN_ON(1); return -EFAULT; } @@ -355,7 +355,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE); /* Copy ftrace_caller onto the trampoline memory */ - ret = probe_kernel_read(trampoline, (void *)start_offset, size); + ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size); if (WARN_ON(ret < 0)) goto fail; @@ -363,13 +363,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* The trampoline ends with ret(q) */ retq = (unsigned long)ftrace_stub; - ret = probe_kernel_read(ip, (void *)retq, RET_SIZE); + ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); if (WARN_ON(ret < 0)) goto fail; if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller); - ret = probe_kernel_read(ip, (void *)retq, RET_SIZE); + ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); if (WARN_ON(ret < 0)) goto fail; } @@ -506,7 +506,7 @@ static void *addr_from_call(void *ptr) union text_poke_insn call; int ret; - ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE); + ret = copy_from_kernel_nofault(&call, ptr, CALL_INSN_SIZE); if (WARN_ON_ONCE(ret < 0)) return NULL; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index c44fe7d8d9a4..68acd30c6b87 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -732,11 +732,11 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) int err; bpt->type = BP_BREAKPOINT; - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; - err = probe_kernel_write((char *)bpt->bpt_addr, + err = copy_to_kernel_nofault((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); if (!err) return err; @@ -768,7 +768,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) return 0; knl_write: - return probe_kernel_write((char *)bpt->bpt_addr, + return copy_to_kernel_nofault((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3bafe1bd4dc7..f09985c87d73 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -243,7 +243,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) * Fortunately, we know that the original code is the ideal 5-byte * long NOP. */ - if (probe_kernel_read(buf, (void *)addr, + if (copy_from_kernel_nofault(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) return 0UL; @@ -346,7 +346,8 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) return 0; /* This can access kernel text if given address is not recovered */ - if (probe_kernel_read(dest, (void *)recovered_insn, MAX_INSN_SIZE)) + if (copy_from_kernel_nofault(dest, (void *)recovered_insn, + MAX_INSN_SIZE)) return 0; kernel_insn_init(insn, dest, MAX_INSN_SIZE); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 321c19950285..7af4c61dde52 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -56,7 +56,7 @@ found: * overwritten by jump destination address. In this case, original * bytes must be recovered from op->optinsn.copied_insn buffer. */ - if (probe_kernel_read(buf, (void *)addr, + if (copy_from_kernel_nofault(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) return 0UL; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index af75109485c2..7003f2e7b163 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -488,7 +488,8 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, u8 insn_buf[MAX_INSN_SIZE]; struct insn insn; - if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE)) + if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, + MAX_INSN_SIZE)) return GP_NO_HINT; kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 66be9bd60307..e996aa3833b8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -442,7 +442,7 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index) return; } - if (probe_kernel_read(&desc, (void *)(gdt->address + offset), + if (copy_from_kernel_nofault(&desc, (void *)(gdt->address + offset), sizeof(struct ldttss_desc))) { pr_alert("%s: 0x%hx -- GDT entry is not readable\n", name, index); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bda909e3e37e..8b4afad84f4a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -737,7 +737,7 @@ static void __init test_wp_bit(void) __set_fixmap(FIX_WP_TEST, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO); - if (probe_kernel_write((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) { + if (copy_to_kernel_nofault((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) { clear_fixmap(FIX_WP_TEST); printk(KERN_CONT "Ok.\n"); return; diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index e1d7d7477c22..92ec176a7293 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -9,7 +9,7 @@ static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits) return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); } -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { unsigned long vaddr = (unsigned long)unsafe_src; @@ -22,7 +22,7 @@ bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) canonical_address(vaddr, boot_cpu_data.x86_virt_bits) == vaddr; } #else -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return (unsigned long)unsafe_src >= TASK_SIZE_MAX; } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 33b309d65955..acc49fa6a097 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -386,7 +386,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) preempt_disable(); - probe_kernel_read(&dummy, v, 1); + copy_from_kernel_nofault(&dummy, v, 1); if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 31cae88a730b..934c92dcb9ab 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -171,7 +171,7 @@ static ssize_t read_mem(struct file *file, char __user *buf, if (!ptr) goto failed; - probe = probe_kernel_read(bounce, ptr, sz); + probe = copy_from_kernel_nofault(bounce, ptr, sz); unxlate_dev_mem_ptr(p, ptr); if (probe) goto failed; diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c index c9aa15fb86a9..193b40e7aec0 100644 --- a/drivers/dio/dio.c +++ b/drivers/dio/dio.c @@ -135,7 +135,8 @@ int __init dio_find(int deviceid) else va = ioremap(pa, PAGE_SIZE); - if (probe_kernel_read(&i, (unsigned char *)va + DIO_IDOFF, 1)) { + if (copy_from_kernel_nofault(&i, + (unsigned char *)va + DIO_IDOFF, 1)) { if (scode >= DIOII_SCBASE) iounmap(va); continue; /* no board present at that select code */ @@ -208,7 +209,8 @@ static int __init dio_init(void) else va = ioremap(pa, PAGE_SIZE); - if (probe_kernel_read(&i, (unsigned char *)va + DIO_IDOFF, 1)) { + if (copy_from_kernel_nofault(&i, + (unsigned char *)va + DIO_IDOFF, 1)) { if (scode >= DIOII_SCBASE) iounmap(va); continue; /* no board present at that select code */ diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 654252361653..13eacf6ab431 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -1021,7 +1021,7 @@ static int __init hp_sdc_register(void) hp_sdc.base_io = (unsigned long) 0xf0428000; hp_sdc.data_io = (unsigned long) hp_sdc.base_io + 1; hp_sdc.status_io = (unsigned long) hp_sdc.base_io + 3; - if (!probe_kernel_read(&i, (unsigned char *)hp_sdc.data_io, 1)) + if (!copy_from_kernel_nofault(&i, (unsigned char *)hp_sdc.data_io, 1)) hp_sdc.dev = (void *)1; hp_sdc.dev_err = hp_sdc_init(); #endif diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index bccd341e9ae1..d5d2af4d10e6 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -828,7 +828,7 @@ static void run_plant_and_detach_test(int is_early) char before[BREAK_INSTR_SIZE]; char after[BREAK_INSTR_SIZE]; - probe_kernel_read(before, (char *)kgdbts_break_test, + copy_from_kernel_nofault(before, (char *)kgdbts_break_test, BREAK_INSTR_SIZE); init_simple_test(); ts.tst = plant_and_detach_test; @@ -836,8 +836,8 @@ static void run_plant_and_detach_test(int is_early) /* Activate test with initial breakpoint */ if (!is_early) kgdb_breakpoint(); - probe_kernel_read(after, (char *)kgdbts_break_test, - BREAK_INSTR_SIZE); + copy_from_kernel_nofault(after, (char *)kgdbts_break_test, + BREAK_INSTR_SIZE); if (memcmp(before, after, BREAK_INSTR_SIZE)) { printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n"); panic("kgdb memory corruption"); diff --git a/drivers/video/fbdev/hpfb.c b/drivers/video/fbdev/hpfb.c index f02be0db335e..8d418abdd767 100644 --- a/drivers/video/fbdev/hpfb.c +++ b/drivers/video/fbdev/hpfb.c @@ -402,7 +402,7 @@ int __init hpfb_init(void) if (err) return err; - err = probe_kernel_read(&i, (unsigned char *)INTFBVADDR + DIO_IDOFF, 1); + err = copy_from_kernel_nofault(&i, (unsigned char *)INTFBVADDR + DIO_IDOFF, 1); if (!err && (i == DIO_ID_FBUFFER) && topcat_sid_ok(sid = DIO_SECID(INTFBVADDR))) { if (!request_mem_region(INTFBPADDR, DIO_DEVSIZE, "Internal Topcat")) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 8ba492d44e68..e502414b3556 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -512,7 +512,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) * Using bounce buffer to bypass the * hardened user copy kernel text checks. */ - if (probe_kernel_read(buf, (void *) start, tsz)) { + if (copy_from_kernel_nofault(buf, (void *)start, + tsz)) { if (clear_user(buffer, tsz)) { ret = -EFAULT; goto out; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 7bcadca22100..70a3d9cd9113 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -301,13 +301,14 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, return 0; } -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size); +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); + +long copy_from_kernel_nofault(void *dst, const void *src, size_t size); +long notrace copy_to_kernel_nofault(void *dst, const void *src, size_t size); -extern long probe_kernel_read(void *dst, const void *src, size_t size); extern long probe_user_read(void *dst, const void __user *src, size_t size); - -extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); -extern long notrace probe_user_write(void __user *dst, const void *src, size_t size); +extern long notrace probe_user_write(void __user *dst, const void *src, + size_t size); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count); @@ -324,7 +325,7 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count); * Returns 0 on success, or -EFAULT. */ #define probe_kernel_address(addr, retval) \ - probe_kernel_read(&retval, addr, sizeof(retval)) + copy_from_kernel_nofault(&retval, addr, sizeof(retval)) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index ccc0f98abdd4..bc8d25f2ac8a 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -169,18 +169,18 @@ int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; - err = probe_kernel_write((char *)bpt->bpt_addr, + err = copy_to_kernel_nofault((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); return err; } int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { - return probe_kernel_write((char *)bpt->bpt_addr, + return copy_to_kernel_nofault((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 4b280fc7dd67..61774aec46b4 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -247,7 +247,7 @@ char *kgdb_mem2hex(char *mem, char *buf, int count) */ tmp = buf + count; - err = probe_kernel_read(tmp, mem, count); + err = copy_from_kernel_nofault(tmp, mem, count); if (err) return NULL; while (count > 0) { @@ -283,7 +283,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count) *tmp_raw |= hex_to_bin(*tmp_hex--) << 4; } - return probe_kernel_write(mem, tmp_raw, count); + return copy_to_kernel_nofault(mem, tmp_raw, count); } /* @@ -335,7 +335,7 @@ static int kgdb_ebin2mem(char *buf, char *mem, int count) size++; } - return probe_kernel_write(mem, c, size); + return copy_to_kernel_nofault(mem, c, size); } #if DBG_MAX_REG_NUM > 0 diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index ec190569f690..5c7949061671 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2326,7 +2326,8 @@ void kdb_ps1(const struct task_struct *p) int cpu; unsigned long tmp; - if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long))) + if (!p || + copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long))) return; cpu = kdb_process_cpu(p); diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index b8e6306e7e13..004c5b6c87f8 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -325,7 +325,7 @@ char *kdb_strdup(const char *str, gfp_t type) */ int kdb_getarea_size(void *res, unsigned long addr, size_t size) { - int ret = probe_kernel_read((char *)res, (char *)addr, size); + int ret = copy_from_kernel_nofault((char *)res, (char *)addr, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr); @@ -350,7 +350,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size) */ int kdb_putarea_size(unsigned long addr, void *res, size_t size) { - int ret = probe_kernel_read((char *)addr, (char *)res, size); + int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr); @@ -624,7 +624,8 @@ char kdb_task_state_char (const struct task_struct *p) char state; unsigned long tmp; - if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long))) + if (!p || + copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long))) return 'E'; cpu = kdb_process_cpu(p); diff --git a/kernel/kthread.c b/kernel/kthread.c index 8e3d2d7fdf5e..132f84a5fde3 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -201,7 +201,7 @@ void *kthread_probe_data(struct task_struct *task) struct kthread *kthread = to_kthread(task); void *data = NULL; - probe_kernel_read(&data, &kthread->data, sizeof(data)); + copy_from_kernel_nofault(&data, &kthread->data, sizeof(data)); return data; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e729c9e587a0..204afc12425f 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -196,7 +196,7 @@ bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr) if (unlikely(ret < 0)) goto fail; - ret = probe_kernel_read(dst, unsafe_ptr, size); + ret = copy_from_kernel_nofault(dst, unsafe_ptr, size); if (unlikely(ret < 0)) goto fail; return ret; @@ -661,7 +661,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, copy_size = (fmt[i + 2] == '4') ? 4 : 16; - err = probe_kernel_read(bufs->buf[memcpy_cnt], + err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt], (void *) (long) args[fmt_cnt], copy_size); if (err < 0) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6048f1be26d2..841c74863ff8 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1222,7 +1222,7 @@ fetch_store_strlen(unsigned long addr) #endif do { - ret = probe_kernel_read(&c, (u8 *)addr + len, 1); + ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1); len++; } while (c && ret == 0 && len < MAX_STRING_SIZE); @@ -1300,7 +1300,7 @@ probe_mem_read(void *dest, void *src, size_t size) if ((unsigned long)src < TASK_SIZE) return probe_mem_read_user(dest, src, size); #endif - return probe_kernel_read(dest, src, size); + return copy_from_kernel_nofault(dest, src, size); } /* Note that we don't verify it, since the code does not come from user space */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 9fbe1e237563..c41c3c17b86a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4638,11 +4638,11 @@ void print_worker_info(const char *log_lvl, struct task_struct *task) * Carefully copy the associated workqueue's workfn, name and desc. * Keep the original last '\0' in case the original is garbage. */ - probe_kernel_read(&fn, &worker->current_func, sizeof(fn)); - probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq)); - probe_kernel_read(&wq, &pwq->wq, sizeof(wq)); - probe_kernel_read(name, wq->name, sizeof(name) - 1); - probe_kernel_read(desc, worker->desc, sizeof(desc) - 1); + copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn)); + copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq)); + copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq)); + copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1); + copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1); if (fn || name[0] || desc[0]) { printk("%sWorkqueue: %s %ps", log_lvl, name, fn); diff --git a/mm/debug.c b/mm/debug.c index b5b1de8c71ac..4f376514744d 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -120,9 +120,9 @@ void __dump_page(struct page *page, const char *reason) * mapping can be invalid pointer and we don't want to crash * accessing it, so probe everything depending on it carefully */ - if (probe_kernel_read(&host, &mapping->host, + if (copy_from_kernel_nofault(&host, &mapping->host, sizeof(struct inode *)) || - probe_kernel_read(&a_ops, &mapping->a_ops, + copy_from_kernel_nofault(&a_ops, &mapping->a_ops, sizeof(struct address_space_operations *))) { pr_warn("failed to read mapping->host or a_ops, mapping not a valid kernel address?\n"); goto out_mapping; @@ -133,7 +133,7 @@ void __dump_page(struct page *page, const char *reason) goto out_mapping; } - if (probe_kernel_read(&dentry_first, + if (copy_from_kernel_nofault(&dentry_first, &host->i_dentry.first, sizeof(struct hlist_node *))) { pr_warn("mapping->a_ops:%ps with invalid mapping->host inode address %px\n", a_ops, host); @@ -146,7 +146,7 @@ void __dump_page(struct page *page, const char *reason) } dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); - if (probe_kernel_read(&dentry, dentry_ptr, + if (copy_from_kernel_nofault(&dentry, dentry_ptr, sizeof(struct dentry))) { pr_warn("mapping->aops:%ps with invalid mapping->host->i_dentry.first %px\n", a_ops, dentry_ptr); diff --git a/mm/maccess.c b/mm/maccess.c index 88845eda5047..cc5d8c6233c0 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -6,14 +6,15 @@ #include #include -bool __weak probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src, + size_t size) { return true; } #ifdef HAVE_GET_KERNEL_NOFAULT -#define probe_kernel_read_loop(dst, src, len, type, err_label) \ +#define copy_from_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __get_kernel_nofault(dst, src, type, err_label); \ dst += sizeof(type); \ @@ -21,25 +22,25 @@ bool __weak probe_kernel_read_allowed(const void *unsafe_src, size_t size) len -= sizeof(type); \ } -long probe_kernel_read(void *dst, const void *src, size_t size) +long copy_from_kernel_nofault(void *dst, const void *src, size_t size) { - if (!probe_kernel_read_allowed(src, size)) + if (!copy_from_kernel_nofault_allowed(src, size)) return -ERANGE; pagefault_disable(); - probe_kernel_read_loop(dst, src, size, u64, Efault); - probe_kernel_read_loop(dst, src, size, u32, Efault); - probe_kernel_read_loop(dst, src, size, u16, Efault); - probe_kernel_read_loop(dst, src, size, u8, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u64, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u32, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u16, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } -EXPORT_SYMBOL_GPL(probe_kernel_read); +EXPORT_SYMBOL_GPL(copy_from_kernel_nofault); -#define probe_kernel_write_loop(dst, src, len, type, err_label) \ +#define copy_to_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __put_kernel_nofault(dst, src, type, err_label); \ dst += sizeof(type); \ @@ -47,13 +48,13 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); len -= sizeof(type); \ } -long probe_kernel_write(void *dst, const void *src, size_t size) +long copy_to_kernel_nofault(void *dst, const void *src, size_t size) { pagefault_disable(); - probe_kernel_write_loop(dst, src, size, u64, Efault); - probe_kernel_write_loop(dst, src, size, u32, Efault); - probe_kernel_write_loop(dst, src, size, u16, Efault); - probe_kernel_write_loop(dst, src, size, u8, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u64, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u32, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u16, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: @@ -67,7 +68,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) if (unlikely(count <= 0)) return 0; - if (!probe_kernel_read_allowed(unsafe_addr, count)) + if (!copy_from_kernel_nofault_allowed(unsafe_addr, count)) return -ERANGE; pagefault_disable(); @@ -87,7 +88,7 @@ Efault: } #else /* HAVE_GET_KERNEL_NOFAULT */ /** - * probe_kernel_read(): safely attempt to read from kernel-space + * copy_from_kernel_nofault(): safely attempt to read from kernel-space * @dst: pointer to the buffer that shall take the data * @src: address to read from * @size: size of the data chunk @@ -98,15 +99,15 @@ Efault: * * We ensure that the copy_from_user is executed in atomic context so that * do_page_fault() doesn't attempt to take mmap_lock. This makes - * probe_kernel_read() suitable for use within regions where the caller + * copy_from_kernel_nofault() suitable for use within regions where the caller * already holds mmap_lock, or other locks which nest inside mmap_lock. */ -long probe_kernel_read(void *dst, const void *src, size_t size) +long copy_from_kernel_nofault(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); - if (!probe_kernel_read_allowed(src, size)) + if (!copy_from_kernel_nofault_allowed(src, size)) return -ERANGE; set_fs(KERNEL_DS); @@ -120,10 +121,10 @@ long probe_kernel_read(void *dst, const void *src, size_t size) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(probe_kernel_read); +EXPORT_SYMBOL_GPL(copy_from_kernel_nofault); /** - * probe_kernel_write(): safely attempt to write to a location + * copy_to_kernel_nofault(): safely attempt to write to a location * @dst: address to write to * @src: pointer to the data that shall be written * @size: size of the data chunk @@ -131,7 +132,7 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_kernel_write(void *dst, const void *src, size_t size) +long copy_to_kernel_nofault(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); @@ -174,7 +175,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) if (unlikely(count <= 0)) return 0; - if (!probe_kernel_read_allowed(unsafe_addr, count)) + if (!copy_from_kernel_nofault_allowed(unsafe_addr, count)) return -ERANGE; set_fs(KERNEL_DS); diff --git a/mm/rodata_test.c b/mm/rodata_test.c index 5e313fa93276..2a99df7beeb3 100644 --- a/mm/rodata_test.c +++ b/mm/rodata_test.c @@ -25,7 +25,7 @@ void rodata_test(void) } /* test 2: write to the variable; this should fault */ - if (!probe_kernel_write((void *)&rodata_test_data, + if (!copy_to_kernel_nofault((void *)&rodata_test_data, (void *)&zero, sizeof(zero))) { pr_err("test data was not read only\n"); return; diff --git a/mm/slub.c b/mm/slub.c index b8f798b50d44..fe81773fd97e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -292,7 +292,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) return get_freepointer(s, object); freepointer_addr = (unsigned long)object + s->offset; - probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p)); + copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p)); return freelist_ptr(s, p, freepointer_addr); } From c0ee37e85e0e47402b8bbe35b6cec8e06937ca58 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:54 +0200 Subject: [PATCH 215/462] maccess: rename probe_user_{read,write} to copy_{from,to}_user_nofault Better describe what these functions do. Suggested-by: Linus Torvalds Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/process.c | 3 ++- arch/powerpc/kvm/book3s_64_mmu_radix.c | 4 ++-- arch/powerpc/lib/inst.c | 6 +++--- arch/powerpc/oprofile/backtrace.c | 6 ++++-- arch/powerpc/perf/callchain_32.c | 2 +- arch/powerpc/perf/callchain_64.c | 2 +- arch/powerpc/perf/core-book3s.c | 3 ++- arch/powerpc/sysdev/fsl_pci.c | 4 ++-- include/linux/uaccess.h | 4 ++-- kernel/trace/bpf_trace.c | 4 ++-- kernel/trace/trace_kprobe.c | 2 +- mm/maccess.c | 12 ++++++------ 12 files changed, 28 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7bb7faf84490..d4d0d1048500 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1305,7 +1305,8 @@ void show_user_instructions(struct pt_regs *regs) for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) { int instr; - if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) { + if (copy_from_user_nofault(&instr, (void __user *)pc, + sizeof(instr))) { seq_buf_printf(&s, "XXXXXXXX "); continue; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 3cb0c9843d01..e738ea652192 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -64,9 +64,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, isync(); if (is_load) - ret = probe_user_read(to, (const void __user *)from, n); + ret = copy_from_user_nofault(to, (const void __user *)from, n); else - ret = probe_user_write((void __user *)to, from, n); + ret = copy_to_user_nofault((void __user *)to, from, n); /* switch the pid first to avoid running host with unallocated pid */ if (quadrant == 1 && pid != old_pid) diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index 6c7a20af9fd6..9cc17eb62462 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -15,11 +15,11 @@ int probe_user_read_inst(struct ppc_inst *inst, unsigned int val, suffix; int err; - err = probe_user_read(&val, nip, sizeof(val)); + err = copy_from_user_nofault(&val, nip, sizeof(val)); if (err) return err; if (get_op(val) == OP_PREFIX) { - err = probe_user_read(&suffix, (void __user *)nip + 4, 4); + err = copy_from_user_nofault(&suffix, (void __user *)nip + 4, 4); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); @@ -51,7 +51,7 @@ int probe_user_read_inst(struct ppc_inst *inst, unsigned int val; int err; - err = probe_user_read(&val, nip, sizeof(val)); + err = copy_from_user_nofault(&val, nip, sizeof(val)); if (!err) *inst = ppc_inst(val); diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index 6f347fa29f41..9db7ada79d10 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -33,7 +33,8 @@ static unsigned int user_getsp32(unsigned int sp, int is_first) * which means that we've done all that we can do from * interrupt context. */ - if (probe_user_read(stack_frame, (void __user *)p, sizeof(stack_frame))) + if (copy_from_user_nofault(stack_frame, (void __user *)p, + sizeof(stack_frame))) return 0; if (!is_first) @@ -51,7 +52,8 @@ static unsigned long user_getsp64(unsigned long sp, int is_first) { unsigned long stack_frame[3]; - if (probe_user_read(stack_frame, (void __user *)sp, sizeof(stack_frame))) + if (copy_from_user_nofault(stack_frame, (void __user *)sp, + sizeof(stack_frame))) return 0; if (!is_first) diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index f7d888d39cd3..542e68b8eae0 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -44,7 +44,7 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) ((unsigned long)ptr & 3)) return -EFAULT; - rc = probe_user_read(ret, ptr, sizeof(*ret)); + rc = copy_from_user_nofault(ret, ptr, sizeof(*ret)); if (IS_ENABLED(CONFIG_PPC64) && rc) return read_user_stack_slow(ptr, ret, 4); diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 814d1c2c2b9c..fa2a1b83b9b0 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -50,7 +50,7 @@ static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) ((unsigned long)ptr & 7)) return -EFAULT; - if (!probe_user_read(ret, ptr, sizeof(*ret))) + if (!copy_from_user_nofault(ret, ptr, sizeof(*ret))) return 0; return read_user_stack_slow(ptr, ret, 8); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index efe97ff82557..cd6a742ac6ef 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -426,7 +426,8 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Userspace: need copy instruction here then translate it */ - if (probe_user_read(&instr, (unsigned int __user *)addr, sizeof(instr))) + if (copy_from_user_nofault(&instr, (unsigned int __user *)addr, + sizeof(instr))) return 0; target = branch_target((struct ppc_inst *)&instr); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 4a8874bc1057..73fa37ca40ef 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1066,8 +1066,8 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) if (is_in_pci_mem_space(addr)) { if (user_mode(regs)) - ret = probe_user_read(&inst, (void __user *)regs->nip, - sizeof(inst)); + ret = copy_from_user_nofault(&inst, + (void __user *)regs->nip, sizeof(inst)); else ret = probe_kernel_address((void *)regs->nip, inst); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 70a3d9cd9113..bef48da242cc 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -306,8 +306,8 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); long copy_from_kernel_nofault(void *dst, const void *src, size_t size); long notrace copy_to_kernel_nofault(void *dst, const void *src, size_t size); -extern long probe_user_read(void *dst, const void __user *src, size_t size); -extern long notrace probe_user_write(void __user *dst, const void *src, +long copy_from_user_nofault(void *dst, const void __user *src, size_t size); +long notrace copy_to_user_nofault(void __user *dst, const void *src, size_t size); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 204afc12425f..dc05626979b8 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -141,7 +141,7 @@ bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr) { int ret; - ret = probe_user_read(dst, unsafe_ptr, size); + ret = copy_from_user_nofault(dst, unsafe_ptr, size); if (unlikely(ret < 0)) memset(dst, 0, size); return ret; @@ -326,7 +326,7 @@ BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src, if (unlikely(!nmi_uaccess_okay())) return -EPERM; - return probe_user_write(unsafe_ptr, src, size); + return copy_to_user_nofault(unsafe_ptr, src, size); } static const struct bpf_func_proto bpf_probe_write_user_proto = { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 841c74863ff8..aefb6065b508 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1290,7 +1290,7 @@ probe_mem_read_user(void *dest, void *src, size_t size) { const void __user *uaddr = (__force const void __user *)src; - return probe_user_read(dest, uaddr, size); + return copy_from_user_nofault(dest, uaddr, size); } static nokprobe_inline int diff --git a/mm/maccess.c b/mm/maccess.c index cc5d8c6233c0..f98ff91e32c6 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -194,7 +194,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) #endif /* HAVE_GET_KERNEL_NOFAULT */ /** - * probe_user_read(): safely attempt to read from a user-space location + * copy_from_user_nofault(): safely attempt to read from a user-space location * @dst: pointer to the buffer that shall take the data * @src: address to read from. This must be a user address. * @size: size of the data chunk @@ -202,7 +202,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) * Safely read from user address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_user_read(void *dst, const void __user *src, size_t size) +long copy_from_user_nofault(void *dst, const void __user *src, size_t size) { long ret = -EFAULT; mm_segment_t old_fs = get_fs(); @@ -219,10 +219,10 @@ long probe_user_read(void *dst, const void __user *src, size_t size) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(probe_user_read); +EXPORT_SYMBOL_GPL(copy_from_user_nofault); /** - * probe_user_write(): safely attempt to write to a user-space location + * copy_to_user_nofault(): safely attempt to write to a user-space location * @dst: address to write to * @src: pointer to the data that shall be written * @size: size of the data chunk @@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(probe_user_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_user_write(void __user *dst, const void *src, size_t size) +long copy_to_user_nofault(void __user *dst, const void *src, size_t size) { long ret = -EFAULT; mm_segment_t old_fs = get_fs(); @@ -247,7 +247,7 @@ long probe_user_write(void __user *dst, const void *src, size_t size) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(probe_user_write); +EXPORT_SYMBOL_GPL(copy_to_user_nofault); /** * strncpy_from_user_nofault: - Copy a NUL terminated string from unsafe user From 2d7d67920e5c8e0854df23ca77da2dd5880ce5dd Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Tue, 16 Jun 2020 02:06:37 +0800 Subject: [PATCH 216/462] io_uring: don't fail links for EAGAIN error in IOPOLL mode In IOPOLL mode, for EAGAIN error, we'll try to submit io request again using io-wq, so don't fail rest of links if this io request has links. Cc: stable@vger.kernel.org Signed-off-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e17df662c191..eb3797714539 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1988,7 +1988,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) if (kiocb->ki_flags & IOCB_WRITE) kiocb_end_write(req); - if (res != req->result) + if (res != -EAGAIN && res != req->result) req_set_fail_links(req); req->result = res; if (res != -EAGAIN) From bbde017a32b32d2fa8d5fddca25fade20132abf8 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Tue, 16 Jun 2020 02:06:38 +0800 Subject: [PATCH 217/462] io_uring: add memory barrier to synchronize io_kiocb's result and iopoll_completed In io_complete_rw_iopoll(), stores to io_kiocb's result and iopoll completed are two independent store operations, to ensure that once iopoll_completed is ture and then req->result must been perceived by the cpu executing io_do_iopoll(), proper memory barrier should be used. And in io_do_iopoll(), we check whether req->result is EAGAIN, if it is, we'll need to issue this io request using io-wq again. In order to just issue a single smp_rmb() on the completion side, move the re-submit work to io_iopoll_complete(). Cc: stable@vger.kernel.org Signed-off-by: Xiaoguang Wang [axboe: don't set ->iopoll_completed for -EAGAIN retry] Signed-off-by: Jens Axboe --- fs/io_uring.c | 53 ++++++++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index eb3797714539..9d2ae9aa8b45 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1742,6 +1742,18 @@ static int io_put_kbuf(struct io_kiocb *req) return cflags; } +static void io_iopoll_queue(struct list_head *again) +{ + struct io_kiocb *req; + + do { + req = list_first_entry(again, struct io_kiocb, list); + list_del(&req->list); + refcount_inc(&req->refs); + io_queue_async_work(req); + } while (!list_empty(again)); +} + /* * Find and free completed poll iocbs */ @@ -1750,12 +1762,21 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, { struct req_batch rb; struct io_kiocb *req; + LIST_HEAD(again); + + /* order with ->result store in io_complete_rw_iopoll() */ + smp_rmb(); rb.to_free = rb.need_iter = 0; while (!list_empty(done)) { int cflags = 0; req = list_first_entry(done, struct io_kiocb, list); + if (READ_ONCE(req->result) == -EAGAIN) { + req->iopoll_completed = 0; + list_move_tail(&req->list, &again); + continue; + } list_del(&req->list); if (req->flags & REQ_F_BUFFER_SELECTED) @@ -1773,18 +1794,9 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, if (ctx->flags & IORING_SETUP_SQPOLL) io_cqring_ev_posted(ctx); io_free_req_many(ctx, &rb); -} -static void io_iopoll_queue(struct list_head *again) -{ - struct io_kiocb *req; - - do { - req = list_first_entry(again, struct io_kiocb, list); - list_del(&req->list); - refcount_inc(&req->refs); - io_queue_async_work(req); - } while (!list_empty(again)); + if (!list_empty(&again)) + io_iopoll_queue(&again); } static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, @@ -1792,7 +1804,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, { struct io_kiocb *req, *tmp; LIST_HEAD(done); - LIST_HEAD(again); bool spin; int ret; @@ -1818,13 +1829,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, if (!list_empty(&done)) break; - if (req->result == -EAGAIN) { - list_move_tail(&req->list, &again); - continue; - } - if (!list_empty(&again)) - break; - ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin); if (ret < 0) break; @@ -1837,9 +1841,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, if (!list_empty(&done)) io_iopoll_complete(ctx, nr_events, &done); - if (!list_empty(&again)) - io_iopoll_queue(&again); - return ret; } @@ -1990,9 +1991,13 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) if (res != -EAGAIN && res != req->result) req_set_fail_links(req); - req->result = res; - if (res != -EAGAIN) + + WRITE_ONCE(req->result, res); + /* order with io_poll_complete() checking ->result */ + if (res != -EAGAIN) { + smp_wmb(); WRITE_ONCE(req->iopoll_completed, 1); + } } /* From 9d8426a09195e2dcf2aa249de2aaadd792d491c7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 16 Jun 2020 18:42:49 -0600 Subject: [PATCH 218/462] io_uring: acquire 'mm' for task_work for SQPOLL If we're unlucky with timing, we could be running task_work after having dropped the memory context in the sq thread. Since dropping the context requires a runnable task state, we cannot reliably drop it as part of our check-for-work loop in io_sq_thread(). Instead, abstract out the mm acquire for the sq thread into a helper, and call it from the async task work handler. Cc: stable@vger.kernel.org # v5.7 Signed-off-by: Jens Axboe --- fs/io_uring.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 9d2ae9aa8b45..98c83fbf4f88 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4256,6 +4256,28 @@ static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, __io_queue_proc(&pt->req->apoll->poll, pt, head); } +static void io_sq_thread_drop_mm(struct io_ring_ctx *ctx) +{ + struct mm_struct *mm = current->mm; + + if (mm) { + kthread_unuse_mm(mm); + mmput(mm); + } +} + +static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx, + struct io_kiocb *req) +{ + if (io_op_defs[req->opcode].needs_mm && !current->mm) { + if (unlikely(!mmget_not_zero(ctx->sqo_mm))) + return -EFAULT; + kthread_use_mm(ctx->sqo_mm); + } + + return 0; +} + static void io_async_task_func(struct callback_head *cb) { struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); @@ -4290,11 +4312,16 @@ static void io_async_task_func(struct callback_head *cb) if (!canceled) { __set_current_state(TASK_RUNNING); + if (io_sq_thread_acquire_mm(ctx, req)) { + io_cqring_add_event(req, -EFAULT); + goto end_req; + } mutex_lock(&ctx->uring_lock); __io_queue_sqe(req, NULL); mutex_unlock(&ctx->uring_lock); } else { io_cqring_ev_posted(ctx); +end_req: req_set_fail_links(req); io_double_put_req(req); } @@ -5841,11 +5868,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (unlikely(req->opcode >= IORING_OP_LAST)) return -EINVAL; - if (io_op_defs[req->opcode].needs_mm && !current->mm) { - if (unlikely(!mmget_not_zero(ctx->sqo_mm))) - return -EFAULT; - kthread_use_mm(ctx->sqo_mm); - } + if (unlikely(io_sq_thread_acquire_mm(ctx, req))) + return -EFAULT; sqe_flags = READ_ONCE(sqe->flags); /* enforce forwards compatibility on users */ @@ -5954,16 +5978,6 @@ fail_req: return submitted; } -static inline void io_sq_thread_drop_mm(struct io_ring_ctx *ctx) -{ - struct mm_struct *mm = current->mm; - - if (mm) { - kthread_unuse_mm(mm); - mmput(mm); - } -} - static int io_sq_thread(void *data) { struct io_ring_ctx *ctx = data; From 4228668eb936357657046b486207b167caea5175 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 17 Jun 2020 11:47:53 -0500 Subject: [PATCH 219/462] ASoC: Intel: SOF: merge COMETLAKE_LP and COMETLAKE_H We already have two configurations for CometLake, and a third one coming. On other platforms, we used a single Kconfig option, so we should follow the same trend by merging the two cases in a backwards compatible way. The backwards compatibility is handled by overloading the COMETLAKE_LP kconfig as COMETLAKE. In practice we've never seen a case where COMETLAKE_H is not selected along with COMETLAKE_LP, so keeping one of the two is enough. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Guennadi Liakhovetski Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20200617164755.18104-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/hda/intel-dsp-config.c | 4 +--- sound/soc/intel/boards/Kconfig | 4 ++-- sound/soc/sof/intel/Kconfig | 29 ++++++++--------------------- sound/soc/sof/sof-pci-dev.c | 12 ++++-------- 4 files changed, 15 insertions(+), 34 deletions(-) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index be1df80ed013..5df0d2253844 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -192,8 +192,8 @@ static const struct config_entry config_table[] = { }, #endif +#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE) /* Cometlake-LP */ -#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE_LP) { .flags = FLAG_SOF, .device = 0x02c8, @@ -211,9 +211,7 @@ static const struct config_entry config_table[] = { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC, .device = 0x02c8, }, -#endif /* Cometlake-H */ -#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE_H) { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC, .device = 0x06c8, diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index a2a5798c9139..5dc489a79454 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -492,7 +492,7 @@ config SND_SOC_INTEL_SOF_PCM512x_MACH endif ## SND_SOC_SOF_HDA_LINK || SND_SOC_SOF_BAYTRAIL -if (SND_SOC_SOF_COMETLAKE_LP && SND_SOC_SOF_HDA_LINK) +if (SND_SOC_SOF_COMETLAKE && SND_SOC_SOF_HDA_LINK) config SND_SOC_INTEL_CML_LP_DA7219_MAX98357A_MACH tristate "CML_LP with DA7219 and MAX98357A in I2S Mode" @@ -520,7 +520,7 @@ config SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH Say Y if you have such a device. If unsure select "N". -endif ## SND_SOC_SOF_COMETLAKE_LP && SND_SOC_SOF_HDA_LINK +endif ## SND_SOC_SOF_COMETLAKE && SND_SOC_SOF_HDA_LINK if SND_SOC_SOF_JASPERLAKE diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index c9a2bee4b55c..3aaf25e4f766 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -25,8 +25,7 @@ config SND_SOC_SOF_INTEL_PCI select SND_SOC_SOF_CANNONLAKE if SND_SOC_SOF_CANNONLAKE_SUPPORT select SND_SOC_SOF_COFFEELAKE if SND_SOC_SOF_COFFEELAKE_SUPPORT select SND_SOC_SOF_ICELAKE if SND_SOC_SOF_ICELAKE_SUPPORT - select SND_SOC_SOF_COMETLAKE_LP if SND_SOC_SOF_COMETLAKE_LP_SUPPORT - select SND_SOC_SOF_COMETLAKE_H if SND_SOC_SOF_COMETLAKE_H_SUPPORT + select SND_SOC_SOF_COMETLAKE if SND_SOC_SOF_COMETLAKE_SUPPORT select SND_SOC_SOF_TIGERLAKE if SND_SOC_SOF_TIGERLAKE_SUPPORT select SND_SOC_SOF_ELKHARTLAKE if SND_SOC_SOF_ELKHARTLAKE_SUPPORT select SND_SOC_SOF_JASPERLAKE if SND_SOC_SOF_JASPERLAKE_SUPPORT @@ -201,34 +200,22 @@ config SND_SOC_SOF_ICELAKE This option is not user-selectable but automagically handled by 'select' statements at a higher level -config SND_SOC_SOF_COMETLAKE_LP +config SND_SOC_SOF_COMETLAKE tristate select SND_SOC_SOF_HDA_COMMON help This option is not user-selectable but automagically handled by 'select' statements at a higher level +config SND_SOC_SOF_COMETLAKE_SUPPORT + bool + config SND_SOC_SOF_COMETLAKE_LP_SUPPORT - bool "SOF support for CometLake-LP" + bool "SOF support for CometLake" + select SND_SOC_SOF_COMETLAKE_SUPPORT help This adds support for Sound Open Firmware for Intel(R) platforms - using the Cometlake-LP processors. - Say Y if you have such a device. - If unsure select "N". - -config SND_SOC_SOF_COMETLAKE_H - tristate - select SND_SOC_SOF_HDA_COMMON - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level - -config SND_SOC_SOF_COMETLAKE_H_SUPPORT - bool "SOF support for CometLake-H" - help - This adds support for Sound Open Firmware for Intel(R) platforms - using the Cometlake-H processors. - Say Y if you have such a device. + using the Cometlake processors. If unsure select "N". config SND_SOC_SOF_TIGERLAKE_SUPPORT diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c index b13697dab7c0..7b5f6e17b05f 100644 --- a/sound/soc/sof/sof-pci-dev.c +++ b/sound/soc/sof/sof-pci-dev.c @@ -151,9 +151,7 @@ static const struct sof_dev_desc cfl_desc = { }; #endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE_LP) || \ - IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE_H) - +#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE) static const struct sof_dev_desc cml_desc = { .machines = snd_soc_acpi_intel_cml_machines, .alt_machines = snd_soc_acpi_intel_cml_sdw_machines, @@ -420,12 +418,10 @@ static const struct pci_device_id sof_pci_ids[] = { { PCI_DEVICE(0x8086, 0x4dc8), .driver_data = (unsigned long)&jsl_desc}, #endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE_LP) - { PCI_DEVICE(0x8086, 0x02c8), +#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE) + { PCI_DEVICE(0x8086, 0x02c8), /* CML-LP */ .driver_data = (unsigned long)&cml_desc}, -#endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE_H) - { PCI_DEVICE(0x8086, 0x06c8), + { PCI_DEVICE(0x8086, 0x06c8), /* CML-H */ .driver_data = (unsigned long)&cml_desc}, #endif #if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE) From 258fb4f4c34a0db9d3834aba6784d7b322176bb9 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 17 Jun 2020 11:47:54 -0500 Subject: [PATCH 220/462] ASoC: SOF: Intel: add PCI ID for CometLake-S Mirror ID added for legacy HDaudio Signed-off-by: Pierre-Louis Bossart Reviewed-by: Guennadi Liakhovetski Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20200617164755.18104-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-pci-dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c index 7b5f6e17b05f..f3cb2db67130 100644 --- a/sound/soc/sof/sof-pci-dev.c +++ b/sound/soc/sof/sof-pci-dev.c @@ -423,6 +423,8 @@ static const struct pci_device_id sof_pci_ids[] = { .driver_data = (unsigned long)&cml_desc}, { PCI_DEVICE(0x8086, 0x06c8), /* CML-H */ .driver_data = (unsigned long)&cml_desc}, + { PCI_DEVICE(0x8086, 0xa3f0), /* CML-S */ + .driver_data = (unsigned long)&cml_desc}, #endif #if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE) { PCI_DEVICE(0x8086, 0xa0c8), From c8d2e2bfaeffa0f914330e8b4e45b986c8d30b58 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 17 Jun 2020 11:47:55 -0500 Subject: [PATCH 221/462] ASoC: SOF: Intel: add PCI IDs for ICL-H and TGL-H Usually the DSP is not traditionally enabled on H skews but this might be used moving forward. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Guennadi Liakhovetski Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20200617164755.18104-4-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-pci-dev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c index f3cb2db67130..aa3532ba1434 100644 --- a/sound/soc/sof/sof-pci-dev.c +++ b/sound/soc/sof/sof-pci-dev.c @@ -409,8 +409,11 @@ static const struct pci_device_id sof_pci_ids[] = { .driver_data = (unsigned long)&cfl_desc}, #endif #if IS_ENABLED(CONFIG_SND_SOC_SOF_ICELAKE) - { PCI_DEVICE(0x8086, 0x34C8), + { PCI_DEVICE(0x8086, 0x34C8), /* ICL-LP */ .driver_data = (unsigned long)&icl_desc}, + { PCI_DEVICE(0x8086, 0x3dc8), /* ICL-H */ + .driver_data = (unsigned long)&icl_desc}, + #endif #if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE) { PCI_DEVICE(0x8086, 0x38c8), @@ -427,8 +430,11 @@ static const struct pci_device_id sof_pci_ids[] = { .driver_data = (unsigned long)&cml_desc}, #endif #if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE) - { PCI_DEVICE(0x8086, 0xa0c8), + { PCI_DEVICE(0x8086, 0xa0c8), /* TGL-LP */ .driver_data = (unsigned long)&tgl_desc}, + { PCI_DEVICE(0x8086, 0x43c8), /* TGL-H */ + .driver_data = (unsigned long)&tgl_desc}, + #endif #if IS_ENABLED(CONFIG_SND_SOC_SOF_ELKHARTLAKE) { PCI_DEVICE(0x8086, 0x4b55), From a94eaccefea1186947c5c5451fcae2245dd7e714 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 17 Jun 2020 11:41:44 -0500 Subject: [PATCH 222/462] ASoC: hdac_hda: fix memleak with regmap not freed on remove kmemleak throws error reports on module load/unload tests, add snd_hdac_regmap_exit() in .remove(). While we are at it, also fix the error handling flow in .probe() to use snd_hdac_regmap_exit() if needed. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Daniel Baluta Reviewed-by: Kai Vehmanen Reviewed-by: Rander Wang Reviewed-by: Guennadi Liakhovetski Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20200617164144.17859-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hda.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/hdac_hda.c b/sound/soc/codecs/hdac_hda.c index de003acb1951..473efe9ef998 100644 --- a/sound/soc/codecs/hdac_hda.c +++ b/sound/soc/codecs/hdac_hda.c @@ -441,13 +441,13 @@ static int hdac_hda_codec_probe(struct snd_soc_component *component) ret = snd_hda_codec_set_name(hcodec, hcodec->preset->name); if (ret < 0) { dev_err(&hdev->dev, "name failed %s\n", hcodec->preset->name); - goto error; + goto error_pm; } ret = snd_hdac_regmap_init(&hcodec->core); if (ret < 0) { dev_err(&hdev->dev, "regmap init failed\n"); - goto error; + goto error_pm; } patch = (hda_codec_patch_t)hcodec->preset->driver_data; @@ -455,7 +455,7 @@ static int hdac_hda_codec_probe(struct snd_soc_component *component) ret = patch(hcodec); if (ret < 0) { dev_err(&hdev->dev, "patch failed %d\n", ret); - goto error; + goto error_regmap; } } else { dev_dbg(&hdev->dev, "no patch file found\n"); @@ -467,7 +467,7 @@ static int hdac_hda_codec_probe(struct snd_soc_component *component) ret = snd_hda_codec_parse_pcms(hcodec); if (ret < 0) { dev_err(&hdev->dev, "unable to map pcms to dai %d\n", ret); - goto error; + goto error_regmap; } /* HDMI controls need to be created in machine drivers */ @@ -476,7 +476,7 @@ static int hdac_hda_codec_probe(struct snd_soc_component *component) if (ret < 0) { dev_err(&hdev->dev, "unable to create controls %d\n", ret); - goto error; + goto error_regmap; } } @@ -496,7 +496,9 @@ static int hdac_hda_codec_probe(struct snd_soc_component *component) return 0; -error: +error_regmap: + snd_hdac_regmap_exit(hdev); +error_pm: pm_runtime_put(&hdev->dev); error_no_pm: snd_hdac_ext_bus_link_put(hdev->bus, hlink); @@ -518,6 +520,8 @@ static void hdac_hda_codec_remove(struct snd_soc_component *component) pm_runtime_disable(&hdev->dev); snd_hdac_ext_bus_link_put(hdev->bus, hlink); + + snd_hdac_regmap_exit(hdev); } static const struct snd_soc_dapm_route hdac_hda_dapm_routes[] = { From 3d6c6f20d961209972a075ddc50b9a7207e9796e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 2 Jun 2020 14:23:32 +0200 Subject: [PATCH 223/462] s390/qdio: clean up usage of qdio_data This removes the last remaining accesses to ->qdio_data from internal code. Just pass the qdio_irq struct where needed instead. Signed-off-by: Julian Wiedmann Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio_main.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 1aa94683d789..f42129c02927 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -916,10 +916,10 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr) } } -static void qdio_handle_activate_check(struct ccw_device *cdev, - unsigned long intparm, int cstat, int dstat) +static void qdio_handle_activate_check(struct qdio_irq *irq_ptr, + unsigned long intparm, int cstat, + int dstat) { - struct qdio_irq *irq_ptr = cdev->private->qdio_data; struct qdio_q *q; DBF_ERROR("%4x ACT CHECK", irq_ptr->schid.sch_no); @@ -946,11 +946,9 @@ no_handler: lgr_info_log(); } -static void qdio_establish_handle_irq(struct ccw_device *cdev, int cstat, +static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat, int dstat) { - struct qdio_irq *irq_ptr = cdev->private->qdio_data; - DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qest irq"); if (cstat) @@ -997,7 +995,7 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, switch (irq_ptr->state) { case QDIO_IRQ_STATE_INACTIVE: - qdio_establish_handle_irq(cdev, cstat, dstat); + qdio_establish_handle_irq(irq_ptr, cstat, dstat); break; case QDIO_IRQ_STATE_CLEANUP: qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); @@ -1009,7 +1007,7 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, return; } if (cstat || dstat) - qdio_handle_activate_check(cdev, intparm, cstat, + qdio_handle_activate_check(irq_ptr, intparm, cstat, dstat); break; case QDIO_IRQ_STATE_STOPPED: @@ -1513,12 +1511,11 @@ static int handle_outbound(struct qdio_q *q, unsigned int callflags, int do_QDIO(struct ccw_device *cdev, unsigned int callflags, int q_nr, unsigned int bufnr, unsigned int count) { - struct qdio_irq *irq_ptr; + struct qdio_irq *irq_ptr = cdev->private->qdio_data; if (bufnr >= QDIO_MAX_BUFFERS_PER_Q || count > QDIO_MAX_BUFFERS_PER_Q) return -EINVAL; - irq_ptr = cdev->private->qdio_data; if (!irq_ptr) return -ENODEV; From c920c545286270302d29fd63c05c3bd84828daee Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 2 Jun 2020 14:26:36 +0200 Subject: [PATCH 224/462] s390/qdio: warn about unexpected SLSB states The way we produce SBALs to the device (first update q->nr_buf_used, then update the SLSB) should ensure that we never see some of the SLSB states when scanning the queue for progress. So make some noise if we do, this implies a bug in our SBAL tracking. Also tweak the WARN msg to provide more information. Signed-off-by: Julian Wiedmann Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio_main.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index f42129c02927..0c919a11a46e 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -503,15 +503,18 @@ static int get_inbound_buffer_frontier(struct qdio_q *q, unsigned int start) account_sbals_error(q, count); return count; case SLSB_CU_INPUT_EMPTY: - case SLSB_P_INPUT_NOT_INIT: - case SLSB_P_INPUT_ACK: if (q->irq_ptr->perf_stat_enabled) q->q_stats.nr_sbal_nop++; DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "in nop:%1d %#02x", q->nr, start); return 0; + case SLSB_P_INPUT_NOT_INIT: + case SLSB_P_INPUT_ACK: + /* We should never see this state, throw a WARN: */ default: - WARN_ON_ONCE(1); + dev_WARN_ONCE(&q->irq_ptr->cdev->dev, 1, + "found state %#x at index %u on queue %u\n", + state, start, q->nr); return 0; } } @@ -716,11 +719,14 @@ static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start) DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out primed:%1d", q->nr); return 0; - case SLSB_P_OUTPUT_NOT_INIT: case SLSB_P_OUTPUT_HALTED: return 0; + case SLSB_P_OUTPUT_NOT_INIT: + /* We should never see this state, throw a WARN: */ default: - WARN_ON_ONCE(1); + dev_WARN_ONCE(&q->irq_ptr->cdev->dev, 1, + "found state %#x at index %u on queue %u\n", + state, start, q->nr); return 0; } } From b3583fca5fb654af2cfc1c08259abb9728272538 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 2 Jun 2020 21:00:51 +0300 Subject: [PATCH 225/462] s390: fix syscall_get_error for compat processes If both the tracer and the tracee are compat processes, and gprs[2] is assigned a value by __poke_user_compat, then the higher 32 bits of gprs[2] are cleared, IS_ERR_VALUE() always returns false, and syscall_get_error() always returns 0. Fix the implementation by sign-extending the value for compat processes the same way as x86 implementation does. The bug was exposed to user space by commit 201766a20e30f ("ptrace: add PTRACE_GET_SYSCALL_INFO request") and detected by strace test suite. This change fixes strace syscall tampering on s390. Link: https://lkml.kernel.org/r/20200602180051.GA2427@altlinux.org Fixes: 753c4dd6a2fa2 ("[S390] ptrace changes") Cc: Elvira Khabirova Cc: stable@vger.kernel.org # v2.6.28+ Signed-off-by: Dmitry V. Levin Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/syscall.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index f073292e9fdb..d9d5de0f67ff 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -33,7 +33,17 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; + unsigned long error = regs->gprs[2]; +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) { + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. + */ + error = (long)(int)error; + } +#endif + return IS_ERR_VALUE(error) ? error : 0; } static inline long syscall_get_return_value(struct task_struct *task, From 56952e91acc93ed624fe9da840900defb75f1323 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 17 Jun 2020 15:00:04 -0600 Subject: [PATCH 226/462] io_uring: reap poll completions while waiting for refs to drop on exit If we're doing polled IO and end up having requests being submitted async, then completions can come in while we're waiting for refs to drop. We need to reap these manually, as nobody else will be looking for them. Break the wait into 1/20th of a second time waits, and check for done poll completions if we time out. Otherwise we can have done poll completions sitting in ctx->poll_list, which needs us to reap them but we're just waiting for them. Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 98c83fbf4f88..2038d52c5450 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7363,7 +7363,17 @@ static void io_ring_exit_work(struct work_struct *work) if (ctx->rings) io_cqring_overflow_flush(ctx, true); - wait_for_completion(&ctx->ref_comp); + /* + * If we're doing polled IO and end up having requests being + * submitted async (out-of-line), then completions can come in while + * we're waiting for refs to drop. We need to reap these manually, + * as nobody else will be looking for them. + */ + while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)) { + io_iopoll_reap_events(ctx); + if (ctx->rings) + io_cqring_overflow_flush(ctx, true); + } io_ring_ctx_free(ctx); } From 543094e19c82b5d171e139d09a1a3ea0a7361117 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 20 May 2020 16:50:26 -0600 Subject: [PATCH 227/462] nvdimm/region: always show the 'align' attribute It is possible that a platform that is capable of 'namespace labels' comes up without the labels properly initialized. In this case, the region's 'align' attribute is hidden. Howerver, once the user does initialize he labels, the 'align' attribute still stays hidden, which is unexpected. The sysfs_update_group() API is meant to address this, and could be called during region probe, but it has entanglements with the device 'lockdep_mutex'. Therefore, simply make the 'align' attribute always visible. It doesn't matter what it says for label-less namespaces, since it is not possible to change their allocation anyway. Suggested-by: Dan Williams Signed-off-by: Vishal Verma Cc: Dan Williams Link: https://lore.kernel.org/r/20200520225026.29426-1-vishal.l.verma@intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/region_devs.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index ccbb5b43b8b2..4502f9c4708d 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -679,18 +679,8 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) return a->mode; } - if (a == &dev_attr_align.attr) { - int i; - - for (i = 0; i < nd_region->ndr_mappings; i++) { - struct nd_mapping *nd_mapping = &nd_region->mapping[i]; - struct nvdimm *nvdimm = nd_mapping->nvdimm; - - if (test_bit(NDD_LABELING, &nvdimm->flags)) - return a->mode; - } - return 0; - } + if (a == &dev_attr_align.attr) + return a->mode; if (a != &dev_attr_set_cookie.attr && a != &dev_attr_available_size.attr) From 026bb845b0fff6dec91fe24511dad7d3067dc3ed Mon Sep 17 00:00:00 2001 From: Kaitao Cheng Date: Fri, 29 May 2020 22:12:14 +0800 Subject: [PATCH 228/462] ftrace: Fix maybe-uninitialized compiler warning During build compiler reports some 'false positive' warnings about variables {'seq_ops', 'filtered_pids', 'other_pids'} may be used uninitialized. This patch silences these warnings. Also delete some useless spaces Link: https://lkml.kernel.org/r/20200529141214.37648-1-pilgrimtao@gmail.com Signed-off-by: Kaitao Cheng Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c163c3531faf..1903b80db6eb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2260,7 +2260,7 @@ ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, if (hash_contains_ip(ip, op->func_hash)) return op; - } + } return NULL; } @@ -3599,7 +3599,7 @@ static int t_show(struct seq_file *m, void *v) if (direct) seq_printf(m, "\n\tdirect-->%pS", (void *)direct); } - } + } seq_putc(m, '\n'); @@ -7151,6 +7151,10 @@ static int pid_open(struct inode *inode, struct file *file, int type) case TRACE_NO_PIDS: seq_ops = &ftrace_no_pid_sops; break; + default: + trace_array_put(tr); + WARN_ON_ONCE(1); + return -EINVAL; } ret = seq_open(file, seq_ops); @@ -7229,6 +7233,10 @@ pid_write(struct file *filp, const char __user *ubuf, other_pids = rcu_dereference_protected(tr->function_pids, lockdep_is_held(&ftrace_lock)); break; + default: + ret = -EINVAL; + WARN_ON_ONCE(1); + goto out; } ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt); From 8231b0b9c322c894594fb42eb0eb9f93544a6acc Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 17 Jun 2020 05:40:28 -0700 Subject: [PATCH 229/462] selinux: fix undefined return of cond_evaluate_expr clang static analysis reports an undefined return security/selinux/ss/conditional.c:79:2: warning: Undefined or garbage value returned to caller [core.uninitialized.UndefReturn] return s[0]; ^~~~~~~~~~~ static int cond_evaluate_expr( ... { u32 i; int s[COND_EXPR_MAXDEPTH]; for (i = 0; i < expr->len; i++) ... return s[0]; When expr->len is 0, the loop which sets s[0] never runs. So return -1 if the loop never runs. Cc: stable@vger.kernel.org Signed-off-by: Tom Rix Acked-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/ss/conditional.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index 4867dfc5337a..7a92b028f722 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c @@ -27,6 +27,9 @@ static int cond_evaluate_expr(struct policydb *p, struct cond_expr *expr) int s[COND_EXPR_MAXDEPTH]; int sp = -1; + if (expr->len == 0) + return -1; + for (i = 0; i < expr->len; i++) { struct cond_expr_node *node = &expr->nodes[i]; From 99c7b309472787026ce52fd2bc5d00630567a872 Mon Sep 17 00:00:00 2001 From: Lorenz Brun Date: Thu, 11 Jun 2020 22:11:21 +0200 Subject: [PATCH 230/462] drm/amdkfd: Use correct major in devcgroup check The existing code used the major version number of the DRM driver instead of the device major number of the DRM subsystem for validating access for a devices cgroup. This meant that accesses allowed by the devices cgroup weren't permitted and certain accesses denied by the devices cgroup were permitted (if they matched the wrong major device number). Signed-off-by: Lorenz Brun Fixes: 6b855f7b83d2f ("drm/amdkfd: Check against device cgroup") Reviewed-off-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index cde5e4c7caa1..06c5fdb1e818 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -1076,7 +1077,7 @@ static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd) #if defined(CONFIG_CGROUP_DEVICE) struct drm_device *ddev = kfd->ddev; - return devcgroup_check_permission(DEVCG_DEV_CHAR, ddev->driver->major, + return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR, ddev->render->index, DEVCG_ACC_WRITE | DEVCG_ACC_READ); #else From 7386f5c9c8080525985780ed8c3d1475258a4e34 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 15 Jun 2020 14:29:55 -0400 Subject: [PATCH 231/462] drm/amdgpu/pm: update comment to clarify Overdrive interfaces Vega10 and previous asics use one interface, vega20 and newer use another. Reviewed-by: Evan Quan Acked-by: Nirmoy Das Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 775e389c9a13..b3d33307ff09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -696,7 +696,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, * default power levels, write "r" (reset) to the file to reset them. * * - * < For Vega20 > + * < For Vega20 and newer ASICs > * * Reading the file will display: * From da9cebe16930f0273278fe893f2809450c61ae41 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 15 Jun 2020 16:36:49 -0400 Subject: [PATCH 232/462] drm/amdgpu: fix documentation around busy_percentage Add rename the gpu busy percentage for consistency and add the mem busy percentage documentation. Reviewed-by: Evan Quan Reviewed-by: Nirmoy Das Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu.rst | 9 ++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst index 4cc74325bf91..17112352f605 100644 --- a/Documentation/gpu/amdgpu.rst +++ b/Documentation/gpu/amdgpu.rst @@ -197,11 +197,14 @@ pp_power_profile_mode .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c :doc: pp_power_profile_mode -busy_percent -~~~~~~~~~~~~ +*_busy_percent +~~~~~~~~~~~~~~ .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c - :doc: busy_percent + :doc: gpu_busy_percent + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c + :doc: mem_busy_percent GPU Product Information ======================= diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index b3d33307ff09..16596a9ccabe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1668,7 +1668,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, } /** - * DOC: busy_percent + * DOC: gpu_busy_percent * * The amdgpu driver provides a sysfs API for reading how busy the GPU * is as a percentage. The file gpu_busy_percent is used for this. From aadf9dcef9d4cd68c73a4ab934f93319c4becc47 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 17 Jun 2020 22:50:33 +0100 Subject: [PATCH 233/462] rxrpc: Fix trace string The trace symbol printer (__print_symbolic()) ignores symbols that map to an empty string and prints the hex value instead. Fix the symbol for rxrpc_cong_no_change to " -" instead of "" to avoid this. Fixes: b54a134a7de4 ("rxrpc: Fix handling of enums-to-string translation in tracing") Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ba9efdc848f9..059b6e45a028 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -400,7 +400,7 @@ enum rxrpc_tx_point { EM(rxrpc_cong_begin_retransmission, " Retrans") \ EM(rxrpc_cong_cleared_nacks, " Cleared") \ EM(rxrpc_cong_new_low_nack, " NewLowN") \ - EM(rxrpc_cong_no_change, "") \ + EM(rxrpc_cong_no_change, " -") \ EM(rxrpc_cong_progress, " Progres") \ EM(rxrpc_cong_retransmit_again, " ReTxAgn") \ EM(rxrpc_cong_rtt_window_end, " RttWinE") \ From e869e7a17798d85829fa7d4f9bbe1eebd4b2d3f6 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 15 Jun 2020 10:54:56 +0800 Subject: [PATCH 234/462] net: usb: ax88179_178a: fix packet alignment padding Using a AX88179 device (0b95:1790), I see two bytes of appended data on every RX packet. For example, this 48-byte ping, using 0xff as a payload byte: 04:20:22.528472 IP 192.168.1.1 > 192.168.1.2: ICMP echo request, id 2447, seq 1, length 64 0x0000: 000a cd35 ea50 000a cd35 ea4f 0800 4500 0x0010: 0054 c116 4000 4001 f63e c0a8 0101 c0a8 0x0020: 0102 0800 b633 098f 0001 87ea cd5e 0000 0x0030: 0000 dcf2 0600 0000 0000 ffff ffff ffff 0x0040: ffff ffff ffff ffff ffff ffff ffff ffff 0x0050: ffff ffff ffff ffff ffff ffff ffff ffff 0x0060: ffff 961f Those last two bytes - 96 1f - aren't part of the original packet. In the ax88179 RX path, the usbnet rx_fixup function trims a 2-byte 'alignment pseudo header' from the start of the packet, and sets the length from a per-packet field populated by hardware. It looks like that length field *includes* the 2-byte header; the current driver assumes that it's excluded. This change trims the 2-byte alignment header after we've set the packet length, so the resulting packet length is correct. While we're moving the comment around, this also fixes the spelling of 'pseudo'. Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- drivers/net/usb/ax88179_178a.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 950711448f39..a38e868e44d4 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1491,10 +1491,10 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) } if (pkt_cnt == 0) { - /* Skip IP alignment psudo header */ - skb_pull(skb, 2); skb->len = pkt_len; - skb_set_tail_pointer(skb, pkt_len); + /* Skip IP alignment pseudo header */ + skb_pull(skb, 2); + skb_set_tail_pointer(skb, skb->len); skb->truesize = pkt_len + sizeof(struct sk_buff); ax88179_rx_checksum(skb, pkt_hdr); return 1; @@ -1503,8 +1503,9 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) ax_skb = skb_clone(skb, GFP_ATOMIC); if (ax_skb) { ax_skb->len = pkt_len; - ax_skb->data = skb->data + 2; - skb_set_tail_pointer(ax_skb, pkt_len); + /* Skip IP alignment pseudo header */ + skb_pull(ax_skb, 2); + skb_set_tail_pointer(ax_skb, ax_skb->len); ax_skb->truesize = pkt_len + sizeof(struct sk_buff); ax88179_rx_checksum(ax_skb, pkt_hdr); usbnet_skb_return(dev, ax_skb); From a2ad7c21ad8cf1ce4ad65e13df1c2a1c29b38ac5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 17 Jun 2020 23:01:23 +0100 Subject: [PATCH 235/462] rxrpc: Fix handling of rwind from an ACK packet The handling of the receive window size (rwind) from a received ACK packet is not correct. The rxrpc_input_ackinfo() function currently checks the current Tx window size against the rwind from the ACK to see if it has changed, but then limits the rwind size before storing it in the tx_winsize member and, if it increased, wake up the transmitting process. This means that if rwind > RXRPC_RXTX_BUFF_SIZE - 1, this path will always be followed. Fix this by limiting rwind before we compare it to tx_winsize. The effect of this can be seen by enabling the rxrpc_rx_rwind_change tracepoint. Fixes: 702f2ac87a9a ("rxrpc: Wake up the transmitter if Rx window size increases on the peer") Signed-off-by: David Howells --- net/rxrpc/input.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 299ac98e9754..767579328a06 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -722,13 +722,12 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), rwind, ntohl(ackinfo->jumbo_max)); + if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) + rwind = RXRPC_RXTX_BUFF_SIZE - 1; if (call->tx_winsize != rwind) { - if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) - rwind = RXRPC_RXTX_BUFF_SIZE - 1; if (rwind > call->tx_winsize) wake = true; - trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, - ntohl(ackinfo->rwind), wake); + trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, rwind, wake); call->tx_winsize = rwind; } From 02c28dffb13abbaaedece1e4a6493b48ad3f913a Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 17 Jun 2020 15:46:33 +0100 Subject: [PATCH 236/462] rxrpc: Fix afs large storage transmission performance drop Commit 2ad6691d988c, which moved the modification of the status annotation for a packet in the Tx buffer prior to the retransmission moved the state clearance, but managed to lose the bit that set it to UNACK. Consequently, if a retransmission occurs, the packet is accidentally changed to the ACK state (ie. 0) by masking it off, which means that the packet isn't counted towards the tally of newly-ACK'd packets if it gets hard-ACK'd. This then prevents the congestion control algorithm from recovering properly. Fix by reinstating the change of state to UNACK. Spotted by the generic/460 xfstest. Fixes: 2ad6691d988c ("rxrpc: Fix race between incoming ACK parser and retransmitter") Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index aa1c8eee6557..6be2672a65ea 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -253,7 +253,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) * confuse things */ annotation &= ~RXRPC_TX_ANNO_MASK; - annotation |= RXRPC_TX_ANNO_RESENT; + annotation |= RXRPC_TX_ANNO_UNACK | RXRPC_TX_ANNO_RESENT; call->rxtx_annotations[ix] = annotation; skb = call->rxtx_buffer[ix]; From 3103b6feb4454646558eedc50ece728bc469f341 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 15 Jun 2020 18:14:59 -0700 Subject: [PATCH 237/462] ionic: no link check while resetting queues If the driver is busy resetting queues after a change in MTU or queue parameters, don't bother checking the link, wait until the next watchdog cycle. Fixes: 987c0871e8ae ("ionic: check for linkup in watchdog") Signed-off-by: Shannon Nelson Acked-by: Jonathan Toppins Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 9d8c969f21cb..bfadc4934702 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -96,7 +96,8 @@ static void ionic_link_status_check(struct ionic_lif *lif) u16 link_status; bool link_up; - if (!test_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state)) + if (!test_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state) || + test_bit(IONIC_LIF_F_QUEUE_RESET, lif->state)) return; link_status = le16_to_cpu(lif->info->status.link_status); From ef7232da6bcd4294cbb2d424bc35885721570f01 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 16 Jun 2020 08:06:26 -0700 Subject: [PATCH 238/462] ionic: export features for vlans to use Set up vlan_features for use by any vlans above us. Fixes: beead698b173 ("ionic: Add the basic NDO callbacks for netdev support") Signed-off-by: Shannon Nelson Acked-by: Jonathan Toppins Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index bfadc4934702..8f29ef133743 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1246,6 +1246,7 @@ static int ionic_init_nic_features(struct ionic_lif *lif) netdev->hw_features |= netdev->hw_enc_features; netdev->features |= netdev->hw_features; + netdev->vlan_features |= netdev->features & ~NETIF_F_VLAN_FEATURES; netdev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE; From fe35ec58f0d339221643287bbb7cee15c93a5389 Mon Sep 17 00:00:00 2001 From: Weiping Zhang Date: Wed, 17 Jun 2020 14:18:37 +0800 Subject: [PATCH 239/462] block: update hctx map when use multiple maps There is an issue when tune the number for read and write queues, if the total queue count was not changed. The hctx->type cannot be updated, since __blk_mq_update_nr_hw_queues will return directly if the total queue count has not been changed. Reproduce: dmesg | grep "default/read/poll" [ 2.607459] nvme nvme0: 48/0/0 default/read/poll queues cat /sys/kernel/debug/block/nvme0n1/hctx*/type | sort | uniq -c 48 default tune the write queues to 24: echo 24 > /sys/module/nvme/parameters/write_queues echo 1 > /sys/block/nvme0n1/device/reset_controller dmesg | grep "default/read/poll" [ 433.547235] nvme nvme0: 24/24/0 default/read/poll queues cat /sys/kernel/debug/block/nvme0n1/hctx*/type | sort | uniq -c 48 default The driver's hardware queue mapping is not same as block layer. Signed-off-by: Weiping Zhang Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 4f57d27bfa73..a9aa6d1e44cf 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3479,7 +3479,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids) nr_hw_queues = nr_cpu_ids; - if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues) + if (nr_hw_queues < 1) + return; + if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) return; list_for_each_entry(q, &set->tag_list, tag_set_list) From d50313a5a0d803bcf55121a2b82086633060d05e Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 17 Jun 2020 11:49:09 -0500 Subject: [PATCH 240/462] ALSA: hda: Intel: add missing PCI IDs for ICL-H, TGL-H and EKL Mirror PCI ids used for SOF. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Guennadi Liakhovetski Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20200617164909.18225-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 41a03c61a74b..11ec5c56c80e 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2470,6 +2470,9 @@ static const struct pci_device_id azx_ids[] = { /* Icelake */ { PCI_DEVICE(0x8086, 0x34c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* Icelake-H */ + { PCI_DEVICE(0x8086, 0x3dc8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Jasperlake */ { PCI_DEVICE(0x8086, 0x38c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, @@ -2478,9 +2481,14 @@ static const struct pci_device_id azx_ids[] = { /* Tigerlake */ { PCI_DEVICE(0x8086, 0xa0c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* Tigerlake-H */ + { PCI_DEVICE(0x8086, 0x43c8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Elkhart Lake */ { PCI_DEVICE(0x8086, 0x4b55), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + { PCI_DEVICE(0x8086, 0x4b58), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Broxton-P(Apollolake) */ { PCI_DEVICE(0x8086, 0x5a98), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_BROXTON }, From 618e07865b7453d02410c1f3407c2d78a670eabb Mon Sep 17 00:00:00 2001 From: Barry Song Date: Thu, 18 Jun 2020 09:58:28 +1200 Subject: [PATCH 241/462] arm64: mm: reserve hugetlb CMA after numa_init hugetlb_cma_reserve() is called at the wrong place. numa_init has not been done yet. so all reserved memory will be located at node0. Fixes: cf11e85fc08c ("mm: hugetlb: optionally allocate gigantic hugepages using cma") Signed-off-by: Barry Song Reviewed-by: Anshuman Khandual Acked-by: Roman Gushchin Cc: Matthias Brugger Cc: Will Deacon Link: https://lore.kernel.org/r/20200617215828.25296-1-song.bao.hua@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index e631e6425165..1e93cfc7c47a 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -404,11 +404,6 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; dma_contiguous_reserve(arm64_dma32_phys_limit); - -#ifdef CONFIG_ARM64_4K_PAGES - hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); -#endif - } void __init bootmem_init(void) @@ -424,6 +419,16 @@ void __init bootmem_init(void) min_low_pfn = min; arm64_numa_init(); + + /* + * must be done after arm64_numa_init() which calls numa_init() to + * initialize node_online_map that gets used in hugetlb_cma_reserve() + * while allocating required CMA size across online nodes. + */ +#ifdef CONFIG_ARM64_4K_PAGES + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); +#endif + /* * Sparsemem tries to allocate bootmem in memory_present(), so must be * done after the fixed reservations. From bf508ec95ca3b902f14bb311a7709e5cb57fbc49 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Jun 2020 16:34:07 -0500 Subject: [PATCH 242/462] arm64: kexec_file: Use struct_size() in kmalloc() Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. This code was detected with the help of Coccinelle and, audited and fixed manually. Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20200617213407.GA1385@embeddedor Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec_file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 522e6f517ec0..361a1143e09e 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -219,8 +219,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) MEMBLOCK_NONE, &start, &end, NULL) nr_ranges++; - cmem = kmalloc(sizeof(struct crash_mem) + - sizeof(struct crash_mem_range) * nr_ranges, GFP_KERNEL); + cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); if (!cmem) return -ENOMEM; From 24ebec25fb270100e252b19c288e21bd7d8cc7f7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 29 May 2020 14:12:18 +0100 Subject: [PATCH 243/462] arm64: hw_breakpoint: Don't invoke overflow handler on uaccess watchpoints Unprivileged memory accesses generated by the so-called "translated" instructions (e.g. STTR) at EL1 can cause EL0 watchpoints to fire unexpectedly if kernel debugging is enabled. In such cases, the hw_breakpoint logic will invoke the user overflow handler which will typically raise a SIGTRAP back to the current task. This is futile when returning back to the kernel because (a) the signal won't have been delivered and (b) userspace can't handle the thing anyway. Avoid invoking the user overflow handler for watchpoints triggered by kernel uaccess routines, and instead single-step over the faulting instruction as we would if no overflow handler had been installed. (Fixes tag identifies the introduction of unprivileged memory accesses, which exposed this latent bug in the hw_breakpoint code) Cc: Catalin Marinas Cc: James Morse Fixes: 57f4959bad0a ("arm64: kernel: Add support for User Access Override") Reported-by: Luis Machado Signed-off-by: Will Deacon --- arch/arm64/kernel/hw_breakpoint.c | 44 ++++++++++++++++++------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 0b727edf4104..af234a1e08b7 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -730,6 +730,27 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, return 0; } +static int watchpoint_report(struct perf_event *wp, unsigned long addr, + struct pt_regs *regs) +{ + int step = is_default_overflow_handler(wp); + struct arch_hw_breakpoint *info = counter_arch_bp(wp); + + info->trigger = addr; + + /* + * If we triggered a user watchpoint from a uaccess routine, then + * handle the stepping ourselves since userspace really can't help + * us with this. + */ + if (!user_mode(regs) && info->ctrl.privilege == AARCH64_BREAKPOINT_EL0) + step = 1; + else + perf_bp_event(wp, regs); + + return step; +} + static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -739,7 +760,6 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, u64 val; struct perf_event *wp, **slots; struct debug_info *debug_info; - struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; slots = this_cpu_ptr(wp_on_reg); @@ -777,25 +797,13 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, if (dist != 0) continue; - info = counter_arch_bp(wp); - info->trigger = addr; - perf_bp_event(wp, regs); - - /* Do we need to handle the stepping? */ - if (is_default_overflow_handler(wp)) - step = 1; + step = watchpoint_report(wp, addr, regs); } - if (min_dist > 0 && min_dist != -1) { - /* No exact match found. */ - wp = slots[closest_match]; - info = counter_arch_bp(wp); - info->trigger = addr; - perf_bp_event(wp, regs); - /* Do we need to handle the stepping? */ - if (is_default_overflow_handler(wp)) - step = 1; - } + /* No exact match found? */ + if (min_dist > 0 && min_dist != -1) + step = watchpoint_report(slots[closest_match], addr, regs); + rcu_read_unlock(); if (!step) From 9f7041b71a2f5defc1629368e7dbe83a9c6ea388 Mon Sep 17 00:00:00 2001 From: Ravulapati Vishnu vardhan rao Date: Thu, 18 Jun 2020 12:56:52 +0530 Subject: [PATCH 244/462] ASoC: amd: closing specific instance. The steps to reproduce: Record from the internal mic : (arecord -D hw:1,2 -f dat /dev/null -V stereos) Record from the headphone mic: (arecord -D hw:1,0 -f dat /dev/null -V stereos) Kill the recording from internal mic. We can see the recording from the headphone mic is broken. This patch rectifies the issue reported. Signed-off-by: Ravulapati Vishnu vardhan rao Link: https://lore.kernel.org/r/20200618072653.27103-1-Vishnuvardhanrao.Ravulapati@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/raven/acp3x-pcm-dma.c | 30 ++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/sound/soc/amd/raven/acp3x-pcm-dma.c b/sound/soc/amd/raven/acp3x-pcm-dma.c index d8f554f369a8..e6386de20ac7 100644 --- a/sound/soc/amd/raven/acp3x-pcm-dma.c +++ b/sound/soc/amd/raven/acp3x-pcm-dma.c @@ -342,11 +342,34 @@ static int acp3x_dma_close(struct snd_soc_component *component, { struct snd_soc_pcm_runtime *prtd; struct i2s_dev_data *adata; + struct i2s_stream_instance *ins; prtd = substream->private_data; component = snd_soc_rtdcom_lookup(prtd, DRV_NAME); adata = dev_get_drvdata(component->dev); + ins = substream->runtime->private_data; + if (!ins) + return -EINVAL; + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + switch (ins->i2s_instance) { + case I2S_BT_INSTANCE: + adata->play_stream = NULL; + break; + case I2S_SP_INSTANCE: + default: + adata->i2ssp_play_stream = NULL; + } + } else { + switch (ins->i2s_instance) { + case I2S_BT_INSTANCE: + adata->capture_stream = NULL; + break; + case I2S_SP_INSTANCE: + default: + adata->i2ssp_capture_stream = NULL; + } + } /* Disable ACP irq, when the current stream is being closed and * another stream is also not active. @@ -354,13 +377,6 @@ static int acp3x_dma_close(struct snd_soc_component *component, if (!adata->play_stream && !adata->capture_stream && !adata->i2ssp_play_stream && !adata->i2ssp_capture_stream) rv_writel(0, adata->acp3x_base + mmACP_EXTERNAL_INTR_ENB); - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { - adata->play_stream = NULL; - adata->i2ssp_play_stream = NULL; - } else { - adata->capture_stream = NULL; - adata->i2ssp_capture_stream = NULL; - } return 0; } From 1ea7c546b8b3f27bf7da673c265b09c8f79d11bc Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 11 Jun 2020 16:00:45 +0300 Subject: [PATCH 245/462] RDMA/core: Annotate CMA unlock helper routine Fix the following sparse error by adding annotation to cm_queue_work_unlock() that it releases cm_id_priv->lock lock. drivers/infiniband/core/cm.c:936:24: warning: context imbalance in 'cm_queue_work_unlock' - unexpected unlock Fixes: e83f195aa45c ("RDMA/cm: Pull duplicated code into cm_queue_work_unlock()") Link: https://lore.kernel.org/r/20200611130045.1994026-1-leon@kernel.org Reported-by: kernel test robot Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 9ce787e37e22..0d1377232933 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -918,6 +918,7 @@ static void cm_free_work(struct cm_work *work) static void cm_queue_work_unlock(struct cm_id_private *cm_id_priv, struct cm_work *work) + __releases(&cm_id_priv->lock) { bool immediate; From 90a239ee25fa3a483facec3de7c144361a3d3a51 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Sat, 13 Jun 2020 23:11:48 -0500 Subject: [PATCH 246/462] RDMA/rvt: Fix potential memory leak caused by rvt_alloc_rq In case of failure of alloc_ud_wq_attr(), the memory allocated by rvt_alloc_rq() is not freed. Fix it by calling rvt_free_rq() using the existing clean-up code. Fixes: d310c4bf8aea ("IB/{rdmavt, hfi1, qib}: Remove AH refcount for UD QPs") Link: https://lore.kernel.org/r/20200614041148.131983-1-pakki001@umn.edu Signed-off-by: Aditya Pakki Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/qp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 511b72809e14..7db35dd6ad74 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1204,7 +1204,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, err = alloc_ud_wq_attr(qp, rdi->dparms.node); if (err) { ret = (ERR_PTR(err)); - goto bail_driver_priv; + goto bail_rq_rvt; } if (init_attr->create_flags & IB_QP_CREATE_NETDEV_USE) @@ -1314,9 +1314,11 @@ bail_qpn: rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: - rvt_free_rq(&qp->r_rq); free_ud_wq_attr(qp); +bail_rq_rvt: + rvt_free_rq(&qp->r_rq); + bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); From 0133654d8eb8607eacc96badfe49bf992155f4cb Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 14 Jun 2020 13:35:34 +0300 Subject: [PATCH 247/462] RDMA/efa: Set maximum pkeys device attribute The max_pkeys device attribute was not set in query device verb, set it to one in order to account for the default pkey (0xffff). This information is exposed to userspace and can cause malfunction Fixes: 40909f664d27 ("RDMA/efa: Add EFA verbs implementation") Link: https://lore.kernel.org/r/20200614103534.88060-1-galpress@amazon.com Reviewed-by: Firas JahJah Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_verbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 08313f7c73bc..7dd082441333 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -212,6 +212,7 @@ int efa_query_device(struct ib_device *ibdev, props->max_send_sge = dev_attr->max_sq_sge; props->max_recv_sge = dev_attr->max_rq_sge; props->max_sge_rd = dev_attr->max_wr_rdma_sge; + props->max_pkeys = 1; if (udata && udata->outlen) { resp.max_sq_sge = dev_attr->max_sq_sge; From 0dfbd5ecf28cbcb81674c49d34ee97366db1be44 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Tue, 16 Jun 2020 12:34:08 +0300 Subject: [PATCH 248/462] RDMA/qedr: Fix KASAN: use-after-free in ucma_event_handler+0x532 Private data passed to iwarp_cm_handler is copied for connection request / response, but ignored otherwise. If junk is passed, it is stored in the event and used later in the event processing. The driver passes an old junk pointer during connection close which leads to a use-after-free on event processing. Set private data to NULL for events that don 't have private data. BUG: KASAN: use-after-free in ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: Read of size 4 at addr ffff8886caa71200 by task kworker/u128:1/5250 kernel: kernel: Workqueue: iw_cm_wq cm_work_handler [iw_cm] kernel: Call Trace: kernel: dump_stack+0x8c/0xc0 kernel: print_address_description.constprop.0+0x1b/0x210 kernel: ? ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: ? ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: __kasan_report.cold+0x1a/0x33 kernel: ? ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: kasan_report+0xe/0x20 kernel: check_memory_region+0x130/0x1a0 kernel: memcpy+0x20/0x50 kernel: ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: ? __rpc_execute+0x608/0x620 [sunrpc] kernel: cma_iw_handler+0x212/0x330 [rdma_cm] kernel: ? iw_conn_req_handler+0x6e0/0x6e0 [rdma_cm] kernel: ? enqueue_timer+0x86/0x140 kernel: ? _raw_write_lock_irq+0xd0/0xd0 kernel: cm_work_handler+0xd3d/0x1070 [iw_cm] Fixes: e411e0587e0d ("RDMA/qedr: Add iWARP connection management functions") Link: https://lore.kernel.org/r/20200616093408.17827-1-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 792eecd206b6..97fc7dd353b0 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -150,8 +150,17 @@ qedr_iw_issue_event(void *context, if (params->cm_info) { event.ird = params->cm_info->ird; event.ord = params->cm_info->ord; - event.private_data_len = params->cm_info->private_data_len; - event.private_data = (void *)params->cm_info->private_data; + /* Only connect_request and reply have valid private data + * the rest of the events this may be left overs from + * connection establishment. CONNECT_REQUEST is issued via + * qedr_iw_mpa_request + */ + if (event_type == IW_CM_EVENT_CONNECT_REPLY) { + event.private_data_len = + params->cm_info->private_data_len; + event.private_data = + (void *)params->cm_info->private_data; + } } if (ep->cm_id) From 730c8912484186d4623d0c76509066d285c3a755 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Tue, 16 Jun 2020 13:43:04 +0300 Subject: [PATCH 249/462] RDMA/cma: Protect bind_list and listen_list while finding matching cm id The bind_list and listen_list must be accessed under a lock, add the missing locking around the access in cm_ib_id_from_event() In addition add lockdep asserts to make it clearer what the locking semantic is here. general protection fault: 0000 [#1] SMP NOPTI CPU: 226 PID: 126135 Comm: kworker/226:1 Tainted: G OE 4.12.14-150.47-default #1 SLE15 Hardware name: Cray Inc. Windom/Windom, BIOS 0.8.7 01-10-2020 Workqueue: ib_cm cm_work_handler [ib_cm] task: ffff9c5a60a1d2c0 task.stack: ffffc1d91f554000 RIP: 0010:cma_ib_req_handler+0x3f1/0x11b0 [rdma_cm] RSP: 0018:ffffc1d91f557b40 EFLAGS: 00010286 RAX: deacffffffffff30 RBX: 0000000000000001 RCX: ffff9c2af5bb6000 RDX: 00000000000000a9 RSI: ffff9c5aa4ed2f10 RDI: ffffc1d91f557b08 RBP: ffffc1d91f557d90 R08: ffff9c340cc80000 R09: ffff9c2c0f901900 R10: 0000000000000000 R11: 0000000000000001 R12: deacffffffffff30 R13: ffff9c5a48aeec00 R14: ffffc1d91f557c30 R15: ffff9c5c2eea3688 FS: 0000000000000000(0000) GS:ffff9c5c2fa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00002b5cc03fa320 CR3: 0000003f8500a000 CR4: 00000000003406e0 Call Trace: ? rdma_addr_cancel+0xa0/0xa0 [ib_core] ? cm_process_work+0x28/0x140 [ib_cm] cm_process_work+0x28/0x140 [ib_cm] ? cm_get_bth_pkey.isra.44+0x34/0xa0 [ib_cm] cm_work_handler+0xa06/0x1a6f [ib_cm] ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to+0x7c/0x4b0 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 process_one_work+0x1da/0x400 worker_thread+0x2b/0x3f0 ? process_one_work+0x400/0x400 kthread+0x118/0x140 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x22/0x40 Code: 00 66 83 f8 02 0f 84 ca 05 00 00 49 8b 84 24 d0 01 00 00 48 85 c0 0f 84 68 07 00 00 48 2d d0 01 00 00 49 89 c4 0f 84 59 07 00 00 <41> 0f b7 44 24 20 49 8b 77 50 66 83 f8 0a 75 9e 49 8b 7c 24 28 Fixes: 4c21b5bcef73 ("IB/cma: Add net_dev and private data checks to RDMA CM") Link: https://lore.kernel.org/r/20200616104304.2426081-1-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 3d7cc9f0f3d4..c30cf5307ce3 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1624,6 +1624,8 @@ static struct rdma_id_private *cma_find_listener( { struct rdma_id_private *id_priv, *id_priv_dev; + lockdep_assert_held(&lock); + if (!bind_list) return ERR_PTR(-EINVAL); @@ -1670,6 +1672,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id, } } + mutex_lock(&lock); /* * Net namespace might be getting deleted while route lookup, * cm_id lookup is in progress. Therefore, perform netdevice @@ -1711,6 +1714,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id, id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev); err: rcu_read_unlock(); + mutex_unlock(&lock); if (IS_ERR(id_priv) && *net_dev) { dev_put(*net_dev); *net_dev = NULL; @@ -2492,6 +2496,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct net *net = id_priv->id.route.addr.dev_addr.net; int ret; + lockdep_assert_held(&lock); + if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; @@ -3342,6 +3348,8 @@ static void cma_bind_port(struct rdma_bind_list *bind_list, u64 sid, mask; __be16 port; + lockdep_assert_held(&lock); + addr = cma_src_addr(id_priv); port = htons(bind_list->port); @@ -3370,6 +3378,8 @@ static int cma_alloc_port(enum rdma_ucm_port_space ps, struct rdma_bind_list *bind_list; int ret; + lockdep_assert_held(&lock); + bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); if (!bind_list) return -ENOMEM; @@ -3396,6 +3406,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list, struct sockaddr *saddr = cma_src_addr(id_priv); __be16 dport = cma_port(daddr); + lockdep_assert_held(&lock); + hlist_for_each_entry(cur_id, &bind_list->owners, node) { struct sockaddr *cur_daddr = cma_dst_addr(cur_id); struct sockaddr *cur_saddr = cma_src_addr(cur_id); @@ -3435,6 +3447,8 @@ static int cma_alloc_any_port(enum rdma_ucm_port_space ps, unsigned int rover; struct net *net = id_priv->id.route.addr.dev_addr.net; + lockdep_assert_held(&lock); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; rover = prandom_u32() % remaining + low; @@ -3482,6 +3496,8 @@ static int cma_check_port(struct rdma_bind_list *bind_list, struct rdma_id_private *cur_id; struct sockaddr *addr, *cur_addr; + lockdep_assert_held(&lock); + addr = cma_src_addr(id_priv); hlist_for_each_entry(cur_id, &bind_list->owners, node) { if (id_priv == cur_id) @@ -3512,6 +3528,8 @@ static int cma_use_port(enum rdma_ucm_port_space ps, unsigned short snum; int ret; + lockdep_assert_held(&lock); + snum = ntohs(cma_port(cma_src_addr(id_priv))); if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; From ab183d460daac6292cb0cfd989d88b37b2437844 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 16 Jun 2020 13:45:36 +0300 Subject: [PATCH 250/462] RDMA/mlx5: Add missed RST2INIT and INIT2INIT steps during ECE handshake Missed steps during ECE handshake left userspace application with less options for the ECE handshake. Pass ECE options in the additional transitions. Fixes: 50aec2c3135e ("RDMA/mlx5: Return ECE data after modify QP") Link: https://lore.kernel.org/r/20200616104536.2426384-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qpc.c | 8 ++++++++ include/linux/mlx5/mlx5_ifc.h | 10 ++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index c19d91d6dce8..7c3968ef9cd1 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -346,6 +346,9 @@ static int get_ece_from_mbox(void *out, u16 opcode) int ece = 0; switch (opcode) { + case MLX5_CMD_OP_INIT2INIT_QP: + ece = MLX5_GET(init2init_qp_out, out, ece); + break; case MLX5_CMD_OP_INIT2RTR_QP: ece = MLX5_GET(init2rtr_qp_out, out, ece); break; @@ -355,6 +358,9 @@ static int get_ece_from_mbox(void *out, u16 opcode) case MLX5_CMD_OP_RTS2RTS_QP: ece = MLX5_GET(rts2rts_qp_out, out, ece); break; + case MLX5_CMD_OP_RST2INIT_QP: + ece = MLX5_GET(rst2init_qp_out, out, ece); + break; default: break; } @@ -406,6 +412,7 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, return -ENOMEM; MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn, opt_param_mask, qpc, uid); + MLX5_SET(rst2init_qp_in, mbox->in, ece, ece); break; case MLX5_CMD_OP_INIT2RTR_QP: if (MBOX_ALLOC(mbox, init2rtr_qp)) @@ -439,6 +446,7 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, return -ENOMEM; MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn, opt_param_mask, qpc, uid); + MLX5_SET(init2init_qp_in, mbox->in, ece, ece); break; default: return -EINVAL; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 116bd9bb347f..ca1887dd0423 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4283,7 +4283,8 @@ struct mlx5_ifc_rst2init_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_rst2init_qp_in_bits { @@ -4300,7 +4301,7 @@ struct mlx5_ifc_rst2init_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; @@ -6619,7 +6620,8 @@ struct mlx5_ifc_init2init_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_init2init_qp_in_bits { @@ -6636,7 +6638,7 @@ struct mlx5_ifc_init2init_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; From 27d4d336f2872193e90ee5450559e1699fae0f6d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 24 Mar 2020 16:08:46 -0400 Subject: [PATCH 251/462] tools lib traceevent: Add append() function helper for appending strings There's several locations that open code realloc and strcat() to append text to strings. Add an append() function that takes a delimiter and a string to append to another string. Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Jaewon Lim Cc: Jiri Olsa Cc: Kees Kook Cc: linux-mm@kvack.org Cc: linux-trace-devel@vger.kernel.org Cc: Namhyung Kim Cc: Vlastimil Babka Link: http://lore.kernel.org/lkml/20200324200956.515118403@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 98 ++++++++++++------------------ 1 file changed, 40 insertions(+), 58 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index e1bd2a93c6db..eec96c31ea9e 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1425,6 +1425,19 @@ static unsigned int type_size(const char *name) return 0; } +static int append(char **buf, const char *delim, const char *str) +{ + char *new_buf; + + new_buf = realloc(*buf, strlen(*buf) + strlen(delim) + strlen(str) + 1); + if (!new_buf) + return -1; + strcat(new_buf, delim); + strcat(new_buf, str); + *buf = new_buf; + return 0; +} + static int event_read_fields(struct tep_event *event, struct tep_format_field **fields) { struct tep_format_field *field = NULL; @@ -1432,6 +1445,7 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** char *token; char *last_token; int count = 0; + int ret; do { unsigned int size_dynamic = 0; @@ -1490,24 +1504,15 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** field->flags |= TEP_FIELD_IS_POINTER; if (field->type) { - char *new_type; - new_type = realloc(field->type, - strlen(field->type) + - strlen(last_token) + 2); - if (!new_type) { - free(last_token); - goto fail; - } - field->type = new_type; - strcat(field->type, " "); - strcat(field->type, last_token); + ret = append(&field->type, " ", last_token); free(last_token); + if (ret < 0) + goto fail; } else field->type = last_token; last_token = token; continue; } - break; } @@ -1523,8 +1528,6 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** if (strcmp(token, "[") == 0) { enum tep_event_type last_type = type; char *brackets = token; - char *new_brackets; - int len; field->flags |= TEP_FIELD_IS_ARRAY; @@ -1536,29 +1539,27 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** field->arraylen = 0; while (strcmp(token, "]") != 0) { + const char *delim; + if (last_type == TEP_EVENT_ITEM && type == TEP_EVENT_ITEM) - len = 2; + delim = " "; else - len = 1; + delim = ""; + last_type = type; - new_brackets = realloc(brackets, - strlen(brackets) + - strlen(token) + len); - if (!new_brackets) { + ret = append(&brackets, delim, token); + if (ret < 0) { free(brackets); goto fail; } - brackets = new_brackets; - if (len == 2) - strcat(brackets, " "); - strcat(brackets, token); /* We only care about the last token */ field->arraylen = strtoul(token, NULL, 0); free_token(token); type = read_token(&token); if (type == TEP_EVENT_NONE) { + free(brackets); do_warning_event(event, "failed to find token"); goto fail; } @@ -1566,13 +1567,11 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** free_token(token); - new_brackets = realloc(brackets, strlen(brackets) + 2); - if (!new_brackets) { + ret = append(&brackets, "", "]"); + if (ret < 0) { free(brackets); goto fail; } - brackets = new_brackets; - strcat(brackets, "]"); /* add brackets to type */ @@ -1582,34 +1581,23 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** * the format: type [] item; */ if (type == TEP_EVENT_ITEM) { - char *new_type; - new_type = realloc(field->type, - strlen(field->type) + - strlen(field->name) + - strlen(brackets) + 2); - if (!new_type) { + ret = append(&field->type, " ", field->name); + if (ret < 0) { free(brackets); goto fail; } - field->type = new_type; - strcat(field->type, " "); - strcat(field->type, field->name); + ret = append(&field->type, "", brackets); + size_dynamic = type_size(field->name); free_token(field->name); - strcat(field->type, brackets); field->name = field->alias = token; type = read_token(&token); } else { - char *new_type; - new_type = realloc(field->type, - strlen(field->type) + - strlen(brackets) + 1); - if (!new_type) { + ret = append(&field->type, "", brackets); + if (ret < 0) { free(brackets); goto fail; } - field->type = new_type; - strcat(field->type, brackets); } free(brackets); } @@ -2046,19 +2034,16 @@ process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok) /* could just be a type pointer */ if ((strcmp(arg->op.op, "*") == 0) && type == TEP_EVENT_DELIM && (strcmp(token, ")") == 0)) { - char *new_atom; + int ret; if (left->type != TEP_PRINT_ATOM) { do_warning_event(event, "bad pointer type"); goto out_free; } - new_atom = realloc(left->atom.atom, - strlen(left->atom.atom) + 3); - if (!new_atom) + ret = append(&left->atom.atom, " ", "*"); + if (ret < 0) goto out_warn_free; - left->atom.atom = new_atom; - strcat(left->atom.atom, " *"); free(arg->op.op); *arg = *left; free(left); @@ -3151,18 +3136,15 @@ process_arg_token(struct tep_event *event, struct tep_print_arg *arg, } /* atoms can be more than one token long */ while (type == TEP_EVENT_ITEM) { - char *new_atom; - new_atom = realloc(atom, - strlen(atom) + strlen(token) + 2); - if (!new_atom) { + int ret; + + ret = append(&atom, " ", token); + if (ret < 0) { free(atom); *tok = NULL; free_token(token); return TEP_EVENT_ERROR; } - atom = new_atom; - strcat(atom, " "); - strcat(atom, token); free_token(token); type = read_token_item(&token); } From 74621d929d944529a5e2878a84f48bfa6fb69a66 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 24 Mar 2020 16:08:47 -0400 Subject: [PATCH 252/462] tools lib traceevent: Handle __attribute__((user)) in field names Commit c61f13eaa1ee1 ("gcc-plugins: Add structleak for more stack initialization") added "__attribute__((user))" to the user when stackleak detector is enabled. This now appears in the field format of system call trace events for system calls that have user buffers. The "__attribute__((user))" breaks the parsing in libtraceevent. That needs to be handled. Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Jaewon Kim Cc: Jiri Olsa Cc: Kees Kook Cc: Namhyung Kim Cc: Vlastimil Babka Cc: linux-mm@kvack.org Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/lkml/20200324200956.663647256@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 39 +++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index eec96c31ea9e..010e60d5a081 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1444,6 +1444,7 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** enum tep_event_type type; char *token; char *last_token; + char *delim = " "; int count = 0; int ret; @@ -1504,13 +1505,49 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** field->flags |= TEP_FIELD_IS_POINTER; if (field->type) { - ret = append(&field->type, " ", last_token); + ret = append(&field->type, delim, last_token); free(last_token); if (ret < 0) goto fail; } else field->type = last_token; last_token = token; + delim = " "; + continue; + } + + /* Handle __attribute__((user)) */ + if ((type == TEP_EVENT_DELIM) && + strcmp("__attribute__", last_token) == 0 && + token[0] == '(') { + int depth = 1; + int ret; + + ret = append(&field->type, " ", last_token); + ret |= append(&field->type, "", "("); + if (ret < 0) + goto fail; + + delim = " "; + while ((type = read_token(&token)) != TEP_EVENT_NONE) { + if (type == TEP_EVENT_DELIM) { + if (token[0] == '(') + depth++; + else if (token[0] == ')') + depth--; + if (!depth) + break; + ret = append(&field->type, "", token); + delim = ""; + } else { + ret = append(&field->type, delim, token); + delim = " "; + } + if (ret < 0) + goto fail; + free(last_token); + last_token = token; + } continue; } break; From 1b20d9491cf9cf180f1f2a46c80d69af0f775d55 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 24 Mar 2020 16:08:48 -0400 Subject: [PATCH 253/462] tools lib traceevent: Add handler for __builtin_expect() In order to move pointer checks like IS_ERR_VALUE() out of the hotpath and into the reader path of a trace event, user space tools need to be able to parse that. IS_ERR_VALUE() is defined as: #define IS_ERR_VALUE() unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO) Which eventually turns into: __builtin_expect(!!((unsigned long)(void *)(x) >= (unsigned long)-4095), 0) Now the traceevent parser can handle most of that except for the __builtin_expect(), which needs to be added. Link: https://lore.kernel.org/linux-mm/20200320055823.27089-3-jaewon31.kim@samsung.com/ Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Jaewon Kim Cc: Jiri Olsa Cc: Kees Kook Cc: Namhyung Kim Cc: Vlastimil Babka Cc: linux-mm@kvack.org Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/lkml/20200324200956.821799393@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 35 ++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 010e60d5a081..5b36c589a029 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3084,6 +3084,37 @@ err: return TEP_EVENT_ERROR; } +static enum tep_event_type +process_builtin_expect(struct tep_event *event, struct tep_print_arg *arg, char **tok) +{ + enum tep_event_type type; + char *token = NULL; + + /* Handle __builtin_expect( cond, #) */ + type = process_arg(event, arg, &token); + + if (type != TEP_EVENT_DELIM || token[0] != ',') + goto out_free; + + free_token(token); + + /* We don't care what the second parameter is of the __builtin_expect() */ + if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) + goto out_free; + + if (read_expected(TEP_EVENT_DELIM, ")") < 0) + goto out_free; + + free_token(token); + type = read_token_item(tok); + return type; + +out_free: + free_token(token); + *tok = NULL; + return TEP_EVENT_ERROR; +} + static enum tep_event_type process_function(struct tep_event *event, struct tep_print_arg *arg, char *token, char **tok) @@ -3128,6 +3159,10 @@ process_function(struct tep_event *event, struct tep_print_arg *arg, free_token(token); return process_dynamic_array_len(event, arg, tok); } + if (strcmp(token, "__builtin_expect") == 0) { + free_token(token); + return process_builtin_expect(event, arg, tok); + } func = find_func_handler(event->tep, token); if (func) { From 6a1515c962b17e2596ae7b9f074fc5685d6b435b Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 18 Jun 2020 10:06:01 +0800 Subject: [PATCH 254/462] perf build: Fix error message when asking for -fsanitize=address without required libraries When build perf with ASan or UBSan, if libasan or libubsan can not find, the feature-glibc is 0 and there exists the following error log which is wrong, because we can find gnu/libc-version.h in /usr/include, glibc-devel is also installed. [yangtiezhu@linux perf]$ make DEBUG=1 EXTRA_CFLAGS='-fno-omit-frame-pointer -fsanitize=address' BUILD: Doing 'make -j4' parallel build HOSTCC fixdep.o HOSTLD fixdep-in.o LINK fixdep :1:0: warning: -fsanitize=address and -fsanitize=kernel-address are not supported for this target :1:0: warning: -fsanitize=address not supported for this target Auto-detecting system features: ... dwarf: [ OFF ] ... dwarf_getlocations: [ OFF ] ... glibc: [ OFF ] ... gtk2: [ OFF ] ... libaudit: [ OFF ] ... libbfd: [ OFF ] ... libcap: [ OFF ] ... libelf: [ OFF ] ... libnuma: [ OFF ] ... numa_num_possible_cpus: [ OFF ] ... libperl: [ OFF ] ... libpython: [ OFF ] ... libcrypto: [ OFF ] ... libunwind: [ OFF ] ... libdw-dwarf-unwind: [ OFF ] ... zlib: [ OFF ] ... lzma: [ OFF ] ... get_cpuid: [ OFF ] ... bpf: [ OFF ] ... libaio: [ OFF ] ... libzstd: [ OFF ] ... disassembler-four-args: [ OFF ] Makefile.config:393: *** No gnu/libc-version.h found, please install glibc-dev[el]. Stop. Makefile.perf:224: recipe for target 'sub-make' failed make[1]: *** [sub-make] Error 2 Makefile:69: recipe for target 'all' failed make: *** [all] Error 2 [yangtiezhu@linux perf]$ ls /usr/include/gnu/libc-version.h /usr/include/gnu/libc-version.h After install libasan and libubsan, the feature-glibc is 1 and the build process is success, so the cause is related with libasan or libubsan, we should check them and print an error log to reflect the reality. Committer testing: $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf $ make DEBUG=1 EXTRA_CFLAGS='-fno-omit-frame-pointer -fsanitize=address' O=/tmp/build/perf -C tools/perf/ install-bin make: Entering directory '/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j12' parallel build HOSTCC /tmp/build/perf/fixdep.o HOSTLD /tmp/build/perf/fixdep-in.o LINK /tmp/build/perf/fixdep Auto-detecting system features: ... dwarf: [ OFF ] ... dwarf_getlocations: [ OFF ] ... glibc: [ OFF ] ... gtk2: [ OFF ] ... libbfd: [ OFF ] ... libcap: [ OFF ] ... libelf: [ OFF ] ... libnuma: [ OFF ] ... numa_num_possible_cpus: [ OFF ] ... libperl: [ OFF ] ... libpython: [ OFF ] ... libcrypto: [ OFF ] ... libunwind: [ OFF ] ... libdw-dwarf-unwind: [ OFF ] ... zlib: [ OFF ] ... lzma: [ OFF ] ... get_cpuid: [ OFF ] ... bpf: [ OFF ] ... libaio: [ OFF ] ... libzstd: [ OFF ] ... disassembler-four-args: [ OFF ] Makefile.config:401: *** No libasan found, please install libasan. Stop. make[1]: *** [Makefile.perf:231: sub-make] Error 2 make: *** [Makefile:70: all] Error 2 make: Leaving directory '/home/acme/git/perf/tools/perf' $ $ $ sudo dnf install libasan Installed: libasan-9.3.1-2.fc31.x86_64 $ $ $ make DEBUG=1 EXTRA_CFLAGS='-fno-omit-frame-pointer -fsanitize=address' O=/tmp/build/perf -C tools/perf/ install-bin make: Entering directory '/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j12' parallel build Auto-detecting system features: ... dwarf: [ on ] ... dwarf_getlocations: [ on ] ... glibc: [ on ] ... gtk2: [ on ] ... libbfd: [ on ] ... libcap: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... numa_num_possible_cpus: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libcrypto: [ on ] ... libunwind: [ on ] ... libdw-dwarf-unwind: [ on ] ... zlib: [ on ] ... lzma: [ on ] ... get_cpuid: [ on ] ... bpf: [ on ] ... libaio: [ on ] ... libzstd: [ on ] ... disassembler-four-args: [ on ] CC /tmp/build/perf/util/pmu-flex.o FLEX /tmp/build/perf/util/expr-flex.c CC /tmp/build/perf/util/expr-bison.o CC /tmp/build/perf/util/expr.o CC /tmp/build/perf/util/expr-flex.o CC /tmp/build/perf/util/parse-events-flex.o CC /tmp/build/perf/util/parse-events.o LD /tmp/build/perf/util/intel-pt-decoder/perf-in.o LD /tmp/build/perf/util/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/perf INSTALL python-scripts INSTALL perf_completion-script INSTALL perf-tip make: Leaving directory '/home/acme/git/perf/tools/perf' $ ldd ~/bin/perf | grep asan libasan.so.5 => /lib64/libasan.so.5 (0x00007f0904164000) $ And if we rebuild without -fsanitize-address: $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf $ make O=/tmp/build/perf -C tools/perf/ install-bin make: Entering directory '/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j12' parallel build HOSTCC /tmp/build/perf/fixdep.o HOSTLD /tmp/build/perf/fixdep-in.o LINK /tmp/build/perf/fixdep Auto-detecting system features: ... dwarf: [ on ] ... dwarf_getlocations: [ on ] ... glibc: [ on ] ... gtk2: [ on ] ... libbfd: [ on ] ... libcap: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... numa_num_possible_cpus: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libcrypto: [ on ] ... libunwind: [ on ] ... libdw-dwarf-unwind: [ on ] ... zlib: [ on ] ... lzma: [ on ] ... get_cpuid: [ on ] ... bpf: [ on ] ... libaio: [ on ] ... libzstd: [ on ] ... disassembler-four-args: [ on ] GEN /tmp/build/perf/common-cmds.h CC /tmp/build/perf/exec-cmd.o INSTALL perf_completion-script INSTALL perf-tip make: Leaving directory '/home/acme/git/perf/tools/perf' $ ldd ~/bin/perf | grep asan $ Signed-off-by: Tiezhu Yang Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: tiezhu yang Cc: xuefeng li Link: http://lore.kernel.org/lkml/1592445961-28044-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 877ca6be0ed0..513633809c81 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -396,6 +396,18 @@ else NO_LIBBPF := 1 NO_JVMTI := 1 else + ifneq ($(filter s% -fsanitize=address%,$(EXTRA_CFLAGS),),) + ifneq ($(shell ldconfig -p | grep libasan >/dev/null 2>&1; echo $$?), 0) + msg := $(error No libasan found, please install libasan); + endif + endif + + ifneq ($(filter s% -fsanitize=undefined%,$(EXTRA_CFLAGS),),) + ifneq ($(shell ldconfig -p | grep libubsan >/dev/null 2>&1; echo $$?), 0) + msg := $(error No libubsan found, please install libubsan); + endif + endif + ifneq ($(filter s% -static%,$(LDFLAGS),),) msg := $(error No static glibc found, please install glibc-static); else From 98a6151907cb5512eb6ba8b90644e3ace2d2fc46 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Tue, 16 Jun 2020 21:37:09 +0800 Subject: [PATCH 255/462] RDMA/hns: Fix a calltrace when registering MR from userspace ibmr.device is assigned after MR is successfully registered, but both write_mtpt() and frmr_write_mtpt() accesses it during the mr registration process, which may cause the following error when trying to register MR in userspace and pbl_hop_num is set to 0. pc : hns_roce_mtr_find+0xa0/0x200 [hns_roce] lr : set_mtpt_pbl+0x54/0x118 [hns_roce_hw_v2] sp : ffff00023e73ba20 x29: ffff00023e73ba20 x28: ffff00023e73bad8 x27: 0000000000000000 x26: 0000000000000000 x25: 0000000000000002 x24: 0000000000000000 x23: ffff00023e73bad0 x22: 0000000000000000 x21: ffff0000094d9000 x20: 0000000000000000 x19: ffff8020a6bdb2c0 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0140000000000000 x12: 0040000000000041 x11: ffff000240000000 x10: 0000000000001000 x9 : 0000000000000000 x8 : ffff802fb7558480 x7 : ffff802fb7558480 x6 : 000000000003483d x5 : ffff00023e73bad0 x4 : 0000000000000002 x3 : ffff00023e73bad8 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0000094d9708 Call trace: hns_roce_mtr_find+0xa0/0x200 [hns_roce] set_mtpt_pbl+0x54/0x118 [hns_roce_hw_v2] hns_roce_v2_write_mtpt+0x14c/0x168 [hns_roce_hw_v2] hns_roce_mr_enable+0x6c/0x148 [hns_roce] hns_roce_reg_user_mr+0xd8/0x130 [hns_roce] ib_uverbs_reg_mr+0x14c/0x2e0 [ib_uverbs] ib_uverbs_write+0x27c/0x3e8 [ib_uverbs] __vfs_write+0x60/0x190 vfs_write+0xac/0x1c0 ksys_write+0x6c/0xd8 __arm64_sys_write+0x24/0x30 el0_svc_common+0x78/0x130 el0_svc_handler+0x38/0x78 el0_svc+0x8/0xc Solve above issue by adding a pointer of structure hns_roce_dev as a parameter of write_mtpt() and frmr_write_mtpt(), so that both of these functions can access it before finishing MR's registration. Fixes: 9b2cf76c9f05 ("RDMA/hns: Optimize PBL buffer allocation process") Link: https://lore.kernel.org/r/1592314629-51715-1-git-send-email-liweihang@huawei.com Signed-off-by: Yangyang Li Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 7 ++++--- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 15 ++++++++------- drivers/infiniband/hw/hns/hns_roce_mr.c | 5 +++-- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a77fa6730b2d..479fa557993e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -898,13 +898,14 @@ struct hns_roce_hw { int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, enum ib_mtu mtu); - int (*write_mtpt)(void *mb_buf, struct hns_roce_mr *mr, - unsigned long mtpt_idx); + int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, + struct hns_roce_mr *mr, unsigned long mtpt_idx); int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, int flags, u32 pdn, int mr_access_flags, u64 iova, u64 size, void *mb_buf); - int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr); + int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, + struct hns_roce_mr *mr); int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw); void (*write_cqc)(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index d02207cd30df..cf39f560b800 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1756,10 +1756,10 @@ static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, val); } -static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, +static int hns_roce_v1_write_mtpt(struct hns_roce_dev *hr_dev, void *mb_buf, + struct hns_roce_mr *mr, unsigned long mtpt_idx) { - struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device); u64 pages[HNS_ROCE_MAX_INNER_MTPT_NUM] = { 0 }; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_v1_mpt_entry *mpt_entry; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c597d7281629..bb86754cb94d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2529,10 +2529,10 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, return hns_roce_cmq_send(hr_dev, &desc, 1); } -static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry, +static int set_mtpt_pbl(struct hns_roce_dev *hr_dev, + struct hns_roce_v2_mpt_entry *mpt_entry, struct hns_roce_mr *mr) { - struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device); u64 pages[HNS_ROCE_V2_MAX_INNER_MTPT_NUM] = { 0 }; struct ib_device *ibdev = &hr_dev->ib_dev; dma_addr_t pbl_ba; @@ -2571,7 +2571,8 @@ static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry, return 0; } -static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, +static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, + void *mb_buf, struct hns_roce_mr *mr, unsigned long mtpt_idx) { struct hns_roce_v2_mpt_entry *mpt_entry; @@ -2620,7 +2621,7 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, if (mr->type == MR_TYPE_DMA) return 0; - ret = set_mtpt_pbl(mpt_entry, mr); + ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); return ret; } @@ -2666,15 +2667,15 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, mr->iova = iova; mr->size = size; - ret = set_mtpt_pbl(mpt_entry, mr); + ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); } return ret; } -static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr) +static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev, + void *mb_buf, struct hns_roce_mr *mr) { - struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device); struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_v2_mpt_entry *mpt_entry; dma_addr_t pbl_ba = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 4c0bbb12770d..0e71ebee9e52 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -180,9 +180,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, } if (mr->type != MR_TYPE_FRMR) - ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx); + ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr, + mtpt_idx); else - ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr); + ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr); if (ret) { dev_err(dev, "Write mtpt fail!\n"); goto err_page; From 3ec5f54f7a0fdf706951f379d3148798325e516f Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Tue, 16 Jun 2020 21:39:38 +0800 Subject: [PATCH 256/462] RDMA/hns: Fix an cmd queue issue when resetting If a IMP reset caused by some hardware errors and hns RoCE driver reset occurred at the same time, there is a possiblity that the IMP will stop dealing with command and users can't use the hardware. The logs are as follows: hns3 0000:fd:00.1: cleaned 0, need to clean 1 hns3 0000:fd:00.1: firmware version query failed -11 hns3 0000:fd:00.1: Cmd queue init failed hns3 0000:fd:00.1: Upgrade reset level hns3 0000:fd:00.1: global reset interrupt The hns NIC driver divides the reset process into 3 status: initialization, hardware resetting and softwaring restting. RoCE driver gets reset status by interfaces provided by NIC driver and commands will not be sent to the IMP if the driver is in any above status. The main reason for this issue is that there is a time gap between status 1 and 2, if the RoCE driver sends commands to the IMP during this gap, the IMP will stop working because it is not ready. To eliminate the time gap, the hns NIC driver has added a new interface in commit a4de02287abb9 ("net: hns3: provide .get_cmdq_stat interface for the client"), so RoCE driver can ensure that no commands will be sent during resetting. Link: https://lore.kernel.org/r/1592314778-52822-1-git-send-email-liweihang@huawei.com Signed-off-by: Yangyang Li Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index bb86754cb94d..dd01a51816cc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -910,7 +910,7 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev) instance_stage = handle->rinfo.instance_state; reset_stage = handle->rinfo.reset_state; reset_cnt = ops->ae_dev_reset_cnt(handle); - hw_resetting = ops->get_hw_reset_stat(handle); + hw_resetting = ops->get_cmdq_stat(handle); sw_resetting = ops->ae_dev_resetting(handle); if (reset_cnt != hr_dev->reset_cnt) From 4121fb0db68ed4de574f9bdc630b75fcc99b4835 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 17 Jun 2020 09:18:26 +0300 Subject: [PATCH 257/462] RDMA/core: Check that type_attrs is not NULL prior access In disassociate flow, the type_attrs is set to be NULL, which is in an implicit way is checked in alloc_uobj() by "if (!attrs->context)". Change the logic to rely on that check, to be consistent with other alloc_uobj() places that will fix the following kernel splat. BUG: kernel NULL pointer dereference, address: 0000000000000018 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 2743 Comm: python3 Not tainted 5.7.0-rc6-for-upstream-perf-2020-05-23_19-04-38-5 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 RIP: 0010:alloc_begin_fd_uobject+0x18/0xf0 [ib_uverbs] Code: 89 43 48 eb 97 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 55 49 89 f5 41 54 55 48 89 fd 53 48 83 ec 08 48 8b 1f <48> 8b 43 18 48 8b 80 80 00 00 00 48 3d 20 10 33 a0 74 1c 48 3d 30 RSP: 0018:ffffc90001127b70 EFLAGS: 00010282 RAX: ffffffffa0339fe0 RBX: 0000000000000000 RCX: 8000000000000007 RDX: fffffffffffffffb RSI: ffffc90001127d28 RDI: ffff88843fe1f600 RBP: ffff88843fe1f600 R08: ffff888461eb06d8 R09: ffff888461eb06f8 R10: ffff888461eb0700 R11: 0000000000000000 R12: ffff88846a5f6450 R13: ffffc90001127d28 R14: ffff88845d7d6ea0 R15: ffffc90001127cb8 FS: 00007f469bff1540(0000) GS:ffff88846f980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 0000000450018003 CR4: 0000000000760ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? xa_store+0x28/0x40 rdma_alloc_begin_uobject+0x4f/0x90 [ib_uverbs] ib_uverbs_create_comp_channel+0x87/0xf0 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xb1/0xf0 [ib_uverbs] ib_uverbs_cmd_verbs.isra.8+0x96d/0xae0 [ib_uverbs] ? get_page_from_freelist+0x3bb/0xf70 ? _copy_to_user+0x22/0x30 ? uverbs_disassociate_api+0xd0/0xd0 [ib_uverbs] ? __wake_up_common_lock+0x87/0xc0 ib_uverbs_ioctl+0xbc/0x130 [ib_uverbs] ksys_ioctl+0x83/0xc0 ? ksys_write+0x55/0xd0 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x48/0x130 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f469ac43267 Fixes: 849e149063bd ("RDMA/core: Do not allow alloc_commit to fail") Link: https://lore.kernel.org/r/20200617061826.2625359-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 38 +++++++++++++++++------------ 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 38de4942c682..3027cd2fb247 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -470,40 +470,46 @@ static struct ib_uobject * alloc_begin_fd_uobject(const struct uverbs_api_object *obj, struct uverbs_attr_bundle *attrs) { - const struct uverbs_obj_fd_type *fd_type = - container_of(obj->type_attrs, struct uverbs_obj_fd_type, type); + const struct uverbs_obj_fd_type *fd_type; int new_fd; - struct ib_uobject *uobj; + struct ib_uobject *uobj, *ret; struct file *filp; - if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release && - fd_type->fops->release != &uverbs_async_event_release)) - return ERR_PTR(-EINVAL); - - new_fd = get_unused_fd_flags(O_CLOEXEC); - if (new_fd < 0) - return ERR_PTR(new_fd); - uobj = alloc_uobj(attrs, obj); if (IS_ERR(uobj)) + return uobj; + + fd_type = + container_of(obj->type_attrs, struct uverbs_obj_fd_type, type); + if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release && + fd_type->fops->release != &uverbs_async_event_release)) { + ret = ERR_PTR(-EINVAL); goto err_fd; + } + + new_fd = get_unused_fd_flags(O_CLOEXEC); + if (new_fd < 0) { + ret = ERR_PTR(new_fd); + goto err_fd; + } /* Note that uverbs_uobject_fd_release() is called during abort */ filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL, fd_type->flags); if (IS_ERR(filp)) { - uverbs_uobject_put(uobj); - uobj = ERR_CAST(filp); - goto err_fd; + ret = ERR_CAST(filp); + goto err_getfile; } uobj->object = filp; uobj->id = new_fd; return uobj; -err_fd: +err_getfile: put_unused_fd(new_fd); - return uobj; +err_fd: + uverbs_uobject_put(uobj); + return ret; } struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, From 6f2cc1664db20676069cff27a461ccc97dbfd114 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Thu, 18 Jun 2020 15:01:56 +0800 Subject: [PATCH 258/462] io_uring: fix possible race condition against REQ_F_NEED_CLEANUP In io_read() or io_write(), when io request is submitted successfully, it'll go through the below sequence: kfree(iovec); req->flags &= ~REQ_F_NEED_CLEANUP; return ret; But clearing REQ_F_NEED_CLEANUP might be unsafe. The io request may already have been completed, and then io_complete_rw_iopoll() and io_complete_rw() will be called, both of which will also modify req->flags if needed. This causes a race condition, with concurrent non-atomic modification of req->flags. To eliminate this race, in io_read() or io_write(), if io request is submitted successfully, we don't remove REQ_F_NEED_CLEANUP flag. If REQ_F_NEED_CLEANUP is set, we'll leave __io_req_aux_free() to the iovec cleanup work correspondingly. Cc: stable@vger.kernel.org Signed-off-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2038d52c5450..a78201b96179 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2670,8 +2670,8 @@ copy_iov: } } out_free: - kfree(iovec); - req->flags &= ~REQ_F_NEED_CLEANUP; + if (!(req->flags & REQ_F_NEED_CLEANUP)) + kfree(iovec); return ret; } @@ -2793,8 +2793,8 @@ copy_iov: } } out_free: - req->flags &= ~REQ_F_NEED_CLEANUP; - kfree(iovec); + if (!(req->flags & REQ_F_NEED_CLEANUP)) + kfree(iovec); return ret; } From bc163c2046c5867df7d2b03e85bca29adf83fa32 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Apr 2020 16:03:17 +0300 Subject: [PATCH 259/462] partitions/ldm: Replace uuid_copy() with import_uuid() where it makes sense There is a specific API to treat raw data as UUID, i.e. import_uuid(). Use it instead of uuid_copy() with explicit casting. Signed-off-by: Andy Shevchenko Signed-off-by: Jens Axboe --- block/partitions/ldm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c index 6fdfcb40c537..d333786b5c7e 100644 --- a/block/partitions/ldm.c +++ b/block/partitions/ldm.c @@ -910,7 +910,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb) return false; disk = &vb->vblk.disk; - uuid_copy(&disk->disk_id, (uuid_t *)(buffer + 0x18 + r_name)); + import_uuid(&disk->disk_id, buffer + 0x18 + r_name); return true; } From b5292111de9bb70cba3489075970889765302136 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 3 Jun 2020 15:48:19 +0800 Subject: [PATCH 260/462] libata: Use per port sync for detach Commit 130f4caf145c ("libata: Ensure ata_port probe has completed before detach") may cause system freeze during suspend. Using async_synchronize_full() in PM callbacks is wrong, since async callbacks that are already scheduled may wait for not-yet-scheduled callbacks, causes a circular dependency. Instead of using big hammer like async_synchronize_full(), use async cookie to make sure port probe are synced, without affecting other scheduled PM callbacks. Fixes: 130f4caf145c ("libata: Ensure ata_port probe has completed before detach") Suggested-by: John Garry Signed-off-by: Kai-Heng Feng Tested-by: John Garry BugLink: https://bugs.launchpad.net/bugs/1867983 Signed-off-by: Jens Axboe --- drivers/ata/libata-core.c | 11 +++++------ include/linux/libata.h | 3 +++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 69361ec43db5..b1cd4d97bc2a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -5778,7 +5777,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht) /* perform each probe asynchronously */ for (i = 0; i < host->n_ports; i++) { struct ata_port *ap = host->ports[i]; - async_schedule(async_port_probe, ap); + ap->cookie = async_schedule(async_port_probe, ap); } return 0; @@ -5920,11 +5919,11 @@ void ata_host_detach(struct ata_host *host) { int i; - /* Ensure ata_port probe has completed */ - async_synchronize_full(); - - for (i = 0; i < host->n_ports; i++) + for (i = 0; i < host->n_ports; i++) { + /* Ensure ata_port probe has completed */ + async_synchronize_cookie(host->ports[i]->cookie + 1); ata_port_detach(host->ports[i]); + } /* the host is dead now, dissociate ACPI */ ata_acpi_dissociate(host); diff --git a/include/linux/libata.h b/include/linux/libata.h index af832852e620..8a4843704d28 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -22,6 +22,7 @@ #include #include #include +#include /* * Define if arch has non-standard setup. This is a _PCI_ standard @@ -872,6 +873,8 @@ struct ata_port { struct timer_list fastdrain_timer; unsigned long fastdrain_cnt; + async_cookie_t cookie; + int em_message_type; void *private_data; From f4bd34b139a3fa2808c4205f12714c65e1548c6c Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Thu, 18 Jun 2020 12:21:37 +0800 Subject: [PATCH 261/462] loop: replace kill_bdev with invalidate_bdev When a filesystem is mounted on a loop device and on a loop ioctl LOOP_SET_STATUS64, because of kill_bdev, buffer_head mappings are getting destroyed. kill_bdev truncate_inode_pages truncate_inode_pages_range do_invalidatepage block_invalidatepage discard_buffer -->clear BH_Mapped flag sb_bread __bread_gfp bh = __getblk_gfp -->discard_buffer clear BH_Mapped flag __bread_slow submit_bh submit_bh_wbc BUG_ON(!buffer_mapped(bh)) --> hit this BUG_ON Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed") Signed-off-by: Zheng Bin Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- drivers/block/loop.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c33bbbfd1bd9..475e1a738560 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1368,14 +1368,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->lo_sizelimit != info->lo_sizelimit) { size_changed = true; sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); + invalidate_bdev(lo->lo_device); } /* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) { - /* If any pages were dirtied after kill_bdev(), try again */ + /* If any pages were dirtied after invalidate_bdev(), try again */ err = -EAGAIN; pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", __func__, lo->lo_number, lo->lo_file_name, @@ -1615,11 +1615,11 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) return 0; sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); + invalidate_bdev(lo->lo_device); blk_mq_freeze_queue(lo->lo_queue); - /* kill_bdev should have truncated all the pages */ + /* invalidate_bdev should have truncated all the pages */ if (lo->lo_device->bd_inode->i_mapping->nrpages) { err = -EAGAIN; pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", From 3373a3461aa15b7f9a871fa4cb2c9ef21ac20b47 Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Thu, 18 Jun 2020 12:21:38 +0800 Subject: [PATCH 262/462] block: make function 'kill_bdev' static kill_bdev does not have any external user, so make it static. Signed-off-by: Zheng Bin Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- fs/block_dev.c | 5 ++--- include/linux/fs.h | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 08c87db3a92b..0ae656e022fd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -75,7 +75,7 @@ static void bdev_write_inode(struct block_device *bdev) } /* Kill _all_ buffers and pagecache , dirty or not.. */ -void kill_bdev(struct block_device *bdev) +static void kill_bdev(struct block_device *bdev) { struct address_space *mapping = bdev->bd_inode->i_mapping; @@ -84,8 +84,7 @@ void kill_bdev(struct block_device *bdev) invalidate_bh_lrus(); truncate_inode_pages(mapping, 0); -} -EXPORT_SYMBOL(kill_bdev); +} /* Invalidate clean unused buffers and pagecache. */ void invalidate_bdev(struct block_device *bdev) diff --git a/include/linux/fs.h b/include/linux/fs.h index 6c4ab4dc1cd7..3f881a892ea7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2592,7 +2592,6 @@ extern void bdput(struct block_device *); extern void invalidate_bdev(struct block_device *); extern void iterate_bdevs(void (*)(struct block_device *, void *), void *); extern int sync_blockdev(struct block_device *bdev); -extern void kill_bdev(struct block_device *); extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern void emergency_thaw_bdev(struct super_block *sb); @@ -2608,7 +2607,6 @@ static inline bool sb_is_blkdev_sb(struct super_block *sb) #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } -static inline void kill_bdev(struct block_device *bdev) {} static inline void invalidate_bdev(struct block_device *bdev) {} static inline struct super_block *freeze_bdev(struct block_device *sb) From f141a422159a199f4c8dedb7e0df55b3b2cf16cd Mon Sep 17 00:00:00 2001 From: Qiushi Wu Date: Sat, 13 Jun 2020 15:51:58 -0500 Subject: [PATCH 263/462] ASoC: rockchip: Fix a reference count leak. Calling pm_runtime_get_sync increments the counter even in case of failure, causing incorrect ref count if pm_runtime_put is not called in error handling paths. Call pm_runtime_put if pm_runtime_get_sync fails. Fixes: fc05a5b22253 ("ASoC: rockchip: add support for pdm controller") Signed-off-by: Qiushi Wu Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20200613205158.27296-1-wu000273@umn.edu Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_pdm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/rockchip/rockchip_pdm.c b/sound/soc/rockchip/rockchip_pdm.c index 7cd42fcfcf38..1707414cfa92 100644 --- a/sound/soc/rockchip/rockchip_pdm.c +++ b/sound/soc/rockchip/rockchip_pdm.c @@ -590,8 +590,10 @@ static int rockchip_pdm_resume(struct device *dev) int ret; ret = pm_runtime_get_sync(dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put(dev); return ret; + } ret = regcache_sync(pdm->regmap); From abd42781c3d2155868821f1b947ae45bbc33330d Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 18 Jun 2020 11:21:24 +0800 Subject: [PATCH 264/462] spi: spidev: fix a race between spidev_release and spidev_remove Imagine below scene, spidev is referenced after it's freed. spidev_release() spidev_remove() ... spin_lock_irq(&spidev->spi_lock); spidev->spi = NULL; spin_unlock_irq(&spidev->spi_lock); mutex_lock(&device_list_lock); dofree = (spidev->spi == NULL); if (dofree) kfree(spidev); mutex_unlock(&device_list_lock); mutex_lock(&device_list_lock); list_del(&spidev->device_entry); device_destroy(spidev_class, spidev->devt); clear_bit(MINOR(spidev->devt), minors); if (spidev->users == 0) kfree(spidev); mutex_unlock(&device_list_lock); Fix it by resetting spidev->spi in device_list_lock's protection. Signed-off-by: Zhenzhong Duan Link: https://lore.kernel.org/r/20200618032125.4650-1-zhenzhong.duan@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spidev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index d753df700e9e..f74ea26c382f 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -787,13 +787,13 @@ static int spidev_remove(struct spi_device *spi) { struct spidev_data *spidev = spi_get_drvdata(spi); + /* prevent new opens */ + mutex_lock(&device_list_lock); /* make sure ops on existing fds can abort cleanly */ spin_lock_irq(&spidev->spi_lock); spidev->spi = NULL; spin_unlock_irq(&spidev->spi_lock); - /* prevent new opens */ - mutex_lock(&device_list_lock); list_del(&spidev->device_entry); device_destroy(spidev_class, spidev->devt); clear_bit(MINOR(spidev->devt), minors); From 06096cc6c5a84ced929634b0d79376b94c65a4bd Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 18 Jun 2020 11:21:25 +0800 Subject: [PATCH 265/462] spi: spidev: fix a potential use-after-free in spidev_release() If an spi device is unbounded from the driver before the release process, there will be an NULL pointer reference when it's referenced in spi_slave_abort(). Fix it by checking it's already freed before reference. Signed-off-by: Zhenzhong Duan Link: https://lore.kernel.org/r/20200618032125.4650-2-zhenzhong.duan@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spidev.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index f74ea26c382f..59e07675ef86 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -609,15 +609,20 @@ err_find_dev: static int spidev_release(struct inode *inode, struct file *filp) { struct spidev_data *spidev; + int dofree; mutex_lock(&device_list_lock); spidev = filp->private_data; filp->private_data = NULL; + spin_lock_irq(&spidev->spi_lock); + /* ... after we unbound from the underlying device? */ + dofree = (spidev->spi == NULL); + spin_unlock_irq(&spidev->spi_lock); + /* last close? */ spidev->users--; if (!spidev->users) { - int dofree; kfree(spidev->tx_buffer); spidev->tx_buffer = NULL; @@ -625,19 +630,14 @@ static int spidev_release(struct inode *inode, struct file *filp) kfree(spidev->rx_buffer); spidev->rx_buffer = NULL; - spin_lock_irq(&spidev->spi_lock); - if (spidev->spi) - spidev->speed_hz = spidev->spi->max_speed_hz; - - /* ... after we unbound from the underlying device? */ - dofree = (spidev->spi == NULL); - spin_unlock_irq(&spidev->spi_lock); - if (dofree) kfree(spidev); + else + spidev->speed_hz = spidev->spi->max_speed_hz; } #ifdef CONFIG_SPI_SLAVE - spi_slave_abort(spidev->spi); + if (!dofree) + spi_slave_abort(spidev->spi); #endif mutex_unlock(&device_list_lock); From 670d0a4b10704667765f7d18f7592993d02783aa Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Thu, 18 Jun 2020 00:02:26 +0200 Subject: [PATCH 266/462] sparse: use identifiers to define address spaces Currently, address spaces in warnings are displayed as '' with 'X' being the address space's arbitrary number. But since sparse v0.6.0-rc1 (late December 2018), sparse allows you to define the address spaces using an identifier instead of a number. This identifier is then directly used in the warnings. So, use the identifiers '__user', '__iomem', '__percpu' & '__rcu' for the corresponding address spaces. The default address space, __kernel, being not displayed in warnings, stays defined as '0'. With this change, warnings that used to be displayed as: cast removes address space '' of expression ... void [noderef] * will now be displayed as: cast removes address space '__user' of expression ... void [noderef] __iomem * This also moves the __kernel annotation to be the first one, since it is quite different from the others because it's the default one, and so: - it's never displayed - it's normally not needed, nor in type annotations, nor in cast between address spaces. The only time it's needed is when it's combined with a typeof to express "the same type as this one but without the address space" - it can't be defined with a name, '0' must be used. So, it seemed strange to me to have it in the middle of the other ones. Signed-off-by: Luc Van Oostenryck Acked-by: Miguel Ojeda Signed-off-by: Linus Torvalds --- include/linux/compiler_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 21aed0981edf..e368384445b6 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -5,20 +5,20 @@ #ifndef __ASSEMBLY__ #ifdef __CHECKER__ -# define __user __attribute__((noderef, address_space(1))) # define __kernel __attribute__((address_space(0))) +# define __user __attribute__((noderef, address_space(__user))) # define __safe __attribute__((safe)) # define __force __attribute__((force)) # define __nocast __attribute__((nocast)) -# define __iomem __attribute__((noderef, address_space(2))) +# define __iomem __attribute__((noderef, address_space(__iomem))) # define __must_hold(x) __attribute__((context(x,1,1))) # define __acquires(x) __attribute__((context(x,0,1))) # define __releases(x) __attribute__((context(x,1,0))) # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) -# define __percpu __attribute__((noderef, address_space(3))) -# define __rcu __attribute__((noderef, address_space(4))) +# define __percpu __attribute__((noderef, address_space(__percpu))) +# define __rcu __attribute__((noderef, address_space(__rcu))) # define __private __attribute__((noderef)) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); From 6c41965d647a97b51ff665c7406ec9435aab7fc1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 17 Jun 2020 16:01:48 +0300 Subject: [PATCH 267/462] RDMA/mlx5: Don't access ib_qp fields in internal destroy QP path destroy_qp_common is called for flows where QP is already created by HW. While it is called from IB/core, the ibqp.* fields will be fully initialized, but it is not the case if this function is called during QP creation. Don't rely on ibqp fields as much as possible and initialize send_cq/recv_cq as temporal solution till all drivers will be converted to IB/core QP allocation scheme. refcount_t: underflow; use-after-free. WARNING: CPU: 1 PID: 5372 at lib/refcount.c:28 refcount_warn_saturate+0xfe/0x1a0 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 5372 Comm: syz-executor.2 Not tainted 5.5.0-rc5 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Call Trace: mlx5_core_put_rsc+0x70/0x80 destroy_resource_common+0x8e/0xb0 mlx5_core_destroy_qp+0xaf/0x1d0 mlx5_ib_destroy_qp+0xeb0/0x1460 ib_destroy_qp_user+0x2d5/0x7d0 create_qp+0xed3/0x2130 ib_uverbs_create_qp+0x13e/0x190 ? ib_uverbs_ex_create_qp ib_uverbs_write+0xaa5/0xdf0 __vfs_write+0x7c/0x100 ksys_write+0xc8/0x200 do_syscall_64+0x9c/0x390 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 08d53976609a ("RDMA/mlx5: Copy response to the user in one place") Link: https://lore.kernel.org/r/20200617130148.2846643-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index dbe82cdb8d2c..1e567fe3a527 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2341,18 +2341,18 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, unsigned long flags; int err; - if (qp->ibqp.rwq_ind_tbl) { + if (qp->is_rss) { destroy_rss_raw_qp_tir(dev, qp); return; } - base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + base = (qp->type == IB_QPT_RAW_PACKET || qp->flags & IB_QP_CREATE_SOURCE_QPN) ? - &qp->raw_packet_qp.rq.base : - &qp->trans_qp.base; + &qp->raw_packet_qp.rq.base : + &qp->trans_qp.base; if (qp->state != IB_QPS_RESET) { - if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET && + if (qp->type != IB_QPT_RAW_PACKET && !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) { err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_2RST_QP, 0, NULL, &base->mqp, NULL); @@ -2368,8 +2368,8 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, base->mqp.qpn); } - get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, - &send_cq, &recv_cq); + get_cqs(qp->type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq, + &recv_cq); spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx5_ib_lock_cqs(send_cq, recv_cq); @@ -2391,7 +2391,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, mlx5_ib_unlock_cqs(send_cq, recv_cq); spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + if (qp->type == IB_QPT_RAW_PACKET || qp->flags & IB_QP_CREATE_SOURCE_QPN) { destroy_raw_packet_qp(dev, qp); } else { @@ -3002,10 +3002,18 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, return &qp->ibqp; destroy_qp: - if (qp->type == MLX5_IB_QPT_DCT) + if (qp->type == MLX5_IB_QPT_DCT) { mlx5_ib_destroy_dct(qp); - else + } else { + /* + * The two lines below are temp solution till QP allocation + * will be moved to be under IB/core responsiblity. + */ + qp->ibqp.send_cq = attr->send_cq; + qp->ibqp.recv_cq = attr->recv_cq; destroy_qp_common(dev, qp, udata); + } + qp = NULL; free_qp: kfree(qp); From d44335572f76020ef473d7977d9fd424e6fc0021 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 18 Jun 2020 14:25:07 +0300 Subject: [PATCH 268/462] RDMA/mlx5: Fix remote gid value in query QP Remote gid is not copied to the right address. Fix it by using rdma_ah_set_dgid_raw to copy the remote gid value from the QP context on query QP. Fixes: 70bd7fb87625 ("RDMA/mlx5: Remove manually crafted QP context the query call") Link: https://lore.kernel.org/r/20200618112507.3453496-3-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1e567fe3a527..264e6d228803 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4390,8 +4390,7 @@ static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev, MLX5_GET(ads, path, src_addr_index), MLX5_GET(ads, path, hop_limit), MLX5_GET(ads, path, tclass)); - memcpy(ah_attr, MLX5_ADDR_OF(ads, path, rgid_rip), - MLX5_FLD_SZ_BYTES(ads, rgid_rip)); + rdma_ah_set_dgid_raw(ah_attr, MLX5_ADDR_OF(ads, path, rgid_rip)); } } From 2c0f5292d5358c2c5576146071d641110c3c1612 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 18 Jun 2020 14:25:06 +0300 Subject: [PATCH 269/462] RDMA/mlx5: Remove ECE limitation from the RAW_PACKET QPs Like any other QP type, rely on FW for the RAW_PACKET QPs to decide if ECE is supported or not. This fixes an inability to create RAW_PACKET QPs with latest rdma-core with the ECE support. Fixes: e383085c2425 ("RDMA/mlx5: Set ECE options during QP create") Link: https://lore.kernel.org/r/20200618112507.3453496-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 264e6d228803..eb5b724b121b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2873,7 +2873,6 @@ static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp) static int check_ucmd_data(struct mlx5_ib_dev *dev, struct mlx5_create_qp_params *params) { - struct ib_qp_init_attr *attr = params->attr; struct ib_udata *udata = params->udata; size_t size, last; int ret; @@ -2885,14 +2884,7 @@ static int check_ucmd_data(struct mlx5_ib_dev *dev, */ last = sizeof(struct mlx5_ib_create_qp_rss); else - /* IB_QPT_RAW_PACKET doesn't have ECE data */ - switch (attr->qp_type) { - case IB_QPT_RAW_PACKET: - last = offsetof(struct mlx5_ib_create_qp, ece_options); - break; - default: - last = offsetof(struct mlx5_ib_create_qp, reserved); - } + last = offsetof(struct mlx5_ib_create_qp, reserved); if (udata->inlen <= last) return 0; From 25f12ae45fc1931a1dce3cc59f9989a9d87834b0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:55 +0200 Subject: [PATCH 270/462] maccess: rename probe_kernel_address to get_kernel_nofault Better describe what this helper does, and match the naming of copy_from_kernel_nofault. Also switch the argument order around, so that it acts and looks like get_user(). Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/traps.c | 2 +- arch/arm/mm/alignment.c | 4 ++-- arch/arm64/kernel/traps.c | 2 +- arch/ia64/include/asm/sections.h | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/include/asm/sections.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- arch/riscv/kernel/kgdb.c | 4 ++-- arch/riscv/kernel/traps.c | 4 ++-- arch/s390/mm/fault.c | 2 +- arch/sh/kernel/traps.c | 2 +- arch/x86/kernel/probe_roms.c | 20 ++++++++++---------- arch/x86/kernel/traps.c | 2 +- arch/x86/mm/fault.c | 6 +++--- arch/x86/pci/pcbios.c | 2 +- include/linux/uaccess.h | 10 +++++----- lib/test_lockup.c | 6 +++--- 20 files changed, 40 insertions(+), 40 deletions(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 65a3b1e75480..49ce15c3612d 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -396,7 +396,7 @@ int is_valid_bugaddr(unsigned long pc) u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); #endif - if (probe_kernel_address((unsigned *)pc, bkpt)) + if (get_kernel_nofault(bkpt, (unsigned *)pc)) return 0; return bkpt == insn; diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 84718eddae60..81a627e6e1c5 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -774,7 +774,7 @@ static int alignment_get_arm(struct pt_regs *regs, u32 *ip, u32 *inst) if (user_mode(regs)) fault = get_user(instr, ip); else - fault = probe_kernel_address(ip, instr); + fault = get_kernel_nofault(instr, ip); *inst = __mem_to_opcode_arm(instr); @@ -789,7 +789,7 @@ static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst) if (user_mode(regs)) fault = get_user(instr, ip); else - fault = probe_kernel_address(ip, instr); + fault = get_kernel_nofault(instr, ip); *inst = __mem_to_opcode_thumb16(instr); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 50cc30acf106..227b2d9bae3d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -376,7 +376,7 @@ static int call_undef_hook(struct pt_regs *regs) if (!user_mode(regs)) { __le32 instr_le; - if (probe_kernel_address((__force __le32 *)pc, instr_le)) + if (get_kernel_nofault(instr_le, (__force __le32 *)pc)) goto exit; instr = le32_to_cpu(instr_le); } else if (compat_thumb_mode(regs)) { diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index cea15f2dd38d..ad4fc06e5f4b 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -35,7 +35,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct fdesc *desc = ptr; void *p; - if (!probe_kernel_address(&desc->ip, p)) + if (!get_kernel_nofault(p, &desc->ip)) ptr = p; return ptr; } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 230a6422b99f..6c435dbccca0 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -293,7 +293,7 @@ void *dereference_function_descriptor(void *ptr) Elf64_Fdesc *desc = ptr; void *p; - if (!probe_kernel_address(&desc->addr, p)) + if (!get_kernel_nofault(p, &desc->addr)) ptr = p; return ptr; } diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index d19871763ed4..bd311616fca8 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -85,7 +85,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct ppc64_opd_entry *desc = ptr; void *p; - if (!probe_kernel_address(&desc->funcaddr, p)) + if (!get_kernel_nofault(p, &desc->funcaddr)) ptr = p; return ptr; } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 652b2852bea3..e14a1862a3ca 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -421,7 +421,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) unsigned int instr; struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = probe_kernel_address(addr, instr); + err = get_kernel_nofault(instr, addr); if (err) return err; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 6f96f65ebfe8..9cc792a3a6a9 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -289,7 +289,7 @@ int kprobe_handler(struct pt_regs *regs) if (!p) { unsigned int instr; - if (probe_kernel_address(addr, instr)) + if (get_kernel_nofault(instr, addr)) goto no_kprobe; if (instr != BREAKPOINT_INSTRUCTION) { diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d4d0d1048500..30955a0c32d0 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1271,7 +1271,7 @@ static void show_instructions(struct pt_regs *regs) #endif if (!__kernel_text_address(pc) || - probe_kernel_address((const void *)pc, instr)) { + get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); } else { if (regs->nip == pc) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 73fa37ca40ef..040b9d01c079 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1069,7 +1069,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) ret = copy_from_user_nofault(&inst, (void __user *)regs->nip, sizeof(inst)); else - ret = probe_kernel_address((void *)regs->nip, inst); + ret = get_kernel_nofault(inst, (void *)regs->nip); if (!ret && mcheck_handle_load(regs, inst)) { regs->nip += 4; diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index a21fb21883e7..c3275f42d1ac 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -62,7 +62,7 @@ int get_step_address(struct pt_regs *regs, unsigned long *next_addr) unsigned int rs1_num, rs2_num; int op_code; - if (probe_kernel_address((void *)pc, op_code)) + if (get_kernel_nofault(op_code, (void *)pc)) return -EINVAL; if ((op_code & __INSN_LENGTH_MASK) != __INSN_LENGTH_GE_32) { if (is_c_jalr_insn(op_code) || is_c_jr_insn(op_code)) { @@ -146,7 +146,7 @@ int do_single_step(struct pt_regs *regs) return error; /* Store the op code in the stepped address */ - error = probe_kernel_address((void *)addr, stepped_opcode); + error = get_kernel_nofault(stepped_opcode, (void *)addr); if (error) return error; diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index ecec1778e3a4..7d95cce5e47c 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -137,7 +137,7 @@ static inline unsigned long get_break_insn_length(unsigned long pc) { bug_insn_t insn; - if (probe_kernel_address((bug_insn_t *)pc, insn)) + if (get_kernel_nofault(insn, (bug_insn_t *)pc)) return 0; return GET_INSN_LENGTH(insn); @@ -165,7 +165,7 @@ int is_valid_bugaddr(unsigned long pc) if (pc < VMALLOC_START) return 0; - if (probe_kernel_address((bug_insn_t *)pc, insn)) + if (get_kernel_nofault(insn, (bug_insn_t *)pc)) return 0; if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) return (insn == __BUG_INSN_32); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 6a24751557f0..d53c2e2ea1fd 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -105,7 +105,7 @@ static int bad_address(void *p) { unsigned long dummy; - return probe_kernel_address((unsigned long *)p, dummy); + return get_kernel_nofault(dummy, (unsigned long *)p); } static void dump_pagetable(unsigned long asce, unsigned long address) diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index a33025451fcd..9c3d32b80038 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -118,7 +118,7 @@ int is_valid_bugaddr(unsigned long addr) if (addr < PAGE_OFFSET) return 0; - if (probe_kernel_address((insn_size_t *)addr, opcode)) + if (get_kernel_nofault(opcode, (insn_size_t *)addr)) return 0; if (opcode == TRAPA_BUG_OPCODE) return 1; diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index ee0286390a4c..65b0dd2bf25c 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -99,7 +99,7 @@ static bool probe_list(struct pci_dev *pdev, unsigned short vendor, unsigned short device; do { - if (probe_kernel_address(rom_list, device) != 0) + if (get_kernel_nofault(device, rom_list) != 0) device = 0; if (device && match_id(pdev, vendor, device)) @@ -125,13 +125,13 @@ static struct resource *find_oprom(struct pci_dev *pdev) break; rom = isa_bus_to_virt(res->start); - if (probe_kernel_address(rom + 0x18, offset) != 0) + if (get_kernel_nofault(offset, rom + 0x18) != 0) continue; - if (probe_kernel_address(rom + offset + 0x4, vendor) != 0) + if (get_kernel_nofault(vendor, rom + offset + 0x4) != 0) continue; - if (probe_kernel_address(rom + offset + 0x6, device) != 0) + if (get_kernel_nofault(device, rom + offset + 0x6) != 0) continue; if (match_id(pdev, vendor, device)) { @@ -139,8 +139,8 @@ static struct resource *find_oprom(struct pci_dev *pdev) break; } - if (probe_kernel_address(rom + offset + 0x8, list) == 0 && - probe_kernel_address(rom + offset + 0xc, rev) == 0 && + if (get_kernel_nofault(list, rom + offset + 0x8) == 0 && + get_kernel_nofault(rev, rom + offset + 0xc) == 0 && rev >= 3 && list && probe_list(pdev, vendor, rom + offset + list)) { oprom = res; @@ -183,14 +183,14 @@ static int __init romsignature(const unsigned char *rom) const unsigned short * const ptr = (const unsigned short *)rom; unsigned short sig; - return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; + return get_kernel_nofault(sig, ptr) == 0 && sig == ROMSIGNATURE; } static int __init romchecksum(const unsigned char *rom, unsigned long length) { unsigned char sum, c; - for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) + for (sum = 0; length && get_kernel_nofault(c, rom++) == 0; length--) sum += c; return !length && !sum; } @@ -211,7 +211,7 @@ void __init probe_roms(void) video_rom_resource.start = start; - if (probe_kernel_address(rom + 2, c) != 0) + if (get_kernel_nofault(c, rom + 2) != 0) continue; /* 0 < length <= 0x7f * 512, historically */ @@ -249,7 +249,7 @@ void __init probe_roms(void) if (!romsignature(rom)) continue; - if (probe_kernel_address(rom + 2, c) != 0) + if (get_kernel_nofault(c, rom + 2) != 0) continue; /* 0 < length <= 0x7f * 512, historically */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7003f2e7b163..f9727b96961f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -91,7 +91,7 @@ int is_valid_bugaddr(unsigned long addr) if (addr < TASK_SIZE_MAX) return 0; - if (probe_kernel_address((unsigned short *)addr, ud)) + if (get_kernel_nofault(ud, (unsigned short *)addr)) return 0; return ud == INSN_UD0 || ud == INSN_UD2; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e996aa3833b8..1ead568c0101 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -99,7 +99,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, return !instr_lo || (instr_lo>>1) == 1; case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ - if (probe_kernel_address(instr, opcode)) + if (get_kernel_nofault(opcode, instr)) return 0; *prefetch = (instr_lo == 0xF) && @@ -133,7 +133,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) while (instr < max_instr) { unsigned char opcode; - if (probe_kernel_address(instr, opcode)) + if (get_kernel_nofault(opcode, instr)) break; instr++; @@ -301,7 +301,7 @@ static int bad_address(void *p) { unsigned long dummy; - return probe_kernel_address((unsigned long *)p, dummy); + return get_kernel_nofault(dummy, (unsigned long *)p); } static void dump_pagetable(unsigned long address) diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 9c97d814125e..4f15280732ed 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -302,7 +302,7 @@ static const struct pci_raw_ops *__init pci_find_bios(void) check <= (union bios32 *) __va(0xffff0); ++check) { long sig; - if (probe_kernel_address(&check->fields.signature, sig)) + if (get_kernel_nofault(sig, &check->fields.signature)) continue; if (check->fields.signature != BIOS32_SIGNATURE) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index bef48da242cc..a508a3c08879 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -318,14 +318,14 @@ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long strnlen_user_nofault(const void __user *unsafe_addr, long count); /** - * probe_kernel_address(): safely attempt to read from a location - * @addr: address to read from - * @retval: read into this variable + * get_kernel_nofault(): safely attempt to read from a location + * @val: read into this variable + * @ptr: address to read from * * Returns 0 on success, or -EFAULT. */ -#define probe_kernel_address(addr, retval) \ - copy_from_kernel_nofault(&retval, addr, sizeof(retval)) +#define get_kernel_nofault(val, ptr) \ + copy_from_kernel_nofault(&(val), (ptr), sizeof(val)) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) diff --git a/lib/test_lockup.c b/lib/test_lockup.c index f258743a0d83..bd7c7ff39f6b 100644 --- a/lib/test_lockup.c +++ b/lib/test_lockup.c @@ -419,8 +419,8 @@ static bool test_kernel_ptr(unsigned long addr, int size) /* should be at least readable kernel address */ if (access_ok(ptr, 1) || access_ok(ptr + size - 1, 1) || - probe_kernel_address(ptr, buf) || - probe_kernel_address(ptr + size - 1, buf)) { + get_kernel_nofault(buf, ptr) || + get_kernel_nofault(buf, ptr + size - 1)) { pr_err("invalid kernel ptr: %#lx\n", addr); return true; } @@ -437,7 +437,7 @@ static bool __maybe_unused test_magic(unsigned long addr, int offset, if (!addr) return false; - if (probe_kernel_address(ptr, magic) || magic != expected) { + if (get_kernel_nofault(magic, ptr) || magic != expected) { pr_err("invalid magic at %#lx + %#x = %#x, expected %#x\n", addr, offset, magic, expected); return true; From 9e0dc7b9e1cbc9cd0dc4ab2377c0536d4b18bedc Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 17 Jun 2020 16:02:30 +0300 Subject: [PATCH 271/462] RDMA/mlx5: Fix integrity enabled QP creation create_flags checks was refactored and broke the creation on integrity enabled QPs and actually broke the NVMe/RDMA and iSER ULP's when using mlx5 driven devices. Fixes: 2978975ce7f1 ("RDMA/mlx5: Process create QP flags in one place") Link: https://lore.kernel.org/r/20200617130230.2846915-1-leon@kernel.org Signed-off-by: Max Gurtovoy Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index eb5b724b121b..a7fcb00e37a5 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2668,6 +2668,9 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (qp_type == IB_QPT_RAW_PACKET && attr->rwq_ind_tbl) return (create_flags) ? -EINVAL : 0; + process_create_flag(dev, &create_flags, + IB_QP_CREATE_INTEGRITY_EN, + MLX5_CAP_GEN(mdev, sho), qp); process_create_flag(dev, &create_flags, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX5_CAP_GEN(mdev, block_lb_mc), qp); From 0c389d89abc28edf70ae847ee2fa55acb267b826 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 18 Jun 2020 12:10:37 -0700 Subject: [PATCH 272/462] maccess: make get_kernel_nofault() check for minimal type compatibility Now that we've renamed probe_kernel_address() to get_kernel_nofault() and made it look and behave more in line with get_user(), some of the subtle type behavior differences end up being more obvious and possibly dangerous. When you do get_user(val, user_ptr); the type of the access comes from the "user_ptr" part, and the above basically acts as val = *user_ptr; by design (except, of course, for the fact that the actual dereference is done with a user access). Note how in the above case, the type of the end result comes from the pointer argument, and then the value is cast to the type of 'val' as part of the assignment. So the type of the pointer is ultimately the more important type both for the access itself. But 'get_kernel_nofault()' may now _look_ similar, but it behaves very differently. When you do get_kernel_nofault(val, kernel_ptr); it behaves like val = *(typeof(val) *)kernel_ptr; except, of course, for the fact that the actual dereference is done with exception handling so that a faulting access is suppressed and returned as the error code. But note how different the casting behavior of the two superficially similar accesses are: one does the actual access in the size of the type the pointer points to, while the other does the access in the size of the target, and ignores the pointer type entirely. Actually changing get_kernel_nofault() to act like get_user() is almost certainly the right thing to do eventually, but in the meantime this patch adds logit to at least verify that the pointer type is compatible with the type of the result. In many cases, this involves just casting the pointer to 'void *' to make it obvious that the type of the pointer is not the important part. It's not how 'get_user()' acts, but at least the behavioral difference is now obvious and explicit. Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/traps.c | 2 +- arch/ia64/include/asm/sections.h | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/include/asm/sections.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/x86/kernel/probe_roms.c | 4 ++-- include/linux/uaccess.h | 6 ++++-- 7 files changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 49ce15c3612d..17d5a785df28 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -396,7 +396,7 @@ int is_valid_bugaddr(unsigned long pc) u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); #endif - if (get_kernel_nofault(bkpt, (unsigned *)pc)) + if (get_kernel_nofault(bkpt, (void *)pc)) return 0; return bkpt == insn; diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index ad4fc06e5f4b..3a033d2008b3 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -35,7 +35,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct fdesc *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->ip)) + if (!get_kernel_nofault(p, (void *)&desc->ip)) ptr = p; return ptr; } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 6c435dbccca0..b7abb12edd3a 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -293,7 +293,7 @@ void *dereference_function_descriptor(void *ptr) Elf64_Fdesc *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->addr)) + if (!get_kernel_nofault(p, (void *)&desc->addr)) ptr = p; return ptr; } diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index bd311616fca8..324d7b298ec3 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -85,7 +85,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct ppc64_opd_entry *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->funcaddr)) + if (!get_kernel_nofault(p, (void *)&desc->funcaddr)) ptr = p; return ptr; } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index e14a1862a3ca..409080208a6c 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -421,7 +421,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) unsigned int instr; struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = get_kernel_nofault(instr, addr); + err = get_kernel_nofault(instr, (unsigned *) addr); if (err) return err; diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 65b0dd2bf25c..9e1def3744f2 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -94,7 +94,7 @@ static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short } static bool probe_list(struct pci_dev *pdev, unsigned short vendor, - const unsigned char *rom_list) + const void *rom_list) { unsigned short device; @@ -119,7 +119,7 @@ static struct resource *find_oprom(struct pci_dev *pdev) for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) { struct resource *res = &adapter_rom_resources[i]; unsigned short offset, vendor, device, list, rev; - const unsigned char *rom; + const void *rom; if (res->end == 0) break; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index a508a3c08879..0a76ddc07d59 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -324,8 +324,10 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count); * * Returns 0 on success, or -EFAULT. */ -#define get_kernel_nofault(val, ptr) \ - copy_from_kernel_nofault(&(val), (ptr), sizeof(val)) +#define get_kernel_nofault(val, ptr) ({ \ + const typeof(val) *__gk_ptr = (ptr); \ + copy_from_kernel_nofault(&(val), __gk_ptr, sizeof(val));\ +}) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) From e0d17c842c0f824fd4df9f4688709fc6907201e1 Mon Sep 17 00:00:00 2001 From: Yash Shah Date: Tue, 16 Jun 2020 19:33:06 +0530 Subject: [PATCH 273/462] RISC-V: Don't allow write+exec only page mapping request in mmap As per the table 4.4 of version "20190608-Priv-MSU-Ratified" of the RISC-V instruction set manual[0], the PTE permission bit combination of "write+exec only" is reserved for future use. Hence, don't allow such mapping request in mmap call. An issue is been reported by David Abdurachmanov, that while running stress-ng with "sysbadaddr" argument, RCU stalls are observed on RISC-V specific kernel. This issue arises when the stress-sysbadaddr request for pages with "write+exec only" permission bits and then passes the address obtain from this mmap call to various system call. For the riscv kernel, the mmap call should fail for this particular combination of permission bits since it's not valid. [0]: http://dabbelt.com/~palmer/keep/riscv-isa-manual/riscv-privileged-20190608-1.pdf Signed-off-by: Yash Shah Reported-by: David Abdurachmanov [Palmer: Refer to the latest ISA specification at the only link I could find, and update the terminology.] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sys_riscv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index f3619f59d85c..12f8a7fce78b 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -8,6 +8,7 @@ #include #include #include +#include static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -16,6 +17,11 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, { if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) return -EINVAL; + + if ((prot & PROT_WRITE) && (prot & PROT_EXEC)) + if (unlikely(!(prot & PROT_READ))) + return -EINVAL; + return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> (PAGE_SHIFT - page_shift_offset)); } From 0e2c09011d4de4161f615ff860a605a9186cf62a Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 17 Jun 2020 13:37:32 -0700 Subject: [PATCH 274/462] RISC-V: Acquire mmap lock before invoking walk_page_range As per walk_page_range documentation, mmap lock should be acquired by the caller before invoking walk_page_range. mmap_assert_locked gets triggered without that. The details can be found here. http://lists.infradead.org/pipermail/linux-riscv/2020-June/010335.html Fixes: 395a21ff859c(riscv: add ARCH_HAS_SET_DIRECT_MAP support) Signed-off-by: Atish Patra Reviewed-by: Michel Lespinasse Reviewed-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/pageattr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index ec2c70f84994..289a9a5ea5b5 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -151,6 +151,7 @@ int set_memory_nx(unsigned long addr, int numpages) int set_direct_map_invalid_noflush(struct page *page) { + int ret; unsigned long start = (unsigned long)page_address(page); unsigned long end = start + PAGE_SIZE; struct pageattr_masks masks = { @@ -158,11 +159,16 @@ int set_direct_map_invalid_noflush(struct page *page) .clear_mask = __pgprot(_PAGE_PRESENT) }; - return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + mmap_read_lock(&init_mm); + ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + mmap_read_unlock(&init_mm); + + return ret; } int set_direct_map_default_noflush(struct page *page) { + int ret; unsigned long start = (unsigned long)page_address(page); unsigned long end = start + PAGE_SIZE; struct pageattr_masks masks = { @@ -170,7 +176,11 @@ int set_direct_map_default_noflush(struct page *page) .clear_mask = __pgprot(0) }; - return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + mmap_read_lock(&init_mm); + ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + mmap_read_unlock(&init_mm); + + return ret; } void __kernel_map_pages(struct page *page, int numpages, int enable) From b4748553f53f2971e07d2619f13d461daac0f3bb Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 16 Jun 2020 10:31:39 +0200 Subject: [PATCH 275/462] net: ethernet: mvneta: Fix Serdes configuration for SoCs without comphy The MVNETA_SERDES_CFG register is only available on older SoCs like the Armada XP. On newer SoCs like the Armada 38x the fields are moved to comphy. This patch moves the writes to this register next to the comphy initialization, so that depending on the SoC either comphy or MVNETA_SERDES_CFG is configured. With this we no longer write to the MVNETA_SERDES_CFG on SoCs where it doesn't exist. Suggested-by: Russell King Signed-off-by: Sascha Hauer Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 80 +++++++++++++++------------ 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 946925bbcb2d..987aaaabe4dc 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -106,6 +106,7 @@ #define MVNETA_TX_IN_PRGRS BIT(1) #define MVNETA_TX_FIFO_EMPTY BIT(8) #define MVNETA_RX_MIN_FRAME_SIZE 0x247c +/* Only exists on Armada XP and Armada 370 */ #define MVNETA_SERDES_CFG 0x24A0 #define MVNETA_SGMII_SERDES_PROTO 0x0cc7 #define MVNETA_QSGMII_SERDES_PROTO 0x0667 @@ -3529,26 +3530,55 @@ static int mvneta_setup_txqs(struct mvneta_port *pp) return 0; } -static int mvneta_comphy_init(struct mvneta_port *pp) +static int mvneta_comphy_init(struct mvneta_port *pp, phy_interface_t interface) { int ret; - if (!pp->comphy) - return 0; - - ret = phy_set_mode_ext(pp->comphy, PHY_MODE_ETHERNET, - pp->phy_interface); + ret = phy_set_mode_ext(pp->comphy, PHY_MODE_ETHERNET, interface); if (ret) return ret; return phy_power_on(pp->comphy); } +static int mvneta_config_interface(struct mvneta_port *pp, + phy_interface_t interface) +{ + int ret = 0; + + if (pp->comphy) { + if (interface == PHY_INTERFACE_MODE_SGMII || + interface == PHY_INTERFACE_MODE_1000BASEX || + interface == PHY_INTERFACE_MODE_2500BASEX) { + ret = mvneta_comphy_init(pp, interface); + } + } else { + switch (interface) { + case PHY_INTERFACE_MODE_QSGMII: + mvreg_write(pp, MVNETA_SERDES_CFG, + MVNETA_QSGMII_SERDES_PROTO); + break; + + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + mvreg_write(pp, MVNETA_SERDES_CFG, + MVNETA_SGMII_SERDES_PROTO); + break; + default: + return -EINVAL; + } + } + + pp->phy_interface = interface; + + return ret; +} + static void mvneta_start_dev(struct mvneta_port *pp) { int cpu; - WARN_ON(mvneta_comphy_init(pp)); + WARN_ON(mvneta_config_interface(pp, pp->phy_interface)); mvneta_max_rx_size_set(pp, pp->pkt_size); mvneta_txq_max_tx_size_set(pp, pp->pkt_size); @@ -3926,14 +3956,10 @@ static void mvneta_mac_config(struct phylink_config *config, unsigned int mode, if (state->speed == SPEED_2500) new_ctrl4 |= MVNETA_GMAC4_SHORT_PREAMBLE_ENABLE; - if (pp->comphy && pp->phy_interface != state->interface && - (state->interface == PHY_INTERFACE_MODE_SGMII || - state->interface == PHY_INTERFACE_MODE_1000BASEX || - state->interface == PHY_INTERFACE_MODE_2500BASEX)) { - pp->phy_interface = state->interface; - - WARN_ON(phy_power_off(pp->comphy)); - WARN_ON(mvneta_comphy_init(pp)); + if (pp->phy_interface != state->interface) { + if (pp->comphy) + WARN_ON(phy_power_off(pp->comphy)); + WARN_ON(mvneta_config_interface(pp, state->interface)); } if (new_ctrl0 != gmac_ctrl0) @@ -4977,20 +5003,10 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp, } /* Power up the port */ -static int mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) +static void mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) { /* MAC Cause register should be cleared */ mvreg_write(pp, MVNETA_UNIT_INTR_CAUSE, 0); - - if (phy_mode == PHY_INTERFACE_MODE_QSGMII) - mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_QSGMII_SERDES_PROTO); - else if (phy_mode == PHY_INTERFACE_MODE_SGMII || - phy_interface_mode_is_8023z(phy_mode)) - mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO); - else if (!phy_interface_mode_is_rgmii(phy_mode)) - return -EINVAL; - - return 0; } /* Device initialization routine */ @@ -5176,11 +5192,7 @@ static int mvneta_probe(struct platform_device *pdev) if (err < 0) goto err_netdev; - err = mvneta_port_power_up(pp, phy_mode); - if (err < 0) { - dev_err(&pdev->dev, "can't power up port\n"); - goto err_netdev; - } + mvneta_port_power_up(pp, phy_mode); /* Armada3700 network controller does not support per-cpu * operation, so only single NAPI should be initialized. @@ -5334,11 +5346,7 @@ static int mvneta_resume(struct device *device) } } mvneta_defaults_set(pp); - err = mvneta_port_power_up(pp, pp->phy_interface); - if (err < 0) { - dev_err(device, "can't power up port\n"); - return err; - } + mvneta_port_power_up(pp, pp->phy_interface); netif_device_attach(dev); From 1a642ca7f38992b086101fe204a1ae3c90ed8016 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 16 Jun 2020 10:31:40 +0200 Subject: [PATCH 276/462] net: ethernet: mvneta: Add 2500BaseX support for SoCs without comphy The older SoCs like Armada XP support a 2500BaseX mode in the datasheets referred to as DR-SGMII (Double rated SGMII) or HS-SGMII (High Speed SGMII). This is an upclocked 1000BaseX mode, thus PHY_INTERFACE_MODE_2500BASEX is the appropriate mode define for it. adding support for it merely means writing the correct magic value into the MVNETA_SERDES_CFG register. Signed-off-by: Sascha Hauer Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 987aaaabe4dc..af6000172848 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -110,6 +110,7 @@ #define MVNETA_SERDES_CFG 0x24A0 #define MVNETA_SGMII_SERDES_PROTO 0x0cc7 #define MVNETA_QSGMII_SERDES_PROTO 0x0667 +#define MVNETA_HSGMII_SERDES_PROTO 0x1107 #define MVNETA_TYPE_PRIO 0x24bc #define MVNETA_FORCE_UNI BIT(21) #define MVNETA_TXQ_CMD_1 0x24e4 @@ -3564,6 +3565,11 @@ static int mvneta_config_interface(struct mvneta_port *pp, mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO); break; + + case PHY_INTERFACE_MODE_2500BASEX: + mvreg_write(pp, MVNETA_SERDES_CFG, + MVNETA_HSGMII_SERDES_PROTO); + break; default: return -EINVAL; } From 814152a89ed52c722ab92e9fbabcac3cb8a39245 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 16 Jun 2020 09:39:21 +0000 Subject: [PATCH 277/462] net: fix memleak in register_netdevice() I got a memleak report when doing some fuzz test: unreferenced object 0xffff888112584000 (size 13599): comm "ip", pid 3048, jiffies 4294911734 (age 343.491s) hex dump (first 32 bytes): 74 61 70 30 00 00 00 00 00 00 00 00 00 00 00 00 tap0............ 00 ee d9 19 81 88 ff ff 00 00 00 00 00 00 00 00 ................ backtrace: [<000000002f60ba65>] __kmalloc_node+0x309/0x3a0 [<0000000075b211ec>] kvmalloc_node+0x7f/0xc0 [<00000000d3a97396>] alloc_netdev_mqs+0x76/0xfc0 [<00000000609c3655>] __tun_chr_ioctl+0x1456/0x3d70 [<000000001127ca24>] ksys_ioctl+0xe5/0x130 [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0 [<00000000e1023498>] do_syscall_64+0x56/0xa0 [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 unreferenced object 0xffff888111845cc0 (size 8): comm "ip", pid 3048, jiffies 4294911734 (age 343.491s) hex dump (first 8 bytes): 74 61 70 30 00 88 ff ff tap0.... backtrace: [<000000004c159777>] kstrdup+0x35/0x70 [<00000000d8b496ad>] kstrdup_const+0x3d/0x50 [<00000000494e884a>] kvasprintf_const+0xf1/0x180 [<0000000097880a2b>] kobject_set_name_vargs+0x56/0x140 [<000000008fbdfc7b>] dev_set_name+0xab/0xe0 [<000000005b99e3b4>] netdev_register_kobject+0xc0/0x390 [<00000000602704fe>] register_netdevice+0xb61/0x1250 [<000000002b7ca244>] __tun_chr_ioctl+0x1cd1/0x3d70 [<000000001127ca24>] ksys_ioctl+0xe5/0x130 [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0 [<00000000e1023498>] do_syscall_64+0x56/0xa0 [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 unreferenced object 0xffff88811886d800 (size 512): comm "ip", pid 3048, jiffies 4294911734 (age 343.491s) hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff c0 66 3d a3 ff ff ff ff .........f=..... backtrace: [<0000000050315800>] device_add+0x61e/0x1950 [<0000000021008dfb>] netdev_register_kobject+0x17e/0x390 [<00000000602704fe>] register_netdevice+0xb61/0x1250 [<000000002b7ca244>] __tun_chr_ioctl+0x1cd1/0x3d70 [<000000001127ca24>] ksys_ioctl+0xe5/0x130 [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0 [<00000000e1023498>] do_syscall_64+0x56/0xa0 [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 If call_netdevice_notifiers() failed, then rollback_registered() calls netdev_unregister_kobject() which holds the kobject. The reference cannot be put because the netdev won't be add to todo list, so it will leads a memleak, we need put the reference to avoid memleak. Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 6bc2388141f6..44a14b41ad82 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9547,6 +9547,13 @@ int register_netdevice(struct net_device *dev) rcu_barrier(); dev->reg_state = NETREG_UNREGISTERED; + /* We should put the kobject that hold in + * netdev_unregister_kobject(), otherwise + * the net device cannot be freed when + * driver calls free_netdev(), because the + * kobject is being hold. + */ + kobject_put(&dev->dev.kobj); } /* * Prevent userspace races by waiting until the network From fb7861d14c8d7edac65b2fcb6e8031cb138457b2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 16 Jun 2020 15:52:05 +0000 Subject: [PATCH 278/462] net: core: reduce recursion limit value In the current code, ->ndo_start_xmit() can be executed recursively only 10 times because of stack memory. But, in the case of the vxlan, 10 recursion limit value results in a stack overflow. In the current code, the nested interface is limited by 8 depth. There is no critical reason that the recursion limitation value should be 10. So, it would be good to be the same value with the limitation value of nesting interface depth. Test commands: ip link add vxlan10 type vxlan vni 10 dstport 4789 srcport 4789 4789 ip link set vxlan10 up ip a a 192.168.10.1/24 dev vxlan10 ip n a 192.168.10.2 dev vxlan10 lladdr fc:22:33:44:55:66 nud permanent for i in {9..0} do let A=$i+1 ip link add vxlan$i type vxlan vni $i dstport 4789 srcport 4789 4789 ip link set vxlan$i up ip a a 192.168.$i.1/24 dev vxlan$i ip n a 192.168.$i.2 dev vxlan$i lladdr fc:22:33:44:55:66 nud permanent bridge fdb add fc:22:33:44:55:66 dev vxlan$A dst 192.168.$i.2 self done hping3 192.168.10.2 -2 -d 60000 Splat looks like: [ 103.814237][ T1127] ============================================================================= [ 103.871955][ T1127] BUG kmalloc-2k (Tainted: G B ): Padding overwritten. 0x00000000897a2e4f-0x000 [ 103.873187][ T1127] ----------------------------------------------------------------------------- [ 103.873187][ T1127] [ 103.874252][ T1127] INFO: Slab 0x000000005cccc724 objects=5 used=5 fp=0x0000000000000000 flags=0x10000000001020 [ 103.881323][ T1127] CPU: 3 PID: 1127 Comm: hping3 Tainted: G B 5.7.0+ #575 [ 103.882131][ T1127] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 103.883006][ T1127] Call Trace: [ 103.883324][ T1127] dump_stack+0x96/0xdb [ 103.883716][ T1127] slab_err+0xad/0xd0 [ 103.884106][ T1127] ? _raw_spin_unlock+0x1f/0x30 [ 103.884620][ T1127] ? get_partial_node.isra.78+0x140/0x360 [ 103.885214][ T1127] slab_pad_check.part.53+0xf7/0x160 [ 103.885769][ T1127] ? pskb_expand_head+0x110/0xe10 [ 103.886316][ T1127] check_slab+0x97/0xb0 [ 103.886763][ T1127] alloc_debug_processing+0x84/0x1a0 [ 103.887308][ T1127] ___slab_alloc+0x5a5/0x630 [ 103.887765][ T1127] ? pskb_expand_head+0x110/0xe10 [ 103.888265][ T1127] ? lock_downgrade+0x730/0x730 [ 103.888762][ T1127] ? pskb_expand_head+0x110/0xe10 [ 103.889244][ T1127] ? __slab_alloc+0x3e/0x80 [ 103.889675][ T1127] __slab_alloc+0x3e/0x80 [ 103.890108][ T1127] __kmalloc_node_track_caller+0xc7/0x420 [ ... ] Fixes: 11a766ce915f ("net: Increase xmit RECURSION_LIMIT to 10.") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6fc613ed8eae..39e28e11863c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3157,7 +3157,7 @@ static inline int dev_recursion_level(void) return this_cpu_read(softnet_data.xmit.recursion); } -#define XMIT_RECURSION_LIMIT 10 +#define XMIT_RECURSION_LIMIT 8 static inline bool dev_xmit_recursion(void) { return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > From dafabb6590cb15f300b77c095d50312e2c7c8e0f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 16 Jun 2020 16:04:00 +0000 Subject: [PATCH 279/462] ip6_gre: fix use-after-free in ip6gre_tunnel_lookup() In the datapath, the ip6gre_tunnel_lookup() is used and it internally uses fallback tunnel device pointer, which is fb_tunnel_dev. This pointer variable should be set to NULL when a fb interface is deleted. But there is no routine to set fb_tunnel_dev pointer to NULL. So, this pointer will be still used after interface is deleted and it eventually results in the use-after-free problem. Test commands: ip netns add A ip netns add B ip link add eth0 type veth peer name eth1 ip link set eth0 netns A ip link set eth1 netns B ip netns exec A ip link set lo up ip netns exec A ip link set eth0 up ip netns exec A ip link add ip6gre1 type ip6gre local fc:0::1 \ remote fc:0::2 ip netns exec A ip -6 a a fc:100::1/64 dev ip6gre1 ip netns exec A ip link set ip6gre1 up ip netns exec A ip -6 a a fc:0::1/64 dev eth0 ip netns exec A ip link set ip6gre0 up ip netns exec B ip link set lo up ip netns exec B ip link set eth1 up ip netns exec B ip link add ip6gre1 type ip6gre local fc:0::2 \ remote fc:0::1 ip netns exec B ip -6 a a fc:100::2/64 dev ip6gre1 ip netns exec B ip link set ip6gre1 up ip netns exec B ip -6 a a fc:0::2/64 dev eth1 ip netns exec B ip link set ip6gre0 up ip netns exec A ping fc:100::2 -s 60000 & ip netns del B Splat looks like: [ 73.087285][ C1] BUG: KASAN: use-after-free in ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] [ 73.088361][ C1] Read of size 4 at addr ffff888040559218 by task ping/1429 [ 73.089317][ C1] [ 73.089638][ C1] CPU: 1 PID: 1429 Comm: ping Not tainted 5.7.0+ #602 [ 73.090531][ C1] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 73.091725][ C1] Call Trace: [ 73.092160][ C1] [ 73.092556][ C1] dump_stack+0x96/0xdb [ 73.093122][ C1] print_address_description.constprop.6+0x2cc/0x450 [ 73.094016][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] [ 73.094894][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] [ 73.095767][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] [ 73.096619][ C1] kasan_report+0x154/0x190 [ 73.097209][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] [ 73.097989][ C1] ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] [ 73.098750][ C1] ? gre_del_protocol+0x60/0x60 [gre] [ 73.099500][ C1] gre_rcv+0x1c5/0x1450 [ip6_gre] [ 73.100199][ C1] ? ip6gre_header+0xf00/0xf00 [ip6_gre] [ 73.100985][ C1] ? rcu_read_lock_sched_held+0xc0/0xc0 [ 73.101830][ C1] ? ip6_input_finish+0x5/0xf0 [ 73.102483][ C1] ip6_protocol_deliver_rcu+0xcbb/0x1510 [ 73.103296][ C1] ip6_input_finish+0x5b/0xf0 [ 73.103920][ C1] ip6_input+0xcd/0x2c0 [ 73.104473][ C1] ? ip6_input_finish+0xf0/0xf0 [ 73.105115][ C1] ? rcu_read_lock_held+0x90/0xa0 [ 73.105783][ C1] ? rcu_read_lock_sched_held+0xc0/0xc0 [ 73.106548][ C1] ipv6_rcv+0x1f1/0x300 [ ... ] Suggested-by: Eric Dumazet Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 781ca8c07a0d..6532bde82b40 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -127,6 +127,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, gre_proto == htons(ETH_P_ERSPAN2)) ? ARPHRD_ETHER : ARPHRD_IP6GRE; int score, cand_score = 4; + struct net_device *ndev; for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { if (!ipv6_addr_equal(local, &t->parms.laddr) || @@ -238,9 +239,9 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, if (t && t->dev->flags & IFF_UP) return t; - dev = ign->fb_tunnel_dev; - if (dev && dev->flags & IFF_UP) - return netdev_priv(dev); + ndev = READ_ONCE(ign->fb_tunnel_dev); + if (ndev && ndev->flags & IFF_UP) + return netdev_priv(ndev); return NULL; } @@ -413,6 +414,8 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) ip6gre_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); + if (ign->fb_tunnel_dev == dev) + WRITE_ONCE(ign->fb_tunnel_dev, NULL); dst_cache_reset(&t->dst_cache); dev_put(dev); } From ba61539c6ae57f4146284a5cb4f7b7ed8d42bf45 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 16 Jun 2020 16:51:51 +0000 Subject: [PATCH 280/462] ip_tunnel: fix use-after-free in ip_tunnel_lookup() In the datapath, the ip_tunnel_lookup() is used and it internally uses fallback tunnel device pointer, which is fb_tunnel_dev. This pointer variable should be set to NULL when a fb interface is deleted. But there is no routine to set fb_tunnel_dev pointer to NULL. So, this pointer will be still used after interface is deleted and it eventually results in the use-after-free problem. Test commands: ip netns add A ip netns add B ip link add eth0 type veth peer name eth1 ip link set eth0 netns A ip link set eth1 netns B ip netns exec A ip link set lo up ip netns exec A ip link set eth0 up ip netns exec A ip link add gre1 type gre local 10.0.0.1 \ remote 10.0.0.2 ip netns exec A ip link set gre1 up ip netns exec A ip a a 10.0.100.1/24 dev gre1 ip netns exec A ip a a 10.0.0.1/24 dev eth0 ip netns exec B ip link set lo up ip netns exec B ip link set eth1 up ip netns exec B ip link add gre1 type gre local 10.0.0.2 \ remote 10.0.0.1 ip netns exec B ip link set gre1 up ip netns exec B ip a a 10.0.100.2/24 dev gre1 ip netns exec B ip a a 10.0.0.2/24 dev eth1 ip netns exec A hping3 10.0.100.2 -2 --flood -d 60000 & ip netns del B Splat looks like: [ 77.793450][ C3] ================================================================== [ 77.794702][ C3] BUG: KASAN: use-after-free in ip_tunnel_lookup+0xcc4/0xf30 [ 77.795573][ C3] Read of size 4 at addr ffff888060bd9c84 by task hping3/2905 [ 77.796398][ C3] [ 77.796664][ C3] CPU: 3 PID: 2905 Comm: hping3 Not tainted 5.8.0-rc1+ #616 [ 77.797474][ C3] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 77.798453][ C3] Call Trace: [ 77.798815][ C3] [ 77.799142][ C3] dump_stack+0x9d/0xdb [ 77.799605][ C3] print_address_description.constprop.7+0x2cc/0x450 [ 77.800365][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 [ 77.800908][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 [ 77.801517][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 [ 77.802145][ C3] kasan_report+0x154/0x190 [ 77.802821][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 [ 77.803503][ C3] ip_tunnel_lookup+0xcc4/0xf30 [ 77.804165][ C3] __ipgre_rcv+0x1ab/0xaa0 [ip_gre] [ 77.804862][ C3] ? rcu_read_lock_sched_held+0xc0/0xc0 [ 77.805621][ C3] gre_rcv+0x304/0x1910 [ip_gre] [ 77.806293][ C3] ? lock_acquire+0x1a9/0x870 [ 77.806925][ C3] ? gre_rcv+0xfe/0x354 [gre] [ 77.807559][ C3] ? erspan_xmit+0x2e60/0x2e60 [ip_gre] [ 77.808305][ C3] ? rcu_read_lock_sched_held+0xc0/0xc0 [ 77.809032][ C3] ? rcu_read_lock_held+0x90/0xa0 [ 77.809713][ C3] gre_rcv+0x1b8/0x354 [gre] [ ... ] Suggested-by: Eric Dumazet Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index f4f1d11eab50..0c1f36404471 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -85,9 +85,10 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, __be32 remote, __be32 local, __be32 key) { - unsigned int hash; struct ip_tunnel *t, *cand = NULL; struct hlist_head *head; + struct net_device *ndev; + unsigned int hash; hash = ip_tunnel_hash(key, remote); head = &itn->tunnels[hash]; @@ -162,8 +163,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, if (t && t->dev->flags & IFF_UP) return t; - if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) - return netdev_priv(itn->fb_tunnel_dev); + ndev = READ_ONCE(itn->fb_tunnel_dev); + if (ndev && ndev->flags & IFF_UP) + return netdev_priv(ndev); return NULL; } @@ -1259,9 +1261,9 @@ void ip_tunnel_uninit(struct net_device *dev) struct ip_tunnel_net *itn; itn = net_generic(net, tunnel->ip_tnl_net_id); - /* fb_tunnel_dev will be unregisted in net-exit call. */ - if (itn->fb_tunnel_dev != dev) - ip_tunnel_del(itn, netdev_priv(dev)); + ip_tunnel_del(itn, netdev_priv(dev)); + if (itn->fb_tunnel_dev == dev) + WRITE_ONCE(itn->fb_tunnel_dev, NULL); dst_cache_reset(&tunnel->dst_cache); } From 7024339a1cfa0d5bbfa628a7a35b67789cc86d7f Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 16 Jun 2020 22:25:20 +0200 Subject: [PATCH 281/462] net/sched: act_gate: fix NULL dereference in tcf_gate_init() it is possible to see a KASAN use-after-free, immediately followed by a NULL dereference crash, with the following command: # tc action add action gate index 3 cycle-time 100000000ns \ > cycle-time-ext 100000000ns clockid CLOCK_TAI BUG: KASAN: use-after-free in tcf_action_init_1+0x8eb/0x960 Write of size 1 at addr ffff88810a5908bc by task tc/883 CPU: 0 PID: 883 Comm: tc Not tainted 5.7.0+ #188 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014 Call Trace: dump_stack+0x75/0xa0 print_address_description.constprop.6+0x1a/0x220 kasan_report.cold.9+0x37/0x7c tcf_action_init_1+0x8eb/0x960 tcf_action_init+0x157/0x2a0 tcf_action_add+0xd9/0x2f0 tc_ctl_action+0x2a3/0x39d rtnetlink_rcv_msg+0x5f3/0x920 netlink_rcv_skb+0x120/0x380 netlink_unicast+0x439/0x630 netlink_sendmsg+0x714/0xbf0 sock_sendmsg+0xe2/0x110 ____sys_sendmsg+0x5b4/0x890 ___sys_sendmsg+0xe9/0x160 __sys_sendmsg+0xd3/0x170 do_syscall_64+0x9a/0x370 entry_SYSCALL_64_after_hwframe+0x44/0xa9 [...] KASAN: null-ptr-deref in range [0x0000000000000070-0x0000000000000077] CPU: 0 PID: 883 Comm: tc Tainted: G B 5.7.0+ #188 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014 RIP: 0010:tcf_action_fill_size+0xa3/0xf0 [....] RSP: 0018:ffff88813a48f250 EFLAGS: 00010212 RAX: dffffc0000000000 RBX: 0000000000000094 RCX: ffffffffa47c3eb6 RDX: 000000000000000e RSI: 0000000000000008 RDI: 0000000000000070 RBP: ffff88810a590800 R08: 0000000000000004 R09: ffffed1027491e03 R10: 0000000000000003 R11: ffffed1027491e03 R12: 0000000000000000 R13: 0000000000000000 R14: dffffc0000000000 R15: ffff88810a590800 FS: 00007f62cae8ce40(0000) GS:ffff888147c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f62c9d20a10 CR3: 000000013a52a000 CR4: 0000000000340ef0 Call Trace: tcf_action_init+0x172/0x2a0 tcf_action_add+0xd9/0x2f0 tc_ctl_action+0x2a3/0x39d rtnetlink_rcv_msg+0x5f3/0x920 netlink_rcv_skb+0x120/0x380 netlink_unicast+0x439/0x630 netlink_sendmsg+0x714/0xbf0 sock_sendmsg+0xe2/0x110 ____sys_sendmsg+0x5b4/0x890 ___sys_sendmsg+0xe9/0x160 __sys_sendmsg+0xd3/0x170 do_syscall_64+0x9a/0x370 entry_SYSCALL_64_after_hwframe+0x44/0xa9 this is caused by the test on 'cycletime_ext', that is still unassigned when the action is newly created. This makes the action .init() return 0 without calling tcf_idr_insert(), hence the UAF + crash. rework the logic that prevents zero values of cycle-time, as follows: 1) 'tcfg_cycletime_ext' seems to be unused in the action software path, and it was already possible by other means to obtain non-zero cycletime and zero cycletime-ext. So, removing that test should not cause any damage. 2) while at it, we must prevent overwriting configuration data with wrong ones: use a temporary variable for 'tcfg_cycletime', and validate it preserving the original semantic (that allowed computing the cycle time as the sum of all intervals, when not specified by TCA_GATE_CYCLE_TIME). 3) remove the test on 'tcfg_cycletime', no more useful, and avoid returning -EFAULT, which did not seem an appropriate return value for a wrong netlink attribute. v3: fix uninitialized 'cycletime' (thanks to Vladimir Oltean) v2: remove useless 'return;' at the end of void gate_get_start_time() Fixes: a51c328df310 ("net: qos: introduce a gate control flow action") CC: Ivan Vecera Signed-off-by: Davide Caratti Acked-by: Vladimir Oltean Tested-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/sched/act_gate.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 9c628591f452..94e560c2f81d 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -32,7 +32,7 @@ static ktime_t gate_get_time(struct tcf_gate *gact) return KTIME_MAX; } -static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start) +static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start) { struct tcf_gate_params *param = &gact->param; ktime_t now, base, cycle; @@ -43,18 +43,13 @@ static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start) if (ktime_after(base, now)) { *start = base; - return 0; + return; } cycle = param->tcfg_cycletime; - /* cycle time should not be zero */ - if (!cycle) - return -EFAULT; - n = div64_u64(ktime_sub_ns(now, base), cycle); *start = ktime_add_ns(base, (n + 1) * cycle); - return 0; } static void gate_start_timer(struct tcf_gate *gact, ktime_t start) @@ -287,12 +282,12 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, enum tk_offsets tk_offset = TK_OFFS_TAI; struct nlattr *tb[TCA_GATE_MAX + 1]; struct tcf_chain *goto_ch = NULL; + u64 cycletime = 0, basetime = 0; struct tcf_gate_params *p; s32 clockid = CLOCK_TAI; struct tcf_gate *gact; struct tc_gate *parm; int ret = 0, err; - u64 basetime = 0; u32 gflags = 0; s32 prio = -1; ktime_t start; @@ -375,11 +370,8 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, spin_lock_bh(&gact->tcf_lock); p = &gact->param; - if (tb[TCA_GATE_CYCLE_TIME]) { - p->tcfg_cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]); - if (!p->tcfg_cycletime_ext) - goto chain_put; - } + if (tb[TCA_GATE_CYCLE_TIME]) + cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]); if (tb[TCA_GATE_ENTRY_LIST]) { err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack); @@ -387,14 +379,19 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, goto chain_put; } - if (!p->tcfg_cycletime) { + if (!cycletime) { struct tcfg_gate_entry *entry; ktime_t cycle = 0; list_for_each_entry(entry, &p->entries, list) cycle = ktime_add_ns(cycle, entry->interval); - p->tcfg_cycletime = cycle; + cycletime = cycle; + if (!cycletime) { + err = -EINVAL; + goto chain_put; + } } + p->tcfg_cycletime = cycletime; if (tb[TCA_GATE_CYCLE_TIME_EXT]) p->tcfg_cycletime_ext = @@ -408,14 +405,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, gact->tk_offset = tk_offset; hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT); gact->hitimer.function = gate_timer_func; - - err = gate_get_start_time(gact, &start); - if (err < 0) { - NL_SET_ERR_MSG(extack, - "Internal error: failed get start time"); - release_entry_list(&p->entries); - goto chain_put; - } + gate_get_start_time(gact, &start); gact->current_close_time = start; gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING; From c362a06e96eae16ccb7b0f4f93f19224279b8610 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 16 Jun 2020 22:25:21 +0200 Subject: [PATCH 282/462] net/sched: act_gate: fix configuration of the periodic timer assigning a dummy value of 'clock_id' to avoid cancellation of the cycle timer before its initialization was a temporary solution, and we still need to handle the case where act_gate timer parameters are changed by commands like the following one: # tc action replace action gate the fix consists in the following items: 1) remove the workaround assignment of 'clock_id', and init the list of entries before the first error path after IDR atomic check/allocation 2) validate 'clock_id' earlier: there is no need to do IDR atomic check/allocation if we know that 'clock_id' is a bad value 3) use a dedicated function, 'gate_setup_timer()', to ensure that the timer is cancelled and re-initialized on action overwrite, and also ensure we initialize the timer in the error path of tcf_gate_init() v3: improve comment in the error path of tcf_gate_init() (thanks to Vladimir Oltean) v2: avoid 'goto' in gate_setup_timer (thanks to Cong Wang) CC: Ivan Vecera Fixes: a01c245438c5 ("net/sched: fix a couple of splats in the error path of tfc_gate_init()") Fixes: a51c328df310 ("net: qos: introduce a gate control flow action") Signed-off-by: Davide Caratti Acked-by: Vladimir Oltean Tested-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/sched/act_gate.c | 90 +++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 35 deletions(-) diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 94e560c2f81d..323ae7f6315d 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -272,6 +272,27 @@ release_list: return err; } +static void gate_setup_timer(struct tcf_gate *gact, u64 basetime, + enum tk_offsets tko, s32 clockid, + bool do_init) +{ + if (!do_init) { + if (basetime == gact->param.tcfg_basetime && + tko == gact->tk_offset && + clockid == gact->param.tcfg_clockid) + return; + + spin_unlock_bh(&gact->tcf_lock); + hrtimer_cancel(&gact->hitimer); + spin_lock_bh(&gact->tcf_lock); + } + gact->param.tcfg_basetime = basetime; + gact->param.tcfg_clockid = clockid; + gact->tk_offset = tko; + hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT); + gact->hitimer.function = gate_timer_func; +} + static int tcf_gate_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, @@ -303,6 +324,27 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (!tb[TCA_GATE_PARMS]) return -EINVAL; + if (tb[TCA_GATE_CLOCKID]) { + clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]); + switch (clockid) { + case CLOCK_REALTIME: + tk_offset = TK_OFFS_REAL; + break; + case CLOCK_MONOTONIC: + tk_offset = TK_OFFS_MAX; + break; + case CLOCK_BOOTTIME: + tk_offset = TK_OFFS_BOOT; + break; + case CLOCK_TAI: + tk_offset = TK_OFFS_TAI; + break; + default: + NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); + return -EINVAL; + } + } + parm = nla_data(tb[TCA_GATE_PARMS]); index = parm->index; @@ -326,10 +368,6 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } - if (ret == ACT_P_CREATED) { - to_gate(*a)->param.tcfg_clockid = -1; - INIT_LIST_HEAD(&(to_gate(*a)->param.entries)); - } if (tb[TCA_GATE_PRIORITY]) prio = nla_get_s32(tb[TCA_GATE_PRIORITY]); @@ -340,33 +378,14 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (tb[TCA_GATE_FLAGS]) gflags = nla_get_u32(tb[TCA_GATE_FLAGS]); - if (tb[TCA_GATE_CLOCKID]) { - clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]); - switch (clockid) { - case CLOCK_REALTIME: - tk_offset = TK_OFFS_REAL; - break; - case CLOCK_MONOTONIC: - tk_offset = TK_OFFS_MAX; - break; - case CLOCK_BOOTTIME: - tk_offset = TK_OFFS_BOOT; - break; - case CLOCK_TAI: - tk_offset = TK_OFFS_TAI; - break; - default: - NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); - goto release_idr; - } - } + gact = to_gate(*a); + if (ret == ACT_P_CREATED) + INIT_LIST_HEAD(&gact->param.entries); err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; - gact = to_gate(*a); - spin_lock_bh(&gact->tcf_lock); p = &gact->param; @@ -397,14 +416,10 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, p->tcfg_cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]); + gate_setup_timer(gact, basetime, tk_offset, clockid, + ret == ACT_P_CREATED); p->tcfg_priority = prio; - p->tcfg_basetime = basetime; - p->tcfg_clockid = clockid; p->tcfg_flags = gflags; - - gact->tk_offset = tk_offset; - hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT); - gact->hitimer.function = gate_timer_func; gate_get_start_time(gact, &start); gact->current_close_time = start; @@ -433,6 +448,13 @@ chain_put: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: + /* action is not inserted in any list: it's safe to init hitimer + * without taking tcf_lock. + */ + if (ret == ACT_P_CREATED) + gate_setup_timer(gact, gact->param.tcfg_basetime, + gact->tk_offset, gact->param.tcfg_clockid, + true); tcf_idr_release(*a, bind); return err; } @@ -443,9 +465,7 @@ static void tcf_gate_cleanup(struct tc_action *a) struct tcf_gate_params *p; p = &gact->param; - if (p->tcfg_clockid != -1) - hrtimer_cancel(&gact->hitimer); - + hrtimer_cancel(&gact->hitimer); release_entry_list(&p->entries); } From 5b3b396c77677b51a6bd1aacd0fe2a04d1efa0ff Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 17 Jun 2020 02:58:41 +0300 Subject: [PATCH 283/462] net: dsa: sja1105: remove debugging code in sja1105_vl_gate This shouldn't be there. Fixes: 834f8933d5dd ("net: dsa: sja1105: implement tc-gate using time-triggered virtual links") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_vl.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c index bdfd6c4e190d..32eca3e660e1 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.c +++ b/drivers/net/dsa/sja1105/sja1105_vl.c @@ -588,14 +588,10 @@ int sja1105_vl_gate(struct sja1105_private *priv, int port, if (priv->vlan_state == SJA1105_VLAN_UNAWARE && key->type != SJA1105_KEY_VLAN_UNAWARE_VL) { - dev_err(priv->ds->dev, "1: vlan state %d key type %d\n", - priv->vlan_state, key->type); NL_SET_ERR_MSG_MOD(extack, "Can only gate based on DMAC"); return -EOPNOTSUPP; } else if (key->type != SJA1105_KEY_VLAN_AWARE_VL) { - dev_err(priv->ds->dev, "2: vlan state %d key type %d\n", - priv->vlan_state, key->type); NL_SET_ERR_MSG_MOD(extack, "Can only gate based on {DMAC, VID, PCP}"); return -EOPNOTSUPP; From c6ae970bcc8e6f0b761111680ba78f7be43fb05a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 17 Jun 2020 02:58:42 +0300 Subject: [PATCH 284/462] net: dsa: sja1105: fix checks for VLAN state in redirect action This action requires the VLAN awareness state of the switch to be of the same type as the key that's being added: - If the switch is unaware of VLAN, then the tc filter key must only contain the destination MAC address. - If the switch is VLAN-aware, the key must also contain the VLAN ID and PCP. But this check doesn't work unless we verify the VLAN awareness state on both the "if" and the "else" branches. Fixes: dfacc5a23e22 ("net: dsa: sja1105: support flow-based redirection via virtual links") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_vl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c index 32eca3e660e1..a176f39a052b 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.c +++ b/drivers/net/dsa/sja1105/sja1105_vl.c @@ -342,7 +342,9 @@ int sja1105_vl_redirect(struct sja1105_private *priv, int port, NL_SET_ERR_MSG_MOD(extack, "Can only redirect based on DMAC"); return -EOPNOTSUPP; - } else if (key->type != SJA1105_KEY_VLAN_AWARE_VL) { + } else if ((priv->vlan_state == SJA1105_VLAN_BEST_EFFORT || + priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) && + key->type != SJA1105_KEY_VLAN_AWARE_VL) { NL_SET_ERR_MSG_MOD(extack, "Can only redirect based on {DMAC, VID, PCP}"); return -EOPNOTSUPP; From 5182a6222dd0f27c41cda2706bdc58d48a0d8f96 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 17 Jun 2020 02:58:43 +0300 Subject: [PATCH 285/462] net: dsa: sja1105: fix checks for VLAN state in gate action This action requires the VLAN awareness state of the switch to be of the same type as the key that's being added: - If the switch is unaware of VLAN, then the tc filter key must only contain the destination MAC address. - If the switch is VLAN-aware, the key must also contain the VLAN ID and PCP. But this check doesn't work unless we verify the VLAN awareness state on both the "if" and the "else" branches. Fixes: 834f8933d5dd ("net: dsa: sja1105: implement tc-gate using time-triggered virtual links") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_vl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c index a176f39a052b..0056f9c1e471 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.c +++ b/drivers/net/dsa/sja1105/sja1105_vl.c @@ -593,7 +593,9 @@ int sja1105_vl_gate(struct sja1105_private *priv, int port, NL_SET_ERR_MSG_MOD(extack, "Can only gate based on DMAC"); return -EOPNOTSUPP; - } else if (key->type != SJA1105_KEY_VLAN_AWARE_VL) { + } else if ((priv->vlan_state == SJA1105_VLAN_BEST_EFFORT || + priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) && + key->type != SJA1105_KEY_VLAN_AWARE_VL) { NL_SET_ERR_MSG_MOD(extack, "Can only gate based on {DMAC, VID, PCP}"); return -EOPNOTSUPP; From 5eea3a63ff4aba6a26002e657a6d21934b7e2b96 Mon Sep 17 00:00:00 2001 From: guodeqing Date: Wed, 17 Jun 2020 10:07:16 +0800 Subject: [PATCH 286/462] net: Fix the arp error in some cases ie., $ ifconfig eth0 6.6.6.6 netmask 255.255.255.0 $ ip rule add from 6.6.6.6 table 6666 $ ip route add 9.9.9.9 via 6.6.6.6 $ ping -I 6.6.6.6 9.9.9.9 PING 9.9.9.9 (9.9.9.9) from 6.6.6.6 : 56(84) bytes of data. 3 packets transmitted, 0 received, 100% packet loss, time 2079ms $ arp Address HWtype HWaddress Flags Mask Iface 6.6.6.6 (incomplete) eth0 The arp request address is error, this is because fib_table_lookup in fib_check_nh lookup the destnation 9.9.9.9 nexthop, the scope of the fib result is RT_SCOPE_LINK,the correct scope is RT_SCOPE_HOST. Here I add a check of whether this is RT_TABLE_MAIN to solve this problem. Fixes: 3bfd847203c6 ("net: Use passed in table for nexthop lookups") Signed-off-by: guodeqing Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e53871e4a097..1f75dc686b6b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1109,7 +1109,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, if (fl4.flowi4_scope < RT_SCOPE_LINK) fl4.flowi4_scope = RT_SCOPE_LINK; - if (table) + if (table && table != RT_TABLE_MAIN) tbl = fib_get_table(net, table); if (tbl) From 8fd4de1275580a1befa1456d1070eaf6489fb48f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 17 Jun 2020 12:08:56 +0200 Subject: [PATCH 287/462] mptcp: cache msk on MP_JOIN init_req The msk ownership is transferred to the child socket at 3rd ack time, so that we avoid more lookups later. If the request does not reach the 3rd ack, the MSK reference is dropped at request sock release time. As a side effect, fallback is now tracked by a NULL msk reference instead of zeroed 'mp_join' field. This will simplify the next patch. Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 39 +++++++++++++++++---------------------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index db56535dfc29..c6eeaf3e8dcb 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -249,6 +249,7 @@ struct mptcp_subflow_request_sock { u64 thmac; u32 local_nonce; u32 remote_nonce; + struct mptcp_sock *msk; }; static inline struct mptcp_subflow_request_sock * diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index bbdb74b8bc3c..4068bdb2523b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -69,6 +69,9 @@ static void subflow_req_destructor(struct request_sock *req) pr_debug("subflow_req=%p", subflow_req); + if (subflow_req->msk) + sock_put((struct sock *)subflow_req->msk); + if (subflow_req->mp_capable) mptcp_token_destroy_request(subflow_req->token); tcp_request_sock_ops.destructor(req); @@ -86,8 +89,8 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2, } /* validate received token and create truncated hmac and nonce for SYN-ACK */ -static bool subflow_token_join_request(struct request_sock *req, - const struct sk_buff *skb) +static struct mptcp_sock *subflow_token_join_request(struct request_sock *req, + const struct sk_buff *skb) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); u8 hmac[SHA256_DIGEST_SIZE]; @@ -97,13 +100,13 @@ static bool subflow_token_join_request(struct request_sock *req, msk = mptcp_token_get_sock(subflow_req->token); if (!msk) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN); - return false; + return NULL; } local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req); if (local_id < 0) { sock_put((struct sock *)msk); - return false; + return NULL; } subflow_req->local_id = local_id; @@ -114,9 +117,7 @@ static bool subflow_token_join_request(struct request_sock *req, subflow_req->remote_nonce, hmac); subflow_req->thmac = get_unaligned_be64(hmac); - - sock_put((struct sock *)msk); - return true; + return msk; } static void subflow_init_req(struct request_sock *req, @@ -133,6 +134,7 @@ static void subflow_init_req(struct request_sock *req, subflow_req->mp_capable = 0; subflow_req->mp_join = 0; + subflow_req->msk = NULL; #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -166,12 +168,9 @@ static void subflow_init_req(struct request_sock *req, subflow_req->remote_id = mp_opt.join_id; subflow_req->token = mp_opt.token; subflow_req->remote_nonce = mp_opt.nonce; - pr_debug("token=%u, remote_nonce=%u", subflow_req->token, - subflow_req->remote_nonce); - if (!subflow_token_join_request(req, skb)) { - subflow_req->mp_join = 0; - // @@ need to trigger RST - } + subflow_req->msk = subflow_token_join_request(req, skb); + pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token, + subflow_req->remote_nonce, subflow_req->msk); } } @@ -354,10 +353,9 @@ static bool subflow_hmac_valid(const struct request_sock *req, const struct mptcp_subflow_request_sock *subflow_req; u8 hmac[SHA256_DIGEST_SIZE]; struct mptcp_sock *msk; - bool ret; subflow_req = mptcp_subflow_rsk(req); - msk = mptcp_token_get_sock(subflow_req->token); + msk = subflow_req->msk; if (!msk) return false; @@ -365,12 +363,7 @@ static bool subflow_hmac_valid(const struct request_sock *req, subflow_req->remote_nonce, subflow_req->local_nonce, hmac); - ret = true; - if (crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN)) - ret = false; - - sock_put((struct sock *)msk); - return ret; + return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN); } static void mptcp_sock_destruct(struct sock *sk) @@ -522,10 +515,12 @@ create_child: } else if (ctx->mp_join) { struct mptcp_sock *owner; - owner = mptcp_token_get_sock(ctx->token); + owner = subflow_req->msk; if (!owner) goto dispose_child; + /* move the msk reference ownership to the subflow */ + subflow_req->msk = NULL; ctx->conn = (struct sock *)owner; if (!mptcp_finish_join(child)) goto dispose_child; From 9e365ff576b7c1623bbc5ef31ec652c533e2f65e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 17 Jun 2020 12:08:57 +0200 Subject: [PATCH 288/462] mptcp: drop MP_JOIN request sock on syn cookies Currently any MPTCP socket using syn cookies will fallback to TCP at 3rd ack time. In case of MP_JOIN requests, the RFC mandate closing the child and sockets, but the existing error paths do not handle the syncookie scenario correctly. Address the issue always forcing the child shutdown in case of MP_JOIN fallback. Fixes: ae2dd7164943 ("mptcp: handle tcp fallback when using syn cookies") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4068bdb2523b..3838a0b3a21f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -431,22 +431,25 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; struct mptcp_options_received mp_opt; - bool fallback_is_fatal = false; + bool fallback, fallback_is_fatal; struct sock *new_msk = NULL; - bool fallback = false; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); - /* we need later a valid 'mp_capable' value even when options are not - * parsed + /* After child creation we must look for 'mp_capable' even when options + * are not parsed */ mp_opt.mp_capable = 0; - if (tcp_rsk(req)->is_mptcp == 0) + + /* hopefully temporary handling for MP_JOIN+syncookie */ + subflow_req = mptcp_subflow_rsk(req); + fallback_is_fatal = subflow_req->mp_join; + fallback = !tcp_rsk(req)->is_mptcp; + if (fallback) goto create_child; /* if the sk is MP_CAPABLE, we try to fetch the client key */ - subflow_req = mptcp_subflow_rsk(req); if (subflow_req->mp_capable) { if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) { /* here we can receive and accept an in-window, @@ -467,12 +470,11 @@ create_msk: if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { - fallback_is_fatal = true; mptcp_get_options(skb, &mp_opt); if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); - return NULL; + fallback = true; } } From e2dfcfba00ba4a414617ef4c5a8501fe21567eb3 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 17 Jun 2020 16:54:52 +0200 Subject: [PATCH 289/462] s390/qeth: fix error handling for isolation mode cmds Current(?) OSA devices also store their cmd-specific return codes for SET_ACCESS_CONTROL cmds into the top-level cmd->hdr.return_code. So once we added stricter checking for the top-level field a while ago, none of the error logic that rolls back the user's configuration to its old state is applied any longer. For this specific cmd, go back to the old model where we peek into the cmd structure even though the top-level field indicated an error. Fixes: 686c97ee29c8 ("s390/qeth: fix error handling in adapter command callbacks") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 18a0fb75a710..9636415f810b 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4544,9 +4544,6 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, int fallback = *(int *)reply->param; QETH_CARD_TEXT(card, 4, "setaccb"); - if (cmd->hdr.return_code) - return -EIO; - qeth_setadpparms_inspect_rc(cmd); access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl; QETH_CARD_TEXT_(card, 2, "rc=%d", @@ -4556,7 +4553,7 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, QETH_DBF_MESSAGE(3, "ERR:SET_ACCESS_CTRL(%#x) on device %x: %#x\n", access_ctrl_req->subcmd_code, CARD_DEVID(card), cmd->data.setadapterparms.hdr.return_code); - switch (cmd->data.setadapterparms.hdr.return_code) { + switch (qeth_setadpparms_inspect_rc(cmd)) { case SET_ACCESS_CTRL_RC_SUCCESS: if (card->options.isolation == ISOLATION_MODE_NONE) { dev_info(&card->gdev->dev, From 8cebedb643832586162aa5313cae781173c1f81b Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 17 Jun 2020 16:54:53 +0200 Subject: [PATCH 290/462] s390/qeth: let isolation mode override HW offload restrictions When a device is configured with ISOLATION_MODE_FWD, traffic never goes through the internal switch. Don't apply the offload restrictions in this case. Fixes: c619e9a6f52f ("s390/qeth: don't use restricted offloads for local traffic") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 9636415f810b..88e998de2d03 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6837,9 +6837,11 @@ netdev_features_t qeth_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { + struct qeth_card *card = dev->ml_priv; + /* Traffic with local next-hop is not eligible for some offloads: */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - struct qeth_card *card = dev->ml_priv; + if (skb->ip_summed == CHECKSUM_PARTIAL && + card->options.isolation != ISOLATION_MODE_FWD) { netdev_features_t restricted = 0; if (skb_is_gso(skb) && !netif_needs_gso(skb, features)) From 4c98045c9b74feab837be58986c0517d3cc661f1 Mon Sep 17 00:00:00 2001 From: Martin Date: Wed, 17 Jun 2020 22:30:23 +0530 Subject: [PATCH 291/462] bareudp: Fixed multiproto mode configuration Code to handle multiproto configuration is missing. Fixes: 4b5f67232d95 ("net: Special handling for IP & MPLS") Signed-off-by: Martin Signed-off-by: David S. Miller --- drivers/net/bareudp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 5d3c691a1c66..3dd46cd55114 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -572,6 +572,9 @@ static int bareudp2info(struct nlattr *data[], struct bareudp_conf *conf, if (data[IFLA_BAREUDP_SRCPORT_MIN]) conf->sport_min = nla_get_u16(data[IFLA_BAREUDP_SRCPORT_MIN]); + if (data[IFLA_BAREUDP_MULTIPROTO_MODE]) + conf->multi_proto_mode = true; + return 0; } From 3a2656a211caf35e56afc9425e6e518fa52f7fbc Mon Sep 17 00:00:00 2001 From: David Christensen Date: Wed, 17 Jun 2020 11:51:17 -0700 Subject: [PATCH 292/462] tg3: driver sleeps indefinitely when EEH errors exceed eeh_max_freezes The driver function tg3_io_error_detected() calls napi_disable twice, without an intervening napi_enable, when the number of EEH errors exceeds eeh_max_freezes, resulting in an indefinite sleep while holding rtnl_lock. Add check for pcierr_recovery which skips code already executed for the "Frozen" state. Signed-off-by: David Christensen Reviewed-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 7a3b22b35238..ebff1fc0d8ce 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -18168,8 +18168,8 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, rtnl_lock(); - /* We probably don't have netdev yet */ - if (!netdev || !netif_running(netdev)) + /* Could be second call or maybe we don't have netdev yet */ + if (!netdev || tp->pcierr_recovery || !netif_running(netdev)) goto done; /* We needn't recover from permanent error */ From eddbf5d0204e550ee59de02bdc19fe90d4203dd6 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 17 Jun 2020 20:42:47 +0000 Subject: [PATCH 293/462] net: ethtool: add missing NETIF_F_GSO_FRAGLIST feature string Commit 3b33583265ed ("net: Add fraglist GRO/GSO feature flags") missed an entry for NETIF_F_GSO_FRAGLIST in netdev_features_strings array. As a result, fraglist GSO feature is not shown in 'ethtool -k' output and can't be toggled on/off. The fix is trivial. Fixes: 3b33583265ed ("net: Add fraglist GRO/GSO feature flags") Signed-off-by: Alexander Lobakin Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ethtool/common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 423e640e3876..47f63526818e 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -43,6 +43,7 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation", [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation", + [NETIF_F_GSO_FRAGLIST_BIT] = "tx-gso-list", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", From 8dbe4c5d5e40fe140221024f7b16bec9f310bf70 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 17 Jun 2020 20:42:44 -0700 Subject: [PATCH 294/462] net: dsa: bcm_sf2: Fix node reference count of_find_node_by_name() will do an of_node_put() on the "from" argument. With CONFIG_OF_DYNAMIC enabled which checks for device_node reference counts, we would be getting a warning like this: [ 6.347230] refcount_t: increment on 0; use-after-free. [ 6.352498] WARNING: CPU: 3 PID: 77 at lib/refcount.c:156 refcount_inc_checked+0x38/0x44 [ 6.360601] Modules linked in: [ 6.363661] CPU: 3 PID: 77 Comm: kworker/3:1 Tainted: G W 5.4.46-gb78b3e9956e6 #13 [ 6.372546] Hardware name: BCM97278SV (DT) [ 6.376649] Workqueue: events deferred_probe_work_func [ 6.381796] pstate: 60000005 (nZCv daif -PAN -UAO) [ 6.386595] pc : refcount_inc_checked+0x38/0x44 [ 6.391133] lr : refcount_inc_checked+0x38/0x44 ... [ 6.478791] Call trace: [ 6.481243] refcount_inc_checked+0x38/0x44 [ 6.485433] kobject_get+0x3c/0x4c [ 6.488840] of_node_get+0x24/0x34 [ 6.492247] of_irq_find_parent+0x3c/0xe0 [ 6.496263] of_irq_parse_one+0xe4/0x1d0 [ 6.500191] irq_of_parse_and_map+0x44/0x84 [ 6.504381] bcm_sf2_sw_probe+0x22c/0x844 [ 6.508397] platform_drv_probe+0x58/0xa8 [ 6.512413] really_probe+0x238/0x3fc [ 6.516081] driver_probe_device+0x11c/0x12c [ 6.520358] __device_attach_driver+0xa8/0x100 [ 6.524808] bus_for_each_drv+0xb4/0xd0 [ 6.528650] __device_attach+0xd0/0x164 [ 6.532493] device_initial_probe+0x24/0x30 [ 6.536682] bus_probe_device+0x38/0x98 [ 6.540524] deferred_probe_work_func+0xa8/0xd4 [ 6.545061] process_one_work+0x178/0x288 [ 6.549078] process_scheduled_works+0x44/0x48 [ 6.553529] worker_thread+0x218/0x270 [ 6.557285] kthread+0xdc/0xe4 [ 6.560344] ret_from_fork+0x10/0x18 [ 6.563925] ---[ end trace 68f65caf69bb152a ]--- Fix this by adding a of_node_get() to increment the reference count prior to the call. Fixes: afa3b592953b ("net: dsa: bcm_sf2: Ensure correct sub-node is parsed") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index c7ac63f41918..946e41f020a5 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1147,6 +1147,8 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) set_bit(0, priv->cfp.used); set_bit(0, priv->cfp.unique); + /* Balance of_node_put() done by of_find_node_by_name() */ + of_node_get(dn); ports = of_find_node_by_name(dn, "ports"); if (ports) { bcm_sf2_identify_ports(priv, ports); From 0ad6f6e767ec2f613418cbc7ebe5ec4c35af540c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jun 2020 22:23:25 -0700 Subject: [PATCH 295/462] net: increment xmit_recursion level in dev_direct_xmit() Back in commit f60e5990d9c1 ("ipv6: protect skb->sk accesses from recursive dereference inside the stack") Hannes added code so that IPv6 stack would not trust skb->sk for typical cases where packet goes through 'standard' xmit path (__dev_queue_xmit()) Alas af_packet had a dev_direct_xmit() path that was not dealing yet with xmit_recursion level. Also change sk_mc_loop() to dump a stack once only. Without this patch, syzbot was able to trigger : [1] [ 153.567378] WARNING: CPU: 7 PID: 11273 at net/core/sock.c:721 sk_mc_loop+0x51/0x70 [ 153.567378] Modules linked in: nfnetlink ip6table_raw ip6table_filter iptable_raw iptable_nat nf_nat nf_conntrack nf_defrag_ipv4 nf_defrag_ipv6 iptable_filter macsec macvtap tap macvlan 8021q hsr wireguard libblake2s blake2s_x86_64 libblake2s_generic udp_tunnel ip6_udp_tunnel libchacha20poly1305 poly1305_x86_64 chacha_x86_64 libchacha curve25519_x86_64 libcurve25519_generic netdevsim batman_adv dummy team bridge stp llc w1_therm wire i2c_mux_pca954x i2c_mux cdc_acm ehci_pci ehci_hcd mlx4_en mlx4_ib ib_uverbs ib_core mlx4_core [ 153.567386] CPU: 7 PID: 11273 Comm: b159172088 Not tainted 5.8.0-smp-DEV #273 [ 153.567387] RIP: 0010:sk_mc_loop+0x51/0x70 [ 153.567388] Code: 66 83 f8 0a 75 24 0f b6 4f 12 b8 01 00 00 00 31 d2 d3 e0 a9 bf ef ff ff 74 07 48 8b 97 f0 02 00 00 0f b6 42 3a 83 e0 01 5d c3 <0f> 0b b8 01 00 00 00 5d c3 0f b6 87 18 03 00 00 5d c0 e8 04 83 e0 [ 153.567388] RSP: 0018:ffff95c69bb93990 EFLAGS: 00010212 [ 153.567388] RAX: 0000000000000011 RBX: ffff95c6e0ee3e00 RCX: 0000000000000007 [ 153.567389] RDX: ffff95c69ae50000 RSI: ffff95c6c30c3000 RDI: ffff95c6c30c3000 [ 153.567389] RBP: ffff95c69bb93990 R08: ffff95c69a77f000 R09: 0000000000000008 [ 153.567389] R10: 0000000000000040 R11: 00003e0e00026128 R12: ffff95c6c30c3000 [ 153.567390] R13: ffff95c6cc4fd500 R14: ffff95c6f84500c0 R15: ffff95c69aa13c00 [ 153.567390] FS: 00007fdc3a283700(0000) GS:ffff95c6ff9c0000(0000) knlGS:0000000000000000 [ 153.567390] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 153.567391] CR2: 00007ffee758e890 CR3: 0000001f9ba20003 CR4: 00000000001606e0 [ 153.567391] Call Trace: [ 153.567391] ip6_finish_output2+0x34e/0x550 [ 153.567391] __ip6_finish_output+0xe7/0x110 [ 153.567391] ip6_finish_output+0x2d/0xb0 [ 153.567392] ip6_output+0x77/0x120 [ 153.567392] ? __ip6_finish_output+0x110/0x110 [ 153.567392] ip6_local_out+0x3d/0x50 [ 153.567392] ipvlan_queue_xmit+0x56c/0x5e0 [ 153.567393] ? ksize+0x19/0x30 [ 153.567393] ipvlan_start_xmit+0x18/0x50 [ 153.567393] dev_direct_xmit+0xf3/0x1c0 [ 153.567393] packet_direct_xmit+0x69/0xa0 [ 153.567394] packet_sendmsg+0xbf0/0x19b0 [ 153.567394] ? plist_del+0x62/0xb0 [ 153.567394] sock_sendmsg+0x65/0x70 [ 153.567394] sock_write_iter+0x93/0xf0 [ 153.567394] new_sync_write+0x18e/0x1a0 [ 153.567395] __vfs_write+0x29/0x40 [ 153.567395] vfs_write+0xb9/0x1b0 [ 153.567395] ksys_write+0xb1/0xe0 [ 153.567395] __x64_sys_write+0x1a/0x20 [ 153.567395] do_syscall_64+0x43/0x70 [ 153.567396] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 153.567396] RIP: 0033:0x453549 [ 153.567396] Code: Bad RIP value. [ 153.567396] RSP: 002b:00007fdc3a282cc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 153.567397] RAX: ffffffffffffffda RBX: 00000000004d32d0 RCX: 0000000000453549 [ 153.567397] RDX: 0000000000000020 RSI: 0000000020000300 RDI: 0000000000000003 [ 153.567398] RBP: 00000000004d32d8 R08: 0000000000000000 R09: 0000000000000000 [ 153.567398] R10: 0000000000000000 R11: 0000000000000246 R12: 00000000004d32dc [ 153.567398] R13: 00007ffee742260f R14: 00007fdc3a282dc0 R15: 00007fdc3a283700 [ 153.567399] ---[ end trace c1d5ae2b1059ec62 ]--- f60e5990d9c1 ("ipv6: protect skb->sk accesses from recursive dereference inside the stack") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ net/core/sock.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 44a14b41ad82..90b59fc50dc9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4192,10 +4192,12 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) local_bh_disable(); + dev_xmit_recursion_inc(); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) ret = netdev_start_xmit(skb, dev, txq, false); HARD_TX_UNLOCK(dev, txq); + dev_xmit_recursion_dec(); local_bh_enable(); diff --git a/net/core/sock.c b/net/core/sock.c index 6c4acf1f0220..94391da27754 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -718,7 +718,7 @@ bool sk_mc_loop(struct sock *sk) return inet6_sk(sk)->mc_loop; #endif } - WARN_ON(1); + WARN_ON_ONCE(1); return true; } EXPORT_SYMBOL(sk_mc_loop); From f140ad9fe2ae16f385f8fe4dc9cf67bb4c51d794 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Tue, 9 Jun 2020 13:19:43 +0000 Subject: [PATCH 296/462] ixgbe: protect ring accesses with READ- and WRITE_ONCE READ_ONCE should be used when reading rings prior to accessing the statistics pointer. Introduce this as well as the corresponding WRITE_ONCE usage when allocating and freeing the rings, to ensure protected access. Signed-off-by: Ciara Loftus Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 12 ++++++------ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 14 +++++++++++--- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index fd9f5d41b594..2e35c5706cf1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -921,7 +921,7 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, ring->queue_index = txr_idx; /* assign ring to adapter */ - adapter->tx_ring[txr_idx] = ring; + WRITE_ONCE(adapter->tx_ring[txr_idx], ring); /* update count and index */ txr_count--; @@ -948,7 +948,7 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, set_ring_xdp(ring); /* assign ring to adapter */ - adapter->xdp_ring[xdp_idx] = ring; + WRITE_ONCE(adapter->xdp_ring[xdp_idx], ring); /* update count and index */ xdp_count--; @@ -991,7 +991,7 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, ring->queue_index = rxr_idx; /* assign ring to adapter */ - adapter->rx_ring[rxr_idx] = ring; + WRITE_ONCE(adapter->rx_ring[rxr_idx], ring); /* update count and index */ rxr_count--; @@ -1020,13 +1020,13 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx) ixgbe_for_each_ring(ring, q_vector->tx) { if (ring_is_xdp(ring)) - adapter->xdp_ring[ring->queue_index] = NULL; + WRITE_ONCE(adapter->xdp_ring[ring->queue_index], NULL); else - adapter->tx_ring[ring->queue_index] = NULL; + WRITE_ONCE(adapter->tx_ring[ring->queue_index], NULL); } ixgbe_for_each_ring(ring, q_vector->rx) - adapter->rx_ring[ring->queue_index] = NULL; + WRITE_ONCE(adapter->rx_ring[ring->queue_index], NULL); adapter->q_vector[v_idx] = NULL; napi_hash_del(&q_vector->napi); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f162b8b8f345..97a423ecf808 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7051,7 +7051,10 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) } for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbe_ring *rx_ring = adapter->rx_ring[i]; + struct ixgbe_ring *rx_ring = READ_ONCE(adapter->rx_ring[i]); + + if (!rx_ring) + continue; non_eop_descs += rx_ring->rx_stats.non_eop_descs; alloc_rx_page += rx_ring->rx_stats.alloc_rx_page; alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed; @@ -7072,15 +7075,20 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) packets = 0; /* gather some stats to the adapter struct that are per queue */ for (i = 0; i < adapter->num_tx_queues; i++) { - struct ixgbe_ring *tx_ring = adapter->tx_ring[i]; + struct ixgbe_ring *tx_ring = READ_ONCE(adapter->tx_ring[i]); + + if (!tx_ring) + continue; restart_queue += tx_ring->tx_stats.restart_queue; tx_busy += tx_ring->tx_stats.tx_busy; bytes += tx_ring->stats.bytes; packets += tx_ring->stats.packets; } for (i = 0; i < adapter->num_xdp_queues; i++) { - struct ixgbe_ring *xdp_ring = adapter->xdp_ring[i]; + struct ixgbe_ring *xdp_ring = READ_ONCE(adapter->xdp_ring[i]); + if (!xdp_ring) + continue; restart_queue += xdp_ring->tx_stats.restart_queue; tx_busy += xdp_ring->tx_stats.tx_busy; bytes += xdp_ring->stats.bytes; From d59e267912cd90b0adf33b4659050d831e746317 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Tue, 9 Jun 2020 13:19:44 +0000 Subject: [PATCH 297/462] i40e: protect ring accesses with READ- and WRITE_ONCE READ_ONCE should be used when reading rings prior to accessing the statistics pointer. Introduce this as well as the corresponding WRITE_ONCE usage when allocating and freeing the rings, to ensure protected access. Signed-off-by: Ciara Loftus Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 29 ++++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5d807c8004f8..56ecd6c3f236 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -439,11 +439,15 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev, i40e_get_netdev_stats_struct_tx(ring, stats); if (i40e_enabled_xdp_vsi(vsi)) { - ring++; + ring = READ_ONCE(vsi->xdp_rings[i]); + if (!ring) + continue; i40e_get_netdev_stats_struct_tx(ring, stats); } - ring++; + ring = READ_ONCE(vsi->rx_rings[i]); + if (!ring) + continue; do { start = u64_stats_fetch_begin_irq(&ring->syncp); packets = ring->stats.packets; @@ -787,6 +791,8 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) for (q = 0; q < vsi->num_queue_pairs; q++) { /* locate Tx ring */ p = READ_ONCE(vsi->tx_rings[q]); + if (!p) + continue; do { start = u64_stats_fetch_begin_irq(&p->syncp); @@ -800,8 +806,11 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) tx_linearize += p->tx_stats.tx_linearize; tx_force_wb += p->tx_stats.tx_force_wb; - /* Rx queue is part of the same block as Tx queue */ - p = &p[1]; + /* locate Rx ring */ + p = READ_ONCE(vsi->rx_rings[q]); + if (!p) + continue; + do { start = u64_stats_fetch_begin_irq(&p->syncp); packets = p->stats.packets; @@ -10824,10 +10833,10 @@ static void i40e_vsi_clear_rings(struct i40e_vsi *vsi) if (vsi->tx_rings && vsi->tx_rings[0]) { for (i = 0; i < vsi->alloc_queue_pairs; i++) { kfree_rcu(vsi->tx_rings[i], rcu); - vsi->tx_rings[i] = NULL; - vsi->rx_rings[i] = NULL; + WRITE_ONCE(vsi->tx_rings[i], NULL); + WRITE_ONCE(vsi->rx_rings[i], NULL); if (vsi->xdp_rings) - vsi->xdp_rings[i] = NULL; + WRITE_ONCE(vsi->xdp_rings[i], NULL); } } } @@ -10861,7 +10870,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; ring->itr_setting = pf->tx_itr_default; - vsi->tx_rings[i] = ring++; + WRITE_ONCE(vsi->tx_rings[i], ring++); if (!i40e_enabled_xdp_vsi(vsi)) goto setup_rx; @@ -10879,7 +10888,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; set_ring_xdp(ring); ring->itr_setting = pf->tx_itr_default; - vsi->xdp_rings[i] = ring++; + WRITE_ONCE(vsi->xdp_rings[i], ring++); setup_rx: ring->queue_index = i; @@ -10892,7 +10901,7 @@ setup_rx: ring->size = 0; ring->dcb_tc = 0; ring->itr_setting = pf->rx_itr_default; - vsi->rx_rings[i] = ring; + WRITE_ONCE(vsi->rx_rings[i], ring); } return 0; From b1d95cc2391ffac0c5b27256a4fb0d2cfb021a29 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Tue, 9 Jun 2020 13:19:45 +0000 Subject: [PATCH 298/462] ice: protect ring accesses with WRITE_ONCE The READ_ONCE macro is used when reading rings prior to accessing the statistics pointer. The corresponding WRITE_ONCE usage when allocating and freeing the rings to ensure protected access was not in place. Introduce this. Signed-off-by: Ciara Loftus Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_lib.c | 8 ++++---- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 28b46cc9f5cb..2e3a39cea2c0 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1194,7 +1194,7 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi) for (i = 0; i < vsi->alloc_txq; i++) { if (vsi->tx_rings[i]) { kfree_rcu(vsi->tx_rings[i], rcu); - vsi->tx_rings[i] = NULL; + WRITE_ONCE(vsi->tx_rings[i], NULL); } } } @@ -1202,7 +1202,7 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi) for (i = 0; i < vsi->alloc_rxq; i++) { if (vsi->rx_rings[i]) { kfree_rcu(vsi->rx_rings[i], rcu); - vsi->rx_rings[i] = NULL; + WRITE_ONCE(vsi->rx_rings[i], NULL); } } } @@ -1235,7 +1235,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->vsi = vsi; ring->dev = dev; ring->count = vsi->num_tx_desc; - vsi->tx_rings[i] = ring; + WRITE_ONCE(vsi->tx_rings[i], ring); } /* Allocate Rx rings */ @@ -1254,7 +1254,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->netdev = vsi->netdev; ring->dev = dev; ring->count = vsi->num_rx_desc; - vsi->rx_rings[i] = ring; + WRITE_ONCE(vsi->rx_rings[i], ring); } return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 082825e3cb39..4cbd49c87568 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1702,7 +1702,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) xdp_ring->netdev = NULL; xdp_ring->dev = dev; xdp_ring->count = vsi->num_tx_desc; - vsi->xdp_rings[i] = xdp_ring; + WRITE_ONCE(vsi->xdp_rings[i], xdp_ring); if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; ice_set_ring_xdp(xdp_ring); From 3995ecbabc6f8019b3a55aeb551eeb9598b8b354 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 12 Jun 2020 13:47:31 +0200 Subject: [PATCH 299/462] i40e: fix crash when Rx descriptor count is changed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the AF_XDP buffer allocator was introduced, the Rx SW ring "rx_bi" allocation was moved from i40e_setup_rx_descriptors() function, and was instead done in the i40e_configure_rx_ring() function. This broke the ethtool set_ringparam() hook for changing the Rx descriptor count, which was relying on i40e_setup_rx_descriptors() to handle the allocation. Fix this by adding an explicit i40e_alloc_rx_bi() call to i40e_set_ringparam(). Fixes: be1222b585fd ("i40e: Separate kernel allocated rx_bi rings from AF_XDP rings") Signed-off-by: Björn Töpel Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index aa8026b1eb81..67806b7b2f49 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2070,6 +2070,9 @@ static int i40e_set_ringparam(struct net_device *netdev, */ rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS; err = i40e_setup_rx_descriptors(&rx_rings[i]); + if (err) + goto rx_unwind; + err = i40e_alloc_rx_bi(&rx_rings[i]); if (err) goto rx_unwind; From f78d4032de60f50fd4afaa0fb68ea03b985f820a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Jun 2020 09:58:10 +0200 Subject: [PATCH 300/462] drm: encoder_slave: fix refcouting error for modules module_put() balances try_module_get(), not request_module(). Fix the error path to match that. Fixes: 2066facca4c7 ("drm/kms: slave encoder interface.") Signed-off-by: Wolfram Sang Reviewed-by: Emil Velikov Acked-by: Daniel Vetter Signed-off-by: Wolfram Sang --- drivers/gpu/drm/drm_encoder_slave.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_encoder_slave.c b/drivers/gpu/drm/drm_encoder_slave.c index cf804389f5ec..d50a7884e69e 100644 --- a/drivers/gpu/drm/drm_encoder_slave.c +++ b/drivers/gpu/drm/drm_encoder_slave.c @@ -84,7 +84,7 @@ int drm_i2c_encoder_init(struct drm_device *dev, err = encoder_drv->encoder_init(client, dev, encoder); if (err) - goto fail_unregister; + goto fail_module_put; if (info->platform_data) encoder->slave_funcs->set_config(&encoder->base, @@ -92,9 +92,10 @@ int drm_i2c_encoder_init(struct drm_device *dev, return 0; +fail_module_put: + module_put(module); fail_unregister: i2c_unregister_device(client); - module_put(module); fail: return err; } From bb7d93fff62f32f4ff0072b2ace695bd5f5137e7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Jun 2020 09:58:11 +0200 Subject: [PATCH 301/462] drm: encoder_slave: use new I2C API i2c_new_client() is deprecated, use the replacement i2c_new_client_device(). Also, we have a helper to check if a driver is bound. Use it to simplify the code. Note that this changes the errno for a failed device creation from ENOMEM to ENODEV. No callers currently interpret this errno, though, so we use this condensed error check. Signed-off-by: Wolfram Sang Reviewed-by: Emil Velikov Acked-by: Daniel Vetter Signed-off-by: Wolfram Sang --- drivers/gpu/drm/drm_encoder_slave.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_encoder_slave.c b/drivers/gpu/drm/drm_encoder_slave.c index d50a7884e69e..e464429d32df 100644 --- a/drivers/gpu/drm/drm_encoder_slave.c +++ b/drivers/gpu/drm/drm_encoder_slave.c @@ -61,13 +61,8 @@ int drm_i2c_encoder_init(struct drm_device *dev, request_module("%s%s", I2C_MODULE_PREFIX, info->type); - client = i2c_new_device(adap, info); - if (!client) { - err = -ENOMEM; - goto fail; - } - - if (!client->dev.driver) { + client = i2c_new_client_device(adap, info); + if (!i2c_client_has_driver(client)) { err = -ENODEV; goto fail_unregister; } @@ -96,7 +91,6 @@ fail_module_put: module_put(module); fail_unregister: i2c_unregister_device(client); -fail: return err; } EXPORT_SYMBOL(drm_i2c_encoder_init); From f04a5ba1752512a46ffb61b88a8fa7d4ab7e02f3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Jun 2020 09:58:12 +0200 Subject: [PATCH 302/462] x86/platform/intel-mid: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR where useful. Signed-off-by: Wolfram Sang Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- arch/x86/platform/intel-mid/sfi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index b8f7f193f383..30bd5714a3d4 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -287,8 +287,8 @@ void intel_scu_devices_create(void) adapter = i2c_get_adapter(i2c_bus[i]); if (adapter) { - client = i2c_new_device(adapter, i2c_devs[i]); - if (!client) + client = i2c_new_client_device(adapter, i2c_devs[i]); + if (IS_ERR(client)) pr_err("can't create i2c device %s\n", i2c_devs[i]->type); } else From 9e1b93b9f624e7f8ae836c47ee68dc2d732f2957 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Jun 2020 09:58:13 +0200 Subject: [PATCH 303/462] video: backlight: tosa_lcd: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR where useful. Signed-off-by: Wolfram Sang Reviewed-by: Daniel Thompson Acked-by: Lee Jones Signed-off-by: Wolfram Sang --- drivers/video/backlight/tosa_lcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c index e8ab583e5098..113116d3585c 100644 --- a/drivers/video/backlight/tosa_lcd.c +++ b/drivers/video/backlight/tosa_lcd.c @@ -107,7 +107,7 @@ static void tosa_lcd_tg_on(struct tosa_lcd_data *data) /* TG LCD GVSS */ tosa_tg_send(spi, TG_PINICTL, 0x0); - if (!data->i2c) { + if (IS_ERR_OR_NULL(data->i2c)) { /* * after the pannel is powered up the first time, * we can access the i2c bus so probe for the DAC @@ -119,7 +119,7 @@ static void tosa_lcd_tg_on(struct tosa_lcd_data *data) .addr = DAC_BASE, .platform_data = data->spi, }; - data->i2c = i2c_new_device(adap, &info); + data->i2c = i2c_new_client_device(adap, &info); } } From bc5a3e44af7edb2fe3a678dde104aab763bd1293 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Jun 2020 09:58:14 +0200 Subject: [PATCH 304/462] Documentation: media: convert to use i2c_new_client_device() Move away from the deprecated API and advertise the new one. Signed-off-by: Wolfram Sang Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Wolfram Sang --- Documentation/driver-api/media/v4l2-subdev.rst | 2 +- Documentation/userspace-api/media/conf_nitpick.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/driver-api/media/v4l2-subdev.rst b/Documentation/driver-api/media/v4l2-subdev.rst index 6e71f67455bb..bc7e1fc40a9d 100644 --- a/Documentation/driver-api/media/v4l2-subdev.rst +++ b/Documentation/driver-api/media/v4l2-subdev.rst @@ -451,7 +451,7 @@ The bridge driver also has some helper functions it can use: "module_foo", "chipid", 0x36, NULL); This loads the given module (can be ``NULL`` if no module needs to be loaded) -and calls :c:func:`i2c_new_device` with the given ``i2c_adapter`` and +and calls :c:func:`i2c_new_client_device` with the given ``i2c_adapter`` and chip/address arguments. If all goes well, then it registers the subdev with the v4l2_device. diff --git a/Documentation/userspace-api/media/conf_nitpick.py b/Documentation/userspace-api/media/conf_nitpick.py index d0c50d75f518..0a8e236d07ab 100644 --- a/Documentation/userspace-api/media/conf_nitpick.py +++ b/Documentation/userspace-api/media/conf_nitpick.py @@ -27,7 +27,7 @@ nitpick_ignore = [ ("c:func", "copy_to_user"), ("c:func", "determine_valid_ioctls"), ("c:func", "ERR_PTR"), - ("c:func", "i2c_new_device"), + ("c:func", "i2c_new_client_device"), ("c:func", "ioctl"), ("c:func", "IS_ERR"), ("c:func", "KERNEL_VERSION"), From 390fd0475af565d2fc31de98fcc84f3c2922e008 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Jun 2020 09:58:15 +0200 Subject: [PATCH 305/462] i2c: remove deprecated i2c_new_device API All in-tree users have been converted to the new i2c_new_client_device function, so remove this deprecated one. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 25 ------------------------- include/linux/i2c.h | 8 +++----- 2 files changed, 3 insertions(+), 30 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index d1f278f73011..26f03a14a478 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -815,31 +815,6 @@ out_err_silent: } EXPORT_SYMBOL_GPL(i2c_new_client_device); -/** - * i2c_new_device - instantiate an i2c device - * @adap: the adapter managing the device - * @info: describes one I2C device; bus_num is ignored - * Context: can sleep - * - * This deprecated function has the same functionality as - * @i2c_new_client_device, it just returns NULL instead of an ERR_PTR in case of - * an error for compatibility with current I2C API. It will be removed once all - * users are converted. - * - * This returns the new i2c client, which may be saved for later use with - * i2c_unregister_device(); or NULL to indicate an error. - */ -struct i2c_client * -i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info) -{ - struct i2c_client *ret; - - ret = i2c_new_client_device(adap, info); - return IS_ERR(ret) ? NULL : ret; -} -EXPORT_SYMBOL_GPL(i2c_new_device); - - /** * i2c_unregister_device - reverse effect of i2c_new_*_device() * @client: value returned from i2c_new_*_device() diff --git a/include/linux/i2c.h b/include/linux/i2c.h index c10617bb980a..b8b8963f8bb9 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -408,7 +408,7 @@ static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } * that are present. This information is used to grow the driver model tree. * For mainboards this is done statically using i2c_register_board_info(); * bus numbers identify adapters that aren't yet available. For add-on boards, - * i2c_new_device() does this dynamically with the adapter already known. + * i2c_new_client_device() does this dynamically with the adapter already known. */ struct i2c_board_info { char type[I2C_NAME_SIZE]; @@ -439,13 +439,11 @@ struct i2c_board_info { #if IS_ENABLED(CONFIG_I2C) -/* Add-on boards should register/unregister their devices; e.g. a board +/* + * Add-on boards should register/unregister their devices; e.g. a board * with integrated I2C, a config eeprom, sensors, and a codec that's * used in conjunction with the primary hardware. */ -struct i2c_client * -i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info); - struct i2c_client * i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *info); From a5765124e697e67df01420f9ed8f9a851097d680 Mon Sep 17 00:00:00 2001 From: Daniel Schaefer Date: Sun, 14 Jun 2020 20:23:55 +0200 Subject: [PATCH 306/462] Documentation/i2c: SMBus start signal is S not A Just like all other I2C/SMBus commands, the start signal for the SMBus Quick Command is S, not A. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Daniel Schaefer Signed-off-by: Wolfram Sang --- Documentation/i2c/smbus-protocol.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst index c2e29633071e..64689d19dd51 100644 --- a/Documentation/i2c/smbus-protocol.rst +++ b/Documentation/i2c/smbus-protocol.rst @@ -57,7 +57,7 @@ SMBus Quick Command This sends a single bit to the device, at the place of the Rd/Wr bit:: - A Addr Rd/Wr [A] P + S Addr Rd/Wr [A] P Functionality flag: I2C_FUNC_SMBUS_QUICK From a23ff37b32c7e682b42feab651c83a18c34e75eb Mon Sep 17 00:00:00 2001 From: Keyur Patel Date: Fri, 12 Jun 2020 17:26:35 -0400 Subject: [PATCH 307/462] i2c: smbus: Fix spelling mistake in the comments Fix spelling mistake in the comments with help of `codespell`. seperate ==> separate Signed-off-by: Keyur Patel Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-smbus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c index b34d2ff06931..56bb840142e3 100644 --- a/drivers/i2c/i2c-core-smbus.c +++ b/drivers/i2c/i2c-core-smbus.c @@ -4,7 +4,7 @@ * * This file contains the SMBus functions which are always included in the I2C * core because they can be emulated via I2C. SMBus specific extensions - * (e.g. smbalert) are handled in a seperate i2c-smbus module. + * (e.g. smbalert) are handled in a separate i2c-smbus module. * * All SMBus-related things are written by Frodo Looijaard * SMBus 2.0 support by Mark Studebaker and From 49097762fa405cdc16f8f597f6d27c078d4a31e9 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 19 Jun 2020 11:40:46 +0200 Subject: [PATCH 308/462] Revert "KVM: VMX: Micro-optimize vmexit time when not exposing PMU" Guest crashes are observed on a Cascade Lake system when 'perf top' is launched on the host, e.g. BUG: unable to handle kernel paging request at fffffe0000073038 PGD 7ffa7067 P4D 7ffa7067 PUD 7ffa6067 PMD 7ffa5067 PTE ffffffffff120 Oops: 0000 [#1] SMP PTI CPU: 1 PID: 1 Comm: systemd Not tainted 4.18.0+ #380 ... Call Trace: serial8250_console_write+0xfe/0x1f0 call_console_drivers.constprop.0+0x9d/0x120 console_unlock+0x1ea/0x460 Call traces are different but the crash is imminent. The problem was blindly bisected to the commit 041bc42ce2d0 ("KVM: VMX: Micro-optimize vmexit time when not exposing PMU"). It was also confirmed that the issue goes away if PMU is exposed to the guest. With some instrumentation of the guest we can see what is being switched (when we do atomic_switch_perf_msrs()): vmx_vcpu_run: switching 2 msrs vmx_vcpu_run: switching MSR38f guest: 70000000d host: 70000000f vmx_vcpu_run: switching MSR3f1 guest: 0 host: 2 The current guess is that PEBS (MSR_IA32_PEBS_ENABLE, 0x3f1) is to blame. Regardless of whether PMU is exposed to the guest or not, PEBS needs to be disabled upon switch. This reverts commit 041bc42ce2d0efac3b85bbb81dea8c74b81f4ef9. Reported-by: Maxime Coquelin Signed-off-by: Vitaly Kuznetsov Message-Id: <20200619094046.654019-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 36c771728c8c..b1a23ad986ff 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6728,8 +6728,7 @@ reenter_guest: pt_guest_enter(vmx); - if (vcpu_to_pmu(vcpu)->version) - atomic_switch_perf_msrs(vmx); + atomic_switch_perf_msrs(vmx); atomic_switch_umwait_control_msr(vmx); if (enable_preemption_timer) From a17f4bed811c60712d8131883cdba11a105d0161 Mon Sep 17 00:00:00 2001 From: Fan Guo Date: Fri, 12 Jun 2020 14:38:24 +0800 Subject: [PATCH 309/462] RDMA/mad: Fix possible memory leak in ib_mad_post_receive_mads() If ib_dma_mapping_error() returns non-zero value, ib_mad_post_receive_mads() will jump out of loops and return -ENOMEM without freeing mad_priv. Fix this memory-leak problem by freeing mad_priv in this case. Fixes: 2c34e68f4261 ("IB/mad: Check and handle potential DMA mapping errors") Link: https://lore.kernel.org/r/20200612063824.180611-1-guofan5@huawei.com Signed-off-by: Fan Guo Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 186e0d652e8b..5e080191a725 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2718,6 +2718,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, sg_list.addr))) { + kfree(mad_priv); ret = -ENOMEM; break; } From 28f9f8fb4f405ade488058f817b6cbd108e45e4e Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 19 Jun 2020 12:17:44 +0200 Subject: [PATCH 310/462] MAINTAINERS: Add robert and myself as qcom i2c cci maintainers Signed-off-by: Loic Poulain Signed-off-by: Robert Foss [wsa: kept sorting] Signed-off-by: Wolfram Sang --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 68f21d46614c..bcc9703d2550 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14192,6 +14192,15 @@ L: dmaengine@vger.kernel.org S: Supported F: drivers/dma/qcom/hidma* +QUALCOMM I2C CCI DRIVER +M: Loic Poulain +M: Robert Foss +L: linux-i2c@vger.kernel.org +L: linux-arm-msm@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/i2c/i2c-qcom-cci.txt +F: drivers/i2c/busses/i2c-qcom-cci.c + QUALCOMM IOMMU M: Rob Clark L: iommu@lists.linux-foundation.org From 24f5aa53afbf72ffd7d2587cd2edd44342e0270f Mon Sep 17 00:00:00 2001 From: Flavio Suligoi Date: Fri, 19 Jun 2020 11:02:04 +0200 Subject: [PATCH 311/462] net: ethernet: neterion: vxge: fix spelling mistake Fix typo: "trigered" --> "triggered" Signed-off-by: Flavio Suligoi Signed-off-by: David S. Miller --- drivers/net/ethernet/neterion/vxge/vxge-config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.h b/drivers/net/ethernet/neterion/vxge/vxge-config.h index 628fa9b2f741..373165119850 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.h +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.h @@ -297,7 +297,7 @@ struct vxge_hw_fifo_config { * @greedy_return: If Set it forces the device to return absolutely all RxD * that are consumed and still on board when a timer interrupt * triggers. If Clear, then if the device has already returned - * RxD before current timer interrupt trigerred and after the + * RxD before current timer interrupt triggered and after the * previous timer interrupt triggered, then the device is not * forced to returned the rest of the consumed RxD that it has * on board which account for a byte count less than the one From e2c0b9712452296cb50a9faf07cc688862238f24 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Tue, 16 Jun 2020 08:47:32 +0200 Subject: [PATCH 312/462] docs: net: ieee802154: change link to new project URL We finally came around to setup a new project website. Update the reference here. Signed-off-by: Stefan Schmidt --- Documentation/networking/ieee802154.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ieee802154.rst b/Documentation/networking/ieee802154.rst index 36ca823a1122..6f4bf8447a21 100644 --- a/Documentation/networking/ieee802154.rst +++ b/Documentation/networking/ieee802154.rst @@ -30,8 +30,8 @@ Socket API The address family, socket addresses etc. are defined in the include/net/af_ieee802154.h header or in the special header -in the userspace package (see either http://wpan.cakelab.org/ or the -git tree at https://github.com/linux-wpan/wpan-tools). +in the userspace package (see either https://linux-wpan.org/wpan-tools.html +or the git tree at https://github.com/linux-wpan/wpan-tools). 6LoWPAN Linux implementation ============================ From e795a61a85e65b4f12c2ed3529911ac0f013fa40 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Tue, 16 Jun 2020 08:49:10 +0200 Subject: [PATCH 313/462] MAINTAINERS: update ieee802154 project website URL Update URL to our new home. Signed-off-by: Stefan Schmidt --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 301330e02bca..a09e61268c08 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8333,7 +8333,7 @@ M: Alexander Aring M: Stefan Schmidt L: linux-wpan@vger.kernel.org S: Maintained -W: http://wpan.cakelab.org/ +W: https://linux-wpan.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan-next.git F: Documentation/networking/ieee802154.rst From 6564cfefb01c4c5b8c357f300feac5cb1585e1a9 Mon Sep 17 00:00:00 2001 From: Flavio Suligoi Date: Fri, 19 Jun 2020 11:18:11 +0200 Subject: [PATCH 314/462] net: ethernet: oki-semi: pch_gbe: fix spelling mistake Fix typo: "Triger" --> "Trigger" Signed-off-by: Flavio Suligoi Signed-off-by: David S. Miller --- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h index 32b9d7705404..55cef5b16aa5 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h @@ -147,7 +147,7 @@ struct pch_gbe_regs { #define PCH_GBE_RH_ALM_FULL_8 0x00001000 /* 8 words */ #define PCH_GBE_RH_ALM_FULL_16 0x00002000 /* 16 words */ #define PCH_GBE_RH_ALM_FULL_32 0x00003000 /* 32 words */ -/* RX FIFO Read Triger Threshold */ +/* RX FIFO Read Trigger Threshold */ #define PCH_GBE_RH_RD_TRG_4 0x00000000 /* 4 words */ #define PCH_GBE_RH_RD_TRG_8 0x00000200 /* 8 words */ #define PCH_GBE_RH_RD_TRG_16 0x00000400 /* 16 words */ From 5a8d7f126c97d04d893f5e5be2b286437a0d01b0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 19 Jun 2020 11:47:46 -0700 Subject: [PATCH 315/462] of: of_mdio: Correct loop scanning logic Commit 209c65b61d94 ("drivers/of/of_mdio.c:fix of_mdiobus_register()") introduced a break of the loop on the premise that a successful registration should exit the loop. The premise is correct but not to code, because rc && rc != -ENODEV is just a special error condition, that means we would exit the loop even with rc == -ENODEV which is absolutely not correct since this is the error code to indicate to the MDIO bus layer that scanning should continue. Fix this by explicitly checking for rc = 0 as the only valid condition to break out of the loop. Fixes: 209c65b61d94 ("drivers/of/of_mdio.c:fix of_mdiobus_register()") Reviewed-by: Andrew Lunn Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/of/of_mdio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index a04afe79529c..ef6f818ce5b3 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -314,10 +314,15 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) child, addr); if (of_mdiobus_child_is_phy(child)) { + /* -ENODEV is the return code that PHYLIB has + * standardized on to indicate that bus + * scanning should continue. + */ rc = of_mdiobus_register_phy(mdio, child, addr); - if (rc && rc != -ENODEV) + if (!rc) + break; + if (rc != -ENODEV) goto unregister; - break; } } } From b2ffc75e2e990b09903f9d15ccd53bc5f3a4217c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 19 Jun 2020 11:47:47 -0700 Subject: [PATCH 316/462] net: phy: Check harder for errors in get_phy_id() Commit 02a6efcab675 ("net: phy: allow scanning busses with missing phys") added a special condition to return -ENODEV in case -ENODEV or -EIO was returned from the first read of the MII_PHYSID1 register. In case the MDIO bus data line pull-up is not strong enough, the MDIO bus controller will not flag this as a read error. This can happen when a pluggable daughter card is not connected and weak internal pull-ups are used (since that is the only option, otherwise the pins are floating). The second read of MII_PHYSID2 will be correctly flagged an error though, but now we will return -EIO which will be treated as a hard error, thus preventing MDIO bus scanning loops to continue succesfully. Apply the same logic to both register reads, thus allowing the scanning logic to proceed. Fixes: 02a6efcab675 ("net: phy: allow scanning busses with missing phys") Reviewed-by: Andrew Lunn Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 04946de74fa0..85ba95b598b5 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -794,8 +794,10 @@ static int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id, /* Grab the bits from PHYIR2, and put them in the lower half */ phy_reg = mdiobus_read(bus, addr, MII_PHYSID2); - if (phy_reg < 0) - return -EIO; + if (phy_reg < 0) { + /* returning -ENODEV doesn't stop bus scanning */ + return (phy_reg == -EIO || phy_reg == -ENODEV) ? -ENODEV : -EIO; + } *phy_id |= phy_reg; From faa620876b01d6744f1599e279042bb8149247ab Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 18 Jun 2020 11:37:40 +0300 Subject: [PATCH 317/462] net: macb: undo operations in case of failure Undo previously done operation in case macb_phylink_connect() fails. Since macb_reset_hw() is the 1st undo operation the napi_exit label was renamed to reset_hw. Fixes: 7897b071ac3b ("net: macb: convert to phylink") Signed-off-by: Claudiu Beznea Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 67933079aeea..257c4920cb88 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2558,7 +2558,7 @@ static int macb_open(struct net_device *dev) err = macb_phylink_connect(bp); if (err) - goto napi_exit; + goto reset_hw; netif_tx_start_all_queues(dev); @@ -2567,9 +2567,11 @@ static int macb_open(struct net_device *dev) return 0; -napi_exit: +reset_hw: + macb_reset_hw(bp); for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) napi_disable(&queue->napi); + macb_free_consistent(bp); pm_exit: pm_runtime_put_sync(&bp->pdev->dev); return err; From 9deba33f1b7266a3870c9da31f787b605748fc0c Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Thu, 18 Jun 2020 12:16:52 +0300 Subject: [PATCH 318/462] enetc: Fix HW_VLAN_CTAG_TX|RX toggling VLAN tag insertion/extraction offload is correctly activated at probe time but deactivation of this feature (i.e. via ethtool) is broken. Toggling works only for Tx/Rx ring 0 of a PF, and is ignored for the other rings, including the VF rings. To fix this, the existing VLAN offload toggling code was extended to all the rings assigned to a netdevice, instead of the default ring 0 (likely a leftover from the early validation days of this feature). And the code was moved to the common set_features() function to fix toggling for the VF driver too. Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 26 +++++++++++++++++++ .../net/ethernet/freescale/enetc/enetc_hw.h | 16 ++++++------ .../net/ethernet/freescale/enetc/enetc_pf.c | 8 ------ 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 298c55786fd9..96831f49925c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1595,6 +1595,24 @@ static int enetc_set_psfp(struct net_device *ndev, int en) return 0; } +static void enetc_enable_rxvlan(struct net_device *ndev, bool en) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + int i; + + for (i = 0; i < priv->num_rx_rings; i++) + enetc_bdr_enable_rxvlan(&priv->si->hw, i, en); +} + +static void enetc_enable_txvlan(struct net_device *ndev, bool en) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + int i; + + for (i = 0; i < priv->num_tx_rings; i++) + enetc_bdr_enable_txvlan(&priv->si->hw, i, en); +} + int enetc_set_features(struct net_device *ndev, netdev_features_t features) { @@ -1604,6 +1622,14 @@ int enetc_set_features(struct net_device *ndev, if (changed & NETIF_F_RXHASH) enetc_set_rss(ndev, !!(features & NETIF_F_RXHASH)); + if (changed & NETIF_F_HW_VLAN_CTAG_RX) + enetc_enable_rxvlan(ndev, + !!(features & NETIF_F_HW_VLAN_CTAG_RX)); + + if (changed & NETIF_F_HW_VLAN_CTAG_TX) + enetc_enable_txvlan(ndev, + !!(features & NETIF_F_HW_VLAN_CTAG_TX)); + if (changed & NETIF_F_HW_TC) err = enetc_set_psfp(ndev, !!(features & NETIF_F_HW_TC)); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 6314051bc6c1..ce0d321c0639 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -531,22 +531,22 @@ struct enetc_msg_cmd_header { /* Common H/W utility functions */ -static inline void enetc_enable_rxvlan(struct enetc_hw *hw, int si_idx, - bool en) +static inline void enetc_bdr_enable_rxvlan(struct enetc_hw *hw, int idx, + bool en) { - u32 val = enetc_rxbdr_rd(hw, si_idx, ENETC_RBMR); + u32 val = enetc_rxbdr_rd(hw, idx, ENETC_RBMR); val = (val & ~ENETC_RBMR_VTE) | (en ? ENETC_RBMR_VTE : 0); - enetc_rxbdr_wr(hw, si_idx, ENETC_RBMR, val); + enetc_rxbdr_wr(hw, idx, ENETC_RBMR, val); } -static inline void enetc_enable_txvlan(struct enetc_hw *hw, int si_idx, - bool en) +static inline void enetc_bdr_enable_txvlan(struct enetc_hw *hw, int idx, + bool en) { - u32 val = enetc_txbdr_rd(hw, si_idx, ENETC_TBMR); + u32 val = enetc_txbdr_rd(hw, idx, ENETC_TBMR); val = (val & ~ENETC_TBMR_VIH) | (en ? ENETC_TBMR_VIH : 0); - enetc_txbdr_wr(hw, si_idx, ENETC_TBMR, val); + enetc_txbdr_wr(hw, idx, ENETC_TBMR, val); } static inline void enetc_set_bdr_prio(struct enetc_hw *hw, int bdr_idx, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 824d211ec00f..4fac57dbb3c8 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -649,14 +649,6 @@ static int enetc_pf_set_features(struct net_device *ndev, netdev_features_t changed = ndev->features ^ features; struct enetc_ndev_priv *priv = netdev_priv(ndev); - if (changed & NETIF_F_HW_VLAN_CTAG_RX) - enetc_enable_rxvlan(&priv->si->hw, 0, - !!(features & NETIF_F_HW_VLAN_CTAG_RX)); - - if (changed & NETIF_F_HW_VLAN_CTAG_TX) - enetc_enable_txvlan(&priv->si->hw, 0, - !!(features & NETIF_F_HW_VLAN_CTAG_TX)); - if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) { struct enetc_pf *pf = enetc_si_priv(priv->si); From 56c09de347e40804fc8dad155272fb9609e0a97b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 18 Jun 2020 12:13:22 +0200 Subject: [PATCH 319/462] geneve: allow changing DF behavior after creation Currently, trying to change the DF parameter of a geneve device does nothing: # ip -d link show geneve1 14: geneve1: link/ether geneve id 1 remote 10.0.0.1 ttl auto df set dstport 6081 # ip link set geneve1 type geneve id 1 df unset # ip -d link show geneve1 14: geneve1: link/ether geneve id 1 remote 10.0.0.1 ttl auto df set dstport 6081 We just need to update the value in geneve_changelink. Fixes: a025fb5f49ad ("geneve: Allow configuration of DF behaviour") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- drivers/net/geneve.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 75266580b586..4661ef865807 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1649,6 +1649,7 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], geneve->collect_md = metadata; geneve->use_udp6_rx_checksums = use_udp6_rx_checksums; geneve->ttl_inherit = ttl_inherit; + geneve->df = df; geneve_unquiesce(geneve, gs4, gs6); return 0; From 26f2eb27d081081dbea6d3c11602c9ece36f4d9c Mon Sep 17 00:00:00 2001 From: wenxu Date: Thu, 18 Jun 2020 20:49:08 +0800 Subject: [PATCH 320/462] flow_offload: add flow_indr_block_cb_alloc/remove function Add flow_indr_block_cb_alloc/remove function for next fix patch. Signed-off-by: wenxu Signed-off-by: David S. Miller --- include/net/flow_offload.h | 13 +++++++++++++ net/core/flow_offload.c | 21 +++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index f2c8311a0433..bf4343042956 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -467,6 +467,12 @@ struct flow_block_cb { struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)); +struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, + void *cb_ident, void *cb_priv, + void (*release)(void *cb_priv), + struct flow_block_offload *bo, + struct net_device *dev, void *data, + void (*cleanup)(struct flow_block_cb *block_cb)); void flow_block_cb_free(struct flow_block_cb *block_cb); struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block, @@ -488,6 +494,13 @@ static inline void flow_block_cb_remove(struct flow_block_cb *block_cb, list_move(&block_cb->list, &offload->cb_list); } +static inline void flow_indr_block_cb_remove(struct flow_block_cb *block_cb, + struct flow_block_offload *offload) +{ + list_del(&block_cb->indr.list); + list_move(&block_cb->list, &offload->cb_list); +} + bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident, struct list_head *driver_block_list); diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 0cfc35e6be28..1fd781d155b4 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -437,6 +437,27 @@ static void flow_block_indr_init(struct flow_block_cb *flow_block, flow_block->indr.cleanup = cleanup; } +struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, + void *cb_ident, void *cb_priv, + void (*release)(void *cb_priv), + struct flow_block_offload *bo, + struct net_device *dev, void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + struct flow_block_cb *block_cb; + + block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, release); + if (IS_ERR(block_cb)) + goto out; + + flow_block_indr_init(block_cb, bo, dev, data, cleanup); + list_add(&block_cb->indr.list, &flow_block_indr_list); + +out: + return block_cb; +} +EXPORT_SYMBOL(flow_indr_block_cb_alloc); + static void __flow_block_indr_binding(struct flow_block_offload *bo, struct net_device *dev, void *data, void (*cleanup)(struct flow_block_cb *block_cb)) From 66f1939a1b705305df820d65f4d9a8457d05759c Mon Sep 17 00:00:00 2001 From: wenxu Date: Thu, 18 Jun 2020 20:49:09 +0800 Subject: [PATCH 321/462] flow_offload: use flow_indr_block_cb_alloc/remove function Prepare fix the bug in the next patch. use flow_indr_block_cb_alloc/remove function and remove the __flow_block_indr_binding. Signed-off-by: wenxu Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 19 ++++++++++------ .../ethernet/mellanox/mlx5/core/en/rep/tc.c | 21 ++++++++++++------ .../net/ethernet/netronome/nfp/flower/main.h | 4 +++- .../ethernet/netronome/nfp/flower/offload.c | 18 +++++++++------ include/net/flow_offload.h | 4 +++- net/core/flow_offload.c | 22 +------------------ 6 files changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 0eef4f5e4a46..3e3a8841689d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1889,7 +1889,8 @@ static void bnxt_tc_setup_indr_rel(void *cb_priv) } static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp, - struct flow_block_offload *f) + struct flow_block_offload *f, void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) { struct bnxt_flower_indr_block_cb_priv *cb_priv; struct flow_block_cb *block_cb; @@ -1907,9 +1908,10 @@ static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp, cb_priv->bp = bp; list_add(&cb_priv->list, &bp->tc_indr_block_list); - block_cb = flow_block_cb_alloc(bnxt_tc_setup_indr_block_cb, - cb_priv, cb_priv, - bnxt_tc_setup_indr_rel); + block_cb = flow_indr_block_cb_alloc(bnxt_tc_setup_indr_block_cb, + cb_priv, cb_priv, + bnxt_tc_setup_indr_rel, f, + netdev, data, cleanup); if (IS_ERR(block_cb)) { list_del(&cb_priv->list); kfree(cb_priv); @@ -1930,7 +1932,7 @@ static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp, if (!block_cb) return -ENOENT; - flow_block_cb_remove(block_cb, f); + flow_indr_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); break; default: @@ -1945,14 +1947,17 @@ static bool bnxt_is_netdev_indr_offload(struct net_device *netdev) } static int bnxt_tc_setup_indr_cb(struct net_device *netdev, void *cb_priv, - enum tc_setup_type type, void *type_data) + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) { if (!bnxt_is_netdev_indr_offload(netdev)) return -EOPNOTSUPP; switch (type) { case TC_SETUP_BLOCK: - return bnxt_tc_setup_indr_block(netdev, cb_priv, type_data); + return bnxt_tc_setup_indr_block(netdev, cb_priv, type_data, data, + cleanup); default: break; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 80713123de5c..d629864fc996 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -407,7 +407,9 @@ static int mlx5e_rep_indr_setup_block(struct net_device *netdev, struct mlx5e_rep_priv *rpriv, struct flow_block_offload *f, - flow_setup_cb_t *setup_cb) + flow_setup_cb_t *setup_cb, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); struct mlx5e_rep_indr_block_priv *indr_priv; @@ -438,8 +440,9 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, list_add(&indr_priv->list, &rpriv->uplink_priv.tc_indr_block_priv_list); - block_cb = flow_block_cb_alloc(setup_cb, indr_priv, indr_priv, - mlx5e_rep_indr_block_unbind); + block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv, + mlx5e_rep_indr_block_unbind, + f, netdev, data, cleanup); if (IS_ERR(block_cb)) { list_del(&indr_priv->list); kfree(indr_priv); @@ -458,7 +461,7 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, if (!block_cb) return -ENOENT; - flow_block_cb_remove(block_cb, f); + flow_indr_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); return 0; default: @@ -469,15 +472,19 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, static int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv, - enum tc_setup_type type, void *type_data) + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) { switch (type) { case TC_SETUP_BLOCK: return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, - mlx5e_rep_indr_setup_tc_cb); + mlx5e_rep_indr_setup_tc_cb, + data, cleanup); case TC_SETUP_FT: return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, - mlx5e_rep_indr_setup_ft_cb); + mlx5e_rep_indr_setup_ft_cb, + data, cleanup); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 6c3dc3baf387..56b3b68e273e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -459,7 +459,9 @@ int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev, struct tc_cls_matchall_offload *flow); void nfp_flower_stats_rlim_reply(struct nfp_app *app, struct sk_buff *skb); int nfp_flower_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, - enum tc_setup_type type, void *type_data); + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)); int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv); diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 695d24b9dd92..95c75250c355 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1647,7 +1647,8 @@ static void nfp_flower_setup_indr_tc_release(void *cb_priv) static int nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, - struct flow_block_offload *f) + struct flow_block_offload *f, void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) { struct nfp_flower_indr_block_cb_priv *cb_priv; struct nfp_flower_priv *priv = app->priv; @@ -1676,9 +1677,10 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, cb_priv->app = app; list_add(&cb_priv->list, &priv->indr_block_cb_priv); - block_cb = flow_block_cb_alloc(nfp_flower_setup_indr_block_cb, - cb_priv, cb_priv, - nfp_flower_setup_indr_tc_release); + block_cb = flow_indr_block_cb_alloc(nfp_flower_setup_indr_block_cb, + cb_priv, cb_priv, + nfp_flower_setup_indr_tc_release, + f, netdev, data, cleanup); if (IS_ERR(block_cb)) { list_del(&cb_priv->list); kfree(cb_priv); @@ -1699,7 +1701,7 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, if (!block_cb) return -ENOENT; - flow_block_cb_remove(block_cb, f); + flow_indr_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); return 0; default: @@ -1710,7 +1712,9 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, int nfp_flower_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, - enum tc_setup_type type, void *type_data) + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) { if (!nfp_fl_is_netdev_to_offload(netdev)) return -EOPNOTSUPP; @@ -1718,7 +1722,7 @@ nfp_flower_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, switch (type) { case TC_SETUP_BLOCK: return nfp_flower_setup_indr_tc_block(netdev, cb_priv, - type_data); + type_data, data, cleanup); default: return -EOPNOTSUPP; } diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index bf4343042956..1961c7982273 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -545,7 +545,9 @@ static inline void flow_block_init(struct flow_block *flow_block) } typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, - enum tc_setup_type type, void *type_data); + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)); int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv); void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 1fd781d155b4..ddd958c6040e 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -458,25 +458,6 @@ out: } EXPORT_SYMBOL(flow_indr_block_cb_alloc); -static void __flow_block_indr_binding(struct flow_block_offload *bo, - struct net_device *dev, void *data, - void (*cleanup)(struct flow_block_cb *block_cb)) -{ - struct flow_block_cb *block_cb; - - list_for_each_entry(block_cb, &bo->cb_list, list) { - switch (bo->command) { - case FLOW_BLOCK_BIND: - flow_block_indr_init(block_cb, bo, dev, data, cleanup); - list_add(&block_cb->indr.list, &flow_block_indr_list); - break; - case FLOW_BLOCK_UNBIND: - list_del(&block_cb->indr.list); - break; - } - } -} - int flow_indr_dev_setup_offload(struct net_device *dev, enum tc_setup_type type, void *data, struct flow_block_offload *bo, @@ -486,9 +467,8 @@ int flow_indr_dev_setup_offload(struct net_device *dev, mutex_lock(&flow_indr_block_lock); list_for_each_entry(this, &flow_block_indr_dev_list, list) - this->cb(dev, this->cb_priv, type, bo); + this->cb(dev, this->cb_priv, type, bo, data, cleanup); - __flow_block_indr_binding(bo, dev, data, cleanup); mutex_unlock(&flow_indr_block_lock); return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0; From a1db217861f33b8d9ea8171bcacee51186e2d5ba Mon Sep 17 00:00:00 2001 From: wenxu Date: Thu, 18 Jun 2020 20:49:10 +0800 Subject: [PATCH 322/462] net: flow_offload: fix flow_indr_dev_unregister path If the representor is removed, then identify the indirect flow_blocks that need to be removed by the release callback and the port representor structure. To identify the port representor structure, a new indr.cb_priv field needs to be introduced. The flow_block also needs to be removed from the driver list from the cleanup path. Fixes: 1fac52da5942 ("net: flow_offload: consolidate indirect flow_block infrastructure") Signed-off-by: wenxu Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 4 ++-- .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 5 +++-- drivers/net/ethernet/netronome/nfp/flower/main.c | 2 +- drivers/net/ethernet/netronome/nfp/flower/main.h | 3 +-- .../net/ethernet/netronome/nfp/flower/offload.c | 8 ++++---- include/net/flow_offload.h | 4 +++- net/core/flow_offload.c | 16 ++++++++++------ net/netfilter/nf_flow_table_offload.c | 1 + net/netfilter/nf_tables_offload.c | 1 + net/sched/cls_api.c | 1 + 10 files changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 3e3a8841689d..4a11c1e7cc02 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1911,7 +1911,7 @@ static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp, block_cb = flow_indr_block_cb_alloc(bnxt_tc_setup_indr_block_cb, cb_priv, cb_priv, bnxt_tc_setup_indr_rel, f, - netdev, data, cleanup); + netdev, data, bp, cleanup); if (IS_ERR(block_cb)) { list_del(&cb_priv->list); kfree(cb_priv); @@ -2079,7 +2079,7 @@ void bnxt_shutdown_tc(struct bnxt *bp) return; flow_indr_dev_unregister(bnxt_tc_setup_indr_cb, bp, - bnxt_tc_setup_indr_block_cb); + bnxt_tc_setup_indr_rel); rhashtable_destroy(&tc_info->flow_table); rhashtable_destroy(&tc_info->l2_table); rhashtable_destroy(&tc_info->decap_l2_table); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index d629864fc996..eefeb1cdc2ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -442,7 +442,8 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv, mlx5e_rep_indr_block_unbind, - f, netdev, data, cleanup); + f, netdev, data, rpriv, + cleanup); if (IS_ERR(block_cb)) { list_del(&indr_priv->list); kfree(indr_priv); @@ -503,7 +504,7 @@ int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) { flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv, - mlx5e_rep_indr_setup_tc_cb); + mlx5e_rep_indr_block_unbind); } #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index c39327677a7d..bb448c82cdc2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -861,7 +861,7 @@ static void nfp_flower_clean(struct nfp_app *app) flush_work(&app_priv->cmsg_work); flow_indr_dev_unregister(nfp_flower_indr_setup_tc_cb, app, - nfp_flower_setup_indr_block_cb); + nfp_flower_setup_indr_tc_release); if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM) nfp_flower_qos_cleanup(app); diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 56b3b68e273e..7f54a620acad 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -462,8 +462,7 @@ int nfp_flower_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, enum tc_setup_type type, void *type_data, void *data, void (*cleanup)(struct flow_block_cb *block_cb)); -int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, void *type_data, - void *cb_priv); +void nfp_flower_setup_indr_tc_release(void *cb_priv); void __nfp_flower_non_repr_priv_get(struct nfp_flower_non_repr_priv *non_repr_priv); diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 95c75250c355..d7340dc09b4c 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1619,8 +1619,8 @@ nfp_flower_indr_block_cb_priv_lookup(struct nfp_app *app, return NULL; } -int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, - void *type_data, void *cb_priv) +static int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) { struct nfp_flower_indr_block_cb_priv *priv = cb_priv; struct flow_cls_offload *flower = type_data; @@ -1637,7 +1637,7 @@ int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, } } -static void nfp_flower_setup_indr_tc_release(void *cb_priv) +void nfp_flower_setup_indr_tc_release(void *cb_priv) { struct nfp_flower_indr_block_cb_priv *priv = cb_priv; @@ -1680,7 +1680,7 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, block_cb = flow_indr_block_cb_alloc(nfp_flower_setup_indr_block_cb, cb_priv, cb_priv, nfp_flower_setup_indr_tc_release, - f, netdev, data, cleanup); + f, netdev, data, app, cleanup); if (IS_ERR(block_cb)) { list_del(&cb_priv->list); kfree(cb_priv); diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 1961c7982273..6315324b9dc2 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -450,6 +450,7 @@ struct flow_block_indr { struct net_device *dev; enum flow_block_binder_type binder_type; void *data; + void *cb_priv; void (*cleanup)(struct flow_block_cb *block_cb); }; @@ -472,6 +473,7 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, void (*release)(void *cb_priv), struct flow_block_offload *bo, struct net_device *dev, void *data, + void *indr_cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)); void flow_block_cb_free(struct flow_block_cb *block_cb); @@ -551,7 +553,7 @@ typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv); void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, - flow_setup_cb_t *setup_cb); + void (*release)(void *cb_priv)); int flow_indr_dev_setup_offload(struct net_device *dev, enum tc_setup_type type, void *data, struct flow_block_offload *bo, diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index ddd958c6040e..b739cfab796e 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -372,14 +372,15 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) } EXPORT_SYMBOL(flow_indr_dev_register); -static void __flow_block_indr_cleanup(flow_setup_cb_t *setup_cb, void *cb_priv, +static void __flow_block_indr_cleanup(void (*release)(void *cb_priv), + void *cb_priv, struct list_head *cleanup_list) { struct flow_block_cb *this, *next; list_for_each_entry_safe(this, next, &flow_block_indr_list, indr.list) { - if (this->cb == setup_cb && - this->cb_priv == cb_priv) { + if (this->release == release && + this->indr.cb_priv == cb_priv) { list_move(&this->indr.list, cleanup_list); return; } @@ -397,7 +398,7 @@ static void flow_block_indr_notify(struct list_head *cleanup_list) } void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, - flow_setup_cb_t *setup_cb) + void (*release)(void *cb_priv)) { struct flow_indr_dev *this, *next, *indr_dev = NULL; LIST_HEAD(cleanup_list); @@ -418,7 +419,7 @@ void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, return; } - __flow_block_indr_cleanup(setup_cb, cb_priv, &cleanup_list); + __flow_block_indr_cleanup(release, cb_priv, &cleanup_list); mutex_unlock(&flow_indr_block_lock); flow_block_indr_notify(&cleanup_list); @@ -429,10 +430,12 @@ EXPORT_SYMBOL(flow_indr_dev_unregister); static void flow_block_indr_init(struct flow_block_cb *flow_block, struct flow_block_offload *bo, struct net_device *dev, void *data, + void *cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { flow_block->indr.binder_type = bo->binder_type; flow_block->indr.data = data; + flow_block->indr.cb_priv = cb_priv; flow_block->indr.dev = dev; flow_block->indr.cleanup = cleanup; } @@ -442,6 +445,7 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, void (*release)(void *cb_priv), struct flow_block_offload *bo, struct net_device *dev, void *data, + void *indr_cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { struct flow_block_cb *block_cb; @@ -450,7 +454,7 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, if (IS_ERR(block_cb)) goto out; - flow_block_indr_init(block_cb, bo, dev, data, cleanup); + flow_block_indr_init(block_cb, bo, dev, data, indr_cb_priv, cleanup); list_add(&block_cb->indr.list, &flow_block_indr_list); out: diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 62651e6683f6..5fff1e040168 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -950,6 +950,7 @@ static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb) nf_flow_table_gc_cleanup(flowtable, dev); down_write(&flowtable->flow_block_lock); list_del(&block_cb->list); + list_del(&block_cb->driver_list); flow_block_cb_free(block_cb); up_write(&flowtable->flow_block_lock); } diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 185fc82c99aa..c7cf1cde46de 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -296,6 +296,7 @@ static void nft_indr_block_cleanup(struct flow_block_cb *block_cb) nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND, basechain, &extack); mutex_lock(&net->nft.commit_mutex); + list_del(&block_cb->driver_list); list_move(&block_cb->list, &bo.cb_list); nft_flow_offload_unbind(&bo, basechain); mutex_unlock(&net->nft.commit_mutex); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a00a203b2ef5..f8028d73edf1 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -652,6 +652,7 @@ static void tc_block_indr_cleanup(struct flow_block_cb *block_cb) &block->flow_block, tcf_block_shared(block), &extack); down_write(&block->cb_lock); + list_del(&block_cb->driver_list); list_move(&block_cb->list, &bo.cb_list); up_write(&block->cb_lock); rtnl_lock(); From 3c005110d4083c52ff6c99018127adf92f3f23aa Mon Sep 17 00:00:00 2001 From: wenxu Date: Thu, 18 Jun 2020 20:49:11 +0800 Subject: [PATCH 323/462] net/sched: cls_api: fix nooffloaddevcnt warning dmesg log The block->nooffloaddevcnt should always count for indr block. even the indr block offload successful. The representor maybe gone away and the ingress qdisc can work in software mode. block->nooffloaddevcnt warning with following dmesg log: [ 760.667058] ##################################################### [ 760.668186] ## TEST test-ecmp-add-vxlan-encap-disable-sriov.sh ## [ 760.669179] ##################################################### [ 761.780655] :test: Fedora 30 (Thirty) [ 761.783794] :test: Linux reg-r-vrt-018-180 5.7.0+ [ 761.822890] :test: NIC ens1f0 FW 16.26.6000 PCI 0000:81:00.0 DEVICE 0x1019 ConnectX-5 Ex [ 761.860244] mlx5_core 0000:81:00.0 ens1f0: Link up [ 761.880693] IPv6: ADDRCONF(NETDEV_CHANGE): ens1f0: link becomes ready [ 762.059732] mlx5_core 0000:81:00.1 ens1f1: Link up [ 762.234341] :test: unbind vfs of ens1f0 [ 762.257825] :test: Change ens1f0 eswitch (0000:81:00.0) mode to switchdev [ 762.291363] :test: unbind vfs of ens1f1 [ 762.306914] :test: Change ens1f1 eswitch (0000:81:00.1) mode to switchdev [ 762.309237] mlx5_core 0000:81:00.1: E-Switch: Disable: mode(LEGACY), nvfs(2), active vports(3) [ 763.282598] mlx5_core 0000:81:00.1: E-Switch: Supported tc offload range - chains: 4294967294, prios: 4294967295 [ 763.362825] mlx5_core 0000:81:00.1: MLX5E: StrdRq(1) RqSz(8) StrdSz(2048) RxCqeCmprss(0) [ 763.444465] mlx5_core 0000:81:00.1 ens1f1: renamed from eth0 [ 763.460088] mlx5_core 0000:81:00.1: MLX5E: StrdRq(1) RqSz(8) StrdSz(2048) RxCqeCmprss(0) [ 763.502586] mlx5_core 0000:81:00.1: MLX5E: StrdRq(1) RqSz(8) StrdSz(2048) RxCqeCmprss(0) [ 763.552429] ens1f1_0: renamed from eth0 [ 763.569569] mlx5_core 0000:81:00.1: E-Switch: Enable: mode(OFFLOADS), nvfs(2), active vports(3) [ 763.629694] ens1f1_1: renamed from eth1 [ 764.631552] IPv6: ADDRCONF(NETDEV_CHANGE): ens1f1_0: link becomes ready [ 764.670841] :test: unbind vfs of ens1f0 [ 764.681966] :test: unbind vfs of ens1f1 [ 764.726762] mlx5_core 0000:81:00.0 ens1f0: Link up [ 764.766511] mlx5_core 0000:81:00.1 ens1f1: Link up [ 764.797325] :test: Add multipath vxlan encap rule and disable sriov [ 764.798544] :test: config multipath route [ 764.812732] mlx5_core 0000:81:00.0: lag map port 1:2 port 2:2 [ 764.874556] mlx5_core 0000:81:00.0: modify lag map port 1:1 port 2:2 [ 765.603681] :test: OK [ 765.659048] IPv6: ADDRCONF(NETDEV_CHANGE): ens1f1_1: link becomes ready [ 765.675085] :test: verify rule in hw [ 765.694237] IPv6: ADDRCONF(NETDEV_CHANGE): ens1f0: link becomes ready [ 765.711892] IPv6: ADDRCONF(NETDEV_CHANGE): ens1f1: link becomes ready [ 766.979230] :test: OK [ 768.125419] :test: OK [ 768.127519] :test: - disable sriov ens1f1 [ 768.131160] pci 0000:81:02.2: Removing from iommu group 75 [ 768.132646] pci 0000:81:02.3: Removing from iommu group 76 [ 769.179749] mlx5_core 0000:81:00.1: E-Switch: Disable: mode(OFFLOADS), nvfs(2), active vports(3) [ 769.455627] mlx5_core 0000:81:00.0: modify lag map port 1:1 port 2:1 [ 769.703990] mlx5_core 0000:81:00.1: MLX5E: StrdRq(1) RqSz(8) StrdSz(2048) RxCqeCmprss(0) [ 769.988637] mlx5_core 0000:81:00.1 ens1f1: renamed from eth0 [ 769.990022] :test: - disable sriov ens1f0 [ 769.994922] pci 0000:81:00.2: Removing from iommu group 73 [ 769.997048] pci 0000:81:00.3: Removing from iommu group 74 [ 771.035813] mlx5_core 0000:81:00.0: E-Switch: Disable: mode(OFFLOADS), nvfs(2), active vports(3) [ 771.339091] ------------[ cut here ]------------ [ 771.340812] WARNING: CPU: 6 PID: 3448 at net/sched/cls_api.c:749 tcf_block_offload_unbind.isra.0+0x5c/0x60 [ 771.341728] Modules linked in: act_mirred act_tunnel_key cls_flower dummy vxlan ip6_udp_tunnel udp_tunnel sch_ingress nfsv3 nfs_acl nfs lockd grace fscache tun bridge stp llc sunrpc rdma_ucm rdma_cm iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5_core intel_rapl_msr intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp mlxfw act_ct nf_flow_table kvm_intel nf_nat kvm nf_conntrack irqbypass crct10dif_pclmul igb crc32_pclmul nf_defrag_ipv6 libcrc32c nf_defrag_ipv4 crc32c_intel ghash_clmulni_intel ptp ipmi_ssif intel_cstate pps_c ore ses intel_uncore mei_me iTCO_wdt joydev ipmi_si iTCO_vendor_support i2c_i801 enclosure mei ioatdma dca lpc_ich wmi ipmi_devintf pcspkr acpi_power_meter ipmi_msghandler acpi_pad ast i2c_algo_bit drm_vram_helper drm_kms_helper drm_ttm_helper ttm drm mpt3sas raid_class scsi_transport_sas [ 771.347818] CPU: 6 PID: 3448 Comm: test-ecmp-add-v Not tainted 5.7.0+ #1146 [ 771.348727] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 771.349646] RIP: 0010:tcf_block_offload_unbind.isra.0+0x5c/0x60 [ 771.350553] Code: 4a fd ff ff 83 f8 a1 74 0e 5b 4c 89 e7 5d 41 5c 41 5d e9 07 93 89 ff 8b 83 a0 00 00 00 8d 50 ff 89 93 a0 00 00 00 85 c0 75 df <0f> 0b eb db 0f 1f 44 00 00 41 57 41 56 41 55 41 89 cd 41 54 49 89 [ 771.352420] RSP: 0018:ffffb33144cd3b00 EFLAGS: 00010246 [ 771.353353] RAX: 0000000000000000 RBX: ffff8b37cf4b2800 RCX: 0000000000000000 [ 771.354294] RDX: 00000000ffffffff RSI: ffff8b3b9aad0000 RDI: ffffffff8d5c6e20 [ 771.355245] RBP: ffff8b37eb546948 R08: ffffffffc0b7a348 R09: ffff8b3b9aad0000 [ 771.356189] R10: 0000000000000001 R11: ffff8b3ba7a0a1c0 R12: ffff8b37cf4b2850 [ 771.357123] R13: ffff8b3b9aad0000 R14: ffff8b37cf4b2820 R15: ffff8b37cf4b2820 [ 771.358039] FS: 00007f8a19b6e740(0000) GS:ffff8b3befa00000(0000) knlGS:0000000000000000 [ 771.358965] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 771.359885] CR2: 00007f3afb91c1a0 CR3: 000000045133c004 CR4: 00000000001606e0 [ 771.360825] Call Trace: [ 771.361764] __tcf_block_put+0x84/0x150 [ 771.362712] ingress_destroy+0x1b/0x20 [sch_ingress] [ 771.363658] qdisc_destroy+0x3e/0xc0 [ 771.364594] dev_shutdown+0x7a/0xa5 [ 771.365522] rollback_registered_many+0x20d/0x530 [ 771.366458] ? netdev_upper_dev_unlink+0x15d/0x1c0 [ 771.367387] unregister_netdevice_many.part.0+0xf/0x70 [ 771.368310] vxlan_netdevice_event+0xa4/0x110 [vxlan] [ 771.369454] notifier_call_chain+0x4c/0x70 [ 771.370579] rollback_registered_many+0x2f5/0x530 [ 771.371719] rollback_registered+0x56/0x90 [ 771.372843] unregister_netdevice_queue+0x73/0xb0 [ 771.373982] unregister_netdev+0x18/0x20 [ 771.375168] mlx5e_vport_rep_unload+0x56/0xc0 [mlx5_core] [ 771.376327] esw_offloads_disable+0x81/0x90 [mlx5_core] [ 771.377512] mlx5_eswitch_disable_locked.cold+0xcb/0x1af [mlx5_core] [ 771.378679] mlx5_eswitch_disable+0x44/0x60 [mlx5_core] [ 771.379822] mlx5_device_disable_sriov+0xad/0xb0 [mlx5_core] [ 771.380968] mlx5_core_sriov_configure+0xc1/0xe0 [mlx5_core] [ 771.382087] sriov_numvfs_store+0xfc/0x130 [ 771.383195] kernfs_fop_write+0xce/0x1b0 [ 771.384302] vfs_write+0xb6/0x1a0 [ 771.385410] ksys_write+0x5f/0xe0 [ 771.386500] do_syscall_64+0x5b/0x1d0 [ 771.387569] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 0fdcf78d5973 ("net: use flow_indr_dev_setup_offload()") Signed-off-by: wenxu Signed-off-by: David S. Miller --- net/sched/cls_api.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index f8028d73edf1..faa78b7dd962 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -672,25 +672,29 @@ static int tcf_block_offload_cmd(struct tcf_block *block, struct netlink_ext_ack *extack) { struct flow_block_offload bo = {}; - int err; tcf_block_offload_init(&bo, dev, command, ei->binder_type, &block->flow_block, tcf_block_shared(block), extack); - if (dev->netdev_ops->ndo_setup_tc) - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); - else - err = flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, block, - &bo, tc_block_indr_cleanup); + if (dev->netdev_ops->ndo_setup_tc) { + int err; - if (err < 0) { - if (err != -EOPNOTSUPP) - NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed"); - return err; + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); + if (err < 0) { + if (err != -EOPNOTSUPP) + NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed"); + return err; + } + + return tcf_block_setup(block, &bo); } - return tcf_block_setup(block, &bo); + flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, block, &bo, + tc_block_indr_cleanup); + tcf_block_setup(block, &bo); + + return -EOPNOTSUPP; } static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, From 5948378b26d89f8aa5eac37629dbd0616ce8d7a7 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 18 Jun 2020 10:43:46 -0500 Subject: [PATCH 324/462] ibmveth: Fix max MTU limit The max MTU limit defined for ibmveth is not accounting for virtual ethernet buffer overhead, which is twenty-two additional bytes set aside for the ethernet header and eight additional bytes of an opaque handle reserved for use by the hypervisor. Update the max MTU to reflect this overhead. Fixes: d894be57ca92 ("ethernet: use net core MTU range checking in more drivers") Fixes: 110447f8269a ("ethernet: fix min/max MTU typos") Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmveth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 96d36ae5049e..c5c732601e35 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1715,7 +1715,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) } netdev->min_mtu = IBMVETH_MIN_MTU; - netdev->max_mtu = ETH_MAX_MTU; + netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH; memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN); From ca8826095e4d4afc0ccaead27bba6e4b623a12ae Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 18 Jun 2020 12:40:43 -0400 Subject: [PATCH 325/462] selftests/net: report etf errors correctly The ETF qdisc can queue skbs that it could not pace on the errqueue. Address a few issues in the selftest - recv buffer size was too small, and incorrectly calculated - compared errno to ee_code instead of ee_errno - missed invalid request error type v2: - fix a few checkpatch --strict indentation warnings Fixes: ea6a547669b3 ("selftests/net: make so_txtime more robust to timer variance") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/so_txtime.c | 33 +++++++++++++++++++------ 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 383bac05ac32..ceaad78e9667 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -15,8 +15,9 @@ #include #include #include +#include #include -#include +#include #include #include #include @@ -140,8 +141,8 @@ static void do_recv_errqueue_timeout(int fdt) { char control[CMSG_SPACE(sizeof(struct sock_extended_err)) + CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0}; - char data[sizeof(struct ipv6hdr) + - sizeof(struct tcphdr) + 1]; + char data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + + sizeof(struct udphdr) + 1]; struct sock_extended_err *err; struct msghdr msg = {0}; struct iovec iov = {0}; @@ -159,6 +160,8 @@ static void do_recv_errqueue_timeout(int fdt) msg.msg_controllen = sizeof(control); while (1) { + const char *reason; + ret = recvmsg(fdt, &msg, MSG_ERRQUEUE); if (ret == -1 && errno == EAGAIN) break; @@ -176,14 +179,30 @@ static void do_recv_errqueue_timeout(int fdt) err = (struct sock_extended_err *)CMSG_DATA(cm); if (err->ee_origin != SO_EE_ORIGIN_TXTIME) error(1, 0, "errqueue: origin 0x%x\n", err->ee_origin); - if (err->ee_code != ECANCELED) - error(1, 0, "errqueue: code 0x%x\n", err->ee_code); + + switch (err->ee_errno) { + case ECANCELED: + if (err->ee_code != SO_EE_CODE_TXTIME_MISSED) + error(1, 0, "errqueue: unknown ECANCELED %u\n", + err->ee_code); + reason = "missed txtime"; + break; + case EINVAL: + if (err->ee_code != SO_EE_CODE_TXTIME_INVALID_PARAM) + error(1, 0, "errqueue: unknown EINVAL %u\n", + err->ee_code); + reason = "invalid txtime"; + break; + default: + error(1, 0, "errqueue: errno %u code %u\n", + err->ee_errno, err->ee_code); + }; tstamp = ((int64_t) err->ee_data) << 32 | err->ee_info; tstamp -= (int64_t) glob_tstart; tstamp /= 1000 * 1000; - fprintf(stderr, "send: pkt %c at %" PRId64 "ms dropped\n", - data[ret - 1], tstamp); + fprintf(stderr, "send: pkt %c at %" PRId64 "ms dropped: %s\n", + data[ret - 1], tstamp, reason); msg.msg_flags = 0; msg.msg_controllen = sizeof(control); From 55ca22633a9faa34c6a61d99433f9cc8e28dd7cc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 15 Jun 2020 12:57:57 +0000 Subject: [PATCH 326/462] mm/gup: Use huge_ptep_get() in gup_hugepte() gup_hugepte() reads hugepage table entries, it can't read them directly, huge_ptep_get() must be used. Fixes: 9e343b467c70 ("READ_ONCE: Enforce atomicity for {READ,WRITE}_ONCE() memory accesses") Signed-off-by: Christophe Leroy Acked-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ffc3714334c3bfaca6f13788ad039e8759ae413f.1592225558.git.christophe.leroy@csgroup.eu --- mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index de9e36262ccb..761df4944ef5 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2425,7 +2425,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, if (pte_end < end) end = pte_end; - pte = READ_ONCE(*ptep); + pte = huge_ptep_get(ptep); if (!pte_access_permitted(pte, flags & FOLL_WRITE)) return 0; From 481e980a7c199c5a4634fd7ea308067dd4ba75fa Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 15 Jun 2020 12:57:58 +0000 Subject: [PATCH 327/462] mm: Allow arches to provide ptep_get() Since commit 9e343b467c70 ("READ_ONCE: Enforce atomicity for {READ,WRITE}_ONCE() memory accesses") it is not possible anymore to use READ_ONCE() to access complex page table entries like the one defined for powerpc 8xx with 16k size pages. Define a ptep_get() helper that architectures can override instead of performing a READ_ONCE() on the page table entry pointer. Fixes: 9e343b467c70 ("READ_ONCE: Enforce atomicity for {READ,WRITE}_ONCE() memory accesses") Signed-off-by: Christophe Leroy Acked-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/087fa12b6e920e32315136b998aa834f99242695.1592225558.git.christophe.leroy@csgroup.eu --- include/asm-generic/hugetlb.h | 2 +- include/linux/pgtable.h | 7 +++++++ mm/gup.c | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index 40f85decc2ee..8e1e6244a89d 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -122,7 +122,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, #ifndef __HAVE_ARCH_HUGE_PTEP_GET static inline pte_t huge_ptep_get(pte_t *ptep) { - return READ_ONCE(*ptep); + return ptep_get(ptep); } #endif diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 32b6c52d41b9..56c1e8eb7bb0 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -249,6 +249,13 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, } #endif +#ifndef __HAVE_ARCH_PTEP_GET +static inline pte_t ptep_get(pte_t *ptep) +{ + return READ_ONCE(*ptep); +} +#endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, diff --git a/mm/gup.c b/mm/gup.c index 761df4944ef5..6f47697f8fb0 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2196,7 +2196,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) */ static inline pte_t gup_get_pte(pte_t *ptep) { - return READ_ONCE(*ptep); + return ptep_get(ptep); } #endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ From c0e1c8c22bebecef40097c80c1c74492ff96d081 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 15 Jun 2020 12:57:59 +0000 Subject: [PATCH 328/462] powerpc/8xx: Provide ptep_get() with 16k pages READ_ONCE() now enforces atomic read, which leads to: CC mm/gup.o In file included from ./include/linux/kernel.h:11:0, from mm/gup.c:2: In function 'gup_hugepte.constprop', inlined from 'gup_huge_pd.isra.79' at mm/gup.c:2465:8: ./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_222' declared with attribute error: Unsupported access size for {READ,WRITE}_ONCE(). _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:373:4: note: in definition of macro '__compiletime_assert' prefix ## suffix(); \ ^ ./include/linux/compiler.h:392:2: note: in expansion of macro '_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:405:2: note: in expansion of macro 'compiletime_assert' compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ ^ ./include/linux/compiler.h:291:2: note: in expansion of macro 'compiletime_assert_rwonce_type' compiletime_assert_rwonce_type(x); \ ^ mm/gup.c:2428:8: note: in expansion of macro 'READ_ONCE' pte = READ_ONCE(*ptep); ^ In function 'gup_get_pte', inlined from 'gup_pte_range' at mm/gup.c:2228:9, inlined from 'gup_pmd_range' at mm/gup.c:2613:15, inlined from 'gup_pud_range' at mm/gup.c:2641:15, inlined from 'gup_p4d_range' at mm/gup.c:2666:15, inlined from 'gup_pgd_range' at mm/gup.c:2694:15, inlined from 'internal_get_user_pages_fast' at mm/gup.c:2795:3: ./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_219' declared with attribute error: Unsupported access size for {READ,WRITE}_ONCE(). _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:373:4: note: in definition of macro '__compiletime_assert' prefix ## suffix(); \ ^ ./include/linux/compiler.h:392:2: note: in expansion of macro '_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:405:2: note: in expansion of macro 'compiletime_assert' compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ ^ ./include/linux/compiler.h:291:2: note: in expansion of macro 'compiletime_assert_rwonce_type' compiletime_assert_rwonce_type(x); \ ^ mm/gup.c:2199:9: note: in expansion of macro 'READ_ONCE' return READ_ONCE(*ptep); ^ make[2]: *** [mm/gup.o] Error 1 Define ptep_get() on 8xx when using 16k pages. Fixes: 9e343b467c70 ("READ_ONCE: Enforce atomicity for {READ,WRITE}_ONCE() memory accesses") Signed-off-by: Christophe Leroy Acked-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/341688399c1b102756046d19ea6ce39db1ae4742.1592225558.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 5a590ceaec14..b0afbdd07740 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -284,6 +284,16 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return __pte(pte_update(mm, addr, ptep, ~0, 0, 0)); } +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) +#define __HAVE_ARCH_PTEP_GET +static inline pte_t ptep_get(pte_t *ptep) +{ + pte_t pte = {READ_ONCE(ptep->pte), 0, 0, 0}; + + return pte; +} +#endif + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) From f8ea5c7bceeb6ce6e7b3e7fb28c9dda8c0a58dcb Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 19 Jun 2020 00:01:28 +0100 Subject: [PATCH 329/462] afs: Fix afs_do_lookup() to call correct fetch-status op variant Fix afs_do_lookup()'s fallback case for when FS.InlineBulkStatus isn't supported by the server. In the fallback, it calls FS.FetchStatus for the specific vnode it's meant to be looking up. Commit b6489a49f7b7 broke this by renaming one of the two identically-named afs_fetch_status_operation descriptors to something else so that one of them could be made non-static. The site that used the renamed one, however, wasn't renamed and didn't produce any warning because the other was declared in a header. Fix this by making afs_do_lookup() use the renamed variant. Note that there are two variants of the success method because one is called from ->lookup() where we may or may not have an inode, but can't call iget until after we've talked to the server - whereas the other is called from within iget where we have an inode, but it may or may not be initialised. The latter variant expects there to be an inode, but because it's being called from there former case, there might not be - resulting in an oops like the following: BUG: kernel NULL pointer dereference, address: 00000000000000b0 ... RIP: 0010:afs_fetch_status_success+0x27/0x7e ... Call Trace: afs_wait_for_operation+0xda/0x234 afs_do_lookup+0x2fe/0x3c1 afs_lookup+0x3c5/0x4bd __lookup_slow+0xcd/0x10f walk_component+0xa2/0x10c path_lookupat.isra.0+0x80/0x110 filename_lookup+0x81/0x104 vfs_statx+0x76/0x109 __do_sys_newlstat+0x39/0x6b do_syscall_64+0x4c/0x78 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: b6489a49f7b7 ("afs: Fix silly rename") Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 3e3c2bf0a722..96757f3abd74 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -845,7 +845,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, * to FS.FetchStatus for op->file[1]. */ op->fetch_status.which = 1; - op->ops = &afs_fetch_status_operation; + op->ops = &afs_lookup_fetch_status_operation; afs_begin_vnode_operation(op); afs_wait_for_operation(op); } From 5481fc6eb8a7f4b76d8ad1be371d2e11b22bfb55 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 19 Jun 2020 23:39:36 +0100 Subject: [PATCH 330/462] afs: Fix hang on rmmod due to outstanding timer The fileserver probe timer, net->fs_probe_timer, isn't cancelled when the kafs module is being removed and so the count it holds on net->servers_outstanding doesn't get dropped.. This causes rmmod to wait forever. The hung process shows a stack like: afs_purge_servers+0x1b5/0x23c [kafs] afs_net_exit+0x44/0x6e [kafs] ops_exit_list+0x72/0x93 unregister_pernet_operations+0x14c/0x1ba unregister_pernet_subsys+0x1d/0x2a afs_exit+0x29/0x6f [kafs] __do_sys_delete_module.isra.0+0x1a2/0x24b do_syscall_64+0x51/0x95 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this by: (1) Attempting to cancel the probe timer and, if successful, drop the count that the timer was holding. (2) Make the timer function just drop the count and not schedule the prober if the afs portion of net namespace is being destroyed. Also, whilst we're at it, make the following changes: (3) Initialise net->servers_outstanding to 1 and decrement it before waiting on it so that it doesn't generate wake up events by being decremented to 0 until we're cleaning up. (4) Switch the atomic_dec() on ->servers_outstanding for ->fs_timer in afs_purge_servers() to use the helper function for that. Fixes: f6cbb368bcb0 ("afs: Actively poll fileservers to maintain NAT or firewall openings") Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/fs_probe.c | 11 ++++++++++- fs/afs/internal.h | 1 + fs/afs/main.c | 3 +++ fs/afs/server.c | 3 ++- 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index b34f74b0f319..5d9ef517cf81 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -314,7 +314,7 @@ void afs_fs_probe_timer(struct timer_list *timer) { struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer); - if (!queue_work(afs_wq, &net->fs_prober)) + if (!net->live || !queue_work(afs_wq, &net->fs_prober)) afs_dec_servers_outstanding(net); } @@ -458,3 +458,12 @@ dont_wait: return -ETIME; return -EDESTADDRREQ; } + +/* + * Clean up the probing when the namespace is killed off. + */ +void afs_fs_probe_cleanup(struct afs_net *net) +{ + if (del_timer_sync(&net->fs_probe_timer)) + afs_dec_servers_outstanding(net); +} diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 573a5922c3bb..d520535ddb62 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -1065,6 +1065,7 @@ extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long); extern void afs_probe_fileserver(struct afs_net *, struct afs_server *); extern void afs_fs_probe_dispatcher(struct work_struct *); extern int afs_wait_for_one_fs_probe(struct afs_server *, bool); +extern void afs_fs_probe_cleanup(struct afs_net *); /* * inode.c diff --git a/fs/afs/main.c b/fs/afs/main.c index 9c79c91e8005..31b472f7c734 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -100,6 +100,7 @@ static int __net_init afs_net_init(struct net *net_ns) timer_setup(&net->fs_timer, afs_servers_timer, 0); INIT_WORK(&net->fs_prober, afs_fs_probe_dispatcher); timer_setup(&net->fs_probe_timer, afs_fs_probe_timer, 0); + atomic_set(&net->servers_outstanding, 1); ret = -ENOMEM; sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL); @@ -130,6 +131,7 @@ static int __net_init afs_net_init(struct net *net_ns) error_open_socket: net->live = false; + afs_fs_probe_cleanup(net); afs_cell_purge(net); afs_purge_servers(net); error_cell_init: @@ -150,6 +152,7 @@ static void __net_exit afs_net_exit(struct net *net_ns) struct afs_net *net = afs_net(net_ns); net->live = false; + afs_fs_probe_cleanup(net); afs_cell_purge(net); afs_purge_servers(net); afs_close_socket(net); diff --git a/fs/afs/server.c b/fs/afs/server.c index 039e3488511c..e82e452e2612 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -605,11 +605,12 @@ void afs_purge_servers(struct afs_net *net) _enter(""); if (del_timer_sync(&net->fs_timer)) - atomic_dec(&net->servers_outstanding); + afs_dec_servers_outstanding(net); afs_queue_server_manager(net); _debug("wait"); + atomic_dec(&net->servers_outstanding); wait_var_event(&net->servers_outstanding, !atomic_read(&net->servers_outstanding)); _leave(""); From 13f2d25b951f139064ec2dd53c0c7ebdf8d8007e Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Tue, 9 Jun 2020 11:27:03 +0800 Subject: [PATCH 331/462] Revert "pinctrl: freescale: imx: Use 'devm_of_iomap()' to avoid a resource leak in case of error in 'imx_pinctrl_probe()'" This reverts commit ba403242615c2c99e27af7984b1650771a2cc2c9. After commit 26d8cde5260b ("pinctrl: freescale: imx: add shared input select reg support"). i.MX7D has two iomux controllers iomuxc and iomuxc-lpsr which share select_input register for daisy chain settings. If use 'devm_of_iomap()', when probe the iomuxc-lpsr, will call devm_request_mem_region() for the region <0x30330000-0x3033ffff> for the first time. Then, next time when probe the iomuxc, API devm_platform_ioremap_resource() will also use the API devm_request_mem_region() for the share region <0x30330000-0x3033ffff> again, then cause issue, log like below: [ 0.179561] imx7d-pinctrl 302c0000.iomuxc-lpsr: initialized IMX pinctrl driver [ 0.191742] imx7d-pinctrl 30330000.pinctrl: can't request region for resource [mem 0x30330000-0x3033ffff] [ 0.191842] imx7d-pinctrl: probe of 30330000.pinctrl failed with error -16 Fixes: ba403242615c ("pinctrl: freescale: imx: Use 'devm_of_iomap()' to avoid a resource leak in case of error in 'imx_pinctrl_probe()'") Signed-off-by: Haibo Chen Reviewed-by: Dong Aisheng Link: https://lore.kernel.org/r/1591673223-1680-1-git-send-email-haibo.chen@nxp.com Signed-off-by: Linus Walleij --- drivers/pinctrl/freescale/pinctrl-imx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index cb7e0f08d2cf..1f81569c7ae3 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -824,13 +824,12 @@ int imx_pinctrl_probe(struct platform_device *pdev, return -EINVAL; } - ipctl->input_sel_base = devm_of_iomap(&pdev->dev, np, - 0, NULL); + ipctl->input_sel_base = of_iomap(np, 0); of_node_put(np); - if (IS_ERR(ipctl->input_sel_base)) { + if (!ipctl->input_sel_base) { dev_err(&pdev->dev, "iomuxc input select base address not found\n"); - return PTR_ERR(ipctl->input_sel_base); + return -ENOMEM; } } } From 7f5f4de83ca30a4922bb178b80144e2778faad01 Mon Sep 17 00:00:00 2001 From: Sivaprakash Murugesan Date: Fri, 19 Jun 2020 10:01:29 +0530 Subject: [PATCH 332/462] pinctrl: qcom: ipq6018 Add missing pins in qpic pin group The patch adds missing qpic data pins to qpic pingroup. These pins are necessary for the qpic nand to work. Fixes: ef1ea54eab0e ("pinctrl: qcom: Add ipq6018 pinctrl driver") Signed-off-by: Sivaprakash Murugesan Link: https://lore.kernel.org/r/1592541089-17700-1-git-send-email-sivaprak@codeaurora.org Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-ipq6018.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq6018.c b/drivers/pinctrl/qcom/pinctrl-ipq6018.c index 38c33a778cb8..ec50a3b4bd16 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq6018.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq6018.c @@ -367,7 +367,8 @@ static const char * const wci20_groups[] = { static const char * const qpic_pad_groups[] = { "gpio0", "gpio1", "gpio2", "gpio3", "gpio4", "gpio9", "gpio10", - "gpio11", "gpio17", + "gpio11", "gpio17", "gpio15", "gpio12", "gpio13", "gpio14", "gpio5", + "gpio6", "gpio7", "gpio8", }; static const char * const burn0_groups[] = { From 25fae752156db7253471347df08a2700501eafde Mon Sep 17 00:00:00 2001 From: Drew Fustini Date: Fri, 12 Jun 2020 13:27:58 +0200 Subject: [PATCH 333/462] pinctrl: single: fix function name in documentation Use the correct the function name in the documentation for "pcs_parse_one_pinctrl_entry()". "smux_parse_one_pinctrl_entry()" appears to be an artifact from the development of a prior patch series ("simple pinmux driver") which transformed into pinctrl-single. Signed-off-by: Drew Fustini Link: https://lore.kernel.org/r/20200612112758.GA3407886@x1 Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-single.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 1e0614daee9b..f3a8a465d27e 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -958,7 +958,7 @@ static int pcs_parse_pinconf(struct pcs_device *pcs, struct device_node *np, } /** - * smux_parse_one_pinctrl_entry() - parses a device tree mux entry + * pcs_parse_one_pinctrl_entry() - parses a device tree mux entry * @pctldev: pin controller device * @pcs: pinctrl driver instance * @np: device node of the mux entry From b59eabd23ee53e8c3492602722e1697f9a2f63ad Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 18 Jun 2020 10:29:04 -0700 Subject: [PATCH 334/462] ionic: tame the watchdog timer on reconfig Even with moving netif_tx_disable() to an earlier point when taking down the queues for a reconfiguration, we still end up with the occasional netdev watchdog Tx Timeout complaint. The old method of using netif_trans_update() works fine for queue 0, but has no effect on the remaining queues. Using netif_device_detach() allows us to signal to the watchdog to ignore us for the moment. Fixes: beead698b173 ("ionic: Add the basic NDO callbacks for netdev support") Signed-off-by: Shannon Nelson Acked-by: Jonathan Toppins Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 8f29ef133743..aaa00edd9d5b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1694,15 +1694,15 @@ static void ionic_stop_queues(struct ionic_lif *lif) if (!test_and_clear_bit(IONIC_LIF_F_UP, lif->state)) return; - ionic_txrx_disable(lif); netif_tx_disable(lif->netdev); + ionic_txrx_disable(lif); } int ionic_stop(struct net_device *netdev) { struct ionic_lif *lif = netdev_priv(netdev); - if (!netif_device_present(netdev)) + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) return 0; ionic_stop_queues(lif); @@ -1985,18 +1985,19 @@ int ionic_reset_queues(struct ionic_lif *lif) bool running; int err = 0; - /* Put off the next watchdog timeout */ - netif_trans_update(lif->netdev); - err = ionic_wait_for_bit(lif, IONIC_LIF_F_QUEUE_RESET); if (err) return err; running = netif_running(lif->netdev); - if (running) + if (running) { + netif_device_detach(lif->netdev); err = ionic_stop(lif->netdev); - if (!err && running) + } + if (!err && running) { ionic_open(lif->netdev); + netif_device_attach(lif->netdev); + } clear_bit(IONIC_LIF_F_QUEUE_RESET, lif->state); From 8b40eb73509f5704a0e8cd25de0163876299f1a7 Mon Sep 17 00:00:00 2001 From: Dany Madden Date: Thu, 18 Jun 2020 15:24:13 -0400 Subject: [PATCH 335/462] ibmvnic: continue to init in CRQ reset returns H_CLOSED Continue the reset path when partner adapter is not ready or H_CLOSED is returned from reset crq. This patch allows the CRQ init to proceed to establish a valid CRQ for traffic to flow after reset. Signed-off-by: Dany Madden Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2baf7b3ff4cb..0fd7eae25fe9 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1971,13 +1971,18 @@ static int do_reset(struct ibmvnic_adapter *adapter, release_sub_crqs(adapter, 1); } else { rc = ibmvnic_reset_crq(adapter); - if (!rc) + if (rc == H_CLOSED || rc == H_SUCCESS) { rc = vio_enable_interrupts(adapter->vdev); + if (rc) + netdev_err(adapter->netdev, + "Reset failed to enable interrupts. rc=%d\n", + rc); + } } if (rc) { netdev_err(adapter->netdev, - "Couldn't initialize crq. rc=%d\n", rc); + "Reset couldn't initialize crq. rc=%d\n", rc); goto out; } From 89fbd26cca7ec9e82ec4787a4b6e95939b57d073 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 18 Jun 2020 23:25:50 +0200 Subject: [PATCH 336/462] r8169: fix firmware not resetting tp->ocp_base Typically the firmware takes care that tp->ocp_base is reset to its default value. That's not the case (at least) for RTL8117. As a result subsequent PHY access reads/writes the wrong page and the link is broken. Fix this be resetting tp->ocp_base explicitly. Fixes: 229c1e0dfd3d ("r8169: load firmware for RTL8168fp/RTL8117") Reported-by: Aaron Ma Tested-by: Aaron Ma Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index dad84ecf5a77..b660ddbe4025 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2114,8 +2114,11 @@ static void rtl_release_firmware(struct rtl8169_private *tp) void r8169_apply_firmware(struct rtl8169_private *tp) { /* TODO: release firmware if rtl_fw_write_firmware signals failure. */ - if (tp->rtl_fw) + if (tp->rtl_fw) { rtl_fw_write_firmware(tp, tp->rtl_fw); + /* At least one firmware doesn't reset tp->ocp_base. */ + tp->ocp_base = OCP_STD_PHY_BASE; + } } static void rtl8168_config_eee_mac(struct rtl8169_private *tp) From 54eeea0d707d4e6efb02f5a7896d6788472a2da4 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 19 Jun 2020 11:24:45 +0800 Subject: [PATCH 337/462] tc-testing: update geneve options match in tunnel_key unit tests Since iproute2 commit f72c3ad00f3b ("tc: m_tunnel_key: add options support for vxlan"), the geneve opt output use key word "geneve_opts" instead of "geneve_opt". To make compatibility for both old and new iproute2, let's accept both "geneve_opt" and "geneve_opts". Suggested-by: Davide Caratti Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Tested-by: Davide Caratti Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/actions/tunnel_key.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json index fbeb9197697d..7357c58fa2dc 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json @@ -629,7 +629,7 @@ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022 index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action tunnel_key index 1", - "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022.*index 1", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 0102:80:00880022.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action tunnel_key" @@ -653,7 +653,7 @@ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344 index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action tunnel_key index 1", - "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344.*index 1", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action tunnel_key" @@ -677,7 +677,7 @@ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 824212:80:00880022 index 1", "expExitCode": "255", "verifyCmd": "$TC actions get action tunnel_key index 1", - "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 824212:80:00880022.*index 1", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 824212:80:00880022.*index 1", "matchCount": "0", "teardown": [ "$TC actions flush action tunnel_key" @@ -701,7 +701,7 @@ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:4224:00880022 index 1", "expExitCode": "255", "verifyCmd": "$TC actions get action tunnel_key index 1", - "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:4224:00880022.*index 1", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 0102:4224:00880022.*index 1", "matchCount": "0", "teardown": [ "$TC actions flush action tunnel_key" @@ -725,7 +725,7 @@ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288 index 1", "expExitCode": "255", "verifyCmd": "$TC actions get action tunnel_key index 1", - "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288.*index 1", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 0102:80:4288.*index 1", "matchCount": "0", "teardown": [ "$TC actions flush action tunnel_key" @@ -749,7 +749,7 @@ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288428822 index 1", "expExitCode": "255", "verifyCmd": "$TC actions get action tunnel_key index 1", - "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288428822.*index 1", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 0102:80:4288428822.*index 1", "matchCount": "0", "teardown": [ "$TC actions flush action tunnel_key" @@ -773,7 +773,7 @@ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42: index 1", "expExitCode": "255", "verifyCmd": "$TC actions get action tunnel_key index 1", - "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:.*index 1", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 0102:80:00880022,0408:42:.*index 1", "matchCount": "0", "teardown": [ "$TC actions flush action tunnel_key" From 0041cd5a50442db6e456b145892a0eaf2dff061f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 19 Jun 2020 23:38:16 +0100 Subject: [PATCH 338/462] rxrpc: Fix notification call on completion of discarded calls When preallocated service calls are being discarded, they're passed to ->discard_new_call() to have the caller clean up any attached higher-layer preallocated pieces before being marked completed. However, the act of marking them completed now invokes the call's notification function - which causes a problem because that function might assume that the previously freed pieces of memory are still there. Fix this by setting a dummy notification function on the socket after calling ->discard_new_call(). This results in the following kasan message when the kafs module is removed. ================================================================== BUG: KASAN: use-after-free in afs_wake_up_async_call+0x6aa/0x770 fs/afs/rxrpc.c:707 Write of size 1 at addr ffff8880946c39e4 by task kworker/u4:1/21 CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.8.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x18f/0x20d lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd3/0x413 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 afs_wake_up_async_call+0x6aa/0x770 fs/afs/rxrpc.c:707 rxrpc_notify_socket+0x1db/0x5d0 net/rxrpc/recvmsg.c:40 __rxrpc_set_call_completion.part.0+0x172/0x410 net/rxrpc/recvmsg.c:76 __rxrpc_call_completed net/rxrpc/recvmsg.c:112 [inline] rxrpc_call_completed+0xca/0xf0 net/rxrpc/recvmsg.c:111 rxrpc_discard_prealloc+0x781/0xab0 net/rxrpc/call_accept.c:233 rxrpc_listen+0x147/0x360 net/rxrpc/af_rxrpc.c:245 afs_close_socket+0x95/0x320 fs/afs/rxrpc.c:110 afs_net_exit+0x1bc/0x310 fs/afs/main.c:155 ops_exit_list.isra.0+0xa8/0x150 net/core/net_namespace.c:186 cleanup_net+0x511/0xa50 net/core/net_namespace.c:603 process_one_work+0x965/0x1690 kernel/workqueue.c:2269 worker_thread+0x96/0xe10 kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:291 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 Allocated by task 6820: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc mm/kasan/common.c:494 [inline] __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:467 kmem_cache_alloc_trace+0x153/0x7d0 mm/slab.c:3551 kmalloc include/linux/slab.h:555 [inline] kzalloc include/linux/slab.h:669 [inline] afs_alloc_call+0x55/0x630 fs/afs/rxrpc.c:141 afs_charge_preallocation+0xe9/0x2d0 fs/afs/rxrpc.c:757 afs_open_socket+0x292/0x360 fs/afs/rxrpc.c:92 afs_net_init+0xa6c/0xe30 fs/afs/main.c:125 ops_init+0xaf/0x420 net/core/net_namespace.c:151 setup_net+0x2de/0x860 net/core/net_namespace.c:341 copy_net_ns+0x293/0x590 net/core/net_namespace.c:482 create_new_namespaces+0x3fb/0xb30 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xbd/0x1f0 kernel/nsproxy.c:231 ksys_unshare+0x43d/0x8e0 kernel/fork.c:2983 __do_sys_unshare kernel/fork.c:3051 [inline] __se_sys_unshare kernel/fork.c:3049 [inline] __x64_sys_unshare+0x2d/0x40 kernel/fork.c:3049 do_syscall_64+0x60/0xe0 arch/x86/entry/common.c:359 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 21: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] kasan_set_free_info mm/kasan/common.c:316 [inline] __kasan_slab_free+0xf7/0x140 mm/kasan/common.c:455 __cache_free mm/slab.c:3426 [inline] kfree+0x109/0x2b0 mm/slab.c:3757 afs_put_call+0x585/0xa40 fs/afs/rxrpc.c:190 rxrpc_discard_prealloc+0x764/0xab0 net/rxrpc/call_accept.c:230 rxrpc_listen+0x147/0x360 net/rxrpc/af_rxrpc.c:245 afs_close_socket+0x95/0x320 fs/afs/rxrpc.c:110 afs_net_exit+0x1bc/0x310 fs/afs/main.c:155 ops_exit_list.isra.0+0xa8/0x150 net/core/net_namespace.c:186 cleanup_net+0x511/0xa50 net/core/net_namespace.c:603 process_one_work+0x965/0x1690 kernel/workqueue.c:2269 worker_thread+0x96/0xe10 kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:291 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 The buggy address belongs to the object at ffff8880946c3800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 484 bytes inside of 1024-byte region [ffff8880946c3800, ffff8880946c3c00) The buggy address belongs to the page: page:ffffea000251b0c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0xfffe0000000200(slab) raw: 00fffe0000000200 ffffea0002546508 ffffea00024fa248 ffff8880aa000c40 raw: 0000000000000000 ffff8880946c3000 0000000100000002 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880946c3880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880946c3900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8880946c3980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8880946c3a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880946c3a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Reported-by: syzbot+d3eccef36ddbd02713e9@syzkaller.appspotmail.com Fixes: 5ac0d62226a0 ("rxrpc: Fix missing notification") Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/call_accept.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index b7611cc159e5..032ed76c0166 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -22,6 +22,11 @@ #include #include "ar-internal.h" +static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call, + unsigned long user_call_ID) +{ +} + /* * Preallocate a single service call, connection and peer and, if possible, * give them a user ID and attach the user's side of the ID to them. @@ -228,6 +233,8 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) if (rx->discard_new_call) { _debug("discard %lx", call->user_call_ID); rx->discard_new_call(call, call->user_call_ID); + if (call->notify_rx) + call->notify_rx = rxrpc_dummy_notify; rxrpc_put_call(call, rxrpc_call_put_kernel); } rxrpc_call_completed(call); From 67c20de35a3cc2e2cd940f95ebd85ed0a765315a Mon Sep 17 00:00:00 2001 From: Rob Gill Date: Sat, 20 Jun 2020 02:08:25 +0000 Subject: [PATCH 339/462] net: Add MODULE_DESCRIPTION entries to network modules The user tool modinfo is used to get information on kernel modules, including a description where it is available. This patch adds a brief MODULE_DESCRIPTION to the following modules: 9p drop_monitor esp4_offload esp6_offload fou fou6 ila sch_fq sch_fq_codel sch_hhf Signed-off-by: Rob Gill Signed-off-by: David S. Miller --- net/9p/mod.c | 1 + net/core/drop_monitor.c | 1 + net/ipv4/esp4_offload.c | 1 + net/ipv4/fou.c | 1 + net/ipv6/esp6_offload.c | 1 + net/ipv6/fou6.c | 1 + net/ipv6/ila/ila_main.c | 1 + net/sched/sch_fq.c | 1 + net/sched/sch_fq_codel.c | 1 + net/sched/sch_hhf.c | 1 + 10 files changed, 10 insertions(+) diff --git a/net/9p/mod.c b/net/9p/mod.c index c1b62428da7b..5126566850bd 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -189,3 +189,4 @@ MODULE_AUTHOR("Latchesar Ionkov "); MODULE_AUTHOR("Eric Van Hensbergen "); MODULE_AUTHOR("Ron Minnich "); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Plan 9 Resource Sharing Support (9P2000)"); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 2ee7bc4c9e03..b09bebeadf0b 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1721,3 +1721,4 @@ module_exit(exit_net_drop_monitor); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Neil Horman "); MODULE_ALIAS_GENL_FAMILY("NET_DM"); +MODULE_DESCRIPTION("Monitoring code for network dropped packet alerts"); diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index d14133eac476..5bda5aeda579 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -361,3 +361,4 @@ module_exit(esp4_offload_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert "); MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET, XFRM_PROTO_ESP); +MODULE_DESCRIPTION("IPV4 GSO/GRO offload support"); diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index dcc79ff54b41..abd083415f89 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -1304,3 +1304,4 @@ module_init(fou_init); module_exit(fou_fini); MODULE_AUTHOR("Tom Herbert "); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Foo over UDP"); diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 55addea1948f..1ca516fb30e1 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -395,3 +395,4 @@ module_exit(esp6_offload_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert "); MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET6, XFRM_PROTO_ESP); +MODULE_DESCRIPTION("IPV6 GSO/GRO offload support"); diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index 091f94184dc1..430518ae26fa 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -224,3 +224,4 @@ module_init(fou6_init); module_exit(fou6_fini); MODULE_AUTHOR("Tom Herbert "); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Foo over UDP (IPv6)"); diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c index 257d2b681246..36c58aa257e8 100644 --- a/net/ipv6/ila/ila_main.c +++ b/net/ipv6/ila/ila_main.c @@ -120,3 +120,4 @@ module_init(ila_init); module_exit(ila_fini); MODULE_AUTHOR("Tom Herbert "); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6: Identifier Locator Addressing (ILA)"); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 8f06a808c59a..2fb76fc0cc31 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -1075,3 +1075,4 @@ module_init(fq_module_init) module_exit(fq_module_exit) MODULE_AUTHOR("Eric Dumazet"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Fair Queue Packet Scheduler"); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 436160be9c18..459a784056c0 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -721,3 +721,4 @@ module_init(fq_codel_module_init) module_exit(fq_codel_module_exit) MODULE_AUTHOR("Eric Dumazet"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Fair Queue CoDel discipline"); diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index be35f03b657b..420ede875322 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -721,3 +721,4 @@ module_exit(hhf_module_exit) MODULE_AUTHOR("Terry Lam"); MODULE_AUTHOR("Nandita Dukkipati"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Heavy-Hitter Filter (HHF)"); From 2c6d9636ad92083d79a060edc84259526c153670 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 20 Jun 2020 00:09:55 +0900 Subject: [PATCH 340/462] Revert "Makefile: install modules.builtin even if CONFIG_MODULES=n" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e0b250b57dcf403529081e5898a9de717f96b76b, which broke build systems that need to install files to a certain path, but do not set INSTALL_MOD_PATH when invoking 'make install'. $ make INSTALL_PATH=/tmp/destdir install mkdir: cannot create directory ‘/lib/modules/5.8.0-rc1+/’: Permission denied Makefile:1342: recipe for target '_builtin_inst_' failed make: *** [_builtin_inst_] Error 1 While modules.builtin is useful also for CONFIG_MODULES=n, this change in the behavior is quite unexpected. Maybe "make modules_install" can install modules.builtin irrespective of CONFIG_MODULES as Jonas originally suggested. Anyway, that commit should be reverted ASAP. Reported-by: Douglas Anderson Reported-by: Guenter Roeck Cc: Jonas Karlman Signed-off-by: Masahiro Yamada Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck --- Makefile | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 29abe44ada91..9880e911afe3 100644 --- a/Makefile +++ b/Makefile @@ -1336,16 +1336,6 @@ dt_binding_check: scripts_dtc # --------------------------------------------------------------------------- # Modules -# install modules.builtin regardless of CONFIG_MODULES -PHONY += _builtin_inst_ -_builtin_inst_: - @mkdir -p $(MODLIB)/ - @cp -f modules.builtin $(MODLIB)/ - @cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/ - -PHONY += install -install: _builtin_inst_ - ifdef CONFIG_MODULES # By default, build modules as well @@ -1389,7 +1379,7 @@ PHONY += modules_install modules_install: _modinst_ _modinst_post PHONY += _modinst_ -_modinst_: _builtin_inst_ +_modinst_: @rm -rf $(MODLIB)/kernel @rm -f $(MODLIB)/source @mkdir -p $(MODLIB)/kernel @@ -1399,6 +1389,8 @@ _modinst_: _builtin_inst_ ln -s $(CURDIR) $(MODLIB)/build ; \ fi @sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order + @cp -f modules.builtin $(MODLIB)/ + @cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst # This depmod is only for convenience to give the initial From 214377e9b7e3185abf5998b8a90450e01bab21a7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 17 Jun 2020 11:08:38 +0900 Subject: [PATCH 341/462] samples: watch_queue: build sample program for target architecture This userspace program includes UAPI headers exported to usr/include/. 'make headers' always works for the target architecture (i.e. the same architecture as the kernel), so the sample program should be built for the target as well. Kbuild now supports 'userprogs' for that. I also guarded the CONFIG option by 'depends on CC_CAN_LINK' because $(CC) may not provide libc. Signed-off-by: Masahiro Yamada --- samples/Kconfig | 2 +- samples/watch_queue/Makefile | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/samples/Kconfig b/samples/Kconfig index f3ac549a53b0..0ed6e4d71d87 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -211,7 +211,7 @@ config SAMPLE_WATCHDOG config SAMPLE_WATCH_QUEUE bool "Build example /dev/watch_queue notification consumer" - depends on HEADERS_INSTALL + depends on CC_CAN_LINK && HEADERS_INSTALL help Build example userspace program to use the new mount_notify(), sb_notify() syscalls and the KEYCTL_WATCH_KEY keyctl() function. diff --git a/samples/watch_queue/Makefile b/samples/watch_queue/Makefile index 8511fb6c53d2..792b22f593cf 100644 --- a/samples/watch_queue/Makefile +++ b/samples/watch_queue/Makefile @@ -1,7 +1,5 @@ -# List of programs to build -hostprogs := watch_test +# SPDX-License-Identifier: GPL-2.0-only +userprogs := watch_test +always-y := $(userprogs) -# Tell kbuild to always build the programs -always-y := $(hostprogs) - -HOSTCFLAGS_watch_test.o += -I$(objtree)/usr/include +userccflags += -I usr/include From 48778464bb7d346b47157d21ffde2af6b2d39110 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 21 Jun 2020 15:45:29 -0700 Subject: [PATCH 342/462] Linux 5.8-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9880e911afe3..ac2c61c37a73 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Kleptomaniac Octopus # *DOCUMENTATION* From 625d3449788f85569096780592549d0340e9c0c7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 21 Jun 2020 21:02:22 -0600 Subject: [PATCH 343/462] Revert "kernel/printk: add kmsg SEEK_CUR handling" This reverts commit 8ece3b3eb576a78d2e67ad4c3a80a39fa6708809. This commit broke userspace. Bash uses ESPIPE to determine whether or not the file should be read using "unbuffered I/O", which means reading 1 byte at a time instead of 128 bytes at a time. I used to use bash to read through kmsg in a really quite nasty way: while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do echo "SARU $line" done < /dev/kmsg This will show all lines that can fit into the 128 byte buffer, and skip lines that don't. That's pretty awful, but at least it worked. With this change, bash now tries to do 1-byte reads, which means it skips all the lines, which is worse than before. Now, I don't really care very much about this, and I'm already look for a workaround. But I did just spend an hour trying to figure out why my scripts were broken. Either way, it makes no difference to me personally whether this is reverted, but it might be something to consider. If you declare that "trying to read /dev/kmsg with bash is terminally stupid anyway," I might be inclined to agree with you. But do note that bash uses lseek(fd, 0, SEEK_CUR)==>ESPIPE to determine whether or not it's reading from a pipe. Cc: Bruno Meneguele Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: David Laight Cc: Sergey Senozhatsky Cc: Petr Mladek Signed-off-by: Jason A. Donenfeld Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/dev-kmsg | 5 ----- kernel/printk/printk.c | 10 ---------- 2 files changed, 15 deletions(-) diff --git a/Documentation/ABI/testing/dev-kmsg b/Documentation/ABI/testing/dev-kmsg index 1e6c28b1942b..f307506eb54c 100644 --- a/Documentation/ABI/testing/dev-kmsg +++ b/Documentation/ABI/testing/dev-kmsg @@ -56,11 +56,6 @@ Description: The /dev/kmsg character device node provides userspace access seek after the last record available at the time the last SYSLOG_ACTION_CLEAR was issued. - Due to the record nature of this interface with a "read all" - behavior and the specific positions each seek operation sets, - SEEK_CUR is not supported, returning -ESPIPE (invalid seek) to - errno whenever requested. - The output format consists of a prefix carrying the syslog prefix including priority and facility, the 64 bit message sequence number and the monotonic timestamp in microseconds, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 8c14835be46c..b71eaf5f5a86 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -974,16 +974,6 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) user->idx = log_next_idx; user->seq = log_next_seq; break; - case SEEK_CUR: - /* - * It isn't supported due to the record nature of this - * interface: _SET _DATA and _END point to very specific - * record positions, while _CUR would be more useful in case - * of a byte-based log. Because of that, return the default - * errno value for invalid seek operation. - */ - ret = -ESPIPE; - break; default: ret = -EINVAL; } From 24eae8ebfb29ca54980764245ffb8aa218666ff6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 27 May 2020 14:05:28 +0800 Subject: [PATCH 344/462] vdpa: fix typos in the comments for __vdpa_alloc_device() Fix two typos in the comments for __vdpa_alloc_device(). Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200527060528.9100-1-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index ff6562f602e0..de211ef3738c 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -63,7 +63,7 @@ static void vdpa_release_dev(struct device *d) * @config: the bus operations that is supported by this device * @size: size of the parent structure that contains private data * - * Drvier should use vdap_alloc_device() wrapper macro instead of + * Driver should use vdpa_alloc_device() wrapper macro instead of * using this directly. * * Returns an error when parent/config/dma_dev is not set or fail to get From c09cc2c319863f620caf5e723db496eed55f7c7f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jun 2020 11:58:52 +0300 Subject: [PATCH 345/462] vhost_vdpa: Fix potential underflow in vhost_vdpa_mmap() The "vma->vm_pgoff" variable is an unsigned long so if it's larger than INT_MAX then "index" can be negative leading to an underflow. Fix this by changing the type of "index" to "unsigned long". Fixes: ddd89d0a059d ("vhost_vdpa: support doorbell mapping via mmap") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20200610085852.GB5439@mwanda Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 7580e34f76c1..a54b60d6623f 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -818,7 +818,7 @@ static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; struct vdpa_notification_area notify; - int index = vma->vm_pgoff; + unsigned long index = vma->vm_pgoff; if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; From 1c3d69ab5348b661616992206357a3ebf19b1008 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jun 2020 11:59:11 +0300 Subject: [PATCH 346/462] virtio-mem: silence a static checker warning Smatch complains that "rc" can be uninitialized if we hit the "break;" statement on the first iteration through the loop. I suspect that this can't happen in real life, but returning a zero literal is cleaner and silence the static checker warning. Fixes: 5f1f79bbc9e2 ("virtio-mem: Paravirtualized memory hotplug") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20200610085911.GC5439@mwanda Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 50c689f25045..5210ff0c5681 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1192,7 +1192,7 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, VIRTIO_MEM_MB_STATE_OFFLINE); } - return rc; + return 0; } /* From b3562c6087b585abba53d906f127d6fd5a92829c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 11 Jun 2020 11:35:18 +0200 Subject: [PATCH 347/462] virtio-mem: add memory via add_memory_driver_managed() Virtio-mem managed memory is always detected and added by the virtio-mem driver, never using something like the firmware-provided memory map. This is the case after an ordinary system reboot, and has to be guaranteed after kexec. Especially, virtio-mem added memory resources can contain inaccessible parts ("unblocked memory blocks"), blindly forwarding them to a kexec kernel is dangerous, as unplugged memory will get accessed (esp. written). Let's use the new way of adding special driver-managed memory introduced in commit 7b7b27214bba ("mm/memory_hotplug: introduce add_memory_driver_managed()"). This will result in no entries in /sys/firmware/memmap ("raw firmware- provided memory map"), the memory resource will be flagged IORESOURCE_MEM_DRIVER_MANAGED (esp., kexec_file_load() will not place kexec images on this memory), and it is exposed as "System RAM (virtio_mem)" in /proc/iomem, so esp. kexec-tools can properly handle it. Example /proc/iomem before this change: [...] 140000000-333ffffff : virtio0 140000000-147ffffff : System RAM 334000000-533ffffff : virtio1 338000000-33fffffff : System RAM 340000000-347ffffff : System RAM 348000000-34fffffff : System RAM [...] Example /proc/iomem after this change: [...] 140000000-333ffffff : virtio0 140000000-147ffffff : System RAM (virtio_mem) 334000000-533ffffff : virtio1 338000000-33fffffff : System RAM (virtio_mem) 340000000-347ffffff : System RAM (virtio_mem) 348000000-34fffffff : System RAM (virtio_mem) [...] Cc: "Michael S. Tsirkin" Cc: Pankaj Gupta Cc: teawater Fixes: 5f1f79bbc9e26 ("virtio-mem: Paravirtualized memory hotplug") Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20200611093518.5737-1-david@redhat.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Pankaj Gupta --- drivers/virtio/virtio_mem.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 5210ff0c5681..f26f5f64ae82 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -101,6 +101,11 @@ struct virtio_mem { /* The parent resource for all memory added via this device. */ struct resource *parent_resource; + /* + * Copy of "System RAM (virtio_mem)" to be used for + * add_memory_driver_managed(). + */ + const char *resource_name; /* Summary of all memory block states. */ unsigned long nb_mb_state[VIRTIO_MEM_MB_STATE_COUNT]; @@ -414,8 +419,20 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) if (nid == NUMA_NO_NODE) nid = memory_add_physaddr_to_nid(addr); + /* + * When force-unloading the driver and we still have memory added to + * Linux, the resource name has to stay. + */ + if (!vm->resource_name) { + vm->resource_name = kstrdup_const("System RAM (virtio_mem)", + GFP_KERNEL); + if (!vm->resource_name) + return -ENOMEM; + } + dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id); - return add_memory(nid, addr, memory_block_size_bytes()); + return add_memory_driver_managed(nid, addr, memory_block_size_bytes(), + vm->resource_name); } /* @@ -1890,10 +1907,12 @@ static void virtio_mem_remove(struct virtio_device *vdev) vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] || vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE] || vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL] || - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE]) + vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE]) { dev_warn(&vdev->dev, "device still has system memory added\n"); - else + } else { virtio_mem_delete_resource(vm); + kfree_const(vm->resource_name); + } /* remove all tracking data - no locking needed */ vfree(vm->mb_state); From 633fae33d562dc2538cc3d6033a2d5e277050783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Sat, 18 Apr 2020 12:22:11 +0200 Subject: [PATCH 348/462] tools/virtio: Add --batch option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allow to test vhost having >1 buffers in flight Signed-off-by: Eugenio Pérez Link: https://lore.kernel.org/r/20200401183118.8334-5-eperezma@redhat.com Signed-off-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20200418102217.32327-3-eperezma@redhat.com Signed-off-by: Michael S. Tsirkin --- tools/virtio/virtio_test.c | 47 ++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index b427def67e7e..c30de9088f3c 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE #include +#include #include #include #include @@ -152,11 +153,11 @@ static void wait_for_interrupt(struct vdev_info *dev) } static void run_test(struct vdev_info *dev, struct vq_info *vq, - bool delayed, int bufs) + bool delayed, int batch, int bufs) { struct scatterlist sl; long started = 0, completed = 0; - long completed_before; + long completed_before, started_before; int r, test = 1; unsigned len; long long spurious = 0; @@ -165,28 +166,42 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, for (;;) { virtqueue_disable_cb(vq->vq); completed_before = completed; + started_before = started; do { - if (started < bufs) { + while (started < bufs && + (started - completed) < batch) { sg_init_one(&sl, dev->buf, dev->buf_size); r = virtqueue_add_outbuf(vq->vq, &sl, 1, dev->buf + started, GFP_ATOMIC); - if (likely(r == 0)) { - ++started; - if (unlikely(!virtqueue_kick(vq->vq))) + if (unlikely(r != 0)) { + if (r == -ENOSPC && + started > started_before) + r = 0; + else r = -1; + break; } - } else + + ++started; + + if (unlikely(!virtqueue_kick(vq->vq))) { + r = -1; + break; + } + } + + if (started >= bufs) r = -1; /* Flush out completed bufs if any */ - if (virtqueue_get_buf(vq->vq, &len)) { + while (virtqueue_get_buf(vq->vq, &len)) { ++completed; r = 0; } } while (r == 0); - if (completed == completed_before) + if (completed == completed_before && started == started_before) ++spurious; assert(completed <= bufs); assert(started <= bufs); @@ -244,6 +259,11 @@ const struct option longopts[] = { .name = "no-delayed-interrupt", .val = 'd', }, + { + .name = "batch", + .val = 'b', + .has_arg = required_argument, + }, { } }; @@ -255,6 +275,7 @@ static void help(void) " [--no-event-idx]" " [--no-virtio-1]" " [--delayed-interrupt]" + " [--batch=N]" "\n"); } @@ -263,6 +284,7 @@ int main(int argc, char **argv) struct vdev_info dev; unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1); + long batch = 1; int o; bool delayed = false; @@ -289,6 +311,11 @@ int main(int argc, char **argv) case 'D': delayed = true; break; + case 'b': + batch = strtol(optarg, NULL, 10); + assert(batch > 0); + assert(batch < (long)INT_MAX + 1); + break; default: assert(0); break; @@ -298,6 +325,6 @@ int main(int argc, char **argv) done: vdev_info_init(&dev, features); vq_info_add(&dev, 256); - run_test(&dev, &dev.vqs[0], delayed, 0x100000); + run_test(&dev, &dev.vqs[0], delayed, batch, 0x100000); return 0; } From 7add78b2a6b76d98fcf35e981c5f84b3c91459f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Sat, 18 Apr 2020 12:22:12 +0200 Subject: [PATCH 349/462] tools/virtio: Add --batch=random option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So we can test with non-deterministic batches in flight. Signed-off-by: Eugenio Pérez Link: https://lore.kernel.org/r/20200418102217.32327-4-eperezma@redhat.com Signed-off-by: Michael S. Tsirkin --- tools/virtio/virtio_test.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index c30de9088f3c..4a2b9d11f287 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -19,6 +19,8 @@ #include #include "../../drivers/vhost/test.h" +#define RANDOM_BATCH -1 + /* Unused */ void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; @@ -161,6 +163,7 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int r, test = 1; unsigned len; long long spurious = 0; + const bool random_batch = batch == RANDOM_BATCH; r = ioctl(dev->control, VHOST_TEST_RUN, &test); assert(r >= 0); for (;;) { @@ -168,6 +171,9 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, completed_before = completed; started_before = started; do { + if (random_batch) + batch = (random() % vq->vring.num) + 1; + while (started < bufs && (started - completed) < batch) { sg_init_one(&sl, dev->buf, dev->buf_size); @@ -275,7 +281,7 @@ static void help(void) " [--no-event-idx]" " [--no-virtio-1]" " [--delayed-interrupt]" - " [--batch=N]" + " [--batch=random/N]" "\n"); } @@ -312,9 +318,13 @@ int main(int argc, char **argv) delayed = true; break; case 'b': - batch = strtol(optarg, NULL, 10); - assert(batch > 0); - assert(batch < (long)INT_MAX + 1); + if (0 == strcmp(optarg, "random")) { + batch = RANDOM_BATCH; + } else { + batch = strtol(optarg, NULL, 10); + assert(batch > 0); + assert(batch < (long)INT_MAX + 1); + } break; default: assert(0); From 264ee5aa81ec87eebfb0e2fb70cc0a38df80bab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Sat, 18 Apr 2020 12:22:13 +0200 Subject: [PATCH 350/462] tools/virtio: Add --reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, it only removes and add backend, but it will reset vq position in future commits. Signed-off-by: Eugenio Pérez Link: https://lore.kernel.org/r/20200418102217.32327-5-eperezma@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/test.c | 57 ++++++++++++++++++++++++++++++++++++++ drivers/vhost/test.h | 1 + tools/virtio/virtio_test.c | 41 ++++++++++++++++++++++++--- 3 files changed, 95 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 0466921f4772..a09dedc79f68 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -263,9 +263,62 @@ static int vhost_test_set_features(struct vhost_test *n, u64 features) return 0; } +static long vhost_test_set_backend(struct vhost_test *n, unsigned index, int fd) +{ + static void *backend; + + const bool enable = fd != -1; + struct vhost_virtqueue *vq; + int r; + + mutex_lock(&n->dev.mutex); + r = vhost_dev_check_owner(&n->dev); + if (r) + goto err; + + if (index >= VHOST_TEST_VQ_MAX) { + r = -ENOBUFS; + goto err; + } + vq = &n->vqs[index]; + mutex_lock(&vq->mutex); + + /* Verify that ring has been setup correctly. */ + if (!vhost_vq_access_ok(vq)) { + r = -EFAULT; + goto err_vq; + } + if (!enable) { + vhost_poll_stop(&vq->poll); + backend = vhost_vq_get_backend(vq); + vhost_vq_set_backend(vq, NULL); + } else { + vhost_vq_set_backend(vq, backend); + r = vhost_vq_init_access(vq); + if (r == 0) + r = vhost_poll_start(&vq->poll, vq->kick); + } + + mutex_unlock(&vq->mutex); + + if (enable) { + vhost_test_flush_vq(n, index); + } + + mutex_unlock(&n->dev.mutex); + return 0; + +err_vq: + mutex_unlock(&vq->mutex); +err: + mutex_unlock(&n->dev.mutex); + return r; +} + static long vhost_test_ioctl(struct file *f, unsigned int ioctl, unsigned long arg) { + struct vhost_vring_file backend; struct vhost_test *n = f->private_data; void __user *argp = (void __user *)arg; u64 __user *featurep = argp; @@ -277,6 +330,10 @@ static long vhost_test_ioctl(struct file *f, unsigned int ioctl, if (copy_from_user(&test, argp, sizeof test)) return -EFAULT; return vhost_test_run(n, test); + case VHOST_TEST_SET_BACKEND: + if (copy_from_user(&backend, argp, sizeof backend)) + return -EFAULT; + return vhost_test_set_backend(n, backend.index, backend.fd); case VHOST_GET_FEATURES: features = VHOST_FEATURES; if (copy_to_user(featurep, &features, sizeof features)) diff --git a/drivers/vhost/test.h b/drivers/vhost/test.h index 7dd265bfdf81..822bc4bee03a 100644 --- a/drivers/vhost/test.h +++ b/drivers/vhost/test.h @@ -4,5 +4,6 @@ /* Start a given test on the virtio null device. 0 stops all tests. */ #define VHOST_TEST_RUN _IOW(VHOST_VIRTIO, 0x31, int) +#define VHOST_TEST_SET_BACKEND _IOW(VHOST_VIRTIO, 0x32, int) #endif diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index 4a2b9d11f287..6bc3e172cc9b 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -46,6 +46,9 @@ struct vdev_info { struct vhost_memory *mem; }; +static const struct vhost_vring_file no_backend = { .fd = -1 }, + backend = { .fd = 1 }; + bool vq_notify(struct virtqueue *vq) { struct vq_info *info = vq->priv; @@ -155,10 +158,10 @@ static void wait_for_interrupt(struct vdev_info *dev) } static void run_test(struct vdev_info *dev, struct vq_info *vq, - bool delayed, int batch, int bufs) + bool delayed, int batch, int reset_n, int bufs) { struct scatterlist sl; - long started = 0, completed = 0; + long started = 0, completed = 0, next_reset = reset_n; long completed_before, started_before; int r, test = 1; unsigned len; @@ -171,6 +174,7 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, completed_before = completed; started_before = started; do { + const bool reset = reset_n && completed > next_reset; if (random_batch) batch = (random() % vq->vring.num) + 1; @@ -200,12 +204,26 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, if (started >= bufs) r = -1; + if (reset) { + r = ioctl(dev->control, VHOST_TEST_SET_BACKEND, + &no_backend); + assert(!r); + } + /* Flush out completed bufs if any */ while (virtqueue_get_buf(vq->vq, &len)) { ++completed; r = 0; } + if (reset) { + r = ioctl(dev->control, VHOST_TEST_SET_BACKEND, + &backend); + assert(!r); + + while (completed > next_reset) + next_reset += completed; + } } while (r == 0); if (completed == completed_before && started == started_before) ++spurious; @@ -270,6 +288,11 @@ const struct option longopts[] = { .val = 'b', .has_arg = required_argument, }, + { + .name = "reset", + .val = 'r', + .has_arg = optional_argument, + }, { } }; @@ -282,6 +305,7 @@ static void help(void) " [--no-virtio-1]" " [--delayed-interrupt]" " [--batch=random/N]" + " [--reset=N]" "\n"); } @@ -290,7 +314,7 @@ int main(int argc, char **argv) struct vdev_info dev; unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1); - long batch = 1; + long batch = 1, reset = 0; int o; bool delayed = false; @@ -326,6 +350,15 @@ int main(int argc, char **argv) assert(batch < (long)INT_MAX + 1); } break; + case 'r': + if (!optarg) { + reset = 1; + } else { + reset = strtol(optarg, NULL, 10); + assert(reset > 0); + assert(reset < (long)INT_MAX + 1); + } + break; default: assert(0); break; @@ -335,6 +368,6 @@ int main(int argc, char **argv) done: vdev_info_init(&dev, features); vq_info_add(&dev, 256); - run_test(&dev, &dev.vqs[0], delayed, batch, 0x100000); + run_test(&dev, &dev.vqs[0], delayed, batch, reset, 0x100000); return 0; } From 4cfb93935337d8f9680d865b0987583e462cfcc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Sat, 18 Apr 2020 12:22:14 +0200 Subject: [PATCH 351/462] tools/virtio: Use __vring_new_virtqueue in virtio_test.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As updated in ("2a2d1382fe9d virtio: Add improved queue allocation API") Signed-off-by: Eugenio Pérez Link: https://lore.kernel.org/r/20200418102217.32327-6-eperezma@redhat.com Signed-off-by: Michael S. Tsirkin --- tools/virtio/virtio_test.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index 6bc3e172cc9b..0a247ba3b899 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -105,10 +105,9 @@ static void vq_info_add(struct vdev_info *dev, int num) assert(r >= 0); memset(info->ring, 0, vring_size(num, 4096)); vring_init(&info->vring, num, info->ring, 4096); - info->vq = vring_new_virtqueue(info->idx, - info->vring.num, 4096, &dev->vdev, - true, false, info->ring, - vq_notify, vq_callback, "test"); + info->vq = + __vring_new_virtqueue(info->idx, info->vring, &dev->vdev, true, + false, vq_notify, vq_callback, "test"); assert(info->vq); info->vq->priv = info; vhost_vq_setup(dev, info); From 6741239260293e46a32a141fb61bf2c062b0b6ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Sat, 18 Apr 2020 12:22:15 +0200 Subject: [PATCH 352/462] tools/virtio: Extract virtqueue initialization in vq_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So we can reset after that in the main loop. Signed-off-by: Eugenio Pérez Link: https://lore.kernel.org/r/20200418102217.32327-7-eperezma@redhat.com Signed-off-by: Michael S. Tsirkin --- tools/virtio/virtio_test.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index 0a247ba3b899..bc16c818bda3 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -94,6 +94,19 @@ void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info) assert(r >= 0); } +static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev) +{ + if (info->vq) + vring_del_virtqueue(info->vq); + + memset(info->ring, 0, vring_size(num, 4096)); + vring_init(&info->vring, num, info->ring, 4096); + info->vq = __vring_new_virtqueue(info->idx, info->vring, vdev, true, + false, vq_notify, vq_callback, "test"); + assert(info->vq); + info->vq->priv = info; +} + static void vq_info_add(struct vdev_info *dev, int num) { struct vq_info *info = &dev->vqs[dev->nvqs]; @@ -103,13 +116,7 @@ static void vq_info_add(struct vdev_info *dev, int num) info->call = eventfd(0, EFD_NONBLOCK); r = posix_memalign(&info->ring, 4096, vring_size(num, 4096)); assert(r >= 0); - memset(info->ring, 0, vring_size(num, 4096)); - vring_init(&info->vring, num, info->ring, 4096); - info->vq = - __vring_new_virtqueue(info->idx, info->vring, &dev->vdev, true, - false, vq_notify, vq_callback, "test"); - assert(info->vq); - info->vq->priv = info; + vq_reset(info, num, &dev->vdev); vhost_vq_setup(dev, info); dev->fds[info->idx].fd = info->call; dev->fds[info->idx].events = POLLIN; From 1d8bf5c3a3a1b90c39ccbafb3aa63ffc5196b142 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Sat, 18 Apr 2020 12:22:16 +0200 Subject: [PATCH 353/462] tools/virtio: Reset index in virtio_test --reset. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way behavior for vhost is more like a VM. Signed-off-by: Eugenio Pérez Link: https://lore.kernel.org/r/20200418102217.32327-8-eperezma@redhat.com Signed-off-by: Michael S. Tsirkin --- tools/virtio/virtio_test.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index bc16c818bda3..82902fc3ba2a 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -48,6 +48,7 @@ struct vdev_info { static const struct vhost_vring_file no_backend = { .fd = -1 }, backend = { .fd = 1 }; +static const struct vhost_vring_state null_state = {}; bool vq_notify(struct virtqueue *vq) { @@ -173,14 +174,19 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, unsigned len; long long spurious = 0; const bool random_batch = batch == RANDOM_BATCH; + r = ioctl(dev->control, VHOST_TEST_RUN, &test); assert(r >= 0); + if (!reset_n) { + next_reset = INT_MAX; + } + for (;;) { virtqueue_disable_cb(vq->vq); completed_before = completed; started_before = started; do { - const bool reset = reset_n && completed > next_reset; + const bool reset = completed > next_reset; if (random_batch) batch = (random() % vq->vring.num) + 1; @@ -223,10 +229,24 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, } if (reset) { + struct vhost_vring_state s = { .index = 0 }; + + vq_reset(vq, vq->vring.num, &dev->vdev); + + r = ioctl(dev->control, VHOST_GET_VRING_BASE, + &s); + assert(!r); + + s.num = 0; + r = ioctl(dev->control, VHOST_SET_VRING_BASE, + &null_state); + assert(!r); + r = ioctl(dev->control, VHOST_TEST_SET_BACKEND, &backend); assert(!r); + started = completed; while (completed > next_reset) next_reset += completed; } @@ -248,7 +268,9 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, test = 0; r = ioctl(dev->control, VHOST_TEST_RUN, &test); assert(r >= 0); - fprintf(stderr, "spurious wakeups: 0x%llx\n", spurious); + fprintf(stderr, + "spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n", + spurious, started, completed); } const char optstring[] = "h"; From cb91909e48a4809261ef4e967464e2009b214f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Sat, 18 Apr 2020 12:22:17 +0200 Subject: [PATCH 354/462] tools/virtio: Use tools/include/list.h instead of stubs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should not make any significant difference but reduce stub code. Signed-off-by: Eugenio Pérez Link: https://lore.kernel.org/r/20200418102217.32327-9-eperezma@redhat.com Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/kernel.h | 7 +------ tools/virtio/linux/virtio.h | 5 ++--- tools/virtio/virtio_test.c | 1 + tools/virtio/vringh_test.c | 2 ++ 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 6683b4a70b05..caab980211a6 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -135,10 +136,4 @@ static inline void free_page(unsigned long addr) (void) (&_min1 == &_min2); \ _min1 < _min2 ? _min1 : _min2; }) -/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */ -#define list_add_tail(a, b) do {} while (0) -#define list_del(a) do {} while (0) -#define list_for_each_entry(a, b, c) while (0) -/* end of stubs */ - #endif /* KERNEL_H */ diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index b751350d4ce8..5d90254ddae4 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -11,12 +11,11 @@ struct device { struct virtio_device { struct device dev; u64 features; + struct list_head vqs; }; struct virtqueue { - /* TODO: commented as list macros are empty stubs for now. - * Broken but enough for virtio_ring.c - * struct list_head list; */ + struct list_head list; void (*callback)(struct virtqueue *vq); const char *name; struct virtio_device *vdev; diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index 82902fc3ba2a..cb3f29c09aff 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -129,6 +129,7 @@ static void vdev_info_init(struct vdev_info* dev, unsigned long long features) int r; memset(dev, 0, sizeof *dev); dev->vdev.features = features; + INIT_LIST_HEAD(&dev->vdev.vqs); dev->buf_size = 1024; dev->buf = malloc(dev->buf_size); assert(dev->buf); diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c index 293653463303..fa87b58bd5fa 100644 --- a/tools/virtio/vringh_test.c +++ b/tools/virtio/vringh_test.c @@ -307,6 +307,7 @@ static int parallel_test(u64 features, close(to_host[0]); gvdev.vdev.features = features; + INIT_LIST_HEAD(&gvdev.vdev.vqs); gvdev.to_host_fd = to_host[1]; gvdev.notifies = 0; @@ -453,6 +454,7 @@ int main(int argc, char *argv[]) getrange = getrange_iov; vdev.features = 0; + INIT_LIST_HEAD(&vdev.vqs); while (argv[1]) { if (strcmp(argv[1], "--indirect") == 0) From af28dfacbe00d53df5dec2bf50640df33138b1fe Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 22 Jun 2020 12:08:30 -0400 Subject: [PATCH 355/462] kvm: lapic: fix broken vcpu hotplug Guest fails to online hotplugged CPU with error smpboot: do_boot_cpu failed(-1) to wakeup CPU#4 It's caused by the fact that kvm_apic_set_state(), which used to call recalculate_apic_map() unconditionally and pulled hotplugged CPU into apic map, is updating map conditionally on state changes. In this case the APIC map is not considered dirty and the is not updated. Fix the issue by forcing unconditional update from kvm_apic_set_state(), like it used to be. Fixes: 4abaffce4d25a ("KVM: LAPIC: Recalculate apic map in batch") Cc: stable@vger.kernel.org Signed-off-by: Igor Mammedov Message-Id: <20200622160830.426022-1-imammedo@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 34a7e0533dad..6dc177da19da 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2567,6 +2567,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) } memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); + apic->vcpu->kvm->arch.apic_map_dirty = true; kvm_recalculate_apic_map(vcpu->kvm); kvm_apic_set_version(vcpu); From c1d869d64a1955817c4d6fff08ecbbe8e59d36f8 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 21 Jun 2020 14:00:00 +0300 Subject: [PATCH 356/462] RDMA/counter: Query a counter before release Query a dynamically-allocated counter before release it, to update it's hwcounters and log all of them into history data. Otherwise all values of these hwcounters will be lost. Fixes: f34a55e497e8 ("RDMA/core: Get sum value of all counters when perform a sysfs stat read") Link: https://lore.kernel.org/r/20200621110000.56059-1-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 2257d7f7810f..738d1faf4bba 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -202,7 +202,7 @@ static int __rdma_counter_unbind_qp(struct ib_qp *qp) return ret; } -static void counter_history_stat_update(const struct rdma_counter *counter) +static void counter_history_stat_update(struct rdma_counter *counter) { struct ib_device *dev = counter->device; struct rdma_port_counter *port_counter; @@ -212,6 +212,8 @@ static void counter_history_stat_update(const struct rdma_counter *counter) if (!port_counter->hstats) return; + rdma_counter_query_stats(counter); + for (i = 0; i < counter->stats->num_counters; i++) port_counter->hstats->value[i] += counter->stats->value[i]; } From 44d527170731c75587e95052f3eea72b8c651daf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 22 Jun 2020 16:37:42 +0200 Subject: [PATCH 357/462] KVM: LAPIC: ensure APIC map is up to date on concurrent update requests The following race can cause lost map update events: cpu1 cpu2 apic_map_dirty = true ------------------------------------------------------------ kvm_recalculate_apic_map: pass check mutex_lock(&kvm->arch.apic_map_lock); if (!kvm->arch.apic_map_dirty) and in process of updating map ------------------------------------------------------------- other calls to apic_map_dirty = true might be too late for affected cpu ------------------------------------------------------------- apic_map_dirty = false ------------------------------------------------------------- kvm_recalculate_apic_map: bail out on if (!kvm->arch.apic_map_dirty) To fix it, record the beginning of an update of the APIC map in apic_map_dirty. If another APIC map change switches apic_map_dirty back to DIRTY during the update, kvm_recalculate_apic_map should not make it CLEAN, and the other caller will go through the slow path. Reported-by: Igor Mammedov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/lapic.c | 51 ++++++++++++++++++++------------- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f8998e97457f..f852ee350beb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -943,7 +943,7 @@ struct kvm_arch { atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; struct kvm_apic_map *apic_map; - bool apic_map_dirty; + atomic_t apic_map_dirty; bool apic_access_page_done; unsigned long apicv_inhibit_reasons; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6dc177da19da..5bf72fc86a8e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -169,6 +169,18 @@ static void kvm_apic_map_free(struct rcu_head *rcu) kvfree(map); } +/* + * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock. + * + * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with + * apic_map_lock_held. + */ +enum { + CLEAN, + UPDATE_IN_PROGRESS, + DIRTY +}; + void kvm_recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -176,17 +188,17 @@ void kvm_recalculate_apic_map(struct kvm *kvm) int i; u32 max_id = 255; /* enough space for any xAPIC ID */ - if (!kvm->arch.apic_map_dirty) { - /* - * Read kvm->arch.apic_map_dirty before - * kvm->arch.apic_map - */ - smp_rmb(); + /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */ + if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN) return; - } mutex_lock(&kvm->arch.apic_map_lock); - if (!kvm->arch.apic_map_dirty) { + /* + * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map + * (if clean) or the APIC registers (if dirty). + */ + if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty, + DIRTY, UPDATE_IN_PROGRESS) == CLEAN) { /* Someone else has updated the map. */ mutex_unlock(&kvm->arch.apic_map_lock); return; @@ -256,11 +268,11 @@ out: lockdep_is_held(&kvm->arch.apic_map_lock)); rcu_assign_pointer(kvm->arch.apic_map, new); /* - * Write kvm->arch.apic_map before - * clearing apic->apic_map_dirty + * Write kvm->arch.apic_map before clearing apic->apic_map_dirty. + * If another update has come in, leave it DIRTY. */ - smp_wmb(); - kvm->arch.apic_map_dirty = false; + atomic_cmpxchg_release(&kvm->arch.apic_map_dirty, + UPDATE_IN_PROGRESS, CLEAN); mutex_unlock(&kvm->arch.apic_map_lock); if (old) @@ -282,20 +294,20 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) else static_key_slow_inc(&apic_sw_disabled.key); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } } static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) { kvm_lapic_set_reg(apic, APIC_ID, id << 24); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) { kvm_lapic_set_reg(apic, APIC_LDR, id); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) @@ -311,7 +323,7 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) kvm_lapic_set_reg(apic, APIC_ID, id); kvm_lapic_set_reg(apic, APIC_LDR, ldr); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) @@ -1976,7 +1988,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_DFR: if (!apic_x2apic_mode(apic)) { kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } else ret = 1; break; @@ -2232,7 +2244,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) static_key_slow_dec_deferred(&apic_hw_disabled); } else { static_key_slow_inc(&apic_hw_disabled.key); - vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } } @@ -2273,7 +2285,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) if (!apic) return; - vcpu->kvm->arch.apic_map_dirty = false; /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); @@ -2567,7 +2578,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) } memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); kvm_recalculate_apic_map(vcpu->kvm); kvm_apic_set_version(vcpu); From 312d16c7c06174f44f96ef4a61c2936e6e360414 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 22 Jun 2020 17:14:35 +0200 Subject: [PATCH 358/462] KVM: x86/mmu: Avoid mixing gpa_t with gfn_t in walk_addr_generic() translate_gpa() returns a GPA, assigning it to 'real_gfn' seems obviously wrong. There is no real issue because both 'gpa_t' and 'gfn_t' are u64 and we don't use the value in 'real_gfn' as a GFN, we do real_gfn = gpa_to_gfn(real_gfn); instead. 'If you see a "buffalo" sign on an elephant's cage, do not trust your eyes', but let's fix it for good. No functional change intended. Signed-off-by: Vitaly Kuznetsov Message-Id: <20200622151435.752560-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/paging_tmpl.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index a6d484ea110b..58234bfaca07 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -360,7 +360,6 @@ retry_walk: ++walker->level; do { - gfn_t real_gfn; unsigned long host_addr; pt_access = pte_access; @@ -375,7 +374,7 @@ retry_walk: walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), + real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), nested_access, &walker->fault); @@ -389,12 +388,10 @@ retry_walk: * information to fix the exit_qualification or exit_info_1 * fields. */ - if (unlikely(real_gfn == UNMAPPED_GVA)) + if (unlikely(real_gpa == UNMAPPED_GVA)) return 0; - real_gfn = gpa_to_gfn(real_gfn); - - host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, real_gfn, + host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gpa_to_gfn(real_gpa), &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; From 6eefa839c4dddf2149e9f5f6f1aa3e1191c8db9c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 21 Jun 2020 14:59:59 +0300 Subject: [PATCH 359/462] RDMA/mlx5: Protect from kernel crash if XRC_TGT doesn't have udata Don't deref udata if it is NULL BUG: kernel NULL pointer dereference, address: 0000000000000030 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 SMP PTI CPU: 2 PID: 1592 Comm: python3 Not tainted 5.7.0-rc6+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 RIP: 0010:create_qp+0x39e/0xae0 [mlx5_ib] Code: c0 0d 00 00 bf 10 01 00 00 e8 be a9 e4 e0 48 85 c0 49 89 c2 0f 84 0c 07 00 00 41 8b 85 74 63 01 00 0f c8 a9 00 00 00 10 74 0a <41> 8b 46 30 0f c8 41 89 42 14 41 8b 52 18 41 0f b6 4a 1c 0f ca 89 RSP: 0018:ffffc9000067f8b0 EFLAGS: 00010206 RAX: 0000000010170000 RBX: ffff888441313000 RCX: 0000000000000000 RDX: 0000000000000200 RSI: 0000000000000000 RDI: ffff88845b1d4400 RBP: ffffc9000067fa60 R08: 0000000000000200 R09: ffff88845b1d4200 R10: ffff88845b1d4200 R11: ffff888441313000 R12: ffffc9000067f950 R13: ffff88846ac00140 R14: 0000000000000000 R15: ffff88846c2bc000 FS: 00007faa1a3c0540(0000) GS:ffff88846fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000030 CR3: 0000000446dca003 CR4: 0000000000760ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 mlx5_ib_create_qp+0x897/0xfa0 [mlx5_ib] ib_create_qp+0x9e/0x300 [ib_core] create_qp+0x92d/0xb20 [ib_uverbs] ? ib_uverbs_cq_event_handler+0x30/0x30 [ib_uverbs] ? release_resource+0x30/0x30 ib_uverbs_create_qp+0xc4/0xe0 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xc8/0xf0 [ib_uverbs] ib_uverbs_run_method+0x223/0x770 [ib_uverbs] ? track_pfn_remap+0xa7/0x100 ? uverbs_disassociate_api+0xd0/0xd0 [ib_uverbs] ? remap_pfn_range+0x358/0x490 ib_uverbs_cmd_verbs.isra.6+0x19b/0x370 [ib_uverbs] ? rdma_umap_priv_init+0x82/0xe0 [ib_core] ? vm_mmap_pgoff+0xec/0x120 ib_uverbs_ioctl+0xc0/0x120 [ib_uverbs] ksys_ioctl+0x92/0xb0 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x48/0x130 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: e383085c2425 ("RDMA/mlx5: Set ECE options during QP create") Link: https://lore.kernel.org/r/20200621115959.60126-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a7fcb00e37a5..f939c9b769f0 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1862,7 +1862,7 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (!in) return -ENOMEM; - if (MLX5_CAP_GEN(mdev, ece_support)) + if (MLX5_CAP_GEN(mdev, ece_support) && ucmd) MLX5_SET(create_qp_in, in, ece, ucmd->ece_options); qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); From 116a1b9f1cb769b83e5adff323f977a62b1dcb2e Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 21 Jun 2020 13:47:35 +0300 Subject: [PATCH 360/462] IB/mad: Fix use after free when destroying MAD agent Currently, when RMPP MADs are processed while the MAD agent is destroyed, it could result in use after free of rmpp_recv, as decribed below: cpu-0 cpu-1 ----- ----- ib_mad_recv_done() ib_mad_complete_recv() ib_process_rmpp_recv_wc() unregister_mad_agent() ib_cancel_rmpp_recvs() cancel_delayed_work() process_rmpp_data() start_rmpp() queue_delayed_work(rmpp_recv->cleanup_work) destroy_rmpp_recv() free_rmpp_recv() cleanup_work()[1] spin_lock_irqsave(&rmpp_recv->agent->lock) <-- use after free [1] cleanup_work() == recv_cleanup_handler Fix it by waiting for the MAD agent reference count becoming zero before calling to ib_cancel_rmpp_recvs(). Fixes: 9a41e38a467c ("IB/mad: Use IDR for agent IDs") Link: https://lore.kernel.org/r/20200621104738.54850-2-leon@kernel.org Signed-off-by: Shay Drory Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 5e080191a725..a09f8e3c7f3f 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -509,10 +509,10 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); flush_workqueue(port_priv->wq); - ib_cancel_rmpp_recvs(mad_agent_priv); deref_mad_agent(mad_agent_priv); wait_for_completion(&mad_agent_priv->comp); + ib_cancel_rmpp_recvs(mad_agent_priv); ib_mad_agent_security_cleanup(&mad_agent_priv->agent); From b0c34bde72a59c05e826bf0a5aeca0d73f38f791 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 21 Jun 2020 17:06:30 +0200 Subject: [PATCH 361/462] MAINTAINERS: update email address for Felix Fietkau The old address has been bouncing for a while now Signed-off-by: Felix Fietkau Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a09e61268c08..352904042a0d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10808,7 +10808,7 @@ F: Documentation/devicetree/bindings/dma/mtk-* F: drivers/dma/mediatek/ MEDIATEK ETHERNET DRIVER -M: Felix Fietkau +M: Felix Fietkau M: John Crispin M: Sean Wang M: Mark Lee From 2dbebf7ae1ed9a420d954305e2c9d5ed39ec57c3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 22 Jun 2020 14:58:29 -0700 Subject: [PATCH 362/462] KVM: nVMX: Plumb L2 GPA through to PML emulation Explicitly pass the L2 GPA to kvm_arch_write_log_dirty(), which for all intents and purposes is vmx_write_pml_buffer(), instead of having the latter pull the GPA from vmcs.GUEST_PHYSICAL_ADDRESS. If the dirty bit update is the result of KVM emulation (rare for L2), then the GPA in the VMCS may be stale and/or hold a completely unrelated GPA. Fixes: c5f983f6e8455 ("nVMX: Implement emulated Page Modification Logging") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20200622215832.22090-2-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.h | 2 +- arch/x86/kvm/mmu/mmu.c | 4 ++-- arch/x86/kvm/mmu/paging_tmpl.h | 7 ++++--- arch/x86/kvm/vmx/vmx.c | 6 +++--- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f852ee350beb..be5363b21540 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1220,7 +1220,7 @@ struct kvm_x86_ops { void (*enable_log_dirty_pt_masked)(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t offset, unsigned long mask); - int (*write_log_dirty)(struct kvm_vcpu *vcpu); + int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 0ad06bfe2c2c..444bb9c54548 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -222,7 +222,7 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn); -int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); +int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa); int kvm_mmu_post_init_vm(struct kvm *kvm); void kvm_mmu_pre_destroy_vm(struct kvm *kvm); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index fdd05c233308..76817d13c86e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1745,10 +1745,10 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, * Emulate arch specific page modification logging for the * nested hypervisor */ -int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu) +int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa) { if (kvm_x86_ops.write_log_dirty) - return kvm_x86_ops.write_log_dirty(vcpu); + return kvm_x86_ops.write_log_dirty(vcpu, l2_gpa); return 0; } diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 58234bfaca07..bd70ece1ef8b 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -235,7 +235,7 @@ static inline unsigned FNAME(gpte_access)(u64 gpte) static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, struct guest_walker *walker, - int write_fault) + gpa_t addr, int write_fault) { unsigned level, index; pt_element_t pte, orig_pte; @@ -260,7 +260,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, !(pte & PT_GUEST_DIRTY_MASK)) { trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); #if PTTYPE == PTTYPE_EPT - if (kvm_arch_write_log_dirty(vcpu)) + if (kvm_arch_write_log_dirty(vcpu, addr)) return -EINVAL; #endif pte |= PT_GUEST_DIRTY_MASK; @@ -454,7 +454,8 @@ retry_walk: (PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT); if (unlikely(!accessed_dirty)) { - ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); + ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, + addr, write_fault); if (unlikely(ret < 0)) goto error; else if (ret) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b1a23ad986ff..ad0ac8bc85d9 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7500,11 +7500,11 @@ static void vmx_flush_log_dirty(struct kvm *kvm) kvm_flush_pml_buffers(kvm); } -static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) +static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa) { struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); - gpa_t gpa, dst; + gpa_t dst; if (is_guest_mode(vcpu)) { WARN_ON_ONCE(vmx->nested.pml_full); @@ -7523,7 +7523,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) return 1; } - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; + gpa &= ~0xFFFull; dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index; if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa, From 6d61f483f148b856d47a6c96d5d84054d5a9f849 Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Sat, 20 Jun 2020 22:55:34 +0800 Subject: [PATCH 363/462] net: phy: smsc: fix printing too many logs Commit 7ae7ad2f11ef47 ("net: phy: smsc: use phy_read_poll_timeout() to simplify the code") will print a lot of logs as follows when Ethernet cable is not connected: [ 4.473105] SMSC LAN8710/LAN8720 2188000.ethernet-1:00: lan87xx_read_status failed: -110 When wait 640 ms for check ENERGYON bit, the timeout should not be regarded as an actual error and an error message also should not be printed. due to a hardware bug in LAN87XX device, it leads to unstable detection of plugging in Ethernet cable when LAN87xx is in Energy Detect Power-Down mode. the workaround for it involves, when the link is down, and at each read_status() call: - disable EDPD mode, forcing the PHY out of low-power mode - waiting 640ms to see if we have any energy detected from the media - re-enable entry to EDPD mode This is presumably enough to allow the PHY to notice that a cable is connected, and resume normal operations to negotiate with the partner. The problem is that when no media is detected, the 640ms wait times out and this commit was modified to prints an error message. it is an inappropriate conversion by used phy_read_poll_timeout() to introduce this bug. so fix this issue by use read_poll_timeout() to replace phy_read_poll_timeout(). Fixes: 7ae7ad2f11ef47 ("net: phy: smsc: use phy_read_poll_timeout() to simplify the code") Reported-by: Kevin Groeneveld Signed-off-by: Dejin Zheng Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/smsc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 93da7d3d0954..74568ae16125 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -122,10 +122,13 @@ static int lan87xx_read_status(struct phy_device *phydev) if (rc < 0) return rc; - /* Wait max 640 ms to detect energy */ - phy_read_poll_timeout(phydev, MII_LAN83C185_CTRL_STATUS, rc, - rc & MII_LAN83C185_ENERGYON, 10000, - 640000, true); + /* Wait max 640 ms to detect energy and the timeout is not + * an actual error. + */ + read_poll_timeout(phy_read, rc, + rc & MII_LAN83C185_ENERGYON || rc < 0, + 10000, 640000, true, phydev, + MII_LAN83C185_CTRL_STATUS); if (rc < 0) return rc; From f3fe412b0a634286a6a3753c3f9ff201e6bec716 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 21 Jun 2020 11:29:17 +0300 Subject: [PATCH 364/462] mlxsw: spectrum: Do not rely on machine endianness The second commit cited below performed a cast of 'u32 buffsize' to '(u16 *)' when calling mlxsw_sp_port_headroom_8x_adjust(): mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, (u16 *) &buffsize); Colin noted that this will behave differently on big endian architectures compared to little endian architectures. Fix this by following Colin's suggestion and have the function accept and return 'u32' instead of passing the current size by reference. Fixes: da382875c616 ("mlxsw: spectrum: Extend to support Spectrum-3 ASIC") Fixes: 60833d54d56c ("mlxsw: spectrum: Adjust headroom buffers for 8x ports") Signed-off-by: Ido Schimmel Reported-by: Colin Ian King Suggested-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++-- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 8 +++----- drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 55af877763ed..029ea344ad65 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -978,10 +978,10 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, lossy = !(pfc || pause_en); thres_cells = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); - mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, &thres_cells); + thres_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, thres_cells); delay_cells = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, pfc, pause_en); - mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, &delay_cells); + delay_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, delay_cells); total_cells = thres_cells + delay_cells; taken_headroom_cells += total_cells; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 6e87457dd635..3abe3e7d89bc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -374,17 +374,15 @@ mlxsw_sp_port_vlan_find_by_vid(const struct mlxsw_sp_port *mlxsw_sp_port, return NULL; } -static inline void +static inline u32 mlxsw_sp_port_headroom_8x_adjust(const struct mlxsw_sp_port *mlxsw_sp_port, - u16 *p_size) + u32 size_cells) { /* Ports with eight lanes use two headroom buffers between which the * configured headroom size is split. Therefore, multiply the calculated * headroom size by two. */ - if (mlxsw_sp_port->mapping.width != 8) - return; - *p_size *= 2; + return mlxsw_sp_port->mapping.width == 8 ? 2 * size_cells : size_cells; } enum mlxsw_sp_flood_type { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index f25a8b084b4b..6f84557a5a6f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -312,7 +312,7 @@ static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) if (i == MLXSW_SP_PB_UNUSED) continue; - mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, &size); + size = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, size); mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, size); } mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index f843545d3478..92351a79addc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -782,7 +782,7 @@ mlxsw_sp_span_port_buffer_update(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) speed = 0; buffsize = mlxsw_sp_span_buffsize_get(mlxsw_sp, speed, mtu); - mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, (u16 *) &buffsize); + buffsize = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, buffsize); mlxsw_reg_sbib_pack(sbib_pl, mlxsw_sp_port->local_port, buffsize); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); } From b835a71ef64a61383c414d6bf2896d2c0161deca Mon Sep 17 00:00:00 2001 From: Tuomas Tynkkynen Date: Sun, 21 Jun 2020 13:43:26 +0300 Subject: [PATCH 365/462] usbnet: smsc95xx: Fix use-after-free after removal Syzbot reports an use-after-free in workqueue context: BUG: KASAN: use-after-free in mutex_unlock+0x19/0x40 kernel/locking/mutex.c:737 mutex_unlock+0x19/0x40 kernel/locking/mutex.c:737 __smsc95xx_mdio_read drivers/net/usb/smsc95xx.c:217 [inline] smsc95xx_mdio_read+0x583/0x870 drivers/net/usb/smsc95xx.c:278 check_carrier+0xd1/0x2e0 drivers/net/usb/smsc95xx.c:644 process_one_work+0x777/0xf90 kernel/workqueue.c:2274 worker_thread+0xa8f/0x1430 kernel/workqueue.c:2420 kthread+0x2df/0x300 kernel/kthread.c:255 It looks like that smsc95xx_unbind() is freeing the structures that are still in use by the concurrently running workqueue callback. Thus switch to using cancel_delayed_work_sync() to ensure the work callback really is no longer active. Reported-by: syzbot+29dc7d4ae19b703ff947@syzkaller.appspotmail.com Signed-off-by: Tuomas Tynkkynen Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 355be77f4241..3cf4dc3433f9 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1324,7 +1324,7 @@ static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf) struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); if (pdata) { - cancel_delayed_work(&pdata->carrier_check); + cancel_delayed_work_sync(&pdata->carrier_check); netif_dbg(dev, ifdown, dev->net, "free pdata\n"); kfree(pdata); pdata = NULL; From bf09fb6cba4f7099620cc9ed32d94c27c4af992e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 22 Jun 2020 17:51:35 -0700 Subject: [PATCH 366/462] KVM: VMX: Stop context switching MSR_IA32_UMWAIT_CONTROL Remove support for context switching between the guest's and host's desired UMWAIT_CONTROL. Propagating the guest's value to hardware isn't required for correct functionality, e.g. KVM intercepts reads and writes to the MSR, and the latency effects of the settings controlled by the MSR are not architecturally visible. As a general rule, KVM should not allow the guest to control power management settings unless explicitly enabled by userspace, e.g. see KVM_CAP_X86_DISABLE_EXITS. E.g. Intel's SDM explicitly states that C0.2 can improve the performance of SMT siblings. A devious guest could disable C0.2 so as to improve the performance of their workloads at the detriment to workloads running in the host or on other VMs. Wholesale removal of UMWAIT_CONTROL context switching also fixes a race condition where updates from the host may cause KVM to enter the guest with the incorrect value. Because updates are are propagated to all CPUs via IPI (SMP function callback), the value in hardware may be stale with respect to the cached value and KVM could enter the guest with the wrong value in hardware. As above, the guest can't observe the bad value, but it's a weird and confusing wart in the implementation. Removal also fixes the unnecessary usage of VMX's atomic load/store MSR lists. Using the lists is only necessary for MSRs that are required for correct functionality immediately upon VM-Enter/VM-Exit, e.g. EFER on old hardware, or for MSRs that need to-the-uop precision, e.g. perf related MSRs. For UMWAIT_CONTROL, the effects are only visible in the kernel via TPAUSE/delay(), and KVM doesn't do any form of delay in vcpu_vmx_run(). Using the atomic lists is undesirable as they are more expensive than direct RDMSR/WRMSR. Furthermore, even if giving the guest control of the MSR is legitimate, e.g. in pass-through scenarios, it's not clear that the benefits would outweigh the overhead. E.g. saving and restoring an MSR across a VMX roundtrip costs ~250 cycles, and if the guest diverged from the host that cost would be paid on every run of the guest. In other words, if there is a legitimate use case then it should be enabled by a new per-VM capability. Note, KVM still needs to emulate MSR_IA32_UMWAIT_CONTROL so that it can correctly expose other WAITPKG features to the guest, e.g. TPAUSE, UMWAIT and UMONITOR. Fixes: 6e3ba4abcea56 ("KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL") Cc: stable@vger.kernel.org Cc: Jingqi Liu Cc: Tao Xu Signed-off-by: Sean Christopherson Message-Id: <20200623005135.10414-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/mwait.h | 2 -- arch/x86/kernel/cpu/umwait.c | 6 ------ arch/x86/kvm/vmx/vmx.c | 18 ------------------ 3 files changed, 26 deletions(-) diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 73d997aa2966..e039a933aca3 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -25,8 +25,6 @@ #define TPAUSE_C01_STATE 1 #define TPAUSE_C02_STATE 0 -u32 get_umwait_control_msr(void); - static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) { diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index 300e3fd5ade3..ec8064c0ae03 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -18,12 +18,6 @@ */ static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); -u32 get_umwait_control_msr(void) -{ - return umwait_control_cached; -} -EXPORT_SYMBOL_GPL(get_umwait_control_msr); - /* * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by * hardware or BIOS before kernel boot. diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ad0ac8bc85d9..cb22f33bf1d8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6606,23 +6606,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host, false); } -static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) -{ - u32 host_umwait_control; - - if (!vmx_has_waitpkg(vmx)) - return; - - host_umwait_control = get_umwait_control_msr(); - - if (vmx->msr_ia32_umwait_control != host_umwait_control) - add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, - vmx->msr_ia32_umwait_control, - host_umwait_control, false); - else - clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); -} - static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6729,7 +6712,6 @@ reenter_guest: pt_guest_enter(vmx); atomic_switch_perf_msrs(vmx); - atomic_switch_umwait_control_msr(vmx); if (enable_preemption_timer) vmx_update_hv_timer(vcpu); From de0083c7ed7dba036d1ed6e012157649d45313c8 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 21 Jun 2020 13:46:25 +0000 Subject: [PATCH 367/462] hsr: avoid to create proc file after unregister When an interface is being deleted, "/proc/net/dev_snmp6/" is deleted. The function for this is addrconf_ifdown() in the addrconf_notify() and it is called by notification, which is NETDEV_UNREGISTER. But, if NETDEV_CHANGEMTU is triggered after NETDEV_UNREGISTER, this proc file will be created again. This recreated proc file will be deleted by netdev_wati_allrefs(). Before netdev_wait_allrefs() is called, creating a new HSR interface routine can be executed and It tries to create a proc file but it will find an un-deleted proc file. At this point, it warns about it. To avoid this situation, it can use ->dellink() instead of ->ndo_uninit() to release resources because ->dellink() is called before NETDEV_UNREGISTER. So, a proc file will not be recreated. Test commands ip link add dummy0 type dummy ip link add dummy1 type dummy ip link set dummy0 mtu 1300 #SHELL1 while : do ip link add hsr0 type hsr slave1 dummy0 slave2 dummy1 done #SHELL2 while : do ip link del hsr0 done Splat looks like: [ 9888.980852][ T2752] proc_dir_entry 'dev_snmp6/hsr0' already registered [ 9888.981797][ C2] WARNING: CPU: 2 PID: 2752 at fs/proc/generic.c:372 proc_register+0x2d5/0x430 [ 9888.981798][ C2] Modules linked in: hsr dummy veth openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv6x [ 9888.981814][ C2] CPU: 2 PID: 2752 Comm: ip Tainted: G W 5.8.0-rc1+ #616 [ 9888.981815][ C2] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 9888.981816][ C2] RIP: 0010:proc_register+0x2d5/0x430 [ 9888.981818][ C2] Code: fc ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 65 01 00 00 49 8b b5 e0 00 00 00 48 89 ea 40 [ 9888.981819][ C2] RSP: 0018:ffff8880628dedf0 EFLAGS: 00010286 [ 9888.981821][ C2] RAX: dffffc0000000008 RBX: ffff888028c69170 RCX: ffffffffaae09a62 [ 9888.981822][ C2] RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff88806c9f75ac [ 9888.981823][ C2] RBP: ffff888028c693f4 R08: ffffed100d9401bd R09: ffffed100d9401bd [ 9888.981824][ C2] R10: ffffffffaddf406f R11: 0000000000000001 R12: ffff888028c69308 [ 9888.981825][ C2] R13: ffff8880663584c8 R14: dffffc0000000000 R15: ffffed100518d27e [ 9888.981827][ C2] FS: 00007f3876b3b0c0(0000) GS:ffff88806c800000(0000) knlGS:0000000000000000 [ 9888.981828][ C2] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9888.981829][ C2] CR2: 00007f387601a8c0 CR3: 000000004101a002 CR4: 00000000000606e0 [ 9888.981830][ C2] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 9888.981831][ C2] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 9888.981832][ C2] Call Trace: [ 9888.981833][ C2] ? snmp6_seq_show+0x180/0x180 [ 9888.981834][ C2] proc_create_single_data+0x7c/0xa0 [ 9888.981835][ C2] snmp6_register_dev+0xb0/0x130 [ 9888.981836][ C2] ipv6_add_dev+0x4b7/0xf60 [ 9888.981837][ C2] addrconf_notify+0x684/0x1ca0 [ 9888.981838][ C2] ? __mutex_unlock_slowpath+0xd0/0x670 [ 9888.981839][ C2] ? kasan_unpoison_shadow+0x30/0x40 [ 9888.981840][ C2] ? wait_for_completion+0x250/0x250 [ 9888.981841][ C2] ? inet6_ifinfo_notify+0x100/0x100 [ 9888.981842][ C2] ? dropmon_net_event+0x227/0x410 [ 9888.981843][ C2] ? notifier_call_chain+0x90/0x160 [ 9888.981844][ C2] ? inet6_ifinfo_notify+0x100/0x100 [ 9888.981845][ C2] notifier_call_chain+0x90/0x160 [ 9888.981846][ C2] register_netdevice+0xbe5/0x1070 [ ... ] Reported-by: syzbot+1d51c8b74efa4c44adeb@syzkaller.appspotmail.com Fixes: e0a4b99773d3 ("hsr: use upper/lower device infrastructure") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 21 +-------------------- net/hsr/hsr_device.h | 2 +- net/hsr/hsr_main.c | 9 ++++++--- net/hsr/hsr_netlink.c | 17 +++++++++++++++++ 4 files changed, 25 insertions(+), 24 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index cd99f548e440..478852ef98ef 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -339,7 +339,7 @@ static void hsr_announce(struct timer_list *t) rcu_read_unlock(); } -static void hsr_del_ports(struct hsr_priv *hsr) +void hsr_del_ports(struct hsr_priv *hsr) { struct hsr_port *port; @@ -356,31 +356,12 @@ static void hsr_del_ports(struct hsr_priv *hsr) hsr_del_port(port); } -/* This has to be called after all the readers are gone. - * Otherwise we would have to check the return value of - * hsr_port_get_hsr(). - */ -static void hsr_dev_destroy(struct net_device *hsr_dev) -{ - struct hsr_priv *hsr = netdev_priv(hsr_dev); - - hsr_debugfs_term(hsr); - hsr_del_ports(hsr); - - del_timer_sync(&hsr->prune_timer); - del_timer_sync(&hsr->announce_timer); - - hsr_del_self_node(hsr); - hsr_del_nodes(&hsr->node_db); -} - static const struct net_device_ops hsr_device_ops = { .ndo_change_mtu = hsr_dev_change_mtu, .ndo_open = hsr_dev_open, .ndo_stop = hsr_dev_close, .ndo_start_xmit = hsr_dev_xmit, .ndo_fix_features = hsr_fix_features, - .ndo_uninit = hsr_dev_destroy, }; static struct device_type hsr_type = { diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index a099d7de7e79..b8f9262ed101 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -11,6 +11,7 @@ #include #include "hsr_main.h" +void hsr_del_ports(struct hsr_priv *hsr); void hsr_dev_setup(struct net_device *dev); int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], unsigned char multicast_spec, u8 protocol_version, @@ -18,5 +19,4 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], void hsr_check_carrier_and_operstate(struct hsr_priv *hsr); bool is_hsr_master(struct net_device *dev); int hsr_get_max_mtu(struct hsr_priv *hsr); - #endif /* __HSR_DEVICE_H */ diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index e2564de67603..144da15f0a81 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -100,8 +101,10 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER); hsr_del_port(port); if (hsr_slave_empty(master->hsr)) { - unregister_netdevice_queue(master->dev, - &list_kill); + const struct rtnl_link_ops *ops; + + ops = master->dev->rtnl_link_ops; + ops->dellink(master->dev, &list_kill); unregister_netdevice_many(&list_kill); } } @@ -144,9 +147,9 @@ static int __init hsr_init(void) static void __exit hsr_exit(void) { - unregister_netdevice_notifier(&hsr_nb); hsr_netlink_exit(); hsr_debugfs_remove_root(); + unregister_netdevice_notifier(&hsr_nb); } module_init(hsr_init); diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 1decb25f6764..6e14b7d22639 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -83,6 +83,22 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, return hsr_dev_finalize(dev, link, multicast_spec, hsr_version, extack); } +static void hsr_dellink(struct net_device *dev, struct list_head *head) +{ + struct hsr_priv *hsr = netdev_priv(dev); + + del_timer_sync(&hsr->prune_timer); + del_timer_sync(&hsr->announce_timer); + + hsr_debugfs_term(hsr); + hsr_del_ports(hsr); + + hsr_del_self_node(hsr); + hsr_del_nodes(&hsr->node_db); + + unregister_netdevice_queue(dev, head); +} + static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct hsr_priv *hsr = netdev_priv(dev); @@ -118,6 +134,7 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = { .priv_size = sizeof(struct hsr_priv), .setup = hsr_dev_setup, .newlink = hsr_newlink, + .dellink = hsr_dellink, .fill_info = hsr_fill_info, }; From 21a739c64d3e9871186483a0cc3e7b52638c3d59 Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Sun, 21 Jun 2020 11:30:17 -0400 Subject: [PATCH 368/462] ethtool: Fix check in ethtool_rx_flow_rule_create Fix check in ethtool_rx_flow_rule_create Fixes: eca4205f9ec3 ("ethtool: add ethtool_rx_flow_spec to flow_rule structure translator") Signed-off-by: Gaurav Singh Signed-off-by: David S. Miller --- net/ethtool/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index b5df90c981c2..21d5fc0f6bb3 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2978,7 +2978,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) sizeof(match->mask.ipv6.dst)); } if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) || - memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { + memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) { match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS); match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] = From b562f58bbc12444219b74a5d6524977a3d87a022 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 22 Jun 2020 19:45:58 +0800 Subject: [PATCH 369/462] mptcp: drop sndr_key in mptcp_syn_options In RFC 8684, we don't need to send sndr_key in SYN package anymore, so drop it. Fixes: cc7972ea1932 ("mptcp: parse and emit MP_CAPABLE option according to v1 spec") Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/options.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 490b92534afc..df9a51425c6f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -336,9 +336,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, */ subflow->snd_isn = TCP_SKB_CB(skb)->end_seq; if (subflow->request_mptcp) { - pr_debug("local_key=%llu", subflow->local_key); opts->suboptions = OPTION_MPTCP_MPC_SYN; - opts->sndr_key = subflow->local_key; *size = TCPOLEN_MPTCP_MPC_SYN; return true; } else if (subflow->request_join) { From 26ac10be3c80a26d03c950b7387c4b9566c260b6 Mon Sep 17 00:00:00 2001 From: Aiden Leong Date: Mon, 22 Jun 2020 20:04:58 -0700 Subject: [PATCH 370/462] GUE: Fix a typo Fix a typo in gue.h Signed-off-by: Aiden Leong Signed-off-by: David S. Miller --- include/net/gue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/gue.h b/include/net/gue.h index 3a6595bfa641..e42402f180b7 100644 --- a/include/net/gue.h +++ b/include/net/gue.h @@ -21,7 +21,7 @@ * | | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * - * C bit indicates contol message when set, data message when unset. + * C bit indicates control message when set, data message when unset. * For a control message, proto/ctype is interpreted as a type of * control message. For data messages, proto/ctype is the IP protocol * of the next header. From bf10bd0be53282183f374af23577b18b5fbf7801 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 16 Jun 2020 15:33:07 +0800 Subject: [PATCH 371/462] KVM: X86: Fix MSR range of APIC registers in X2APIC mode Only MSR address range 0x800 through 0x8ff is architecturally reserved and dedicated for accessing APIC registers in x2APIC mode. Fixes: 0105d1a52640 ("KVM: x2apic interface to lapic") Signed-off-by: Xiaoyao Li Message-Id: <20200616073307.16440-1-xiaoyao.li@intel.com> Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 00c88c2f34e4..29d9b078ce69 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2856,7 +2856,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return kvm_mtrr_set_msr(vcpu, msr, data); case MSR_IA32_APICBASE: return kvm_set_apic_base(vcpu, msr_info); - case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_write(vcpu, msr, data); case MSR_IA32_TSCDEADLINE: kvm_set_lapic_tscdeadline_msr(vcpu, data); @@ -3196,7 +3196,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_APICBASE: msr_info->data = kvm_get_apic_base(vcpu); break; - case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data); case MSR_IA32_TSCDEADLINE: msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu); From 26769f96e6231095f6b1cc3090c903280d44bb57 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 16 Jun 2020 08:47:41 -0300 Subject: [PATCH 372/462] KVM: x86: allow TSC to differ by NTP correction bounds without TSC scaling The Linux TSC calibration procedure is subject to small variations (its common to see +-1 kHz difference between reboots on a given CPU, for example). So migrating a guest between two hosts with identical processor can fail, in case of a small variation in calibrated TSC between them. Without TSC scaling, the current kernel interface will either return an error (if user_tsc_khz <= tsc_khz) or enable TSC catchup mode. This change enables the following TSC tolerance check to accept KVM_SET_TSC_KHZ within tsc_tolerance_ppm (which is 250ppm by default). /* * Compute the variation in TSC rate which is acceptable * within the range of tolerance and decide if the * rate being applied is within that bounds of the hardware * rate. If so, no scaling or compensation need be done. */ thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm); thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm); if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) { pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi); use_scaling = 1; } NTP daemon in the guest can correct this difference (NTP can correct upto 500ppm). Signed-off-by: Marcelo Tosatti Message-Id: <20200616114741.GA298183@fuller.cnet> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 29d9b078ce69..3b92db412335 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4603,7 +4603,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EINVAL; user_tsc_khz = (u32)arg; - if (user_tsc_khz >= kvm_max_guest_tsc_khz) + if (kvm_has_tsc_control && + user_tsc_khz >= kvm_max_guest_tsc_khz) goto out; if (user_tsc_khz == 0) From e4553b4976d1178c13da295cb5c7b21f55baf8f9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 16 Jun 2020 20:41:23 -0700 Subject: [PATCH 373/462] KVM: VMX: Remove vcpu_vmx's defunct copy of host_pkru Remove vcpu_vmx.host_pkru, which got left behind when PKRU support was moved to common x86 code. No functional change intended. Fixes: 37486135d3a7b ("KVM: x86: Fix pkru save/restore when guest CR4.PKE=0, move it to x86.c") Signed-off-by: Sean Christopherson Message-Id: <20200617034123.25647-1-sean.j.christopherson@intel.com> Reviewed-by: Vitaly Kuznetsov Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 8a83b5edc820..639798e4a6ca 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -288,8 +288,6 @@ struct vcpu_vmx { u64 current_tsc_ratio; - u32 host_pkru; - unsigned long host_debugctlmsr; /* From 73094608b8e214952444fb104651704c98a37aeb Mon Sep 17 00:00:00 2001 From: Christoffer Nielsen Date: Fri, 19 Jun 2020 13:48:22 +0200 Subject: [PATCH 374/462] ALSA: usb-audio: Add registration quirk for Kingston HyperX Cloud Flight S Similar to the Kingston HyperX AMP, the Kingston HyperX Cloud Alpha S (0951:0x16ea) uses two interfaces, but only the second interface contains the capture stream. This patch delays the registration until the second interface appears. Signed-off-by: Christoffer Nielsen Cc: Link: https://lore.kernel.org/r/CAOtG2YHOM3zy+ed9KS-J4HkZo_QGzcUG9MigSp4e4_-13r6B=Q@mail.gmail.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index c495e720e2f1..54d4b4b5bd11 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1857,6 +1857,7 @@ struct registration_quirk { static const struct registration_quirk registration_quirks[] = { REG_QUIRK_ENTRY(0x0951, 0x16d8, 2), /* Kingston HyperX AMP */ REG_QUIRK_ENTRY(0x0951, 0x16ed, 2), /* Kingston HyperX Cloud Alpha S */ + REG_QUIRK_ENTRY(0x0951, 0x16ea, 2), /* Kingston HyperX Cloud Flight S */ { 0 } /* terminator */ }; From e64a1618af8566d20991607913a4d90d39b30118 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 17 Jun 2020 17:30:28 +0200 Subject: [PATCH 375/462] s390: fix system call single stepping When single stepping an svc instruction on s390, the kernel is entered with a PER program check interruption. The program check handler than jumps to the system call handler by reloading the PSW. The code didn't set GPR13 to the thread pointer in struct task_struct. This made the kernel access invalid memory while trying to fetch the syscall function address. Fix this by always assigned GPR13 after .Lsysc_per. Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") Reported-and-tested-by: Christian Borntraeger Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 496f74d98473..969b35b177dd 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -378,9 +378,9 @@ ENTRY(system_call) stmg %r8,%r15,__LC_SAVE_AREA_SYNC BPOFF lg %r12,__LC_CURRENT - lghi %r13,__TASK_thread lghi %r14,_PIF_SYSCALL .Lsysc_per: + lghi %r13,__TASK_thread lg %r15,__LC_KERNEL_STACK la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER From 998f5bbe3dbdab81c1cfb1aef7c3892f5d24f6c7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 17 Jun 2020 15:05:49 +0200 Subject: [PATCH 376/462] s390/kasan: fix early pgm check handler execution Currently if early_pgm_check_handler is called it ends up in pgm check loop. The problem is that early_pgm_check_handler is instrumented by KASAN but executed without DAT flag enabled which leads to addressing exception when KASAN checks try to access shadow memory. Fix that by executing early handlers with DAT flag on under KASAN as expected. Reported-and-tested-by: Alexander Egorenkov Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/early.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index cd241ee66eff..078277231858 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -170,6 +170,8 @@ static noinline __init void setup_lowcore_early(void) psw_t psw; psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; + if (IS_ENABLED(CONFIG_KASAN)) + psw.mask |= PSW_MASK_DAT; psw.addr = (unsigned long) s390_base_ext_handler; S390_lowcore.external_new_psw = psw; psw.addr = (unsigned long) s390_base_pgm_handler; From 827c4913923e0b441ba07ba4cc41e01181102303 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 31 Mar 2020 05:57:23 -0400 Subject: [PATCH 377/462] s390/debug: avoid kernel warning on too large number of pages When specifying insanely large debug buffers a kernel warning is printed. The debug code does handle the error gracefully, though. Instead of duplicating the check let us silence the warning to avoid crashes when panic_on_warn is used. Signed-off-by: Christian Borntraeger Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 636446003a06..263075a1af36 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -198,9 +198,10 @@ static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas) if (!areas) goto fail_malloc_areas; for (i = 0; i < nr_areas; i++) { + /* GFP_NOWARN to avoid user triggerable WARN, we handle fails */ areas[i] = kmalloc_array(pages_per_area, sizeof(debug_entry_t *), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!areas[i]) goto fail_malloc_areas2; for (j = 0; j < pages_per_area; j++) { From a32a1fc99807244d920d274adc46ba04b538cc8a Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Tue, 23 Jun 2020 19:03:23 +0800 Subject: [PATCH 378/462] ALSA: usb-audio: add quirk for Samsung USBC Headset (AKG) We've found Samsung USBC Headset (AKG) (VID: 0x04e8, PID: 0xa051) need a tiny delay after each class compliant request. Otherwise the device might not be able to be recognized each times. Signed-off-by: Chihhao Chen Signed-off-by: Macpaul Lin Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1592910203-24035-1-git-send-email-macpaul.lin@mediatek.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 54d4b4b5bd11..fca72730a802 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1674,6 +1674,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, chip->usb_id == USB_ID(0x0951, 0x16ad)) && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) usleep_range(1000, 2000); + + /* + * Samsung USBC Headset (AKG) need a tiny delay after each + * class compliant request. (Model number: AAM625R or AAM627R) + */ + if (chip->usb_id == USB_ID(0x04e8, 0xa051) && + (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) + usleep_range(5000, 6000); } /* From 097350d1c6e1f5808cae142006f18a0bbc57018d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 22 Jun 2020 15:18:15 -0400 Subject: [PATCH 379/462] ring-buffer: Zero out time extend if it is nested and not absolute Currently the ring buffer makes events that happen in interrupts that preempt another event have a delta of zero. (Hopefully we can change this soon). But this is to deal with the races of updating a global counter with lockless and nesting functions updating deltas. With the addition of absolute time stamps, the time extend didn't follow this rule. A time extend can happen if two events happen longer than 2^27 nanoseconds appart, as the delta time field in each event is only 27 bits. If that happens, then a time extend is injected with 2^59 bits of nanoseconds to use (18 years). But if the 2^27 nanoseconds happen between two events, and as it is writing the event, an interrupt triggers, it will see the 2^27 difference as well and inject a time extend of its own. But a recent change made the time extend logic not take into account the nesting, and this can cause two time extend deltas to happen moving the time stamp much further ahead than the current time. This gets all reset when the ring buffer moves to the next page, but that can cause time to appear to go backwards. This was observed in a trace-cmd recording, and since the data is saved in a file, with trace-cmd report --debug, it was possible to see that this indeed did happen! bash-52501 110d... 81778.908247: sched_switch: bash:52501 [120] S ==> swapper/110:0 [120] [12770284:0x2e8:64] -0 110d... 81778.908757: sched_switch: swapper/110:0 [120] R ==> bash:52501 [120] [509947:0x32c:64] TIME EXTEND: delta:306454770 length:0 bash-52501 110.... 81779.215212: sched_swap_numa: src_pid=52501 src_tgid=52388 src_ngid=52501 src_cpu=110 src_nid=2 dst_pid=52509 dst_tgid=52388 dst_ngid=52501 dst_cpu=49 dst_nid=1 [0:0x378:48] TIME EXTEND: delta:306458165 length:0 bash-52501 110dNh. 81779.521670: sched_wakeup: migration/110:565 [0] success=1 CPU:110 [0:0x3b4:40] and at the next page, caused the time to go backwards: bash-52504 110d... 81779.685411: sched_switch: bash:52504 [120] S ==> swapper/110:0 [120] [8347057:0xfb4:64] CPU:110 [SUBBUFFER START] [81779379165886:0x1320000] -0 110dN.. 81779.379166: sched_wakeup: bash:52504 [120] success=1 CPU:110 [0:0x10:40] -0 110d... 81779.379167: sched_switch: swapper/110:0 [120] R ==> bash:52504 [120] [1168:0x3c:64] Link: https://lkml.kernel.org/r/20200622151815.345d1bf5@oasis.local.home Cc: Ingo Molnar Cc: Andrew Morton Cc: Tom Zanussi Cc: stable@vger.kernel.org Fixes: dc4e2801d400b ("ring-buffer: Redefine the unimplemented RINGBUF_TYPE_TIME_STAMP") Reported-by: Julia Lawall Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b8e1ca48be50..00867ff82412 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2427,7 +2427,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, if (unlikely(info->add_timestamp)) { bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer); - event = rb_add_time_stamp(event, info->delta, abs); + event = rb_add_time_stamp(event, abs ? info->delta : delta, abs); length -= RB_LEN_TIME_EXTEND; delta = 0; } From 2464bc7c2897b7549146a743045089d3aea7b85b Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 23 Jun 2020 11:05:40 +0200 Subject: [PATCH 380/462] bridge: uapi: mrp: Fix MRP_PORT_ROLE Currently the MRP_PORT_ROLE_NONE has the value 0x2 but this is in conflict with the IEC 62439-2 standard. The standard defines the following port roles: primary (0x0), secondary(0x1), interconnect(0x2). Therefore remove the port role none. Fixes: 4714d13791f831 ("bridge: uapi: mrp: Add mrp attributes.") Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- include/uapi/linux/mrp_bridge.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h index 84f15f48a7cb..bee366540212 100644 --- a/include/uapi/linux/mrp_bridge.h +++ b/include/uapi/linux/mrp_bridge.h @@ -36,7 +36,6 @@ enum br_mrp_port_state_type { enum br_mrp_port_role_type { BR_MRP_PORT_ROLE_PRIMARY, BR_MRP_PORT_ROLE_SECONDARY, - BR_MRP_PORT_ROLE_NONE, }; enum br_mrp_tlv_header_type { From 7882c895b71b8cd6c81946fcc8e13d15a2841de7 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 23 Jun 2020 11:05:41 +0200 Subject: [PATCH 381/462] bridge: mrp: Validate when setting the port role This patch adds specific checks for primary(0x0) and secondary(0x1) when setting the port role. For any other value the function 'br_mrp_set_port_role' will return -EINVAL. Fixes: 20f6a05ef63594 ("bridge: mrp: Rework the MRP netlink interface") Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- net/bridge/br_mrp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index 24986ec7d38c..779e1eb75443 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -411,10 +411,16 @@ int br_mrp_set_port_role(struct net_bridge_port *p, if (!mrp) return -EINVAL; - if (role == BR_MRP_PORT_ROLE_PRIMARY) + switch (role) { + case BR_MRP_PORT_ROLE_PRIMARY: rcu_assign_pointer(mrp->p_port, p); - else + break; + case BR_MRP_PORT_ROLE_SECONDARY: rcu_assign_pointer(mrp->s_port, p); + break; + default: + return -EINVAL; + } br_mrp_port_switchdev_set_role(p, role); From 558b353c9c2a717509f291c066c6bd8f5f5e21be Mon Sep 17 00:00:00 2001 From: Frank Werner-Krippendorf Date: Tue, 23 Jun 2020 03:59:44 -0600 Subject: [PATCH 382/462] wireguard: noise: do not assign initiation time in if condition Fixes an error condition reported by checkpatch.pl which caused by assigning a variable in an if condition in wg_noise_handshake_consume_ initiation(). Signed-off-by: Frank Werner-Krippendorf Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/noise.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c index 626433690abb..201a22681945 100644 --- a/drivers/net/wireguard/noise.c +++ b/drivers/net/wireguard/noise.c @@ -617,8 +617,8 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, memcpy(handshake->hash, hash, NOISE_HASH_LEN); memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); handshake->remote_index = src->sender_index; - if ((s64)(handshake->last_initiation_consumption - - (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0) + initiation_consumption = ktime_get_coarse_boottime_ns(); + if ((s64)(handshake->last_initiation_consumption - initiation_consumption) < 0) handshake->last_initiation_consumption = initiation_consumption; handshake->state = HANDSHAKE_CONSUMED_INITIATION; up_write(&handshake->lock); From 900575aa33a3eaaef802b31de187a85c4a4b4bd0 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 23 Jun 2020 03:59:45 -0600 Subject: [PATCH 383/462] wireguard: device: avoid circular netns references Before, we took a reference to the creating netns if the new netns was different. This caused issues with circular references, with two wireguard interfaces swapping namespaces. The solution is to rather not take any extra references at all, but instead simply invalidate the creating netns pointer when that netns is deleted. In order to prevent this from happening again, this commit improves the rough object leak tracking by allowing it to account for created and destroyed interfaces, aside from just peers and keys. That then makes it possible to check for the object leak when having two interfaces take a reference to each others' namespaces. Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/device.c | 58 ++++++++++------------ drivers/net/wireguard/device.h | 3 +- drivers/net/wireguard/netlink.c | 14 ++++-- drivers/net/wireguard/socket.c | 25 +++++++--- tools/testing/selftests/wireguard/netns.sh | 13 ++++- 5 files changed, 67 insertions(+), 46 deletions(-) diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 3ac3f8570ca1..a8f151b1b5fa 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -45,17 +45,18 @@ static int wg_open(struct net_device *dev) if (dev_v6) dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE; + mutex_lock(&wg->device_update_lock); ret = wg_socket_init(wg, wg->incoming_port); if (ret < 0) - return ret; - mutex_lock(&wg->device_update_lock); + goto out; list_for_each_entry(peer, &wg->peer_list, peer_list) { wg_packet_send_staged_packets(peer); if (peer->persistent_keepalive_interval) wg_packet_send_keepalive(peer); } +out: mutex_unlock(&wg->device_update_lock); - return 0; + return ret; } #ifdef CONFIG_PM_SLEEP @@ -225,6 +226,7 @@ static void wg_destruct(struct net_device *dev) list_del(&wg->device_list); rtnl_unlock(); mutex_lock(&wg->device_update_lock); + rcu_assign_pointer(wg->creating_net, NULL); wg->incoming_port = 0; wg_socket_reinit(wg, NULL, NULL); /* The final references are cleared in the below calls to destroy_workqueue. */ @@ -240,13 +242,11 @@ static void wg_destruct(struct net_device *dev) skb_queue_purge(&wg->incoming_handshakes); free_percpu(dev->tstats); free_percpu(wg->incoming_handshakes_worker); - if (wg->have_creating_net_ref) - put_net(wg->creating_net); kvfree(wg->index_hashtable); kvfree(wg->peer_hashtable); mutex_unlock(&wg->device_update_lock); - pr_debug("%s: Interface deleted\n", dev->name); + pr_debug("%s: Interface destroyed\n", dev->name); free_netdev(dev); } @@ -292,7 +292,7 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, struct wg_device *wg = netdev_priv(dev); int ret = -ENOMEM; - wg->creating_net = src_net; + rcu_assign_pointer(wg->creating_net, src_net); init_rwsem(&wg->static_identity.lock); mutex_init(&wg->socket_update_lock); mutex_init(&wg->device_update_lock); @@ -393,30 +393,26 @@ static struct rtnl_link_ops link_ops __read_mostly = { .newlink = wg_newlink, }; -static int wg_netdevice_notification(struct notifier_block *nb, - unsigned long action, void *data) +static void wg_netns_pre_exit(struct net *net) { - struct net_device *dev = ((struct netdev_notifier_info *)data)->dev; - struct wg_device *wg = netdev_priv(dev); + struct wg_device *wg; - ASSERT_RTNL(); - - if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops) - return 0; - - if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) { - put_net(wg->creating_net); - wg->have_creating_net_ref = false; - } else if (dev_net(dev) != wg->creating_net && - !wg->have_creating_net_ref) { - wg->have_creating_net_ref = true; - get_net(wg->creating_net); + rtnl_lock(); + list_for_each_entry(wg, &device_list, device_list) { + if (rcu_access_pointer(wg->creating_net) == net) { + pr_debug("%s: Creating namespace exiting\n", wg->dev->name); + netif_carrier_off(wg->dev); + mutex_lock(&wg->device_update_lock); + rcu_assign_pointer(wg->creating_net, NULL); + wg_socket_reinit(wg, NULL, NULL); + mutex_unlock(&wg->device_update_lock); + } } - return 0; + rtnl_unlock(); } -static struct notifier_block netdevice_notifier = { - .notifier_call = wg_netdevice_notification +static struct pernet_operations pernet_ops = { + .pre_exit = wg_netns_pre_exit }; int __init wg_device_init(void) @@ -429,18 +425,18 @@ int __init wg_device_init(void) return ret; #endif - ret = register_netdevice_notifier(&netdevice_notifier); + ret = register_pernet_device(&pernet_ops); if (ret) goto error_pm; ret = rtnl_link_register(&link_ops); if (ret) - goto error_netdevice; + goto error_pernet; return 0; -error_netdevice: - unregister_netdevice_notifier(&netdevice_notifier); +error_pernet: + unregister_pernet_device(&pernet_ops); error_pm: #ifdef CONFIG_PM_SLEEP unregister_pm_notifier(&pm_notifier); @@ -451,7 +447,7 @@ error_pm: void wg_device_uninit(void) { rtnl_link_unregister(&link_ops); - unregister_netdevice_notifier(&netdevice_notifier); + unregister_pernet_device(&pernet_ops); #ifdef CONFIG_PM_SLEEP unregister_pm_notifier(&pm_notifier); #endif diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h index b15a8be9d816..4d0144e16947 100644 --- a/drivers/net/wireguard/device.h +++ b/drivers/net/wireguard/device.h @@ -40,7 +40,7 @@ struct wg_device { struct net_device *dev; struct crypt_queue encrypt_queue, decrypt_queue; struct sock __rcu *sock4, *sock6; - struct net *creating_net; + struct net __rcu *creating_net; struct noise_static_identity static_identity; struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; struct workqueue_struct *packet_crypt_wq; @@ -56,7 +56,6 @@ struct wg_device { unsigned int num_peers, device_update_gen; u32 fwmark; u16 incoming_port; - bool have_creating_net_ref; }; int wg_device_init(void); diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c index 802099c8828a..20a4f3c0a0a1 100644 --- a/drivers/net/wireguard/netlink.c +++ b/drivers/net/wireguard/netlink.c @@ -511,11 +511,15 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) if (flags & ~__WGDEVICE_F_ALL) goto out; - ret = -EPERM; - if ((info->attrs[WGDEVICE_A_LISTEN_PORT] || - info->attrs[WGDEVICE_A_FWMARK]) && - !ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN)) - goto out; + if (info->attrs[WGDEVICE_A_LISTEN_PORT] || info->attrs[WGDEVICE_A_FWMARK]) { + struct net *net; + rcu_read_lock(); + net = rcu_dereference(wg->creating_net); + ret = !net || !ns_capable(net->user_ns, CAP_NET_ADMIN) ? -EPERM : 0; + rcu_read_unlock(); + if (ret) + goto out; + } ++wg->device_update_gen; diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index f9018027fc13..c33e2c81635f 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -347,6 +347,7 @@ static void set_sock_opts(struct socket *sock) int wg_socket_init(struct wg_device *wg, u16 port) { + struct net *net; int ret; struct udp_tunnel_sock_cfg cfg = { .sk_user_data = wg, @@ -371,37 +372,47 @@ int wg_socket_init(struct wg_device *wg, u16 port) }; #endif + rcu_read_lock(); + net = rcu_dereference(wg->creating_net); + net = net ? maybe_get_net(net) : NULL; + rcu_read_unlock(); + if (unlikely(!net)) + return -ENONET; + #if IS_ENABLED(CONFIG_IPV6) retry: #endif - ret = udp_sock_create(wg->creating_net, &port4, &new4); + ret = udp_sock_create(net, &port4, &new4); if (ret < 0) { pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); - return ret; + goto out; } set_sock_opts(new4); - setup_udp_tunnel_sock(wg->creating_net, new4, &cfg); + setup_udp_tunnel_sock(net, new4, &cfg); #if IS_ENABLED(CONFIG_IPV6) if (ipv6_mod_enabled()) { port6.local_udp_port = inet_sk(new4->sk)->inet_sport; - ret = udp_sock_create(wg->creating_net, &port6, &new6); + ret = udp_sock_create(net, &port6, &new6); if (ret < 0) { udp_tunnel_sock_release(new4); if (ret == -EADDRINUSE && !port && retries++ < 100) goto retry; pr_err("%s: Could not create IPv6 socket\n", wg->dev->name); - return ret; + goto out; } set_sock_opts(new6); - setup_udp_tunnel_sock(wg->creating_net, new6, &cfg); + setup_udp_tunnel_sock(net, new6, &cfg); } #endif wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL); - return 0; + ret = 0; +out: + put_net(net); + return ret; } void wg_socket_reinit(struct wg_device *wg, struct sock *new4, diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 17a1f53ceba0..d77f4829f1e0 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -587,9 +587,20 @@ ip0 link set wg0 up kill $ncat_pid ip0 link del wg0 +# Ensure there aren't circular reference loops +ip1 link add wg1 type wireguard +ip2 link add wg2 type wireguard +ip1 link set wg1 netns $netns2 +ip2 link set wg2 netns $netns1 +pp ip netns delete $netns1 +pp ip netns delete $netns2 +pp ip netns add $netns1 +pp ip netns add $netns2 + +sleep 2 # Wait for cleanup and grace periods declare -A objects while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do - [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue + [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ ?[0-9]*)\ .*(created|destroyed).* ]] || continue objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}" done < /dev/kmsg alldeleted=1 From b4730ae6a443afe611afb4fb651c885c51003c15 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 10:43:48 +0000 Subject: [PATCH 384/462] net: ethtool: add missing string for NETIF_F_GSO_TUNNEL_REMCSUM Commit e585f2363637 ("udp: Changes to udp_offload to support remote checksum offload") added new GSO type and a corresponding netdev feature, but missed Ethtool's 'netdev_features_strings' table. Give it a name so it will be exposed to userspace and become available for manual configuration. v3: - decouple from "netdev_features_strings[] cleanup" series; - no functional changes. v2: - don't split the "Fixes:" tag across lines; - no functional changes. Fixes: e585f2363637 ("udp: Changes to udp_offload to support remote checksum offload") Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- net/ethtool/common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 47f63526818e..aaecfc916a4d 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -40,6 +40,7 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", + [NETIF_F_GSO_TUNNEL_REMCSUM_BIT] = "tx-tunnel-remcsum-segmentation", [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation", [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation", From 97dd1abd026ae4e6a82fa68645928404ad483409 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:29 +0300 Subject: [PATCH 385/462] net: qed: fix left elements count calculation qed_chain_get_element_left{,_u32} returned 0 when the difference between producer and consumer page count was equal to the total page count. Fix this by conditional expanding of producer value (vs unconditional). This allowed to eliminate normalizaton against total page count, which was the cause of this bug. Misc: replace open-coded constants with common defines. Fixes: a91eb52abb50 ("qed: Revisit chain implementation") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 733fad7dfbed..6d15040c642c 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -207,28 +207,34 @@ static inline u32 qed_chain_get_cons_idx_u32(struct qed_chain *p_chain) static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain) { + u16 elem_per_page = p_chain->elem_per_page; + u32 prod = p_chain->u.chain16.prod_idx; + u32 cons = p_chain->u.chain16.cons_idx; u16 used; - used = (u16) (((u32)0x10000 + - (u32)p_chain->u.chain16.prod_idx) - - (u32)p_chain->u.chain16.cons_idx); + if (prod < cons) + prod += (u32)U16_MAX + 1; + + used = (u16)(prod - cons); if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) - used -= p_chain->u.chain16.prod_idx / p_chain->elem_per_page - - p_chain->u.chain16.cons_idx / p_chain->elem_per_page; + used -= prod / elem_per_page - cons / elem_per_page; return (u16)(p_chain->capacity - used); } static inline u32 qed_chain_get_elem_left_u32(struct qed_chain *p_chain) { + u16 elem_per_page = p_chain->elem_per_page; + u64 prod = p_chain->u.chain32.prod_idx; + u64 cons = p_chain->u.chain32.cons_idx; u32 used; - used = (u32) (((u64)0x100000000ULL + - (u64)p_chain->u.chain32.prod_idx) - - (u64)p_chain->u.chain32.cons_idx); + if (prod < cons) + prod += (u64)U32_MAX + 1; + + used = (u32)(prod - cons); if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) - used -= p_chain->u.chain32.prod_idx / p_chain->elem_per_page - - p_chain->u.chain32.cons_idx / p_chain->elem_per_page; + used -= (u32)(prod / elem_per_page - cons / elem_per_page); return p_chain->capacity - used; } From 31333c1a2521ff4b4ceb0c29de492549cd4a8de3 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:30 +0300 Subject: [PATCH 386/462] net: qed: fix async event callbacks unregistering qed_spq_unregister_async_cb() should be called before qed_rdma_info_free() to avoid crash-spawning uses-after-free. Instead of calling it from each subsystem exit code, do it in one place on PF down. Fixes: 291d57f67d24 ("qed: Fix rdma_info structure allocation") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 9 +++++++-- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 2 -- drivers/net/ethernet/qlogic/qed/qed_roce.c | 1 - 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 1eebf30fa798..b41ada668948 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1368,6 +1368,8 @@ static void qed_dbg_user_data_free(struct qed_hwfn *p_hwfn) void qed_resc_free(struct qed_dev *cdev) { + struct qed_rdma_info *rdma_info; + struct qed_hwfn *p_hwfn; int i; if (IS_VF(cdev)) { @@ -1385,7 +1387,8 @@ void qed_resc_free(struct qed_dev *cdev) qed_llh_free(cdev); for_each_hwfn(cdev, i) { - struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + p_hwfn = cdev->hwfns + i; + rdma_info = p_hwfn->p_rdma_info; qed_cxt_mngr_free(p_hwfn); qed_qm_info_free(p_hwfn); @@ -1404,8 +1407,10 @@ void qed_resc_free(struct qed_dev *cdev) qed_ooo_free(p_hwfn); } - if (QED_IS_RDMA_PERSONALITY(p_hwfn)) + if (QED_IS_RDMA_PERSONALITY(p_hwfn) && rdma_info) { + qed_spq_unregister_async_cb(p_hwfn, rdma_info->proto); qed_rdma_info_free(p_hwfn); + } qed_iov_free(p_hwfn); qed_l2_free(p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index d2fe61a5cf56..5409a2da6106 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -2836,8 +2836,6 @@ int qed_iwarp_stop(struct qed_hwfn *p_hwfn) if (rc) return rc; - qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP); - return qed_iwarp_ll2_stop(p_hwfn); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 4566815f7b87..7271dd7166e5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -113,7 +113,6 @@ void qed_roce_stop(struct qed_hwfn *p_hwfn) break; } } - qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_ROCE); } static void qed_rdma_copy_gids(struct qed_rdma_qp *qp, __le32 *src_gid, From 4079c7f7a2a00ab403c177ce723b560de59139c3 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:31 +0300 Subject: [PATCH 387/462] net: qede: stop adding events on an already destroyed workqueue Set rdma_wq pointer to NULL after destroying the workqueue and check for it when adding new events to fix crashes on driver unload. Fixes: cee9fbd8e2e9 ("qede: Add qedr framework") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_rdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index 2d873ae8a234..668ccc9d49f8 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -105,6 +105,7 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev) qede_rdma_cleanup_event(edev); destroy_workqueue(edev->rdma_info.rdma_wq); + edev->rdma_info.rdma_wq = NULL; } int qede_rdma_dev_add(struct qede_dev *edev, bool recovery) @@ -325,7 +326,7 @@ static void qede_rdma_add_event(struct qede_dev *edev, if (edev->rdma_info.exp_recovery) return; - if (!edev->rdma_info.qedr_dev) + if (!edev->rdma_info.qedr_dev || !edev->rdma_info.rdma_wq) return; /* We don't want the cleanup flow to start while we're allocating and From ccd7c7ce167a21dbf2b698ffcf00f11d96d44f9b Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:32 +0300 Subject: [PATCH 388/462] net: qed: fix NVMe login fails over VFs 25ms sleep cycles in waiting for PF response are excessive and may lead to different timeout failures. Start to wait with short udelays, and in most cases polling will end here. If the time was not sufficient, switch to msleeps. usleep_range() may go far beyond 100us depending on platform and tick configuration, hence atomic udelays for consistency. Also add explicit DMA barriers since 'done' always comes from a shared request-response DMA pool, and note that in the comment nearby. Fixes: 1408cc1fa48c ("qed: Introduce VFs") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 856051f50eb7..adc2c8f3d48e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -81,12 +81,17 @@ static void qed_vf_pf_req_end(struct qed_hwfn *p_hwfn, int req_status) mutex_unlock(&(p_hwfn->vf_iov_info->mutex)); } +#define QED_VF_CHANNEL_USLEEP_ITERATIONS 90 +#define QED_VF_CHANNEL_USLEEP_DELAY 100 +#define QED_VF_CHANNEL_MSLEEP_ITERATIONS 10 +#define QED_VF_CHANNEL_MSLEEP_DELAY 25 + static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size) { union vfpf_tlvs *p_req = p_hwfn->vf_iov_info->vf2pf_request; struct ustorm_trigger_vf_zone trigger; struct ustorm_vf_zone *zone_data; - int rc = 0, time = 100; + int iter, rc = 0; zone_data = (struct ustorm_vf_zone *)PXP_VF_BAR0_START_USDM_ZONE_B; @@ -126,11 +131,19 @@ static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size) REG_WR(p_hwfn, (uintptr_t)&zone_data->trigger, *((u32 *)&trigger)); /* When PF would be done with the response, it would write back to the - * `done' address. Poll until then. + * `done' address from a coherent DMA zone. Poll until then. */ - while ((!*done) && time) { - msleep(25); - time--; + + iter = QED_VF_CHANNEL_USLEEP_ITERATIONS; + while (!*done && iter--) { + udelay(QED_VF_CHANNEL_USLEEP_DELAY); + dma_rmb(); + } + + iter = QED_VF_CHANNEL_MSLEEP_ITERATIONS; + while (!*done && iter--) { + msleep(QED_VF_CHANNEL_MSLEEP_DELAY); + dma_rmb(); } if (!*done) { From d434d02f7e7c24c721365fd594ed781acb18e0da Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:33 +0300 Subject: [PATCH 389/462] net: qed: fix excessive QM ILT lines consumption This is likely a copy'n'paste mistake. The amount of ILT lines to reserve for a single VF was being multiplied by the total VFs count. This led to a huge redundancy in reservation and potential lines drainouts. Fixes: 1408cc1fa48c ("qed: Introduce VFs") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 7b76667acaba..c0a769b5358c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -271,7 +271,7 @@ static void qed_cxt_qm_iids(struct qed_hwfn *p_hwfn, vf_tids += segs[NUM_TASK_PF_SEGMENTS].count; } - iids->vf_cids += vf_cids * p_mngr->vf_count; + iids->vf_cids = vf_cids; iids->tids += vf_tids * p_mngr->vf_count; DP_VERBOSE(p_hwfn, QED_MSG_ILT, From 1c85f394c2206ea3835f43534d5675f0574e1b70 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:34 +0300 Subject: [PATCH 390/462] net: qede: fix PTP initialization on recovery Currently PTP cyclecounter and timecounter are initialized only on the first probing and are cleaned up during removal. This means that PTP becomes non-functional after device recovery. Fix this by unconditional PTP initialization on probing and clearing Tx pending bit on exiting. Fixes: ccc67ef50b90 ("qede: Error recovery process") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 2 +- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 29 ++++++++------------ drivers/net/ethernet/qlogic/qede/qede_ptp.h | 2 +- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 756c05eb96f3..f6ff31e73ebe 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1229,7 +1229,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, /* PTP not supported on VFs */ if (!is_vf) - qede_ptp_enable(edev, (mode == QEDE_PROBE_NORMAL)); + qede_ptp_enable(edev); edev->ops->register_ops(cdev, &qede_ll_ops, edev); diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 4c7f7a7fc151..cd5841a9415e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -412,6 +412,7 @@ void qede_ptp_disable(struct qede_dev *edev) if (ptp->tx_skb) { dev_kfree_skb_any(ptp->tx_skb); ptp->tx_skb = NULL; + clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags); } /* Disable PTP in HW */ @@ -423,7 +424,7 @@ void qede_ptp_disable(struct qede_dev *edev) edev->ptp = NULL; } -static int qede_ptp_init(struct qede_dev *edev, bool init_tc) +static int qede_ptp_init(struct qede_dev *edev) { struct qede_ptp *ptp; int rc; @@ -444,25 +445,19 @@ static int qede_ptp_init(struct qede_dev *edev, bool init_tc) /* Init work queue for Tx timestamping */ INIT_WORK(&ptp->work, qede_ptp_task); - /* Init cyclecounter and timecounter. This is done only in the first - * load. If done in every load, PTP application will fail when doing - * unload / load (e.g. MTU change) while it is running. - */ - if (init_tc) { - memset(&ptp->cc, 0, sizeof(ptp->cc)); - ptp->cc.read = qede_ptp_read_cc; - ptp->cc.mask = CYCLECOUNTER_MASK(64); - ptp->cc.shift = 0; - ptp->cc.mult = 1; + /* Init cyclecounter and timecounter */ + memset(&ptp->cc, 0, sizeof(ptp->cc)); + ptp->cc.read = qede_ptp_read_cc; + ptp->cc.mask = CYCLECOUNTER_MASK(64); + ptp->cc.shift = 0; + ptp->cc.mult = 1; - timecounter_init(&ptp->tc, &ptp->cc, - ktime_to_ns(ktime_get_real())); - } + timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real())); - return rc; + return 0; } -int qede_ptp_enable(struct qede_dev *edev, bool init_tc) +int qede_ptp_enable(struct qede_dev *edev) { struct qede_ptp *ptp; int rc; @@ -483,7 +478,7 @@ int qede_ptp_enable(struct qede_dev *edev, bool init_tc) edev->ptp = ptp; - rc = qede_ptp_init(edev, init_tc); + rc = qede_ptp_init(edev); if (rc) goto err1; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.h b/drivers/net/ethernet/qlogic/qede/qede_ptp.h index 691a14c4b2c5..89c7f3cf3ee2 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.h +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.h @@ -41,7 +41,7 @@ void qede_ptp_rx_ts(struct qede_dev *edev, struct sk_buff *skb); void qede_ptp_tx_ts(struct qede_dev *edev, struct sk_buff *skb); int qede_ptp_hw_ts(struct qede_dev *edev, struct ifreq *req); void qede_ptp_disable(struct qede_dev *edev); -int qede_ptp_enable(struct qede_dev *edev, bool init_tc); +int qede_ptp_enable(struct qede_dev *edev); int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *ts); static inline void qede_ptp_record_rx_ts(struct qede_dev *edev, From ec6c80590bde6b5dfa4970fffa3572f1acd313ca Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:35 +0300 Subject: [PATCH 391/462] net: qede: fix use-after-free on recovery and AER handling Set edev->cdev pointer to NULL after calling remove() callback to avoid using of already freed object. Fixes: ccc67ef50b90 ("qede: Error recovery process") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index f6ff31e73ebe..29e285430f99 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1318,6 +1318,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) if (system_state == SYSTEM_POWER_OFF) return; qed_ops->common->remove(cdev); + edev->cdev = NULL; /* Since this can happen out-of-sync with other flows, * don't release the netdevice until after slowpath stop From c221dd1831732449f844e03631a34fd21c2b9429 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:36 +0300 Subject: [PATCH 392/462] net: qed: reset ILT block sizes before recomputing to fix crashes Sizes of all ILT blocks must be reset before ILT recomputing when disabling clients, or memory allocation may exceed ILT shadow array and provoke system crashes. Fixes: 1408cc1fa48c ("qed: Introduce VFs") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index c0a769b5358c..08ba9d54ab63 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -465,6 +465,20 @@ static struct qed_ilt_cli_blk *qed_cxt_set_blk(struct qed_ilt_cli_blk *p_blk) return p_blk; } +static void qed_cxt_ilt_blk_reset(struct qed_hwfn *p_hwfn) +{ + struct qed_ilt_client_cfg *clients = p_hwfn->p_cxt_mngr->clients; + u32 cli_idx, blk_idx; + + for (cli_idx = 0; cli_idx < MAX_ILT_CLIENTS; cli_idx++) { + for (blk_idx = 0; blk_idx < ILT_CLI_PF_BLOCKS; blk_idx++) + clients[cli_idx].pf_blks[blk_idx].total_size = 0; + + for (blk_idx = 0; blk_idx < ILT_CLI_VF_BLOCKS; blk_idx++) + clients[cli_idx].vf_blks[blk_idx].total_size = 0; + } +} + int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn, u32 *line_count) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; @@ -484,6 +498,11 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn, u32 *line_count) p_mngr->pf_start_line = RESC_START(p_hwfn, QED_ILT); + /* Reset all ILT blocks at the beginning of ILT computing in order + * to prevent memory allocation for irrelevant blocks afterwards. + */ + qed_cxt_ilt_blk_reset(p_hwfn); + DP_VERBOSE(p_hwfn, QED_MSG_ILT, "hwfn [%d] - Set context manager starting line to be 0x%08x\n", p_hwfn->my_id, p_hwfn->p_cxt_mngr->pf_start_line); From 10f468ea5c481b3a60cc291c4dfdc7bb338abb74 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:37 +0300 Subject: [PATCH 393/462] net: qed: fix "maybe uninitialized" warning Variable 'abs_ppfid' in qed_dev.c:qed_llh_add_mac_filter() always gets printed, but is initialized only under 'ref_cnt == 1' condition. This results in: In file included from ./include/linux/kernel.h:15:0, from ./include/asm-generic/bug.h:19, from ./arch/x86/include/asm/bug.h:86, from ./include/linux/bug.h:5, from ./include/linux/io.h:11, from drivers/net/ethernet/qlogic/qed/qed_dev.c:35: drivers/net/ethernet/qlogic/qed/qed_dev.c: In function 'qed_llh_add_mac_filter': ./include/linux/printk.h:358:2: warning: 'abs_ppfid' may be used uninitialized in this function [-Wmaybe-uninitialized] printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) ^~~~~~ drivers/net/ethernet/qlogic/qed/qed_dev.c:983:17: note: 'abs_ppfid' was declared here u8 filter_idx, abs_ppfid; ^~~~~~~~~ ...under W=1+. Fix this by initializing it with zero. Fixes: 79284adeb99e ("qed: Add llh ppfid interface and 100g support for offload protocols") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index b41ada668948..3aa51374e727 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -980,7 +980,7 @@ int qed_llh_add_mac_filter(struct qed_dev *cdev, struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn); union qed_llh_filter filter = {}; - u8 filter_idx, abs_ppfid; + u8 filter_idx, abs_ppfid = 0; u32 high, low, ref_cnt; int rc = 0; From 11d8cd5c9f3b46f397f889cefdb66795518aaebd Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Wed, 24 Jun 2020 01:51:31 +0530 Subject: [PATCH 394/462] cxgb4: move handling L2T ARP failures to caller Move code handling L2T ARP failures to the only caller. Fixes following sparse warning: skbuff.h:2091:29: warning: context imbalance in 'handle_failed_resolution' - unexpected unlock Fixes: 749cb5fe48bb ("cxgb4: Replace arpq_head/arpq_tail with SKB double link-list code") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/l2t.c | 52 +++++++++++------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 72b37a66c7d8..0ed20a9cca14 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -502,41 +502,20 @@ u64 cxgb4_select_ntuple(struct net_device *dev, } EXPORT_SYMBOL(cxgb4_select_ntuple); -/* - * Called when address resolution fails for an L2T entry to handle packets - * on the arpq head. If a packet specifies a failure handler it is invoked, - * otherwise the packet is sent to the device. - */ -static void handle_failed_resolution(struct adapter *adap, struct l2t_entry *e) -{ - struct sk_buff *skb; - - while ((skb = __skb_dequeue(&e->arpq)) != NULL) { - const struct l2t_skb_cb *cb = L2T_SKB_CB(skb); - - spin_unlock(&e->lock); - if (cb->arp_err_handler) - cb->arp_err_handler(cb->handle, skb); - else - t4_ofld_send(adap, skb); - spin_lock(&e->lock); - } -} - /* * Called when the host's neighbor layer makes a change to some entry that is * loaded into the HW L2 table. */ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) { - struct l2t_entry *e; - struct sk_buff_head *arpq = NULL; - struct l2t_data *d = adap->l2t; unsigned int addr_len = neigh->tbl->key_len; u32 *addr = (u32 *) neigh->primary_key; - int ifidx = neigh->dev->ifindex; - int hash = addr_hash(d, addr, addr_len, ifidx); + int hash, ifidx = neigh->dev->ifindex; + struct sk_buff_head *arpq = NULL; + struct l2t_data *d = adap->l2t; + struct l2t_entry *e; + hash = addr_hash(d, addr, addr_len, ifidx); read_lock_bh(&d->lock); for (e = d->l2tab[hash].first; e; e = e->next) if (!addreq(e, addr) && e->ifindex == ifidx) { @@ -569,8 +548,25 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) write_l2e(adap, e, 0); } - if (arpq) - handle_failed_resolution(adap, e); + if (arpq) { + struct sk_buff *skb; + + /* Called when address resolution fails for an L2T + * entry to handle packets on the arpq head. If a + * packet specifies a failure handler it is invoked, + * otherwise the packet is sent to the device. + */ + while ((skb = __skb_dequeue(&e->arpq)) != NULL) { + const struct l2t_skb_cb *cb = L2T_SKB_CB(skb); + + spin_unlock(&e->lock); + if (cb->arp_err_handler) + cb->arp_err_handler(cb->handle, skb); + else + t4_ofld_send(adap, skb); + spin_lock(&e->lock); + } + } spin_unlock_bh(&e->lock); } From 030c98824deaba205787af501a074b3d7f46e32e Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Wed, 24 Jun 2020 01:51:32 +0530 Subject: [PATCH 395/462] cxgb4: move PTP lock and unlock to caller in Tx path Check for whether PTP is enabled or not at the caller and perform locking/unlocking at the caller. Fixes following sparse warning: sge.c:1641:26: warning: context imbalance in 'cxgb4_eth_xmit' - different lock contexts for basic block Fixes: a456950445a0 ("cxgb4: time stamping interface for PTP") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 1359158652b7..f7b5a2f11001 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1425,12 +1425,10 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) qidx = skb_get_queue_mapping(skb); if (ptp_enabled) { - spin_lock(&adap->ptp_lock); if (!(adap->ptp_tx_skb)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; adap->ptp_tx_skb = skb_get(skb); } else { - spin_unlock(&adap->ptp_lock); goto out_free; } q = &adap->sge.ptptxq; @@ -1444,11 +1442,8 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) #ifdef CONFIG_CHELSIO_T4_FCOE ret = cxgb_fcoe_offload(skb, adap, pi, &cntrl); - if (unlikely(ret == -ENOTSUPP)) { - if (ptp_enabled) - spin_unlock(&adap->ptp_lock); + if (unlikely(ret == -EOPNOTSUPP)) goto out_free; - } #endif /* CONFIG_CHELSIO_T4_FCOE */ chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip); @@ -1461,8 +1456,6 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) dev_err(adap->pdev_dev, "%s: Tx ring %u full while queue awake!\n", dev->name, qidx); - if (ptp_enabled) - spin_unlock(&adap->ptp_lock); return NETDEV_TX_BUSY; } @@ -1481,8 +1474,6 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) { memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); q->mapping_err++; - if (ptp_enabled) - spin_unlock(&adap->ptp_lock); goto out_free; } @@ -1630,8 +1621,6 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) txq_advance(&q->q, ndesc); cxgb4_ring_tx_db(adap, &q->q, ndesc); - if (ptp_enabled) - spin_unlock(&adap->ptp_lock); return NETDEV_TX_OK; out_free: @@ -2377,6 +2366,16 @@ netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(qid >= pi->nqsets)) return cxgb4_ethofld_xmit(skb, dev); + if (is_ptp_enabled(skb, dev)) { + struct adapter *adap = netdev2adap(dev); + netdev_tx_t ret; + + spin_lock(&adap->ptp_lock); + ret = cxgb4_eth_xmit(skb, dev); + spin_unlock(&adap->ptp_lock); + return ret; + } + return cxgb4_eth_xmit(skb, dev); } From 589b1c9c166dce120e27b32a83a78f55464a7ef9 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Wed, 24 Jun 2020 01:51:33 +0530 Subject: [PATCH 396/462] cxgb4: use unaligned conversion for fetching timestamp Use get_unaligned_be64() to fetch the timestamp needed for ns_to_ktime() conversion. Fixes following sparse warning: sge.c:3282:43: warning: cast to restricted __be64 Fixes: a456950445a0 ("cxgb4: time stamping interface for PTP") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index f7b5a2f11001..3c8b4b153ec4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -3312,7 +3312,7 @@ static noinline int t4_systim_to_hwstamp(struct adapter *adapter, hwtstamps = skb_hwtstamps(skb); memset(hwtstamps, 0, sizeof(*hwtstamps)); - hwtstamps->hwtstamp = ns_to_ktime(be64_to_cpu(*((u64 *)data))); + hwtstamps->hwtstamp = ns_to_ktime(get_unaligned_be64(data)); return RX_PTP_PKT_SUC; } From 27f78cb245abdb86735529c13b0a579f57829e71 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Wed, 24 Jun 2020 01:51:34 +0530 Subject: [PATCH 397/462] cxgb4: parse TC-U32 key values and masks natively TC-U32 passes all keys values and masks in __be32 format. The parser already expects this and hence pass the value and masks in __be32 natively to the parser. Fixes following sparse warnings in several places: cxgb4_tc_u32.c:57:21: warning: incorrect type in assignment (different base types) cxgb4_tc_u32.c:57:21: expected unsigned int [usertype] val cxgb4_tc_u32.c:57:21: got restricted __be32 [usertype] val cxgb4_tc_u32_parse.h:48:24: warning: cast to restricted __be32 Fixes: 2e8aad7bf203 ("cxgb4: add parser to translate u32 filters to internal spec") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 18 +-- .../chelsio/cxgb4/cxgb4_tc_u32_parse.h | 122 ++++++++++++------ 2 files changed, 91 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 3f3c11e54d97..dede02505ceb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -48,7 +48,7 @@ static int fill_match_fields(struct adapter *adap, bool next_header) { unsigned int i, j; - u32 val, mask; + __be32 val, mask; int off, err; bool found; @@ -228,7 +228,7 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) const struct cxgb4_next_header *next; bool found = false; unsigned int i, j; - u32 val, mask; + __be32 val, mask; int off; if (t->table[link_uhtid - 1].link_handle) { @@ -242,10 +242,10 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) /* Try to find matches that allow jumps to next header. */ for (i = 0; next[i].jump; i++) { - if (next[i].offoff != cls->knode.sel->offoff || - next[i].shift != cls->knode.sel->offshift || - next[i].mask != cls->knode.sel->offmask || - next[i].offset != cls->knode.sel->off) + if (next[i].sel.offoff != cls->knode.sel->offoff || + next[i].sel.offshift != cls->knode.sel->offshift || + next[i].sel.offmask != cls->knode.sel->offmask || + next[i].sel.off != cls->knode.sel->off) continue; /* Found a possible candidate. Find a key that @@ -257,9 +257,9 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) val = cls->knode.sel->keys[j].val; mask = cls->knode.sel->keys[j].mask; - if (next[i].match_off == off && - next[i].match_val == val && - next[i].match_mask == mask) { + if (next[i].key.off == off && + next[i].key.val == val && + next[i].key.mask == mask) { found = true; break; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h index 125868c6770a..f59dd4b2ae6f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h @@ -38,12 +38,12 @@ struct cxgb4_match_field { int off; /* Offset from the beginning of the header to match */ /* Fill the value/mask pair in the spec if matched */ - int (*val)(struct ch_filter_specification *f, u32 val, u32 mask); + int (*val)(struct ch_filter_specification *f, __be32 val, __be32 mask); }; /* IPv4 match fields */ static inline int cxgb4_fill_ipv4_tos(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { f->val.tos = (ntohl(val) >> 16) & 0x000000FF; f->mask.tos = (ntohl(mask) >> 16) & 0x000000FF; @@ -52,7 +52,7 @@ static inline int cxgb4_fill_ipv4_tos(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv4_frag(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { u32 mask_val; u8 frag_val; @@ -74,7 +74,7 @@ static inline int cxgb4_fill_ipv4_frag(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv4_proto(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { f->val.proto = (ntohl(val) >> 16) & 0x000000FF; f->mask.proto = (ntohl(mask) >> 16) & 0x000000FF; @@ -83,7 +83,7 @@ static inline int cxgb4_fill_ipv4_proto(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv4_src_ip(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.fip[0], &val, sizeof(u32)); memcpy(&f->mask.fip[0], &mask, sizeof(u32)); @@ -92,7 +92,7 @@ static inline int cxgb4_fill_ipv4_src_ip(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv4_dst_ip(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.lip[0], &val, sizeof(u32)); memcpy(&f->mask.lip[0], &mask, sizeof(u32)); @@ -111,7 +111,7 @@ static const struct cxgb4_match_field cxgb4_ipv4_fields[] = { /* IPv6 match fields */ static inline int cxgb4_fill_ipv6_tos(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { f->val.tos = (ntohl(val) >> 20) & 0x000000FF; f->mask.tos = (ntohl(mask) >> 20) & 0x000000FF; @@ -120,7 +120,7 @@ static inline int cxgb4_fill_ipv6_tos(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_proto(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { f->val.proto = (ntohl(val) >> 8) & 0x000000FF; f->mask.proto = (ntohl(mask) >> 8) & 0x000000FF; @@ -129,7 +129,7 @@ static inline int cxgb4_fill_ipv6_proto(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_src_ip0(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.fip[0], &val, sizeof(u32)); memcpy(&f->mask.fip[0], &mask, sizeof(u32)); @@ -138,7 +138,7 @@ static inline int cxgb4_fill_ipv6_src_ip0(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_src_ip1(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.fip[4], &val, sizeof(u32)); memcpy(&f->mask.fip[4], &mask, sizeof(u32)); @@ -147,7 +147,7 @@ static inline int cxgb4_fill_ipv6_src_ip1(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_src_ip2(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.fip[8], &val, sizeof(u32)); memcpy(&f->mask.fip[8], &mask, sizeof(u32)); @@ -156,7 +156,7 @@ static inline int cxgb4_fill_ipv6_src_ip2(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_src_ip3(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.fip[12], &val, sizeof(u32)); memcpy(&f->mask.fip[12], &mask, sizeof(u32)); @@ -165,7 +165,7 @@ static inline int cxgb4_fill_ipv6_src_ip3(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_dst_ip0(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.lip[0], &val, sizeof(u32)); memcpy(&f->mask.lip[0], &mask, sizeof(u32)); @@ -174,7 +174,7 @@ static inline int cxgb4_fill_ipv6_dst_ip0(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_dst_ip1(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.lip[4], &val, sizeof(u32)); memcpy(&f->mask.lip[4], &mask, sizeof(u32)); @@ -183,7 +183,7 @@ static inline int cxgb4_fill_ipv6_dst_ip1(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_dst_ip2(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.lip[8], &val, sizeof(u32)); memcpy(&f->mask.lip[8], &mask, sizeof(u32)); @@ -192,7 +192,7 @@ static inline int cxgb4_fill_ipv6_dst_ip2(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_dst_ip3(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.lip[12], &val, sizeof(u32)); memcpy(&f->mask.lip[12], &mask, sizeof(u32)); @@ -216,7 +216,7 @@ static const struct cxgb4_match_field cxgb4_ipv6_fields[] = { /* TCP/UDP match */ static inline int cxgb4_fill_l4_ports(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { f->val.fport = ntohl(val) >> 16; f->mask.fport = ntohl(mask) >> 16; @@ -237,19 +237,13 @@ static const struct cxgb4_match_field cxgb4_udp_fields[] = { }; struct cxgb4_next_header { - unsigned int offset; /* Offset to next header */ - /* offset, shift, and mask added to offset above + /* Offset, shift, and mask added to beginning of the header * to get to next header. Useful when using a header * field's value to jump to next header such as IHL field * in IPv4 header. */ - unsigned int offoff; - u32 shift; - u32 mask; - /* match criteria to make this jump */ - unsigned int match_off; - u32 match_val; - u32 match_mask; + struct tc_u32_sel sel; + struct tc_u32_key key; /* location of jump to make */ const struct cxgb4_match_field *jump; }; @@ -258,26 +252,74 @@ struct cxgb4_next_header { * IPv4 header. */ static const struct cxgb4_next_header cxgb4_ipv4_jumps[] = { - { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, - .match_off = 8, .match_val = 0x600, .match_mask = 0xFF00, - .jump = cxgb4_tcp_fields }, - { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, - .match_off = 8, .match_val = 0x1100, .match_mask = 0xFF00, - .jump = cxgb4_udp_fields }, - { .jump = NULL } + { + /* TCP Jump */ + .sel = { + .off = 0, + .offoff = 0, + .offshift = 6, + .offmask = cpu_to_be16(0x0f00), + }, + .key = { + .off = 8, + .val = cpu_to_be32(0x00060000), + .mask = cpu_to_be32(0x00ff0000), + }, + .jump = cxgb4_tcp_fields, + }, + { + /* UDP Jump */ + .sel = { + .off = 0, + .offoff = 0, + .offshift = 6, + .offmask = cpu_to_be16(0x0f00), + }, + .key = { + .off = 8, + .val = cpu_to_be32(0x00110000), + .mask = cpu_to_be32(0x00ff0000), + }, + .jump = cxgb4_udp_fields, + }, + { .jump = NULL }, }; /* Accept a rule with a jump directly past the 40 Bytes of IPv6 fixed header * to get to transport layer header. */ static const struct cxgb4_next_header cxgb4_ipv6_jumps[] = { - { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, - .match_off = 4, .match_val = 0x60000, .match_mask = 0xFF0000, - .jump = cxgb4_tcp_fields }, - { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, - .match_off = 4, .match_val = 0x110000, .match_mask = 0xFF0000, - .jump = cxgb4_udp_fields }, - { .jump = NULL } + { + /* TCP Jump */ + .sel = { + .off = 40, + .offoff = 0, + .offshift = 0, + .offmask = 0, + }, + .key = { + .off = 4, + .val = cpu_to_be32(0x00000600), + .mask = cpu_to_be32(0x0000ff00), + }, + .jump = cxgb4_tcp_fields, + }, + { + /* UDP Jump */ + .sel = { + .off = 40, + .offoff = 0, + .offshift = 0, + .offmask = 0, + }, + .key = { + .off = 4, + .val = cpu_to_be32(0x00001100), + .mask = cpu_to_be32(0x0000ff00), + }, + .jump = cxgb4_udp_fields, + }, + { .jump = NULL }, }; struct cxgb4_link { From 63b53b0b99cd5f2d9754a21eda2ed8e706646cc9 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Wed, 24 Jun 2020 01:51:35 +0530 Subject: [PATCH 398/462] cxgb4: fix endian conversions for L4 ports in filters The source and destination L4 ports in filter offload need to be in CPU endian. They will finally be converted to Big Endian after all operations are done and before giving them to hardware. The L4 ports for NAT are expected to be passed as a byte stream TCB. So, treat them as such. Fixes following sparse warnings in several places: cxgb4_tc_flower.c:159:33: warning: cast from restricted __be16 cxgb4_tc_flower.c:159:33: warning: incorrect type in argument 1 (different base types) cxgb4_tc_flower.c:159:33: expected unsigned short [usertype] val cxgb4_tc_flower.c:159:33: got restricted __be16 [usertype] dst Fixes: dca4faeb812f ("cxgb4: Add LE hash collision bug fix path in LLD driver") Fixes: 62488e4b53ae ("cxgb4: add basic tc flower offload support") Fixes: 557ccbf9dfa8 ("cxgb4: add tc flower support for L3/L4 rewrite") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 15 +++++++--- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 30 +++++++------------ 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 796555255207..c6bf2648fe42 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -165,6 +165,9 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f, unsigned int tid, bool dip, bool sip, bool dp, bool sp) { + u8 *nat_lp = (u8 *)&f->fs.nat_lport; + u8 *nat_fp = (u8 *)&f->fs.nat_fport; + if (dip) { if (f->fs.type) { set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W, @@ -236,8 +239,9 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f, } set_tcb_field(adap, f, tid, TCB_PDU_HDR_LEN_W, WORD_MASK, - (dp ? f->fs.nat_lport : 0) | - (sp ? f->fs.nat_fport << 16 : 0), 1); + (dp ? (nat_lp[1] | nat_lp[0] << 8) : 0) | + (sp ? (nat_fp[1] << 16 | nat_fp[0] << 24) : 0), + 1); } /* Validate filter spec against configuration done on the card. */ @@ -909,6 +913,9 @@ int set_filter_wr(struct adapter *adapter, int fidx) fwr->fpm = htons(f->fs.mask.fport); if (adapter->params.filter2_wr_support) { + u8 *nat_lp = (u8 *)&f->fs.nat_lport; + u8 *nat_fp = (u8 *)&f->fs.nat_fport; + fwr->natmode_to_ulp_type = FW_FILTER2_WR_ULP_TYPE_V(f->fs.nat_mode ? ULP_MODE_TCPDDP : @@ -916,8 +923,8 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER2_WR_NATMODE_V(f->fs.nat_mode); memcpy(fwr->newlip, f->fs.nat_lip, sizeof(fwr->newlip)); memcpy(fwr->newfip, f->fs.nat_fip, sizeof(fwr->newfip)); - fwr->newlport = htons(f->fs.nat_lport); - fwr->newfport = htons(f->fs.nat_fport); + fwr->newlport = htons(nat_lp[1] | nat_lp[0] << 8); + fwr->newfport = htons(nat_fp[1] | nat_fp[0] << 8); } /* Mark the filter as "pending" and ship off the Filter Work Request. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 854b1717a70d..1e66159de079 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2609,7 +2609,7 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, /* Clear out filter specifications */ memset(&f->fs, 0, sizeof(struct ch_filter_specification)); - f->fs.val.lport = cpu_to_be16(sport); + f->fs.val.lport = be16_to_cpu(sport); f->fs.mask.lport = ~0; val = (u8 *)&sip; if ((val[0] | val[1] | val[2] | val[3]) != 0) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 4a5fa9eba0b6..59b65d4db086 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -58,10 +58,6 @@ static struct ch_tc_pedit_fields pedits[] = { PEDIT_FIELDS(IP6_, DST_63_32, 4, nat_lip, 4), PEDIT_FIELDS(IP6_, DST_95_64, 4, nat_lip, 8), PEDIT_FIELDS(IP6_, DST_127_96, 4, nat_lip, 12), - PEDIT_FIELDS(TCP_, SPORT, 2, nat_fport, 0), - PEDIT_FIELDS(TCP_, DPORT, 2, nat_lport, 0), - PEDIT_FIELDS(UDP_, SPORT, 2, nat_fport, 0), - PEDIT_FIELDS(UDP_, DPORT, 2, nat_lport, 0), }; static struct ch_tc_flower_entry *allocate_flower_entry(void) @@ -156,14 +152,14 @@ static void cxgb4_process_flow_match(struct net_device *dev, struct flow_match_ports match; flow_rule_match_ports(rule, &match); - fs->val.lport = cpu_to_be16(match.key->dst); - fs->mask.lport = cpu_to_be16(match.mask->dst); - fs->val.fport = cpu_to_be16(match.key->src); - fs->mask.fport = cpu_to_be16(match.mask->src); + fs->val.lport = be16_to_cpu(match.key->dst); + fs->mask.lport = be16_to_cpu(match.mask->dst); + fs->val.fport = be16_to_cpu(match.key->src); + fs->mask.fport = be16_to_cpu(match.mask->src); /* also initialize nat_lport/fport to same values */ - fs->nat_lport = cpu_to_be16(match.key->dst); - fs->nat_fport = cpu_to_be16(match.key->src); + fs->nat_lport = fs->val.lport; + fs->nat_fport = fs->val.fport; } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { @@ -354,12 +350,9 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val, switch (offset) { case PEDIT_TCP_SPORT_DPORT: if (~mask & PEDIT_TCP_UDP_SPORT_MASK) - offload_pedit(fs, cpu_to_be32(val) >> 16, - cpu_to_be32(mask) >> 16, - TCP_SPORT); + fs->nat_fport = val; else - offload_pedit(fs, cpu_to_be32(val), - cpu_to_be32(mask), TCP_DPORT); + fs->nat_lport = val >> 16; } fs->nat_mode = NAT_MODE_ALL; break; @@ -367,12 +360,9 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val, switch (offset) { case PEDIT_UDP_SPORT_DPORT: if (~mask & PEDIT_TCP_UDP_SPORT_MASK) - offload_pedit(fs, cpu_to_be32(val) >> 16, - cpu_to_be32(mask) >> 16, - UDP_SPORT); + fs->nat_fport = val; else - offload_pedit(fs, cpu_to_be32(val), - cpu_to_be32(mask), UDP_DPORT); + fs->nat_lport = val >> 16; } fs->nat_mode = NAT_MODE_ALL; } From f286dd8eaad5a2758750f407ab079298e0bcc8a5 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Wed, 24 Jun 2020 01:51:36 +0530 Subject: [PATCH 399/462] cxgb4: use correct type for all-mask IP address comparison Use correct type to check for all-mask exact match IP addresses. Fixes following sparse warnings due to big endian value checks against 0xffffffff in is_addr_all_mask(): cxgb4_filter.c:977:25: warning: restricted __be32 degrades to integer cxgb4_filter.c:983:37: warning: restricted __be32 degrades to integer cxgb4_filter.c:984:37: warning: restricted __be32 degrades to integer cxgb4_filter.c:985:37: warning: restricted __be32 degrades to integer cxgb4_filter.c:986:37: warning: restricted __be32 degrades to integer Fixes: 3eb8b62d5a26 ("cxgb4: add support to create hash-filters via tc-flower offload") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index c6bf2648fe42..7a7f61a8cdf4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1112,16 +1112,16 @@ static bool is_addr_all_mask(u8 *ipmask, int family) struct in_addr *addr; addr = (struct in_addr *)ipmask; - if (addr->s_addr == 0xffffffff) + if (ntohl(addr->s_addr) == 0xffffffff) return true; } else if (family == AF_INET6) { struct in6_addr *addr6; addr6 = (struct in6_addr *)ipmask; - if (addr6->s6_addr32[0] == 0xffffffff && - addr6->s6_addr32[1] == 0xffffffff && - addr6->s6_addr32[2] == 0xffffffff && - addr6->s6_addr32[3] == 0xffffffff) + if (ntohl(addr6->s6_addr32[0]) == 0xffffffff && + ntohl(addr6->s6_addr32[1]) == 0xffffffff && + ntohl(addr6->s6_addr32[2]) == 0xffffffff && + ntohl(addr6->s6_addr32[3]) == 0xffffffff) return true; } return false; From 1992ded5d111997877a9a25205976d8d03c46814 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Wed, 24 Jun 2020 01:51:37 +0530 Subject: [PATCH 400/462] cxgb4: fix SGE queue dump destination buffer context The data in destination buffer is expected to be be parsed in big endian. So, use the right context. Fixes following sparse warning: cudbg_lib.c:2041:44: warning: incorrect type in assignment (different base types) cudbg_lib.c:2041:44: expected unsigned long long [usertype] cudbg_lib.c:2041:44: got restricted __be64 [usertype] Fixes: 736c3b94474e ("cxgb4: collect egress and ingress SGE queue contexts") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 7b9cd69f9844..d8ab8e366818 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -1975,7 +1975,6 @@ int cudbg_collect_dump_context(struct cudbg_init *pdbg_init, u8 mem_type[CTXT_INGRESS + 1] = { 0 }; struct cudbg_buffer temp_buff = { 0 }; struct cudbg_ch_cntxt *buff; - u64 *dst_off, *src_off; u8 *ctx_buf; u8 i, k; int rc; @@ -2044,8 +2043,11 @@ int cudbg_collect_dump_context(struct cudbg_init *pdbg_init, } for (j = 0; j < max_ctx_qid; j++) { + __be64 *dst_off; + u64 *src_off; + src_off = (u64 *)(ctx_buf + j * SGE_CTXT_SIZE); - dst_off = (u64 *)buff->data; + dst_off = (__be64 *)buff->data; /* The data is stored in 64-bit cpu order. Convert it * to big endian before parsing. From 2f6670165d22406467c667e9454e558bc75b933e Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Wed, 24 Jun 2020 01:51:38 +0530 Subject: [PATCH 401/462] cxgb4: remove cast when saving IPv4 partial checksum The checksum field in IPv4 header is in __sum16 and ip_fast_csum() also returns __sum16. So, no need to cast it to u16. Fixes following sparse warning: sge.c:1539:47: warning: cast from restricted __sum16 sge.c:1539:44: warning: incorrect type in assignment (different base types) sge.c:1539:44: expected restricted __sum16 [usertype] check sge.c:1539:44: got unsigned short [usertype] Fixes: d0a1299c6bf7 ("cxgb4: add support for vxlan segmentation offload") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 3c8b4b153ec4..72ff46b16704 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1524,8 +1524,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (iph->version == 4) { iph->check = 0; iph->tot_len = 0; - iph->check = (u16)(~ip_fast_csum((u8 *)iph, - iph->ihl)); + iph->check = ~ip_fast_csum((u8 *)iph, iph->ihl); } if (skb->ip_summed == CHECKSUM_PARTIAL) cntrl = hwcsum(adap->params.chip, skb); From bab3bcf3e9873e1e6e9cb39c1f55a05fb10415a4 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Wed, 24 Jun 2020 01:51:39 +0530 Subject: [PATCH 402/462] cxgb4: move DCB version extern to header file Move the DCB version string array extern to header file. Fixes following sparse warning: cxgb4_dcb.c:13:12: warning: symbol 'dcb_ver_array' was not declared. Should it be static? Fixes: ebddd97afb89 ("cxgb4: add support to display DCB info") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h | 3 +++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h index d3c654b9989b..80c6627fe981 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h @@ -136,6 +136,9 @@ static inline __u8 bitswap_1(unsigned char val) ((val & 0x02) << 5) | ((val & 0x01) << 7); } + +extern const char * const dcb_ver_array[]; + #define CXGB4_DCB_ENABLED true #else /* !CONFIG_CHELSIO_T4_DCB */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 828499256004..b477b8842905 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2379,7 +2379,6 @@ static const struct file_operations rss_vf_config_debugfs_fops = { }; #ifdef CONFIG_CHELSIO_T4_DCB -extern char *dcb_ver_array[]; /* Data Center Briging information for each port. */ From 00e31cfc8995de2a15352522fc672922a23b435e Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Wed, 24 Jun 2020 01:51:40 +0530 Subject: [PATCH 403/462] cxgb4: fix set but unused variable when DCB is disabled Remove the set but unused variable when DCB is disabled. Instead, do the calculation directly inline. Fixes following warning in make W=1: cxgb4_main.c: In function 'cfg_queues': cxgb4_main.c:5380:29: warning: variable 'n1g' set but not used [-Wunused-but-set-variable] u32 i, n10g = 0, qidx = 0, n1g = 0; ^ Fixes: 116ca924aea6 ("cxgb4: fix checks for max queues to allocate") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 1e66159de079..472e7c9e47bd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5377,10 +5377,10 @@ static inline bool is_x_10g_port(const struct link_config *lc) static int cfg_queues(struct adapter *adap) { u32 avail_qsets, avail_eth_qsets, avail_uld_qsets; - u32 i, n10g = 0, qidx = 0, n1g = 0; u32 ncpus = num_online_cpus(); u32 niqflint, neq, num_ulds; struct sge *s = &adap->sge; + u32 i, n10g = 0, qidx = 0; u32 q10g = 0, q1g; /* Reduce memory usage in kdump environment, disable all offload. */ @@ -5426,7 +5426,6 @@ static int cfg_queues(struct adapter *adap) if (n10g) q10g = (avail_eth_qsets - (adap->params.nports - n10g)) / n10g; - n1g = adap->params.nports - n10g; #ifdef CONFIG_CHELSIO_T4_DCB /* For Data Center Bridging support we need to be able to support up * to 8 Traffic Priorities; each of which will be assigned to its @@ -5444,7 +5443,8 @@ static int cfg_queues(struct adapter *adap) else q10g = max(8U, q10g); - while ((q10g * n10g) > (avail_eth_qsets - n1g * q1g)) + while ((q10g * n10g) > + (avail_eth_qsets - (adap->params.nports - n10g) * q1g)) q10g--; #else /* !CONFIG_CHELSIO_T4_DCB */ From 29bbf5d7f5efe84f94bc66c6c24614f812a95f62 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Wed, 24 Jun 2020 01:51:41 +0530 Subject: [PATCH 404/462] cxgb4: update kernel-doc line comments Update several kernel-doc line comments to fix warnings reported by make W=1. Fixes following class of warnings reported by make W=1 in several places: l2t.c:616: warning: Cannot understand * @dev: net_device pointer t4_hw.c:3175: warning: Function parameter or member 'adap' not described in 't4_get_exprom_version' t4_hw.c:3175: warning: Excess function parameter 'adapter' description in 't4_get_exprom_version' Fixes: 56d36be4dd5f ("cxgb4: Add HW and FW support code") Fixes: fd3a47900b6f ("cxgb4: Add packet queues and packet DMA code") Fixes: 26f7cbc0a5a4 ("cxgb4: Don't attempt to upgrade T4 firmware when cxgb4 will end up as a slave") Fixes: 793dad94e745 ("RDMA/cxgb4: Fix bug for active and passive LE hash collision path") Fixes: ba3f8cd55f2a ("cxgb4: Add support in cxgb4 to get expansion rom version via ethtool") Fixes: f7502659cec8 ("cxgb4: Add API to alloc l2t entry; also update existing ones") Fixes: ddc7740d9a7c ("cxgb4: Decode link down reason code obtained from firmware") Fixes: 193c4c2845f7 ("cxgb4: Update T6 Buffer Group and Channel Mappings") Fixes: 8f46d46715a1 ("cxgb4: Use Firmware params to get buffer-group map") Fixes: a456950445a0 ("cxgb4: time stamping interface for PTP") Fixes: 9c33e4208bce ("cxgb4: Add PTP Hardware Clock (PHC) support") Fixes: c3168cabe1af ("cxgb4/cxgbvf: Handle 32-bit fw port capabilities") Fixes: 5ccf9d049615 ("cxgb4: update API for TP indirect register access") Fixes: 3bdb376e6944 ("cxgb4: introduce SMT ops to prepare for SMAC rewrite support") Fixes: 736c3b94474e ("cxgb4: collect egress and ingress SGE queue contexts") Fixes: f56ec6766dcf ("cxgb4: Add support for ethtool i2c dump") Fixes: 9d5fd927d20b ("cxgb4/cxgb4vf: add support for ndo_set_vf_vlan") Fixes: 98f3697f8d41 ("cxgb4: add tc flower match support for tunnel VNI") Fixes: 02d805dc5fe3 ("cxgb4: use new fw interface to get the VIN and smt index") Fixes: 3f8cfd0d95e6 ("cxgb4/cxgb4vf: Program hash region for {t4/t4vf}_change_mac()") Fixes: d429005fdf2c ("cxgb4/cxgb4vf: Add support for SGE doorbell queue timer") Fixes: 0e395b3cb1fb ("cxgb4: add FLOWC based QoS offload") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 2 +- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 3 +- .../net/ethernet/chelsio/cxgb4/cxgb4_ptp.c | 3 +- drivers/net/ethernet/chelsio/cxgb4/l2t.c | 1 + drivers/net/ethernet/chelsio/cxgb4/sched.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/sge.c | 19 +++++----- drivers/net/ethernet/chelsio/cxgb4/smt.c | 2 ++ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 36 +++++++++---------- 8 files changed, 38 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 9fd496732b2c..f27be1132d37 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -588,7 +588,7 @@ static void fw_caps_to_lmm(enum fw_port_type port_type, /** * lmm_to_fw_caps - translate ethtool Link Mode Mask to Firmware * capabilities - * @et_lmm: ethtool Link Mode Mask + * @link_mode_mask: ethtool Link Mode Mask * * Translate ethtool Link Mode Mask into a Firmware Port capabilities * value. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 472e7c9e47bd..0329a6b52087 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -449,7 +449,7 @@ static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok) * or -1 * @addr: the new MAC address value * @persist: whether a new MAC allocation should be persistent - * @add_smt: if true also add the address to the HW SMT + * @smt_idx: the destination to store the new SMT index. * * Modifies an MPS filter and sets it to the new MAC address if * @tcam_idx >= 0, or adds the MAC address to a new filter if @@ -1615,6 +1615,7 @@ static int tid_init(struct tid_info *t) * @stid: the server TID * @sip: local IP address to bind server to * @sport: the server's TCP port + * @vlan: the VLAN header information * @queue: queue to direct messages from this server to * * Create an IP server for the given port and address. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c index f5bc996ac77d..70dbee89118e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c @@ -194,6 +194,7 @@ int cxgb4_ptp_redirect_rx_packet(struct adapter *adapter, struct port_info *pi) } /** + * cxgb4_ptp_adjfreq - Adjust frequency of PHC cycle counter * @ptp: ptp clock structure * @ppb: Desired frequency change in parts per billion * @@ -229,7 +230,7 @@ static int cxgb4_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) /** * cxgb4_ptp_fineadjtime - Shift the time of the hardware clock - * @ptp: ptp clock structure + * @adapter: board private structure * @delta: Desired change in nanoseconds * * Adjust the timer by resetting the timecounter structure. diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 0ed20a9cca14..c4864125fe02 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -609,6 +609,7 @@ struct l2t_entry *t4_l2t_alloc_switching(struct adapter *adap, u16 vlan, } /** + * cxgb4_l2t_alloc_switching - Allocates an L2T entry for switch filters * @dev: net_device pointer * @vlan: VLAN Id * @port: Associated port diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c index fde93c50cfec..a1b14468d1ff 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sched.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c @@ -598,7 +598,7 @@ struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev, /** * cxgb4_sched_class_free - free a scheduling class * @dev: net_device pointer - * @e: scheduling class + * @classid: scheduling class id to free * * Frees a scheduling class if there are no users. */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 72ff46b16704..32a45dc51ed7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -302,7 +302,7 @@ static void deferred_unmap_destructor(struct sk_buff *skb) /** * free_tx_desc - reclaims Tx descriptors and their buffers - * @adapter: the adapter + * @adap: the adapter * @q: the Tx queue to reclaim descriptors from * @n: the number of descriptors to reclaim * @unmap: whether the buffers should be unmapped for DMA @@ -722,6 +722,7 @@ static inline unsigned int flits_to_desc(unsigned int n) /** * is_eth_imm - can an Ethernet packet be sent as immediate data? * @skb: the packet + * @chip_ver: chip version * * Returns whether an Ethernet packet is small enough to fit as * immediate data. Return value corresponds to headroom required. @@ -749,6 +750,7 @@ static inline int is_eth_imm(const struct sk_buff *skb, unsigned int chip_ver) /** * calc_tx_flits - calculate the number of flits for a packet Tx WR * @skb: the packet + * @chip_ver: chip version * * Returns the number of flits needed for a Tx WR for the given Ethernet * packet, including the needed WR and CPL headers. @@ -804,6 +806,7 @@ static inline unsigned int calc_tx_flits(const struct sk_buff *skb, /** * calc_tx_descs - calculate the number of Tx descriptors for a packet * @skb: the packet + * @chip_ver: chip version * * Returns the number of Tx descriptors needed for the given Ethernet * packet, including the needed WR and CPL headers. @@ -2408,9 +2411,9 @@ static void eosw_txq_flush_pending_skbs(struct sge_eosw_txq *eosw_txq) /** * cxgb4_ethofld_send_flowc - Send ETHOFLD flowc request to bind eotid to tc. - * @dev - netdevice - * @eotid - ETHOFLD tid to bind/unbind - * @tc - traffic class. If set to FW_SCHED_CLS_NONE, then unbinds the @eotid + * @dev: netdevice + * @eotid: ETHOFLD tid to bind/unbind + * @tc: traffic class. If set to FW_SCHED_CLS_NONE, then unbinds the @eotid * * Send a FLOWC work request to bind an ETHOFLD TID to a traffic class. * If @tc is set to FW_SCHED_CLS_NONE, then the @eotid is unbound from @@ -2689,7 +2692,6 @@ static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb) /** * txq_stop_maperr - stop a Tx queue due to I/O MMU exhaustion - * @adap: the adapter * @q: the queue to stop * * Mark a Tx queue stopped due to I/O MMU exhaustion and resulting @@ -3284,7 +3286,7 @@ enum { /** * t4_systim_to_hwstamp - read hardware time stamp - * @adap: the adapter + * @adapter: the adapter * @skb: the packet * * Read Time Stamp from MPS packet and insert in skb which @@ -3318,8 +3320,9 @@ static noinline int t4_systim_to_hwstamp(struct adapter *adapter, /** * t4_rx_hststamp - Recv PTP Event Message - * @adap: the adapter + * @adapter: the adapter * @rsp: the response queue descriptor holding the RX_PKT message + * @rxq: the response queue holding the RX_PKT message * @skb: the packet * * PTP enabled and MPS packet, read HW timestamp @@ -3343,7 +3346,7 @@ static int t4_rx_hststamp(struct adapter *adapter, const __be64 *rsp, /** * t4_tx_hststamp - Loopback PTP Transmit Event Message - * @adap: the adapter + * @adapter: the adapter * @skb: the packet * @dev: the ingress net device * diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.c b/drivers/net/ethernet/chelsio/cxgb4/smt.c index 01c65d13fc0e..cbe72ed27b1e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/smt.c +++ b/drivers/net/ethernet/chelsio/cxgb4/smt.c @@ -103,6 +103,7 @@ static void t4_smte_free(struct smt_entry *e) } /** + * cxgb4_smt_release - Release SMT entry * @e: smt entry to release * * Releases ref count and frees up an smt entry from SMT table @@ -231,6 +232,7 @@ static struct smt_entry *t4_smt_alloc_switching(struct adapter *adap, u16 pfvf, } /** + * cxgb4_smt_alloc_switching - Allocates an SMT entry for switch filters. * @dev: net_device pointer * @smac: MAC address to add to SMT * Returns pointer to the SMT entry created diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 1c8068c02728..1aa6dc10dc0b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3163,7 +3163,7 @@ int t4_get_tp_version(struct adapter *adapter, u32 *vers) /** * t4_get_exprom_version - return the Expansion ROM version (if any) - * @adapter: the adapter + * @adap: the adapter * @vers: where to place the version * * Reads the Expansion ROM header from FLASH and returns the version @@ -5310,7 +5310,7 @@ static unsigned int t4_use_ldst(struct adapter *adap) * @cmd: TP fw ldst address space type * @vals: where the indirect register values are stored/written * @nregs: how many indirect registers to read/write - * @start_idx: index of first indirect register to read/write + * @start_index: index of first indirect register to read/write * @rw: Read (1) or Write (0) * @sleep_ok: if true we may sleep while awaiting command completion * @@ -6115,7 +6115,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]) /** * compute_mps_bg_map - compute the MPS Buffer Group Map for a Port - * @adap: the adapter + * @adapter: the adapter * @pidx: the port index * * Computes and returns a bitmap indicating which MPS buffer groups are @@ -6252,7 +6252,7 @@ static unsigned int t4_get_tp_e2c_map(struct adapter *adapter, int pidx) /** * t4_get_tp_ch_map - return TP ingress channels associated with a port - * @adapter: the adapter + * @adap: the adapter * @pidx: the port index * * Returns a bitmap indicating which TP Ingress Channels are associated @@ -6589,7 +6589,7 @@ int t4_mdio_rd(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, * @phy_addr: the PHY address * @mmd: the PHY MMD to access (0 for clause 22 PHYs) * @reg: the register to write - * @valp: value to write + * @val: value to write * * Issues a FW command through the given mailbox to write a PHY register. */ @@ -6615,7 +6615,7 @@ int t4_mdio_wr(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, /** * t4_sge_decode_idma_state - decode the idma state - * @adap: the adapter + * @adapter: the adapter * @state: the state idma is stuck in */ void t4_sge_decode_idma_state(struct adapter *adapter, int state) @@ -6782,7 +6782,7 @@ void t4_sge_decode_idma_state(struct adapter *adapter, int state) * t4_sge_ctxt_flush - flush the SGE context cache * @adap: the adapter * @mbox: mailbox to use for the FW command - * @ctx_type: Egress or Ingress + * @ctxt_type: Egress or Ingress * * Issues a FW command through the given mailbox to flush the * SGE context cache. @@ -6809,7 +6809,7 @@ int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type) /** * t4_read_sge_dbqtimers - read SGE Doorbell Queue Timer values - * @adap - the adapter + * @adap: the adapter * @ndbqtimers: size of the provided SGE Doorbell Queue Timer table * @dbqtimers: SGE Doorbell Queue Timer table * @@ -7092,6 +7092,7 @@ static int t4_fw_halt(struct adapter *adap, unsigned int mbox, int force) /** * t4_fw_restart - restart the firmware by taking the uP out of RESET * @adap: the adapter + * @mbox: mailbox to use for the FW command * @reset: if we want to do a RESET to restart things * * Restart firmware previously halted by t4_fw_halt(). On successful @@ -7630,6 +7631,8 @@ int t4_cfg_pfvf(struct adapter *adap, unsigned int mbox, unsigned int pf, * @nmac: number of MAC addresses needed (1 to 5) * @mac: the MAC addresses of the VI * @rss_size: size of RSS table slice associated with this VI + * @vivld: the destination to store the VI Valid value. + * @vin: the destination to store the VIN value. * * Allocates a virtual interface for the given physical port. If @mac is * not %NULL it contains the MAC addresses of the VI as assigned by FW. @@ -7848,7 +7851,7 @@ int t4_free_raw_mac_filt(struct adapter *adap, unsigned int viid, * t4_alloc_encap_mac_filt - Adds a mac entry in mps tcam with VNI support * @adap: the adapter * @viid: the VI id - * @mac: the MAC address + * @addr: the MAC address * @mask: the mask * @vni: the VNI id for the tunnel protocol * @vni_mask: mask for the VNI id @@ -7897,11 +7900,11 @@ int t4_alloc_encap_mac_filt(struct adapter *adap, unsigned int viid, * t4_alloc_raw_mac_filt - Adds a mac entry in mps tcam * @adap: the adapter * @viid: the VI id - * @mac: the MAC address + * @addr: the MAC address * @mask: the mask * @idx: index at which to add this entry - * @port_id: the port index * @lookup_type: MAC address for inner (1) or outer (0) header + * @port_id: the port index * @sleep_ok: call is allowed to sleep * * Adds the mac entry at the specified index using raw mac interface. @@ -8126,7 +8129,7 @@ int t4_free_mac_filt(struct adapter *adap, unsigned int mbox, * @idx: index of existing filter for old value of MAC address, or -1 * @addr: the new MAC address value * @persist: whether a new MAC allocation should be persistent - * @add_smt: if true also add the address to the HW SMT + * @smt_idx: the destination to store the new SMT index. * * Modifies an exact-match filter and sets it to the new MAC address. * Note that in general it is not possible to modify the value of a given @@ -8448,7 +8451,6 @@ int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, /** * t4_link_down_rc_str - return a string for a Link Down Reason Code - * @adap: the adapter * @link_down_rc: Link Down Reason Code * * Returns a string representation of the Link Down Reason Code. @@ -8472,9 +8474,7 @@ static const char *t4_link_down_rc_str(unsigned char link_down_rc) return reason[link_down_rc]; } -/** - * Return the highest speed set in the port capabilities, in Mb/s. - */ +/* Return the highest speed set in the port capabilities, in Mb/s. */ static unsigned int fwcap_to_speed(fw_port_cap32_t caps) { #define TEST_SPEED_RETURN(__caps_speed, __speed) \ @@ -9110,7 +9110,6 @@ found: /** * t4_prep_adapter - prepare SW and HW for operation * @adapter: the adapter - * @reset: if true perform a HW reset * * Initialize adapter SW state for the various HW modules, set initial * values for some adapter tunables, take PHYs out of reset, and @@ -10395,6 +10394,7 @@ int t4_sched_params(struct adapter *adapter, u8 type, u8 level, u8 mode, /** * t4_i2c_rd - read I2C data from adapter * @adap: the adapter + * @mbox: mailbox to use for the FW command * @port: Port number if per-port device; <0 if not * @devid: per-port device ID or absolute device ID * @offset: byte offset into device I2C space @@ -10450,7 +10450,7 @@ int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port, /** * t4_set_vlan_acl - Set a VLAN id for the specified VF - * @adapter: the adapter + * @adap: the adapter * @mbox: mailbox to use for the FW command * @vf: one of the VFs instantiated by the specified PF * @vlan: The vlanid to be set From 20bb0c8f2c446c55f5a4e296beaa77d62ffe2d1e Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Wed, 24 Jun 2020 01:51:42 +0530 Subject: [PATCH 405/462] cxgb4vf: update kernel-doc line comments Update several kernel-doc line comments to fix warnings reported by make W=1. Fixes following class of warnings reported by make W=1 in several places: cxgb4vf_main.c:275: warning: Function parameter or member 'persistent' not described in 'cxgb4vf_change_mac' cxgb4vf_main.c:275: warning: Excess function parameter 'persist' description in 'cxgb4vf_change_mac' Fixes: 16f8bd4be754 ("cxgb4vf: Add core T4 PCI-E SR-IOV Virtual Function hardware definitions and device communication code") Fixes: c6e0d91464da ("cxgb4vf: Add T4 Virtual Function Scatter-Gather Engine DMA code") Fixes: e0a8b34a9cc4 ("cxgb4vf: Add and initialize some sge params for VF driver") Fixes: c3168cabe1af ("cxgb4/cxgbvf: Handle 32-bit fw port capabilities") Fixes: 0e23daeb6407 ("drivers/net: chelsio/cxgb*: Convert timers to use timer_setup()") Fixes: 3f8cfd0d95e6 ("cxgb4/cxgb4vf: Program hash region for {t4/t4vf}_change_mac()") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 3 +-- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 7 ++++--- drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 9 +++------ 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index cec865a97464..a7641be9094f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -260,8 +260,7 @@ static int cxgb4vf_set_addr_hash(struct port_info *pi) * @tcam_idx: TCAM index of existing filter for old value of MAC address, * or -1 * @addr: the new MAC address value - * @persist: whether a new MAC allocation should be persistent - * @add_smt: if true also add the address to the HW SMT + * @persistent: whether a new MAC allocation should be persistent * * Modifies an MPS filter and sets it to the new MAC address if * @tcam_idx >= 0, or adds the MAC address to a new filter if diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index f71c973398ec..8c3d6e11a4bf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1692,7 +1692,7 @@ static inline bool is_new_response(const struct rsp_ctrl *rc, * restore_rx_bufs - put back a packet's RX buffers * @gl: the packet gather list * @fl: the SGE Free List - * @nfrags: how many fragments in @si + * @frags: how many fragments in @si * * Called when we find out that the current packet, @si, can't be * processed right away for some reason. This is a very rare event and @@ -2054,7 +2054,7 @@ irq_handler_t t4vf_intr_handler(struct adapter *adapter) /** * sge_rx_timer_cb - perform periodic maintenance of SGE RX queues - * @data: the adapter + * @t: Rx timer * * Runs periodically from a timer to perform maintenance of SGE RX queues. * @@ -2113,7 +2113,7 @@ static void sge_rx_timer_cb(struct timer_list *t) /** * sge_tx_timer_cb - perform periodic maintenance of SGE Tx queues - * @data: the adapter + * @t: Tx timer * * Runs periodically from a timer to perform maintenance of SGE TX queues. * @@ -2405,6 +2405,7 @@ err: * t4vf_sge_alloc_eth_txq - allocate an SGE Ethernet TX Queue * @adapter: the adapter * @txq: pointer to the new txq to be filled in + * @dev: the network device * @devq: the network TX queue associated with the new txq * @iqid: the relative ingress queue ID to which events relating to * the new txq should be directed diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 9d49ff211cc1..a31b87390b50 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -389,9 +389,7 @@ static inline enum cc_fec fwcap_to_cc_fec(fw_port_cap32_t fw_fec) return cc_fec; } -/** - * Return the highest speed set in the port capabilities, in Mb/s. - */ +/* Return the highest speed set in the port capabilities, in Mb/s. */ static unsigned int fwcap_to_speed(fw_port_cap32_t caps) { #define TEST_SPEED_RETURN(__caps_speed, __speed) \ @@ -1467,6 +1465,7 @@ int t4vf_identify_port(struct adapter *adapter, unsigned int viid, * @bcast: 1 to enable broadcast Rx, 0 to disable it, -1 no change * @vlanex: 1 to enable hardware VLAN Tag extraction, 0 to disable it, * -1 no change + * @sleep_ok: call is allowed to sleep * * Sets Rx properties of a virtual interface. */ @@ -1906,7 +1905,7 @@ static const char *t4vf_link_down_rc_str(unsigned char link_down_rc) /** * t4vf_handle_get_port_info - process a FW reply message * @pi: the port info - * @rpl: start of the FW message + * @cmd: start of the FW message * * Processes a GET_PORT_INFO FW reply message. */ @@ -2137,8 +2136,6 @@ int t4vf_handle_fw_rpl(struct adapter *adapter, const __be64 *rpl) return 0; } -/** - */ int t4vf_prep_adapter(struct adapter *adapter) { int err; From 3c597282887fd55181578996dca52ce697d985a5 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 19 Jun 2020 07:43:49 +0800 Subject: [PATCH 406/462] erofs: fix partially uninitialized misuse in z_erofs_onlinepage_fixup Hongyu reported "id != index" in z_erofs_onlinepage_fixup() with specific aarch64 environment easily, which wasn't shown before. After digging into that, I found that high 32 bits of page->private was set to 0xaaaaaaaa rather than 0 (due to z_erofs_onlinepage_init behavior with specific compiler options). Actually we only use low 32 bits to keep the page information since page->private is only 4 bytes on most 32-bit platforms. However z_erofs_onlinepage_fixup() uses the upper 32 bits by mistake. Let's fix it now. Reported-and-tested-by: Hongyu Jin Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support") Cc: # 4.19+ Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20200618234349.22553-1-hsiangkao@aol.com Signed-off-by: Gao Xiang --- fs/erofs/zdata.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h index 7824f5563a55..9b66c28b3ae9 100644 --- a/fs/erofs/zdata.h +++ b/fs/erofs/zdata.h @@ -144,22 +144,22 @@ static inline void z_erofs_onlinepage_init(struct page *page) static inline void z_erofs_onlinepage_fixup(struct page *page, uintptr_t index, bool down) { - unsigned long *p, o, v, id; -repeat: - p = &page_private(page); - o = READ_ONCE(*p); + union z_erofs_onlinepage_converter u = { .v = &page_private(page) }; + int orig, orig_index, val; - id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; - if (id) { +repeat: + orig = atomic_read(u.o); + orig_index = orig >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; + if (orig_index) { if (!index) return; - DBG_BUGON(id != index); + DBG_BUGON(orig_index != index); } - v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) | - ((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down); - if (cmpxchg(p, o, v) != o) + val = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) | + ((orig & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down); + if (atomic_cmpxchg(u.o, orig, val) != orig) goto repeat; } From 6c95503c292610ff2898b4271c510c16efdcd4e1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 20 Jun 2020 12:45:54 +0900 Subject: [PATCH 407/462] tracing/boot: Fix config dependency for synthedic event Since commit 726721a51838 ("tracing: Move synthetic events to a separate file") decoupled synthetic event from histogram, boot-time tracing also has to check CONFIG_SYNTH_EVENT instead of CONFIG_HIST_TRIGGERS. Link: http://lkml.kernel.org/r/159262475441.185015.5300725180746017555.stgit@devnote2 Fixes: 726721a51838 ("tracing: Move synthetic events to a separate file") Reviewed-by: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 9de29bb45a27..8b5490cb02bb 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -120,7 +120,7 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) } #endif -#ifdef CONFIG_HIST_TRIGGERS +#ifdef CONFIG_SYNTH_EVENTS static int __init trace_boot_add_synth_event(struct xbc_node *node, const char *event) { From 6784beada631800f2c5afd567e5628c843362cee Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 20 Jun 2020 12:46:03 +0900 Subject: [PATCH 408/462] tracing: Fix event trigger to accept redundant spaces Fix the event trigger to accept redundant spaces in the trigger input. For example, these return -EINVAL echo " traceon" > events/ftrace/print/trigger echo "traceon if common_pid == 0" > events/ftrace/print/trigger echo "disable_event:kmem:kmalloc " > events/ftrace/print/trigger But these are hard to find what is wrong. To fix this issue, use skip_spaces() to remove spaces in front of actual tokens, and set NULL if there is no token. Link: http://lkml.kernel.org/r/159262476352.185015.5261566783045364186.stgit@devnote2 Cc: Tom Zanussi Cc: stable@vger.kernel.org Fixes: 85f2b08268c0 ("tracing: Add basic event trigger framework") Reviewed-by: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_trigger.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 3a74736da363..f725802160c0 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -216,11 +216,17 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file) int trigger_process_regex(struct trace_event_file *file, char *buff) { - char *command, *next = buff; + char *command, *next; struct event_command *p; int ret = -EINVAL; + next = buff = skip_spaces(buff); command = strsep(&next, ": \t"); + if (next) { + next = skip_spaces(next); + if (!*next) + next = NULL; + } command = (command[0] != '!') ? command : command + 1; mutex_lock(&trigger_cmd_mutex); @@ -630,8 +636,14 @@ event_trigger_callback(struct event_command *cmd_ops, int ret; /* separate the trigger from the filter (t:n [if filter]) */ - if (param && isdigit(param[0])) + if (param && isdigit(param[0])) { trigger = strsep(¶m, " \t"); + if (param) { + param = skip_spaces(param); + if (!*param) + param = NULL; + } + } trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); @@ -1368,6 +1380,11 @@ int event_enable_trigger_func(struct event_command *cmd_ops, trigger = strsep(¶m, " \t"); if (!trigger) return -EINVAL; + if (param) { + param = skip_spaces(param); + if (!*param) + param = NULL; + } system = strsep(&trigger, ":"); if (!trigger) From 20dc3847cc2fc886ee4eb9112e6e2fad9419b0c7 Mon Sep 17 00:00:00 2001 From: Sascha Ortmann Date: Thu, 18 Jun 2020 18:33:01 +0200 Subject: [PATCH 409/462] tracing/boottime: Fix kprobe multiple events Fix boottime kprobe events to report and abort after each failure when adding probes. As an example, when we try to set multiprobe kprobe events in bootconfig like this: ftrace.event.kprobes.vfsevents { probes = "vfs_read $arg1 $arg2,, !error! not reported;?", // leads to error "vfs_write $arg1 $arg2" } This will not work as expected. After commit da0f1f4167e3af69e ("tracing/boottime: Fix kprobe event API usage"), the function trace_boot_add_kprobe_event will not produce any error message when adding a probe fails at kprobe_event_gen_cmd_start. Furthermore, we continue to add probes when kprobe_event_gen_cmd_end fails (and kprobe_event_gen_cmd_start did not fail). In this case the function even returns successfully when the last call to kprobe_event_gen_cmd_end is successful. The behaviour of reporting and aborting after failures is not consistent. The function trace_boot_add_kprobe_event now reports each failure and stops adding probes immediately. Link: https://lkml.kernel.org/r/20200618163301.25854-1-sascha.ortmann@stud.uni-hannover.de Cc: stable@vger.kernel.org Cc: linux-kernel@i4.cs.fau.de Co-developed-by: Maximilian Werner Fixes: da0f1f4167e3 ("tracing/boottime: Fix kprobe event API usage") Acked-by: Masami Hiramatsu Signed-off-by: Maximilian Werner Signed-off-by: Sascha Ortmann Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 8b5490cb02bb..fa0fc08c6ef8 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -101,12 +101,16 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN); ret = kprobe_event_gen_cmd_start(&cmd, event, val); - if (ret) + if (ret) { + pr_err("Failed to generate probe: %s\n", buf); break; + } ret = kprobe_event_gen_cmd_end(&cmd); - if (ret) + if (ret) { pr_err("Failed to add probe: %s\n", buf); + break; + } } return ret; From d0ad2ea2bc185835f8a749302ad07b70528d2a09 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 23 Jun 2020 19:01:35 -0400 Subject: [PATCH 410/462] bnxt_en: Store the running firmware version code. We currently only store the firmware version as a string for ethtool and devlink info. Store it also as a version code. The next 2 patches will need to check the firmware major version to determine some workarounds. We also use the 16-bit firmware version fields if the firmware is newer and provides the 16-bit fields. Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 22 ++++++++++++++++++---- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 4 ++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b93e05f91d77..0ad8d490975e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7240,8 +7240,9 @@ static int __bnxt_hwrm_ver_get(struct bnxt *bp, bool silent) static int bnxt_hwrm_ver_get(struct bnxt *bp) { struct hwrm_ver_get_output *resp = bp->hwrm_cmd_resp_addr; + u16 fw_maj, fw_min, fw_bld, fw_rsv; u32 dev_caps_cfg, hwrm_ver; - int rc; + int rc, len; bp->hwrm_max_req_len = HWRM_MAX_REQ_LEN; mutex_lock(&bp->hwrm_cmd_lock); @@ -7273,9 +7274,22 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) resp->hwrm_intf_maj_8b, resp->hwrm_intf_min_8b, resp->hwrm_intf_upd_8b); - snprintf(bp->fw_ver_str, BC_HWRM_STR_LEN, "%d.%d.%d.%d", - resp->hwrm_fw_maj_8b, resp->hwrm_fw_min_8b, - resp->hwrm_fw_bld_8b, resp->hwrm_fw_rsvd_8b); + fw_maj = le16_to_cpu(resp->hwrm_fw_major); + if (bp->hwrm_spec_code > 0x10803 && fw_maj) { + fw_min = le16_to_cpu(resp->hwrm_fw_minor); + fw_bld = le16_to_cpu(resp->hwrm_fw_build); + fw_rsv = le16_to_cpu(resp->hwrm_fw_patch); + len = FW_VER_STR_LEN; + } else { + fw_maj = resp->hwrm_fw_maj_8b; + fw_min = resp->hwrm_fw_min_8b; + fw_bld = resp->hwrm_fw_bld_8b; + fw_rsv = resp->hwrm_fw_rsvd_8b; + len = BC_HWRM_STR_LEN; + } + bp->fw_ver_code = BNXT_FW_VER_CODE(fw_maj, fw_min, fw_bld, fw_rsv); + snprintf(bp->fw_ver_str, len, "%d.%d.%d.%d", fw_maj, fw_min, fw_bld, + fw_rsv); if (strlen(resp->active_pkg_name)) { int fw_ver_len = strlen(bp->fw_ver_str); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 9e173d74b72a..858440e1dcdc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1746,6 +1746,10 @@ struct bnxt { #define PHY_VER_STR_LEN (FW_VER_STR_LEN - BC_HWRM_STR_LEN) char fw_ver_str[FW_VER_STR_LEN]; char hwrm_ver_supp[FW_VER_STR_LEN]; + u64 fw_ver_code; +#define BNXT_FW_VER_CODE(maj, min, bld, rsv) \ + ((u64)(maj) << 48 | (u64)(min) << 32 | (u64)(bld) << 16 | (rsv)) + __be16 vxlan_port; u8 vxlan_port_cnt; __le16 vxlan_fw_dst_port_id; From fed7edd18143c68c63ea049999a7e861123de6de Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 23 Jun 2020 19:01:36 -0400 Subject: [PATCH 411/462] bnxt_en: Do not enable legacy TX push on older firmware. Older firmware may not support legacy TX push properly and may not be disabling it. So we check certain firmware versions that may have this problem and disable legacy TX push unconditionally. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0ad8d490975e..f8c50b1d2331 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6976,7 +6976,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->fw_cap |= BNXT_FW_CAP_ERR_RECOVER_RELOAD; bp->tx_push_thresh = 0; - if (flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) + if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) && + BNXT_FW_MAJ(bp) > 217) bp->tx_push_thresh = BNXT_TX_PUSH_THRESH; hw_resc->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 858440e1dcdc..78e2fd63ac3d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1749,6 +1749,7 @@ struct bnxt { u64 fw_ver_code; #define BNXT_FW_VER_CODE(maj, min, bld, rsv) \ ((u64)(maj) << 48 | (u64)(min) << 32 | (u64)(bld) << 16 | (rsv)) +#define BNXT_FW_MAJ(bp) ((bp)->fw_ver_code >> 48) __be16 vxlan_port; u8 vxlan_port_cnt; From c2dec363feb41544a76c8083aca2378990e17166 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 23 Jun 2020 19:01:37 -0400 Subject: [PATCH 412/462] bnxt_en: Fix statistics counters issue during ifdown with older firmware. On older firmware, the hardware statistics are not cleared when the driver frees the hardware stats contexts during ifdown. The driver expects these stats to be cleared and saves a copy before freeing the stats contexts. During the next ifup, the driver will likely allocate the same hardware stats contexts and this will cause a big increase in the counters as the old counters are added back to the saved counters. We fix it by making an additional firmware call to clear the counters before freeing the hw stats contexts when the firmware is the older 20.x firmware. Fixes: b8875ca356f1 ("bnxt_en: Save ring statistics before reset.") Reported-by: Jakub Kicinski Reviewed-by: Vasundhara Volam Signed-off-by: Michael Chan Tested-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f8c50b1d2331..6dc7cc4df9e8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6292,6 +6292,7 @@ int bnxt_hwrm_set_coal(struct bnxt *bp) static void bnxt_hwrm_stat_ctx_free(struct bnxt *bp) { + struct hwrm_stat_ctx_clr_stats_input req0 = {0}; struct hwrm_stat_ctx_free_input req = {0}; int i; @@ -6301,6 +6302,7 @@ static void bnxt_hwrm_stat_ctx_free(struct bnxt *bp) if (BNXT_CHIP_TYPE_NITRO_A0(bp)) return; + bnxt_hwrm_cmd_hdr_init(bp, &req0, HWRM_STAT_CTX_CLR_STATS, -1, -1); bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_FREE, -1, -1); mutex_lock(&bp->hwrm_cmd_lock); @@ -6310,7 +6312,11 @@ static void bnxt_hwrm_stat_ctx_free(struct bnxt *bp) if (cpr->hw_stats_ctx_id != INVALID_STATS_CTX_ID) { req.stat_ctx_id = cpu_to_le32(cpr->hw_stats_ctx_id); - + if (BNXT_FW_MAJ(bp) <= 20) { + req0.stat_ctx_id = req.stat_ctx_id; + _hwrm_send_message(bp, &req0, sizeof(req0), + HWRM_CMD_TIMEOUT); + } _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); From c55e28a8b43fcd7dc71868bd165705bc7741a7ca Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Tue, 23 Jun 2020 19:01:38 -0400 Subject: [PATCH 413/462] bnxt_en: Read VPD info only for PFs Virtual functions does not have VPD information. This patch modifies calling bnxt_read_vpd_info() only for PFs and avoids an unnecessary error log. Fixes: a0d0fd70fed5 ("bnxt_en: Read partno and serialno of the board from VPD") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6dc7cc4df9e8..6a884df44612 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11913,7 +11913,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->ethtool_ops = &bnxt_ethtool_ops; pci_set_drvdata(pdev, dev); - bnxt_vpd_read_info(bp); + if (BNXT_PF(bp)) + bnxt_vpd_read_info(bp); rc = bnxt_alloc_hwrm_resources(bp); if (rc) From 4b973f49830d74de36b5f4a75c07f6658524d2e4 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 24 Jun 2020 03:25:45 +0200 Subject: [PATCH 414/462] net: ethtool: Handle missing cable test TDR parameters A last minute change put the TDR cable test parameters into a nest. The validation is not sufficient, resulting in an oops if the nest is missing. Set default values first, then update them if the nest is provided. Fixes: f2bc8ad31a7f ("net: ethtool: Allow PHY cable test TDR data to configured") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ethtool/cabletest.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index 7b7a0456c15c..7194956aa09e 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -234,6 +234,14 @@ static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest, struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1]; int ret; + cfg->first = 100; + cfg->step = 100; + cfg->last = MAX_CABLE_LENGTH_CM; + cfg->pair = PHY_PAIR_ALL; + + if (!nest) + return 0; + ret = nla_parse_nested(tb, ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX, nest, cable_test_tdr_act_cfg_policy, info->extack); if (ret < 0) @@ -242,17 +250,12 @@ static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest, if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST]) cfg->first = nla_get_u32( tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST]); - else - cfg->first = 100; + if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST]) cfg->last = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST]); - else - cfg->last = MAX_CABLE_LENGTH_CM; if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP]) cfg->step = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP]); - else - cfg->step = 100; if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]) { cfg->pair = nla_get_u8(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]); @@ -263,8 +266,6 @@ static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest, "invalid pair parameter"); return -EINVAL; } - } else { - cfg->pair = PHY_PAIR_ALL; } if (cfg->first > MAX_CABLE_LENGTH_CM) { From 41b14fb8724d5a4b382a63cb4a1a61880347ccb8 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 22 Jun 2020 23:26:04 +0300 Subject: [PATCH 415/462] net: Do not clear the sock TX queue in sk_set_socket() Clearing the sock TX queue in sk_set_socket() might cause unexpected out-of-order transmit when called from sock_orphan(), as outstanding packets can pick a different TX queue and bypass the ones already queued. This is undesired in general. More specifically, it breaks the in-order scheduling property guarantee for device-offloaded TLS sockets. Remove the call to sk_tx_queue_clear() in sk_set_socket(), and add it explicitly only where needed. Fixes: e022f0b4a03f ("net: Introduce sk_tx_queue_mapping") Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: David S. Miller --- include/net/sock.h | 1 - net/core/sock.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index c53cc42b5ab9..3428619faae4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1848,7 +1848,6 @@ static inline int sk_rx_queue_get(const struct sock *sk) static inline void sk_set_socket(struct sock *sk, struct socket *sock) { - sk_tx_queue_clear(sk); sk->sk_socket = sock; } diff --git a/net/core/sock.c b/net/core/sock.c index 94391da27754..d832c650287c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1767,6 +1767,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, cgroup_sk_alloc(&sk->sk_cgrp_data); sock_update_classid(&sk->sk_cgrp_data); sock_update_netprioidx(&sk->sk_cgrp_data); + sk_tx_queue_clear(sk); } return sk; @@ -1990,6 +1991,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) */ sk_refcnt_debug_inc(newsk); sk_set_socket(newsk, NULL); + sk_tx_queue_clear(newsk); RCU_INIT_POINTER(newsk->sk_wq, NULL); if (newsk->sk_prot->sockets_allocated) From c718af2d00a37587b09e5958d142da7569f3d55b Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 23 Jun 2020 17:47:23 +0100 Subject: [PATCH 416/462] net: phylink: fix ethtool -A with attached PHYs Fix a phylink's ethtool set_pauseparam support deadlock caused by phylib interacting with phylink: we must not hold the state lock while calling phylib functions that may call into phylink_phy_change(). Fixes: f904f15ea9b5 ("net: phylink: allow ethtool -A to change flow control advertisement") Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 0ab65fb75258..453f9a399bb1 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1502,18 +1502,20 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, linkmode_set_pause(config->advertising, pause->tx_pause, pause->rx_pause); - /* If we have a PHY, phylib will call our link state function if the - * mode has changed, which will trigger a resolve and update the MAC - * configuration. + if (!pl->phydev && !test_bit(PHYLINK_DISABLE_STOPPED, + &pl->phylink_disable_state)) + phylink_pcs_config(pl, true, &pl->link_config); + + mutex_unlock(&pl->state_mutex); + + /* If we have a PHY, a change of the pause frame advertisement will + * cause phylib to renegotiate (if AN is enabled) which will in turn + * call our phylink_phy_change() and trigger a resolve. Note that + * we can't hold our state mutex while calling phy_set_asym_pause(). */ - if (pl->phydev) { + if (pl->phydev) phy_set_asym_pause(pl->phydev, pause->rx_pause, pause->tx_pause); - } else if (!test_bit(PHYLINK_DISABLE_STOPPED, - &pl->phylink_disable_state)) { - phylink_pcs_config(pl, true, &pl->link_config); - } - mutex_unlock(&pl->state_mutex); return 0; } From 2e919bc446faee429ac862a6cdb5e40017051f6b Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 23 Jun 2020 17:47:29 +0100 Subject: [PATCH 417/462] net: phylink: ensure manual pause mode configuration takes effect We have been relying on link events and mac_config() when the manual pause modes are changed. With recent developments, such as moving the programming of link state to mac_link_up(), this no longer works. To ensure that we update the MAC, we must generate a link-down followed by a link-up event; we can do that by setting mac_link_dropped and triggering a resolve. Fixes: 91a208f2185a ("net: phylink: propagate resolved link config via mac_link_up()") Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 453f9a399bb1..3b7c70e6c5dd 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1463,6 +1463,8 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, struct ethtool_pauseparam *pause) { struct phylink_link_state *config = &pl->link_config; + bool manual_changed; + int pause_state; ASSERT_RTNL(); @@ -1477,15 +1479,15 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, !pause->autoneg && pause->rx_pause != pause->tx_pause) return -EINVAL; - mutex_lock(&pl->state_mutex); - config->pause = 0; + pause_state = 0; if (pause->autoneg) - config->pause |= MLO_PAUSE_AN; + pause_state |= MLO_PAUSE_AN; if (pause->rx_pause) - config->pause |= MLO_PAUSE_RX; + pause_state |= MLO_PAUSE_RX; if (pause->tx_pause) - config->pause |= MLO_PAUSE_TX; + pause_state |= MLO_PAUSE_TX; + mutex_lock(&pl->state_mutex); /* * See the comments for linkmode_set_pause(), wrt the deficiencies * with the current implementation. A solution to this issue would @@ -1502,6 +1504,12 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, linkmode_set_pause(config->advertising, pause->tx_pause, pause->rx_pause); + manual_changed = (config->pause ^ pause_state) & MLO_PAUSE_AN || + (!(pause_state & MLO_PAUSE_AN) && + (config->pause ^ pause_state) & MLO_PAUSE_TXRX_MASK); + + config->pause = pause_state; + if (!pl->phydev && !test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) phylink_pcs_config(pl, true, &pl->link_config); @@ -1517,6 +1525,15 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, phy_set_asym_pause(pl->phydev, pause->rx_pause, pause->tx_pause); + /* If the manual pause settings changed, make sure we trigger a + * resolve to update their state; we can not guarantee that the + * link will cycle. + */ + if (manual_changed) { + pl->mac_link_dropped = true; + phylink_run_resolve(pl); + } + return 0; } EXPORT_SYMBOL_GPL(phylink_ethtool_set_pauseparam); From 220345e98f1cdc768eeb6e3364a0fa7ab9647fe7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 24 Jun 2020 14:23:40 +0200 Subject: [PATCH 418/462] ALSA: usb-audio: Fix OOB access of mixer element list The USB-audio mixer code holds a linked list of usb_mixer_elem_list, and several operations are performed for each mixer element. A few of them (snd_usb_mixer_notify_id() and snd_usb_mixer_interrupt_v2()) assume each mixer element being a usb_mixer_elem_info object that is a subclass of usb_mixer_elem_list, cast via container_of() and access it members. This may result in an out-of-bound access when a non-standard list element has been added, as spotted by syzkaller recently. This patch adds a new field, is_std_info, in usb_mixer_elem_list to indicate that the element is the usb_mixer_elem_info type or not, and skip the access to such an element if needed. Reported-by: syzbot+fb14314433463ad51625@syzkaller.appspotmail.com Reported-by: syzbot+2405ca3401e943c538b5@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20200624122340.9615-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 15 +++++++++++---- sound/usb/mixer.h | 9 +++++++-- sound/usb/mixer_quirks.c | 3 ++- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 15769f266790..eab0fd4fd7c3 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -581,8 +581,9 @@ static int check_matrix_bitmap(unsigned char *bmap, * if failed, give up and free the control instance. */ -int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list, - struct snd_kcontrol *kctl) +int snd_usb_mixer_add_list(struct usb_mixer_elem_list *list, + struct snd_kcontrol *kctl, + bool is_std_info) { struct usb_mixer_interface *mixer = list->mixer; int err; @@ -596,6 +597,7 @@ int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list, return err; } list->kctl = kctl; + list->is_std_info = is_std_info; list->next_id_elem = mixer->id_elems[list->id]; mixer->id_elems[list->id] = list; return 0; @@ -3234,8 +3236,11 @@ void snd_usb_mixer_notify_id(struct usb_mixer_interface *mixer, int unitid) unitid = delegate_notify(mixer, unitid, NULL, NULL); for_each_mixer_elem(list, mixer, unitid) { - struct usb_mixer_elem_info *info = - mixer_elem_list_to_info(list); + struct usb_mixer_elem_info *info; + + if (!list->is_std_info) + continue; + info = mixer_elem_list_to_info(list); /* invalidate cache, so the value is read from the device */ info->cached = 0; snd_ctl_notify(mixer->chip->card, SNDRV_CTL_EVENT_MASK_VALUE, @@ -3315,6 +3320,8 @@ static void snd_usb_mixer_interrupt_v2(struct usb_mixer_interface *mixer, if (!list->kctl) continue; + if (!list->is_std_info) + continue; info = mixer_elem_list_to_info(list); if (count > 1 && info->control != control) diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h index 41ec9dc4139b..c29e27ac43a7 100644 --- a/sound/usb/mixer.h +++ b/sound/usb/mixer.h @@ -66,6 +66,7 @@ struct usb_mixer_elem_list { struct usb_mixer_elem_list *next_id_elem; /* list of controls with same id */ struct snd_kcontrol *kctl; unsigned int id; + bool is_std_info; usb_mixer_elem_dump_func_t dump; usb_mixer_elem_resume_func_t resume; }; @@ -103,8 +104,12 @@ void snd_usb_mixer_notify_id(struct usb_mixer_interface *mixer, int unitid); int snd_usb_mixer_set_ctl_value(struct usb_mixer_elem_info *cval, int request, int validx, int value_set); -int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list, - struct snd_kcontrol *kctl); +int snd_usb_mixer_add_list(struct usb_mixer_elem_list *list, + struct snd_kcontrol *kctl, + bool is_std_info); + +#define snd_usb_mixer_add_control(list, kctl) \ + snd_usb_mixer_add_list(list, kctl, true) void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list, struct usb_mixer_interface *mixer, diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index b6bcf2f92383..cec1cfd7edb7 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -158,7 +158,8 @@ static int add_single_ctl_with_resume(struct usb_mixer_interface *mixer, return -ENOMEM; } kctl->private_free = snd_usb_mixer_elem_free; - return snd_usb_mixer_add_control(list, kctl); + /* don't use snd_usb_mixer_add_control() here, this is a special list element */ + return snd_usb_mixer_add_list(list, kctl, false); } /* From b46925a24a9ca7db03655657565e03d2de3027c8 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 23 Jun 2020 16:32:24 -0400 Subject: [PATCH 419/462] IB/hfi1: Restore kfree in dummy_netdev cleanup We need to do some rework on the dummy netdev. Calling the free_netdev() would normally make sense, and that will be addressed in an upcoming patch. For now just revert the behavior to what it was before keeping the unused variable removal part of the patch. The dd->dumm_netdev is mainly used for packet receiving through alloc_netdev_mqs() for typical net devices. A a result, it should be freed with kfree instead of free_netdev() that leads to a crash when unloading the hfi1 module: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 8000000855b54067 P4D 8000000855b54067 PUD 84a4f5067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 73 PID: 10299 Comm: modprobe Not tainted 5.6.0-rc5+ #1 Hardware name: Intel Corporation S2600WT2R/S2600WT2R, BIOS SE5C610.86B.01.01.0016.033120161139 03/31/2016 RIP: 0010:__hw_addr_flush+0x12/0x80 Code: 40 00 48 83 c4 08 4c 89 e7 5b 5d 41 5c e9 76 77 18 00 66 0f 1f 44 00 00 0f 1f 44 00 00 41 54 49 89 fc 55 53 48 8b 1f 48 39 df <48> 8b 2b 75 08 eb 4a 48 89 eb 48 89 c5 48 89 df e8 99 bf d0 ff 84 RSP: 0018:ffffb40e08783db8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000002 RDX: ffffb40e00000000 RSI: 0000000000000246 RDI: ffff88ab13662298 RBP: ffff88ab13662000 R08: 0000000000001549 R09: 0000000000001549 R10: 0000000000000001 R11: 0000000000aaaaaa R12: ffff88ab13662298 R13: ffff88ab1b259e20 R14: ffff88ab1b259e42 R15: 0000000000000000 FS: 00007fb39b534740(0000) GS:ffff88b31f940000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000084d3ea004 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: dev_addr_flush+0x15/0x30 free_netdev+0x7e/0x130 hfi1_netdev_free+0x59/0x70 [hfi1] remove_one+0x65/0x110 [hfi1] pci_device_remove+0x3b/0xc0 device_release_driver_internal+0xec/0x1b0 driver_detach+0x46/0x90 bus_remove_driver+0x58/0xd0 pci_unregister_driver+0x26/0xa0 hfi1_mod_cleanup+0xc/0xd54 [hfi1] __x64_sys_delete_module+0x16c/0x260 ? exit_to_usermode_loop+0xa4/0xc0 do_syscall_64+0x5b/0x200 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 193ba03141bb ("IB/hfi1: Use free_netdev() in hfi1_netdev_free()") Link: https://lore.kernel.org/r/20200623203224.106975.16926.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/netdev_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c index 63688e85e8da..6d263c9749b3 100644 --- a/drivers/infiniband/hw/hfi1/netdev_rx.c +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -373,7 +373,7 @@ void hfi1_netdev_free(struct hfi1_devdata *dd) { if (dd->dummy_netdev) { dd_dev_info(dd, "hfi1 netdev freed\n"); - free_netdev(dd->dummy_netdev); + kfree(dd->dummy_netdev); dd->dummy_netdev = NULL; } } From 822fbd37410639acdae368ea55477ddd3498651d Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 23 Jun 2020 16:32:30 -0400 Subject: [PATCH 420/462] IB/hfi1: Fix module use count flaw due to leftover module put calls When the try_module_get calls were removed from opening and closing of the i2c debugfs file, the corresponding module_put calls were missed. This results in an inaccurate module use count that requires a power cycle to fix. Fixes: 09fbca8e6240 ("IB/hfi1: No need to use try_module_get for debugfs") Link: https://lore.kernel.org/r/20200623203230.106975.76240.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Kaike Wan Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/debugfs.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 4633a0ce1a8c..2ced236e1553 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -985,15 +985,10 @@ static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf, static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target) { struct hfi1_pportdata *ppd; - int ret; ppd = private2ppd(fp); - ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); - if (ret) /* failed - release the module */ - module_put(THIS_MODULE); - - return ret; + return acquire_chip_resource(ppd->dd, i2c_target(target), 0); } static int i2c1_debugfs_open(struct inode *in, struct file *fp) @@ -1013,7 +1008,6 @@ static int __i2c_debugfs_release(struct inode *in, struct file *fp, u32 target) ppd = private2ppd(fp); release_chip_resource(ppd->dd, i2c_target(target)); - module_put(THIS_MODULE); return 0; } @@ -1031,18 +1025,10 @@ static int i2c2_debugfs_release(struct inode *in, struct file *fp) static int __qsfp_debugfs_open(struct inode *in, struct file *fp, u32 target) { struct hfi1_pportdata *ppd; - int ret; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; ppd = private2ppd(fp); - ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); - if (ret) /* failed - release the module */ - module_put(THIS_MODULE); - - return ret; + return acquire_chip_resource(ppd->dd, i2c_target(target), 0); } static int qsfp1_debugfs_open(struct inode *in, struct file *fp) @@ -1062,7 +1048,6 @@ static int __qsfp_debugfs_release(struct inode *in, struct file *fp, u32 target) ppd = private2ppd(fp); release_chip_resource(ppd->dd, i2c_target(target)); - module_put(THIS_MODULE); return 0; } From 82172b765530f84b4b9da929f2dcf46f2b7b232b Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 23 Jun 2020 16:43:22 -0400 Subject: [PATCH 421/462] IB/hfi1: Correct -EBUSY handling in tx code The current code mishandles -EBUSY in two ways: - The flow change doesn't test the return from the flush and runs on to process the current packet racing with the wakeup processing - The -EBUSY handling for a single packet inserts the tx into the txlist after the submit call, racing with the same wakeup processing Fix the first by dropping the skb and returning NETDEV_TX_OK. Fix the second by insuring the the list entry within the txreq is inited when allocated. This enables the sleep routine to detect that the txreq has used the non-list api and queue the packet to the txlist. Both flaws can lead to having the flushing thread executing in causing two threads to manipulate the txlist. Fixes: d99dc602e2a5 ("IB/hfi1: Add functions to transmit datagram ipoib packets") Link: https://lore.kernel.org/r/20200623204321.108092.83898.stgit@awfm-01.aw.intel.com Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ipoib_tx.c | 33 ++++++++++++++------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index 175290c56db9..76dcb5624aa6 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -369,6 +369,7 @@ static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev, tx->priv = priv; tx->txq = txp->txq; tx->skb = skb; + INIT_LIST_HEAD(&tx->txreq.list); hfi1_ipoib_build_ib_tx_headers(tx, txp); @@ -469,6 +470,7 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev, ret = hfi1_ipoib_submit_tx(txq, tx); if (likely(!ret)) { +tx_ok: trace_sdma_output_ibhdr(tx->priv->dd, &tx->sdma_hdr.hdr, ib_is_sc5(txp->flow.sc5)); @@ -478,20 +480,8 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev, txq->pkts_sent = false; - if (ret == -EBUSY) { - list_add_tail(&tx->txreq.list, &txq->tx_list); - - trace_sdma_output_ibhdr(tx->priv->dd, - &tx->sdma_hdr.hdr, - ib_is_sc5(txp->flow.sc5)); - hfi1_ipoib_check_queue_depth(txq); - return NETDEV_TX_OK; - } - - if (ret == -ECOMM) { - hfi1_ipoib_check_queue_depth(txq); - return NETDEV_TX_OK; - } + if (ret == -EBUSY || ret == -ECOMM) + goto tx_ok; sdma_txclean(priv->dd, &tx->txreq); dev_kfree_skb_any(skb); @@ -509,9 +499,17 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev, struct ipoib_txreq *tx; /* Has the flow change ? */ - if (txq->flow.as_int != txp->flow.as_int) - (void)hfi1_ipoib_flush_tx_list(dev, txq); + if (txq->flow.as_int != txp->flow.as_int) { + int ret; + ret = hfi1_ipoib_flush_tx_list(dev, txq); + if (unlikely(ret)) { + if (ret == -EBUSY) + ++dev->stats.tx_dropped; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + } tx = hfi1_ipoib_send_dma_common(dev, skb, txp); if (IS_ERR(tx)) { int ret = PTR_ERR(tx); @@ -612,6 +610,9 @@ static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde, netif_stop_subqueue(txq->priv->netdev, txq->q_idx); + if (list_empty(&txreq->list)) + /* came from non-list submit */ + list_add_tail(&txreq->list, &txq->tx_list); if (list_empty(&txq->wait.list)) iowait_queue(pkts_sent, wait->iow, &sde->dmawait); From 38fd98afeeb79d3b148db49f81f2ec6a37a4ee00 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 23 Jun 2020 16:43:28 -0400 Subject: [PATCH 422/462] IB/hfi1: Add atomic triggered sleep/wakeup When running iperf in a two host configuration the following trace can occur: [ 319.728730] NETDEV WATCHDOG: ib0 (hfi1): transmit queue 0 timed out The issue happens because the current implementation relies on the netif txq being stopped to control the flushing of the tx list. There are two resources that the transmit logic can wait on and stop the txq: - SDMA descriptors - Ring space to hold completions The ring space is tested on the sending side and relieved when the ring is consumed in the napi tx reaping. Unfortunately, that reaping can run conncurrently with the workqueue flushing of the txlist. If the txq is started just before the workitem executes, the txlist will never be flushed, leading to the txq being stuck. Fix by: - Adding sleep/wakeup wrappers * Use an atomic to control the call to the netif routines inside the wrappers - Use another atomic to record ring space exhaustion * Only wakeup when the a ring space exhaustion has happened and it relieved Add additional wrappers to clarify the ring space resource handling. Fixes: d99dc602e2a5 ("IB/hfi1: Add functions to transmit datagram ipoib packets") Link: https://lore.kernel.org/r/20200623204327.108092.4024.stgit@awfm-01.aw.intel.com Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ipoib.h | 6 +++ drivers/infiniband/hw/hfi1/ipoib_tx.c | 67 +++++++++++++++++++-------- 2 files changed, 53 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index 185c9b02c974..b8c9d0a003fb 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -67,6 +67,9 @@ struct hfi1_ipoib_circ_buf { * @sde: sdma engine * @tx_list: tx request list * @sent_txreqs: count of txreqs posted to sdma + * @stops: count of stops of queue + * @ring_full: ring has been filled + * @no_desc: descriptor shortage seen * @flow: tracks when list needs to be flushed for a flow change * @q_idx: ipoib Tx queue index * @pkts_sent: indicator packets have been sent from this queue @@ -80,6 +83,9 @@ struct hfi1_ipoib_txq { struct sdma_engine *sde; struct list_head tx_list; u64 sent_txreqs; + atomic_t stops; + atomic_t ring_full; + atomic_t no_desc; union hfi1_ipoib_flow flow; u8 q_idx; bool pkts_sent; diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index 76dcb5624aa6..9df292b51a05 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -55,23 +55,48 @@ static u64 hfi1_ipoib_txreqs(const u64 sent, const u64 completed) return sent - completed; } +static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq) +{ + return hfi1_ipoib_txreqs(txq->sent_txreqs, + atomic64_read(&txq->complete_txreqs)); +} + +static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq) +{ + if (atomic_inc_return(&txq->stops) == 1) + netif_stop_subqueue(txq->priv->netdev, txq->q_idx); +} + +static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq) +{ + if (atomic_dec_and_test(&txq->stops)) + netif_wake_subqueue(txq->priv->netdev, txq->q_idx); +} + +static uint hfi1_ipoib_ring_hwat(struct hfi1_ipoib_txq *txq) +{ + return min_t(uint, txq->priv->netdev->tx_queue_len, + txq->tx_ring.max_items - 1); +} + +static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq) +{ + return min_t(uint, txq->priv->netdev->tx_queue_len, + txq->tx_ring.max_items) >> 1; +} + static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq) { - if (unlikely(hfi1_ipoib_txreqs(++txq->sent_txreqs, - atomic64_read(&txq->complete_txreqs)) >= - min_t(unsigned int, txq->priv->netdev->tx_queue_len, - txq->tx_ring.max_items - 1))) - netif_stop_subqueue(txq->priv->netdev, txq->q_idx); + ++txq->sent_txreqs; + if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) && + !atomic_xchg(&txq->ring_full, 1)) + hfi1_ipoib_stop_txq(txq); } static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq) { struct net_device *dev = txq->priv->netdev; - /* If the queue is already running just return */ - if (likely(!__netif_subqueue_stopped(dev, txq->q_idx))) - return; - /* If shutting down just return as queue state is irrelevant */ if (unlikely(dev->reg_state != NETREG_REGISTERED)) return; @@ -86,11 +111,9 @@ static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq) * Use the minimum of the current tx_queue_len or the rings max txreqs * to protect against ring overflow. */ - if (hfi1_ipoib_txreqs(txq->sent_txreqs, - atomic64_read(&txq->complete_txreqs)) - < min_t(unsigned int, dev->tx_queue_len, - txq->tx_ring.max_items) >> 1) - netif_wake_subqueue(dev, txq->q_idx); + if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) && + atomic_xchg(&txq->ring_full, 0)) + hfi1_ipoib_wake_txq(txq); } static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget) @@ -608,13 +631,14 @@ static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde, return -EAGAIN; } - netif_stop_subqueue(txq->priv->netdev, txq->q_idx); - if (list_empty(&txreq->list)) /* came from non-list submit */ list_add_tail(&txreq->list, &txq->tx_list); - if (list_empty(&txq->wait.list)) + if (list_empty(&txq->wait.list)) { + if (!atomic_xchg(&txq->no_desc, 1)) + hfi1_ipoib_stop_txq(txq); iowait_queue(pkts_sent, wait->iow, &sde->dmawait); + } write_sequnlock(&sde->waitlock); return -EBUSY; @@ -649,9 +673,9 @@ static void hfi1_ipoib_flush_txq(struct work_struct *work) struct net_device *dev = txq->priv->netdev; if (likely(dev->reg_state == NETREG_REGISTERED) && - likely(__netif_subqueue_stopped(dev, txq->q_idx)) && likely(!hfi1_ipoib_flush_tx_list(dev, txq))) - netif_wake_subqueue(dev, txq->q_idx); + if (atomic_xchg(&txq->no_desc, 0)) + hfi1_ipoib_wake_txq(txq); } int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) @@ -705,6 +729,9 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) txq->sde = NULL; INIT_LIST_HEAD(&txq->tx_list); atomic64_set(&txq->complete_txreqs, 0); + atomic_set(&txq->stops, 0); + atomic_set(&txq->ring_full, 0); + atomic_set(&txq->no_desc, 0); txq->q_idx = i; txq->flow.tx_queue = 0xff; txq->flow.sc5 = 0xff; @@ -770,7 +797,7 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) atomic64_inc(complete_txreqs); } - if (hfi1_ipoib_txreqs(txq->sent_txreqs, atomic64_read(complete_txreqs))) + if (hfi1_ipoib_used(txq)) dd_dev_warn(txq->priv->dd, "txq %d not empty found %llu requests\n", txq->q_idx, From 17843655708e1941c0653af3cd61be6948e36f43 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 23 Jun 2020 18:33:15 +0200 Subject: [PATCH 423/462] openvswitch: take into account de-fragmentation/gso_size in execute_check_pkt_len ovs connection tracking module performs de-fragmentation on incoming fragmented traffic. Take info account if traffic has been de-fragmented in execute_check_pkt_len action otherwise we will perform the wrong nested action considering the original packet size. This issue typically occurs if ovs-vswitchd adds a rule in the pipeline that requires connection tracking (e.g. OVN stateful ACLs) before execute_check_pkt_len action. Moreover take into account GSO fragment size for GSO packet in execute_check_pkt_len routine Fixes: 4d5ec89fc8d14 ("net: openvswitch: Add a new action check_pkt_len") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index fc0efd8833c8..2611657f40ca 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1169,9 +1169,10 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr, bool last) { + struct ovs_skb_cb *ovs_cb = OVS_CB(skb); const struct nlattr *actions, *cpl_arg; + int len, max_len, rem = nla_len(attr); const struct check_pkt_len_arg *arg; - int rem = nla_len(attr); bool clone_flow_key; /* The first netlink attribute in 'attr' is always @@ -1180,7 +1181,11 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb, cpl_arg = nla_data(attr); arg = nla_data(cpl_arg); - if (skb->len <= arg->pkt_len) { + len = ovs_cb->mru ? ovs_cb->mru + skb->mac_len : skb->len; + max_len = arg->pkt_len; + + if ((skb_is_gso(skb) && skb_gso_validate_mac_len(skb, max_len)) || + len <= max_len) { /* Second netlink attribute in 'attr' is always * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'. */ From 1ed9ec9b08addbd8d3e36d5f4a652d8590a6ddb7 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sat, 20 Jun 2020 21:39:25 +0200 Subject: [PATCH 424/462] dsa: Allow forwarding of redirected IGMP traffic The driver for Marvell switches puts all ports in IGMP snooping mode which results in all IGMP/MLD frames that ingress on the ports to be forwarded to the CPU only. The bridge code in the kernel can then interpret these frames and act upon them, for instance by updating the mdb in the switch to reflect multicast memberships of stations connected to the ports. However, the IGMP/MLD frames must then also be forwarded to other ports of the bridge so external IGMP queriers can track membership reports, and external multicast clients can receive query reports from foreign IGMP queriers. Currently, this is impossible as the EDSA tagger sets offload_fwd_mark on the skb when it unwraps the tagged frames, and that will make the switchdev layer prevent the skb from egressing on any other port of the same switch. To fix that, look at the To_CPU code in the DSA header and make forwarding of the frame possible for trapped IGMP packets. Introduce some #defines for the frame types to make the code a bit more comprehensive. This was tested on a Marvell 88E6352 variant. Signed-off-by: Daniel Mack Reviewed-by: Andrew Lunn Tested-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/tag_edsa.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index e8eaa804ccb9..d6200ff98200 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -13,6 +13,16 @@ #define DSA_HLEN 4 #define EDSA_HLEN 8 +#define FRAME_TYPE_TO_CPU 0x00 +#define FRAME_TYPE_FORWARD 0x03 + +#define TO_CPU_CODE_MGMT_TRAP 0x00 +#define TO_CPU_CODE_FRAME2REG 0x01 +#define TO_CPU_CODE_IGMP_MLD_TRAP 0x02 +#define TO_CPU_CODE_POLICY_TRAP 0x03 +#define TO_CPU_CODE_ARP_MIRROR 0x04 +#define TO_CPU_CODE_POLICY_MIRROR 0x05 + static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); @@ -77,6 +87,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { u8 *edsa_header; + int frame_type; + int code; int source_device; int source_port; @@ -91,8 +103,29 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, /* * Check that frame type is either TO_CPU or FORWARD. */ - if ((edsa_header[0] & 0xc0) != 0x00 && (edsa_header[0] & 0xc0) != 0xc0) + frame_type = edsa_header[0] >> 6; + + switch (frame_type) { + case FRAME_TYPE_TO_CPU: + code = (edsa_header[1] & 0x6) | ((edsa_header[2] >> 4) & 1); + + /* + * Mark the frame to never egress on any port of the same switch + * unless it's a trapped IGMP/MLD packet, in which case the + * bridge might want to forward it. + */ + if (code != TO_CPU_CODE_IGMP_MLD_TRAP) + skb->offload_fwd_mark = 1; + + break; + + case FRAME_TYPE_FORWARD: + skb->offload_fwd_mark = 1; + break; + + default: return NULL; + } /* * Determine source device and port. @@ -156,8 +189,6 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, 2 * ETH_ALEN); } - skb->offload_fwd_mark = 1; - return skb; } From d3d239dcb8aae6d7b10642d292b404e57604f7ea Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 24 Jun 2020 09:00:44 +0200 Subject: [PATCH 425/462] net: ethernet: mvneta: Do not error out in non serdes modes In mvneta_config_interface() the RGMII modes are catched by the default case which is an error return. The RGMII modes are valid modes for the driver, so instead of returning an error add a break statement to return successfully. This avoids this warning for non comphy SoCs which use RGMII, like SolidRun Clearfog: WARNING: CPU: 0 PID: 268 at drivers/net/ethernet/marvell/mvneta.c:3512 mvneta_start_dev+0x220/0x23c Fixes: b4748553f53f ("net: ethernet: mvneta: Fix Serdes configuration for SoCs without comphy") Signed-off-by: Sascha Hauer Reviewed-by: Russell King Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index af6000172848..c4552f868157 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3571,7 +3571,7 @@ static int mvneta_config_interface(struct mvneta_port *pp, MVNETA_HSGMII_SERDES_PROTO); break; default: - return -EINVAL; + break; } } From 41c2b6b4f0f807803bb49f65835d136941a70f85 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 24 Jun 2020 09:00:45 +0200 Subject: [PATCH 426/462] net: ethernet: mvneta: Add back interface mode validation When writing the serdes configuration register was moved to mvneta_config_interface() the whole code block was removed from mvneta_port_power_up() in the assumption that its only purpose was to write the serdes configuration register. As mentioned by Russell King its purpose was also to check for valid interface modes early so that later in the driver we do not have to care for unexpected interface modes. Add back the test to let the driver bail out early on unhandled interface modes. Fixes: b4748553f53f ("net: ethernet: mvneta: Fix Serdes configuration for SoCs without comphy") Signed-off-by: Sascha Hauer Reviewed-by: Russell King Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c4552f868157..c639e3a29302 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -5009,10 +5009,18 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp, } /* Power up the port */ -static void mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) +static int mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) { /* MAC Cause register should be cleared */ mvreg_write(pp, MVNETA_UNIT_INTR_CAUSE, 0); + + if (phy_mode != PHY_INTERFACE_MODE_QSGMII && + phy_mode != PHY_INTERFACE_MODE_SGMII && + !phy_interface_mode_is_8023z(phy_mode) && + !phy_interface_mode_is_rgmii(phy_mode)) + return -EINVAL; + + return 0; } /* Device initialization routine */ @@ -5198,7 +5206,11 @@ static int mvneta_probe(struct platform_device *pdev) if (err < 0) goto err_netdev; - mvneta_port_power_up(pp, phy_mode); + err = mvneta_port_power_up(pp, pp->phy_interface); + if (err < 0) { + dev_err(&pdev->dev, "can't power up port\n"); + return err; + } /* Armada3700 network controller does not support per-cpu * operation, so only single NAPI should be initialized. @@ -5352,7 +5364,11 @@ static int mvneta_resume(struct device *device) } } mvneta_defaults_set(pp); - mvneta_port_power_up(pp, pp->phy_interface); + err = mvneta_port_power_up(pp, pp->phy_interface); + if (err < 0) { + dev_err(device, "can't power up port\n"); + return err; + } netif_device_attach(dev); From 3dd4ef1bdbac959bb20faec93937720ddd9917c6 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 24 Jun 2020 15:58:24 +0800 Subject: [PATCH 427/462] net: phy: make phy_disable_interrupts() non-static We face an issue with rtl8211f, a pin is shared between INTB and PMEB, and the PHY Register Accessible Interrupt is enabled by default, so the INTB/PMEB pin is always active in polling mode case. As Heiner pointed out "I was thinking about calling phy_disable_interrupts() in phy_init_hw(), to have a defined init state as we don't know in which state the PHY is if the PHY driver is loaded. We shouldn't assume that it's the chip power-on defaults, BIOS or boot loader could have changed this. Or in case of dual-boot systems the other OS could leave the PHY in whatever state." Make phy_disable_interrupts() non-static so that it could be used in phy_init_hw() to have a defined init state. Suggested-by: Heiner Kallweit Signed-off-by: Jisheng Zhang Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 +- include/linux/phy.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 1de3938628f4..56cfae950472 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -840,7 +840,7 @@ static void phy_error(struct phy_device *phydev) * phy_disable_interrupts - Disable the PHY interrupts from the PHY side * @phydev: target phy_device struct */ -static int phy_disable_interrupts(struct phy_device *phydev) +int phy_disable_interrupts(struct phy_device *phydev) { int err; diff --git a/include/linux/phy.h b/include/linux/phy.h index 8c05d0fb5c00..b693b609b2f5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1416,6 +1416,7 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); int phy_do_ioctl_running(struct net_device *dev, struct ifreq *ifr, int cmd); +int phy_disable_interrupts(struct phy_device *phydev); void phy_request_interrupt(struct phy_device *phydev); void phy_free_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); From 9886a4dbd2aacf4b044e294e4cdebb43992d44b1 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 24 Jun 2020 15:59:23 +0800 Subject: [PATCH 428/462] net: phy: call phy_disable_interrupts() in phy_init_hw() Call phy_disable_interrupts() in phy_init_hw() to "have a defined init state as we don't know in which state the PHY is if the PHY driver is loaded. We shouldn't assume that it's the chip power-on defaults, BIOS or boot loader could have changed this. Or in case of dual-boot systems the other OS could leave the PHY in whatever state." as pointed out by Heiner. Suggested-by: Heiner Kallweit Signed-off-by: Jisheng Zhang Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 85ba95b598b5..b4978c5fb2ca 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1092,6 +1092,10 @@ int phy_init_hw(struct phy_device *phydev) if (ret < 0) return ret; + ret = phy_disable_interrupts(phydev); + if (ret) + return ret; + if (phydev->drv->config_init) ret = phydev->drv->config_init(phydev); From a51243860893615b457b149da1ef5df0a4f1ffc2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 24 Jun 2020 11:13:02 +0100 Subject: [PATCH 429/462] qed: add missing error test for DBG_STATUS_NO_MATCHING_FRAMING_MODE The error DBG_STATUS_NO_MATCHING_FRAMING_MODE was added to the enum enum dbg_status however there is a missing corresponding entry for this in the array s_status_str. This causes an out-of-bounds read when indexing into the last entry of s_status_str. Fix this by adding in the missing entry. Addresses-Coverity: ("Out-of-bounds read"). Fixes: 2d22bc8354b1 ("qed: FW 8.42.2.0 debug features") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 57a0dab88431..81e8fbe4a05b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -5568,7 +5568,8 @@ static const char * const s_status_str[] = { /* DBG_STATUS_INVALID_FILTER_TRIGGER_DWORDS */ "The filter/trigger constraint dword offsets are not enabled for recording", - + /* DBG_STATUS_NO_MATCHING_FRAMING_MODE */ + "No matching framing mode", /* DBG_STATUS_VFC_READ_ERROR */ "Error reading from VFC", From 715028460082d07a7ec6fcd87b14b46784346a72 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 10 Jun 2020 21:51:11 +0100 Subject: [PATCH 430/462] netfilter: ipset: fix unaligned atomic access When using ip_set with counters and comment, traffic causes the kernel to panic on 32-bit ARM: Alignment trap: not handling instruction e1b82f9f at [] Unhandled fault: alignment exception (0x221) at 0xea08133c PC is at ip_set_match_extensions+0xe0/0x224 [ip_set] The problem occurs when we try to update the 64-bit counters - the faulting address above is not 64-bit aligned. The problem occurs due to the way elements are allocated, for example: set->dsize = ip_set_elem_len(set, tb, 0, 0); map = ip_set_alloc(sizeof(*map) + elements * set->dsize); If the element has a requirement for a member to be 64-bit aligned, and set->dsize is not a multiple of 8, but is a multiple of four, then every odd numbered elements will be misaligned - and hitting an atomic64_add() on that element will cause the kernel to panic. ip_set_elem_len() must return a size that is rounded to the maximum alignment of any extension field stored in the element. This change ensures that is the case. Fixes: 95ad1f4a9358 ("netfilter: ipset: Fix extension alignment") Signed-off-by: Russell King Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 340cb955af25..56621d6bfd29 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -460,6 +460,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len, for (id = 0; id < IPSET_EXT_ID_MAX; id++) { if (!add_extension(id, cadt_flags, tb)) continue; + if (align < ip_set_extensions[id].align) + align = ip_set_extensions[id].align; len = ALIGN(len, ip_set_extensions[id].align); set->offset[id] = len; set->extensions |= ip_set_extensions[id].type; From 4cacc39516784670aa09833a9ec8bf3e90bef561 Mon Sep 17 00:00:00 2001 From: Rob Gill Date: Sun, 21 Jun 2020 05:27:36 +0000 Subject: [PATCH 431/462] netfilter: Add MODULE_DESCRIPTION entries to kernel modules The user tool modinfo is used to get information on kernel modules, including a description where it is available. This patch adds a brief MODULE_DESCRIPTION to netfilter kernel modules (descriptions taken from Kconfig file or code comments) Signed-off-by: Rob Gill Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_meta_bridge.c | 1 + net/bridge/netfilter/nft_reject_bridge.c | 1 + net/ipv4/netfilter/ipt_SYNPROXY.c | 1 + net/ipv4/netfilter/nf_flow_table_ipv4.c | 1 + net/ipv4/netfilter/nft_dup_ipv4.c | 1 + net/ipv4/netfilter/nft_fib_ipv4.c | 1 + net/ipv4/netfilter/nft_reject_ipv4.c | 1 + net/ipv6/netfilter/ip6t_SYNPROXY.c | 1 + net/ipv6/netfilter/nf_flow_table_ipv6.c | 1 + net/ipv6/netfilter/nft_dup_ipv6.c | 1 + net/ipv6/netfilter/nft_fib_ipv6.c | 1 + net/ipv6/netfilter/nft_reject_ipv6.c | 1 + net/netfilter/nf_dup_netdev.c | 1 + net/netfilter/nf_flow_table_core.c | 1 + net/netfilter/nf_flow_table_inet.c | 1 + net/netfilter/nf_synproxy_core.c | 1 + net/netfilter/nfnetlink.c | 1 + net/netfilter/nft_compat.c | 1 + net/netfilter/nft_connlimit.c | 1 + net/netfilter/nft_counter.c | 1 + net/netfilter/nft_ct.c | 1 + net/netfilter/nft_dup_netdev.c | 1 + net/netfilter/nft_fib_inet.c | 1 + net/netfilter/nft_fib_netdev.c | 1 + net/netfilter/nft_flow_offload.c | 1 + net/netfilter/nft_hash.c | 1 + net/netfilter/nft_limit.c | 1 + net/netfilter/nft_log.c | 1 + net/netfilter/nft_masq.c | 1 + net/netfilter/nft_nat.c | 1 + net/netfilter/nft_numgen.c | 1 + net/netfilter/nft_objref.c | 1 + net/netfilter/nft_osf.c | 1 + net/netfilter/nft_queue.c | 1 + net/netfilter/nft_quota.c | 1 + net/netfilter/nft_redir.c | 1 + net/netfilter/nft_reject.c | 1 + net/netfilter/nft_reject_inet.c | 1 + net/netfilter/nft_synproxy.c | 1 + net/netfilter/nft_tunnel.c | 1 + net/netfilter/xt_nat.c | 1 + 41 files changed, 41 insertions(+) diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index 7c9e92b2f806..8e8ffac037cd 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -155,3 +155,4 @@ module_exit(nft_meta_bridge_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("wenxu "); MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta"); +MODULE_DESCRIPTION("Support for bridge dedicated meta key"); diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index f48cf4cfb80f..deae2c9a0f69 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -455,3 +455,4 @@ module_exit(nft_reject_bridge_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "reject"); +MODULE_DESCRIPTION("Reject packets from bridge via nftables"); diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 748dc3ce58d3..f2984c7eef40 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -118,3 +118,4 @@ module_exit(synproxy_tg4_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Intercept TCP connections and establish them using syncookies"); diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c index e32e41b99f0f..aba65fe90345 100644 --- a/net/ipv4/netfilter/nf_flow_table_ipv4.c +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c @@ -34,3 +34,4 @@ module_exit(nf_flow_ipv4_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_ALIAS_NF_FLOWTABLE(AF_INET); +MODULE_DESCRIPTION("Netfilter flow table support"); diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index abf89b972094..bcdb37f86a94 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -107,3 +107,4 @@ module_exit(nft_dup_ipv4_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "dup"); +MODULE_DESCRIPTION("IPv4 nftables packet duplication support"); diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index ce294113dbcd..03df986217b7 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -210,3 +210,4 @@ module_exit(nft_fib4_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal "); MODULE_ALIAS_NFT_AF_EXPR(2, "fib"); +MODULE_DESCRIPTION("nftables fib / ip route lookup support"); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index 7e6fd5cde50f..e408f813f5d8 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -71,3 +71,4 @@ module_exit(nft_reject_ipv4_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject"); +MODULE_DESCRIPTION("IPv4 packet rejection for nftables"); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index fd1f52a21bf1..d51d0c3e5fe9 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -121,3 +121,4 @@ module_exit(synproxy_tg6_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Intercept IPv6 TCP connections and establish them using syncookies"); diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c index a8566ee12e83..667b8af2546a 100644 --- a/net/ipv6/netfilter/nf_flow_table_ipv6.c +++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c @@ -35,3 +35,4 @@ module_exit(nf_flow_ipv6_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_ALIAS_NF_FLOWTABLE(AF_INET6); +MODULE_DESCRIPTION("Netfilter flow table IPv6 module"); diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 2af32200507d..8b5193efb1f1 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -105,3 +105,4 @@ module_exit(nft_dup_ipv6_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "dup"); +MODULE_DESCRIPTION("IPv6 nftables packet duplication support"); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 7ece86afd079..e204163c7036 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -255,3 +255,4 @@ module_exit(nft_fib6_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal "); MODULE_ALIAS_NFT_AF_EXPR(10, "fib"); +MODULE_DESCRIPTION("nftables fib / ipv6 route lookup support"); diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 680a28ce29fd..c1098a1968e1 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -72,3 +72,4 @@ module_exit(nft_reject_ipv6_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject"); +MODULE_DESCRIPTION("IPv6 packet rejection for nftables"); diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index f108a76925dd..2b01a151eaa8 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -73,3 +73,4 @@ EXPORT_SYMBOL_GPL(nft_fwd_dup_netdev_offload); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_DESCRIPTION("Netfilter packet duplication support"); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index afa85171df38..b1eb5272b379 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -594,3 +594,4 @@ module_exit(nf_flow_table_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_DESCRIPTION("Netfilter flow table module"); diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 88bedf1ff1ae..bc4126d8ef65 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -72,3 +72,4 @@ module_exit(nf_flow_inet_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */ +MODULE_DESCRIPTION("Netfilter flow table mixed IPv4/IPv6 module"); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index b9cbe1e2453e..ebcdc8e54476 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -1237,3 +1237,4 @@ EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("nftables SYNPROXY expression support"); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 99127e2d95a8..5f24edf95830 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -33,6 +33,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); +MODULE_DESCRIPTION("Netfilter messages via netlink socket"); #define nfnl_dereference_protected(id) \ rcu_dereference_protected(table[(id)].subsys, \ diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index f9adca62ccb3..aa1a066cb74b 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -902,3 +902,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_ALIAS_NFT_EXPR("match"); MODULE_ALIAS_NFT_EXPR("target"); +MODULE_DESCRIPTION("x_tables over nftables support"); diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 69d6173f91e2..7d0761fad37e 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -280,3 +280,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso"); MODULE_ALIAS_NFT_EXPR("connlimit"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CONNLIMIT); +MODULE_DESCRIPTION("nftables connlimit rule support"); diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index f6d4d0fa23a6..85ed461ec24e 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -303,3 +303,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS_NFT_EXPR("counter"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER); +MODULE_DESCRIPTION("nftables counter rule support"); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index faea72c2df32..77258af1fce0 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1345,3 +1345,4 @@ MODULE_ALIAS_NFT_EXPR("notrack"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_TIMEOUT); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_EXPECT); +MODULE_DESCRIPTION("Netfilter nf_tables conntrack module"); diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index c2e78c160fd7..40788b3f1071 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -102,3 +102,4 @@ module_exit(nft_dup_netdev_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_ALIAS_NFT_AF_EXPR(5, "dup"); +MODULE_DESCRIPTION("nftables netdev packet duplication support"); diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c index 465432e0531b..a88d44e163d1 100644 --- a/net/netfilter/nft_fib_inet.c +++ b/net/netfilter/nft_fib_inet.c @@ -76,3 +76,4 @@ module_exit(nft_fib_inet_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal "); MODULE_ALIAS_NFT_AF_EXPR(1, "fib"); +MODULE_DESCRIPTION("nftables fib inet support"); diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c index a2e726ae7f07..3f3478abd845 100644 --- a/net/netfilter/nft_fib_netdev.c +++ b/net/netfilter/nft_fib_netdev.c @@ -85,3 +85,4 @@ module_exit(nft_fib_netdev_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo M. Bermudo Garay "); MODULE_ALIAS_NFT_AF_EXPR(5, "fib"); +MODULE_DESCRIPTION("nftables netdev fib lookups support"); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index b70b48996801..3b9b97aa4b32 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -286,3 +286,4 @@ module_exit(nft_flow_offload_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_ALIAS_NFT_EXPR("flow_offload"); +MODULE_DESCRIPTION("nftables hardware flow offload module"); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index b836d550b919..96371d878e7e 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -248,3 +248,4 @@ module_exit(nft_hash_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Laura Garcia "); MODULE_ALIAS_NFT_EXPR("hash"); +MODULE_DESCRIPTION("Netfilter nftables hash module"); diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 35b67d7e3694..0e2c315c3b5e 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -372,3 +372,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS_NFT_EXPR("limit"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_LIMIT); +MODULE_DESCRIPTION("nftables limit expression support"); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index fe4831f2258f..57899454a530 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -298,3 +298,4 @@ module_exit(nft_log_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS_NFT_EXPR("log"); +MODULE_DESCRIPTION("Netfilter nf_tables log module"); diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index bc9fd98c5d6d..71390b727040 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -305,3 +305,4 @@ module_exit(nft_masq_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez "); MODULE_ALIAS_NFT_EXPR("masq"); +MODULE_DESCRIPTION("Netfilter nftables masquerade expression support"); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 23a7bfd10521..4bcf33b049c4 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -402,3 +402,4 @@ module_exit(nft_nat_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Tomasz Bursztyka "); MODULE_ALIAS_NFT_EXPR("nat"); +MODULE_DESCRIPTION("Network Address Translation support"); diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 48edb9d5f012..f1fc824f9737 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -217,3 +217,4 @@ module_exit(nft_ng_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Laura Garcia "); MODULE_ALIAS_NFT_EXPR("numgen"); +MODULE_DESCRIPTION("nftables number generator module"); diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index bfd18d2b65a2..5f9207a9f485 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -252,3 +252,4 @@ module_exit(nft_objref_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_ALIAS_NFT_EXPR("objref"); +MODULE_DESCRIPTION("nftables stateful object reference module"); diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index b42247aa48a9..c261d57a666a 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -149,3 +149,4 @@ module_exit(nft_osf_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Fernando Fernandez "); MODULE_ALIAS_NFT_EXPR("osf"); +MODULE_DESCRIPTION("nftables passive OS fingerprint support"); diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 5ece0a6aa8c3..23265d757acb 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -216,3 +216,4 @@ module_exit(nft_queue_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Eric Leblond "); MODULE_ALIAS_NFT_EXPR("queue"); +MODULE_DESCRIPTION("Netfilter nftables queue module"); diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 4413690591f2..0363f533a42b 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -254,3 +254,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_ALIAS_NFT_EXPR("quota"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_QUOTA); +MODULE_DESCRIPTION("Netfilter nftables quota module"); diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 5b779171565c..2056051c0af0 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -292,3 +292,4 @@ module_exit(nft_redir_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez "); MODULE_ALIAS_NFT_EXPR("redir"); +MODULE_DESCRIPTION("Netfilter nftables redirect support"); diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index 00f865fb80ca..86eafbb0fdd0 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -119,3 +119,4 @@ EXPORT_SYMBOL_GPL(nft_reject_icmpv6_code); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Netfilter x_tables over nftables module"); diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index f41f414b72d1..cf8f2646e93c 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -149,3 +149,4 @@ module_exit(nft_reject_inet_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS_NFT_AF_EXPR(1, "reject"); +MODULE_DESCRIPTION("Netfilter nftables reject inet support"); diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index e2c1fc608841..4fda8b3f1762 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -388,3 +388,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Fernando Fernandez "); MODULE_ALIAS_NFT_EXPR("synproxy"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_SYNPROXY); +MODULE_DESCRIPTION("nftables SYNPROXY expression support"); diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 30be5787fbde..d3eb953d0333 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -719,3 +719,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_ALIAS_NFT_EXPR("tunnel"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_TUNNEL); +MODULE_DESCRIPTION("nftables tunnel expression support"); diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c index a8e5f6c8db7a..b4f7bbc3f3ca 100644 --- a/net/netfilter/xt_nat.c +++ b/net/netfilter/xt_nat.c @@ -244,3 +244,4 @@ MODULE_ALIAS("ipt_SNAT"); MODULE_ALIAS("ipt_DNAT"); MODULE_ALIAS("ip6t_SNAT"); MODULE_ALIAS("ip6t_DNAT"); +MODULE_DESCRIPTION("SNAT and DNAT targets support"); From 1cbf90985f7448f1b0dd630e17ee1070f7d58665 Mon Sep 17 00:00:00 2001 From: David Wilder Date: Mon, 22 Jun 2020 10:10:11 -0700 Subject: [PATCH 432/462] netfilter: iptables: Split ipt_unregister_table() into pre_exit and exit helpers. The pre_exit will un-register the underlying hook and .exit will do the table freeing. The netns core does an unconditional synchronize_rcu after the pre_exit hooks insuring no packets are in flight that have picked up the pointer before completing the un-register. Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default") Signed-off-by: David Wilder Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4/ip_tables.h | 6 ++++++ net/ipv4/netfilter/ip_tables.c | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index b394bd4f68a3..c4676d6feeff 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -25,6 +25,12 @@ int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *ops, struct xt_table **res); + +void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); + +void ipt_unregister_table_exit(struct net *net, struct xt_table *table); + void ipt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c2670eaa74e6..5bf9fa06aee0 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1797,11 +1797,22 @@ out_free: return ret; } +void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) +{ + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); +} + +void ipt_unregister_table_exit(struct net *net, struct xt_table *table) +{ + __ipt_unregister_table(net, table); +} + void ipt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { if (ops) - nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + ipt_unregister_table_pre_exit(net, table, ops); __ipt_unregister_table(net, table); } @@ -1958,6 +1969,8 @@ static void __exit ip_tables_fini(void) EXPORT_SYMBOL(ipt_register_table); EXPORT_SYMBOL(ipt_unregister_table); +EXPORT_SYMBOL(ipt_unregister_table_pre_exit); +EXPORT_SYMBOL(ipt_unregister_table_exit); EXPORT_SYMBOL(ipt_do_table); module_init(ip_tables_init); module_exit(ip_tables_fini); From cf4cbc610bfa29a88cd71ca638a890f8c565a22e Mon Sep 17 00:00:00 2001 From: David Wilder Date: Mon, 22 Jun 2020 10:10:12 -0700 Subject: [PATCH 433/462] netfilter: iptables: Add a .pre_exit hook in all iptable_foo.c. Using new helpers ipt_unregister_table_pre_exit() and ipt_unregister_table_exit(). Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default") Signed-off-by: David Wilder Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/iptable_filter.c | 10 +++++++++- net/ipv4/netfilter/iptable_mangle.c | 10 +++++++++- net/ipv4/netfilter/iptable_nat.c | 10 ++++++++-- net/ipv4/netfilter/iptable_raw.c | 10 +++++++++- net/ipv4/netfilter/iptable_security.c | 11 +++++++++-- 5 files changed, 44 insertions(+), 7 deletions(-) diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 9d54b4017e50..8f7bc1ee7453 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -72,16 +72,24 @@ static int __net_init iptable_filter_net_init(struct net *net) return 0; } +static void __net_exit iptable_filter_net_pre_exit(struct net *net) +{ + if (net->ipv4.iptable_filter) + ipt_unregister_table_pre_exit(net, net->ipv4.iptable_filter, + filter_ops); +} + static void __net_exit iptable_filter_net_exit(struct net *net) { if (!net->ipv4.iptable_filter) return; - ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops); + ipt_unregister_table_exit(net, net->ipv4.iptable_filter); net->ipv4.iptable_filter = NULL; } static struct pernet_operations iptable_filter_net_ops = { .init = iptable_filter_net_init, + .pre_exit = iptable_filter_net_pre_exit, .exit = iptable_filter_net_exit, }; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index bb9266ea3785..f703a717ab1d 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -100,15 +100,23 @@ static int __net_init iptable_mangle_table_init(struct net *net) return ret; } +static void __net_exit iptable_mangle_net_pre_exit(struct net *net) +{ + if (net->ipv4.iptable_mangle) + ipt_unregister_table_pre_exit(net, net->ipv4.iptable_mangle, + mangle_ops); +} + static void __net_exit iptable_mangle_net_exit(struct net *net) { if (!net->ipv4.iptable_mangle) return; - ipt_unregister_table(net, net->ipv4.iptable_mangle, mangle_ops); + ipt_unregister_table_exit(net, net->ipv4.iptable_mangle); net->ipv4.iptable_mangle = NULL; } static struct pernet_operations iptable_mangle_net_ops = { + .pre_exit = iptable_mangle_net_pre_exit, .exit = iptable_mangle_net_exit, }; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index ad33687b7444..b0143b109f25 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -113,16 +113,22 @@ static int __net_init iptable_nat_table_init(struct net *net) return ret; } +static void __net_exit iptable_nat_net_pre_exit(struct net *net) +{ + if (net->ipv4.nat_table) + ipt_nat_unregister_lookups(net); +} + static void __net_exit iptable_nat_net_exit(struct net *net) { if (!net->ipv4.nat_table) return; - ipt_nat_unregister_lookups(net); - ipt_unregister_table(net, net->ipv4.nat_table, NULL); + ipt_unregister_table_exit(net, net->ipv4.nat_table); net->ipv4.nat_table = NULL; } static struct pernet_operations iptable_nat_net_ops = { + .pre_exit = iptable_nat_net_pre_exit, .exit = iptable_nat_net_exit, }; diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 69697eb4bfc6..9abfe6bf2cb9 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -67,15 +67,23 @@ static int __net_init iptable_raw_table_init(struct net *net) return ret; } +static void __net_exit iptable_raw_net_pre_exit(struct net *net) +{ + if (net->ipv4.iptable_raw) + ipt_unregister_table_pre_exit(net, net->ipv4.iptable_raw, + rawtable_ops); +} + static void __net_exit iptable_raw_net_exit(struct net *net) { if (!net->ipv4.iptable_raw) return; - ipt_unregister_table(net, net->ipv4.iptable_raw, rawtable_ops); + ipt_unregister_table_exit(net, net->ipv4.iptable_raw); net->ipv4.iptable_raw = NULL; } static struct pernet_operations iptable_raw_net_ops = { + .pre_exit = iptable_raw_net_pre_exit, .exit = iptable_raw_net_exit, }; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index ac633c1db97e..415c1975d770 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -62,16 +62,23 @@ static int __net_init iptable_security_table_init(struct net *net) return ret; } +static void __net_exit iptable_security_net_pre_exit(struct net *net) +{ + if (net->ipv4.iptable_security) + ipt_unregister_table_pre_exit(net, net->ipv4.iptable_security, + sectbl_ops); +} + static void __net_exit iptable_security_net_exit(struct net *net) { if (!net->ipv4.iptable_security) return; - - ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops); + ipt_unregister_table_exit(net, net->ipv4.iptable_security); net->ipv4.iptable_security = NULL; } static struct pernet_operations iptable_security_net_ops = { + .pre_exit = iptable_security_net_pre_exit, .exit = iptable_security_net_exit, }; From 57ea5f18882a3d7cf6135fa8c949a37c89395837 Mon Sep 17 00:00:00 2001 From: David Wilder Date: Mon, 22 Jun 2020 10:10:13 -0700 Subject: [PATCH 434/462] netfilter: ip6tables: Split ip6t_unregister_table() into pre_exit and exit helpers. The pre_exit will un-register the underlying hook and .exit will do the table freeing. The netns core does an unconditional synchronize_rcu after the pre_exit hooks insuring no packets are in flight that have picked up the pointer before completing the un-register. Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default") Signed-off-by: David Wilder Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6/ip6_tables.h | 3 +++ net/ipv6/netfilter/ip6_tables.c | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 8225f7821a29..1547d5f9ae06 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -29,6 +29,9 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, const struct nf_hook_ops *ops, struct xt_table **res); void ip6t_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops); +void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); +void ip6t_unregister_table_exit(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e27393498ecb..e96a431549bc 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1807,11 +1807,22 @@ out_free: return ret; } +void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) +{ + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); +} + +void ip6t_unregister_table_exit(struct net *net, struct xt_table *table) +{ + __ip6t_unregister_table(net, table); +} + void ip6t_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { if (ops) - nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + ip6t_unregister_table_pre_exit(net, table, ops); __ip6t_unregister_table(net, table); } @@ -1969,6 +1980,8 @@ static void __exit ip6_tables_fini(void) EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table); +EXPORT_SYMBOL(ip6t_unregister_table_pre_exit); +EXPORT_SYMBOL(ip6t_unregister_table_exit); EXPORT_SYMBOL(ip6t_do_table); module_init(ip6_tables_init); From 5f027bc74a9be2e53233de4ebcd2f90390b50a62 Mon Sep 17 00:00:00 2001 From: David Wilder Date: Mon, 22 Jun 2020 10:10:14 -0700 Subject: [PATCH 435/462] netfilter: ip6tables: Add a .pre_exit hook in all ip6table_foo.c. Using new helpers ip6t_unregister_table_pre_exit() and ip6t_unregister_table_exit(). Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default") Signed-off-by: David Wilder Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6table_filter.c | 10 +++++++++- net/ipv6/netfilter/ip6table_mangle.c | 10 +++++++++- net/ipv6/netfilter/ip6table_nat.c | 10 ++++++++-- net/ipv6/netfilter/ip6table_raw.c | 10 +++++++++- net/ipv6/netfilter/ip6table_security.c | 10 +++++++++- 5 files changed, 44 insertions(+), 6 deletions(-) diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 32667f5d5a33..88337b51ffbf 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -73,16 +73,24 @@ static int __net_init ip6table_filter_net_init(struct net *net) return 0; } +static void __net_exit ip6table_filter_net_pre_exit(struct net *net) +{ + if (net->ipv6.ip6table_filter) + ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_filter, + filter_ops); +} + static void __net_exit ip6table_filter_net_exit(struct net *net) { if (!net->ipv6.ip6table_filter) return; - ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops); + ip6t_unregister_table_exit(net, net->ipv6.ip6table_filter); net->ipv6.ip6table_filter = NULL; } static struct pernet_operations ip6table_filter_net_ops = { .init = ip6table_filter_net_init, + .pre_exit = ip6table_filter_net_pre_exit, .exit = ip6table_filter_net_exit, }; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 070afb97fa2b..1a2748611e00 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -93,16 +93,24 @@ static int __net_init ip6table_mangle_table_init(struct net *net) return ret; } +static void __net_exit ip6table_mangle_net_pre_exit(struct net *net) +{ + if (net->ipv6.ip6table_mangle) + ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_mangle, + mangle_ops); +} + static void __net_exit ip6table_mangle_net_exit(struct net *net) { if (!net->ipv6.ip6table_mangle) return; - ip6t_unregister_table(net, net->ipv6.ip6table_mangle, mangle_ops); + ip6t_unregister_table_exit(net, net->ipv6.ip6table_mangle); net->ipv6.ip6table_mangle = NULL; } static struct pernet_operations ip6table_mangle_net_ops = { + .pre_exit = ip6table_mangle_net_pre_exit, .exit = ip6table_mangle_net_exit, }; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 0f4875952efc..0a23265e3caa 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -114,16 +114,22 @@ static int __net_init ip6table_nat_table_init(struct net *net) return ret; } +static void __net_exit ip6table_nat_net_pre_exit(struct net *net) +{ + if (net->ipv6.ip6table_nat) + ip6t_nat_unregister_lookups(net); +} + static void __net_exit ip6table_nat_net_exit(struct net *net) { if (!net->ipv6.ip6table_nat) return; - ip6t_nat_unregister_lookups(net); - ip6t_unregister_table(net, net->ipv6.ip6table_nat, NULL); + ip6t_unregister_table_exit(net, net->ipv6.ip6table_nat); net->ipv6.ip6table_nat = NULL; } static struct pernet_operations ip6table_nat_net_ops = { + .pre_exit = ip6table_nat_net_pre_exit, .exit = ip6table_nat_net_exit, }; diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index a22100b1cf2c..8f9e742226f7 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -66,15 +66,23 @@ static int __net_init ip6table_raw_table_init(struct net *net) return ret; } +static void __net_exit ip6table_raw_net_pre_exit(struct net *net) +{ + if (net->ipv6.ip6table_raw) + ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_raw, + rawtable_ops); +} + static void __net_exit ip6table_raw_net_exit(struct net *net) { if (!net->ipv6.ip6table_raw) return; - ip6t_unregister_table(net, net->ipv6.ip6table_raw, rawtable_ops); + ip6t_unregister_table_exit(net, net->ipv6.ip6table_raw); net->ipv6.ip6table_raw = NULL; } static struct pernet_operations ip6table_raw_net_ops = { + .pre_exit = ip6table_raw_net_pre_exit, .exit = ip6table_raw_net_exit, }; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index a74335fe2bd9..5e8c48fed032 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -61,15 +61,23 @@ static int __net_init ip6table_security_table_init(struct net *net) return ret; } +static void __net_exit ip6table_security_net_pre_exit(struct net *net) +{ + if (net->ipv6.ip6table_security) + ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_security, + sectbl_ops); +} + static void __net_exit ip6table_security_net_exit(struct net *net) { if (!net->ipv6.ip6table_security) return; - ip6t_unregister_table(net, net->ipv6.ip6table_security, sectbl_ops); + ip6t_unregister_table_exit(net, net->ipv6.ip6table_security); net->ipv6.ip6table_security = NULL; } static struct pernet_operations ip6table_security_net_ops = { + .pre_exit = ip6table_security_net_pre_exit, .exit = ip6table_security_net_exit, }; From 619ae8e0697a6fb85b99b19137590c7c337c579e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 22 Jun 2020 10:28:32 +0200 Subject: [PATCH 436/462] selftests: netfilter: add test case for conntrack helper assignment check that 'nft ... ct helper set ' works: 1. configure ftp helper via nft and assign it to connections on port 2121 2. check with 'conntrack -L' that the next connection has the ftp helper attached to it. Also add a test for auto-assign (old behaviour). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/Makefile | 2 +- .../netfilter/nft_conntrack_helper.sh | 175 ++++++++++++++++++ 2 files changed, 176 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/nft_conntrack_helper.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 9c0f758310fe..a179f0dca8ce 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -3,7 +3,7 @@ TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ - nft_concat_range.sh \ + nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh LDLIBS = -lmnl diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh new file mode 100755 index 000000000000..edf0a48da6bf --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh @@ -0,0 +1,175 @@ +#!/bin/bash +# +# This tests connection tracking helper assignment: +# 1. can attach ftp helper to a connection from nft ruleset. +# 2. auto-assign still works. +# +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +testipv6=1 + +cleanup() +{ + ip netns del ${ns1} + ip netns del ${ns2} +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +conntrack -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without conntrack tool" + exit $ksft_skip +fi + +which nc >/dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without netcat tool" + exit $ksft_skip +fi + +trap cleanup EXIT + +ip netns add ${ns1} +ip netns add ${ns2} + +ip link add veth0 netns ${ns1} type veth peer name veth0 netns ${ns2} > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi + +ip -net ${ns1} link set lo up +ip -net ${ns1} link set veth0 up + +ip -net ${ns2} link set lo up +ip -net ${ns2} link set veth0 up + +ip -net ${ns1} addr add 10.0.1.1/24 dev veth0 +ip -net ${ns1} addr add dead:1::1/64 dev veth0 + +ip -net ${ns2} addr add 10.0.1.2/24 dev veth0 +ip -net ${ns2} addr add dead:1::2/64 dev veth0 + +load_ruleset_family() { + local family=$1 + local ns=$2 + +ip netns exec ${ns} nft -f - < /dev/null |grep -q 'helper=ftp' + if [ $? -ne 0 ] ; then + echo "FAIL: ${netns} did not show attached helper $message" 1>&2 + ret=1 + fi + + echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2 + return 0 +} + +test_helper() +{ + local port=$1 + local msg=$2 + + sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null & + + sleep 1 + sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null & + + check_for_helper "$ns1" "ip $msg" $port + check_for_helper "$ns2" "ip $msg" $port + + wait + + if [ $testipv6 -eq 0 ] ;then + return 0 + fi + + ip netns exec ${ns1} conntrack -F 2> /dev/null + ip netns exec ${ns2} conntrack -F 2> /dev/null + + sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null & + + sleep 1 + sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null & + + check_for_helper "$ns1" "ipv6 $msg" $port + check_for_helper "$ns2" "ipv6 $msg" $port + + wait +} + +load_ruleset_family ip ${ns1} +if [ $? -ne 0 ];then + echo "FAIL: ${ns1} cannot load ip ruleset" 1>&2 + exit 1 +fi + +load_ruleset_family ip6 ${ns1} +if [ $? -ne 0 ];then + echo "SKIP: ${ns1} cannot load ip6 ruleset" 1>&2 + testipv6=0 +fi + +load_ruleset_family inet ${ns2} +if [ $? -ne 0 ];then + echo "SKIP: ${ns1} cannot load inet ruleset" 1>&2 + load_ruleset_family ip ${ns2} + if [ $? -ne 0 ];then + echo "FAIL: ${ns2} cannot load ip ruleset" 1>&2 + exit 1 + fi + + if [ $testipv6 -eq 1 ] ;then + load_ruleset_family ip6 ${ns2} + if [ $? -ne 0 ];then + echo "FAIL: ${ns2} cannot load ip6 ruleset" 1>&2 + exit 1 + fi + fi +fi + +test_helper 2121 "set via ruleset" +ip netns exec ${ns1} sysctl -q 'net.netfilter.nf_conntrack_helper=1' +ip netns exec ${ns2} sysctl -q 'net.netfilter.nf_conntrack_helper=1' +test_helper 21 "auto-assign" + +exit $ret From 673bafd5b8b0ec9b1e2eedca2a2be6b44ee5ba9c Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 24 Jun 2020 18:14:53 -0700 Subject: [PATCH 437/462] net: bcmgenet: re-remove bcmgenet_hfb_add_filter This function was originally removed by Baoyou Xie in commit e2072600a241 ("net: bcmgenet: remove unused function in bcmgenet.c") to prevent a build warning. Some of the functions removed by Baoyou Xie are now used for WAKE_FILTER support so his commit was reverted, but this function is still unused and the kbuild test robot dutifully reported the warning. This commit once again removes the remaining unused hfb functions. Fixes: 14da1510fedc ("Revert "net: bcmgenet: remove unused function in bcmgenet.c"") Reported-by: kbuild test robot Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 77 ------------------- 1 file changed, 77 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index ff31da0ed846..f1fa11665319 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -459,17 +459,6 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, genet_dma_ring_regs[r]); } -static bool bcmgenet_hfb_is_filter_enabled(struct bcmgenet_priv *priv, - u32 f_index) -{ - u32 offset; - u32 reg; - - offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32); - reg = bcmgenet_hfb_reg_readl(priv, offset); - return !!(reg & (1 << (f_index % 32))); -} - static void bcmgenet_hfb_enable_filter(struct bcmgenet_priv *priv, u32 f_index) { u32 offset; @@ -533,19 +522,6 @@ static void bcmgenet_hfb_set_filter_length(struct bcmgenet_priv *priv, bcmgenet_hfb_reg_writel(priv, reg, offset); } -static int bcmgenet_hfb_find_unused_filter(struct bcmgenet_priv *priv) -{ - u32 f_index; - - /* First MAX_NUM_OF_FS_RULES are reserved for Rx NFC filters */ - for (f_index = MAX_NUM_OF_FS_RULES; - f_index < priv->hw_params->hfb_filter_cnt; f_index++) - if (!bcmgenet_hfb_is_filter_enabled(priv, f_index)) - return f_index; - - return -ENOMEM; -} - static int bcmgenet_hfb_validate_mask(void *mask, size_t size) { while (size) { @@ -744,59 +720,6 @@ static int bcmgenet_hfb_create_rxnfc_filter(struct bcmgenet_priv *priv, return err; } -/* bcmgenet_hfb_add_filter - * - * Add new filter to Hardware Filter Block to match and direct Rx traffic to - * desired Rx queue. - * - * f_data is an array of unsigned 32-bit integers where each 32-bit integer - * provides filter data for 2 bytes (4 nibbles) of Rx frame: - * - * bits 31:20 - unused - * bit 19 - nibble 0 match enable - * bit 18 - nibble 1 match enable - * bit 17 - nibble 2 match enable - * bit 16 - nibble 3 match enable - * bits 15:12 - nibble 0 data - * bits 11:8 - nibble 1 data - * bits 7:4 - nibble 2 data - * bits 3:0 - nibble 3 data - * - * Example: - * In order to match: - * - Ethernet frame type = 0x0800 (IP) - * - IP version field = 4 - * - IP protocol field = 0x11 (UDP) - * - * The following filter is needed: - * u32 hfb_filter_ipv4_udp[] = { - * Rx frame offset 0x00: 0x00000000, 0x00000000, 0x00000000, 0x00000000, - * Rx frame offset 0x08: 0x00000000, 0x00000000, 0x000F0800, 0x00084000, - * Rx frame offset 0x10: 0x00000000, 0x00000000, 0x00000000, 0x00030011, - * }; - * - * To add the filter to HFB and direct the traffic to Rx queue 0, call: - * bcmgenet_hfb_add_filter(priv, hfb_filter_ipv4_udp, - * ARRAY_SIZE(hfb_filter_ipv4_udp), 0); - */ -int bcmgenet_hfb_add_filter(struct bcmgenet_priv *priv, u32 *f_data, - u32 f_length, u32 rx_queue) -{ - int f_index; - - f_index = bcmgenet_hfb_find_unused_filter(priv); - if (f_index < 0) - return -ENOMEM; - - if (f_length > priv->hw_params->hfb_filter_size) - return -EINVAL; - - bcmgenet_hfb_set_filter(priv, f_data, f_length, rx_queue, f_index); - bcmgenet_hfb_enable_filter(priv, f_index); - - return 0; -} - /* bcmgenet_hfb_clear * * Clear Hardware Filter Block and disable all filtering. From d966d2efb643a8ba925e910bfc10e19c8b018de7 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 24 Jun 2020 18:14:54 -0700 Subject: [PATCH 438/462] net: bcmgenet: use __be16 for htons(ETH_P_IP) The 16-bit value that holds a short in network byte order should be declared as a restricted big endian type to allow type checks to succeed during assignment. Fixes: 3e370952287c ("net: bcmgenet: add support for ethtool rxnfc flows") Reported-by: kbuild test robot Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index f1fa11665319..c63f01e2bb03 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -610,8 +610,9 @@ static int bcmgenet_hfb_create_rxnfc_filter(struct bcmgenet_priv *priv, { struct ethtool_rx_flow_spec *fs = &rule->fs; int err = 0, offset = 0, f_length = 0; - u16 val_16, mask_16; u8 val_8, mask_8; + __be16 val_16; + u16 mask_16; size_t size; u32 *f_data; From 20d1f2d1b024f6be199a3bedf1578a1d21592bc5 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 24 Jun 2020 18:14:55 -0700 Subject: [PATCH 439/462] net: bcmgenet: use hardware padding of runt frames When commit 474ea9cafc45 ("net: bcmgenet: correctly pad short packets") added the call to skb_padto() it should have been located before the nr_frags parameter was read since that value could be changed when padding packets with lengths between 55 and 59 bytes (inclusive). The use of a stale nr_frags value can cause corruption of the pad data when tx-scatter-gather is enabled. This corruption of the pad can cause invalid checksum computation when hardware offload of tx-checksum is also enabled. Since the original reason for the padding was corrected by commit 7dd399130efb ("net: bcmgenet: fix skb_len in bcmgenet_xmit_single()") we can remove the software padding all together and make use of hardware padding of short frames as long as the hardware also always appends the FCS value to the frame. Fixes: 474ea9cafc45 ("net: bcmgenet: correctly pad short packets") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index c63f01e2bb03..af924a8b885f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2042,11 +2042,6 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) goto out; } - if (skb_padto(skb, ETH_ZLEN)) { - ret = NETDEV_TX_OK; - goto out; - } - /* Retain how many bytes will be sent on the wire, without TSB inserted * by transmit checksum offload */ @@ -2093,6 +2088,9 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) len_stat = (size << DMA_BUFLENGTH_SHIFT) | (priv->hw_params->qtag_mask << DMA_TX_QTAG_SHIFT); + /* Note: if we ever change from DMA_TX_APPEND_CRC below we + * will need to restore software padding of "runt" packets + */ if (!i) { len_stat |= DMA_TX_APPEND_CRC | DMA_SOP; if (skb->ip_summed == CHECKSUM_PARTIAL) From 5a3235e50c0e3219d8659cbec996dae960acfee8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 25 Jun 2020 09:18:16 +0200 Subject: [PATCH 440/462] net: phy: mscc: avoid skcipher API for single block AES encryption The skcipher API dynamically instantiates the transformation object on request that implements the requested algorithm optimally on the given platform. This notion of optimality only matters for cases like bulk network or disk encryption, where performance can be a bottleneck, or in cases where the algorithm itself is not known at compile time. In the mscc case, we are dealing with AES encryption of a single block, and so neither concern applies, and we are better off using the AES library interface, which is lightweight and safe for this kind of use. Note that the scatterlist API does not permit references to buffers that are located on the stack, so the existing code is incorrect in any case, but avoiding the skcipher and scatterlist APIs entirely is the most straight-forward approach to fixing this. Cc: Antoine Tenart Cc: Andrew Lunn Cc: Florian Fainelli Cc: Heiner Kallweit Cc: "David S. Miller" Cc: Jakub Kicinski Fixes: 28c5107aa904e ("net: phy: mscc: macsec support") Reviewed-by: Eric Biggers Signed-off-by: Ard Biesheuvel Tested-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 3 +-- drivers/net/phy/mscc/mscc_macsec.c | 40 +++++++----------------------- 2 files changed, 10 insertions(+), 33 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index f25702386d83..e351d65533aa 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -480,8 +480,7 @@ config MICROCHIP_T1_PHY config MICROSEMI_PHY tristate "Microsemi PHYs" depends on MACSEC || MACSEC=n - select CRYPTO_AES - select CRYPTO_ECB + select CRYPTO_LIB_AES if MACSEC help Currently supports VSC8514, VSC8530, VSC8531, VSC8540 and VSC8541 PHYs diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c index b4d3dc4068e2..d53ca884b5c9 100644 --- a/drivers/net/phy/mscc/mscc_macsec.c +++ b/drivers/net/phy/mscc/mscc_macsec.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include @@ -500,39 +500,17 @@ static u32 vsc8584_macsec_flow_context_id(struct macsec_flow *flow) static int vsc8584_macsec_derive_key(const u8 key[MACSEC_KEYID_LEN], u16 key_len, u8 hkey[16]) { - struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0); - struct skcipher_request *req = NULL; - struct scatterlist src, dst; - DECLARE_CRYPTO_WAIT(wait); - u32 input[4] = {0}; + const u8 input[AES_BLOCK_SIZE] = {0}; + struct crypto_aes_ctx ctx; int ret; - if (IS_ERR(tfm)) - return PTR_ERR(tfm); + ret = aes_expandkey(&ctx, key, key_len); + if (ret) + return ret; - req = skcipher_request_alloc(tfm, GFP_KERNEL); - if (!req) { - ret = -ENOMEM; - goto out; - } - - skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | - CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, - &wait); - ret = crypto_skcipher_setkey(tfm, key, key_len); - if (ret < 0) - goto out; - - sg_init_one(&src, input, 16); - sg_init_one(&dst, hkey, 16); - skcipher_request_set_crypt(req, &src, &dst, 16, NULL); - - ret = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); - -out: - skcipher_request_free(req); - crypto_free_skcipher(tfm); - return ret; + aes_encrypt(&ctx, hkey, input); + memzero_explicit(&ctx, sizeof(ctx)); + return 0; } static int vsc8584_macsec_transformation(struct phy_device *phydev, From 2570284060b48f3f79d8f1a2698792f36c385e9a Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Thu, 25 Jun 2020 14:51:06 +0300 Subject: [PATCH 441/462] tcp: don't ignore ECN CWR on pure ACK there is a problem with the CWR flag set in an incoming ACK segment and it leads to the situation when the ECE flag is latched forever the following packetdrill script shows what happens: // Stack receives incoming segments with CE set +0.1 <[ect0] . 11001:12001(1000) ack 1001 win 65535 +0.0 <[ce] . 12001:13001(1000) ack 1001 win 65535 +0.0 <[ect0] P. 13001:14001(1000) ack 1001 win 65535 // Stack repsonds with ECN ECHO +0.0 >[noecn] . 1001:1001(0) ack 12001 +0.0 >[noecn] E. 1001:1001(0) ack 13001 +0.0 >[noecn] E. 1001:1001(0) ack 14001 // Write a packet +0.1 write(3, ..., 1000) = 1000 +0.0 >[ect0] PE. 1001:2001(1000) ack 14001 // Pure ACK received +0.01 <[noecn] W. 14001:14001(0) ack 2001 win 65535 // Since CWR was sent, this packet should NOT have ECE set +0.1 write(3, ..., 1000) = 1000 +0.0 >[ect0] P. 2001:3001(1000) ack 14001 // but Linux will still keep ECE latched here, with packetdrill // flagging a missing ECE flag, expecting // >[ect0] PE. 2001:3001(1000) ack 14001 // in the script In the situation above we will continue to send ECN ECHO packets and trigger the peer to reduce the congestion window. To avoid that we can check CWR on pure ACKs received. v3: - Add a sequence check to avoid sending an ACK to an ACK v2: - Adjusted the comment - move CWR check before checking for unacknowledged packets Signed-off-by: Denis Kirjanov Acked-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 12fda8f27b08..f3a0eb139b76 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -261,7 +261,8 @@ static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb) * cwnd may be very low (even just 1 packet), so we should ACK * immediately. */ - inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; } } @@ -3665,6 +3666,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_in_ack_event(sk, ack_ev_flags); } + /* This is a deviation from RFC3168 since it states that: + * "When the TCP data sender is ready to set the CWR bit after reducing + * the congestion window, it SHOULD set the CWR bit only on the first + * new data packet that it transmits." + * We accept CWR on pure ACKs to be more robust + * with widely-deployed TCP implementations that do this. + */ + tcp_ecn_accept_cwr(sk, skb); + /* We passed data and got it acked, remove any soft error * log. Something worked... */ @@ -4800,8 +4810,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) skb_dst_drop(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); - tcp_ecn_accept_cwr(sk, skb); - tp->rx_opt.dsack = 0; /* Queue data for delivery to the user. From 206e732323c2a8e6d11f4125a62c27e60a50a850 Mon Sep 17 00:00:00 2001 From: Thomas Martitz Date: Thu, 25 Jun 2020 14:26:03 +0200 Subject: [PATCH 442/462] net: bridge: enfore alignment for ethernet address The eth_addr member is passed to ether_addr functions that require 2-byte alignment, therefore the member must be properly aligned to avoid unaligned accesses. The problem is in place since the initial merge of multicast to unicast: commit 6db6f0eae6052b70885562e1733896647ec1d807 bridge: multicast to unicast Fixes: 6db6f0eae605 ("bridge: multicast to unicast") Cc: Roopa Prabhu Cc: Nikolay Aleksandrov Cc: David S. Miller Cc: Jakub Kicinski Cc: Felix Fietkau Signed-off-by: Thomas Martitz Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 7501be4eeba0..2130fe0194e6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -217,8 +217,8 @@ struct net_bridge_port_group { struct rcu_head rcu; struct timer_list timer; struct br_ip addr; + unsigned char eth_addr[ETH_ALEN] __aligned(2); unsigned char flags; - unsigned char eth_addr[ETH_ALEN]; }; struct net_bridge_mdb_entry { From 0eaf228d574bd82a9aed73e3953bfb81721f4227 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 24 Jun 2020 13:08:17 +0300 Subject: [PATCH 443/462] net: macb: call pm_runtime_put_sync on failure path Call pm_runtime_put_sync() on failure path of at91ether_open. Fixes: e6a41c23df0d ("net: macb: ensure interface is not suspended on at91rm9200") Signed-off-by: Claudiu Beznea Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 257c4920cb88..5705359a3612 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3837,7 +3837,7 @@ static int at91ether_open(struct net_device *dev) ret = at91ether_start(dev); if (ret) - return ret; + goto pm_exit; /* Enable MAC interrupts */ macb_writel(lp, IER, MACB_BIT(RCOMP) | @@ -3850,11 +3850,15 @@ static int at91ether_open(struct net_device *dev) ret = macb_phylink_connect(lp); if (ret) - return ret; + goto pm_exit; netif_start_queue(dev); return 0; + +pm_exit: + pm_runtime_put_sync(&lp->pdev->dev); + return ret; } /* Close the interface */ From 33fdef24c9ac20c68b71b363e423fbf9ad2bfc1e Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 24 Jun 2020 13:08:18 +0300 Subject: [PATCH 444/462] net: macb: free resources on failure path of at91ether_open() DMA buffers were not freed on failure path of at91ether_open(). Along with changes for freeing the DMA buffers the enable/disable interrupt instructions were moved to at91ether_start()/at91ether_stop() functions and the operations on at91ether_stop() were done in their reverse order (compared with how is done in at91ether_start()): before this patch the operation order on interface open path was as follows: 1/ alloc DMA buffers 2/ enable tx, rx 3/ enable interrupts and the order on interface close path was as follows: 1/ disable tx, rx 2/ disable interrupts 3/ free dma buffers. Fixes: 7897b071ac3b ("net: macb: convert to phylink") Signed-off-by: Claudiu Beznea Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 116 ++++++++++++++--------- 1 file changed, 73 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 5705359a3612..52582e8ed90e 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3762,15 +3762,9 @@ static int macb_init(struct platform_device *pdev) static struct sifive_fu540_macb_mgmt *mgmt; -/* Initialize and start the Receiver and Transmit subsystems */ -static int at91ether_start(struct net_device *dev) +static int at91ether_alloc_coherent(struct macb *lp) { - struct macb *lp = netdev_priv(dev); struct macb_queue *q = &lp->queues[0]; - struct macb_dma_desc *desc; - dma_addr_t addr; - u32 ctl; - int i; q->rx_ring = dma_alloc_coherent(&lp->pdev->dev, (AT91ETHER_MAX_RX_DESCR * @@ -3792,6 +3786,43 @@ static int at91ether_start(struct net_device *dev) return -ENOMEM; } + return 0; +} + +static void at91ether_free_coherent(struct macb *lp) +{ + struct macb_queue *q = &lp->queues[0]; + + if (q->rx_ring) { + dma_free_coherent(&lp->pdev->dev, + AT91ETHER_MAX_RX_DESCR * + macb_dma_desc_get_size(lp), + q->rx_ring, q->rx_ring_dma); + q->rx_ring = NULL; + } + + if (q->rx_buffers) { + dma_free_coherent(&lp->pdev->dev, + AT91ETHER_MAX_RX_DESCR * + AT91ETHER_MAX_RBUFF_SZ, + q->rx_buffers, q->rx_buffers_dma); + q->rx_buffers = NULL; + } +} + +/* Initialize and start the Receiver and Transmit subsystems */ +static int at91ether_start(struct macb *lp) +{ + struct macb_queue *q = &lp->queues[0]; + struct macb_dma_desc *desc; + dma_addr_t addr; + u32 ctl; + int i, ret; + + ret = at91ether_alloc_coherent(lp); + if (ret) + return ret; + addr = q->rx_buffers_dma; for (i = 0; i < AT91ETHER_MAX_RX_DESCR; i++) { desc = macb_rx_desc(q, i); @@ -3813,9 +3844,39 @@ static int at91ether_start(struct net_device *dev) ctl = macb_readl(lp, NCR); macb_writel(lp, NCR, ctl | MACB_BIT(RE) | MACB_BIT(TE)); + /* Enable MAC interrupts */ + macb_writel(lp, IER, MACB_BIT(RCOMP) | + MACB_BIT(RXUBR) | + MACB_BIT(ISR_TUND) | + MACB_BIT(ISR_RLE) | + MACB_BIT(TCOMP) | + MACB_BIT(ISR_ROVR) | + MACB_BIT(HRESP)); + return 0; } +static void at91ether_stop(struct macb *lp) +{ + u32 ctl; + + /* Disable MAC interrupts */ + macb_writel(lp, IDR, MACB_BIT(RCOMP) | + MACB_BIT(RXUBR) | + MACB_BIT(ISR_TUND) | + MACB_BIT(ISR_RLE) | + MACB_BIT(TCOMP) | + MACB_BIT(ISR_ROVR) | + MACB_BIT(HRESP)); + + /* Disable Receiver and Transmitter */ + ctl = macb_readl(lp, NCR); + macb_writel(lp, NCR, ctl & ~(MACB_BIT(TE) | MACB_BIT(RE))); + + /* Free resources. */ + at91ether_free_coherent(lp); +} + /* Open the ethernet interface */ static int at91ether_open(struct net_device *dev) { @@ -3835,27 +3896,20 @@ static int at91ether_open(struct net_device *dev) macb_set_hwaddr(lp); - ret = at91ether_start(dev); + ret = at91ether_start(lp); if (ret) goto pm_exit; - /* Enable MAC interrupts */ - macb_writel(lp, IER, MACB_BIT(RCOMP) | - MACB_BIT(RXUBR) | - MACB_BIT(ISR_TUND) | - MACB_BIT(ISR_RLE) | - MACB_BIT(TCOMP) | - MACB_BIT(ISR_ROVR) | - MACB_BIT(HRESP)); - ret = macb_phylink_connect(lp); if (ret) - goto pm_exit; + goto stop; netif_start_queue(dev); return 0; +stop: + at91ether_stop(lp); pm_exit: pm_runtime_put_sync(&lp->pdev->dev); return ret; @@ -3865,37 +3919,13 @@ pm_exit: static int at91ether_close(struct net_device *dev) { struct macb *lp = netdev_priv(dev); - struct macb_queue *q = &lp->queues[0]; - u32 ctl; - - /* Disable Receiver and Transmitter */ - ctl = macb_readl(lp, NCR); - macb_writel(lp, NCR, ctl & ~(MACB_BIT(TE) | MACB_BIT(RE))); - - /* Disable MAC interrupts */ - macb_writel(lp, IDR, MACB_BIT(RCOMP) | - MACB_BIT(RXUBR) | - MACB_BIT(ISR_TUND) | - MACB_BIT(ISR_RLE) | - MACB_BIT(TCOMP) | - MACB_BIT(ISR_ROVR) | - MACB_BIT(HRESP)); netif_stop_queue(dev); phylink_stop(lp->phylink); phylink_disconnect_phy(lp->phylink); - dma_free_coherent(&lp->pdev->dev, - AT91ETHER_MAX_RX_DESCR * - macb_dma_desc_get_size(lp), - q->rx_ring, q->rx_ring_dma); - q->rx_ring = NULL; - - dma_free_coherent(&lp->pdev->dev, - AT91ETHER_MAX_RX_DESCR * AT91ETHER_MAX_RBUFF_SZ, - q->rx_buffers, q->rx_buffers_dma); - q->rx_buffers = NULL; + at91ether_stop(lp); return pm_runtime_put(&lp->pdev->dev); } From e39109f59614e5646e6c53100a3e7e8f63dd1d2b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 24 Jun 2020 16:54:44 +0300 Subject: [PATCH 445/462] net: dsa: sja1105: move sja1105_compose_gating_subschedule at the top It turns out that sja1105_compose_gating_subschedule must also be called from sja1105_vl_delete, to recalculate the overall tc-gate configuration. Currently this is not possible without introducing a forward declaration. So move the function at the top of the file, along with its dependencies. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_vl.c | 320 +++++++++++++-------------- 1 file changed, 160 insertions(+), 160 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c index 0056f9c1e471..5ffa71f02064 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.c +++ b/drivers/net/dsa/sja1105/sja1105_vl.c @@ -7,6 +7,166 @@ #define SJA1105_SIZE_VL_STATUS 8 +/* Insert into the global gate list, sorted by gate action time. */ +static int sja1105_insert_gate_entry(struct sja1105_gating_config *gating_cfg, + struct sja1105_rule *rule, + u8 gate_state, s64 entry_time, + struct netlink_ext_ack *extack) +{ + struct sja1105_gate_entry *e; + int rc; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->rule = rule; + e->gate_state = gate_state; + e->interval = entry_time; + + if (list_empty(&gating_cfg->entries)) { + list_add(&e->list, &gating_cfg->entries); + } else { + struct sja1105_gate_entry *p; + + list_for_each_entry(p, &gating_cfg->entries, list) { + if (p->interval == e->interval) { + NL_SET_ERR_MSG_MOD(extack, + "Gate conflict"); + rc = -EBUSY; + goto err; + } + + if (e->interval < p->interval) + break; + } + list_add(&e->list, p->list.prev); + } + + gating_cfg->num_entries++; + + return 0; +err: + kfree(e); + return rc; +} + +/* The gate entries contain absolute times in their e->interval field. Convert + * that to proper intervals (i.e. "0, 5, 10, 15" to "5, 5, 5, 5"). + */ +static void +sja1105_gating_cfg_time_to_interval(struct sja1105_gating_config *gating_cfg, + u64 cycle_time) +{ + struct sja1105_gate_entry *last_e; + struct sja1105_gate_entry *e; + struct list_head *prev; + + list_for_each_entry(e, &gating_cfg->entries, list) { + struct sja1105_gate_entry *p; + + prev = e->list.prev; + + if (prev == &gating_cfg->entries) + continue; + + p = list_entry(prev, struct sja1105_gate_entry, list); + p->interval = e->interval - p->interval; + } + last_e = list_last_entry(&gating_cfg->entries, + struct sja1105_gate_entry, list); + if (last_e->list.prev != &gating_cfg->entries) + last_e->interval = cycle_time - last_e->interval; +} + +static void sja1105_free_gating_config(struct sja1105_gating_config *gating_cfg) +{ + struct sja1105_gate_entry *e, *n; + + list_for_each_entry_safe(e, n, &gating_cfg->entries, list) { + list_del(&e->list); + kfree(e); + } +} + +static int sja1105_compose_gating_subschedule(struct sja1105_private *priv, + struct netlink_ext_ack *extack) +{ + struct sja1105_gating_config *gating_cfg = &priv->tas_data.gating_cfg; + struct sja1105_rule *rule; + s64 max_cycle_time = 0; + s64 its_base_time = 0; + int i, rc = 0; + + list_for_each_entry(rule, &priv->flow_block.rules, list) { + if (rule->type != SJA1105_RULE_VL) + continue; + if (rule->vl.type != SJA1105_VL_TIME_TRIGGERED) + continue; + + if (max_cycle_time < rule->vl.cycle_time) { + max_cycle_time = rule->vl.cycle_time; + its_base_time = rule->vl.base_time; + } + } + + if (!max_cycle_time) + return 0; + + dev_dbg(priv->ds->dev, "max_cycle_time %lld its_base_time %lld\n", + max_cycle_time, its_base_time); + + sja1105_free_gating_config(gating_cfg); + + gating_cfg->base_time = its_base_time; + gating_cfg->cycle_time = max_cycle_time; + gating_cfg->num_entries = 0; + + list_for_each_entry(rule, &priv->flow_block.rules, list) { + s64 time; + s64 rbt; + + if (rule->type != SJA1105_RULE_VL) + continue; + if (rule->vl.type != SJA1105_VL_TIME_TRIGGERED) + continue; + + /* Calculate the difference between this gating schedule's + * base time, and the base time of the gating schedule with the + * longest cycle time. We call it the relative base time (rbt). + */ + rbt = future_base_time(rule->vl.base_time, rule->vl.cycle_time, + its_base_time); + rbt -= its_base_time; + + time = rbt; + + for (i = 0; i < rule->vl.num_entries; i++) { + u8 gate_state = rule->vl.entries[i].gate_state; + s64 entry_time = time; + + while (entry_time < max_cycle_time) { + rc = sja1105_insert_gate_entry(gating_cfg, rule, + gate_state, + entry_time, + extack); + if (rc) + goto err; + + entry_time += rule->vl.cycle_time; + } + time += rule->vl.entries[i].interval; + } + } + + sja1105_gating_cfg_time_to_interval(gating_cfg, max_cycle_time); + + return 0; +err: + sja1105_free_gating_config(gating_cfg); + return rc; +} + /* The switch flow classification core implements TTEthernet, which 'thinks' in * terms of Virtual Links (VL), a concept borrowed from ARINC 664 part 7. * However it also has one other operating mode (VLLUPFORMAT=0) where it acts @@ -397,166 +557,6 @@ int sja1105_vl_delete(struct sja1105_private *priv, int port, return sja1105_static_config_reload(priv, SJA1105_VIRTUAL_LINKS); } -/* Insert into the global gate list, sorted by gate action time. */ -static int sja1105_insert_gate_entry(struct sja1105_gating_config *gating_cfg, - struct sja1105_rule *rule, - u8 gate_state, s64 entry_time, - struct netlink_ext_ack *extack) -{ - struct sja1105_gate_entry *e; - int rc; - - e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) - return -ENOMEM; - - e->rule = rule; - e->gate_state = gate_state; - e->interval = entry_time; - - if (list_empty(&gating_cfg->entries)) { - list_add(&e->list, &gating_cfg->entries); - } else { - struct sja1105_gate_entry *p; - - list_for_each_entry(p, &gating_cfg->entries, list) { - if (p->interval == e->interval) { - NL_SET_ERR_MSG_MOD(extack, - "Gate conflict"); - rc = -EBUSY; - goto err; - } - - if (e->interval < p->interval) - break; - } - list_add(&e->list, p->list.prev); - } - - gating_cfg->num_entries++; - - return 0; -err: - kfree(e); - return rc; -} - -/* The gate entries contain absolute times in their e->interval field. Convert - * that to proper intervals (i.e. "0, 5, 10, 15" to "5, 5, 5, 5"). - */ -static void -sja1105_gating_cfg_time_to_interval(struct sja1105_gating_config *gating_cfg, - u64 cycle_time) -{ - struct sja1105_gate_entry *last_e; - struct sja1105_gate_entry *e; - struct list_head *prev; - - list_for_each_entry(e, &gating_cfg->entries, list) { - struct sja1105_gate_entry *p; - - prev = e->list.prev; - - if (prev == &gating_cfg->entries) - continue; - - p = list_entry(prev, struct sja1105_gate_entry, list); - p->interval = e->interval - p->interval; - } - last_e = list_last_entry(&gating_cfg->entries, - struct sja1105_gate_entry, list); - if (last_e->list.prev != &gating_cfg->entries) - last_e->interval = cycle_time - last_e->interval; -} - -static void sja1105_free_gating_config(struct sja1105_gating_config *gating_cfg) -{ - struct sja1105_gate_entry *e, *n; - - list_for_each_entry_safe(e, n, &gating_cfg->entries, list) { - list_del(&e->list); - kfree(e); - } -} - -static int sja1105_compose_gating_subschedule(struct sja1105_private *priv, - struct netlink_ext_ack *extack) -{ - struct sja1105_gating_config *gating_cfg = &priv->tas_data.gating_cfg; - struct sja1105_rule *rule; - s64 max_cycle_time = 0; - s64 its_base_time = 0; - int i, rc = 0; - - list_for_each_entry(rule, &priv->flow_block.rules, list) { - if (rule->type != SJA1105_RULE_VL) - continue; - if (rule->vl.type != SJA1105_VL_TIME_TRIGGERED) - continue; - - if (max_cycle_time < rule->vl.cycle_time) { - max_cycle_time = rule->vl.cycle_time; - its_base_time = rule->vl.base_time; - } - } - - if (!max_cycle_time) - return 0; - - dev_dbg(priv->ds->dev, "max_cycle_time %lld its_base_time %lld\n", - max_cycle_time, its_base_time); - - sja1105_free_gating_config(gating_cfg); - - gating_cfg->base_time = its_base_time; - gating_cfg->cycle_time = max_cycle_time; - gating_cfg->num_entries = 0; - - list_for_each_entry(rule, &priv->flow_block.rules, list) { - s64 time; - s64 rbt; - - if (rule->type != SJA1105_RULE_VL) - continue; - if (rule->vl.type != SJA1105_VL_TIME_TRIGGERED) - continue; - - /* Calculate the difference between this gating schedule's - * base time, and the base time of the gating schedule with the - * longest cycle time. We call it the relative base time (rbt). - */ - rbt = future_base_time(rule->vl.base_time, rule->vl.cycle_time, - its_base_time); - rbt -= its_base_time; - - time = rbt; - - for (i = 0; i < rule->vl.num_entries; i++) { - u8 gate_state = rule->vl.entries[i].gate_state; - s64 entry_time = time; - - while (entry_time < max_cycle_time) { - rc = sja1105_insert_gate_entry(gating_cfg, rule, - gate_state, - entry_time, - extack); - if (rc) - goto err; - - entry_time += rule->vl.cycle_time; - } - time += rule->vl.entries[i].interval; - } - } - - sja1105_gating_cfg_time_to_interval(gating_cfg, max_cycle_time); - - return 0; -err: - sja1105_free_gating_config(gating_cfg); - return rc; -} - int sja1105_vl_gate(struct sja1105_private *priv, int port, struct netlink_ext_ack *extack, unsigned long cookie, struct sja1105_key *key, u32 index, s32 prio, From 026bdb2b96525edd1fb5cc6f24587ff08d277d5c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 24 Jun 2020 16:54:45 +0300 Subject: [PATCH 446/462] net: dsa: sja1105: unconditionally free old gating config Currently sja1105_compose_gating_subschedule is not prepared to be called for the case where we want to recompute the global tc-gate configuration after we've deleted those actions on a port. After deleting the tc-gate actions on the last port, max_cycle_time would become zero, and that would incorrectly prevent sja1105_free_gating_config from getting called. So move the freeing function above the check for the need to apply a new configuration. Fixes: 834f8933d5dd ("net: dsa: sja1105: implement tc-gate using time-triggered virtual links") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_vl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c index 5ffa71f02064..5ff370f507e6 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.c +++ b/drivers/net/dsa/sja1105/sja1105_vl.c @@ -98,6 +98,8 @@ static int sja1105_compose_gating_subschedule(struct sja1105_private *priv, s64 its_base_time = 0; int i, rc = 0; + sja1105_free_gating_config(gating_cfg); + list_for_each_entry(rule, &priv->flow_block.rules, list) { if (rule->type != SJA1105_RULE_VL) continue; @@ -116,8 +118,6 @@ static int sja1105_compose_gating_subschedule(struct sja1105_private *priv, dev_dbg(priv->ds->dev, "max_cycle_time %lld its_base_time %lld\n", max_cycle_time, its_base_time); - sja1105_free_gating_config(gating_cfg); - gating_cfg->base_time = its_base_time; gating_cfg->cycle_time = max_cycle_time; gating_cfg->num_entries = 0; From 82f6896a25ee2471fdf039eb7b286a692d78f504 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 24 Jun 2020 16:54:46 +0300 Subject: [PATCH 447/462] net: dsa: sja1105: recalculate gating subschedule after deleting tc-gate rules Currently, tas_data->enabled would remain true even after deleting all tc-gate rules from the switch ports, which would cause the sja1105_tas_state_machine to get unnecessarily scheduled. Also, if there were any errors which would prevent the hardware from enabling the gating schedule, the sja1105_tas_state_machine would continuously detect and print that, spamming the kernel log, even if the rules were subsequently deleted. The rules themselves are _not_ active, because sja1105_init_scheduling does enough of a job to not install the gating schedule in the static config. But the virtual link rules themselves are still present. So call the functions that remove the tc-gate configuration from priv->tas_data.gating_cfg, so that tas_data->enabled can be set to false, and sja1105_tas_state_machine will stop from being scheduled. Fixes: 834f8933d5dd ("net: dsa: sja1105: implement tc-gate using time-triggered virtual links") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_vl.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c index 5ff370f507e6..6b1fdc1b46e7 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.c +++ b/drivers/net/dsa/sja1105/sja1105_vl.c @@ -550,10 +550,18 @@ int sja1105_vl_delete(struct sja1105_private *priv, int port, kfree(rule); } + rc = sja1105_compose_gating_subschedule(priv, extack); + if (rc) + return rc; + rc = sja1105_init_virtual_links(priv, extack); if (rc) return rc; + rc = sja1105_init_scheduling(priv); + if (rc < 0) + return rc; + return sja1105_static_config_reload(priv, SJA1105_VIRTUAL_LINKS); } From 43ce887c5050a3c213450a3058505f6a06519dd4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 24 Jun 2020 16:54:47 +0300 Subject: [PATCH 448/462] net: dsa: sja1105: fix tc-gate schedule with single element The sja1105_gating_cfg_time_to_interval function does this, as per the comments: /* The gate entries contain absolute times in their e->interval field. Convert * that to proper intervals (i.e. "0, 5, 10, 15" to "5, 5, 5, 5"). */ To perform that task, it iterates over gating_cfg->entries, at each step updating the interval of the _previous_ entry. So one interval remains to be updated at the end of the loop: the last one (since it isn't "prev" for anyone else). But there was an erroneous check, that the last element's interval should not be updated if it's also the only element. I'm not quite sure why that check was there, but it's clearly incorrect, as a tc-gate schedule with a single element would get an e->interval of zero, regardless of the duration requested by the user. The switch wouldn't even consider this configuration as valid: it will just drop all traffic that matches the rule. Fixes: 834f8933d5dd ("net: dsa: sja1105: implement tc-gate using time-triggered virtual links") Reported-by: Xiaoliang Yang Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_vl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c index 6b1fdc1b46e7..af3565160db6 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.c +++ b/drivers/net/dsa/sja1105/sja1105_vl.c @@ -75,8 +75,7 @@ sja1105_gating_cfg_time_to_interval(struct sja1105_gating_config *gating_cfg, } last_e = list_last_entry(&gating_cfg->entries, struct sja1105_gate_entry, list); - if (last_e->list.prev != &gating_cfg->entries) - last_e->interval = cycle_time - last_e->interval; + last_e->interval = cycle_time - last_e->interval; } static void sja1105_free_gating_config(struct sja1105_gating_config *gating_cfg) From b344579ca8478598937215f7005d6c7b84d28aee Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 24 Jun 2020 12:42:02 -0400 Subject: [PATCH 449/462] tcp_cubic: fix spurious HYSTART_DELAY exit upon drop in min RTT Mirja Kuehlewind reported a bug in Linux TCP CUBIC Hystart, where Hystart HYSTART_DELAY mechanism can exit Slow Start spuriously on an ACK when the minimum rtt of a connection goes down. From inspection it is clear from the existing code that this could happen in an example like the following: o The first 8 RTT samples in a round trip are 150ms, resulting in a curr_rtt of 150ms and a delay_min of 150ms. o The 9th RTT sample is 100ms. The curr_rtt does not change after the first 8 samples, so curr_rtt remains 150ms. But delay_min can be lowered at any time, so delay_min falls to 100ms. The code executes the HYSTART_DELAY comparison between curr_rtt of 150ms and delay_min of 100ms, and the curr_rtt is declared far enough above delay_min to force a (spurious) exit of Slow start. The fix here is simple: allow every RTT sample in a round trip to lower the curr_rtt. Fixes: ae27e98a5152 ("[TCP] CUBIC v2.3") Reported-by: Mirja Kuehlewind Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 8f8eefd3a3ce..c7bf5b26bf0c 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -432,10 +432,9 @@ static void hystart_update(struct sock *sk, u32 delay) if (hystart_detect & HYSTART_DELAY) { /* obtain the minimum delay of more than sampling packets */ + if (ca->curr_rtt > delay) + ca->curr_rtt = delay; if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { - if (ca->curr_rtt > delay) - ca->curr_rtt = delay; - ca->sample_cnt++; } else { if (ca->curr_rtt > ca->delay_min + From 7d21d54d624777358ab6c7be7ff778808fef70ba Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 24 Jun 2020 12:42:03 -0400 Subject: [PATCH 450/462] bpf: tcp: bpf_cubic: fix spurious HYSTART_DELAY exit upon drop in min RTT Apply the fix from: "tcp_cubic: fix spurious HYSTART_DELAY exit upon drop in min RTT" to the BPF implementation of TCP CUBIC congestion control. Repeating the commit description here for completeness: Mirja Kuehlewind reported a bug in Linux TCP CUBIC Hystart, where Hystart HYSTART_DELAY mechanism can exit Slow Start spuriously on an ACK when the minimum rtt of a connection goes down. From inspection it is clear from the existing code that this could happen in an example like the following: o The first 8 RTT samples in a round trip are 150ms, resulting in a curr_rtt of 150ms and a delay_min of 150ms. o The 9th RTT sample is 100ms. The curr_rtt does not change after the first 8 samples, so curr_rtt remains 150ms. But delay_min can be lowered at any time, so delay_min falls to 100ms. The code executes the HYSTART_DELAY comparison between curr_rtt of 150ms and delay_min of 100ms, and the curr_rtt is declared far enough above delay_min to force a (spurious) exit of Slow start. The fix here is simple: allow every RTT sample in a round trip to lower the curr_rtt. Fixes: 6de4a9c430b5 ("bpf: tcp: Add bpf_cubic example") Reported-by: Mirja Kuehlewind Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/progs/bpf_cubic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index 7897c8f4d363..ef574087f1e1 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -480,10 +480,9 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay) if (hystart_detect & HYSTART_DELAY) { /* obtain the minimum delay of more than sampling packets */ + if (ca->curr_rtt > delay) + ca->curr_rtt = delay; if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { - if (ca->curr_rtt > delay) - ca->curr_rtt = delay; - ca->sample_cnt++; } else { if (ca->curr_rtt > ca->delay_min + From b6186d413b1d50f45fe2415d760947576ef3691a Mon Sep 17 00:00:00 2001 From: Briana Oursler Date: Wed, 24 Jun 2020 12:29:14 -0700 Subject: [PATCH 451/462] tc-testing: avoid action cookies with odd length. Update odd length cookie hexstrings in csum.json, tunnel_key.json and bpf.json to be even length to comply with check enforced in commit 0149dabf2a1b ("tc: m_actions: check cookie hexstring len") in iproute2. Signed-off-by: Briana Oursler Reviewed-by: Stefano Brivio Reviewed-by: Davide Caratti Signed-off-by: David S. Miller --- .../testing/selftests/tc-testing/tc-tests/actions/bpf.json | 4 ++-- .../testing/selftests/tc-testing/tc-tests/actions/csum.json | 4 ++-- .../selftests/tc-testing/tc-tests/actions/tunnel_key.json | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 47a3082b6661..503982b8f295 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -260,10 +260,10 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967296 cookie 12345", + "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967296 cookie 123456", "expExitCode": "255", "verifyCmd": "$TC action ls action bpf", - "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*cookie 12345", + "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*cookie 123456", "matchCount": "0", "teardown": [ "$TC action flush action bpf" diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json index 88ec134872e4..072febf25f55 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json @@ -469,7 +469,7 @@ 255 ] ], - "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"", + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie 123456789abcde \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"", "expExitCode": "0", "verifyCmd": "$TC actions ls action csum", "matchPattern": "^[ \t]+index [0-9]* ref", @@ -492,7 +492,7 @@ 1, 255 ], - "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"" + "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie 123456789abcde \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"" ], "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"", "expExitCode": "0", diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json index 7357c58fa2dc..d06346968bcb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json @@ -818,12 +818,12 @@ 1, 255 ], - "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 index 1 cookie aabbccddeeff112233445566778800a" + "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 index 1 cookie 123456" ], - "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 id 11 csum reclassify index 1 cookie a1b1c1d1", + "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 id 11 csum reclassify index 1 cookie 123456", "expExitCode": "0", "verifyCmd": "$TC actions get action tunnel_key index 1", - "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*csum reclassify.*index 1.*cookie a1b1c1d1", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*csum reclassify.*index 1.*cookie 123456", "matchCount": "1", "teardown": [ "$TC actions flush action tunnel_key" From 471e39df96b9a4c4ba88a2da9e25a126624d7a9c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 24 Jun 2020 17:34:18 -0300 Subject: [PATCH 452/462] sctp: Don't advertise IPv4 addresses if ipv6only is set on the socket If a socket is set ipv6only, it will still send IPv4 addresses in the INIT and INIT_ACK packets. This potentially misleads the peer into using them, which then would cause association termination. The fix is to not add IPv4 addresses to ipv6only sockets. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Corey Minyard Signed-off-by: Marcelo Ricardo Leitner Tested-by: Corey Minyard Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 8 +++++--- net/sctp/associola.c | 5 ++++- net/sctp/bind_addr.c | 1 + net/sctp/protocol.c | 3 ++- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 15b4d9aec7ff..122d9e2d8dfd 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -353,11 +353,13 @@ enum { ipv4_is_anycast_6to4(a)) /* Flags used for the bind address copy functions. */ -#define SCTP_ADDR6_ALLOWED 0x00000001 /* IPv6 address is allowed by +#define SCTP_ADDR4_ALLOWED 0x00000001 /* IPv4 address is allowed by local sock family */ -#define SCTP_ADDR4_PEERSUPP 0x00000002 /* IPv4 address is supported by +#define SCTP_ADDR6_ALLOWED 0x00000002 /* IPv6 address is allowed by + local sock family */ +#define SCTP_ADDR4_PEERSUPP 0x00000004 /* IPv4 address is supported by peer */ -#define SCTP_ADDR6_PEERSUPP 0x00000004 /* IPv6 address is supported by +#define SCTP_ADDR6_PEERSUPP 0x00000008 /* IPv6 address is supported by peer */ /* Reasons to retransmit. */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 72315137d7e7..8d735461fa19 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1565,12 +1565,15 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, enum sctp_scope scope, gfp_t gfp) { + struct sock *sk = asoc->base.sk; int flags; /* Use scoping rules to determine the subset of addresses from * the endpoint. */ - flags = (PF_INET6 == asoc->base.sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0; + flags = (PF_INET6 == sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0; + if (!inet_v6_ipv6only(sk)) + flags |= SCTP_ADDR4_ALLOWED; if (asoc->peer.ipv4_address) flags |= SCTP_ADDR4_PEERSUPP; if (asoc->peer.ipv6_address) diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 53bc61537f44..701c5a4e441d 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -461,6 +461,7 @@ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, * well as the remote peer. */ if ((((AF_INET == addr->sa.sa_family) && + (flags & SCTP_ADDR4_ALLOWED) && (flags & SCTP_ADDR4_PEERSUPP))) || (((AF_INET6 == addr->sa.sa_family) && (flags & SCTP_ADDR6_ALLOWED) && diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 092d1afdee0d..cde29f3c7fb3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -148,7 +148,8 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, * sock as well as the remote peer. */ if (addr->a.sa.sa_family == AF_INET && - !(copy_flags & SCTP_ADDR4_PEERSUPP)) + (!(copy_flags & SCTP_ADDR4_ALLOWED) || + !(copy_flags & SCTP_ADDR4_PEERSUPP))) continue; if (addr->a.sa.sa_family == AF_INET6 && (!(copy_flags & SCTP_ADDR6_ALLOWED) || From b18e9834f7b248b41e8179a039ec80803bb3d67a Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 24 Jun 2020 14:02:36 -0700 Subject: [PATCH 453/462] vxlan: fix last fdb index during dump of fdb with nhid This patch fixes last saved fdb index in fdb dump handler when handling fdb's with nhid. Fixes: 1274e1cc4226 ("vxlan: ecmp support for mac fdb entries") Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e8085ab6d484..89d85dcb200e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1380,6 +1380,8 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct vxlan_rdst *rd; if (rcu_access_pointer(f->nh)) { + if (*idx < cb->args[2]) + goto skip_nh; err = vxlan_fdb_info(skb, vxlan, f, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -1387,6 +1389,8 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, NLM_F_MULTI, NULL); if (err < 0) goto out; +skip_nh: + *idx += 1; continue; } From df08126e3833e9dca19e2407db5f5860a7c194fb Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 24 Jun 2020 16:06:03 -0600 Subject: [PATCH 454/462] wireguard: receive: account for napi_gro_receive never returning GRO_DROP The napi_gro_receive function no longer returns GRO_DROP ever, making handling GRO_DROP dead code. This commit removes that dead code. Further, it's not even clear that device drivers have any business in taking action after passing off received packets; that's arguably out of their hands. Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Fixes: 6570bc79c0df ("net: core: use listified Rx for GRO_NORMAL in napi_gro_receive()") Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/receive.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 91438144e4f7..9b2ab6fc91cd 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -414,14 +414,8 @@ static void wg_packet_consume_data_done(struct wg_peer *peer, if (unlikely(routed_peer != peer)) goto dishonest_packet_peer; - if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) { - ++dev->stats.rx_dropped; - net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n", - dev->name, peer->internal_id, - &peer->endpoint.addr); - } else { - update_rx_stats(peer, message_data_len(len_before_trim)); - } + napi_gro_receive(&peer->napi, skb); + update_rx_stats(peer, message_data_len(len_before_trim)); return; dishonest_packet_peer: From e5e7d8052f6140985c03bd49ebaa0af9c2944bc6 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 24 Jun 2020 16:06:04 -0600 Subject: [PATCH 455/462] socionext: account for napi_gro_receive never returning GRO_DROP The napi_gro_receive function no longer returns GRO_DROP ever, making handling GRO_DROP dead code. This commit removes that dead code. Further, it's not even clear that device drivers have any business in taking action after passing off received packets; that's arguably out of their hands. Fixes: 6570bc79c0df ("net: core: use listified Rx for GRO_NORMAL in napi_gro_receive()") Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/ethernet/socionext/netsec.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 328bc38848bb..0f366cc50b74 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1044,8 +1044,9 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) skb->ip_summed = CHECKSUM_UNNECESSARY; next: - if ((skb && napi_gro_receive(&priv->napi, skb) != GRO_DROP) || - xdp_result) { + if (skb) + napi_gro_receive(&priv->napi, skb); + if (skb || xdp_result) { ndev->stats.rx_packets++; ndev->stats.rx_bytes += xdp.data_end - xdp.data; } From 93ab48a97af50c7c62c3c94996002949ffebf004 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 24 Jun 2020 16:06:05 -0600 Subject: [PATCH 456/462] hns: do not cast return value of napi_gro_receive to null Basically no drivers care about the return value here, and there's no __must_check that would make casting to void sensible, so remove it. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index c117074c16e3..23f278e46975 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -699,7 +699,7 @@ static void hns_nic_rx_up_pro(struct hns_nic_ring_data *ring_data, struct net_device *ndev = ring_data->napi.dev; skb->protocol = eth_type_trans(skb, ndev); - (void)napi_gro_receive(&ring_data->napi, skb); + napi_gro_receive(&ring_data->napi, skb); } static int hns_desc_unused(struct hnae_ring *ring) From 045790b7bc66a75070c112a61558c639cef2263e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 24 Jun 2020 16:06:06 -0600 Subject: [PATCH 457/462] wil6210: account for napi_gro_receive never returning GRO_DROP The napi_gro_receive function no longer returns GRO_DROP ever, making handling GRO_DROP dead code. This commit removes that dead code. Further, it's not even clear that device drivers have any business in taking action after passing off received packets; that's arguably out of their hands. In this case, too, the non-gro path didn't bother checking the return value. Plus, this had some clunky debugging functions that duplicated code from elsewhere and was generally pretty messy. So, this commit cleans that all up too. Fixes: 6570bc79c0df ("net: core: use listified Rx for GRO_NORMAL in napi_gro_receive()") Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireless/ath/wil6210/txrx.c | 39 +++++++------------------ 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c index bc8c15fb609d..080e5aa60bea 100644 --- a/drivers/net/wireless/ath/wil6210/txrx.c +++ b/drivers/net/wireless/ath/wil6210/txrx.c @@ -897,7 +897,6 @@ static void wil_rx_handle_eapol(struct wil6210_vif *vif, struct sk_buff *skb) void wil_netif_rx(struct sk_buff *skb, struct net_device *ndev, int cid, struct wil_net_stats *stats, bool gro) { - gro_result_t rc = GRO_NORMAL; struct wil6210_vif *vif = ndev_to_vif(ndev); struct wil6210_priv *wil = ndev_to_wil(ndev); struct wireless_dev *wdev = vif_to_wdev(vif); @@ -908,22 +907,16 @@ void wil_netif_rx(struct sk_buff *skb, struct net_device *ndev, int cid, */ int mcast = is_multicast_ether_addr(da); struct sk_buff *xmit_skb = NULL; - static const char * const gro_res_str[] = { - [GRO_MERGED] = "GRO_MERGED", - [GRO_MERGED_FREE] = "GRO_MERGED_FREE", - [GRO_HELD] = "GRO_HELD", - [GRO_NORMAL] = "GRO_NORMAL", - [GRO_DROP] = "GRO_DROP", - [GRO_CONSUMED] = "GRO_CONSUMED", - }; if (wdev->iftype == NL80211_IFTYPE_STATION) { sa = wil_skb_get_sa(skb); if (mcast && ether_addr_equal(sa, ndev->dev_addr)) { /* mcast packet looped back to us */ - rc = GRO_DROP; dev_kfree_skb(skb); - goto stats; + ndev->stats.rx_dropped++; + stats->rx_dropped++; + wil_dbg_txrx(wil, "Rx drop %d bytes\n", len); + return; } } else if (wdev->iftype == NL80211_IFTYPE_AP && !vif->ap_isolate) { if (mcast) { @@ -967,26 +960,16 @@ void wil_netif_rx(struct sk_buff *skb, struct net_device *ndev, int cid, wil_rx_handle_eapol(vif, skb); if (gro) - rc = napi_gro_receive(&wil->napi_rx, skb); + napi_gro_receive(&wil->napi_rx, skb); else netif_rx_ni(skb); - wil_dbg_txrx(wil, "Rx complete %d bytes => %s\n", - len, gro_res_str[rc]); - } -stats: - /* statistics. rc set to GRO_NORMAL for AP bridging */ - if (unlikely(rc == GRO_DROP)) { - ndev->stats.rx_dropped++; - stats->rx_dropped++; - wil_dbg_txrx(wil, "Rx drop %d bytes\n", len); - } else { - ndev->stats.rx_packets++; - stats->rx_packets++; - ndev->stats.rx_bytes += len; - stats->rx_bytes += len; - if (mcast) - ndev->stats.multicast++; } + ndev->stats.rx_packets++; + stats->rx_packets++; + ndev->stats.rx_bytes += len; + stats->rx_bytes += len; + if (mcast) + ndev->stats.multicast++; } void wil_netif_rx_any(struct sk_buff *skb, struct net_device *ndev) From 1ae71d997a672739b22572cbe72d61a0eed8c42c Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 25 Jun 2020 00:09:08 +0200 Subject: [PATCH 458/462] ethtool: fix error handling in linkstate_prepare_data() When getting SQI or maximum SQI value fails in linkstate_prepare_data(), we must not return without calling ethnl_ops_complete(dev) as that could result in imbalance between ethtool_ops ->begin() and ->complete() calls. Fixes: 806602191592 ("ethtool: provide UAPI for PHY Signal Quality Index (SQI)") Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ethtool/linkstate.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index 7f47ba89054e..afe5ac8a0f00 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -78,19 +78,18 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, ret = linkstate_get_sqi(dev); if (ret < 0 && ret != -EOPNOTSUPP) - return ret; - + goto out; data->sqi = ret; ret = linkstate_get_sqi_max(dev); if (ret < 0 && ret != -EOPNOTSUPP) - return ret; - + goto out; data->sqi_max = ret; + ret = 0; +out: ethnl_ops_complete(dev); - - return 0; + return ret; } static int linkstate_reply_size(const struct ethnl_req_info *req_base, From 9208d2863ac689a563b92f2161d8d1e7127d0add Mon Sep 17 00:00:00 2001 From: Ilya Ponetayev Date: Thu, 25 Jun 2020 22:12:07 +0200 Subject: [PATCH 459/462] sch_cake: don't try to reallocate or unshare skb unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cake_handle_diffserv() tries to linearize mac and network header parts of skb and to make it writable unconditionally. In some cases it leads to full skb reallocation, which reduces throughput and increases CPU load. Some measurements of IPv4 forward + NAPT on MIPS router with 580 MHz single-core CPU was conducted. It appears that on kernel 4.9 skb_try_make_writable() reallocates skb, if skb was allocated in ethernet driver via so-called 'build skb' method from page cache (it was discovered by strange increase of kmalloc-2048 slab at first). Obtain DSCP value via read-only skb_header_pointer() call, and leave linearization only for DSCP bleaching or ECN CE setting. And, as an additional optimisation, skip diffserv parsing entirely if it is not needed by the current configuration. Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits") Signed-off-by: Ilya Ponetayev [ fix a few style issues, reflow commit message ] Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 60f8ae578819..cae006bef565 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1553,30 +1553,49 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) { - int wlen = skb_network_offset(skb); + const int offset = skb_network_offset(skb); + u16 *buf, buf_; u8 dscp; switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): - wlen += sizeof(struct iphdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) + buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); + if (unlikely(!buf)) return 0; - dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; - if (wash && dscp) + /* ToS is in the second byte of iphdr */ + dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2; + + if (wash && dscp) { + const int wlen = offset + sizeof(struct iphdr); + + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); + } + return dscp; case htons(ETH_P_IPV6): - wlen += sizeof(struct ipv6hdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) + buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); + if (unlikely(!buf)) return 0; - dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; - if (wash && dscp) + /* Traffic class is in the first and second bytes of ipv6hdr */ + dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2; + + if (wash && dscp) { + const int wlen = offset + sizeof(struct ipv6hdr); + + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); + } + return dscp; case htons(ETH_P_ARP): From 8c95eca0bb8c4bd2231a0d581f1ad0d50c90488c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 25 Jun 2020 22:12:08 +0200 Subject: [PATCH 460/462] sch_cake: don't call diffserv parsing code when it is not needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a further optimisation of the diffserv parsing codepath, we can skip it entirely if CAKE is configured to neither use diffserv-based classification, nor to zero out the diffserv bits. Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index cae006bef565..094d6e652deb 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1551,7 +1551,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) return idx + (tin << 16); } -static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) +static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash) { const int offset = skb_network_offset(skb); u16 *buf, buf_; @@ -1612,14 +1612,17 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, { struct cake_sched_data *q = qdisc_priv(sch); u32 tin, mark; + bool wash; u8 dscp; /* Tin selection: Default to diffserv-based selection, allow overriding - * using firewall marks or skb->priority. + * using firewall marks or skb->priority. Call DSCP parsing early if + * wash is enabled, otherwise defer to below to skip unneeded parsing. */ - dscp = cake_handle_diffserv(skb, - q->rate_flags & CAKE_FLAG_WASH); mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft; + wash = !!(q->rate_flags & CAKE_FLAG_WASH); + if (wash) + dscp = cake_handle_diffserv(skb, wash); if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT) tin = 0; @@ -1633,6 +1636,8 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, tin = q->tin_order[TC_H_MIN(skb->priority) - 1]; else { + if (!wash) + dscp = cake_handle_diffserv(skb, wash); tin = q->tin_index[dscp]; if (unlikely(tin >= q->tin_cnt)) From 3f608f0c41360b11b04c763f348b712f651c8bac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 25 Jun 2020 22:12:09 +0200 Subject: [PATCH 461/462] sch_cake: fix a few style nits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I spotted a few nits when comparing the in-tree version of sch_cake with the out-of-tree one: A redundant error variable declaration shadowing an outer declaration, and an indentation alignment issue. Fix both of these. Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 094d6e652deb..ca813697728e 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2715,7 +2715,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, qdisc_watchdog_init(&q->watchdog, sch); if (opt) { - int err = cake_change(sch, opt, extack); + err = cake_change(sch, opt, extack); if (err) return err; @@ -3032,7 +3032,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, PUT_STAT_S32(BLUE_TIMER_US, ktime_to_us( ktime_sub(now, - flow->cvars.blue_timer))); + flow->cvars.blue_timer))); } if (flow->cvars.dropping) { PUT_STAT_S32(DROP_NEXT_US, From 4c342f778fe234e0c2a2601d87fec8ba42f0d2c6 Mon Sep 17 00:00:00 2001 From: Rao Shoaib Date: Thu, 25 Jun 2020 13:46:00 -0700 Subject: [PATCH 462/462] rds: transport module should be auto loaded when transport is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enhancement auto loads transport module when the transport is set via SO_RDS_TRANSPORT socket option. Reviewed-by: Ka-Cheong Poon Reviewed-by: Håkon Bugge Signed-off-by: Rao Shoaib Signed-off-by: Somasundaram Krishnasamy Signed-off-by: David S. Miller --- include/uapi/linux/rds.h | 4 +++- net/rds/transport.c | 26 +++++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index cba368e55863..c21edb966c19 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -64,10 +64,12 @@ /* supported values for SO_RDS_TRANSPORT */ #define RDS_TRANS_IB 0 -#define RDS_TRANS_IWARP 1 +#define RDS_TRANS_GAP 1 #define RDS_TRANS_TCP 2 #define RDS_TRANS_COUNT 3 #define RDS_TRANS_NONE (~0) +/* don't use RDS_TRANS_IWARP - it is deprecated */ +#define RDS_TRANS_IWARP RDS_TRANS_GAP /* IOCTLS commands for SOL_RDS */ #define SIOCRDSSETTOS (SIOCPROTOPRIVATE) diff --git a/net/rds/transport.c b/net/rds/transport.c index 46f709a4b577..f8001ec80867 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -38,6 +38,12 @@ #include "rds.h" #include "loop.h" +static char * const rds_trans_modules[] = { + [RDS_TRANS_IB] = "rds_rdma", + [RDS_TRANS_GAP] = NULL, + [RDS_TRANS_TCP] = "rds_tcp", +}; + static struct rds_transport *transports[RDS_TRANS_COUNT]; static DECLARE_RWSEM(rds_trans_sem); @@ -110,18 +116,20 @@ struct rds_transport *rds_trans_get(int t_type) { struct rds_transport *ret = NULL; struct rds_transport *trans; - unsigned int i; down_read(&rds_trans_sem); - for (i = 0; i < RDS_TRANS_COUNT; i++) { - trans = transports[i]; - - if (trans && trans->t_type == t_type && - (!trans->t_owner || try_module_get(trans->t_owner))) { - ret = trans; - break; - } + trans = transports[t_type]; + if (!trans) { + up_read(&rds_trans_sem); + if (rds_trans_modules[t_type]) + request_module(rds_trans_modules[t_type]); + down_read(&rds_trans_sem); + trans = transports[t_type]; } + if (trans && trans->t_type == t_type && + (!trans->t_owner || try_module_get(trans->t_owner))) + ret = trans; + up_read(&rds_trans_sem); return ret;