From 406de7555424d93849166684d0bd172743d2a30c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:27 +0200 Subject: [PATCH 01/12] ptr_ring: keep consumer_head valid at all times The comment near __ptr_ring_peek says: * If ring is never resized, and if the pointer is merely * tested, there's no need to take the lock - see e.g. __ptr_ring_empty. but this was in fact never possible since consumer_head would sometimes point outside the ring. Refactor the code so that it's always pointing within a ring. Fixes: c5ad119fb6c09 ("net: sched: pfifo_fast use skb_array") Signed-off-by: Michael S. Tsirkin Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 9ca1726ff963..5ebcdd40df99 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -248,22 +248,28 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r) /* Fundamentally, what we want to do is update consumer * index and zero out the entry so producer can reuse it. * Doing it naively at each consume would be as simple as: - * r->queue[r->consumer++] = NULL; - * if (unlikely(r->consumer >= r->size)) - * r->consumer = 0; + * consumer = r->consumer; + * r->queue[consumer++] = NULL; + * if (unlikely(consumer >= r->size)) + * consumer = 0; + * r->consumer = consumer; * but that is suboptimal when the ring is full as producer is writing * out new entries in the same cache line. Defer these updates until a * batch of entries has been consumed. */ - int head = r->consumer_head++; + /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty + * to work correctly. + */ + int consumer_head = r->consumer_head; + int head = consumer_head++; /* Once we have processed enough entries invalidate them in * the ring all at once so producer can reuse their space in the ring. * We also do this when we reach end of the ring - not mandatory * but helps keep the implementation simple. */ - if (unlikely(r->consumer_head - r->consumer_tail >= r->batch || - r->consumer_head >= r->size)) { + if (unlikely(consumer_head - r->consumer_tail >= r->batch || + consumer_head >= r->size)) { /* Zero out entries in the reverse order: this way we touch the * cache line that producer might currently be reading the last; * producer won't make progress and touch other cache lines @@ -271,12 +277,13 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r) */ while (likely(head >= r->consumer_tail)) r->queue[head--] = NULL; - r->consumer_tail = r->consumer_head; + r->consumer_tail = consumer_head; } - if (unlikely(r->consumer_head >= r->size)) { - r->consumer_head = 0; + if (unlikely(consumer_head >= r->size)) { + consumer_head = 0; r->consumer_tail = 0; } + r->consumer_head = consumer_head; } static inline void *__ptr_ring_consume(struct ptr_ring *r) From 8619d384eb5e9256a9d4ebe96c1832ac9b9049d6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:28 +0200 Subject: [PATCH 02/12] ptr_ring: clean up documentation The only function safe to call without locks is __ptr_ring_empty. Move documentation about lockless use there to make sure people do not try to use __ptr_ring_peek outside locks. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 5ebcdd40df99..8594c7b37c15 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -169,21 +169,6 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) return ret; } -/* Note: callers invoking this in a loop must use a compiler barrier, - * for example cpu_relax(). Callers must take consumer_lock - * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL. - * If ring is never resized, and if the pointer is merely - * tested, there's no need to take the lock - see e.g. __ptr_ring_empty. - * However, if called outside the lock, and if some other CPU - * consumes ring entries at the same time, the value returned - * is not guaranteed to be correct. - * In this case - to avoid incorrectly detecting the ring - * as empty - the CPU consuming the ring entries is responsible - * for either consuming all ring entries until the ring is empty, - * or synchronizing with some other CPU and causing it to - * execute __ptr_ring_peek and/or consume the ring enteries - * after the synchronization point. - */ static inline void *__ptr_ring_peek(struct ptr_ring *r) { if (likely(r->size)) @@ -191,7 +176,24 @@ static inline void *__ptr_ring_peek(struct ptr_ring *r) return NULL; } -/* See __ptr_ring_peek above for locking rules. */ +/* + * Test ring empty status without taking any locks. + * + * NB: This is only safe to call if ring is never resized. + * + * However, if some other CPU consumes ring entries at the same time, the value + * returned is not guaranteed to be correct. + * + * In this case - to avoid incorrectly detecting the ring + * as empty - the CPU consuming the ring entries is responsible + * for either consuming all ring entries until the ring is empty, + * or synchronizing with some other CPU and causing it to + * re-test __ptr_ring_empty and/or consume the ring enteries + * after the synchronization point. + * + * Note: callers invoking this in a loop must use a compiler barrier, + * for example cpu_relax(). + */ static inline bool __ptr_ring_empty(struct ptr_ring *r) { return !__ptr_ring_peek(r); From a259df36d1fbf25f0e4f649fdb84e4527e5640ed Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:29 +0200 Subject: [PATCH 03/12] ptr_ring: READ/WRITE_ONCE for __ptr_ring_empty Lockless __ptr_ring_empty requires that consumer head is read and written at once, atomically. Annotate accordingly to make sure compiler does it correctly. Switch locked callers to __ptr_ring_peek which does not support the lockless operation. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 8594c7b37c15..9a72d8fec50e 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -196,7 +196,9 @@ static inline void *__ptr_ring_peek(struct ptr_ring *r) */ static inline bool __ptr_ring_empty(struct ptr_ring *r) { - return !__ptr_ring_peek(r); + if (likely(r->size)) + return !r->queue[READ_ONCE(r->consumer_head)]; + return true; } static inline bool ptr_ring_empty(struct ptr_ring *r) @@ -285,7 +287,8 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r) consumer_head = 0; r->consumer_tail = 0; } - r->consumer_head = consumer_head; + /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ + WRITE_ONCE(r->consumer_head, consumer_head); } static inline void *__ptr_ring_consume(struct ptr_ring *r) @@ -541,7 +544,9 @@ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, goto done; } r->queue[head] = batch[--n]; - r->consumer_tail = r->consumer_head = head; + r->consumer_tail = head; + /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ + WRITE_ONCE(r->consumer_head, head); } done: From 88fae87327a2261cf8db078f6ce4e5a3e55b30b1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:31 +0200 Subject: [PATCH 04/12] tap: fix use-after-free Lockless access to __ptr_ring_full is only legal if ring is never resized, otherwise it might cause use-after free errors. Simply drop the lockless test, we'll drop the packet a bit later when produce fails. Fixes: 362899b8 ("macvtap: switch to use skb array") Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/tap.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 7c38659b2a76..77872699c45d 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -330,9 +330,6 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) if (!q) return RX_HANDLER_PASS; - if (__ptr_ring_full(&q->ring)) - goto drop; - skb_push(skb, ETH_HLEN); /* Apply the forward feature mask so that we perform segmentation From 84328342a70a44379dd73011a44c5f5e00481a42 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:32 +0200 Subject: [PATCH 05/12] ptr_ring: disallow lockless __ptr_ring_full Similar to bcecb4bbf88a ("net: ptr_ring: otherwise safe empty checks can overrun array bounds") a lockless use of __ptr_ring_full might cause an out of bounds access. We can fix this, but it's easier to just disallow lockless __ptr_ring_full for now. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 9a72d8fec50e..f17584680b79 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -45,9 +45,10 @@ struct ptr_ring { }; /* Note: callers invoking this in a loop must use a compiler barrier, - * for example cpu_relax(). If ring is ever resized, callers must hold - * producer_lock - see e.g. ptr_ring_full. Otherwise, if callers don't hold - * producer_lock, the next call to __ptr_ring_produce may fail. + * for example cpu_relax(). + * + * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock: + * see e.g. ptr_ring_full. */ static inline bool __ptr_ring_full(struct ptr_ring *r) { From 9fb582b67072bea6cbfe1aefc2be13c62c7681bf Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:35 +0200 Subject: [PATCH 06/12] Revert "net: ptr_ring: otherwise safe empty checks can overrun array bounds" This reverts commit bcecb4bbf88aa03171c30652bca761cf27755a6b. If we try to allocate an extra entry as the above commit did, and when the requested size is UINT_MAX, addition overflows causing zero size to be passed to kmalloc(). kmalloc then returns ZERO_SIZE_PTR with a subsequent crash. Reported-by: syzbot+87678bcf753b44c39b67@syzkaller.appspotmail.com Cc: John Fastabend Signed-off-by: Michael S. Tsirkin Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index f17584680b79..3a19ebdcef14 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -466,12 +466,7 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) { - /* Allocate an extra dummy element at end of ring to avoid consumer head - * or produce head access past the end of the array. Possible when - * producer/consumer operations and __ptr_ring_peek operations run in - * parallel. - */ - return kcalloc(size + 1, sizeof(void *), gfp); + return kcalloc(size, sizeof(void *), gfp); } static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) From f417dc28185d7fdce567821862fcbf9222058cb7 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:36 +0200 Subject: [PATCH 07/12] skb_array: use __ptr_ring_empty __skb_array_empty should use __ptr_ring_empty since that's the only legal lockless function. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/skb_array.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index c7addf37d119..a6b6e8bb3d7b 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -69,7 +69,7 @@ static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb */ static inline bool __skb_array_empty(struct skb_array *a) { - return !__ptr_ring_peek(&a->ring); + return __ptr_ring_empty(&a->ring); } static inline struct sk_buff *__skb_array_peek(struct skb_array *a) From a07d29c6724a19eab120b7a74a9bfd107d20f69a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:38 +0200 Subject: [PATCH 08/12] ptr_ring: prevent queue load/store tearing In theory compiler could tear queue loads or stores in two. It does not seem to be happening in practice but it seems easier to convert the cases where this would be a problem to READ/WRITE_ONCE than worry about it. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 3a19ebdcef14..1883d6137e9b 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -114,7 +114,7 @@ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */ smp_wmb(); - r->queue[r->producer++] = ptr; + WRITE_ONCE(r->queue[r->producer++], ptr); if (unlikely(r->producer >= r->size)) r->producer = 0; return 0; @@ -173,7 +173,7 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) static inline void *__ptr_ring_peek(struct ptr_ring *r) { if (likely(r->size)) - return r->queue[r->consumer_head]; + return READ_ONCE(r->queue[r->consumer_head]); return NULL; } From 30f1d370744cc35f26d78a1dd31aeb0e4be93c38 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:39 +0200 Subject: [PATCH 09/12] tools/virtio: switch to __ptr_ring_empty We don't rely on lockless guarantees, but it seems cleaner than inverting __ptr_ring_peek. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- tools/virtio/ringtest/ptr_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c index e6e81305ef46..477899c12c51 100644 --- a/tools/virtio/ringtest/ptr_ring.c +++ b/tools/virtio/ringtest/ptr_ring.c @@ -187,7 +187,7 @@ bool enable_kick() bool avail_empty() { - return !__ptr_ring_peek(&array); + return __ptr_ring_empty(&array); } bool use_buf(unsigned *lenp, void **bufp) From 6dd42157830d875bdd3c6fefd05cbcf0875b51e8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:40 +0200 Subject: [PATCH 10/12] tools/virtio: more stubs to fix tools build Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- tools/virtio/linux/kernel.h | 2 +- tools/virtio/linux/thread_info.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 tools/virtio/linux/thread_info.h diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 395521a7a8d8..fca8381bbe04 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -118,7 +118,7 @@ static inline void free_page(unsigned long addr) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n")) +#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0) #define min(x, y) ({ \ typeof(x) _min1 = (x); \ diff --git a/tools/virtio/linux/thread_info.h b/tools/virtio/linux/thread_info.h new file mode 100644 index 000000000000..e0f610d08006 --- /dev/null +++ b/tools/virtio/linux/thread_info.h @@ -0,0 +1 @@ +#define check_copy_size(A, B, C) (1) From b4eab7de6685ee2691a7e297d511a126dbf53207 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:42 +0200 Subject: [PATCH 11/12] tools/virtio: copy READ/WRITE_ONCE This is to make ptr_ring test build again. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- tools/virtio/ringtest/main.h | 57 ++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index 5706e075adf2..593a3289c87d 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -134,4 +134,61 @@ static inline void busy_wait(void) barrier(); \ } while (0) +#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) +#define smp_wmb() barrier() +#else +#define smp_wmb() smp_release() +#endif + +#ifdef __alpha__ +#define smp_read_barrier_depends() smp_acquire() +#else +#define smp_read_barrier_depends() do {} while(0) +#endif + +static __always_inline +void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { \ + case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; \ + case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; \ + case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; \ + case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; \ + default: \ + barrier(); \ + __builtin_memcpy((void *)res, (const void *)p, size); \ + barrier(); \ + } \ +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break; + case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break; + case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break; + case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break; + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ + __u.__val; \ +}) + +#define WRITE_ONCE(x, val) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__val = (typeof(x)) (val) }; \ + __write_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) + #endif From 491847f3b29cef0417a03142b96e2a6dea81cca0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:44 +0200 Subject: [PATCH 12/12] tools/virtio: fix smp_mb on x86 Offset 128 overlaps the last word of the redzone. Use 132 which is always beyond that. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- tools/virtio/ringtest/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index 593a3289c87d..301d59bfcd0a 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -111,7 +111,7 @@ static inline void busy_wait(void) } #if defined(__x86_64__) || defined(__i386__) -#define smp_mb() asm volatile("lock; addl $0,-128(%%rsp)" ::: "memory", "cc") +#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") #else /* * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized