diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 7c38659b2a76..77872699c45d 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -330,9 +330,6 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) if (!q) return RX_HANDLER_PASS; - if (__ptr_ring_full(&q->ring)) - goto drop; - skb_push(skb, ETH_HLEN); /* Apply the forward feature mask so that we perform segmentation diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 9ca1726ff963..1883d6137e9b 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -45,9 +45,10 @@ struct ptr_ring { }; /* Note: callers invoking this in a loop must use a compiler barrier, - * for example cpu_relax(). If ring is ever resized, callers must hold - * producer_lock - see e.g. ptr_ring_full. Otherwise, if callers don't hold - * producer_lock, the next call to __ptr_ring_produce may fail. + * for example cpu_relax(). + * + * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock: + * see e.g. ptr_ring_full. */ static inline bool __ptr_ring_full(struct ptr_ring *r) { @@ -113,7 +114,7 @@ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */ smp_wmb(); - r->queue[r->producer++] = ptr; + WRITE_ONCE(r->queue[r->producer++], ptr); if (unlikely(r->producer >= r->size)) r->producer = 0; return 0; @@ -169,32 +170,36 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) return ret; } -/* Note: callers invoking this in a loop must use a compiler barrier, - * for example cpu_relax(). Callers must take consumer_lock - * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL. - * If ring is never resized, and if the pointer is merely - * tested, there's no need to take the lock - see e.g. __ptr_ring_empty. - * However, if called outside the lock, and if some other CPU - * consumes ring entries at the same time, the value returned - * is not guaranteed to be correct. +static inline void *__ptr_ring_peek(struct ptr_ring *r) +{ + if (likely(r->size)) + return READ_ONCE(r->queue[r->consumer_head]); + return NULL; +} + +/* + * Test ring empty status without taking any locks. + * + * NB: This is only safe to call if ring is never resized. + * + * However, if some other CPU consumes ring entries at the same time, the value + * returned is not guaranteed to be correct. + * * In this case - to avoid incorrectly detecting the ring * as empty - the CPU consuming the ring entries is responsible * for either consuming all ring entries until the ring is empty, * or synchronizing with some other CPU and causing it to - * execute __ptr_ring_peek and/or consume the ring enteries + * re-test __ptr_ring_empty and/or consume the ring enteries * after the synchronization point. + * + * Note: callers invoking this in a loop must use a compiler barrier, + * for example cpu_relax(). */ -static inline void *__ptr_ring_peek(struct ptr_ring *r) -{ - if (likely(r->size)) - return r->queue[r->consumer_head]; - return NULL; -} - -/* See __ptr_ring_peek above for locking rules. */ static inline bool __ptr_ring_empty(struct ptr_ring *r) { - return !__ptr_ring_peek(r); + if (likely(r->size)) + return !r->queue[READ_ONCE(r->consumer_head)]; + return true; } static inline bool ptr_ring_empty(struct ptr_ring *r) @@ -248,22 +253,28 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r) /* Fundamentally, what we want to do is update consumer * index and zero out the entry so producer can reuse it. * Doing it naively at each consume would be as simple as: - * r->queue[r->consumer++] = NULL; - * if (unlikely(r->consumer >= r->size)) - * r->consumer = 0; + * consumer = r->consumer; + * r->queue[consumer++] = NULL; + * if (unlikely(consumer >= r->size)) + * consumer = 0; + * r->consumer = consumer; * but that is suboptimal when the ring is full as producer is writing * out new entries in the same cache line. Defer these updates until a * batch of entries has been consumed. */ - int head = r->consumer_head++; + /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty + * to work correctly. + */ + int consumer_head = r->consumer_head; + int head = consumer_head++; /* Once we have processed enough entries invalidate them in * the ring all at once so producer can reuse their space in the ring. * We also do this when we reach end of the ring - not mandatory * but helps keep the implementation simple. */ - if (unlikely(r->consumer_head - r->consumer_tail >= r->batch || - r->consumer_head >= r->size)) { + if (unlikely(consumer_head - r->consumer_tail >= r->batch || + consumer_head >= r->size)) { /* Zero out entries in the reverse order: this way we touch the * cache line that producer might currently be reading the last; * producer won't make progress and touch other cache lines @@ -271,12 +282,14 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r) */ while (likely(head >= r->consumer_tail)) r->queue[head--] = NULL; - r->consumer_tail = r->consumer_head; + r->consumer_tail = consumer_head; } - if (unlikely(r->consumer_head >= r->size)) { - r->consumer_head = 0; + if (unlikely(consumer_head >= r->size)) { + consumer_head = 0; r->consumer_tail = 0; } + /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ + WRITE_ONCE(r->consumer_head, consumer_head); } static inline void *__ptr_ring_consume(struct ptr_ring *r) @@ -453,12 +466,7 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) { - /* Allocate an extra dummy element at end of ring to avoid consumer head - * or produce head access past the end of the array. Possible when - * producer/consumer operations and __ptr_ring_peek operations run in - * parallel. - */ - return kcalloc(size + 1, sizeof(void *), gfp); + return kcalloc(size, sizeof(void *), gfp); } static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) @@ -532,7 +540,9 @@ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, goto done; } r->queue[head] = batch[--n]; - r->consumer_tail = r->consumer_head = head; + r->consumer_tail = head; + /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ + WRITE_ONCE(r->consumer_head, head); } done: diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index c7addf37d119..a6b6e8bb3d7b 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -69,7 +69,7 @@ static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb */ static inline bool __skb_array_empty(struct skb_array *a) { - return !__ptr_ring_peek(&a->ring); + return __ptr_ring_empty(&a->ring); } static inline struct sk_buff *__skb_array_peek(struct skb_array *a) diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 395521a7a8d8..fca8381bbe04 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -118,7 +118,7 @@ static inline void free_page(unsigned long addr) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n")) +#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0) #define min(x, y) ({ \ typeof(x) _min1 = (x); \ diff --git a/tools/virtio/linux/thread_info.h b/tools/virtio/linux/thread_info.h new file mode 100644 index 000000000000..e0f610d08006 --- /dev/null +++ b/tools/virtio/linux/thread_info.h @@ -0,0 +1 @@ +#define check_copy_size(A, B, C) (1) diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index 5706e075adf2..301d59bfcd0a 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -111,7 +111,7 @@ static inline void busy_wait(void) } #if defined(__x86_64__) || defined(__i386__) -#define smp_mb() asm volatile("lock; addl $0,-128(%%rsp)" ::: "memory", "cc") +#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") #else /* * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized @@ -134,4 +134,61 @@ static inline void busy_wait(void) barrier(); \ } while (0) +#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) +#define smp_wmb() barrier() +#else +#define smp_wmb() smp_release() +#endif + +#ifdef __alpha__ +#define smp_read_barrier_depends() smp_acquire() +#else +#define smp_read_barrier_depends() do {} while(0) +#endif + +static __always_inline +void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { \ + case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; \ + case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; \ + case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; \ + case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; \ + default: \ + barrier(); \ + __builtin_memcpy((void *)res, (const void *)p, size); \ + barrier(); \ + } \ +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break; + case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break; + case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break; + case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break; + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ + __u.__val; \ +}) + +#define WRITE_ONCE(x, val) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__val = (typeof(x)) (val) }; \ + __write_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) + #endif diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c index e6e81305ef46..477899c12c51 100644 --- a/tools/virtio/ringtest/ptr_ring.c +++ b/tools/virtio/ringtest/ptr_ring.c @@ -187,7 +187,7 @@ bool enable_kick() bool avail_empty() { - return !__ptr_ring_peek(&array); + return __ptr_ring_empty(&array); } bool use_buf(unsigned *lenp, void **bufp)