From da29e4b466e6916a52e0e2f60054f855c324a9c2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 3 Jun 2019 15:16:58 -0700 Subject: [PATCH 1/8] net/tls: fully initialize the msg wrapper skb If strparser gets cornered into starting a new message from an sk_buff which already has frags, it will allocate a new skb to become the "wrapper" around the fragments of the message. This new skb does not inherit any metadata fields. In case of TLS offload this may lead to unnecessarily re-encrypting the message, as skb->decrypted is not set for the wrapper skb. Try to be conservative and copy all fields of old skb strparser's user may reasonably need. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 25 +++++++++++++++++++++++++ net/strparser/strparser.c | 8 ++------ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2ee5e63195c0..98ff5ac98caa 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1063,6 +1063,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, int max_page_order, int *errcode, gfp_t gfp_mask); +struct sk_buff *alloc_skb_for_msg(struct sk_buff *first); /* Layout of fast clones : [skb1][skb2][fclone_ref] */ struct sk_buff_fclones { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4a712a00243a..b50a5e3ac4e4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -913,6 +913,31 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) #undef C } +/** + * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg + * @first: first sk_buff of the msg + */ +struct sk_buff *alloc_skb_for_msg(struct sk_buff *first) +{ + struct sk_buff *n; + + n = alloc_skb(0, GFP_ATOMIC); + if (!n) + return NULL; + + n->len = first->len; + n->data_len = first->len; + n->truesize = first->truesize; + + skb_shinfo(n)->frag_list = first; + + __copy_skb_header(n, first); + n->destructor = NULL; + + return n; +} +EXPORT_SYMBOL_GPL(alloc_skb_for_msg); + /** * skb_morph - morph one skb into another * @dst: the skb to receive the contents diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index e137698e8aef..3fe541b746b0 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -160,18 +160,14 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, return 0; } - skb = alloc_skb(0, GFP_ATOMIC); + skb = alloc_skb_for_msg(head); if (!skb) { STRP_STATS_INCR(strp->stats.mem_fail); desc->error = -ENOMEM; return 0; } - skb->len = head->len; - skb->data_len = head->len; - skb->truesize = head->truesize; - *_strp_msg(skb) = *_strp_msg(head); + strp->skb_nextp = &head->next; - skb_shinfo(skb)->frag_list = head; strp->skb_head = skb; head = skb; } else { From aeb11ff0dc46be309ba000af7e608f8d3695fd6e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 3 Jun 2019 15:16:59 -0700 Subject: [PATCH 2/8] net/tls: check return values from skb_copy_bits() and skb_store_bits() In light of recent bugs, we should make a better effort of checking return values. In theory none of the functions should fail today. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- net/tls/tls_device.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index b95c408fd771..dde6513628d2 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -603,8 +603,10 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) sg_set_buf(&sg[0], buf, rxm->full_len + TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE); - skb_copy_bits(skb, offset, buf, - TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE); + err = skb_copy_bits(skb, offset, buf, + TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE); + if (err) + goto free_buf; /* We are interested only in the decrypted data not the auth */ err = decrypt_skb(sk, skb, sg); @@ -618,8 +620,11 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) if (skb_pagelen(skb) > offset) { copy = min_t(int, skb_pagelen(skb) - offset, data_len); - if (skb->decrypted) - skb_store_bits(skb, offset, buf, copy); + if (skb->decrypted) { + err = skb_store_bits(skb, offset, buf, copy); + if (err) + goto free_buf; + } offset += copy; buf += copy; @@ -642,8 +647,11 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) copy = min_t(int, skb_iter->len - frag_pos, data_len + rxm->offset - offset); - if (skb_iter->decrypted) - skb_store_bits(skb_iter, frag_pos, buf, copy); + if (skb_iter->decrypted) { + err = skb_store_bits(skb_iter, frag_pos, buf, copy); + if (err) + goto free_buf; + } offset += copy; buf += copy; From 87b11e0638c3dbf029a7c9020f8a779062db58fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 3 Jun 2019 15:17:00 -0700 Subject: [PATCH 3/8] net/tls: remove false positive warning It's possible that TCP stack will decide to retransmit a packet right when that packet's data gets acked, especially in presence of packet reordering. This means that packets may be in flight, even though tls_device code has already freed their record state. Make fill_sg_in() and in turn tls_sw_fallback() not generate a warning in that case, and quietly proceed to drop such frames. Make the exit path from tls_sw_fallback() drop monitor friendly, for users to be able to troubleshoot dropped retransmissions. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- Documentation/networking/tls-offload.rst | 19 ------------------- net/tls/tls_device_fallback.c | 6 ++++-- 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index cb85af559dff..eb7c9b81ccf5 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -379,7 +379,6 @@ by the driver: but did not arrive in the expected order * ``tx_tls_drop_no_sync_data`` - number of TX packets dropped because they arrived out of order and associated record could not be found - (see also :ref:`pre_tls_data`) Notable corner cases, exceptions and additional requirements ============================================================ @@ -462,21 +461,3 @@ Redirects leak clear text In the RX direction, if segment has already been decrypted by the device and it gets redirected or mirrored - clear text will be transmitted out. - -.. _pre_tls_data: - -Transmission of pre-TLS data ----------------------------- - -User can enqueue some already encrypted and framed records before enabling -``ktls`` on the socket. Those records have to get sent as they are. This is -perfectly easy to handle in the software case - such data will be waiting -in the TCP layer, TLS ULP won't see it. In the offloaded case when pre-queued -segment reaches transmission point it appears to be out of order (before the -expected TCP sequence number) and the stack does not have a record information -associated. - -All segments without record information cannot, however, be assumed to be -pre-queued data, because a race condition exists between TCP stack queuing -a retransmission, the driver seeing the retransmission and TCP ACK arriving -for the retransmitted data. diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index c3a5fe624b4e..5a087e1981c3 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -240,7 +240,6 @@ static int fill_sg_in(struct scatterlist *sg_in, record = tls_get_record(ctx, tcp_seq, rcd_sn); if (!record) { spin_unlock_irqrestore(&ctx->lock, flags); - WARN(1, "Record not found for seq %u\n", tcp_seq); return -EINVAL; } @@ -409,7 +408,10 @@ put_sg: put_page(sg_page(&sg_in[--resync_sgs])); kfree(sg_in); free_orig: - kfree_skb(skb); + if (nskb) + consume_skb(skb); + else + kfree_skb(skb); return nskb; } From b9d8fec927ef3cd157e6a0956f5ec89f6891ed27 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 3 Jun 2019 15:17:01 -0700 Subject: [PATCH 4/8] net/tls: don't look for decrypted frames on non-offloaded sockets If the RX config of a TLS socket is SW, there is no point iterating over the fragments and checking if frame is decrypted. It will always be fully encrypted. Note that in fully encrypted case the function doesn't actually touch any offload-related state, so it's safe to call for TLS_SW, today. Soon we will introduce code which can only be called for offloaded contexts. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 960494f437ac..f833407c789f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1492,9 +1492,11 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, if (!ctx->decrypted) { #ifdef CONFIG_TLS_DEVICE - err = tls_device_decrypted(sk, skb); - if (err < 0) - return err; + if (tls_ctx->rx_conf == TLS_HW) { + err = tls_device_decrypted(sk, skb); + if (err < 0) + return err; + } #endif /* Still not decrypted after tls_device */ if (!ctx->decrypted) { From 1fe275d434ad4ff2d576f9a770eb4c192153ea1d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 3 Jun 2019 15:17:02 -0700 Subject: [PATCH 5/8] net/tls: don't re-check msg decrypted status in tls_device_decrypted() tls_device_decrypted() is only called from decrypt_skb_update(), when ctx->decrypted == false, there is no need to re-check the bit. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- net/tls/tls_device.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index dde6513628d2..bb9d229832cc 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -672,10 +672,6 @@ int tls_device_decrypted(struct sock *sk, struct sk_buff *skb) int is_encrypted = !is_decrypted; struct sk_buff *skb_iter; - /* Skip if it is already decrypted */ - if (ctx->sw.decrypted) - return 0; - /* Check if all the data is decrypted already */ skb_walk_frags(skb, skb_iter) { is_decrypted &= skb_iter->decrypted; From 9cd81988cce195598e04fd8290fea873052bb7bd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 3 Jun 2019 15:17:03 -0700 Subject: [PATCH 6/8] net/tls: use version from prot ctx->prot holds the same information as per-direction contexts. Almost all code gets TLS version from this structure, convert the last two stragglers, this way we can improve the cache utilization by moving the per-direction data into cold cache lines. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- net/tls/tls_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index bb9d229832cc..8ffc8f95f55f 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -252,7 +252,7 @@ static int tls_push_record(struct sock *sk, skb_frag_address(frag), record->len - prot->prepend_size, record_type, - ctx->crypto_send.info.version); + prot->version); /* HW doesn't care about the data in the tag, because it fills it. */ dummy_tag_frag.page = skb_frag_page(frag); @@ -264,7 +264,7 @@ static int tls_push_record(struct sock *sk, list_add_tail(&record->list, &offload_ctx->records_list); spin_unlock_irq(&offload_ctx->lock); offload_ctx->open_record = NULL; - tls_advance_record_sn(sk, &ctx->tx, ctx->crypto_send.info.version); + tls_advance_record_sn(sk, &ctx->tx, prot->version); for (i = 0; i < record->num_frags; i++) { frag = &record->frags[i]; From f0aaa2c975617da78b80feebc87e74dba9ec1f53 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 3 Jun 2019 15:17:04 -0700 Subject: [PATCH 7/8] net/tls: reorganize struct tls_context struct tls_context is slightly badly laid out. If we reorder things right we can save 16 bytes (320 -> 304) but also make all fast path data fit into two cache lines (one read only and one read/write, down from four cache lines). Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- include/net/tls.h | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 39ea62f0c1f6..a463a6074e5d 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -236,34 +236,32 @@ struct tls_prot_info { }; struct tls_context { + /* read-only cache line */ struct tls_prot_info prot_info; - union tls_crypto_context crypto_send; - union tls_crypto_context crypto_recv; - - struct list_head list; - struct net_device *netdev; - refcount_t refcount; - - void *priv_ctx_tx; - void *priv_ctx_rx; - u8 tx_conf:3; u8 rx_conf:3; + int (*push_pending_record)(struct sock *sk, int flags); + void (*sk_write_space)(struct sock *sk); + + void *priv_ctx_tx; + void *priv_ctx_rx; + + struct net_device *netdev; + + /* rw cache line */ struct cipher_context tx; struct cipher_context rx; struct scatterlist *partially_sent_record; u16 partially_sent_offset; - unsigned long flags; bool in_tcp_sendpages; bool pending_open_record_frags; + unsigned long flags; - int (*push_pending_record)(struct sock *sk, int flags); - - void (*sk_write_space)(struct sock *sk); + /* cache cold stuff */ void (*sk_destruct)(struct sock *sk); void (*sk_proto_close)(struct sock *sk, long timeout); @@ -275,6 +273,12 @@ struct tls_context { int __user *optlen); int (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); + + union tls_crypto_context crypto_send; + union tls_crypto_context crypto_recv; + + struct list_head list; + refcount_t refcount; }; enum tls_offload_ctx_dir { From fb0f886fa265f265ad126fc7cd7e8ec51e2f770f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 3 Jun 2019 15:17:05 -0700 Subject: [PATCH 8/8] net/tls: don't pass version to tls_advance_record_sn() All callers pass prot->version as the last parameter of tls_advance_record_sn(), yet tls_advance_record_sn() itself needs a pointer to prot. Pass prot from callers. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- include/net/tls.h | 10 +++------- net/tls/tls_device.c | 2 +- net/tls/tls_sw.c | 9 ++++----- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index a463a6074e5d..0a0072636009 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -446,19 +446,15 @@ static inline struct tls_context *tls_get_ctx(const struct sock *sk) } static inline void tls_advance_record_sn(struct sock *sk, - struct cipher_context *ctx, - int version) + struct tls_prot_info *prot, + struct cipher_context *ctx) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_prot_info *prot = &tls_ctx->prot_info; - if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) tls_err_abort(sk, EBADMSG); - if (version != TLS_1_3_VERSION) { + if (prot->version != TLS_1_3_VERSION) tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, prot->iv_size); - } } static inline void tls_fill_prepend(struct tls_context *ctx, diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 8ffc8f95f55f..51e556e79371 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -264,7 +264,7 @@ static int tls_push_record(struct sock *sk, list_add_tail(&record->list, &offload_ctx->records_list); spin_unlock_irq(&offload_ctx->lock); offload_ctx->open_record = NULL; - tls_advance_record_sn(sk, &ctx->tx, prot->version); + tls_advance_record_sn(sk, prot, &ctx->tx); for (i = 0; i < record->num_frags; i++) { frag = &record->frags[i]; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index f833407c789f..bef71e54fad0 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -534,7 +534,7 @@ static int tls_do_encryption(struct sock *sk, /* Unhook the record from context if encryption is not failure */ ctx->open_rec = NULL; - tls_advance_record_sn(sk, &tls_ctx->tx, prot->version); + tls_advance_record_sn(sk, prot, &tls_ctx->tx); return rc; } @@ -1486,7 +1486,6 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; - int version = prot->version; struct strp_msg *rxm = strp_msg(skb); int pad, err = 0; @@ -1504,8 +1503,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, async); if (err < 0) { if (err == -EINPROGRESS) - tls_advance_record_sn(sk, &tls_ctx->rx, - version); + tls_advance_record_sn(sk, prot, + &tls_ctx->rx); return err; } @@ -1520,7 +1519,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, rxm->full_len -= pad; rxm->offset += prot->prepend_size; rxm->full_len -= prot->overhead_size; - tls_advance_record_sn(sk, &tls_ctx->rx, version); + tls_advance_record_sn(sk, prot, &tls_ctx->rx); ctx->decrypted = true; ctx->saved_data_ready(sk); } else {