sfc: Use write-combining to reduce TX latency
Based on work by Neil Turton <nturton@solarflare.com> and Kieran Mansley <kmansley@solarflare.com>. The BIU has now been verified to handle 3- and 4-dword writes within a single 128-bit register correctly. This means we can enable write-combining and only insert write barriers between writes to distinct registers. This has been observed to save about 0.5 us when pushing a TX descriptor to an empty TX queue. Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
This commit is contained in:
parent
6d84b986b2
commit
65f0b417de
@ -1104,8 +1104,8 @@ static int efx_init_io(struct efx_nic *efx)
|
|||||||
rc = -EIO;
|
rc = -EIO;
|
||||||
goto fail3;
|
goto fail3;
|
||||||
}
|
}
|
||||||
efx->membase = ioremap_nocache(efx->membase_phys,
|
efx->membase = ioremap_wc(efx->membase_phys,
|
||||||
efx->type->mem_map_size);
|
efx->type->mem_map_size);
|
||||||
if (!efx->membase) {
|
if (!efx->membase) {
|
||||||
netif_err(efx, probe, efx->net_dev,
|
netif_err(efx, probe, efx->net_dev,
|
||||||
"could not map memory BAR at %llx+%x\n",
|
"could not map memory BAR at %llx+%x\n",
|
||||||
|
@ -48,9 +48,9 @@
|
|||||||
* replacing the low 96 bits with zero does not affect functionality.
|
* replacing the low 96 bits with zero does not affect functionality.
|
||||||
* - If the host writes to the last dword address of such a register
|
* - If the host writes to the last dword address of such a register
|
||||||
* (i.e. the high 32 bits) the underlying register will always be
|
* (i.e. the high 32 bits) the underlying register will always be
|
||||||
* written. If the collector does not hold values for the low 96
|
* written. If the collector and the current write together do not
|
||||||
* bits of the register, they will be written as zero. Writing to
|
* provide values for all 128 bits of the register, the low 96 bits
|
||||||
* the last qword does not have this effect and must not be done.
|
* will be written as zero.
|
||||||
* - If the host writes to the address of any other part of such a
|
* - If the host writes to the address of any other part of such a
|
||||||
* register while the collector already holds values for some other
|
* register while the collector already holds values for some other
|
||||||
* register, the write is discarded and the collector maintains its
|
* register, the write is discarded and the collector maintains its
|
||||||
@ -103,6 +103,7 @@ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value,
|
|||||||
_efx_writed(efx, value->u32[2], reg + 8);
|
_efx_writed(efx, value->u32[2], reg + 8);
|
||||||
_efx_writed(efx, value->u32[3], reg + 12);
|
_efx_writed(efx, value->u32[3], reg + 12);
|
||||||
#endif
|
#endif
|
||||||
|
wmb();
|
||||||
mmiowb();
|
mmiowb();
|
||||||
spin_unlock_irqrestore(&efx->biu_lock, flags);
|
spin_unlock_irqrestore(&efx->biu_lock, flags);
|
||||||
}
|
}
|
||||||
@ -125,6 +126,7 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase,
|
|||||||
__raw_writel((__force u32)value->u32[0], membase + addr);
|
__raw_writel((__force u32)value->u32[0], membase + addr);
|
||||||
__raw_writel((__force u32)value->u32[1], membase + addr + 4);
|
__raw_writel((__force u32)value->u32[1], membase + addr + 4);
|
||||||
#endif
|
#endif
|
||||||
|
wmb();
|
||||||
mmiowb();
|
mmiowb();
|
||||||
spin_unlock_irqrestore(&efx->biu_lock, flags);
|
spin_unlock_irqrestore(&efx->biu_lock, flags);
|
||||||
}
|
}
|
||||||
@ -139,6 +141,7 @@ static inline void efx_writed(struct efx_nic *efx, efx_dword_t *value,
|
|||||||
|
|
||||||
/* No lock required */
|
/* No lock required */
|
||||||
_efx_writed(efx, value->u32[0], reg);
|
_efx_writed(efx, value->u32[0], reg);
|
||||||
|
wmb();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Read a 128-bit CSR, locking as appropriate. */
|
/* Read a 128-bit CSR, locking as appropriate. */
|
||||||
@ -237,12 +240,14 @@ static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value,
|
|||||||
|
|
||||||
#ifdef EFX_USE_QWORD_IO
|
#ifdef EFX_USE_QWORD_IO
|
||||||
_efx_writeq(efx, value->u64[0], reg + 0);
|
_efx_writeq(efx, value->u64[0], reg + 0);
|
||||||
|
_efx_writeq(efx, value->u64[1], reg + 8);
|
||||||
#else
|
#else
|
||||||
_efx_writed(efx, value->u32[0], reg + 0);
|
_efx_writed(efx, value->u32[0], reg + 0);
|
||||||
_efx_writed(efx, value->u32[1], reg + 4);
|
_efx_writed(efx, value->u32[1], reg + 4);
|
||||||
#endif
|
|
||||||
_efx_writed(efx, value->u32[2], reg + 8);
|
_efx_writed(efx, value->u32[2], reg + 8);
|
||||||
_efx_writed(efx, value->u32[3], reg + 12);
|
_efx_writed(efx, value->u32[3], reg + 12);
|
||||||
|
#endif
|
||||||
|
wmb();
|
||||||
}
|
}
|
||||||
#define efx_writeo_page(efx, value, reg, page) \
|
#define efx_writeo_page(efx, value, reg, page) \
|
||||||
_efx_writeo_page(efx, value, \
|
_efx_writeo_page(efx, value, \
|
||||||
|
@ -94,14 +94,15 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd,
|
|||||||
|
|
||||||
efx_writed(efx, &hdr, pdu);
|
efx_writed(efx, &hdr, pdu);
|
||||||
|
|
||||||
for (i = 0; i < inlen; i += 4)
|
for (i = 0; i < inlen; i += 4) {
|
||||||
_efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i);
|
_efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i);
|
||||||
|
/* use wmb() within loop to inhibit write combining */
|
||||||
/* Ensure the payload is written out before the header */
|
wmb();
|
||||||
wmb();
|
}
|
||||||
|
|
||||||
/* ring the doorbell with a distinctive value */
|
/* ring the doorbell with a distinctive value */
|
||||||
_efx_writed(efx, (__force __le32) 0x45789abc, doorbell);
|
_efx_writed(efx, (__force __le32) 0x45789abc, doorbell);
|
||||||
|
wmb();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen)
|
static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user