Currently, sending a packet into a time gate too small for it (or always closed) causes the queue system to hold the frame forever. Even worse, this frame isn't subject to aging either, because for that to happen, it needs to be scheduled for transmission in the first place. But the frame will consume buffer memory and frame references while it is forever held in the queue system. Before commit a4ae997adcbd ("net: mscc: ocelot: initialize watermarks to sane defaults"), this behavior was somewhat subtle, as the switch had a more intricately tuned default watermark configuration out of reset, which did not allow any single port and tc to consume the entire switch buffer space. Nonetheless, the held frames are still there, and they reduce the total backplane capacity of the switch. However, after the aforementioned commit, the behavior can be very clearly seen, since we deliberately allow each {port, tc} to consume the entire shared buffer of the switch minus the reservations (and we disable all reservations by default). That is to say, we allow a permanently closed tc-taprio gate to hang the entire switch. A careful inspection of the documentation shows that the QSYS:Q_MAX_SDU per-port-tc registers serve two purposes: one is for guard band calculation (when zero, this falls back to QSYS:PORT_MAX_SDU), and the other is to enable oversized frame dropping (when non-zero). Currently the QSYS:Q_MAX_SDU registers are all zero, so oversized frame dropping is disabled. The goal of the change is to enable it seamlessly. For that, we need to hook into the MTU change, tc-taprio change, and port link speed change procedures, since we depend on these variables. Frames are not dropped on egress due to a queue system oversize condition; instead, that egress port is simply excluded from the mask of valid destination ports for the packet. If there are no destination ports at all, the ingress counter that increments is the generic "drop_tail" in ethtool -S.
The issue has existed in various forms since the tc-taprio offload was introduced. Fixes: de143c0e274b ("net: dsa: felix: Configure Time-Aware Scheduler via taprio offload") Reported-by: Richie Pearn <richard.pearn@nxp.com> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
98 lines
3.4 KiB
C
98 lines
3.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/* Copyright 2019 NXP
|
|
*/
|
|
#ifndef _MSCC_FELIX_H
|
|
#define _MSCC_FELIX_H
|
|
|
|
/* Recover the enclosing struct felix from a pointer to its embedded
 * "ocelot" member (struct felix holds struct ocelot by value).
 */
#define ocelot_to_felix(o) container_of((o), struct felix, ocelot)
|
|
/* MAC quirk set used by felix: an alias for the single ocelot quirk flag
 * OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION.
 */
#define FELIX_MAC_QUIRKS OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION
|
|
|
|
/* Port mode flags, one distinct bit each so they can be OR-ed into a mask.
 * NOTE(review): presumably these are the values stored in
 * felix_info::port_modes to describe which interface modes a port
 * supports - confirm against the users of that array.
 */
#define OCELOT_PORT_MODE_INTERNAL BIT(0)
|
|
#define OCELOT_PORT_MODE_SGMII BIT(1)
|
|
#define OCELOT_PORT_MODE_QSGMII BIT(2)
|
|
#define OCELOT_PORT_MODE_2500BASEX BIT(3)
|
|
#define OCELOT_PORT_MODE_USXGMII BIT(4)
|
|
#define OCELOT_PORT_MODE_1000BASEX BIT(5)
|
|
|
|
/* Platform-specific information */
|
|
struct felix_info {
|
|
/* Resource descriptors.
 * NOTE(review): going by the names, these look like the I/O ranges of
 * the switch targets, the per-port register blocks and the internal
 * MDIO controller - confirm against the platform files that fill them.
 */
const struct resource *target_io_res;
|
|
const struct resource *port_io_res;
|
|
const struct resource *imdio_res;
|
|
/* Register layout tables: an array of register field descriptors and
 * an array of pointers to u32 tables.
 * NOTE(review): presumably "map" is indexed by target - confirm.
 */
const struct reg_field *regfields;
|
|
const u32 *const *map;
|
|
/* Operations table handed to the common ocelot code. */
const struct ocelot_ops *ops;
|
|
/* NOTE(review): presumably one OCELOT_PORT_MODE_* bitmask per port -
 * confirm the indexing and length with the users of this array.
 */
const u32 *port_modes;
|
|
int num_mact_rows;
|
|
const struct ocelot_stat_layout *stats_layout;
|
|
/* NOTE(review): names suggest the number of switch ports and the
 * number of TX queues per port - confirm.
 */
int num_ports;
|
|
int num_tx_queues;
|
|
struct vcap_props *vcap;
|
|
/* Two base/max pairs of 16-bit values.
 * NOTE(review): presumably two ranges of VCAP policer indices; whether
 * the bounds are inclusive is not visible here - confirm.
 */
u16 vcap_pol_base;
|
|
u16 vcap_pol_max;
|
|
u16 vcap_pol_base2;
|
|
u16 vcap_pol_max2;
|
|
const struct ptp_clock_info *ptp_caps;
|
|
|
|
/* Some Ocelot switches are integrated into the SoC without the
|
|
* extraction IRQ line connected to the ARM GIC. By enabling this
|
|
* workaround, the few packets that are delivered to the CPU port
|
|
* module (currently only PTP) are copied not only to the hardware CPU
|
|
* port module, but also to the 802.1Q Ethernet CPU port, and polling
|
|
* the extraction registers is triggered once the DSA tagger sees a PTP
|
|
* frame. The Ethernet frame is only used as a notification: it is
|
|
* dropped, and the original frame is extracted over MMIO and annotated
|
|
* with the RX timestamp.
|
|
*/
|
|
bool quirk_no_xtr_irq;
|
|
|
|
/* Platform-provided callbacks follow. Whether any of them may be left
 * NULL is not visible in this header - check the call sites.
 */
/* Paired allocate/free hooks for the MDIO bus; alloc returns an int.
 * NOTE(review): presumably 0 on success and a negative errno on
 * failure - confirm.
 */
int (*mdio_bus_alloc)(struct ocelot *ocelot);
|
|
void (*mdio_bus_free)(struct ocelot *ocelot);
|
|
/* phylink validation hook for one port; takes the supported-modes
 * bitmap and the link state by pointer.
 */
void (*phylink_validate)(struct ocelot *ocelot, int port,
|
|
unsigned long *supported,
|
|
struct phylink_link_state *state);
|
|
/* Traffic-control offload entry point for one port; type_data is an
 * opaque pointer whose layout depends on type.
 */
int (*port_setup_tc)(struct dsa_switch *ds, int port,
|
|
enum tc_setup_type type, void *type_data);
|
|
/* NOTE(review): by its name, recomputes the time-aware shaper
 * (tc-taprio) guard bands for the given port; presumably called when
 * the MTU, the taprio schedule or the link speed changes - confirm
 * with the callers.
 */
void (*tas_guard_bands_update)(struct ocelot *ocelot, int port);
|
|
/* NOTE(review): by its name, informs the port scheduler of the link
 * speed; the unit of "speed" is not visible here - confirm.
 */
void (*port_sched_speed_set)(struct ocelot *ocelot, int port,
|
|
u32 speed);
|
|
/* Builds a regmap for the given resource and returns it.
 * NOTE(review): the error convention (NULL vs ERR_PTR) is not visible
 * in this header - confirm.
 */
struct regmap *(*init_regmap)(struct ocelot *ocelot,
|
|
struct resource *res);
|
|
};
|
|
|
|
/* Methods for initializing the hardware resources specific to a tagging
|
|
* protocol (like the NPI port, for "ocelot" or "seville", or the VCAP TCAMs,
|
|
* for "ocelot-8021q").
|
|
* It is important that the resources configured here do not have side effects
|
|
* for the other tagging protocols. If that is the case, their configuration
|
|
* needs to go to felix_tag_proto_setup_shared().
|
|
*/
|
|
struct felix_tag_proto_ops {
|
|
/* Set up the resources of this tagging protocol on the switch.
 * NOTE(review): presumably returns 0 on success and a negative errno
 * on failure - confirm.
 */
int (*setup)(struct dsa_switch *ds);
|
|
/* Counterpart of setup(); returns nothing. */
void (*teardown)(struct dsa_switch *ds);
|
|
/* NOTE(review): by its name, returns a bitmask of the ports used to
 * forward traffic to the host under this tagging protocol - confirm
 * the bit numbering against the implementations.
 */
unsigned long (*get_host_fwd_mask)(struct dsa_switch *ds);
|
|
};
|
|
|
|
/* DSA switch operations table; only declared here, defined elsewhere. */
extern const struct dsa_switch_ops felix_switch_ops;
|
|
|
|
/* DSA glue / front-end for struct ocelot */
|
|
struct felix {
|
|
struct dsa_switch *ds;
|
|
/* Platform description, read-only from this structure. */
const struct felix_info *info;
|
|
/* Embedded by value: ocelot_to_felix() relies on this member to get
 * back to the enclosing struct felix via container_of().
 */
struct ocelot ocelot;
|
|
struct mii_bus *imdio;
|
|
/* NOTE(review): pointer to an array of PCS pointers, presumably one
 * entry per port - confirm the indexing and ownership.
 */
struct phylink_pcs **pcs;
|
|
/* NOTE(review): presumably base addresses to which the ranges in
 * felix_info's resource tables are relative - confirm.
 */
resource_size_t switch_base;
|
|
resource_size_t imdio_base;
|
|
/* Tagging protocol identifier and the ops table for a tagging
 * protocol.
 * NOTE(review): presumably kept in sync with each other - confirm.
 */
enum dsa_tag_protocol tag_proto;
|
|
const struct felix_tag_proto_ops *tag_proto_ops;
|
|
struct kthread_worker *xmit_worker;
|
|
/* NOTE(review): presumably per-port bitmasks tracking unicast and
 * multicast flooding towards the host - confirm.
 */
unsigned long host_flood_uc_mask;
|
|
unsigned long host_flood_mc_mask;
|
|
};
|
|
|
|
/* Translate between a switch port index and its net_device.
 * NOTE(review): the behavior for a port or device with no counterpart
 * (NULL / negative return?) is not visible in this header - confirm.
 */
struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port);
|
|
int felix_netdev_to_port(struct net_device *dev);
|
|
|
|
#endif
|