devlink: Expose port function commands to control RoCE
Expose port function commands to enable / disable RoCE; this is used to control the port RoCE device capabilities. When RoCE is disabled for a function of the port, the function cannot create any RoCE-specific resources (e.g. GID table). It also reduces system memory utilization. For example, disabling RoCE for a VF/SF saves 1 Mbyte of system memory per function. Example of a PCI VF port which supports function configuration: Set RoCE of the VF's port function. $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 roce enable $ devlink port function set pci/0000:06:00.0/2 roce disable $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 roce disable Signed-off-by: Shay Drory <shayd@nvidia.com> Reviewed-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
875cd5eeba
commit
da65e9ff3b
@ -110,7 +110,7 @@ devlink ports for both the controllers.
|
||||
Function configuration
|
||||
======================
|
||||
|
||||
A user can configure the function attribute before enumerating the PCI
|
||||
Users can configure one or more function attributes before enumerating the PCI
|
||||
function. Usually this means the user should configure the function attribute
|
||||
before a bus specific device for the function is created. However, when
|
||||
SRIOV is enabled, virtual function devices are created on the PCI bus.
|
||||
@ -122,6 +122,9 @@ A user may set the hardware address of the function using
|
||||
`devlink port function set hw_addr` command. For Ethernet port function
|
||||
this means a MAC address.
|
||||
|
||||
Users may also set the RoCE capability of the function using
|
||||
`devlink port function set roce` command.
|
||||
|
||||
Function attributes
|
||||
===================
|
||||
|
||||
@ -162,6 +165,35 @@ device created for the PCI VF/SF.
|
||||
function:
|
||||
hw_addr 00:00:00:00:88:88
|
||||
|
||||
RoCE capability setup
|
||||
---------------------
|
||||
Not all PCI VFs/SFs require RoCE capability.
|
||||
|
||||
When RoCE capability is disabled, it saves system memory per PCI VF/SF.
|
||||
|
||||
When user disables RoCE capability for a VF/SF, user application cannot send or
|
||||
receive any RoCE packets through this VF/SF and RoCE GID table for this PCI
|
||||
function will be empty.
|
||||
|
||||
When RoCE capability is disabled in the device using port function attribute,
|
||||
VF/SF driver cannot override it.
|
||||
|
||||
- Get RoCE capability of the VF device::
|
||||
|
||||
$ devlink port show pci/0000:06:00.0/2
|
||||
pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
|
||||
function:
|
||||
hw_addr 00:00:00:00:00:00 roce enable
|
||||
|
||||
- Set RoCE capability of the VF device::
|
||||
|
||||
$ devlink port function set pci/0000:06:00.0/2 roce disable
|
||||
|
||||
$ devlink port show pci/0000:06:00.0/2
|
||||
pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
|
||||
function:
|
||||
hw_addr 00:00:00:00:00:00 roce disable
|
||||
|
||||
Subfunction
|
||||
============
|
||||
|
||||
|
@ -1451,6 +1451,24 @@ struct devlink_ops {
|
||||
int (*port_function_hw_addr_set)(struct devlink_port *port,
|
||||
const u8 *hw_addr, int hw_addr_len,
|
||||
struct netlink_ext_ack *extack);
|
||||
/**
|
||||
* @port_fn_roce_get: Port function's roce get function.
|
||||
*
|
||||
* Query RoCE state of a function managed by the devlink port.
|
||||
* Return -EOPNOTSUPP if port function RoCE handling is not supported.
|
||||
*/
|
||||
int (*port_fn_roce_get)(struct devlink_port *devlink_port,
|
||||
bool *is_enable,
|
||||
struct netlink_ext_ack *extack);
|
||||
/**
|
||||
* @port_fn_roce_set: Port function's roce set function.
|
||||
*
|
||||
* Enable/Disable the RoCE state of a function managed by the devlink
|
||||
* port.
|
||||
* Return -EOPNOTSUPP if port function RoCE handling is not supported.
|
||||
*/
|
||||
int (*port_fn_roce_set)(struct devlink_port *devlink_port,
|
||||
bool enable, struct netlink_ext_ack *extack);
|
||||
/**
|
||||
* port_new() - Add a new port function of a specified flavor
|
||||
* @devlink: Devlink instance
|
||||
|
@ -658,11 +658,21 @@ enum devlink_resource_unit {
|
||||
DEVLINK_RESOURCE_UNIT_ENTRY,
|
||||
};
|
||||
|
||||
/* Bit positions of the port function capabilities. Each position is turned
 * into a mask with _BITUL() (see DEVLINK_PORT_FN_CAP_ROCE) for use in the
 * DEVLINK_PORT_FN_ATTR_CAPS bitfield32 attribute.
 */
enum devlink_port_fn_attr_cap {
	DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT,	/* RoCE capability of the function */

	/* Add new caps above */
	__DEVLINK_PORT_FN_ATTR_CAPS_MAX,	/* number of capability bits defined */
};
|
||||
|
||||
#define DEVLINK_PORT_FN_CAP_ROCE _BITUL(DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT)
|
||||
|
||||
enum devlink_port_function_attr {
|
||||
DEVLINK_PORT_FUNCTION_ATTR_UNSPEC,
|
||||
DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */
|
||||
DEVLINK_PORT_FN_ATTR_STATE, /* u8 */
|
||||
DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */
|
||||
DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */
|
||||
|
||||
__DEVLINK_PORT_FUNCTION_ATTR_MAX,
|
||||
DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1
|
||||
|
@ -195,11 +195,16 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
|
||||
|
||||
#define DEVLINK_PORT_FN_CAPS_VALID_MASK \
|
||||
(_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
|
||||
|
||||
static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
|
||||
[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
|
||||
[DEVLINK_PORT_FN_ATTR_STATE] =
|
||||
NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
|
||||
DEVLINK_PORT_FN_STATE_ACTIVE),
|
||||
[DEVLINK_PORT_FN_ATTR_CAPS] =
|
||||
NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
|
||||
};
|
||||
|
||||
static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
|
||||
@ -680,6 +685,60 @@ devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
|
||||
u32 cap, bool is_enable)
|
||||
{
|
||||
caps->selector |= cap;
|
||||
if (is_enable)
|
||||
caps->value |= cap;
|
||||
}
|
||||
|
||||
static int devlink_port_fn_roce_fill(const struct devlink_ops *ops,
|
||||
struct devlink_port *devlink_port,
|
||||
struct nla_bitfield32 *caps,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
bool is_enable;
|
||||
int err;
|
||||
|
||||
if (!ops->port_fn_roce_get)
|
||||
return 0;
|
||||
|
||||
err = ops->port_fn_roce_get(devlink_port, &is_enable, extack);
|
||||
if (err) {
|
||||
if (err == -EOPNOTSUPP)
|
||||
return 0;
|
||||
return err;
|
||||
}
|
||||
|
||||
devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int devlink_port_fn_caps_fill(const struct devlink_ops *ops,
|
||||
struct devlink_port *devlink_port,
|
||||
struct sk_buff *msg,
|
||||
struct netlink_ext_ack *extack,
|
||||
bool *msg_updated)
|
||||
{
|
||||
struct nla_bitfield32 caps = {};
|
||||
int err;
|
||||
|
||||
err = devlink_port_fn_roce_fill(ops, devlink_port, &caps, extack);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (!caps.selector)
|
||||
return 0;
|
||||
err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
|
||||
caps.selector);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
*msg_updated = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
|
||||
struct genl_info *info,
|
||||
@ -1263,6 +1322,35 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
const struct devlink_ops *ops = devlink_port->devlink->ops;
|
||||
|
||||
return ops->port_fn_roce_set(devlink_port, enable, extack);
|
||||
}
|
||||
|
||||
static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
|
||||
const struct nlattr *attr,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
struct nla_bitfield32 caps;
|
||||
u32 caps_value;
|
||||
int err;
|
||||
|
||||
caps = nla_get_bitfield32(attr);
|
||||
caps_value = caps.value & caps.selector;
|
||||
if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) {
|
||||
err = devlink_port_fn_roce_set(devlink_port,
|
||||
caps_value & DEVLINK_PORT_FN_CAP_ROCE,
|
||||
extack);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
|
||||
struct netlink_ext_ack *extack)
|
||||
@ -1281,6 +1369,10 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
|
||||
&msg_updated);
|
||||
if (err)
|
||||
goto out;
|
||||
err = devlink_port_fn_caps_fill(ops, port, msg, extack,
|
||||
&msg_updated);
|
||||
if (err)
|
||||
goto out;
|
||||
err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
|
||||
out:
|
||||
if (err || !msg_updated)
|
||||
@ -1653,6 +1745,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
const struct devlink_ops *ops = devlink_port->devlink->ops;
|
||||
struct nlattr *attr;
|
||||
|
||||
if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
|
||||
!ops->port_function_hw_addr_set) {
|
||||
@ -1665,6 +1758,18 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
|
||||
"Function does not support state setting");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
|
||||
if (attr) {
|
||||
struct nla_bitfield32 caps;
|
||||
|
||||
caps = nla_get_bitfield32(attr);
|
||||
if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE &&
|
||||
!ops->port_fn_roce_set) {
|
||||
NL_SET_ERR_MSG_ATTR(extack, attr,
|
||||
"Port doesn't support RoCE function attribute");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1692,6 +1797,14 @@ static int devlink_port_function_set(struct devlink_port *port,
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
|
||||
if (attr) {
|
||||
err = devlink_port_fn_caps_set(port, attr, extack);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Keep this as the last function attribute set, so that when
|
||||
* multiple port function attributes are set along with state,
|
||||
* Those can be applied first before activating the state.
|
||||
|
Loading…
x
Reference in New Issue
Block a user