60e6627f12
In general, accessing userspace memory beyond the length of the supplied buffer in VFS read/write handlers can lead to both kernel memory corruption (via kernel_read()/kernel_write(), which can e.g. be triggered via sys_splice()) and privilege escalation inside userspace. In this case, the affected files are in debugfs (and should therefore only be accessible to root), and the read handlers check that *pos is zero (meaning that at least sys_splice() can't trigger kernel memory corruption). Because of the root requirement, this is not a security fix, but rather a cleanup. For the read handlers, fix it by using simple_read_from_buffer() instead of custom logic. Add min() calls to the write handlers. Fixes: 4a2da0b8c078 ("IB/mlx5: Add debug control parameters for congestion control") Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Jann Horn <jannh@google.com> Reviewed-by: Leon Romanovsky <leonro@mellanox.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
450 lines
13 KiB
C
450 lines
13 KiB
C
/*
|
|
* Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved.
|
|
*
|
|
* This software is available to you under a choice of one of two
|
|
* licenses. You may choose to be licensed under the terms of the GNU
|
|
* General Public License (GPL) Version 2, available from the file
|
|
* COPYING in the main directory of this source tree, or the
|
|
* OpenIB.org BSD license below:
|
|
*
|
|
* Redistribution and use in source and binary forms, with or
|
|
* without modification, are permitted provided that the following
|
|
* conditions are met:
|
|
*
|
|
* - Redistributions of source code must retain the above
|
|
* copyright notice, this list of conditions and the following
|
|
* disclaimer.
|
|
*
|
|
* - Redistributions in binary form must reproduce the above
|
|
* copyright notice, this list of conditions and the following
|
|
* disclaimer in the documentation and/or other materials
|
|
* provided with the distribution.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#include <linux/debugfs.h>
|
|
|
|
#include "mlx5_ib.h"
|
|
#include "cmd.h"
|
|
|
|
/*
 * Congestion-control protocol node selector.
 *
 * The value is passed to mlx5_cmd_query_cong_params() and written into the
 * cong_protocol field of the MODIFY_CONG_PARAMS command, so the numeric
 * values are part of the device interface and must not be renumbered.
 */
enum mlx5_ib_cong_node_type {
	MLX5_IB_RROCE_ECN_RP = 1,	/* owner of the "rp_*" parameters */
	MLX5_IB_RROCE_ECN_NP = 2,	/* owner of the "np_*" parameters */
};
|
|
|
|
/*
 * debugfs file names, one per congestion-control parameter.
 *
 * mlx5_ib_init_cong_debugfs() indexes this table with the same integer it
 * stores as the parameter offset, so the order here has to match the order
 * of the MLX5_IB_DBG_CC_* values (assumed to start at 0 and be contiguous;
 * the enum itself lives outside this file).
 */
static const char * const mlx5_ib_dbg_cc_name[] = {
	/* "rp_*" parameters */
	"rp_clamp_tgt_rate",
	"rp_clamp_tgt_rate_ati",
	"rp_time_reset",
	"rp_byte_reset",
	"rp_threshold",
	"rp_ai_rate",
	"rp_hai_rate",
	"rp_min_dec_fac",
	"rp_min_rate",
	"rp_rate_to_set_on_first_cnp",
	"rp_dce_tcp_g",
	"rp_dce_tcp_rtt",
	"rp_rate_reduce_monitor_period",
	"rp_initial_alpha_value",
	"rp_gd",
	/* "np_*" parameters */
	"np_cnp_dscp",
	"np_cnp_prio_mode",
	"np_cnp_prio",
};
|
|
|
|
/*
 * Field-select bits for the MODIFY_CONG_PARAMS command.
 *
 * mlx5_ib_set_cc_param_mask_val() ORs exactly one of these into the
 * attribute mask, which mlx5_ib_set_cc_params() then writes into the
 * command's field_select word.
 *
 * Bits for the RP node. Note that BIT(6) is not used.
 */
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR			BIT(1)
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR		BIT(2)
#define MLX5_IB_RP_TIME_RESET_ATTR			BIT(3)
#define MLX5_IB_RP_BYTE_RESET_ATTR			BIT(4)
#define MLX5_IB_RP_THRESHOLD_ATTR			BIT(5)
#define MLX5_IB_RP_AI_RATE_ATTR				BIT(7)
#define MLX5_IB_RP_HAI_RATE_ATTR			BIT(8)
#define MLX5_IB_RP_MIN_DEC_FAC_ATTR			BIT(9)
#define MLX5_IB_RP_MIN_RATE_ATTR			BIT(10)
#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR	BIT(11)
#define MLX5_IB_RP_DCE_TCP_G_ATTR			BIT(12)
#define MLX5_IB_RP_DCE_TCP_RTT_ATTR			BIT(13)
#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR	BIT(14)
#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR		BIT(15)
#define MLX5_IB_RP_GD_ATTR				BIT(16)

/*
 * Bits for the NP node. These reuse bit positions 3 and 4; which meaning
 * applies is determined by the cong_protocol value sent with the command.
 */
#define MLX5_IB_NP_CNP_DSCP_ATTR			BIT(3)
#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR			BIT(4)
|
|
|
|
static enum mlx5_ib_cong_node_type
|
|
mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset)
|
|
{
|
|
if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE &&
|
|
param_offset <= MLX5_IB_DBG_CC_RP_GD)
|
|
return MLX5_IB_RROCE_ECN_RP;
|
|
else
|
|
return MLX5_IB_RROCE_ECN_NP;
|
|
}
|
|
|
|
static u32 mlx5_get_cc_param_val(void *field, int offset)
|
|
{
|
|
switch (offset) {
|
|
case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
clamp_tgt_rate);
|
|
case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
clamp_tgt_rate_after_time_inc);
|
|
case MLX5_IB_DBG_CC_RP_TIME_RESET:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
rpg_time_reset);
|
|
case MLX5_IB_DBG_CC_RP_BYTE_RESET:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
rpg_byte_reset);
|
|
case MLX5_IB_DBG_CC_RP_THRESHOLD:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
rpg_threshold);
|
|
case MLX5_IB_DBG_CC_RP_AI_RATE:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
rpg_ai_rate);
|
|
case MLX5_IB_DBG_CC_RP_HAI_RATE:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
rpg_hai_rate);
|
|
case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
rpg_min_dec_fac);
|
|
case MLX5_IB_DBG_CC_RP_MIN_RATE:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
rpg_min_rate);
|
|
case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
rate_to_set_on_first_cnp);
|
|
case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
dce_tcp_g);
|
|
case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
dce_tcp_rtt);
|
|
case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
rate_reduce_monitor_period);
|
|
case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
initial_alpha_value);
|
|
case MLX5_IB_DBG_CC_RP_GD:
|
|
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
|
|
rpg_gd);
|
|
case MLX5_IB_DBG_CC_NP_CNP_DSCP:
|
|
return MLX5_GET(cong_control_r_roce_ecn_np, field,
|
|
cnp_dscp);
|
|
case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
|
|
return MLX5_GET(cong_control_r_roce_ecn_np, field,
|
|
cnp_prio_mode);
|
|
case MLX5_IB_DBG_CC_NP_CNP_PRIO:
|
|
return MLX5_GET(cong_control_r_roce_ecn_np, field,
|
|
cnp_802p_prio);
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/*
 * Store one congestion-control parameter into a MODIFY_CONG_PARAMS parameter
 * block and flag it in the field-select mask.
 *
 * @field:     start of the congestion_parameters area of the command; written
 *             with the RP or the NP layout depending on @offset.
 * @offset:    MLX5_IB_DBG_CC_* identifier of the parameter to write.
 * @var:       new value for the parameter.
 * @attr_mask: field-select mask; the bit matching @offset is ORed in.
 *
 * An unknown @offset leaves both @field and @attr_mask untouched.
 */
static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
					  u32 var, u32 *attr_mask)
{
	u32 select;

	switch (offset) {
	case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
		select = MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 clamp_tgt_rate, var);
		break;
	case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
		select = MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 clamp_tgt_rate_after_time_inc, var);
		break;
	case MLX5_IB_DBG_CC_RP_TIME_RESET:
		select = MLX5_IB_RP_TIME_RESET_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_time_reset, var);
		break;
	case MLX5_IB_DBG_CC_RP_BYTE_RESET:
		select = MLX5_IB_RP_BYTE_RESET_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_byte_reset, var);
		break;
	case MLX5_IB_DBG_CC_RP_THRESHOLD:
		select = MLX5_IB_RP_THRESHOLD_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_threshold, var);
		break;
	case MLX5_IB_DBG_CC_RP_AI_RATE:
		select = MLX5_IB_RP_AI_RATE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_ai_rate, var);
		break;
	case MLX5_IB_DBG_CC_RP_HAI_RATE:
		select = MLX5_IB_RP_HAI_RATE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_hai_rate, var);
		break;
	case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
		select = MLX5_IB_RP_MIN_DEC_FAC_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_min_dec_fac, var);
		break;
	case MLX5_IB_DBG_CC_RP_MIN_RATE:
		select = MLX5_IB_RP_MIN_RATE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_min_rate, var);
		break;
	case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
		select = MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rate_to_set_on_first_cnp, var);
		break;
	case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
		select = MLX5_IB_RP_DCE_TCP_G_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 dce_tcp_g, var);
		break;
	case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
		select = MLX5_IB_RP_DCE_TCP_RTT_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 dce_tcp_rtt, var);
		break;
	case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
		select = MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rate_reduce_monitor_period, var);
		break;
	case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
		select = MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 initial_alpha_value, var);
		break;
	case MLX5_IB_DBG_CC_RP_GD:
		select = MLX5_IB_RP_GD_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_rp, field,
			 rpg_gd, var);
		break;
	case MLX5_IB_DBG_CC_NP_CNP_DSCP:
		select = MLX5_IB_NP_CNP_DSCP_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
		break;
	case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
		select = MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
		break;
	case MLX5_IB_DBG_CC_NP_CNP_PRIO:
		/*
		 * The priority has no select bit of its own here: it is sent
		 * under the prio-mode bit, with cnp_prio_mode forced to 0 in
		 * the same command.
		 */
		select = MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
		break;
	default:
		/* Unknown parameter: nothing written, nothing selected. */
		return;
	}

	*attr_mask |= select;
}
|
|
|
|
/*
 * Query the device for one congestion-control parameter.
 *
 * @dev:      IB device.
 * @port_num: 0-based port index.
 * @offset:   MLX5_IB_DBG_CC_* identifier of the parameter.
 * @var:      receives the parameter value on success; not written on failure.
 *
 * Returns 0 on success, -ENODEV when the port has no native mdev, -ENOMEM
 * when the reply buffer cannot be allocated, or the error returned by
 * mlx5_cmd_query_cong_params().
 */
static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
				 int offset, u32 *var)
{
	int out_sz = MLX5_ST_SZ_BYTES(query_cong_params_out);
	struct mlx5_core_dev *mdev;
	void *reply;
	int err;

	/* The mdev lookup helpers take a 1-based port number. */
	mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
	if (!mdev)
		return -ENODEV;

	reply = kvzalloc(out_sz, GFP_KERNEL);
	if (!reply) {
		err = -ENOMEM;
		goto put_mdev;
	}

	err = mlx5_cmd_query_cong_params(mdev, mlx5_ib_param_to_node(offset),
					 reply, out_sz);
	if (!err) {
		void *params = MLX5_ADDR_OF(query_cong_params_out, reply,
					    congestion_parameters);

		*var = mlx5_get_cc_param_val(params, offset);
	}

	kvfree(reply);
put_mdev:
	mlx5_ib_put_native_port_mdev(dev, port_num + 1);
	return err;
}
|
|
|
|
/*
 * Program one congestion-control parameter into the device.
 *
 * @dev:      IB device.
 * @port_num: 0-based port index.
 * @offset:   MLX5_IB_DBG_CC_* identifier of the parameter.
 * @var:      new value.
 *
 * Builds a MODIFY_CONG_PARAMS command that carries only the selected
 * parameter and its field-select bit, then executes it.
 *
 * Returns 0 on success, -ENODEV when the port has no native mdev, -ENOMEM
 * when the command buffer cannot be allocated, or the error returned by
 * mlx5_cmd_modify_cong_params().
 */
static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
				 int offset, u32 var)
{
	int in_sz = MLX5_ST_SZ_BYTES(modify_cong_params_in);
	struct mlx5_core_dev *mdev;
	u32 select_mask = 0;
	void *params;
	void *select;
	void *cmd;
	int err;

	/* The mdev lookup helpers take a 1-based port number. */
	mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
	if (!mdev)
		return -ENODEV;

	cmd = kvzalloc(in_sz, GFP_KERNEL);
	if (!cmd) {
		err = -ENOMEM;
		goto put_mdev;
	}

	MLX5_SET(modify_cong_params_in, cmd, opcode,
		 MLX5_CMD_OP_MODIFY_CONG_PARAMS);
	MLX5_SET(modify_cong_params_in, cmd, cong_protocol,
		 mlx5_ib_param_to_node(offset));

	/* Fill in the value and learn which select bit goes with it. */
	params = MLX5_ADDR_OF(modify_cong_params_in, cmd,
			      congestion_parameters);
	mlx5_ib_set_cc_param_mask_val(params, offset, var, &select_mask);

	/* The RP field-select layout is used for both node types. */
	select = MLX5_ADDR_OF(modify_cong_params_in, cmd, field_select);
	MLX5_SET(field_select_r_roce_rp, select, field_select_r_roce_rp,
		 select_mask);

	err = mlx5_cmd_modify_cong_params(mdev, cmd, in_sz);
	kvfree(cmd);
put_mdev:
	mlx5_ib_put_native_port_mdev(dev, port_num + 1);
	return err;
}
|
|
|
|
/*
 * debugfs write handler: parse a number from userspace and program it as the
 * congestion-control parameter bound to this file.
 *
 * At most sizeof(text) bytes are accepted; longer writes fail with -EINVAL
 * without touching userspace memory. The text is parsed by kstrtou32() with
 * base 0, i.e. the base is auto-detected from the prefix.
 *
 * Returns @count on success, -EINVAL for oversized or unparsable input,
 * -EFAULT when the user buffer cannot be read, or the error from
 * mlx5_ib_set_cc_params().
 */
static ssize_t set_param(struct file *filp, const char __user *buf,
			 size_t count, loff_t *pos)
{
	struct mlx5_ib_dbg_param *param = filp->private_data;
	char text[11] = { };
	u32 value;
	int err;

	if (count > sizeof(text))
		return -EINVAL;

	if (copy_from_user(text, buf, count))
		return -EFAULT;

	/* A full-length write leaves no terminator; force one in place. */
	text[sizeof(text) - 1] = '\0';

	if (kstrtou32(text, 0, &value))
		return -EINVAL;

	err = mlx5_ib_set_cc_params(param->dev, param->port_num,
				    param->offset, value);
	if (err)
		return err;

	return count;
}
|
|
|
|
/*
 * debugfs read handler: query the congestion-control parameter bound to this
 * file and return it to userspace as an unsigned decimal number followed by
 * a newline.
 *
 * Returns the number of bytes copied (0 at end of file), the error from
 * mlx5_ib_get_cc_params(), or the error from simple_read_from_buffer().
 */
static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_ib_dbg_param *param = filp->private_data;
	int offset = param->offset;
	/* Widest u32 is 10 digits; add room for '\n' and the terminator. */
	char lbuf[12];
	u32 var = 0;
	int ret;

	ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var);
	if (ret)
		return ret;

	/*
	 * scnprintf() returns the number of bytes actually stored, so the
	 * length handed to simple_read_from_buffer() can never exceed the
	 * buffer (snprintf() returns the untruncated length instead).
	 * The value is a u32, hence %u: %d would print large values as
	 * negative numbers that the write handler's kstrtou32() rejects.
	 */
	ret = scnprintf(lbuf, sizeof(lbuf), "%u\n", var);

	return simple_read_from_buffer(buf, count, pos, lbuf, ret);
}
|
|
|
|
static const struct file_operations dbg_cc_fops = {
|
|
.owner = THIS_MODULE,
|
|
.open = simple_open,
|
|
.write = set_param,
|
|
.read = get_param,
|
|
};
|
|
|
|
/*
 * Tear down the congestion-control debugfs state of one port.
 *
 * @dev:      IB device.
 * @port_num: 0-based port index into dev->port[].
 *
 * Removes the "cc_params" directory with everything below it and frees the
 * per-port bookkeeping. Safe to call when nothing was set up, and also on a
 * partially initialized state where the bookkeeping was allocated but the
 * directory could not be created.
 */
void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
{
	struct mlx5_ib_dbg_cc_params *dbg_cc_params;

	if (!mlx5_debugfs_root)
		return;

	dbg_cc_params = dev->port[port_num].dbg_cc_params;
	if (!dbg_cc_params)
		return;

	/*
	 * Do not bail out on a missing root dentry: the allocation must be
	 * released even if directory creation failed, otherwise it is leaked
	 * and a stale pointer is left behind in dev->port[].
	 * debugfs_remove_recursive() ignores NULL/error dentries.
	 */
	debugfs_remove_recursive(dbg_cc_params->root);
	kfree(dbg_cc_params);
	dev->port[port_num].dbg_cc_params = NULL;
}
|
|
|
|
/*
 * Create the "cc_params" debugfs directory for one port, with one file per
 * congestion-control parameter.
 *
 * @dev:      IB device.
 * @port_num: 0-based port index into dev->port[].
 *
 * Nothing is created when debugfs is unavailable, when the port has no
 * native mdev, or when the device does not allow both querying and modifying
 * congestion parameters.
 *
 * Always returns 0: a debugfs failure is logged and cleaned up, but must not
 * fail driver initialization.
 */
int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
{
	struct mlx5_ib_dbg_cc_params *cc;
	struct mlx5_core_dev *mdev;
	int idx;

	if (!mlx5_debugfs_root)
		return 0;

	/* The mdev lookup helpers take a 1-based port number. */
	mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
	if (!mdev)
		return 0;

	if (!MLX5_CAP_GEN(mdev, cc_query_allowed) ||
	    !MLX5_CAP_GEN(mdev, cc_modify_allowed))
		goto done;

	cc = kzalloc(sizeof(*cc), GFP_KERNEL);
	if (!cc)
		goto fail;

	/* Publish before creating entries so the cleanup helper can see it. */
	dev->port[port_num].dbg_cc_params = cc;

	cc->root = debugfs_create_dir("cc_params", mdev->priv.dbg_root);
	if (!cc->root)
		goto fail;

	for (idx = 0; idx < MLX5_IB_DBG_CC_MAX; idx++) {
		struct mlx5_ib_dbg_param *entry = &cc->params[idx];

		entry->offset = idx;
		entry->dev = dev;
		entry->port_num = port_num;
		entry->dentry = debugfs_create_file(mlx5_ib_dbg_cc_name[idx],
						    0600, cc->root, entry,
						    &dbg_cc_fops);
		if (!entry->dentry)
			goto fail;
	}

done:
	mlx5_ib_put_native_port_mdev(dev, port_num + 1);
	return 0;

fail:
	mlx5_ib_warn(dev, "cong debugfs failure\n");
	mlx5_ib_cleanup_cong_debugfs(dev, port_num);
	mlx5_ib_put_native_port_mdev(dev, port_num + 1);

	/*
	 * We don't want to fail driver if debugfs failed to initialize,
	 * so we are not forwarding error to the user.
	 */
	return 0;
}
|