Uma Krishnan 0df5bef739 scsi: cxlflash: Cancel scheduled workers before stopping AFU
When processing an AFU asynchronous interrupt, if the action results in an
operation that requires off level processing (a link reset for example),
the worker thread is scheduled. In the meantime a reset event (i.e.: EEH)
could unmap the AFU to recover. This results in an Oops when the worker
thread tries to access the AFU mapping.

[c000000f17e03b90] d000000007cd5978 cxlflash_worker_thread+0x268/0x550
[c000000f17e03c40] c00000000011883c process_one_work+0x1dc/0x680
[c000000f17e03ce0] c000000000118e80 worker_thread+0x1a0/0x520
[c000000f17e03d80] c000000000126174 kthread+0xf4/0x100
[c000000f17e03e30] c00000000000a47c ret_from_kernel_thread+0x5c/0xe0

In an effort to avoid this, a mapcount was introduced in
commit b45cdbaf9f7f ("cxlflash: Resolve oops in wait_port_offline")
but due to the race condition described above, this solution is incomplete.

In order to fully resolve this problem and to simplify things, this commit
removes the mapcount solution. Instead, the scheduled worker thread is
cancelled after interrupts have been disabled and prior to the mapping
being freed.

Fixes: b45cdbaf9f7f ("cxlflash: Resolve oops in wait_port_offline")
Signed-off-by: Uma Krishnan <ukrishn@linux.vnet.ibm.com>
Acked-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-01-11 22:38:15 -05:00

237 lines
6.2 KiB
C

/*
* CXL Flash Device Driver
*
* Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation
* Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation
*
* Copyright (C) 2015 IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _CXLFLASH_COMMON_H
#define _CXLFLASH_COMMON_H
#include <linux/list.h>
#include <linux/rwsem.h>
#include <linux/types.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
extern const struct file_operations cxlflash_cxl_fops;
#define MAX_CONTEXT CXLFLASH_MAX_CONTEXT /* num contexts per afu */
#define CXLFLASH_BLOCK_SIZE 4096 /* 4K blocks */
#define CXLFLASH_MAX_XFER_SIZE 16777216 /* 16MB transfer */
#define CXLFLASH_MAX_SECTORS (CXLFLASH_MAX_XFER_SIZE/512) /* SCSI wants
max_sectors
in units of
512 byte
sectors
*/
#define MAX_RHT_PER_CONTEXT (PAGE_SIZE / sizeof(struct sisl_rht_entry))
/* AFU command retry limit */
#define MC_RETRY_CNT 5 /* sufficient for SCSI check and
certain AFU errors */
/* Command management definitions */
#define CXLFLASH_NUM_CMDS (2 * CXLFLASH_MAX_CMDS) /* Must be a pow2 for
alignment and more
efficient array
index derivation
*/
#define CXLFLASH_MAX_CMDS 256
#define CXLFLASH_MAX_CMDS_PER_LUN CXLFLASH_MAX_CMDS
/* RRQ for master issued cmds */
#define NUM_RRQ_ENTRY CXLFLASH_MAX_CMDS
/* SQ for master issued cmds */
#define NUM_SQ_ENTRY CXLFLASH_MAX_CMDS
static inline void check_sizes(void)
{
BUILD_BUG_ON_NOT_POWER_OF_2(CXLFLASH_NUM_CMDS);
}
/* AFU defines a fixed size of 4K for command buffers (borrow 4K page define) */
#define CMD_BUFSIZE SIZE_4K
enum cxlflash_lr_state {
LINK_RESET_INVALID,
LINK_RESET_REQUIRED,
LINK_RESET_COMPLETE
};
enum cxlflash_init_state {
INIT_STATE_NONE,
INIT_STATE_PCI,
INIT_STATE_AFU,
INIT_STATE_SCSI
};
enum cxlflash_state {
STATE_NORMAL, /* Normal running state, everything good */
STATE_RESET, /* Reset state, trying to reset/recover */
STATE_FAILTERM /* Failed/terminating state, error out users/threads */
};
/*
* Each context has its own set of resource handles that is visible
* only from that context.
*/
struct cxlflash_cfg {
struct afu *afu;
struct cxl_context *mcctx;
struct pci_dev *dev;
struct pci_device_id *dev_id;
struct Scsi_Host *host;
ulong cxlflash_regs_pci;
struct work_struct work_q;
enum cxlflash_init_state init_state;
enum cxlflash_lr_state lr_state;
int lr_port;
atomic_t scan_host_needed;
struct cxl_afu *cxl_afu;
atomic_t recovery_threads;
struct mutex ctx_recovery_mutex;
struct mutex ctx_tbl_list_mutex;
struct rw_semaphore ioctl_rwsem;
struct ctx_info *ctx_tbl[MAX_CONTEXT];
struct list_head ctx_err_recovery; /* contexts w/ recovery pending */
struct file_operations cxl_fops;
/* Parameters that are LUN table related */
int last_lun_index[CXLFLASH_NUM_FC_PORTS];
int promote_lun_index;
struct list_head lluns; /* list of llun_info structs */
wait_queue_head_t tmf_waitq;
spinlock_t tmf_slock;
bool tmf_active;
wait_queue_head_t reset_waitq;
enum cxlflash_state state;
};
struct afu_cmd {
struct sisl_ioarcb rcb; /* IOARCB (cache line aligned) */
struct sisl_ioasa sa; /* IOASA must follow IOARCB */
struct afu *parent;
struct scsi_cmnd *scp;
struct completion cevent;
u8 cmd_tmf:1;
/* As per the SISLITE spec the IOARCB EA has to be 16-byte aligned.
* However for performance reasons the IOARCB/IOASA should be
* cache line aligned.
*/
} __aligned(cache_line_size());
static inline struct afu_cmd *sc_to_afuc(struct scsi_cmnd *sc)
{
return PTR_ALIGN(scsi_cmd_priv(sc), __alignof__(struct afu_cmd));
}
static inline struct afu_cmd *sc_to_afucz(struct scsi_cmnd *sc)
{
struct afu_cmd *afuc = sc_to_afuc(sc);
memset(afuc, 0, sizeof(*afuc));
return afuc;
}
struct afu {
/* Stuff requiring alignment go first. */
struct sisl_ioarcb sq[NUM_SQ_ENTRY]; /* 16K SQ */
u64 rrq_entry[NUM_RRQ_ENTRY]; /* 2K RRQ */
/* Beware of alignment till here. Preferably introduce new
* fields after this point
*/
int (*send_cmd)(struct afu *, struct afu_cmd *);
void (*context_reset)(struct afu_cmd *);
/* AFU HW */
struct cxl_ioctl_start_work work;
struct cxlflash_afu_map __iomem *afu_map; /* entire MMIO map */
struct sisl_host_map __iomem *host_map; /* MC host map */
struct sisl_ctrl_map __iomem *ctrl_map; /* MC control map */
ctx_hndl_t ctx_hndl; /* master's context handle */
atomic_t hsq_credits;
spinlock_t hsq_slock;
struct sisl_ioarcb *hsq_start;
struct sisl_ioarcb *hsq_end;
struct sisl_ioarcb *hsq_curr;
u64 *hrrq_start;
u64 *hrrq_end;
u64 *hrrq_curr;
bool toggle;
atomic_t cmds_active; /* Number of currently active AFU commands */
s64 room;
spinlock_t rrin_slock; /* Lock to rrin queuing and cmd_room updates */
u64 hb;
u32 internal_lun; /* User-desired LUN mode for this AFU */
char version[16];
u64 interface_version;
struct cxlflash_cfg *parent; /* Pointer back to parent cxlflash_cfg */
};
static inline bool afu_is_cmd_mode(struct afu *afu, u64 cmd_mode)
{
u64 afu_cap = afu->interface_version >> SISL_INTVER_CAP_SHIFT;
return afu_cap & cmd_mode;
}
static inline bool afu_is_sq_cmd_mode(struct afu *afu)
{
return afu_is_cmd_mode(afu, SISL_INTVER_CAP_SQ_CMD_MODE);
}
static inline bool afu_is_ioarrin_cmd_mode(struct afu *afu)
{
return afu_is_cmd_mode(afu, SISL_INTVER_CAP_IOARRIN_CMD_MODE);
}
static inline u64 lun_to_lunid(u64 lun)
{
__be64 lun_id;
int_to_scsilun(lun, (struct scsi_lun *)&lun_id);
return be64_to_cpu(lun_id);
}
int cxlflash_afu_sync(struct afu *, ctx_hndl_t, res_hndl_t, u8);
void cxlflash_list_init(void);
void cxlflash_term_global_luns(void);
void cxlflash_free_errpage(void);
int cxlflash_ioctl(struct scsi_device *, int, void __user *);
void cxlflash_stop_term_user_contexts(struct cxlflash_cfg *);
int cxlflash_mark_contexts_error(struct cxlflash_cfg *);
void cxlflash_term_local_luns(struct cxlflash_cfg *);
void cxlflash_restore_luntable(struct cxlflash_cfg *);
#endif /* ifndef _CXLFLASH_COMMON_H */