remove the lightnvm subsystem

Lightnvm supports the OCSSD 1.x and 2.0 specs which were early attempts
to produce Open Channel SSDs and never made it into the NVMe spec
proper.  They have since been superseded by NVMe enhancements such
as ZNS support.  Remove the support per the deprecation schedule.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210812132308.38486-1-hch@lst.de
Reviewed-by: Matias Bjørling <mb@lightnvm.io>
Reviewed-by: Javier González <javier@javigon.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Christoph Hellwig 2021-08-12 15:23:08 +02:00 committed by Jens Axboe
parent 6e4df4c648
commit 9ea9b9c483
30 changed files with 1 additions and 13678 deletions

View File

@ -85,7 +85,6 @@ available subsections can be seen below.
io-mapping
io_ordering
generic-counter
lightnvm-pblk
memory-devices/index
men-chameleon-bus
ntb

View File

@ -1,21 +0,0 @@
pblk: Physical Block Device Target
==================================
pblk implements a fully associative, host-based FTL that exposes a traditional
block I/O interface. Its primary responsibilities are:
- Map logical addresses onto physical addresses (4KB granularity) in a
logical-to-physical (L2P) table.
- Maintain the integrity and consistency of the L2P table as well as its
recovery from normal tear down and power outage.
- Deal with controller- and media-specific constraints.
- Handle I/O errors.
- Implement garbage collection.
- Maintain consistency across the I/O stack during synchronization points.
For more information please refer to:
http://lightnvm.io
which maintains updated FAQs, manual pages, technical documentation, tools,
contacts, etc.

View File

@ -160,7 +160,6 @@ Code Seq# Include File Comments
'K' all linux/kd.h
'L' 00-1F linux/loop.h conflict!
'L' 10-1F drivers/scsi/mpt3sas/mpt3sas_ctl.h conflict!
'L' 20-2F linux/lightnvm.h
'L' E0-FF linux/ppdd.h encrypted disk device driver
<http://linux01.gwdg.de/~alatham/ppdd.html>
'M' all linux/soundcard.h conflict!

View File

@ -10609,15 +10609,6 @@ F: LICENSES/
F: scripts/spdxcheck-test.sh
F: scripts/spdxcheck.py
LIGHTNVM PLATFORM SUPPORT
M: Matias Bjorling <mb@lightnvm.io>
L: linux-block@vger.kernel.org
S: Maintained
W: http://github/OpenChannelSSD
F: drivers/lightnvm/
F: include/linux/lightnvm.h
F: include/uapi/linux/lightnvm.h
LINEAR RANGES HELPERS
M: Mark Brown <broonie@kernel.org>
R: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>

View File

@ -51,8 +51,6 @@ source "drivers/net/Kconfig"
source "drivers/isdn/Kconfig"
source "drivers/lightnvm/Kconfig"
# input before char - char/joystick depends on it. As does USB.
source "drivers/input/Kconfig"

View File

@ -70,7 +70,6 @@ obj-$(CONFIG_FB_I810) += video/fbdev/i810/
obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
obj-$(CONFIG_PARPORT) += parport/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
obj-$(CONFIG_DAX) += dax/

View File

@ -1,44 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# Open-Channel SSD NVM configuration
#
menuconfig NVM
bool "Open-Channel SSD target support (DEPRECATED)"
depends on BLOCK
help
Say Y here to enable Open-Channel SSDs.
Open-Channel SSDs implement a set of extensions to SSDs that
expose direct access to the underlying non-volatile memory.
If you say N, all options in this submenu will be skipped and disabled;
only do this if you know what you are doing.
This code is deprecated and will be removed in Linux 5.15.
if NVM
config NVM_PBLK
tristate "Physical Block Device Open-Channel SSD target"
select CRC32
help
Allows an open-channel SSD to be exposed as a block device to the
host. The target assumes the device exposes raw flash and must be
explicitly managed by the host.
Please note the disk format is considered EXPERIMENTAL for now.
if NVM_PBLK
config NVM_PBLK_DEBUG
bool "PBlk Debug Support"
default n
help
Enables debug support for pblk. This includes extra checks, more
vocal error messages, and extra tracking fields in the pblk sysfs
entries.
endif # NVM_PBLK
endif # NVM

View File

@ -1,11 +0,0 @@
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Open-Channel SSDs.
#
obj-$(CONFIG_NVM) := core.o
obj-$(CONFIG_NVM_PBLK) += pblk.o
pblk-y := pblk-init.o pblk-core.o pblk-rb.o \
pblk-write.o pblk-cache.o pblk-read.o \
pblk-gc.o pblk-recovery.o pblk-map.o \
pblk-rl.o pblk-sysfs.o

File diff suppressed because it is too large Load Diff

View File

@ -1,137 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-cache.c - pblk's write cache
*/
#include "pblk.h"
/*
 * pblk_write_to_cache - copy a user write bio into the pblk write buffer.
 * @pblk:  pblk instance owning the write buffer (pblk->rwb)
 * @bio:   user bio; its lba and sector count are read with pblk_get_lba()
 *         and pblk_get_secs()
 * @flags: initial write-context flags stored with every buffered entry
 *
 * Space for all sectors is reserved up front with pblk_rb_may_write_user().
 * The call retries (after io_schedule()) while the buffer reports
 * NVM_IO_REQUEUE, and fails the bio when it reports NVM_IO_ERR.  I/O
 * accounting is started on entry and always finished at the "out" label.
 */
void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
unsigned long flags)
{
struct pblk_w_ctx w_ctx;
sector_t lba = pblk_get_lba(bio);
unsigned long start_time;
unsigned int bpos, pos;
int nr_entries = pblk_get_secs(bio);
int i, ret;
start_time = bio_start_io_acct(bio);
/* Update the write buffer head (mem) with the entries that we can
* write. The write in itself cannot fail, so there is no need to
* rollback from here on.
*/
retry:
ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
switch (ret) {
case NVM_IO_REQUEUE:
/* No room yet: yield and try the reservation again */
io_schedule();
goto retry;
case NVM_IO_ERR:
/* Reservation failed hard: stop the pipeline and error the bio */
pblk_pipeline_stop(pblk);
bio_io_error(bio);
goto out;
}
pblk_ppa_set_empty(&w_ctx.ppa);
w_ctx.flags = flags;
if (bio->bi_opf & REQ_PREFLUSH) {
/* Tag the entries as a flush point and wake the writer right away */
w_ctx.flags |= PBLK_FLUSH_ENTRY;
pblk_write_kick(pblk);
}
/* A bio without payload (e.g. an empty flush) has nothing to copy */
if (unlikely(!bio_has_data(bio)))
goto out;
/* Copy one exposed page per entry, advancing the bio as we go */
for (i = 0; i < nr_entries; i++) {
void *data = bio_data(bio);
w_ctx.lba = lba + i;
pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i);
pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos);
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
}
atomic64_add(nr_entries, &pblk->user_wa);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(nr_entries, &pblk->inflight_writes);
atomic_long_add(nr_entries, &pblk->req_writes);
#endif
pblk_rl_inserted(&pblk->rl, nr_entries);
out:
bio_end_io_acct(bio, start_time);
pblk_write_should_kick(pblk);
/* Only complete the bio here when the buffer reported NVM_IO_DONE */
if (ret == NVM_IO_DONE)
bio_endio(bio);
}
/*
 * pblk_write_gc_to_cache - copy the valid sectors of a GC request into the
 * write buffer.
 *
 * On GC the incoming lbas are not necessarily sequential. Also, some of the
 * lbas might not be valid entries, which are marked as empty (ADDR_EMPTY) by
 * the GC thread; those are skipped and consume neither a buffer slot nor a
 * page of gc_rq->data.
 *
 * Always returns NVM_IO_OK.
 */
int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
	struct pblk_w_ctx w_ctx;
	unsigned int bpos, pos;
	void *data = gc_rq->data;
	int i, valid_entries;

	/*
	 * Reserve room in the write buffer head (mem). The write in itself
	 * cannot fail, so there is no need to rollback from here on.
	 */
	while (!pblk_rb_may_write_gc(&pblk->rwb, gc_rq->secs_to_gc, &bpos))
		io_schedule();

	w_ctx.flags = PBLK_IOTYPE_GC;
	pblk_ppa_set_empty(&w_ctx.ppa);

	valid_entries = 0;
	for (i = 0; i < gc_rq->nr_secs; i++) {
		if (gc_rq->lba_list[i] != ADDR_EMPTY) {
			w_ctx.lba = gc_rq->lba_list[i];

			pos = pblk_rb_wrap_pos(&pblk->rwb,
					       bpos + valid_entries);
			pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx,
					       gc_rq->line,
					       gc_rq->paddr_list[i], pos);

			data += PBLK_EXPOSED_PAGE_SIZE;
			valid_entries++;
		}
	}

	WARN_ONCE(gc_rq->secs_to_gc != valid_entries,
		  "pblk: inconsistent GC write\n");

	atomic64_add(valid_entries, &pblk->gc_wa);

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_long_add(valid_entries, &pblk->inflight_writes);
	atomic_long_add(valid_entries, &pblk->recov_gc_writes);
#endif

	pblk_write_should_kick(pblk);
	return NVM_IO_OK;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,726 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-gc.c - pblk's garbage collector
*/
#include "pblk.h"
#include "pblk-trace.h"
#include <linux/delay.h>
/* Release a GC request together with its vmalloc'ed data buffer. */
static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
	void *payload = gc_rq->data;

	vfree(payload);
	kfree(gc_rq);
}
static int pblk_gc_write(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
struct pblk_gc_rq *gc_rq, *tgc_rq;
LIST_HEAD(w_list);
spin_lock(&gc->w_lock);
if (list_empty(&gc->w_list)) {
spin_unlock(&gc->w_lock);
return 1;
}
list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
gc->w_entries = 0;
spin_unlock(&gc->w_lock);
list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
pblk_write_gc_to_cache(pblk, gc_rq);
list_del(&gc_rq->list);
kref_put(&gc_rq->line->ref, pblk_line_put);
pblk_gc_free_gc_rq(gc_rq);
}
return 0;
}
/* Wake the GC writer kthread. */
static void pblk_gc_writer_kick(struct pblk_gc *gc)
{
	struct task_struct *writer = gc->gc_writer_ts;

	wake_up_process(writer);
}
/*
 * pblk_put_line_back - return a line that was picked for GC to the closed
 * state and re-insert it on the GC list chosen by pblk_line_gc_list().
 *
 * Takes l_mg->gc_lock first and line->lock nested inside it.  Warns if the
 * line is not currently in PBLK_LINESTATE_GC.
 */
void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list;
spin_lock(&l_mg->gc_lock);
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_GC);
line->state = PBLK_LINESTATE_CLOSED;
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
line->state);
/* We need to reset gc_group in order to ensure that
* pblk_line_gc_list will return proper move_list
* since right now current line is not on any of the
* gc lists.
*/
line->gc_group = PBLK_LINEGC_NONE;
move_list = pblk_line_gc_list(pblk, line);
spin_unlock(&line->lock);
/* The list insertion itself only needs gc_lock, which is still held */
list_add_tail(&line->list, move_list);
spin_unlock(&l_mg->gc_lock);
}
/*
 * pblk_gc_line_ws - work item that reads the valid sectors of one GC request
 * from the victim line and hands the request over to the GC writer.
 *
 * The work context (gc_rq_ws) is always freed here.  On success the gc_rq is
 * queued on gc->w_list; pblk_gc_write() later frees it and drops the line
 * reference.  On the "out" path (read error, or nothing left to move) this
 * function frees the gc_rq and drops the line reference itself.
 */
static void pblk_gc_line_ws(struct work_struct *work)
{
struct pblk_line_ws *gc_rq_ws = container_of(work,
struct pblk_line_ws, ws);
struct pblk *pblk = gc_rq_ws->pblk;
struct pblk_gc *gc = &pblk->gc;
struct pblk_line *line = gc_rq_ws->line;
struct pblk_gc_rq *gc_rq = gc_rq_ws->priv;
int ret;
/* Release the slot taken by pblk_gc_line_prepare_ws() before queuing us */
up(&gc->gc_sem);
/* Read from GC victim block */
ret = pblk_submit_read_gc(pblk, gc_rq);
if (ret) {
line->w_err_gc->has_gc_err = 1;
goto out;
}
/* Nothing to rewrite for this request */
if (!gc_rq->secs_to_gc)
goto out;
retry:
spin_lock(&gc->w_lock);
if (gc->w_entries >= PBLK_GC_RQ_QD) {
/* Writer queue is full: nudge the writer and back off briefly */
spin_unlock(&gc->w_lock);
pblk_gc_writer_kick(&pblk->gc);
usleep_range(128, 256);
goto retry;
}
gc->w_entries++;
list_add_tail(&gc_rq->list, &gc->w_list);
spin_unlock(&gc->w_lock);
pblk_gc_writer_kick(&pblk->gc);
kfree(gc_rq_ws);
return;
out:
pblk_gc_free_gc_rq(gc_rq);
kref_put(&line->ref, pblk_line_put);
kfree(gc_rq_ws);
}
/*
 * Read a line's end metadata (emeta) and return a freshly allocated copy of
 * its lba list.
 *
 * Returns NULL if any allocation fails, if the emeta cannot be read, or if
 * pblk_recov_check_emeta() rejects it.  The caller owns the returned buffer
 * and must release it with kvfree().
 */
static __le64 *get_lba_list_from_emeta(struct pblk *pblk,
				       struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	unsigned int lba_list_size = lm->emeta_len[2];
	struct line_emeta *emeta_buf;
	__le64 *lba_list = NULL;
	int ret;

	emeta_buf = kvmalloc(lm->emeta_len[0], GFP_KERNEL);
	if (!emeta_buf)
		return NULL;

	ret = pblk_line_emeta_read(pblk, line, emeta_buf);
	if (ret) {
		pblk_err(pblk, "line %d read emeta failed (%d)\n",
			 line->id, ret);
		goto free_emeta;
	}

	/*
	 * If this check fails, it means that emeta is corrupted.
	 * For now, leave the line untouched.
	 * TODO: Implement a recovery routine that scans and moves
	 * all sectors on the line.
	 */
	ret = pblk_recov_check_emeta(pblk, emeta_buf);
	if (ret) {
		pblk_err(pblk, "inconsistent emeta (line %d)\n",
			 line->id);
		goto free_emeta;
	}

	lba_list = kvmalloc(lba_list_size, GFP_KERNEL);
	if (lba_list)
		memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size);

free_emeta:
	kvfree(emeta_buf);
	return lba_list;
}
/*
 * pblk_gc_line_prepare_ws - work item that splits one victim line into GC
 * requests and queues them on the line-reader workqueue.
 *
 * Steps, as implemented below:
 *  1. Snapshot the line's invalid bitmap and valid-sector count under
 *     line->lock.
 *  2. Obtain the lba list, either from the write-error context or from the
 *     line's emeta.
 *  3. Repeatedly build a gc_rq of up to pblk->max_write_pgs still-valid
 *     sectors, take a line reference and queue pblk_gc_line_ws() for it.
 *
 * On the normal exit ("out") the temporary buffers and this work item are
 * freed, one line reference is dropped and read_inflight_gc is decremented.
 * On any failure the line is handed back through pblk_put_line_back().
 */
static void pblk_gc_line_prepare_ws(struct work_struct *work)
{
struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
ws);
struct pblk *pblk = line_ws->pblk;
struct pblk_line *line = line_ws->line;
struct pblk_line_meta *lm = &pblk->lm;
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_gc *gc = &pblk->gc;
struct pblk_line_ws *gc_rq_ws;
struct pblk_gc_rq *gc_rq;
__le64 *lba_list;
unsigned long *invalid_bitmap;
int sec_left, nr_secs, bit;
invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
if (!invalid_bitmap)
goto fail_free_ws;
if (line->w_err_gc->has_write_err) {
/* Take over the lba list saved in the write-error context */
lba_list = line->w_err_gc->lba_list;
line->w_err_gc->lba_list = NULL;
} else {
lba_list = get_lba_list_from_emeta(pblk, line);
if (!lba_list) {
pblk_err(pblk, "could not interpret emeta (line %d)\n",
line->id);
goto fail_free_invalid_bitmap;
}
}
/* Work on a private snapshot so line->lock is not held while queuing */
spin_lock(&line->lock);
bitmap_copy(invalid_bitmap, line->invalid_bitmap, lm->sec_per_line);
sec_left = pblk_line_vsc(line);
spin_unlock(&line->lock);
if (sec_left < 0) {
pblk_err(pblk, "corrupted GC line (%d)\n", line->id);
goto fail_free_lba_list;
}
bit = -1;
next_rq:
gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
if (!gc_rq)
goto fail_free_lba_list;
nr_secs = 0;
/* Collect the next batch of sectors whose invalid bit is clear */
do {
bit = find_next_zero_bit(invalid_bitmap, lm->sec_per_line,
bit + 1);
/* Stop once the scan moves past line->emeta_ssec */
if (bit > line->emeta_ssec)
break;
gc_rq->paddr_list[nr_secs] = bit;
gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
} while (nr_secs < pblk->max_write_pgs);
if (unlikely(!nr_secs)) {
kfree(gc_rq);
goto out;
}
gc_rq->nr_secs = nr_secs;
gc_rq->line = line;
gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
if (!gc_rq->data)
goto fail_free_gc_rq;
gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
if (!gc_rq_ws)
goto fail_free_gc_data;
gc_rq_ws->pblk = pblk;
gc_rq_ws->line = line;
gc_rq_ws->priv = gc_rq;
/* The write GC path can be much slower than the read GC one due to
* the budget imposed by the rate-limiter. Balance in case that we get
* back pressure from the write GC path.
*/
while (down_timeout(&gc->gc_sem, msecs_to_jiffies(30000)))
io_schedule();
/* Each queued request holds its own reference on the line */
kref_get(&line->ref);
INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws);
queue_work(gc->gc_line_reader_wq, &gc_rq_ws->ws);
sec_left -= nr_secs;
if (sec_left > 0)
goto next_rq;
out:
kvfree(lba_list);
kfree(line_ws);
kfree(invalid_bitmap);
kref_put(&line->ref, pblk_line_put);
atomic_dec(&gc->read_inflight_gc);
return;
/*
 * NOTE(review): the labels below can also be reached from a later pass
 * through next_rq, i.e. after earlier requests for this line were already
 * queued. Confirm that putting the line back is intended in that case.
 */
fail_free_gc_data:
vfree(gc_rq->data);
fail_free_gc_rq:
kfree(gc_rq);
fail_free_lba_list:
kvfree(lba_list);
fail_free_invalid_bitmap:
kfree(invalid_bitmap);
fail_free_ws:
kfree(line_ws);
/* Line goes back to closed state, so we cannot release additional
* reference for line, since we do that only when we want to do
* gc to free line state transition.
*/
pblk_put_line_back(pblk, line);
atomic_dec(&gc->read_inflight_gc);
pblk_err(pblk, "failed to GC line %d\n", line->id);
}
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_gc *gc = &pblk->gc;
struct pblk_line_ws *line_ws;
pblk_debug(pblk, "line '%d' being reclaimed for GC\n", line->id);
line_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
if (!line_ws)
return -ENOMEM;
line_ws->pblk = pblk;
line_ws->line = line;
atomic_inc(&gc->pipeline_gc);
INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
queue_work(gc->gc_reader_wq, &line_ws->ws);
return 0;
}
/* Wake the GC reader kthread. */
static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
	struct task_struct *reader = gc->gc_reader_ts;

	wake_up_process(reader);
}
/*
 * Wake the GC writer and reader threads; while GC is enabled also wake the
 * main GC thread and re-arm the periodic GC timer.
 */
static void pblk_gc_kick(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	pblk_gc_writer_kick(gc);
	pblk_gc_reader_kick(gc);

	/* If we're shutting down GC, let's not start it up again */
	if (!gc->gc_enabled)
		return;

	wake_up_process(gc->gc_ts);
	mod_timer(&gc->gc_timer,
		  jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}
static int pblk_gc_read(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
struct pblk_line *line;
spin_lock(&gc->r_lock);
if (list_empty(&gc->r_list)) {
spin_unlock(&gc->r_lock);
return 1;
}
line = list_first_entry(&gc->r_list, struct pblk_line, list);
list_del(&line->list);
spin_unlock(&gc->r_lock);
pblk_gc_kick(pblk);
if (pblk_gc_line(pblk, line)) {
pblk_err(pblk, "failed to GC line %d\n", line->id);
/* rollback */
spin_lock(&gc->r_lock);
list_add_tail(&line->list, &gc->r_list);
spin_unlock(&gc->r_lock);
}
return 0;
}
/*
 * Pick the line with the lowest valid sector count (vsc) from @group_list.
 *
 * A line whose sec_to_update counter is non-zero does not refresh the sampled
 * count, so the comparison for it reuses the value sampled from the previous
 * line in the walk.
 *
 * Returns NULL when no candidate lowered the initial all-ones count (which
 * includes the empty-list case).
 */
static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
						 struct list_head *group_list)
{
	struct pblk_line *cur;
	struct pblk_line *best;
	unsigned int cur_vsc = ~0x0L;
	unsigned int best_vsc = ~0x0L;

	best = list_first_entry(group_list, struct pblk_line, list);

	list_for_each_entry(cur, group_list, list) {
		if (atomic_read(&cur->sec_to_update) == 0)
			cur_vsc = le32_to_cpu(*cur->vsc);

		if (cur_vsc >= best_vsc)
			continue;

		best = cur;
		best_vsc = le32_to_cpu(*best->vsc);
	}

	return (best_vsc == ~0x0) ? NULL : best;
}
static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
{
unsigned int nr_blocks_free, nr_blocks_need;
unsigned int werr_lines = atomic_read(&rl->werr_lines);
nr_blocks_need = pblk_rl_high_thrs(rl);
nr_blocks_free = pblk_rl_nr_free_blks(rl);
/* This is not critical, no need to take lock here */
return ((werr_lines > 0) ||
((gc->gc_active) && (nr_blocks_need > nr_blocks_free)));
}
/*
 * Drain l_mg->gc_full_list: each line on it is moved to the GC state, removed
 * from the list, counted in gc->pipeline_gc and has one reference dropped via
 * pblk_line_put.
 */
void pblk_gc_free_full_lines(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *full;

	for (;;) {
		spin_lock(&l_mg->gc_lock);
		if (list_empty(&l_mg->gc_full_list))
			break;

		full = list_first_entry(&l_mg->gc_full_list,
					struct pblk_line, list);

		spin_lock(&full->lock);
		WARN_ON(full->state != PBLK_LINESTATE_CLOSED);
		full->state = PBLK_LINESTATE_GC;
		trace_pblk_line_state(pblk_disk_name(pblk), full->id,
				      full->state);
		spin_unlock(&full->lock);

		list_del(&full->list);
		spin_unlock(&l_mg->gc_lock);

		atomic_inc(&gc->pipeline_gc);
		kref_put(&full->ref, pblk_line_put);
	}

	/* Loop exits with gc_lock held and the list empty */
	spin_unlock(&l_mg->gc_lock);
}
/*
 * pblk_gc_run - one pass of the main GC thread.
 *
 * Lines with no valid sectors will be returned to the free list immediately. If
 * GC is activated - either because the free block count is under the determined
 * threshold, or because it is being forced from user space - only lines with a
 * high count of invalid sectors will be recycled.
 *
 * Victims are taken from l_mg->gc_lists[] starting at index 0.  The next
 * group is only visited when the current pass queued nothing (prev_group is
 * still 0), pblk->rl.rb_state is above the next group index, and that index
 * is below PBLK_GC_NR_LISTS.
 */
static void pblk_gc_run(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_gc *gc = &pblk->gc;
struct pblk_line *line;
struct list_head *group_list;
bool run_gc;
int read_inflight_gc, gc_group = 0, prev_group = 0;
pblk_gc_free_full_lines(pblk);
run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
/* Nothing to do, or the reader already has PBLK_GC_L_QD lines in flight */
if (!run_gc || (atomic_read(&gc->read_inflight_gc) >= PBLK_GC_L_QD))
return;
next_gc_group:
group_list = l_mg->gc_lists[gc_group++];
do {
spin_lock(&l_mg->gc_lock);
line = pblk_gc_get_victim_line(pblk, group_list);
if (!line) {
spin_unlock(&l_mg->gc_lock);
break;
}
/* Move the victim from the closed state to the GC state */
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
line->state = PBLK_LINESTATE_GC;
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
line->state);
spin_unlock(&line->lock);
list_del(&line->list);
spin_unlock(&l_mg->gc_lock);
/* Hand the line to the reader thread */
spin_lock(&gc->r_lock);
list_add_tail(&line->list, &gc->r_list);
spin_unlock(&gc->r_lock);
read_inflight_gc = atomic_inc_return(&gc->read_inflight_gc);
pblk_gc_reader_kick(gc);
prev_group = 1;
/* No need to queue up more GC lines than we can handle */
run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
if (!run_gc || read_inflight_gc >= PBLK_GC_L_QD)
break;
} while (1);
if (!prev_group && pblk->rl.rb_state > gc_group &&
gc_group < PBLK_GC_NR_LISTS)
goto next_gc_group;
}
/* Timer callback: kick the GC threads of the pblk instance owning @t. */
static void pblk_gc_timer(struct timer_list *t)
{
	struct pblk *owner = from_timer(owner, t, gc.gc_timer);

	pblk_gc_kick(owner);
}
/*
 * Main GC kthread: run one GC pass, then sleep interruptibly until woken
 * again; repeat until the thread is asked to stop.
 */
static int pblk_gc_ts(void *data)
{
	struct pblk *ctx = data;

	for (;;) {
		if (kthread_should_stop())
			break;

		pblk_gc_run(ctx);
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}
/*
 * GC writer kthread: keep draining the GC write list; sleep interruptibly
 * only when pblk_gc_write() reports that the list was empty.
 */
static int pblk_gc_writer_ts(void *data)
{
	struct pblk *ctx = data;

	while (!kthread_should_stop()) {
		if (pblk_gc_write(ctx)) {
			set_current_state(TASK_INTERRUPTIBLE);
			io_schedule();
		}
	}

	return 0;
}
/*
 * GC reader kthread: keep pulling lines off the GC read list; sleep
 * interruptibly only when pblk_gc_read() reports that the list was empty.
 * After being asked to stop, wait until gc->pipeline_gc drops to zero.
 */
static int pblk_gc_reader_ts(void *data)
{
	struct pblk *pblk = data;
	struct pblk_gc *gc = &pblk->gc;

	while (!kthread_should_stop()) {
		if (pblk_gc_read(pblk)) {
			set_current_state(TASK_INTERRUPTIBLE);
			io_schedule();
		}
	}

#ifdef CONFIG_NVM_PBLK_DEBUG
	pblk_info(pblk, "flushing gc pipeline, %d lines left\n",
		  atomic_read(&gc->pipeline_gc));
#endif

	/* Yield until every line still in the GC pipeline has been released */
	while (atomic_read(&gc->pipeline_gc))
		schedule();

	return 0;
}
static void pblk_gc_start(struct pblk *pblk)
{
pblk->gc.gc_active = 1;
pblk_debug(pblk, "gc start\n");
}
/* Activate and kick GC if it is enabled but not yet active. */
void pblk_gc_should_start(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	if (!gc->gc_enabled)
		return;
	if (gc->gc_active)
		return;

	pblk_gc_start(pblk);
	pblk_gc_kick(pblk);
}
/* Deactivate GC unless it is being forced. */
void pblk_gc_should_stop(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	if (!gc->gc_active || gc->gc_forced)
		return;

	gc->gc_active = 0;
}
/* Ask the rate-limiter to refresh its rates. */
void pblk_gc_should_kick(struct pblk *pblk)
{
	struct pblk_rl *rl = &pblk->rl;

	pblk_rl_update_rates(rl);
}
/*
 * Report the GC enabled/active flags through @gc_enabled and @gc_active.
 * Both flags are sampled together under gc->lock.
 */
void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
			      int *gc_active)
{
	struct pblk_gc *gc = &pblk->gc;
	int enabled, active;

	spin_lock(&gc->lock);
	enabled = gc->gc_enabled;
	active = gc->gc_active;
	spin_unlock(&gc->lock);

	*gc_enabled = enabled;
	*gc_active = active;
}
/*
 * Force GC on (@force == 1) or off (@force == 0) from sysfs.  The forced and
 * enabled flags are both set to match @force, then GC is started if needed.
 *
 * Returns 0 on success, -EINVAL if @force is neither 0 nor 1.
 */
int pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
	struct pblk_gc *gc = &pblk->gc;

	if (force < 0 || force > 1)
		return -EINVAL;

	spin_lock(&gc->lock);
	gc->gc_forced = force;
	gc->gc_enabled = force ? 1 : 0;
	spin_unlock(&gc->lock);

	pblk_gc_should_start(pblk);

	return 0;
}
int pblk_gc_init(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
int ret;
gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
if (IS_ERR(gc->gc_ts)) {
pblk_err(pblk, "could not allocate GC main kthread\n");
return PTR_ERR(gc->gc_ts);
}
gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
"pblk-gc-writer-ts");
if (IS_ERR(gc->gc_writer_ts)) {
pblk_err(pblk, "could not allocate GC writer kthread\n");
ret = PTR_ERR(gc->gc_writer_ts);
goto fail_free_main_kthread;
}
gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
"pblk-gc-reader-ts");
if (IS_ERR(gc->gc_reader_ts)) {
pblk_err(pblk, "could not allocate GC reader kthread\n");
ret = PTR_ERR(gc->gc_reader_ts);
goto fail_free_writer_kthread;
}
timer_setup(&gc->gc_timer, pblk_gc_timer, 0);
mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
gc->gc_active = 0;
gc->gc_forced = 0;
gc->gc_enabled = 1;
gc->w_entries = 0;
atomic_set(&gc->read_inflight_gc, 0);
atomic_set(&gc->pipeline_gc, 0);
/* Workqueue that reads valid sectors from a line and submit them to the
* GC writer to be recycled.
*/
gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
if (!gc->gc_line_reader_wq) {
pblk_err(pblk, "could not allocate GC line reader workqueue\n");
ret = -ENOMEM;
goto fail_free_reader_kthread;
}
/* Workqueue that prepare lines for GC */
gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
if (!gc->gc_reader_wq) {
pblk_err(pblk, "could not allocate GC reader workqueue\n");
ret = -ENOMEM;
goto fail_free_reader_line_wq;
}
spin_lock_init(&gc->lock);
spin_lock_init(&gc->w_lock);
spin_lock_init(&gc->r_lock);
sema_init(&gc->gc_sem, PBLK_GC_RQ_QD);
INIT_LIST_HEAD(&gc->w_list);
INIT_LIST_HEAD(&gc->r_list);
return 0;
fail_free_reader_line_wq:
destroy_workqueue(gc->gc_line_reader_wq);
fail_free_reader_kthread:
kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
kthread_stop(gc->gc_ts);
return ret;
}
/*
 * pblk_gc_exit - stop garbage collection and tear down its threads and
 * workqueues.
 * @pblk:     pblk instance
 * @graceful: when true, both GC workqueues are flushed before being destroyed
 *
 * The order below matters: GC is disabled and the timer is cancelled first so
 * that pblk_gc_kick() no longer re-arms it, and the writer thread is stopped
 * last, after the workqueues that feed it are gone.
 */
void pblk_gc_exit(struct pblk *pblk, bool graceful)
{
struct pblk_gc *gc = &pblk->gc;
gc->gc_enabled = 0;
del_timer_sync(&gc->gc_timer);
gc->gc_active = 0;
if (gc->gc_ts)
kthread_stop(gc->gc_ts);
if (gc->gc_reader_ts)
kthread_stop(gc->gc_reader_ts);
if (graceful) {
flush_workqueue(gc->gc_reader_wq);
flush_workqueue(gc->gc_line_reader_wq);
}
destroy_workqueue(gc->gc_reader_wq);
destroy_workqueue(gc->gc_line_reader_wq);
if (gc->gc_writer_ts)
kthread_stop(gc->gc_writer_ts);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,210 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-map.c - pblk's lba-ppa mapping strategy
*
*/
#include "pblk.h"
/*
 * pblk_map_page_data - map one group of pblk->min_write_pgs sectors onto the
 * current data line.
 * @pblk:       pblk instance
 * @sentry:     write-buffer position of the first entry in this group
 * @ppa_list:   output, one physical address per sector in the group
 * @lun_bitmap: passed through to pblk_down_rq()
 * @meta_list:  per-sector metadata for this group
 * @valid_secs: number of leading sectors that carry buffered data; the
 *              remaining ones are mapped as empty and invalidated
 *
 * Returns 0 on success, or -ENOSPC when there is no data line or the full
 * line cannot be replaced.
 */
static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
struct ppa_addr *ppa_list,
unsigned long *lun_bitmap,
void *meta_list,
unsigned int valid_secs)
{
struct pblk_line *line = pblk_line_get_data(pblk);
struct pblk_emeta *emeta;
struct pblk_w_ctx *w_ctx;
__le64 *lba_list;
u64 paddr;
int nr_secs = pblk->min_write_pgs;
int i;
if (!line)
return -ENOSPC;
if (pblk_line_is_full(line)) {
struct pblk_line *prev_line = line;
/* If we cannot allocate a new line, make sure to store metadata
* on current line and then fail
*/
line = pblk_line_replace_data(pblk);
pblk_line_close_meta(pblk, prev_line);
if (!line) {
pblk_pipeline_stop(pblk);
return -ENOSPC;
}
}
emeta = line->emeta;
lba_list = emeta_to_lbas(pblk, emeta->buf);
/* First in-line address of the group; the loop walks paddr forward */
paddr = pblk_alloc_page(pblk, line, nr_secs);
for (i = 0; i < nr_secs; i++, paddr++) {
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
/* ppa to be sent to the device */
ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
/* Write context for target bio completion on write buffer. Note
* that the write buffer is protected by the sync backpointer,
* and a single writer thread have access to each specific entry
* at a time. Thus, it is safe to modify the context for the
* entry we are setting up for submission without taking any
* lock or memory barrier.
*/
if (i < valid_secs) {
/* Each mapped sector holds a line reference and a pending update */
kref_get(&line->ref);
atomic_inc(&line->sec_to_update);
w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
w_ctx->ppa = ppa_list[i];
meta->lba = cpu_to_le64(w_ctx->lba);
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
/* A buffered entry with an empty lba is counted in pad_wa instead */
if (lba_list[paddr] != addr_empty)
line->nr_valid_lbas++;
else
atomic64_inc(&pblk->pad_wa);
} else {
/* Sector beyond valid_secs: record it as empty and invalidate it */
lba_list[paddr] = addr_empty;
meta->lba = addr_empty;
__pblk_map_invalidate(pblk, line, paddr);
}
}
pblk_down_rq(pblk, ppa_list[0], lun_bitmap);
return 0;
}
/*
 * pblk_map_rq - map the sectors of a write request, starting at index @off,
 * in groups of pblk->min_write_pgs.
 *
 * A group that would extend past @valid_secs is mapped with
 * (valid_secs % min_write_pgs) valid sectors; all other groups are mapped
 * as fully valid.
 *
 * Returns 0 on success or the first error from pblk_map_page_data().
 */
int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
		unsigned long *lun_bitmap, unsigned int valid_secs,
		unsigned int off)
{
	void *meta_base = pblk_get_meta_for_writes(pblk, rqd);
	struct ppa_addr *ppas = nvm_rq_to_ppa_list(rqd);
	int stride = pblk->min_write_pgs;
	int pos;

	for (pos = off; pos < rqd->nr_ppas; pos += stride) {
		unsigned int map_secs;
		void *meta_chunk;
		int err;

		if (pos + stride > valid_secs)
			map_secs = valid_secs % stride;
		else
			map_secs = stride;

		meta_chunk = pblk_get_meta(pblk, meta_base, pos);

		err = pblk_map_page_data(pblk, sentry + pos, &ppas[pos],
					 lun_bitmap, meta_chunk, map_secs);
		if (err)
			return err;
	}

	return 0;
}
/*
 * pblk_map_erase_rq - map a write request and, when possible, pick one block
 * of the next (erase) line to be erased alongside it.
 *
 * Sectors are mapped group by group as in pblk_map_rq().  After each group
 * the LUN position of its first ppa is checked against the erase line's
 * erase_bitmap; the first position not yet marked is claimed, written to
 * @erase_ppa, and the rest of the request is mapped with pblk_map_rq().
 *
 * only if erase_ppa is set, acquire erase semaphore
 *
 * Returns 0 on success, -ENOSPC when no erase line is available after all
 * groups were mapped, or an error from the mapping helpers.
 */
int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
unsigned int sentry, unsigned long *lun_bitmap,
unsigned int valid_secs, struct ppa_addr *erase_ppa)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
void *meta_buffer;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
struct pblk_line *e_line, *d_line;
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i, erase_lun;
int ret;
for (i = 0; i < rqd->nr_ppas; i += min) {
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
meta_buffer = pblk_get_meta(pblk, meta_list, i);
ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
lun_bitmap, meta_buffer, map_secs);
if (ret)
return ret;
erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]);
/* line can change after page map. We might also be writing the
* last line.
*/
e_line = pblk_line_get_erase(pblk);
/* No erase line: just map whatever is left of the request */
if (!e_line)
return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
valid_secs, i + min);
spin_lock(&e_line->lock);
if (!test_bit(erase_lun, e_line->erase_bitmap)) {
/* Claim this position for erase and report it to the caller */
set_bit(erase_lun, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
*erase_ppa = ppa_list[i];
erase_ppa->a.blk = e_line->id;
erase_ppa->a.reserved = 0;
spin_unlock(&e_line->lock);
/* Avoid evaluating e_line->left_eblks */
return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
valid_secs, i + min);
}
spin_unlock(&e_line->lock);
}
d_line = pblk_line_get_data(pblk);
/* line can change after page map. We might also be writing the
* last line.
*/
e_line = pblk_line_get_erase(pblk);
if (!e_line)
return -ENOSPC;
/* Erase blocks that are bad in this line but might not be in next */
if (unlikely(pblk_ppa_empty(*erase_ppa)) &&
bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
int bit = -1;
retry:
bit = find_next_bit(d_line->blk_bitmap,
lm->blk_per_line, bit + 1);
if (bit >= lm->blk_per_line)
return 0;
spin_lock(&e_line->lock);
if (test_bit(bit, e_line->erase_bitmap)) {
spin_unlock(&e_line->lock);
goto retry;
}
spin_unlock(&e_line->lock);
/*
 * NOTE(review): unlike the loop above, the bit is set after e_line->lock
 * has been dropped. Confirm that no other context can claim the same
 * position between the test and the set.
 */
set_bit(bit, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
*erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
erase_ppa->a.blk = e_line->id;
}
return 0;
}

View File

@ -1,858 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
*
* Based upon the circular ringbuffer.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-rb.c - pblk's write buffer
*/
#include <linux/circ_buf.h>
#include "pblk.h"
/*
 * File-wide rwsem taken for write while a ring buffer's page list is being
 * built (pblk_rb_init) or torn down (pblk_rb_data_free).
 */
static DECLARE_RWSEM(pblk_rb_lock);
/*
 * Free every page set on rb->pages together with its bookkeeping node.
 * Takes pblk_rb_lock for write, so callers must not already hold it.
 */
static void pblk_rb_data_free(struct pblk_rb *rb)
{
	struct pblk_rb_pages *set, *next;

	down_write(&pblk_rb_lock);
	list_for_each_entry_safe(set, next, &rb->pages, list) {
		unsigned long vaddr = (unsigned long)page_address(set->pages);

		free_pages(vaddr, set->order);
		list_del(&set->list);
		kfree(set);
	}
	up_write(&pblk_rb_lock);
}
/* Release the ring buffer's data pages and then its entry array. */
void pblk_rb_free(struct pblk_rb *rb)
{
	struct pblk_rb_entry *entries;

	pblk_rb_data_free(rb);

	entries = rb->entries;
	vfree(entries);
}
/*
 * pblk_rb_calculate_size -- calculate the size of the write buffer
 *
 * The result is a power of two that is large enough to (i) fit at least two
 * split bios (considering max I/O size NVM_MAX_VLBA), (ii) hold @nr_entries,
 * and (iii) stay strictly above @threshold + NVM_MAX_VLBA so that the
 * threshold will be respected.
 */
static unsigned int pblk_rb_calculate_size(unsigned int nr_entries,
					   unsigned int threshold)
{
	unsigned int reserved = threshold + NVM_MAX_VLBA;
	unsigned int thr_sz = 1 << (get_count_order(reserved));
	unsigned int wanted = max(thr_sz, nr_entries);
	int wanted_order = (int)(get_count_order(wanted));
	int two_bio_order = (int)(get_count_order(NVM_MAX_VLBA << 1));
	unsigned int buf_sz;

	buf_sz = (1 << max(wanted_order, two_bio_order));

	/* Double once more if the reserved span would fill the whole buffer */
	if (reserved >= buf_sz)
		buf_sz <<= 1;

	return buf_sz;
}
/*
 * pblk_rb_init - allocate and initialise the write ring buffer.
 * @rb:        ring buffer embedded in a struct pblk (as pblk->rwb)
 * @size:      requested number of entries; rounded by
 *             pblk_rb_calculate_size()
 * @threshold: back threshold, stored in rb->back_thres and handed to the
 *             rate-limiter
 * @seg_size:  size of one data segment; rounded up to a power of two
 *
 * The entry array is vzalloc'ed and the data area is allocated as one or more
 * page sets of at most order MAX_ORDER - 1, tracked on rb->pages.  The number
 * of entries is a power of two
 * (Documentation/core-api/circular-buffers.rst).
 *
 * Returns 0 on success or -ENOMEM.  On failure every page set allocated so
 * far and the entry array are released and rb->entries is reset to NULL.
 */
int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold,
		 unsigned int seg_size)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	struct pblk_rb_entry *entries;
	unsigned int init_entry = 0;
	unsigned int max_order = MAX_ORDER - 1;
	unsigned int power_size, power_seg_sz;
	unsigned int alloc_order, order, iter;
	unsigned int nr_entries;

	nr_entries = pblk_rb_calculate_size(size, threshold);
	entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry)));
	if (!entries)
		return -ENOMEM;

	power_size = get_count_order(nr_entries);
	power_seg_sz = get_count_order(seg_size);

	down_write(&pblk_rb_lock);
	rb->entries = entries;
	rb->seg_size = (1 << power_seg_sz);
	rb->nr_entries = (1 << power_size);
	rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
	rb->back_thres = threshold;
	rb->flush_point = EMPTY_ENTRY;

	spin_lock_init(&rb->w_lock);
	spin_lock_init(&rb->s_lock);

	INIT_LIST_HEAD(&rb->pages);

	/* Split the data area into page sets no larger than max_order */
	alloc_order = power_size;
	if (alloc_order >= max_order) {
		order = max_order;
		iter = (1 << (alloc_order - max_order));
	} else {
		order = alloc_order;
		iter = 1;
	}

	do {
		struct pblk_rb_entry *entry;
		struct pblk_rb_pages *page_set;
		void *kaddr;
		unsigned long set_size;
		unsigned long i;

		page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
		if (!page_set)
			goto fail_free_data;

		page_set->order = order;
		page_set->pages = alloc_pages(GFP_KERNEL, order);
		if (!page_set->pages) {
			kfree(page_set);
			goto fail_free_data;
		}
		kaddr = page_address(page_set->pages);

		/*
		 * Point every entry backed by this page set at its segment.
		 * The first entry (i == 0) gets kaddr itself.
		 */
		set_size = (1 << order);
		for (i = 0; i < set_size; i++) {
			entry = &rb->entries[init_entry];
			entry->cacheline = pblk_cacheline_to_addr(init_entry++);
			entry->data = kaddr + (i * rb->seg_size);
			entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
			bio_list_init(&entry->w_ctx.bios);
		}

		list_add_tail(&page_set->list, &rb->pages);
		iter--;
	} while (iter > 0);
	up_write(&pblk_rb_lock);

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_set(&rb->inflight_flush_point, 0);
#endif

	/*
	 * Initialize rate-limiter, which controls access to the write buffer
	 * by user and GC I/O
	 */
	pblk_rl_init(&pblk->rl, rb->nr_entries, threshold);

	return 0;

fail_free_data:
	/*
	 * pblk_rb_data_free() takes pblk_rb_lock for write itself, so the lock
	 * must be dropped before calling it.  It releases every page set that
	 * was already linked on rb->pages.
	 */
	up_write(&pblk_rb_lock);
	pblk_rb_data_free(rb);
	rb->entries = NULL;
	vfree(entries);
	return -ENOMEM;
}
/*
 * Return a write context to the writable state and clear its mapping.
 * Warns once if the entry had not been marked as submitted.
 */
static void clean_wctx(struct pblk_w_ctx *w_ctx)
{
	int cur_flags = READ_ONCE(w_ctx->flags);

	WARN_ONCE(!(cur_flags & PBLK_SUBMITTED_ENTRY),
			"pblk: overwriting unsubmitted data\n");

	/* Release flags on context. Protect from writes and reads */
	smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);

	pblk_ppa_set_empty(&w_ctx->ppa);
	w_ctx->lba = ADDR_EMPTY;
}
/*
 * Thin wrappers around the <linux/circ_buf.h> helpers.
 * pblk_rb_ring_space() accepts an @rb argument that the expansion does not
 * use.
 */
#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
#define pblk_rb_ring_space(rb, head, tail, size) \
(CIRC_SPACE(head, tail, size))
/*
 * Free space in the buffer, measured from the write head (mem) to the sync
 * pointer, i.e. to the entries already synchronized to the media.
 */
static unsigned int pblk_rb_space(struct pblk_rb *rb)
{
	unsigned int head = READ_ONCE(rb->mem);
	unsigned int tail = READ_ONCE(rb->sync);

	return pblk_rb_ring_space(rb, head, tail, rb->nr_entries);
}
/* Advance ring position @p by @nr_entries, wrapping at rb->nr_entries. */
unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p,
			      unsigned int nr_entries)
{
	unsigned int mask = rb->nr_entries - 1;

	return (p + nr_entries) & mask;
}
/*
 * Number of entries between the submission pointer and the write head, i.e.
 * entries that are available to be sent to the media.
 */
unsigned int pblk_rb_read_count(struct pblk_rb *rb)
{
	unsigned int head = READ_ONCE(rb->mem);
	unsigned int tail = READ_ONCE(rb->subm);

	return pblk_rb_ring_count(head, tail, rb->nr_entries);
}
/* Number of entries between the sync pointer and the write head. */
unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
{
	unsigned int head = READ_ONCE(rb->mem);
	unsigned int tail = READ_ONCE(rb->sync);

	return pblk_rb_ring_count(head, tail, rb->nr_entries);
}
/*
 * Commit a read of @nr_entries by moving the submission pointer forward.
 * Returns the submission pointer as it was before the move.
 */
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
{
	unsigned int prev = READ_ONCE(rb->subm);
	unsigned int next = pblk_rb_ptr_wrap(rb, prev, nr_entries);

	/* Commit read means updating submission pointer */
	smp_store_release(&rb->subm, next);

	return prev;
}
/*
 * Move the l2p_update pointer forward by @to_update entries. Each entry's
 * mapping is switched to its device address, the reference on its line is
 * dropped and the write context is cleaned. The rate-limiter is credited
 * with the number of user and GC entries released. Always returns 0.
 */
static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	unsigned int nr_user = 0, nr_gc = 0;
	unsigned int done;

	for (done = 0; done < to_update; done++) {
		struct pblk_rb_entry *entry = &rb->entries[rb->l2p_update];
		struct pblk_w_ctx *w_ctx = &entry->w_ctx;
		struct pblk_line *line;
		int flags = READ_ONCE(entry->w_ctx.flags);

		if (flags & PBLK_IOTYPE_USER)
			nr_user++;
		else if (flags & PBLK_IOTYPE_GC)
			nr_gc++;
		else
			WARN(1, "pblk: unknown IO type\n");

		pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
				    entry->cacheline);

		line = pblk_ppa_to_line(pblk, w_ctx->ppa);
		atomic_dec(&line->sec_to_update);
		kref_put(&line->ref, pblk_line_put);
		clean_wctx(w_ctx);
		rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1);
	}

	pblk_rl_out(&pblk->rl, nr_user, nr_gc);

	return 0;
}
/*
 * When we move the l2p_update pointer, we update the l2p table - lookups will
 * point to the physical address instead of to the cacheline in the write buffer
 * from this moment on.
 *
 * Only as many entries as are about to be overwritten by an incoming write of
 * @nr_entries are updated. Must be called with rb->w_lock held.
 */
static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries,
			      unsigned int mem, unsigned int sync)
{
	unsigned int space;

	lockdep_assert_held(&rb->w_lock);

	/* Update l2p only as buffer entries are being overwritten */
	space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries);
	if (space > nr_entries)
		return 0;

	/* l2p_update used exclusively under rb->w_lock */
	return __pblk_rb_update_l2p(rb, nr_entries - space);
}
/*
 * Update the l2p entry for all sectors stored on the write buffer. This means
 * that all future lookups to the l2p table will point to a device address, not
 * to the cacheline in the write buffer.
 */
void pblk_rb_sync_l2p(struct pblk_rb *rb)
{
	unsigned int sync_pos, pending;

	spin_lock(&rb->w_lock);

	/* Protect from reads and writes */
	sync_pos = smp_load_acquire(&rb->sync);

	pending = pblk_rb_ring_count(sync_pos, rb->l2p_update, rb->nr_entries);
	__pblk_rb_update_l2p(rb, pending);

	spin_unlock(&rb->w_lock);
}
/*
* Write @nr_entries to ring buffer from @data buffer if there is enough space.
* Typically, 4KB data chunks coming from a bio will be copied to the ring
* buffer, thus the write will fail if not all incoming data can be copied.
*
*/
static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data,
struct pblk_w_ctx w_ctx,
struct pblk_rb_entry *entry)
{
memcpy(entry->data, data, rb->seg_size);
entry->w_ctx.lba = w_ctx.lba;
entry->w_ctx.ppa = w_ctx.ppa;
}
/*
 * Store one sector of user data at @ring_pos, point the cache mapping of its
 * lba at the entry's cacheline and finally publish the entry as written.
 */
void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
			      struct pblk_w_ctx w_ctx, unsigned int ring_pos)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	struct pblk_rb_entry *entry = &rb->entries[ring_pos];
	int flags = READ_ONCE(entry->w_ctx.flags);

#ifdef CONFIG_NVM_PBLK_DEBUG
	/* Caller must guarantee that the entry is free */
	BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
#endif

	__pblk_rb_write_entry(rb, data, w_ctx, entry);

	pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline);
	flags = w_ctx.flags | PBLK_WRITTEN_DATA;

	/* Release flags on write context. Protect from writes */
	smp_store_release(&entry->w_ctx.flags, flags);
}
/*
 * Store one sector of GC data at @ring_pos. If pblk_update_map_gc() reports
 * that the mapping was not updated, the entry's lba is marked empty. The
 * entry is published as written in either case.
 */
void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
			    struct pblk_w_ctx w_ctx, struct pblk_line *line,
			    u64 paddr, unsigned int ring_pos)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	struct pblk_rb_entry *entry = &rb->entries[ring_pos];
	int flags = READ_ONCE(entry->w_ctx.flags);
	int mapped;

#ifdef CONFIG_NVM_PBLK_DEBUG
	/* Caller must guarantee that the entry is free */
	BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
#endif

	__pblk_rb_write_entry(rb, data, w_ctx, entry);

	mapped = pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line,
				    paddr);
	if (!mapped)
		entry->w_ctx.lba = ADDR_EMPTY;

	flags = w_ctx.flags | PBLK_WRITTEN_DATA;

	/* Release flags on write context. Protect from writes */
	smp_store_release(&entry->w_ctx.flags, flags);
}
/*
 * Set the flush point to the entry right before @pos, unless @pos already
 * equals the sync pointer (nothing to flush). When a @bio is given, it is
 * queued on the flush point entry.
 *
 * Returns 1 if a bio was queued on the flush point, 0 otherwise.
 */
static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
				   unsigned int pos)
{
	int queued = 0;

	pblk_rb_sync_init(rb, NULL);

	if (pos != READ_ONCE(rb->sync)) {
		struct pblk_rb_entry *entry;
		unsigned int flush_point;

#ifdef CONFIG_NVM_PBLK_DEBUG
		atomic_inc(&rb->inflight_flush_point);
#endif

		if (pos == 0)
			flush_point = rb->nr_entries - 1;
		else
			flush_point = pos - 1;
		entry = &rb->entries[flush_point];

		/* Protect flush points */
		smp_store_release(&rb->flush_point, flush_point);

		if (bio) {
			bio_list_add(&entry->w_ctx.bios, bio);
			queued = 1;
		}
	}

	pblk_rb_sync_end(rb, NULL);

	return queued;
}
/*
 * Check whether @nr_entries plus the back threshold fit in the buffer and, if
 * so, update the l2p table for the entries about to be overwritten.
 *
 * On success returns 1 and stores the current write head in @pos; returns 0
 * otherwise. The write head itself is not moved here.
 */
static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
			       unsigned int *pos)
{
	unsigned int sync = READ_ONCE(rb->sync);
	unsigned int mem = READ_ONCE(rb->mem);
	unsigned int needed = nr_entries + rb->back_thres;
	unsigned int avail;

	avail = pblk_rb_ring_space(rb, mem, sync, rb->nr_entries);
	if (avail < needed)
		return 0;

	if (pblk_rb_update_l2p(rb, nr_entries, mem, sync))
		return 0;

	*pos = mem;

	return 1;
}
/*
 * Reserve @nr_entries in the buffer. On success the write head is advanced,
 * the start of the reservation is stored in @pos and 1 is returned.
 */
static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
			     unsigned int *pos)
{
	unsigned int new_mem;

	if (!__pblk_rb_may_write(rb, nr_entries, pos))
		return 0;

	new_mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries);

	/* Protect from read count */
	smp_store_release(&rb->mem, new_mem);

	return 1;
}
/*
 * Place a flush point at the current write head and kick the writer, unless
 * pblk_rb_flush_point_set() reports that a bio was queued.
 */
void pblk_rb_flush(struct pblk_rb *rb)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	unsigned int head = READ_ONCE(rb->mem);

	if (!pblk_rb_flush_point_set(rb, NULL, head))
		pblk_write_kick(pblk);
}
/*
 * Reserve @nr_entries for a user write and handle REQ_PREFLUSH on @bio.
 *
 * On success returns 1 with @io_ret set to NVM_IO_DONE, or to NVM_IO_OK when
 * the bio was queued on a flush point. Returns 0, leaving @io_ret untouched,
 * when the entries do not fit.
 */
static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
				   unsigned int *pos, struct bio *bio,
				   int *io_ret)
{
	unsigned int new_mem;

	if (!__pblk_rb_may_write(rb, nr_entries, pos))
		return 0;

	new_mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries);
	*io_ret = NVM_IO_DONE;

	if (bio->bi_opf & REQ_PREFLUSH) {
		struct pblk *pblk = container_of(rb, struct pblk, rwb);

		atomic64_inc(&pblk->nr_flush);
		if (pblk_rb_flush_point_set(&pblk->rwb, bio, new_mem))
			*io_ret = NVM_IO_OK;
	}

	/* Protect from read count */
	smp_store_release(&rb->mem, new_mem);

	return 1;
}
/*
 * Atomically check that (i) there is space on the write buffer for the
 * incoming I/O, and (ii) the current I/O type has enough budget in the write
 * buffer (rate-limiter).
 *
 * Returns the rate-limiter's verdict if it is non-zero, NVM_IO_REQUEUE if the
 * buffer has no room, or the status set by pblk_rb_may_write_flush().
 */
int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
			   unsigned int nr_entries, unsigned int *pos)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	int io_ret;

	spin_lock(&rb->w_lock);

	io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
	if (io_ret)
		goto unlock;

	if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
		io_ret = NVM_IO_REQUEUE;
		goto unlock;
	}

	pblk_rl_user_in(&pblk->rl, nr_entries);

unlock:
	spin_unlock(&rb->w_lock);

	return io_ret;
}
/*
 * GC counterpart of pblk_rb_may_write_user(): under rb->w_lock, check the GC
 * budget in the rate-limiter and reserve @nr_entries in the buffer.
 * Returns 1 if the entries were reserved, 0 otherwise.
 */
int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
			 unsigned int *pos)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	int admitted = 0;

	spin_lock(&rb->w_lock);

	if (pblk_rl_gc_may_insert(&pblk->rl, nr_entries) &&
	    pblk_rb_may_write(rb, nr_entries, pos)) {
		pblk_rl_gc_in(&pblk->rl, nr_entries);
		admitted = 1;
	}

	spin_unlock(&rb->w_lock);

	return admitted;
}
/*
 * Read available entries on rb and add them to the given bio. To avoid a memory
 * copy, a page reference to the write buffer is used to be added to the bio.
 *
 * This function is used by the write thread to form the write bio that will
 * persist data on the write buffer to the media.
 *
 * Up to @count entries starting at @pos are added; the remainder of
 * @nr_entries, plus room for packed metadata, is padded. Every entry that is
 * visited is moved from "written" to "submitted", even when adding its page
 * fails. Returns NVM_IO_OK on success and NVM_IO_ERR otherwise.
 */
unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
				 unsigned int pos, unsigned int nr_entries,
				 unsigned int count)
{
	struct pblk *pblk = container_of(rb, struct pblk, rwb);
	struct request_queue *q = pblk->dev->q;
	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
	struct bio *bio = rqd->bio;
	unsigned int to_read = (count < nr_entries) ? count : nr_entries;
	unsigned int pad = nr_entries - to_read;
	unsigned int done;

	/* Add space for packed metadata if in use*/
	pad += (pblk->min_write_pgs - pblk->min_write_pgs_data);

	c_ctx->sentry = pos;
	c_ctx->nr_valid = to_read;
	c_ctx->nr_padded = pad;

	for (done = 0; done < to_read; done++) {
		struct pblk_rb_entry *entry = &rb->entries[pos];
		struct page *page;
		int failed = 0;
		int flags;

		/* A write has been allowed into the buffer, but data is still
		 * being copied to it. It is ok to busy wait.
		 */
		for (;;) {
			flags = READ_ONCE(entry->w_ctx.flags);
			if (flags & PBLK_WRITTEN_DATA)
				break;
			io_schedule();
		}

		page = virt_to_page(entry->data);
		if (!page) {
			pblk_err(pblk, "could not allocate write bio page\n");
			failed = 1;
		} else if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
								rb->seg_size) {
			pblk_err(pblk, "could not add page to write bio\n");
			failed = 1;
		}

		flags &= ~PBLK_WRITTEN_DATA;
		flags |= PBLK_SUBMITTED_ENTRY;

		/* Release flags on context. Protect from writes */
		smp_store_release(&entry->w_ctx.flags, flags);

		if (failed)
			return NVM_IO_ERR;

		pos = pblk_rb_ptr_wrap(rb, pos, 1);
	}

	if (pad) {
		if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
			pblk_err(pblk, "could not pad page in write bio\n");
			return NVM_IO_ERR;
		}

		if (pad < pblk->min_write_pgs)
			atomic64_inc(&pblk->pad_dist[pad - 1]);
		else
			pblk_warn(pblk, "padding more than min. sectors\n");

		atomic64_add(pad, &pblk->pad_wa);
	}

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_long_add(pad, &pblk->padded_writes);
#endif

	return NVM_IO_OK;
}
/*
 * Copy to bio only if the lba matches the one on the given cache entry.
 * Otherwise, it means that the entry has been overwritten, and the bio should
 * be directed to disk.
 *
 * @ppa is a cacheline address; it is converted to a ring position with
 * pblk_addr_to_cacheline(). rb->seg_size bytes are copied into bio_data(@bio).
 *
 * Returns 1 if the data was copied, 0 if the entry no longer holds @lba.
 */
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
struct ppa_addr ppa)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
struct pblk_rb_entry *entry;
struct pblk_w_ctx *w_ctx;
struct ppa_addr l2p_ppa;
u64 pos = pblk_addr_to_cacheline(ppa);
void *data;
int flags;
int ret = 1;
#ifdef CONFIG_NVM_PBLK_DEBUG
/* Caller must ensure that the access will not cause an overflow */
BUG_ON(pos >= rb->nr_entries);
#endif
entry = &rb->entries[pos];
w_ctx = &entry->w_ctx;
/* Note: the flags are sampled before rb->w_lock is taken */
flags = READ_ONCE(w_ctx->flags);
spin_lock(&rb->w_lock);
/* Fetch the current mapping of @lba; trans_lock nests inside w_lock */
spin_lock(&pblk->trans_lock);
l2p_ppa = pblk_trans_map_get(pblk, lba);
spin_unlock(&pblk->trans_lock);
/* Check if the entry has been overwritten or is scheduled to be */
if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
flags & PBLK_WRITABLE_ENTRY) {
ret = 0;
goto out;
}
/* The copy is done with rb->w_lock still held */
data = bio_data(bio);
memcpy(data, entry->data, rb->seg_size);
out:
spin_unlock(&rb->w_lock);
return ret;
}
struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
{
unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0);
return &rb->entries[entry].w_ctx;
}
/*
 * Take rb->s_lock and return the current sync pointer. With @flags the
 * interrupt state is saved into it (irqsave); with NULL interrupts are simply
 * disabled. Pair with pblk_rb_sync_end() using the same @flags.
 */
unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags)
	__acquires(&rb->s_lock)
{
	if (!flags)
		spin_lock_irq(&rb->s_lock);
	else
		spin_lock_irqsave(&rb->s_lock, *flags);

	return rb->sync;
}
/*
 * Release rb->s_lock taken by pblk_rb_sync_init(). @flags must match what was
 * passed there: the saved interrupt state, or NULL.
 */
void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags)
	__releases(&rb->s_lock)
{
	lockdep_assert_held(&rb->s_lock);

	if (!flags)
		spin_unlock_irq(&rb->s_lock);
	else
		spin_unlock_irqrestore(&rb->s_lock, *flags);
}
/*
 * Advance the sync pointer by @nr_entries and return its new value. If a
 * flush point is set and lies within the range being synced, it is cleared.
 * Must be called with rb->s_lock held.
 */
unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
{
	unsigned int cur_sync, new_sync, flush_point;

	lockdep_assert_held(&rb->s_lock);

	cur_sync = READ_ONCE(rb->sync);
	flush_point = READ_ONCE(rb->flush_point);

	if (flush_point != EMPTY_ENTRY &&
	    pblk_rb_ring_count(flush_point, cur_sync,
			       rb->nr_entries) < nr_entries) {
		/* Protect flush points */
		smp_store_release(&rb->flush_point, EMPTY_ENTRY);
	}

	new_sync = pblk_rb_ptr_wrap(rb, cur_sync, nr_entries);

	/* Protect from counts */
	smp_store_release(&rb->sync, new_sync);

	return new_sync;
}
/*
 * Calculate how many sectors still have to be submitted to reach the current
 * flush point. Returns 0 when no flush point is set or when enough sectors
 * have been submitted already.
 */
unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb)
{
	unsigned int flush_point, sync, subm;
	unsigned int submitted, to_flush;

	/* Protect flush points */
	flush_point = smp_load_acquire(&rb->flush_point);
	if (flush_point == EMPTY_ENTRY)
		return 0;

	/* Protect syncs */
	sync = smp_load_acquire(&rb->sync);

	subm = READ_ONCE(rb->subm);
	submitted = pblk_rb_ring_count(subm, sync, rb->nr_entries);

	/* The sync point itself counts as a sector to sync */
	to_flush = pblk_rb_ring_count(flush_point, sync, rb->nr_entries) + 1;

	if (submitted >= to_flush)
		return 0;

	return to_flush - submitted;
}
/*
 * Sanity check used at tear down. Returns 0 when all ring pointers coincide
 * and no flush point is set. Otherwise returns 1 if the entry array or any
 * entry's data pointer is missing, and 0 if they are all present.
 */
int pblk_rb_tear_down_check(struct pblk_rb *rb)
{
	unsigned int idx;
	int idle;
	int ret = 0;

	spin_lock(&rb->w_lock);
	spin_lock_irq(&rb->s_lock);

	idle = (rb->mem == rb->subm) && (rb->subm == rb->sync) &&
	       (rb->sync == rb->l2p_update) &&
	       (rb->flush_point == EMPTY_ENTRY);

	if (!idle) {
		if (!rb->entries) {
			ret = 1;
		} else {
			for (idx = 0; idx < rb->nr_entries; idx++) {
				if (!rb->entries[idx].data) {
					ret = 1;
					break;
				}
			}
		}
	}

	spin_unlock_irq(&rb->s_lock);
	spin_unlock(&rb->w_lock);

	return ret;
}
/* Reduce @pos to a valid ring index by masking with rb->nr_entries - 1. */
unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
{
	unsigned int mask = rb->nr_entries - 1;

	return pos & mask;
}
/* Return non-zero if @pos lies outside the ring (pos >= rb->nr_entries). */
int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
{
	u64 limit = rb->nr_entries;

	return pos >= limit;
}
/*
 * Format a one-line snapshot of the ring buffer state into @buf (at most
 * PAGE_SIZE bytes) and return the number of bytes written.
 *
 * Fields, in order: nr_entries, mem, subm, sync, l2p_update, in-flight flush
 * points (0 unless CONFIG_NVM_PBLK_DEBUG), flush point (or "NULL" when unset),
 * then read count / free space / flush point count, then the number of
 * entries on pblk->compl_list.
 *
 * Only the walk over compl_list is done under rb->s_lock; the ring pointers
 * are read without a lock, so the values need not be mutually consistent.
 */
ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
struct pblk_c_ctx *c;
ssize_t offset;
int queued_entries = 0;
/* Count the contexts currently on the completion list */
spin_lock_irq(&rb->s_lock);
list_for_each_entry(c, &pblk->compl_list, list)
queued_entries++;
spin_unlock_irq(&rb->s_lock);
/* The two branches differ only in how the flush point is printed */
if (rb->flush_point != EMPTY_ENTRY)
offset = scnprintf(buf, PAGE_SIZE,
"%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
rb->nr_entries,
rb->mem,
rb->subm,
rb->sync,
rb->l2p_update,
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_read(&rb->inflight_flush_point),
#else
0,
#endif
rb->flush_point,
pblk_rb_read_count(rb),
pblk_rb_space(rb),
pblk_rb_flush_point_count(rb),
queued_entries);
else
offset = scnprintf(buf, PAGE_SIZE,
"%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
rb->nr_entries,
rb->mem,
rb->subm,
rb->sync,
rb->l2p_update,
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_read(&rb->inflight_flush_point),
#else
0,
#endif
pblk_rb_read_count(rb),
pblk_rb_space(rb),
pblk_rb_flush_point_count(rb),
queued_entries);
return offset;
}

View File

@ -1,474 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-read.c - pblk's read path
*/
#include "pblk.h"
/*
 * There is no guarantee that the value read from the cache has not been
 * updated and now resides at another location in the cache. We do guarantee,
 * though, that if a value is read from the cache it belongs to the mapped lba.
 * To guarantee ordering between writes and reads, a flush must be issued.
 *
 * Returns the result of pblk_rb_copy_to_bio().
 */
static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
				sector_t lba, struct ppa_addr ppa)
{
	struct pblk_rb *rwb = &pblk->rwb;

#ifdef CONFIG_NVM_PBLK_DEBUG
	/* Callers must ensure that the ppa points to a cache address */
	BUG_ON(pblk_ppa_empty(ppa));
	BUG_ON(!pblk_addr_in_cache(ppa));
#endif

	return pblk_rb_copy_to_bio(rwb, bio, lba, ppa);
}
/*
 * Look up the mappings for rqd->nr_ppas sectors starting at @blba and, when
 * the lookup reports cache addresses (*from_cache), copy the data out of the
 * write buffer into @bio, recording each lba in the request metadata.
 *
 * Returns the number of sectors covered by this pass. This is the count
 * reported by pblk_lookup_l2p_seq(), shortened to the index of the first
 * sector whose cache read failed after the bio had already been advanced.
 */
static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
struct bio *bio, sector_t blba,
bool *from_cache)
{
void *meta_list = rqd->meta_list;
int nr_secs, i;
retry:
nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas,
from_cache);
/* Nothing to copy here when the lookup did not report cache addresses */
if (!*from_cache)
goto end;
for (i = 0; i < nr_secs; i++) {
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
sector_t lba = blba + i;
if (pblk_ppa_empty(rqd->ppa_list[i])) {
/* Unmapped sector: flag it as empty in the metadata */
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
meta->lba = addr_empty;
} else if (pblk_addr_in_cache(rqd->ppa_list[i])) {
/*
* Try to read from write buffer. The address is later
* checked on the write buffer to prevent retrieving
* overwritten data.
*/
if (!pblk_read_from_cache(pblk, bio, lba,
rqd->ppa_list[i])) {
if (i == 0) {
/*
* We have not called bio_advance()
* yet, so we can just retry.
*/
goto retry;
} else {
/*
* We have already called bio_advance(),
* so we cannot retry. Quit the
* function so that the caller can
* handle the bio splitting at the
* current sector position.
*/
nr_secs = i;
goto end;
}
}
meta->lba = cpu_to_le64(lba);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_inc(&pblk->cache_reads);
#endif
}
/* Move the bio on to the next sector */
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
}
end:
if (pblk_io_aligned(pblk, nr_secs))
rqd->is_seq = 1;
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(nr_secs, &pblk->inflight_reads);
#endif
return nr_secs;
}
/*
 * Verify a sequential read: the lba stored in each sector's metadata must be
 * either ADDR_EMPTY or @blba plus the sector index. Mismatches are logged and
 * trigger a warning. Does nothing when OOB metadata is not supported.
 */
static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
				sector_t blba)
{
	void *meta_list = rqd->meta_list;
	int nr_lbas = rqd->nr_ppas;
	int idx;

	if (!pblk_is_oob_meta_supported(pblk))
		return;

	for (idx = 0; idx < nr_lbas; idx++) {
		struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list,
							   idx);
		u64 found = le64_to_cpu(meta->lba);
		u64 expected = (u64)blba + idx;

		if (found == ADDR_EMPTY || found == expected)
			continue;

#ifdef CONFIG_NVM_PBLK_DEBUG
		{
			struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);

			print_ppa(pblk, &ppa_list[idx], "seq", idx);
		}
#endif
		pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n",
			 found, expected);
		WARN_ON(1);
	}
}
/*
 * Verify a random read against @lba_list, which may contain holes
 * (ADDR_EMPTY). Holes consume no metadata slot; every other lba must match
 * the lba stored in the next metadata slot. Warns once if the number of
 * slots consumed differs from rqd->nr_ppas.
 */
static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
				 u64 *lba_list, int nr_lbas)
{
	void *meta_lba_list = rqd->meta_list;
	int src;
	int slot = 0;

	if (!pblk_is_oob_meta_supported(pblk))
		return;

	for (src = 0; src < nr_lbas; src++) {
		struct pblk_sec_meta *meta = pblk_get_meta(pblk,
							   meta_lba_list, slot);
		u64 expected = lba_list[src];
		u64 found;

		if (expected == ADDR_EMPTY)
			continue;

		found = le64_to_cpu(meta->lba);
		if (found != expected) {
#ifdef CONFIG_NVM_PBLK_DEBUG
			struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);

			print_ppa(pblk, &ppa_list[slot], "rnd", slot);
#endif
			pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n",
				 found, expected);
			WARN_ON(1);
		}

		slot++;
	}

	WARN_ONCE(slot != rqd->nr_ppas, "pblk: corrupted random request\n");
}
static void pblk_end_user_read(struct bio *bio, int error)
{
if (error && error != NVM_RSP_WARN_HIGHECC)
bio_io_error(bio);
else
bio_endio(bio);
}
/*
 * Common read completion: close I/O accounting on the internal bio, log read
 * errors, verify the returned lbas, drop the internal bio and free the
 * request. The line references are dropped only when @put_line is set.
 */
static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
			       bool put_line)
{
	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
	struct bio *clone = rqd->bio;
	unsigned long started = r_ctx->start_time;

	bio_end_io_acct(clone, started);

	if (rqd->error)
		pblk_log_read_err(pblk, rqd);

	pblk_read_check_seq(pblk, rqd, r_ctx->lba);
	bio_put(clone);

	if (put_line)
		pblk_rq_to_line_put(pblk, rqd);

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_long_add(rqd->nr_ppas, &pblk->sync_reads);
	atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads);
#endif

	pblk_free_rqd(pblk, rqd, PBLK_READ);
	atomic_dec(&pblk->inflight_io);
}
static void pblk_end_io_read(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct bio *bio = (struct bio *)r_ctx->private;
pblk_end_user_read(bio, rqd->error);
__pblk_end_io_read(pblk, rqd, true);
}
/*
 * Single-sector read. Resolves @lba and then either marks the sector empty,
 * serves it from the write buffer, or stores the device address in
 * rqd->ppa_addr. If the cache entry was overwritten in the meantime, the
 * lookup is repeated.
 */
static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
			 sector_t lba, bool *from_cache)
{
	struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0);
	struct ppa_addr ppa;

	pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_long_inc(&pblk->inflight_reads);
#endif

	for (;;) {
		if (pblk_ppa_empty(ppa)) {
			meta->lba = cpu_to_le64(ADDR_EMPTY);
			return;
		}

		if (!pblk_addr_in_cache(ppa)) {
			rqd->ppa_addr = ppa;
			return;
		}

		/* Try to read from write buffer. The address is later checked
		 * on the write buffer to prevent retrieving overwritten data.
		 */
		if (pblk_read_from_cache(pblk, bio, lba, ppa))
			break;

		pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
	}

	meta->lba = cpu_to_le64(lba);
#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_long_inc(&pblk->cache_reads);
#endif
}
/*
 * Serve a user read @bio.
 *
 * A read request is set up and the mappings of the requested sectors are
 * resolved. Data found in the write buffer is copied into an internal clone of
 * the bio. Three outcomes are possible:
 *  - everything came from the cache: the bio is completed right away;
 *  - only a leading part could be resolved in one go: the bio is split, the
 *    remainder is resubmitted to the block layer, and the leading part is
 *    processed again;
 *  - otherwise the request is submitted to the device and completes through
 *    pblk_end_io_read().
 *
 * I/O accounting started here is closed either by the completion path or, if
 * the request metadata cannot be allocated, before the bio is failed.
 */
void pblk_submit_read(struct pblk *pblk, struct bio *bio)
{
	sector_t blba = pblk_get_lba(bio);
	unsigned int nr_secs = pblk_get_secs(bio);
	bool from_cache;
	struct pblk_g_ctx *r_ctx;
	struct nvm_rq *rqd;
	struct bio *int_bio, *split_bio;
	unsigned long start_time;

	start_time = bio_start_io_acct(bio);

	rqd = pblk_alloc_rqd(pblk, PBLK_READ);

	rqd->opcode = NVM_OP_PREAD;
	rqd->nr_ppas = nr_secs;
	rqd->private = pblk;
	rqd->end_io = pblk_end_io_read;

	r_ctx = nvm_rq_to_pdu(rqd);
	r_ctx->start_time = start_time;
	r_ctx->lba = blba;

	if (pblk_alloc_rqd_meta(pblk, rqd)) {
		/*
		 * The completion path will never run for this bio, so balance
		 * bio_start_io_acct() here, before the bio is completed.
		 */
		bio_end_io_acct(bio, start_time);
		bio_io_error(bio);
		pblk_free_rqd(pblk, rqd, PBLK_READ);
		return;
	}

	/* Clone read bio to deal internally with:
	 * -read errors when reading from drive
	 * -bio_advance() calls during cache reads
	 */
	int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);

	if (nr_secs > 1)
		nr_secs = pblk_read_ppalist_rq(pblk, rqd, int_bio, blba,
						&from_cache);
	else
		pblk_read_rq(pblk, rqd, int_bio, blba, &from_cache);

split_retry:
	r_ctx->private = bio; /* original bio */
	rqd->bio = int_bio; /* internal bio */

	if (from_cache && nr_secs == rqd->nr_ppas) {
		/* All data was read from cache, we can complete the IO. */
		pblk_end_user_read(bio, 0);
		/* Balances the atomic_dec() in __pblk_end_io_read() */
		atomic_inc(&pblk->inflight_io);
		__pblk_end_io_read(pblk, rqd, false);
	} else if (nr_secs != rqd->nr_ppas) {
		/* The read bio request could be partially filled by the write
		 * buffer, but there are some holes that need to be read from
		 * the drive. In order to handle this, we will use block layer
		 * mechanism to split this request in to smaller ones and make
		 * a chain of it.
		 */
		split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL,
					&pblk_bio_set);
		bio_chain(split_bio, bio);
		submit_bio_noacct(bio);

		/* New bio contains first N sectors of the previous one, so
		 * we can continue to use existing rqd, but we need to shrink
		 * the number of PPAs in it. New bio is also guaranteed to
		 * contain only data from cache or only data from drive, never
		 * a mix of them.
		 */
		bio = split_bio;
		rqd->nr_ppas = nr_secs;
		if (rqd->nr_ppas == 1)
			rqd->ppa_addr = rqd->ppa_list[0];

		/* Recreate int_bio - existing might have some needed internal
		 * fields modified already.
		 */
		bio_put(int_bio);
		int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
		goto split_retry;
	} else if (pblk_submit_io(pblk, rqd, NULL)) {
		/* Submitting IO to drive failed, let's report an error */
		rqd->error = -ENODEV;
		pblk_end_io_read(rqd);
	}
}
/*
 * Build the ppa list for a multi-sector GC read. A sector is kept only if its
 * lba is not empty and the L2P table still maps it to the address GC expects
 * on @line; otherwise its slots in @lba_list and @paddr_list_gc are set to
 * ADDR_EMPTY. Returns the number of sectors placed in rqd->ppa_list.
 */
static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
			      struct pblk_line *line, u64 *lba_list,
			      u64 *paddr_list_gc, unsigned int nr_secs)
{
	struct ppa_addr ppa_list_l2p[NVM_MAX_VLBA];
	unsigned int idx;
	int valid_secs = 0;

	pblk_lookup_l2p_rand(pblk, ppa_list_l2p, lba_list, nr_secs);

	for (idx = 0; idx < nr_secs; idx++) {
		struct ppa_addr ppa_gc;

		if (lba_list[idx] == ADDR_EMPTY)
			continue;

		ppa_gc = addr_to_gen_ppa(pblk, paddr_list_gc[idx], line->id);
		if (pblk_ppa_comp(ppa_list_l2p[idx], ppa_gc)) {
			rqd->ppa_list[valid_secs++] = ppa_list_l2p[idx];
			continue;
		}

		/* The mapping moved on; drop this sector from the GC request */
		paddr_list_gc[idx] = lba_list[idx] = ADDR_EMPTY;
	}

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_long_add(valid_secs, &pblk->inflight_reads);
#endif

	return valid_secs;
}
/*
 * Single-sector variant of read_ppalist_rq_gc(). Returns 1 and fills
 * rqd->ppa_addr if @lba is valid and still mapped to the address GC expects
 * on @line; returns 0 otherwise.
 */
static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
		      struct pblk_line *line, sector_t lba,
		      u64 paddr_gc)
{
	struct ppa_addr ppa_l2p, ppa_gc;

	if (lba == ADDR_EMPTY)
		return 0;

	/* logic error: lba out-of-bounds */
	if (lba >= pblk->capacity) {
		WARN(1, "pblk: read lba out of bounds\n");
		return 0;
	}

	spin_lock(&pblk->trans_lock);
	ppa_l2p = pblk_trans_map_get(pblk, lba);
	spin_unlock(&pblk->trans_lock);

	ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, line->id);
	if (!pblk_ppa_comp(ppa_l2p, ppa_gc))
		return 0;

	rqd->ppa_addr = ppa_l2p;

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_long_inc(&pblk->inflight_reads);
#endif

	return 1;
}
/*
 * Synchronously read the still-valid sectors of a GC request into
 * gc_rq->data.
 *
 * gc_rq->secs_to_gc is set to the number of sectors that are still mapped to
 * the line being collected; if there are none, no I/O is issued. Returns the
 * error from pblk_alloc_rqd_meta() if metadata allocation fails, -EIO if the
 * submission fails, and the (zero) allocation result otherwise. A read that
 * completes with rqd.error set is only counted, not reported as a failure.
 */
int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
	struct nvm_rq rqd;
	int ret = NVM_IO_OK;

	memset(&rqd, 0, sizeof(struct nvm_rq));

	ret = pblk_alloc_rqd_meta(pblk, &rqd);
	if (ret)
		return ret;

	if (gc_rq->nr_secs > 1) {
		gc_rq->secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, gc_rq->line,
							gc_rq->lba_list,
							gc_rq->paddr_list,
							gc_rq->nr_secs);
		/* A single surviving sector uses the inline address */
		if (gc_rq->secs_to_gc == 1)
			rqd.ppa_addr = rqd.ppa_list[0];
	} else {
		gc_rq->secs_to_gc = read_rq_gc(pblk, &rqd, gc_rq->line,
							gc_rq->lba_list[0],
							gc_rq->paddr_list[0]);
	}

	if (!(gc_rq->secs_to_gc))
		goto out;

	rqd.opcode = NVM_OP_PREAD;
	rqd.nr_ppas = gc_rq->secs_to_gc;

	if (pblk_submit_io_sync(pblk, &rqd, gc_rq->data)) {
#ifdef CONFIG_NVM_PBLK_DEBUG
		/* Undo the in-flight count taken by the lookup helpers */
		atomic_long_sub(gc_rq->secs_to_gc, &pblk->inflight_reads);
#endif
		ret = -EIO;
		goto out;
	}

	pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs);

	atomic_dec(&pblk->inflight_io);

	if (rqd.error) {
		atomic_long_inc(&pblk->read_failed_gc);
#ifdef CONFIG_NVM_PBLK_DEBUG
		pblk_print_failed_rqd(pblk, &rqd, rqd.error);
#endif
	}

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_long_add(gc_rq->secs_to_gc, &pblk->sync_reads);
	atomic_long_add(gc_rq->secs_to_gc, &pblk->recov_gc_reads);
	atomic_long_sub(gc_rq->secs_to_gc, &pblk->inflight_reads);
#endif

out:
	pblk_free_rqd_meta(pblk, &rqd);
	return ret;
}

View File

@ -1,874 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial: Javier Gonzalez <javier@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-recovery.c - pblk's recovery path
*
* The L2P recovery path is single threaded as the L2P table is updated in order
* following the line sequence ID.
*/
#include "pblk.h"
#include "pblk-trace.h"
/*
 * Validate an end-of-line metadata buffer. Returns 0 if both the stored CRC
 * and the header identifier (PBLK_MAGIC) check out, 1 otherwise.
 */
int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf)
{
	u32 computed = pblk_calc_emeta_crc(pblk, emeta_buf);

	if (le32_to_cpu(emeta_buf->crc) != computed)
		return 1;

	return (le32_to_cpu(emeta_buf->header.identifier) == PBLK_MAGIC) ?
		0 : 1;
}
static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_emeta *emeta = line->emeta;
struct line_emeta *emeta_buf = emeta->buf;
__le64 *lba_list;
u64 data_start, data_end;
u64 nr_valid_lbas, nr_lbas = 0;
u64 i;
lba_list = emeta_to_lbas(pblk, emeta_buf);
if (!lba_list)
return 1;
data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
data_end = line->emeta_ssec;
nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
for (i = data_start; i < data_end; i++) {
struct ppa_addr ppa;
int pos;
ppa = addr_to_gen_ppa(pblk, i, line->id);
pos = pblk_ppa_to_pos(geo, ppa);
/* Do not update bad blocks */
if (test_bit(pos, line->blk_bitmap))
continue;
if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) {
spin_lock(&line->lock);
if (test_and_set_bit(i, line->invalid_bitmap))
WARN_ONCE(1, "pblk: rec. double invalidate:\n");
else
le32_add_cpu(line->vsc, -1);
spin_unlock(&line->lock);
continue;
}
pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa);
nr_lbas++;
}
if (nr_valid_lbas != nr_lbas)
pblk_err(pblk, "line %d - inconsistent lba list(%llu/%llu)\n",
line->id, nr_valid_lbas, nr_lbas);
line->left_msecs = 0;
return 0;
}
/*
 * Bring a line's allocation state in step with @written_secs sectors already
 * on the media: allocate pages in min_write_pgs steps, then reduce
 * line->left_msecs under l_mg->free_lock, clamping it at zero.
 */
static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line,
				u64 written_secs)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int sec;

	for (sec = 0; sec < written_secs; sec += pblk->min_write_pgs)
		__pblk_alloc_page(pblk, line, pblk->min_write_pgs);

	spin_lock(&l_mg->free_lock);
	if (written_secs > line->left_msecs)
		/* All data sectors and some emeta sectors are written. */
		line->left_msecs = 0;
	else
		/* Only some data sectors are written. */
		line->left_msecs -= written_secs;
	spin_unlock(&l_mg->free_lock);
}
/*
 * Sum the write pointers of all chunks of an open line that are not offline,
 * update the line's write state accordingly (excluding smeta sectors) and
 * return the total. Logs an error if the number of usable chunks does not
 * match the line's bad-block bitmap.
 */
static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
	u64 written_secs = 0;
	int valid_chunks = 0;
	int blk;

	for (blk = 0; blk < lm->blk_per_line; blk++) {
		struct nvm_chk_meta *chunk = &line->chks[blk];

		if (!(chunk->state & NVM_CHK_ST_OFFLINE)) {
			written_secs += chunk->wp;
			valid_chunks++;
		}
	}

	if (lm->blk_per_line - nr_bb != valid_chunks)
		pblk_err(pblk, "recovery line %d is bad\n", line->id);

	pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec);

	return written_secs;
}
/*
 * Bundle of pre-allocated resources passed by value to pblk_recov_scan_oob()
 * when a line is recovered from its out-of-band metadata.
 */
struct pblk_recov_alloc {
/* ppa list; the allocator places it right after the meta_list region */
struct ppa_addr *ppa_list;
/* buffer receiving the per-sector metadata of each read */
void *meta_list;
/* request descriptor reused for every read of the scan */
struct nvm_rq *rqd;
/* data buffer handed to pblk_submit_io_sync() */
void *data;
/* DMA address matching ppa_list */
dma_addr_t dma_ppa_list;
/* DMA address matching meta_list */
dma_addr_t dma_meta_list;
};
/*
 * kref release callback for a pad request: signals the completion embedded in
 * the struct pblk_pad_rq that owns @ref.
 */
static void pblk_recov_complete(struct kref *ref)
{
	complete(&container_of(ref, struct pblk_pad_rq, ref)->wait);
}
static void pblk_end_io_recov(struct nvm_rq *rqd)
{
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
struct pblk_pad_rq *pad_rq = rqd->private;
struct pblk *pblk = pad_rq->pblk;
pblk_up_chunk(pblk, ppa_list[0]);
pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
atomic_dec(&pblk->inflight_io);
kref_put(&pad_rq->ref, pblk_recov_complete);
}
/*
 * Pad a line with empty sectors, using the line bitmap to skip bad blocks.
 *
 * Issues asynchronous NVM_OP_PWRITE requests until @left_ppas sectors have
 * been submitted or the line has no sectors left (line->left_msecs, sampled
 * once on entry). Every padded sector is invalidated in the map and recorded
 * as ADDR_EMPTY both in the emeta lba list and in the request metadata.
 *
 * The function waits for all submitted requests (tracked through the kref in
 * struct pblk_pad_rq) before returning.
 *
 * Returns 0, -ENOMEM on allocation failure, or the error reported by
 * pblk_alloc_rqd_meta() / pblk_submit_io().
 */
static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
int left_ppas)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
void *meta_list;
struct pblk_pad_rq *pad_rq;
struct nvm_rq *rqd;
struct ppa_addr *ppa_list;
void *data;
__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
u64 w_ptr = line->cur_sec;
int left_line_ppas, rq_ppas;
int i, j;
int ret = 0;
spin_lock(&line->lock);
left_line_ppas = line->left_msecs;
spin_unlock(&line->lock);
pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
if (!pad_rq)
return -ENOMEM;
/* One zeroed buffer is shared by every padding write */
data = vzalloc(array_size(pblk->max_write_pgs, geo->csecs));
if (!data) {
ret = -ENOMEM;
goto free_rq;
}
pad_rq->pblk = pblk;
init_completion(&pad_rq->wait);
/* Initial reference; dropped at fail_complete before waiting */
kref_init(&pad_rq->ref);
next_pad_rq:
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
if (rq_ppas < pblk->min_write_pgs) {
/*
 * NOTE(review): ret is still 0 when this branch is taken, so the
 * caller sees success even though an error was logged - confirm this
 * is intended.
 */
pblk_err(pblk, "corrupted pad line %d\n", line->id);
goto fail_complete;
}
/* NOTE(review): the result of pblk_alloc_rqd() is used unchecked */
rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
ret = pblk_alloc_rqd_meta(pblk, rqd);
if (ret) {
pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
goto fail_complete;
}
rqd->bio = NULL;
rqd->opcode = NVM_OP_PWRITE;
rqd->is_seq = 1;
rqd->nr_ppas = rq_ppas;
rqd->end_io = pblk_end_io_recov;
rqd->private = pad_rq;
ppa_list = nvm_rq_to_ppa_list(rqd);
meta_list = rqd->meta_list;
/* Fill the request min_write_pgs sectors at a time */
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
pos = pblk_ppa_to_pos(geo, ppa);
/* Step over positions that land on a bad block */
while (test_bit(pos, line->blk_bitmap)) {
w_ptr += pblk->min_write_pgs;
ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
pos = pblk_ppa_to_pos(geo, ppa);
}
for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
struct ppa_addr dev_ppa;
struct pblk_sec_meta *meta;
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
pblk_map_invalidate(pblk, dev_ppa);
lba_list[w_ptr] = addr_empty;
meta = pblk_get_meta(pblk, meta_list, i);
meta->lba = addr_empty;
ppa_list[i] = dev_ppa;
}
}
/* Reference owned by the in-flight request; put in pblk_end_io_recov() */
kref_get(&pad_rq->ref);
pblk_down_chunk(pblk, ppa_list[0]);
ret = pblk_submit_io(pblk, rqd, data);
if (ret) {
/* Undo what the completion handler would have undone */
pblk_err(pblk, "I/O submission failed: %d\n", ret);
pblk_up_chunk(pblk, ppa_list[0]);
kref_put(&pad_rq->ref, pblk_recov_complete);
pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
goto fail_complete;
}
left_line_ppas -= rq_ppas;
left_ppas -= rq_ppas;
if (left_ppas && left_line_ppas)
goto next_pad_rq;
fail_complete:
/* Drop the initial reference, then wait for outstanding requests */
kref_put(&pad_rq->ref, pblk_recov_complete);
wait_for_completion(&pad_rq->wait);
if (!pblk_line_is_full(line))
pblk_err(pblk, "corrupted padded line: %d\n", line->id);
vfree(data);
free_rq:
kfree(pad_rq);
return ret;
}
/*
 * Number of sectors to pad before retrying a failed read: the product of
 * geo->mw_cunits, geo->all_luns and geo->ws_opt, capped at the sectors still
 * left in the line.
 */
static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_geo *geo = &pblk->dev->geo;
	int distance = geo->mw_cunits * geo->all_luns * geo->ws_opt;

	if (distance > line->left_msecs)
		return line->left_msecs;

	return distance;
}
/* Return a chunk belonging to a line by stripe(write order) index */
static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk,
struct pblk_line *line,
int index)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_lun *rlun;
struct ppa_addr ppa;
int pos;
rlun = &pblk->luns[index];
ppa = rlun->bppa;
pos = pblk_ppa_to_pos(geo, ppa);
return &line->chks[pos];
}
/*
 * Check whether the write pointers of the good chunks of a line are spread
 * further apart than expected.
 *
 * The first good chunk (in stripe order) sets the upper bound; the lower bound
 * is that value minus pblk->max_write_pgs, floored at zero. Any later good
 * chunk outside [lower, upper] makes the line unbalanced.
 *
 * Returns 1 if unbalanced, 0 otherwise.
 */
static int pblk_line_wps_are_unbalanced(struct pblk *pblk,
					struct pblk_line *line)
{
	int nr_blks = pblk->lm.blk_per_line;
	struct nvm_chk_meta *chunk;
	u64 wp_hi, wp_lo = 0;
	int pos;

	pos = find_first_zero_bit(line->blk_bitmap, nr_blks);

	/* With at most one good chunk there is nothing to compare against */
	if (pos >= (nr_blks - 1))
		return 0;

	chunk = pblk_get_stripe_chunk(pblk, line, pos);
	wp_hi = chunk->wp;
	if (wp_hi > pblk->max_write_pgs)
		wp_lo = wp_hi - pblk->max_write_pgs;

	for (pos = find_next_zero_bit(line->blk_bitmap, nr_blks, pos + 1);
	     pos < nr_blks;
	     pos = find_next_zero_bit(line->blk_bitmap, nr_blks, pos + 1)) {
		chunk = pblk_get_stripe_chunk(pblk, line, pos);
		if (chunk->wp > wp_hi || chunk->wp < wp_lo)
			return 1;
	}

	return 0;
}
/*
 * Recover the L2P entries of a line by reading back its written sectors and
 * taking each lba from the per-sector metadata.
 *
 * @p carries the pre-allocated request, buffers and DMA addresses that are
 * reused for every read.
 *
 * Each recovered lba is stored in the emeta lba list; lbas that are neither
 * ADDR_EMPTY nor beyond pblk->capacity are mapped and counted in
 * line->nr_valid_lbas. If a read fails (other than with
 * NVM_RSP_WARN_HIGHECC) the line is padded once and the read is retried; a
 * second failure ends the scan.
 *
 * Returns 0 on success, -EINTR when a read still fails after padding, or the
 * error from I/O submission or padding.
 */
static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
struct pblk_recov_alloc p)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct pblk_line_meta *lm = &pblk->lm;
struct nvm_geo *geo = &dev->geo;
struct ppa_addr *ppa_list;
void *meta_list;
struct nvm_rq *rqd;
void *data;
dma_addr_t dma_ppa_list, dma_meta_list;
__le64 *lba_list;
/* First data sector: right after smeta */
u64 paddr = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
bool padded = false;
int rq_ppas;
int i, j;
int ret;
/* Written sectors reported by the chunks, smeta excluded */
u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
if (pblk_line_wps_are_unbalanced(pblk, line))
pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
ppa_list = p.ppa_list;
meta_list = p.meta_list;
rqd = p.rqd;
data = p.data;
dma_ppa_list = p.dma_ppa_list;
dma_meta_list = p.dma_meta_list;
lba_list = emeta_to_lbas(pblk, line->emeta->buf);
next_rq:
memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
/* Never issue a request smaller than the minimum write size */
if (!rq_ppas)
rq_ppas = pblk->min_write_pgs;
retry_rq:
rqd->bio = NULL;
rqd->opcode = NVM_OP_PREAD;
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
rqd->dma_ppa_list = dma_ppa_list;
rqd->dma_meta_list = dma_meta_list;
ppa_list = nvm_rq_to_ppa_list(rqd);
if (pblk_io_aligned(pblk, rq_ppas))
rqd->is_seq = 1;
/*
 * Build the ppa list min_write_pgs entries at a time.
 * NOTE(review): paddr only advances here when a bad block is skipped; it
 * is otherwise incremented later, while the metadata is consumed - verify
 * the addresses generated for requests larger than min_write_pgs.
 */
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
ppa = addr_to_gen_ppa(pblk, paddr, line->id);
pos = pblk_ppa_to_pos(geo, ppa);
while (test_bit(pos, line->blk_bitmap)) {
paddr += pblk->min_write_pgs;
ppa = addr_to_gen_ppa(pblk, paddr, line->id);
pos = pblk_ppa_to_pos(geo, ppa);
}
for (j = 0; j < pblk->min_write_pgs; j++, i++)
ppa_list[i] =
addr_to_gen_ppa(pblk, paddr + j, line->id);
}
ret = pblk_submit_io_sync(pblk, rqd, data);
if (ret) {
pblk_err(pblk, "I/O submission failed: %d\n", ret);
return ret;
}
atomic_dec(&pblk->inflight_io);
/* If a read fails, do a best effort by padding the line and retrying */
if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) {
/* This ret shadows the function-level ret declared above */
int pad_distance, ret;
if (padded) {
pblk_log_read_err(pblk, rqd);
return -EINTR;
}
pad_distance = pblk_pad_distance(pblk, line);
ret = pblk_recov_pad_line(pblk, line, pad_distance);
if (ret) {
return ret;
}
padded = true;
goto retry_rq;
}
pblk_get_packed_meta(pblk, rqd);
/* Record every lba that was read; map only the valid ones */
for (i = 0; i < rqd->nr_ppas; i++) {
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
u64 lba = le64_to_cpu(meta->lba);
lba_list[paddr++] = cpu_to_le64(lba);
if (lba == ADDR_EMPTY || lba >= pblk->capacity)
continue;
line->nr_valid_lbas++;
pblk_update_map(pblk, lba, ppa_list[i]);
}
/*
 * NOTE(review): left_ppas is unsigned; if rq_ppas was raised to
 * min_write_pgs above and exceeds left_ppas, this subtraction wraps and
 * the loop continues - confirm rq_ppas can never exceed left_ppas.
 */
left_ppas -= rq_ppas;
if (left_ppas > 0)
goto next_rq;
#ifdef CONFIG_NVM_PBLK_DEBUG
WARN_ON(padded && !pblk_line_is_full(line));
#endif
return 0;
}
/*
 * Recover the L2P entries of a line from its out-of-band area.
 *
 * Allocates the DMA metadata region (the ppa list lives right behind the
 * metadata inside the same allocation), a data buffer and a read request,
 * hands them to pblk_recov_scan_oob() and releases everything afterwards.
 * A line that turns out to be full after the scan is closed.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the error returned
 * by the scan.
 */
static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct pblk_recov_alloc p;
	dma_addr_t dma_meta_list;
	void *meta_list;
	struct nvm_rq *rqd;
	void *data;
	int ret = 0;

	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
	if (!meta_list)
		return -ENOMEM;

	data = kcalloc(pblk->max_write_pgs, dev->geo.csecs, GFP_KERNEL);
	if (!data) {
		ret = -ENOMEM;
		goto free_meta_list;
	}

	rqd = mempool_alloc(&pblk->r_rq_pool, GFP_KERNEL);
	memset(rqd, 0, pblk_g_rq_size);

	p.meta_list = meta_list;
	p.dma_meta_list = dma_meta_list;
	p.ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk);
	p.dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
	p.rqd = rqd;
	p.data = data;

	ret = pblk_recov_scan_oob(pblk, line, p);
	if (ret)
		pblk_err(pblk, "could not recover L2P form OOB\n");
	else if (pblk_line_is_full(line))
		pblk_line_recov_close(pblk, line);

	mempool_free(rqd, &pblk->r_rq_pool);
	kfree(data);
free_meta_list:
	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);

	return ret;
}
/* Insert lines ordered by sequence number (seq_num) on list */
static void pblk_recov_line_add_ordered(struct list_head *head,
struct pblk_line *line)
{
struct pblk_line *t = NULL;
list_for_each_entry(t, head, list)
if (t->seq_nr > line->seq_nr)
break;
__list_add(&line->list, t->list.prev, &t->list);
}
/*
 * Compute the sector at which emeta starts in @line.
 *
 * Walks backwards from the end of the line until lm->emeta_sec[0] sectors that
 * are not on a bad block have been counted.
 */
static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_geo *geo = &pblk->dev->geo;
	unsigned int remaining = pblk->lm.emeta_sec[0];
	u64 paddr = pblk->lm.sec_per_line;

	while (remaining) {
		struct ppa_addr ppa;
		int pos;

		paddr--;

		ppa = addr_to_gen_ppa(pblk, paddr, line->id);
		pos = pblk_ppa_to_pos(geo, ppa);

		/* Sectors on bad blocks do not hold emeta */
		if (test_bit(pos, line->blk_bitmap))
			continue;

		remaining--;
	}

	return paddr;
}
/*
 * Validate the emeta header version of a line.
 *
 * Returns 1 (after logging) when the major version differs from
 * EMETA_VERSION_MAJOR, 0 otherwise. With CONFIG_NVM_PBLK_DEBUG a newer minor
 * version is reported but accepted.
 */
static int pblk_recov_check_line_version(struct pblk *pblk,
					 struct line_emeta *emeta)
{
	struct line_header *hdr = &emeta->header;

	if (hdr->version_major == EMETA_VERSION_MAJOR) {
#ifdef CONFIG_NVM_PBLK_DEBUG
		if (hdr->version_minor > EMETA_VERSION_MINOR)
			pblk_info(pblk, "newer line minor version found: %d\n",
				  hdr->version_minor);
#endif
		return 0;
	}

	pblk_err(pblk, "line major version mismatch: %d, expected: %d\n",
		 hdr->version_major, EMETA_VERSION_MAJOR);

	return 1;
}
/*
 * Restore the write amplification counters (user, pad, gc) from emeta.
 *
 * The counters exist from emeta version 0.2 onwards; older metadata is left
 * alone. The restored values are also stored as the reset baselines.
 */
static void pblk_recov_wa_counters(struct pblk *pblk,
				   struct line_emeta *emeta)
{
	struct line_header *hdr = &emeta->header;
	struct wa_counters *wa = emeta_to_wa(&pblk->lm, emeta);
	u64 user, pad, gc;

	if (!(hdr->version_major > 0 || hdr->version_minor >= 2))
		return;

	user = le64_to_cpu(wa->user);
	pad = le64_to_cpu(wa->pad);
	gc = le64_to_cpu(wa->gc);

	atomic64_set(&pblk->user_wa, user);
	atomic64_set(&pblk->pad_wa, pad);
	atomic64_set(&pblk->gc_wa, gc);

	pblk->user_rst_wa = user;
	pblk->pad_rst_wa = pad;
	pblk->gc_rst_wa = gc;
}
/*
 * Tell whether a line holds at least its smeta.
 *
 * The decision is taken on the chunk behind the first block not flagged in
 * line->blk_bitmap: the line counts as written when that chunk is closed, or
 * open with a write pointer of at least lm->smeta_sec. Bad lines and lines
 * without any good block are reported as not written.
 *
 * Returns 1 if written, 0 otherwise.
 */
static int pblk_line_was_written(struct pblk_line *line,
				 struct pblk *pblk)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct nvm_geo *geo = &pblk->dev->geo;
	struct nvm_chk_meta *chunk;
	int first_good;

	if (line->state == PBLK_LINESTATE_BAD)
		return 0;

	first_good = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
	if (first_good >= lm->blk_per_line)
		return 0;

	chunk = &line->chks[pblk_ppa_to_pos(geo, pblk->luns[first_good].bppa)];

	if (chunk->state & NVM_CHK_ST_CLOSED)
		return 1;

	if (!(chunk->state & NVM_CHK_ST_OPEN))
		return 0;

	return chunk->wp >= lm->smeta_sec ? 1 : 0;
}
/* Return true when at least one chunk of @line is in the open state. */
static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line)
{
	int nr_blks = pblk->lm.blk_per_line;
	int pos = 0;

	while (pos < nr_blks) {
		if (line->chks[pos].state & NVM_CHK_ST_OPEN)
			return true;
		pos++;
	}

	return false;
}
/*
 * Scan-based recovery of the L2P table.
 *
 * Phase 1 reads the smeta of every line that appears to have been written,
 * validates it (CRC, magic, major version, instance uuid) and queues the line
 * on a local list ordered by sequence number.
 * Phase 2 walks that list and rebuilds the mappings of each line from emeta
 * when it is readable and valid, or from the out-of-band area otherwise.
 * Full lines are closed and moved to a GC list; a line that is not full
 * becomes the current data line and a next data line is prepared.
 *
 * Returns the open data line, NULL when no open line was found, or
 * ERR_PTR(-EINVAL) on an incompatible smeta/emeta major version.
 */
struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
{
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line, *tline, *data_line = NULL;
struct pblk_smeta *smeta;
struct pblk_emeta *emeta;
struct line_smeta *smeta_buf;
int found_lines = 0, recovered_lines = 0, open_lines = 0;
int is_next = 0;
int meta_line;
int i, valid_uuid = 0;
LIST_HEAD(recov_list);
/* TODO: Implement FTL snapshot */
/* Scan recovery - takes place when FTL snapshot fails */
/*
 * Reserve one metadata slot; its smeta/emeta buffers are shared by all
 * lines during the scan.
 * NOTE(review): the result of find_first_zero_bit() is not checked
 * against PBLK_DATA_LINES.
 */
spin_lock(&l_mg->free_lock);
meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
set_bit(meta_line, &l_mg->meta_bitmap);
smeta = l_mg->sline_meta[meta_line];
emeta = l_mg->eline_meta[meta_line];
smeta_buf = (struct line_smeta *)smeta;
spin_unlock(&l_mg->free_lock);
/* Order data lines using their sequence number */
for (i = 0; i < l_mg->nr_lines; i++) {
u32 crc;
line = &pblk->lines[i];
memset(smeta, 0, lm->smeta_len);
line->smeta = smeta;
line->lun_bitmap = ((void *)(smeta_buf)) +
sizeof(struct line_smeta);
if (!pblk_line_was_written(line, pblk))
continue;
/* Lines that cannot be read are assumed as not written here */
if (pblk_line_smeta_read(pblk, line))
continue;
crc = pblk_calc_smeta_crc(pblk, smeta_buf);
if (le32_to_cpu(smeta_buf->crc) != crc)
continue;
if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
continue;
/*
 * NOTE(review): this early return leaves the meta_line bit set in
 * l_mg->meta_bitmap and lines already queued on recov_list untouched.
 */
if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) {
pblk_err(pblk, "found incompatible line version %u\n",
smeta_buf->header.version_major);
return ERR_PTR(-EINVAL);
}
/* The first valid instance uuid is used for initialization */
if (!valid_uuid) {
import_guid(&pblk->instance_uuid, smeta_buf->header.uuid);
valid_uuid = 1;
}
if (!guid_equal(&pblk->instance_uuid,
(guid_t *)&smeta_buf->header.uuid)) {
pblk_debug(pblk, "ignore line %u due to uuid mismatch\n",
i);
continue;
}
/* Update line metadata */
spin_lock(&line->lock);
line->id = le32_to_cpu(smeta_buf->header.id);
line->type = le16_to_cpu(smeta_buf->header.type);
line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
spin_unlock(&line->lock);
/* Update general metadata */
spin_lock(&l_mg->free_lock);
/* Keep the next sequence number ahead of every recovered line */
if (line->seq_nr >= l_mg->d_seq_nr)
l_mg->d_seq_nr = line->seq_nr + 1;
l_mg->nr_free_lines--;
spin_unlock(&l_mg->free_lock);
if (pblk_line_recov_alloc(pblk, line))
goto out;
pblk_recov_line_add_ordered(&recov_list, line);
found_lines++;
/* NOTE(review): seq_nr is printed here without le64_to_cpu() */
pblk_debug(pblk, "recovering data line %d, seq:%llu\n",
line->id, smeta_buf->seq_nr);
}
/* Nothing to recover: start a fresh instance and release the slot */
if (!found_lines) {
guid_gen(&pblk->instance_uuid);
spin_lock(&l_mg->free_lock);
WARN_ON_ONCE(!test_and_clear_bit(meta_line,
&l_mg->meta_bitmap));
spin_unlock(&l_mg->free_lock);
goto out;
}
/* Verify closed blocks and recover this portion of L2P table*/
list_for_each_entry_safe(line, tline, &recov_list, list) {
recovered_lines++;
line->emeta_ssec = pblk_line_emeta_start(pblk, line);
line->emeta = emeta;
memset(line->emeta->buf, 0, lm->emeta_len[0]);
/*
 * Open lines, unreadable emeta and invalid emeta all fall back to the
 * OOB scan. The return value of pblk_recov_l2p_from_oob() is ignored
 * in every case.
 */
if (pblk_line_is_open(pblk, line)) {
pblk_recov_l2p_from_oob(pblk, line);
goto next;
}
if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) {
pblk_recov_l2p_from_oob(pblk, line);
goto next;
}
if (pblk_recov_check_emeta(pblk, line->emeta->buf)) {
pblk_recov_l2p_from_oob(pblk, line);
goto next;
}
/* NOTE(review): same early-return concern as the smeta version check */
if (pblk_recov_check_line_version(pblk, line->emeta->buf))
return ERR_PTR(-EINVAL);
pblk_recov_wa_counters(pblk, line->emeta->buf);
if (pblk_recov_l2p_from_emeta(pblk, line))
pblk_recov_l2p_from_oob(pblk, line);
next:
if (pblk_line_is_full(line)) {
struct list_head *move_list;
spin_lock(&line->lock);
line->state = PBLK_LINESTATE_CLOSED;
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
line->state);
move_list = pblk_line_gc_list(pblk, line);
spin_unlock(&line->lock);
spin_lock(&l_mg->gc_lock);
list_move_tail(&line->list, move_list);
spin_unlock(&l_mg->gc_lock);
/* A closed line no longer needs its recovery-time buffers */
mempool_free(line->map_bitmap, l_mg->bitmap_pool);
line->map_bitmap = NULL;
line->smeta = NULL;
line->emeta = NULL;
} else {
spin_lock(&line->lock);
line->state = PBLK_LINESTATE_OPEN;
spin_unlock(&line->lock);
line->emeta->mem = 0;
atomic_set(&line->emeta->sync, 0);
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
line->state);
/* The open line keeps the metadata slot reserved at the top */
data_line = line;
line->meta_line = meta_line;
open_lines++;
}
}
if (!open_lines) {
/* No open line took over the metadata slot: release it */
spin_lock(&l_mg->free_lock);
WARN_ON_ONCE(!test_and_clear_bit(meta_line,
&l_mg->meta_bitmap));
spin_unlock(&l_mg->free_lock);
} else {
spin_lock(&l_mg->free_lock);
l_mg->data_line = data_line;
/* Allocate next line for preparation */
l_mg->data_next = pblk_line_get(pblk);
if (l_mg->data_next) {
l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
l_mg->data_next->type = PBLK_LINETYPE_DATA;
is_next = 1;
}
spin_unlock(&l_mg->free_lock);
}
/* The erase is issued after free_lock has been released */
if (is_next)
pblk_line_erase(pblk, l_mg->data_next);
out:
if (found_lines != recovered_lines)
pblk_err(pblk, "failed to recover all found lines %d/%d\n",
found_lines, recovered_lines);
return data_line;
}
/*
 * Pad the current data line up to its end and close its metadata.
 *
 * Returns 0 on success or the error reported by pblk_recov_pad_line(), in
 * which case the line metadata is not closed.
 */
int pblk_recov_pad(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *cur;
	int nr_left;
	int ret;

	spin_lock(&l_mg->free_lock);
	cur = l_mg->data_line;
	nr_left = cur->left_msecs;
	spin_unlock(&l_mg->free_lock);

	ret = pblk_recov_pad_line(pblk, cur, nr_left);
	if (!ret) {
		pblk_line_close_meta(pblk, cur);
		return ret;
	}

	pblk_err(pblk, "tear down padding failed (%d)\n", ret);
	return ret;
}

View File

@ -1,254 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-rl.c - pblk's rate limiter for user I/O
*
*/
#include "pblk.h"
/* (Re)arm the user I/O activity timer to fire 5000 ms from now. */
static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
{
	unsigned long expires = jiffies + msecs_to_jiffies(5000);

	mod_timer(&rl->u_timer, expires);
}
/* Return non-zero when the rate limiter's rb_space counter reads zero. */
int pblk_rl_is_limit(struct pblk_rl *rl)
{
	return atomic_read(&rl->rb_space) == 0;
}
/*
 * Decide whether user I/O may insert @nr_entries into the write buffer.
 *
 * Returns NVM_IO_ERR when rb_space is non-negative and too small for the
 * request, NVM_IO_REQUEUE when the user entry count has reached rb_user_max,
 * and NVM_IO_OK otherwise.
 */
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
{
	int user_cnt = atomic_read(&rl->rb_user_cnt);
	int space = atomic_read(&rl->rb_space);

	if (unlikely(space >= 0)) {
		if (space - nr_entries < 0)
			return NVM_IO_ERR;
	}

	return (user_cnt >= rl->rb_user_max) ? NVM_IO_REQUEUE : NVM_IO_OK;
}
/*
 * Account for @nr_entries inserted into the write buffer: rb_space is only
 * decremented while it holds a non-negative value.
 */
void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
{
	if (unlikely(atomic_read(&rl->rb_space) >= 0))
		atomic_sub(nr_entries, &rl->rb_space);
}
/*
 * Decide whether GC may insert into the write buffer.
 *
 * GC is refused only when it has reached its budget (rb_gc_max) while user
 * I/O is active; without user I/O, GC may take over the buffer space.
 * @nr_entries is not used.
 *
 * Returns 1 if GC may insert, 0 otherwise.
 */
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
{
	int gc_cnt = atomic_read(&rl->rb_gc_cnt);
	int user_active = READ_ONCE(rl->rb_user_active);

	return gc_cnt < rl->rb_gc_max || !user_active;
}
/*
 * Account for @nr_entries of user I/O entering the write buffer, flag user
 * I/O as active and re-arm the activity timer.
 */
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
{
	atomic_add(nr_entries, &rl->rb_user_cnt);

	/* Publish the user-active state with release ordering */
	smp_store_release(&rl->rb_user_active, 1);

	pblk_rl_kick_u_timer(rl);
}
/* Increment the rate limiter's werr_lines counter. */
void pblk_rl_werr_line_in(struct pblk_rl *rl)
{
	atomic_inc(&rl->werr_lines);
}
/* Decrement the rate limiter's werr_lines counter. */
void pblk_rl_werr_line_out(struct pblk_rl *rl)
{
	atomic_dec(&rl->werr_lines);
}
/* Account for @nr_entries of GC I/O entering the write buffer. */
void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
{
	atomic_add(nr_entries, &rl->rb_gc_cnt);
}
/*
 * Account for entries leaving the write buffer: @nr_user is subtracted from
 * the user counter and @nr_gc from the GC counter.
 */
void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc)
{
	atomic_sub(nr_user, &rl->rb_user_cnt);
	atomic_sub(nr_gc, &rl->rb_gc_cnt);
}
/* Return the current value of the rate limiter's free_blocks counter. */
unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl)
{
	int nr_free = atomic_read(&rl->free_blocks);

	return nr_free;
}
/* Return the current value of the rate limiter's free_user_blocks counter. */
unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl)
{
	int nr_free = atomic_read(&rl->free_user_blocks);

	return nr_free;
}
/*
 * Recompute the write buffer split between user I/O and GC from the number of
 * free blocks, then start or stop GC accordingly.
 *
 * - At or above rl->high: the whole budget goes to user I/O, unless lines with
 *   write errors are pending, in which case a small GC budget is reserved.
 * - Below rl->high: the user share shrinks with the free block count and GC
 *   gets the remainder; at or below rl->rsv_blocks user I/O gets nothing.
 */
static void __pblk_rl_update_rates(struct pblk_rl *rl,
				   unsigned long free_blocks)
{
	struct pblk *pblk = container_of(rl, struct pblk, rl);
	int budget = rl->rb_budget;
	int werr_pending = atomic_read(&rl->werr_lines);

	if (free_blocks >= rl->high) {
		if (!werr_pending) {
			rl->rb_user_max = budget;
			rl->rb_gc_max = 0;
			rl->rb_state = PBLK_RL_OFF;
		} else {
			/* Small budget to recover lines with write errors */
			rl->rb_gc_max = 1 << rl->rb_windows_pw;
			rl->rb_user_max = budget - rl->rb_gc_max;
			rl->rb_state = PBLK_RL_WERR;
		}
	} else {
		int shift = rl->high_pw - rl->rb_windows_pw;
		int user_windows = free_blocks >> shift;
		int user_max = user_windows << ilog2(NVM_MAX_VLBA);

		rl->rb_user_max = user_max;
		rl->rb_gc_max = budget - user_max;

		if (free_blocks <= rl->rsv_blocks) {
			rl->rb_user_max = 0;
			rl->rb_gc_max = budget;
		}

		/*
		 * Worst case the lines on the low list (high valid sector
		 * count) must be collected; lines on the high or mid lists
		 * are prioritized when present.
		 */
		rl->rb_state = PBLK_RL_LOW;
	}

	if (rl->rb_state == PBLK_RL_OFF)
		pblk_gc_should_stop(pblk);
	else
		pblk_gc_should_start(pblk);
}
/* Recompute the rate limits from the current count of free user blocks. */
void pblk_rl_update_rates(struct pblk_rl *rl)
{
	unsigned long nr_free = pblk_rl_nr_user_free_blks(rl);

	__pblk_rl_update_rates(rl, nr_free);
}
/*
 * Credit the blocks of @line to both free block counters and recompute the
 * rate limits from the new free user block count.
 */
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
{
	int nr_blks = atomic_read(&line->blk_in_line);
	int nr_user_free;

	atomic_add(nr_blks, &rl->free_blocks);
	nr_user_free = atomic_add_return(nr_blks, &rl->free_user_blocks);

	__pblk_rl_update_rates(rl, nr_user_free);
}
/*
 * Debit the blocks of @line from the free block counter and recompute the
 * rate limits.
 *
 * The free user block counter is only debited when @used is true; otherwise
 * its current value is used as is.
 */
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
			    bool used)
{
	int nr_blks = atomic_read(&line->blk_in_line);
	int nr_user_free;

	atomic_sub(nr_blks, &rl->free_blocks);

	nr_user_free = used ?
		atomic_sub_return(nr_blks, &rl->free_user_blocks) :
		atomic_read(&rl->free_user_blocks);

	__pblk_rl_update_rates(rl, nr_user_free);
}
/* Return the rate limiter's high threshold (rl->high). */
int pblk_rl_high_thrs(struct pblk_rl *rl)
{
	return rl->high;
}
/* Return the maximum I/O size computed at init time (rl->rb_max_io). */
int pblk_rl_max_io(struct pblk_rl *rl)
{
	return rl->rb_max_io;
}
/*
 * Timer callback armed by pblk_rl_kick_u_timer(): clears the user-active
 * flag once the timer expires.
 */
static void pblk_rl_u_timer(struct timer_list *t)
{
	struct pblk_rl *rl = from_timer(rl, t, u_timer);

	/* Publish the idle state with release ordering */
	smp_store_release(&rl->rb_user_active, 0);
}
/* Tear down the rate limiter by deleting its user activity timer. */
void pblk_rl_free(struct pblk_rl *rl)
{
	del_timer(&rl->u_timer);
}
/*
 * Initialize the rate limiter.
 *
 * @budget:    total number of write buffer entries to split between user I/O
 *             and GC.
 * @threshold: back threshold kept free when sizing the maximum I/O; when zero
 *             a single entry is reserved instead.
 *
 * Computes the high threshold from the over-provisioned blocks minus the
 * blocks taken by line metadata and one line, hands the whole budget to user
 * I/O, resets all counters and sets up the user activity timer.
 */
void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold)
{
	struct pblk *pblk = container_of(rl, struct pblk, rl);
	struct nvm_geo *geo = &pblk->dev->geo;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	int meta_secs, meta_blks;
	unsigned int nr_windows;

	/* Sectors, then blocks, consumed by smeta and emeta on free lines */
	meta_secs = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
	meta_blks = DIV_ROUND_UP(meta_secs, geo->clba);

	rl->high = pblk->op_blks - meta_blks - lm->blk_per_line;
	rl->high_pw = get_count_order(rl->high);

	rl->rsv_blocks = pblk_get_min_chks(pblk);

	/* This will always be a power-of-2 */
	nr_windows = budget / NVM_MAX_VLBA;
	rl->rb_windows_pw = get_count_order(nr_windows);

	/* Initially the whole buffer is available to user I/O writers */
	rl->rb_budget = budget;
	rl->rb_user_max = budget;
	rl->rb_gc_max = 0;
	rl->rb_state = PBLK_RL_HIGH;

	/* Maximize I/O size and ensure that the back threshold is respected */
	rl->rb_max_io = budget - pblk->min_write_pgs_data -
			(threshold ? threshold : 1);

	atomic_set(&rl->rb_user_cnt, 0);
	atomic_set(&rl->rb_gc_cnt, 0);
	atomic_set(&rl->rb_space, -1);
	atomic_set(&rl->werr_lines, 0);

	timer_setup(&rl->u_timer, pblk_rl_u_timer, 0);

	rl->rb_user_active = 0;
	rl->rb_gc_active = 0;
}

View File

@ -1,728 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* Implementation of a physical block-device target for Open-channel SSDs.
*
* pblk-sysfs.c - pblk's sysfs
*
*/
#include "pblk.h"
/*
 * sysfs: print one line per LUN with its position, channel, LUN id and
 * whether its write semaphore is currently held.
 *
 * A LUN is reported active (1) when its wr_sem cannot be taken with
 * down_trylock(); a semaphore that was taken is released again immediately.
 *
 * Returns the number of bytes written to @page.
 */
static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
{
	struct nvm_geo *geo = &pblk->dev->geo;
	ssize_t len = 0;
	int pos;

	for (pos = 0; pos < geo->all_luns; pos++) {
		struct pblk_lun *rlun = &pblk->luns[pos];
		int active;

		/* down_trylock() returns non-zero when the semaphore is busy */
		active = down_trylock(&rlun->wr_sem) ? 1 : 0;
		if (!active)
			up(&rlun->wr_sem);

		len += scnprintf(page + len, PAGE_SIZE - len,
				 "pblk: pos:%d, ch:%d, lun:%d - %d\n",
				 pos,
				 rlun->bppa.a.ch,
				 rlun->bppa.a.lun,
				 active);
	}

	return len;
}
/*
 * sysfs: print a one-line snapshot of the rate limiter state: user and GC
 * entry counts against their limits, state, budget, high threshold, free
 * block counters and the user-active flag.
 *
 * scnprintf() is used (as in the other show helpers of this file) so that the
 * value returned is the number of bytes actually stored in @page, never the
 * would-be length that snprintf() reports on truncation.
 *
 * Returns the number of bytes written to @page.
 */
static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
{
	int free_blocks, free_user_blocks, total_blocks;
	int rb_user_max, rb_user_cnt;
	int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;

	free_blocks = pblk_rl_nr_free_blks(&pblk->rl);
	free_user_blocks = pblk_rl_nr_user_free_blks(&pblk->rl);
	rb_user_max = pblk->rl.rb_user_max;
	rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
	rb_gc_max = pblk->rl.rb_gc_max;
	rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
	rb_budget = pblk->rl.rb_budget;
	rb_state = pblk->rl.rb_state;

	total_blocks = pblk->rl.total_blocks;

	return scnprintf(page, PAGE_SIZE,
		"u:%u/%u,gc:%u/%u(%u)(stop:<%u,full:>%u,free:%d/%d/%d)-%d\n",
				rb_user_cnt,
				rb_user_max,
				rb_gc_cnt,
				rb_gc_max,
				rb_state,
				rb_budget,
				pblk->rl.high,
				free_blocks,
				free_user_blocks,
				total_blocks,
				READ_ONCE(pblk->rl.rb_user_active));
}
/*
 * sysfs: print whether GC is enabled and whether it is currently active, as
 * reported by pblk_gc_sysfs_state_show().
 *
 * Uses scnprintf() so the return value is the number of bytes actually
 * written to @page.
 */
static ssize_t pblk_sysfs_gc_state_show(struct pblk *pblk, char *page)
{
	int gc_enabled, gc_active;

	pblk_gc_sysfs_state_show(pblk, &gc_enabled, &gc_active);

	return scnprintf(page, PAGE_SIZE, "gc_enabled=%d, gc_active=%d\n",
			 gc_enabled, gc_active);
}
/*
 * sysfs: print the read, write and erase failure counters of the instance.
 *
 * Uses scnprintf() so the return value is the number of bytes actually
 * written to @page.
 */
static ssize_t pblk_sysfs_stats(struct pblk *pblk, char *page)
{
	return scnprintf(page, PAGE_SIZE,
			"read_failed=%lu, read_high_ecc=%lu, read_empty=%lu, read_failed_gc=%lu, write_failed=%lu, erase_failed=%lu\n",
			atomic_long_read(&pblk->read_failed),
			atomic_long_read(&pblk->read_high_ecc),
			atomic_long_read(&pblk->read_empty),
			atomic_long_read(&pblk->read_failed_gc),
			atomic_long_read(&pblk->write_failed),
			atomic_long_read(&pblk->erase_failed));
}
/* sysfs: delegate to pblk_rb_sysfs() to print the write buffer state. */
static ssize_t pblk_sysfs_write_buffer(struct pblk *pblk, char *page)
{
	struct pblk_rb *rb = &pblk->rwb;

	return pblk_rb_sysfs(rb, page);
}
/*
 * sysfs: print the physical address format used by pblk and the one reported
 * by the device, as offset/length pairs per address field.
 *
 * The field set depends on geo->version: the NVM_OCSSD_SPEC_12 layout is
 * printed for that version, the generic layout for any other.
 *
 * Returns the number of bytes written to @page.
 */
static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
{
	struct nvm_geo *geo = &pblk->dev->geo;
	struct nvm_addrf_12 *tgt12, *dev12;
	struct nvm_addrf *tgt, *hw;
	ssize_t len;

	if (geo->version != NVM_OCSSD_SPEC_12) {
		tgt = &pblk->addrf;
		hw = &geo->addrf;

		len = scnprintf(page, PAGE_SIZE,
			"pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n",
			pblk->addrf_len,
			tgt->ch_offset, tgt->ch_len,
			tgt->lun_offset, tgt->lun_len,
			tgt->chk_offset, tgt->chk_len,
			tgt->sec_offset, tgt->sec_len);

		len += scnprintf(page + len, PAGE_SIZE - len,
			"device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n",
			hw->ch_offset, hw->ch_len,
			hw->lun_offset, hw->lun_len,
			hw->chk_offset, hw->chk_len,
			hw->sec_offset, hw->sec_len);

		return len;
	}

	tgt12 = (struct nvm_addrf_12 *)&pblk->addrf;
	dev12 = (struct nvm_addrf_12 *)&geo->addrf;

	len = scnprintf(page, PAGE_SIZE,
		"g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
		pblk->addrf_len,
		tgt12->blk_offset, tgt12->blk_len,
		tgt12->pg_offset, tgt12->pg_len,
		tgt12->lun_offset, tgt12->lun_len,
		tgt12->ch_offset, tgt12->ch_len,
		tgt12->pln_offset, tgt12->pln_len,
		tgt12->sec_offset, tgt12->sec_len);

	len += scnprintf(page + len, PAGE_SIZE - len,
		"d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
		dev12->blk_offset, dev12->blk_len,
		dev12->pg_offset, dev12->pg_len,
		dev12->lun_offset, dev12->lun_len,
		dev12->ch_offset, dev12->ch_len,
		dev12->pln_offset, dev12->pln_len,
		dev12->sec_offset, dev12->sec_len);

	return len;
}
/*
 * sysfs: print a summary of line management state.
 *
 * Counts the lines on the free, emeta, GC (full/high/mid/low/empty/werr), bad
 * and corrupt lists, each under the lock taken around it below, samples the
 * state of the current data line and prints four lines of text: geometry,
 * line counters, GC list counters and data line details.
 *
 * Returns the number of bytes written to @page.
 */
static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line;
ssize_t sz = 0;
int nr_free_lines;
int cur_data, cur_log;
int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
int d_line_cnt = 0, l_line_cnt = 0;
int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
int gc_werr = 0;
int bad = 0, cor = 0;
int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
int map_weight = 0, meta_weight = 0;
/* Current data/log line ids (-1 when unset) and the free list length */
spin_lock(&l_mg->free_lock);
cur_data = (l_mg->data_line) ? l_mg->data_line->id : -1;
cur_log = (l_mg->log_line) ? l_mg->log_line->id : -1;
nr_free_lines = l_mg->nr_free_lines;
list_for_each_entry(line, &l_mg->free_list, list)
free_line_cnt++;
spin_unlock(&l_mg->free_lock);
spin_lock(&l_mg->close_lock);
list_for_each_entry(line, &l_mg->emeta_list, list)
emeta_line_cnt++;
spin_unlock(&l_mg->close_lock);
/*
 * Every GC list is walked the same way: count the line by type (data or
 * log), as closed, and against its own list counter.
 */
spin_lock(&l_mg->gc_lock);
list_for_each_entry(line, &l_mg->gc_full_list, list) {
if (line->type == PBLK_LINETYPE_DATA)
d_line_cnt++;
else if (line->type == PBLK_LINETYPE_LOG)
l_line_cnt++;
closed_line_cnt++;
gc_full++;
}
list_for_each_entry(line, &l_mg->gc_high_list, list) {
if (line->type == PBLK_LINETYPE_DATA)
d_line_cnt++;
else if (line->type == PBLK_LINETYPE_LOG)
l_line_cnt++;
closed_line_cnt++;
gc_high++;
}
list_for_each_entry(line, &l_mg->gc_mid_list, list) {
if (line->type == PBLK_LINETYPE_DATA)
d_line_cnt++;
else if (line->type == PBLK_LINETYPE_LOG)
l_line_cnt++;
closed_line_cnt++;
gc_mid++;
}
list_for_each_entry(line, &l_mg->gc_low_list, list) {
if (line->type == PBLK_LINETYPE_DATA)
d_line_cnt++;
else if (line->type == PBLK_LINETYPE_LOG)
l_line_cnt++;
closed_line_cnt++;
gc_low++;
}
list_for_each_entry(line, &l_mg->gc_empty_list, list) {
if (line->type == PBLK_LINETYPE_DATA)
d_line_cnt++;
else if (line->type == PBLK_LINETYPE_LOG)
l_line_cnt++;
closed_line_cnt++;
gc_empty++;
}
list_for_each_entry(line, &l_mg->gc_werr_list, list) {
if (line->type == PBLK_LINETYPE_DATA)
d_line_cnt++;
else if (line->type == PBLK_LINETYPE_LOG)
l_line_cnt++;
closed_line_cnt++;
gc_werr++;
}
list_for_each_entry(line, &l_mg->bad_list, list)
bad++;
list_for_each_entry(line, &l_mg->corrupt_list, list)
cor++;
spin_unlock(&l_mg->gc_lock);
/* Sample the current data line; its map bitmap is read under line->lock */
spin_lock(&l_mg->free_lock);
if (l_mg->data_line) {
cur_sec = l_mg->data_line->cur_sec;
msecs = l_mg->data_line->left_msecs;
vsc = le32_to_cpu(*l_mg->data_line->vsc);
sec_in_line = l_mg->data_line->sec_in_line;
meta_weight = bitmap_weight(&l_mg->meta_bitmap,
PBLK_DATA_LINES);
spin_lock(&l_mg->data_line->lock);
if (l_mg->data_line->map_bitmap)
map_weight = bitmap_weight(l_mg->data_line->map_bitmap,
lm->sec_per_line);
else
map_weight = 0;
spin_unlock(&l_mg->data_line->lock);
}
spin_unlock(&l_mg->free_lock);
/* The counter and the list length were both read under free_lock above */
if (nr_free_lines != free_line_cnt)
pblk_err(pblk, "corrupted free line list:%d/%d\n",
nr_free_lines, free_line_cnt);
/* sz is still 0 here, so the full page is available to the first print */
sz = scnprintf(page, PAGE_SIZE - sz,
"line: nluns:%d, nblks:%d, nsecs:%d\n",
geo->all_luns, lm->blk_per_line, lm->sec_per_line);
sz += scnprintf(page + sz, PAGE_SIZE - sz,
"lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
cur_data, cur_log,
nr_free_lines,
emeta_line_cnt, meta_weight,
closed_line_cnt,
bad, cor,
d_line_cnt, l_line_cnt,
l_mg->nr_lines);
sz += scnprintf(page + sz, PAGE_SIZE - sz,
"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n",
gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr,
atomic_read(&pblk->gc.read_inflight_gc));
sz += scnprintf(page + sz, PAGE_SIZE - sz,
"data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
cur_data, cur_sec, msecs, vsc, sec_in_line,
map_weight, lm->sec_per_line,
atomic_read(&pblk->inflight_io));
return sz;
}
/*
 * sysfs: print the static line layout: smeta/emeta lengths and sector counts,
 * bitmap lengths, and the per-line and per-block sector counts.
 *
 * Returns the number of bytes written to @page.
 */
static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
{
	struct nvm_geo *geo = &pblk->dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	ssize_t len;

	len = scnprintf(page, PAGE_SIZE,
			"smeta - len:%d, secs:%d\n",
			lm->smeta_len, lm->smeta_sec);

	len += scnprintf(page + len, PAGE_SIZE - len,
			 "emeta - len:%d, sec:%d, bb_start:%d\n",
			 lm->emeta_len[0], lm->emeta_sec[0],
			 lm->emeta_bb);

	len += scnprintf(page + len, PAGE_SIZE - len,
			 "bitmap lengths: sec:%d, blk:%d, lun:%d\n",
			 lm->sec_bitmap_len,
			 lm->blk_bitmap_len,
			 lm->lun_bitmap_len);

	len += scnprintf(page + len, PAGE_SIZE - len,
			 "blk_line:%d, sec_line:%d, sec_blk:%d\n",
			 lm->blk_per_line,
			 lm->sec_per_line,
			 geo->clba);

	return len;
}
/* "max_sec_per_write" (read side): print pblk->sec_per_write as a decimal. */
static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
{
	int sec_per_write = pblk->sec_per_write;

	return snprintf(page, PAGE_SIZE, "%d\n", sec_per_write);
}
/*
 * Format write-amplification statistics into @page.
 *
 * Prints the three raw counters followed by "WA:" and the ratio
 * (user + gc + pad) / user with five fractional digits, or "NaN" when
 * @user is zero.
 *
 * Returns the number of bytes written to @page.
 */
static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
				  char *page)
{
	u64 scaled, whole;
	u32 frac;
	int len;

	len = scnprintf(page, PAGE_SIZE,
			"user:%lld gc:%lld pad:%lld WA:",
			user, gc, pad);

	/* No user writes: the ratio is undefined. */
	if (!user)
		return len + scnprintf(page + len, PAGE_SIZE - len, "NaN\n");

	/* Fixed-point ratio scaled by 10^5, then split into int and frac. */
	scaled = (user + gc + pad) * 100000;
	scaled = div64_u64(scaled, user);
	whole = div_u64_rem(scaled, 100000, &frac);

	return len + scnprintf(page + len, PAGE_SIZE - len, "%llu.%05u\n",
			       whole, frac);
}
static ssize_t pblk_sysfs_get_write_amp_mileage(struct pblk *pblk, char *page)
{
return pblk_get_write_amp(atomic64_read(&pblk->user_wa),
atomic64_read(&pblk->gc_wa), atomic64_read(&pblk->pad_wa),
page);
}
static ssize_t pblk_sysfs_get_write_amp_trip(struct pblk *pblk, char *page)
{
return pblk_get_write_amp(
atomic64_read(&pblk->user_wa) - pblk->user_rst_wa,
atomic64_read(&pblk->gc_wa) - pblk->gc_rst_wa,
atomic64_read(&pblk->pad_wa) - pblk->pad_rst_wa, page);
}
/*
 * Return @bucket as an integer percentage of @total (rounded down).
 *
 * The arithmetic is done entirely in 64 bits: the previous version stored
 * bucket * 100 in an int and divided with div_u64(), whose divisor is only
 * 32 bits wide, so large counters were silently truncated (and a truncated
 * @total could become 0).
 *
 * Returns 0 when @total is 0 rather than dividing by zero.
 */
static long long bucket_percentage(unsigned long long bucket,
				   unsigned long long total)
{
	if (!total)
		return 0;

	return div64_u64(bucket * 100, total);
}
/*
 * "padding_dist" attribute (read side): print one "<bucket>:<pct>% " entry
 * per bucket, relative to the flushes counted since the last reset
 * (nr_flush - nr_flush_rst).
 *
 * Bucket 0 is derived as the total minus the sum of pad_dist[]; buckets
 * 1..min_write_pgs-1 come from pad_dist[0..]. If no flushes were counted,
 * every bucket is printed as "<bucket>:0 ".
 *
 * Returns the number of bytes written to @page.
 */
static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
{
	int nr_buckets = pblk->min_write_pgs - 1;
	unsigned long long flushes;
	unsigned long long padded = 0;
	int len = 0;
	int b;

	flushes = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst;

	if (flushes == 0) {
		for (b = 0; b < nr_buckets + 1; b++)
			len += scnprintf(page + len, PAGE_SIZE - len,
					 "%d:0 ", b);
		len += scnprintf(page + len, PAGE_SIZE - len, "\n");

		return len;
	}

	for (b = 0; b < nr_buckets; b++)
		padded += atomic64_read(&pblk->pad_dist[b]);

	len += scnprintf(page + len, PAGE_SIZE - len, "0:%lld%% ",
			 bucket_percentage(flushes - padded, flushes));

	for (b = 0; b < nr_buckets; b++) {
		unsigned long long pct;

		pct = bucket_percentage(atomic64_read(&pblk->pad_dist[b]),
					flushes);
		len += scnprintf(page + len, PAGE_SIZE - len, "%d:%lld%% ",
				 b + 1, pct);
	}

	len += scnprintf(page + len, PAGE_SIZE - len, "\n");

	return len;
}
#ifdef CONFIG_NVM_PBLK_DEBUG
/*
 * Debug-only "stats" attribute: dump the instance's debug counters as a
 * single tab-separated line, in this order: inflight_writes, inflight_reads,
 * req_writes, nr_flush, padded_writes, padded_wb, sub_writes, sync_writes,
 * recov_writes, recov_gc_writes, recov_gc_reads, cache_reads, sync_reads.
 *
 * Returns the value of snprintf().
 */
static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
{
return snprintf(page, PAGE_SIZE,
"%lu\t%lu\t%ld\t%llu\t%ld\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
atomic_long_read(&pblk->inflight_writes),
atomic_long_read(&pblk->inflight_reads),
atomic_long_read(&pblk->req_writes),
(u64)atomic64_read(&pblk->nr_flush),
atomic_long_read(&pblk->padded_writes),
atomic_long_read(&pblk->padded_wb),
atomic_long_read(&pblk->sub_writes),
atomic_long_read(&pblk->sync_writes),
atomic_long_read(&pblk->recov_writes),
atomic_long_read(&pblk->recov_gc_writes),
atomic_long_read(&pblk->recov_gc_reads),
atomic_long_read(&pblk->cache_reads),
atomic_long_read(&pblk->sync_reads));
}
#endif
/*
 * "gc_force" attribute (write-only): parse an integer from @page and hand it
 * to pblk_gc_sysfs_force().
 *
 * Returns @len on success, -EINVAL if no newline is found within the first
 * @len bytes or the value does not parse.
 */
static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
				   size_t len)
{
	int force;

	if (strcspn(page, "\n") >= len)
		return -EINVAL;

	if (kstrtouint(page, 0, &force) != 0)
		return -EINVAL;

	pblk_gc_sysfs_force(pblk, force);

	return len;
}
/*
 * "max_sec_per_write" attribute (write side): update sec_per_write.
 *
 * The new value must lie in [min_write_pgs, max_write_pgs] and be a multiple
 * of min_write_pgs. The change is refused when OOB metadata is not supported
 * (packed metadata case).
 *
 * Returns @len on success, -EINVAL on any rejected input.
 */
static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
					    const char *page, size_t len)
{
	int sec_per_write;

	if (strcspn(page, "\n") >= len)
		return -EINVAL;

	if (kstrtouint(page, 0, &sec_per_write) != 0)
		return -EINVAL;

	/* For packed metadata case it is not allowed to change
	 * sec_per_write.
	 */
	if (!pblk_is_oob_meta_supported(pblk))
		return -EINVAL;

	if (sec_per_write < pblk->min_write_pgs)
		return -EINVAL;
	if (sec_per_write > pblk->max_write_pgs)
		return -EINVAL;
	if (sec_per_write % pblk->min_write_pgs != 0)
		return -EINVAL;

	pblk_set_sec_per_write(pblk, sec_per_write);

	return len;
}
/*
 * "write_amp_trip" attribute (write side): writing 0 snapshots the current
 * user/pad/gc counters into the *_rst_wa fields, which the trip read-out
 * subtracts. Any other value is rejected.
 *
 * Returns @len on success, -EINVAL otherwise.
 */
static ssize_t pblk_sysfs_set_write_amp_trip(struct pblk *pblk,
					     const char *page, size_t len)
{
	int reset_value;

	if (strcspn(page, "\n") >= len)
		return -EINVAL;

	if (kstrtouint(page, 0, &reset_value) != 0)
		return -EINVAL;

	/* Only "0" is accepted as a reset request. */
	if (reset_value)
		return -EINVAL;

	pblk->user_rst_wa = atomic64_read(&pblk->user_wa);
	pblk->pad_rst_wa = atomic64_read(&pblk->pad_wa);
	pblk->gc_rst_wa = atomic64_read(&pblk->gc_wa);

	return len;
}
/*
 * "padding_dist" attribute (write side): writing 0 clears the pad_dist[]
 * buckets and snapshots nr_flush into nr_flush_rst. Any other value is
 * rejected.
 *
 * Returns @len on success, -EINVAL otherwise.
 */
static ssize_t pblk_sysfs_set_padding_dist(struct pblk *pblk,
					   const char *page, size_t len)
{
	int nr_buckets = pblk->min_write_pgs - 1;
	int reset_value;
	int b;

	if (strcspn(page, "\n") >= len)
		return -EINVAL;

	if (kstrtouint(page, 0, &reset_value) != 0)
		return -EINVAL;

	/* Only "0" is accepted as a reset request. */
	if (reset_value)
		return -EINVAL;

	for (b = 0; b < nr_buckets; b++)
		atomic64_set(&pblk->pad_dist[b], 0);

	pblk->nr_flush_rst = atomic64_read(&pblk->nr_flush);

	return len;
}
/*
 * sysfs attribute descriptors for a pblk instance. The .name of each entry is
 * the string pblk_sysfs_show()/pblk_sysfs_store() match on. Mode 0444
 * entries are read-only, 0200 is write-only and 0644 entries are readable by
 * all and writable by the owner.
 */
static struct attribute sys_write_luns = {
.name = "write_luns",
.mode = 0444,
};
static struct attribute sys_rate_limiter_attr = {
.name = "rate_limiter",
.mode = 0444,
};
static struct attribute sys_gc_state = {
.name = "gc_state",
.mode = 0444,
};
static struct attribute sys_errors_attr = {
.name = "errors",
.mode = 0444,
};
static struct attribute sys_rb_attr = {
.name = "write_buffer",
.mode = 0444,
};
static struct attribute sys_stats_ppaf_attr = {
.name = "ppa_format",
.mode = 0444,
};
static struct attribute sys_lines_attr = {
.name = "lines",
.mode = 0444,
};
static struct attribute sys_lines_info_attr = {
.name = "lines_info",
.mode = 0444,
};
/* Write-only: handled by pblk_sysfs_gc_force(). */
static struct attribute sys_gc_force = {
.name = "gc_force",
.mode = 0200,
};
static struct attribute sys_max_sec_per_write = {
.name = "max_sec_per_write",
.mode = 0644,
};
static struct attribute sys_write_amp_mileage = {
.name = "write_amp_mileage",
.mode = 0444,
};
/* Writable: writing 0 resets the trip counters. */
static struct attribute sys_write_amp_trip = {
.name = "write_amp_trip",
.mode = 0644,
};
/* Writable: writing 0 resets the padding distribution. */
static struct attribute sys_padding_dist = {
.name = "padding_dist",
.mode = 0644,
};
#ifdef CONFIG_NVM_PBLK_DEBUG
/* Only present when CONFIG_NVM_PBLK_DEBUG is set. */
static struct attribute sys_stats_debug_attr = {
.name = "stats",
.mode = 0444,
};
#endif
/*
 * NULL-terminated list of the attributes above; installed as the default
 * attributes of pblk_ktype. The debug "stats" entry is included only when
 * CONFIG_NVM_PBLK_DEBUG is set.
 */
static struct attribute *pblk_attrs[] = {
&sys_write_luns,
&sys_rate_limiter_attr,
&sys_errors_attr,
&sys_gc_state,
&sys_gc_force,
&sys_max_sec_per_write,
&sys_rb_attr,
&sys_stats_ppaf_attr,
&sys_lines_attr,
&sys_lines_info_attr,
&sys_write_amp_mileage,
&sys_write_amp_trip,
&sys_padding_dist,
#ifdef CONFIG_NVM_PBLK_DEBUG
&sys_stats_debug_attr,
#endif
NULL,
};
/*
 * sysfs ->show() callback: recover the pblk instance from its embedded
 * kobject and dispatch to the reader that matches the attribute name.
 *
 * Returns the reader's result, or 0 if the name is not recognised.
 */
static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
			       char *buf)
{
	struct pblk *pblk = container_of(kobj, struct pblk, kobj);
	const char *name = attr->name;

	if (!strcmp(name, "rate_limiter"))
		return pblk_sysfs_rate_limiter(pblk, buf);
	if (!strcmp(name, "write_luns"))
		return pblk_sysfs_luns_show(pblk, buf);
	if (!strcmp(name, "gc_state"))
		return pblk_sysfs_gc_state_show(pblk, buf);
	if (!strcmp(name, "errors"))
		return pblk_sysfs_stats(pblk, buf);
	if (!strcmp(name, "write_buffer"))
		return pblk_sysfs_write_buffer(pblk, buf);
	if (!strcmp(name, "ppa_format"))
		return pblk_sysfs_ppaf(pblk, buf);
	if (!strcmp(name, "lines"))
		return pblk_sysfs_lines(pblk, buf);
	if (!strcmp(name, "lines_info"))
		return pblk_sysfs_lines_info(pblk, buf);
	if (!strcmp(name, "max_sec_per_write"))
		return pblk_sysfs_get_sec_per_write(pblk, buf);
	if (!strcmp(name, "write_amp_mileage"))
		return pblk_sysfs_get_write_amp_mileage(pblk, buf);
	if (!strcmp(name, "write_amp_trip"))
		return pblk_sysfs_get_write_amp_trip(pblk, buf);
	if (!strcmp(name, "padding_dist"))
		return pblk_sysfs_get_padding_dist(pblk, buf);
#ifdef CONFIG_NVM_PBLK_DEBUG
	if (!strcmp(name, "stats"))
		return pblk_sysfs_stats_debug(pblk, buf);
#endif

	return 0;
}
/*
 * sysfs ->store() callback: recover the pblk instance from its embedded
 * kobject and dispatch to the writer that matches the attribute name.
 *
 * Returns the writer's result, or 0 if the name is not recognised.
 */
static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
				const char *buf, size_t len)
{
	struct pblk *pblk = container_of(kobj, struct pblk, kobj);
	const char *name = attr->name;

	if (!strcmp(name, "gc_force"))
		return pblk_sysfs_gc_force(pblk, buf, len);
	if (!strcmp(name, "max_sec_per_write"))
		return pblk_sysfs_set_sec_per_write(pblk, buf, len);
	if (!strcmp(name, "write_amp_trip"))
		return pblk_sysfs_set_write_amp_trip(pblk, buf, len);
	if (!strcmp(name, "padding_dist"))
		return pblk_sysfs_set_padding_dist(pblk, buf, len);

	return 0;
}
/* sysfs read/write entry points shared by every pblk attribute. */
static const struct sysfs_ops pblk_sysfs_ops = {
.show = pblk_sysfs_show,
.store = pblk_sysfs_store,
};
/*
 * kobject type for pblk->kobj: wires up the sysfs ops and the default
 * attribute list. No .release callback is set here.
 */
static struct kobj_type pblk_ktype = {
.sysfs_ops = &pblk_sysfs_ops,
.default_attrs = pblk_attrs,
};
/*
 * Register the "pblk" kobject underneath the target disk's device and emit
 * a KOBJ_ADD uevent for it.
 *
 * @tdisk: target gendisk; its private_data is the pblk instance.
 *
 * Returns 0 on success or the error from kobject_init_and_add().
 *
 * NOTE(review): the kobject documentation says kobject_put() must be called
 * when kobject_init_and_add() fails; this path does not do so, and it also
 * keeps the extra parent reference taken by kobject_get() below. Confirm
 * whether the caller's teardown makes that harmless.
 */
int pblk_sysfs_init(struct gendisk *tdisk)
{
	struct pblk *pblk = tdisk->private_data;
	struct device *parent_dev = disk_to_dev(pblk->disk);
	struct kobject *parent = kobject_get(&parent_dev->kobj);
	int err;

	err = kobject_init_and_add(&pblk->kobj, &pblk_ktype, parent,
				   "%s", "pblk");
	if (!err) {
		kobject_uevent(&pblk->kobj, KOBJ_ADD);
		return 0;
	}

	pblk_err(pblk, "could not register\n");
	return err;
}
/*
 * Undo pblk_sysfs_init(): announce removal, unlink the "pblk" kobject from
 * sysfs and drop the reference held on it.
 *
 * @tdisk: target gendisk; its private_data is the pblk instance.
 */
void pblk_sysfs_exit(struct gendisk *tdisk)
{
struct pblk *pblk = tdisk->private_data;
kobject_uevent(&pblk->kobj, KOBJ_REMOVE);
kobject_del(&pblk->kobj);
kobject_put(&pblk->kobj);
}

View File

@ -1,145 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM pblk
#if !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PBLK_H
#include <linux/tracepoint.h>
struct ppa_addr;
/*
 * Render a chunk state bitmask as text: each NVM_CHK_ST_* bit set in @state
 * is printed by name (empty delimiter between names).
 */
#define show_chunk_flags(state) __print_flags(state, "", \
{ NVM_CHK_ST_FREE, "FREE", }, \
{ NVM_CHK_ST_CLOSED, "CLOSED", }, \
{ NVM_CHK_ST_OPEN, "OPEN", }, \
{ NVM_CHK_ST_OFFLINE, "OFFLINE", })
/* Map a PBLK_LINESTATE_* value to its printable name. */
#define show_line_state(state) __print_symbolic(state, \
{ PBLK_LINESTATE_NEW, "NEW", }, \
{ PBLK_LINESTATE_FREE, "FREE", }, \
{ PBLK_LINESTATE_OPEN, "OPEN", }, \
{ PBLK_LINESTATE_CLOSED, "CLOSED", }, \
{ PBLK_LINESTATE_GC, "GC", }, \
{ PBLK_LINESTATE_BAD, "BAD", }, \
{ PBLK_LINESTATE_CORRUPT, "CORRUPT" })
/* Map a PBLK_STATE_* value to its printable name. */
#define show_pblk_state(state) __print_symbolic(state, \
{ PBLK_STATE_RUNNING, "RUNNING", }, \
{ PBLK_STATE_STOPPING, "STOPPING", }, \
{ PBLK_STATE_RECOVERING, "RECOVERING", }, \
{ PBLK_STATE_STOPPED, "STOPPED" })
/* Map a PBLK_CHUNK_RESET_* value to its printable name. */
#define show_chunk_erase_state(state) __print_symbolic(state, \
{ PBLK_CHUNK_RESET_START, "START", }, \
{ PBLK_CHUNK_RESET_DONE, "OK", }, \
{ PBLK_CHUNK_RESET_FAILED, "FAILED" })
/*
 * pblk_chunk_reset: records the device name, the raw 64-bit ppa and a reset
 * state. On output the stored ppa is reinterpreted as a struct ppa_addr and
 * its m.grp/m.pu/m.chk fields are printed; the state is rendered with
 * show_chunk_erase_state().
 */
TRACE_EVENT(pblk_chunk_reset,
TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
TP_ARGS(name, ppa, state),
TP_STRUCT__entry(
__string(name, name)
__field(u64, ppa)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->ppa = ppa->ppa;
__entry->state = state;
),
TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
show_chunk_erase_state((int)__entry->state))
);
/*
 * pblk_chunk_state: same record layout as pblk_chunk_reset (device name, raw
 * ppa, state), but the state is a flag mask rendered with
 * show_chunk_flags().
 */
TRACE_EVENT(pblk_chunk_state,
TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
TP_ARGS(name, ppa, state),
TP_STRUCT__entry(
__string(name, name)
__field(u64, ppa)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->ppa = ppa->ppa;
__entry->state = state;
),
TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
show_chunk_flags((int)__entry->state))
);
/*
 * pblk_line_state: records the device name, a line number and a line state;
 * the state is rendered with show_line_state().
 */
TRACE_EVENT(pblk_line_state,
TP_PROTO(const char *name, int line, int state),
TP_ARGS(name, line, state),
TP_STRUCT__entry(
__string(name, name)
__field(int, line)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->line = line;
__entry->state = state;
),
TP_printk("dev=%s line=%d state=%s", __get_str(name),
(int)__entry->line,
show_line_state((int)__entry->state))
);
/*
 * pblk_state: records the device name and an instance state; the state is
 * rendered with show_pblk_state().
 */
TRACE_EVENT(pblk_state,
TP_PROTO(const char *name, int state),
TP_ARGS(name, state),
TP_STRUCT__entry(
__string(name, name)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->state = state;
),
TP_printk("dev=%s state=%s", __get_str(name),
show_pblk_state((int)__entry->state))
);
#endif /* !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) */
/* This part must be outside protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../drivers/lightnvm
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE pblk-trace
#include <trace/define_trace.h>

View File

@ -1,665 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-write.c - pblk's write path from write buffer to media
*/
#include "pblk.h"
#include "pblk-trace.h"
/*
 * Finish a write request whose buffer entries start at c_ctx->sentry.
 *
 * For each of the c_ctx->nr_valid entries: clear PBLK_FLUSH_ENTRY if it is
 * set and end every bio queued on the entry's write context. Afterwards the
 * padded pages (if any) are freed, the ring buffer sync position is advanced
 * by nr_valid, and the request's bio and rqd are released.
 *
 * Returns the value of pblk_rb_sync_advance().
 */
static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx)
{
struct bio *original_bio;
struct pblk_rb *rwb = &pblk->rwb;
unsigned long ret;
int i;
for (i = 0; i < c_ctx->nr_valid; i++) {
struct pblk_w_ctx *w_ctx;
int pos = c_ctx->sentry + i;
int flags;
w_ctx = pblk_rb_w_ctx(rwb, pos);
flags = READ_ONCE(w_ctx->flags);
if (flags & PBLK_FLUSH_ENTRY) {
flags &= ~PBLK_FLUSH_ENTRY;
/* Release flags on context. Protect from writes */
smp_store_release(&w_ctx->flags, flags);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_dec(&rwb->inflight_flush_point);
#endif
}
/* Complete every bio that was attached to this entry */
while ((original_bio = bio_list_pop(&w_ctx->bios)))
bio_endio(original_bio);
}
if (c_ctx->nr_padded)
pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
c_ctx->nr_padded);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(rqd->nr_ppas, &pblk->sync_writes);
#endif
ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
bio_put(rqd->bio);
pblk_free_rqd(pblk, rqd, PBLK_WRITE);
return ret;
}
/*
 * Finish a write that had been parked on a list: unlink its context first,
 * then complete it through pblk_end_w_bio() and pass that result back.
 */
static unsigned long pblk_end_queued_w_bio(struct pblk *pblk,
					   struct nvm_rq *rqd,
					   struct pblk_c_ctx *c_ctx)
{
	unsigned long pos;

	list_del(&c_ctx->list);
	pos = pblk_end_w_bio(pblk, rqd, c_ctx);

	return pos;
}
/*
 * Complete a successful data write, in ring-buffer order.
 *
 * If the position returned by pblk_rb_sync_init() equals this request's
 * sentry, the request is completed immediately and compl_list is then
 * rescanned for a queued request whose sentry equals the new position; the
 * scan restarts after every completion. Otherwise the request is appended to
 * compl_list to be completed later.
 */
static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx)
{
struct pblk_c_ctx *c, *r;
unsigned long flags;
unsigned long pos;
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes);
#endif
pblk_up_rq(pblk, c_ctx->lun_bitmap);
/* presumably takes the sync lock with IRQ state saved in flags - TODO confirm */
pos = pblk_rb_sync_init(&pblk->rwb, &flags);
if (pos == c_ctx->sentry) {
pos = pblk_end_w_bio(pblk, rqd, c_ctx);
retry:
list_for_each_entry_safe(c, r, &pblk->compl_list, list) {
rqd = nvm_rq_from_c_ctx(c);
if (c->sentry == pos) {
pos = pblk_end_queued_w_bio(pblk, rqd, c);
/* pos moved: rescan the list from its head */
goto retry;
}
}
} else {
WARN_ON(nvm_rq_from_c_ctx(c_ctx) != rqd);
list_add_tail(&c_ctx->list, &pblk->compl_list);
}
pblk_rb_sync_end(&pblk->rwb, &flags);
}
/*
 * Map remaining sectors in chunk, starting from ppa
 *
 * Walks the chunk with nvm_next_ppa_in_chk() until it reports the end. For
 * every address visited, under line->lock:
 *  - set its bit in map_bitmap, decrementing left_msecs if it was clear;
 *  - for the first @rqd_ppas addresses whose lba entry is not empty,
 *    decrement nr_valid_lbas;
 *  - overwrite the lba entry with ADDR_EMPTY;
 *  - set its bit in invalid_bitmap, decrementing *line->vsc if it was clear.
 * Finally flags the line as having a write error.
 */
static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa,
int rqd_ppas)
{
struct pblk_line *line;
struct ppa_addr map_ppa = *ppa;
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
__le64 *lba_list;
u64 paddr;
int done = 0;
int n = 0;
line = pblk_ppa_to_line(pblk, *ppa);
lba_list = emeta_to_lbas(pblk, line->emeta->buf);
spin_lock(&line->lock);
while (!done) {
paddr = pblk_dev_ppa_to_line_addr(pblk, map_ppa);
if (!test_and_set_bit(paddr, line->map_bitmap))
line->left_msecs--;
if (n < rqd_ppas && lba_list[paddr] != addr_empty)
line->nr_valid_lbas--;
lba_list[paddr] = addr_empty;
if (!test_and_set_bit(paddr, line->invalid_bitmap))
le32_add_cpu(line->vsc, -1);
done = nvm_next_ppa_in_chk(pblk->dev, &map_ppa);
n++;
}
line->w_err_gc->has_write_err = 1;
spin_unlock(&line->lock);
}
/*
 * Prepare @nr_entries ring buffer entries, starting at @sentry, for being
 * written again.
 *
 * Under trans_lock, for each entry: if its lba is set but the L2P table no
 * longer points at the entry's cacheline, the lba is cleared to ADDR_EMPTY;
 * PBLK_WRITTEN_DATA is set in the entry flags; and the references the entry
 * held on its previously mapped line (sec_to_update and the line kref) are
 * dropped.
 */
static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry,
unsigned int nr_entries)
{
struct pblk_rb *rb = &pblk->rwb;
struct pblk_rb_entry *entry;
struct pblk_line *line;
struct pblk_w_ctx *w_ctx;
struct ppa_addr ppa_l2p;
int flags;
unsigned int i;
spin_lock(&pblk->trans_lock);
for (i = 0; i < nr_entries; i++) {
entry = &rb->entries[pblk_rb_ptr_wrap(rb, sentry, i)];
w_ctx = &entry->w_ctx;
/* Check if the lba has been overwritten */
if (w_ctx->lba != ADDR_EMPTY) {
ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba);
if (!pblk_ppa_comp(ppa_l2p, entry->cacheline))
w_ctx->lba = ADDR_EMPTY;
}
/* Mark up the entry as submittable again */
flags = READ_ONCE(w_ctx->flags);
flags |= PBLK_WRITTEN_DATA;
/* Release flags on write context. Protect from writes */
smp_store_release(&w_ctx->flags, flags);
/* Decrease the reference count to the line as we will
* re-map these entries
*/
line = pblk_ppa_to_line(pblk, w_ctx->ppa);
atomic_dec(&line->sec_to_update);
kref_put(&line->ref, pblk_line_put);
}
spin_unlock(&pblk->trans_lock);
}
/*
 * Queue a copy of @c_ctx (sentry, nr_valid, nr_padded; no lun bitmap) at the
 * tail of pblk->resubmit_list so the write thread can submit it again.
 *
 * If the allocation fails the function returns without queuing anything.
 */
static void pblk_queue_resubmit(struct pblk *pblk, struct pblk_c_ctx *c_ctx)
{
	struct pblk_c_ctx *copy = kzalloc(sizeof(*copy), GFP_KERNEL);

	if (!copy)
		return;

	copy->nr_padded = c_ctx->nr_padded;
	copy->nr_valid = c_ctx->nr_valid;
	copy->sentry = c_ctx->sentry;
	copy->lun_bitmap = NULL;

	spin_lock(&pblk->resubmit_lock);
	list_add_tail(&copy->list, &pblk->resubmit_list);
	spin_unlock(&pblk->resubmit_lock);

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_long_add(c_ctx->nr_valid, &pblk->recov_writes);
#endif
}
/*
 * Work handler for a failed data write (queued by pblk_end_w_fail()).
 *
 * Logs the error, maps out the rest of the chunk starting at the request's
 * first ppa, queues the request's entries for resubmission, then releases
 * everything the failed request held (LUN bitmap semaphores, padded pages,
 * bio, rqd, recovery context) and kicks the write thread.
 */
static void pblk_submit_rec(struct work_struct *work)
{
struct pblk_rec_ctx *recovery =
container_of(work, struct pblk_rec_ctx, ws_rec);
struct pblk *pblk = recovery->pblk;
struct nvm_rq *rqd = recovery->rqd;
struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
pblk_log_write_err(pblk, rqd);
pblk_map_remaining(pblk, ppa_list, rqd->nr_ppas);
/* c_ctx lives inside rqd: copy what is needed before rqd is freed */
pblk_queue_resubmit(pblk, c_ctx);
pblk_up_rq(pblk, c_ctx->lun_bitmap);
if (c_ctx->nr_padded)
pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
c_ctx->nr_padded);
bio_put(rqd->bio);
pblk_free_rqd(pblk, rqd, PBLK_WRITE);
mempool_free(recovery, &pblk->rec_pool);
atomic_dec(&pblk->inflight_io);
pblk_write_kick(pblk);
}
/*
 * Hand a failed write request over to pblk_submit_rec() by queuing a
 * recovery work item on pblk->close_wq.
 *
 * If no recovery context can be allocated the failure is only logged.
 */
static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct pblk_rec_ctx *rec = mempool_alloc(&pblk->rec_pool, GFP_ATOMIC);

	if (!rec) {
		pblk_err(pblk, "could not allocate recovery work\n");
		return;
	}

	rec->rqd = rqd;
	rec->pblk = pblk;

	INIT_WORK(&rec->ws_rec, pblk_submit_rec);
	queue_work(pblk->close_wq, &rec->ws_rec);
}
/*
 * Completion callback for data writes.
 *
 * A request that completed with an error is passed to pblk_end_w_fail() and
 * nothing else is done here. Otherwise the chunk state is checked (only when
 * the pblk_chunk_state tracepoint is enabled), the write is completed and
 * the inflight I/O counter is decremented.
 */
static void pblk_end_io_write(struct nvm_rq *rqd)
{
	struct pblk *pblk = rqd->private;
	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);

	if (rqd->error) {
		pblk_end_w_fail(pblk, rqd);
		return;
	}

	if (trace_pblk_chunk_state_enabled())
		pblk_check_chunk_state_update(pblk, rqd);
#ifdef CONFIG_NVM_PBLK_DEBUG
	WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
#endif

	pblk_complete_write(pblk, rqd, c_ctx);
	atomic_dec(&pblk->inflight_io);
}
/*
 * Completion callback for emeta (line metadata) writes.
 *
 * Releases the chunk taken for the request's first ppa, records a write
 * error on the line if the request failed, and adds the request's sector
 * count to emeta->sync. When that sum reaches emeta->nr_entries the line
 * close work (pblk_line_close_ws) is scheduled on close_wq.
 */
static void pblk_end_io_write_meta(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
struct pblk_line *line = m_ctx->private;
struct pblk_emeta *emeta = line->emeta;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
int sync;
pblk_up_chunk(pblk, ppa_list[0]);
if (rqd->error) {
pblk_log_write_err(pblk, rqd);
pblk_err(pblk, "metadata I/O failed. Line %d\n", line->id);
line->w_err_gc->has_write_err = 1;
} else {
if (trace_pblk_chunk_state_enabled())
pblk_check_chunk_state_update(pblk, rqd);
}
/* The sectors are counted as synced even when the request failed */
sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
if (sync == emeta->nr_entries)
pblk_gen_run_ws(pblk, line, NULL, pblk_line_close_ws,
GFP_ATOMIC, pblk->close_wq);
pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
atomic_dec(&pblk->inflight_io);
}
/*
 * Initialise @rqd as a sequential NVM_OP_PWRITE of @nr_secs sectors owned by
 * @pblk, completing through @end_io, and allocate its metadata.
 *
 * Returns the result of pblk_alloc_rqd_meta().
 */
static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
			   unsigned int nr_secs, nvm_end_io_fn(*end_io))
{
	/* Ownership and completion handler */
	rqd->private = pblk;
	rqd->end_io = end_io;

	/* Command description */
	rqd->opcode = NVM_OP_PWRITE;
	rqd->is_seq = 1;
	rqd->nr_ppas = nr_secs;

	return pblk_alloc_rqd_meta(pblk, rqd);
}
/*
 * Set up a data write request: allocate its LUN bitmap, initialise the
 * request and map its sectors.
 *
 * When there is an erase line with blocks still left to erase, the mapping
 * goes through pblk_map_erase_rq(), which receives @erase_ppa; otherwise
 * pblk_map_rq() is used.
 *
 * Returns -ENOMEM if the bitmap cannot be allocated, the error from
 * pblk_alloc_w_rq() (bitmap freed), or the result of the mapping call.
 */
static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
			   struct ppa_addr *erase_ppa)
{
	struct pblk_line *e_line = pblk_line_get_erase(pblk);
	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
	unsigned int nr_valid = c_ctx->nr_valid;
	unsigned int nr_secs = nr_valid + c_ctx->nr_padded;
	unsigned long *bitmap;
	int err;

	bitmap = kzalloc(pblk->lm.lun_bitmap_len, GFP_KERNEL);
	if (!bitmap)
		return -ENOMEM;
	c_ctx->lun_bitmap = bitmap;

	err = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
	if (err) {
		kfree(bitmap);
		return err;
	}

	/* Erases still pending on the next line: map data and an erase */
	if (unlikely(e_line && atomic_read(&e_line->left_eblks)))
		return pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, bitmap,
					 nr_valid, erase_ppa);

	return pblk_map_rq(pblk, rqd, c_ctx->sentry, bitmap, nr_valid, 0);
}
/*
 * Compute how many sectors to sync via pblk_calc_secs() and, in debug
 * builds, log an error when the result looks inconsistent with
 * @secs_avail / @secs_to_flush.
 *
 * Returns the value computed by pblk_calc_secs().
 */
static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
				  unsigned int secs_to_flush)
{
	int secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush,
					  true);

#ifdef CONFIG_NVM_PBLK_DEBUG
	if ((!secs_to_sync && secs_to_flush) ||
	    (secs_to_sync < 0) ||
	    (secs_to_sync > secs_avail && !secs_to_flush))
		pblk_err(pblk, "bad sector calculation (a:%d,s:%d,f:%d)\n",
			 secs_avail, secs_to_sync, secs_to_flush);
#endif

	return secs_to_sync;
}
int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_emeta *emeta = meta_line->emeta;
struct ppa_addr *ppa_list;
struct pblk_g_ctx *m_ctx;
struct nvm_rq *rqd;
void *data;
u64 paddr;
int rq_ppas = pblk->min_write_pgs;
int id = meta_line->id;
int rq_len;
int i, j;
int ret;
rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
m_ctx = nvm_rq_to_pdu(rqd);
m_ctx->private = meta_line;
rq_len = rq_ppas * geo->csecs;
data = ((void *)emeta->buf) + emeta->mem;
ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
if (ret)
goto fail_free_rqd;
ppa_list = nvm_rq_to_ppa_list(rqd);
for (i = 0; i < rqd->nr_ppas; ) {
spin_lock(&meta_line->lock);
paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
spin_unlock(&meta_line->lock);
for (j = 0; j < rq_ppas; j++, i++, paddr++)
ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
}
spin_lock(&l_mg->close_lock);
emeta->mem += rq_len;
if (emeta->mem >= lm->emeta_len[0])
list_del(&meta_line->list);
spin_unlock(&l_mg->close_lock);
pblk_down_chunk(pblk, ppa_list[0]);
ret = pblk_submit_io(pblk, rqd, data);
if (ret) {
pblk_err(pblk, "emeta I/O submission failed: %d\n", ret);
goto fail_rollback;
}
return NVM_IO_OK;
fail_rollback:
pblk_up_chunk(pblk, ppa_list[0]);
spin_lock(&l_mg->close_lock);
pblk_dealloc_page(pblk, meta_line, rq_ppas);
list_add(&meta_line->list, &meta_line->list);
spin_unlock(&l_mg->close_lock);
fail_free_rqd:
pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
return ret;
}
/*
 * Decide whether the next metadata page of @meta_line may be submitted
 * together with @data_rqd.
 *
 * Computes the LUN position of the page that lies data_line->meta_distance
 * past the line's next page. Returns true if that position is set in the
 * data request's lun_bitmap or in the data line's blk_bitmap. Otherwise
 * returns false and, when the offset page compares equal to the next page
 * itself, decrements data_line->meta_distance.
 */
static inline bool pblk_valid_meta_ppa(struct pblk *pblk,
struct pblk_line *meta_line,
struct nvm_rq *data_rqd)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_c_ctx *data_c_ctx = nvm_rq_to_pdu(data_rqd);
struct pblk_line *data_line = pblk_line_get_data(pblk);
struct ppa_addr ppa, ppa_opt;
u64 paddr;
int pos_opt;
/* Schedule a metadata I/O that is half the distance from the data I/O
* with regards to the number of LUNs forming the pblk instance. This
* balances LUN conflicts across every I/O.
*
* When the LUN configuration changes (e.g., due to GC), this distance
* can align, which would result on metadata and data I/Os colliding. In
* this case, modify the distance to not be optimal, but move the
* optimal in the right direction.
*/
paddr = pblk_lookup_page(pblk, meta_line);
ppa = addr_to_gen_ppa(pblk, paddr, 0);
ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
pos_opt = pblk_ppa_to_pos(geo, ppa_opt);
if (test_bit(pos_opt, data_c_ctx->lun_bitmap) ||
test_bit(pos_opt, data_line->blk_bitmap))
return true;
if (unlikely(pblk_ppa_comp(ppa_opt, ppa)))
data_line->meta_distance--;
return false;
}
/*
 * Pick the line whose emeta should be written alongside @data_rqd, if any.
 *
 * The candidate is the first line on l_mg->emeta_list, provided its emeta
 * buffer has not been fully handed out yet and pblk_valid_meta_ppa()
 * accepts it. The list is inspected under close_lock; the ppa check runs
 * after the lock is dropped.
 *
 * Returns the line, or NULL when no metadata I/O should be submitted.
 */
static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk,
						    struct nvm_rq *data_rqd)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line *candidate = NULL;

	spin_lock(&l_mg->close_lock);
	if (!list_empty(&l_mg->emeta_list)) {
		candidate = list_first_entry(&l_mg->emeta_list,
					     struct pblk_line, list);
		if (candidate->emeta->mem >= lm->emeta_len[0])
			candidate = NULL;
	}
	spin_unlock(&l_mg->close_lock);

	if (!candidate)
		return NULL;

	if (pblk_valid_meta_ppa(pblk, candidate, data_rqd))
		return candidate;

	return NULL;
}
/*
 * Submit one data write plus the I/Os that ride along with it.
 *
 * After the request is set up, up to three submissions are made, in order:
 *  1. the data write itself;
 *  2. an asynchronous erase for the next data line, if the mapping step
 *     returned an erase ppa (on failure the erase is given back to the
 *     erase line's accounting);
 *  3. a metadata write for the line chosen by pblk_should_submit_meta_io().
 *
 * Returns NVM_IO_OK on success and NVM_IO_ERR if setup, the data write or
 * the metadata write fails.
 */
static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct ppa_addr erase_ppa;
	struct pblk_line *meta_line;
	int err;

	pblk_ppa_set_empty(&erase_ppa);

	/* Assign lbas to ppas and populate request structure */
	err = pblk_setup_w_rq(pblk, rqd, &erase_ppa);
	if (err) {
		pblk_err(pblk, "could not setup write request: %d\n", err);
		return NVM_IO_ERR;
	}

	meta_line = pblk_should_submit_meta_io(pblk, rqd);

	/* Submit data write for current data line */
	err = pblk_submit_io(pblk, rqd, NULL);
	if (err) {
		pblk_err(pblk, "data I/O submission failed: %d\n", err);
		return NVM_IO_ERR;
	}

	if (!pblk_ppa_empty(erase_ppa)) {
		/* Submit erase for next data line */
		if (pblk_blk_erase_async(pblk, erase_ppa)) {
			struct pblk_line *e_line = pblk_line_get_erase(pblk);
			struct nvm_tgt_dev *dev = pblk->dev;
			struct nvm_geo *geo = &dev->geo;
			int bit;

			/* Erase not issued: account the block as pending */
			atomic_inc(&e_line->left_eblks);
			bit = pblk_ppa_to_pos(geo, erase_ppa);
			WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
		}
	}

	if (meta_line) {
		/* Submit metadata write for previous data line */
		err = pblk_submit_meta_io(pblk, meta_line);
		if (err) {
			pblk_err(pblk, "metadata I/O submission failed: %d\n",
				 err);
			return NVM_IO_ERR;
		}
	}

	return NVM_IO_OK;
}
/*
 * Release the padded pages attached to a write request's bio, if the
 * request carries any padding. Nothing else is freed here.
 */
static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);

	if (!c_ctx->nr_padded)
		return;

	pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
			    c_ctx->nr_padded);
}
/*
 * Build and submit one write request from the write buffer.
 *
 * Entries waiting on pblk->resubmit_list (failed writes) take priority over
 * new buffer data. For new data, nothing is submitted when the buffer is
 * empty, or when there is no flush point and fewer than min_write_pgs_data
 * sectors are available.
 *
 * @secs_left: set to 1 when a request was submitted, 0 otherwise.
 *
 * Returns 0 when a request was submitted or there was nothing to do, and
 * -EINTR when reading the buffer into the bio or submitting the I/O set
 * failed.
 */
static int pblk_submit_write(struct pblk *pblk, int *secs_left)
{
struct bio *bio;
struct nvm_rq *rqd;
unsigned int secs_avail, secs_to_sync, secs_to_com;
unsigned int secs_to_flush, packed_meta_pgs;
unsigned long pos;
unsigned int resubmit;
*secs_left = 0;
spin_lock(&pblk->resubmit_lock);
resubmit = !list_empty(&pblk->resubmit_list);
spin_unlock(&pblk->resubmit_lock);
/* Resubmit failed writes first */
if (resubmit) {
struct pblk_c_ctx *r_ctx;
spin_lock(&pblk->resubmit_lock);
r_ctx = list_first_entry(&pblk->resubmit_list,
struct pblk_c_ctx, list);
list_del(&r_ctx->list);
spin_unlock(&pblk->resubmit_lock);
secs_avail = r_ctx->nr_valid;
pos = r_ctx->sentry;
pblk_prepare_resubmit(pblk, pos, secs_avail);
/* All resubmitted sectors are passed as sectors to flush */
secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
secs_avail);
kfree(r_ctx);
} else {
/* If there are no sectors in the cache,
* flushes (bios without data) will be cleared on
* the cache threads
*/
secs_avail = pblk_rb_read_count(&pblk->rwb);
if (!secs_avail)
return 0;
secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
if (!secs_to_flush && secs_avail < pblk->min_write_pgs_data)
return 0;
secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
secs_to_flush);
if (secs_to_sync > pblk->max_write_pgs) {
pblk_err(pblk, "bad buffer sync calculation\n");
return 0;
}
/* Never commit more sectors than are available in the buffer */
secs_to_com = (secs_to_sync > secs_avail) ?
secs_avail : secs_to_sync;
pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
}
/* Extra bio room for the difference between min_write_pgs and min_write_pgs_data */
packed_meta_pgs = (pblk->min_write_pgs - pblk->min_write_pgs_data);
bio = bio_alloc(GFP_KERNEL, secs_to_sync + packed_meta_pgs);
bio->bi_iter.bi_sector = 0; /* internal bio */
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
rqd = pblk_alloc_rqd(pblk, PBLK_WRITE);
rqd->bio = bio;
if (pblk_rb_read_to_bio(&pblk->rwb, rqd, pos, secs_to_sync,
secs_avail)) {
pblk_err(pblk, "corrupted write bio\n");
goto fail_put_bio;
}
if (pblk_submit_io_set(pblk, rqd))
goto fail_free_bio;
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(secs_to_sync, &pblk->sub_writes);
#endif
*secs_left = 1;
return 0;
fail_free_bio:
pblk_free_write_rqd(pblk, rqd);
fail_put_bio:
bio_put(bio);
pblk_free_rqd(pblk, rqd, PBLK_WRITE);
return -EINTR;
}
/*
 * Write thread body. @data is the pblk instance.
 *
 * Keeps calling pblk_submit_write() back to back while it reports that a
 * request was submitted; otherwise sleeps (TASK_INTERRUPTIBLE) until woken.
 * Once pblk_submit_write() has returned an error, no further submissions
 * are attempted and the thread only sleeps until it is asked to stop.
 *
 * Always returns 0.
 */
int pblk_write_ts(void *data)
{
struct pblk *pblk = data;
int secs_left;
int write_failure = 0;
while (!kthread_should_stop()) {
if (!write_failure) {
write_failure = pblk_submit_write(pblk, &secs_left);
if (secs_left)
continue;
}
set_current_state(TASK_INTERRUPTIBLE);
io_schedule();
}
return 0;
}

File diff suppressed because it is too large Load Diff

View File

@ -12,7 +12,6 @@ obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
nvme-core-y := core.o ioctl.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o

View File

@ -587,9 +587,6 @@ static void nvme_free_ns(struct kref *kref)
{
struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
if (ns->ndev)
nvme_nvm_unregister(ns);
put_disk(ns->disk);
nvme_put_ns_head(ns->head);
nvme_put_ctrl(ns->ctrl);
@ -3218,9 +3215,6 @@ static const struct attribute_group nvme_ns_id_attr_group = {
const struct attribute_group *nvme_ns_id_attr_groups[] = {
&nvme_ns_id_attr_group,
#ifdef CONFIG_NVM
&nvme_nvm_attr_group,
#endif
NULL,
};
@ -3767,13 +3761,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
if (nvme_update_ns_info(ns, id))
goto out_put_disk;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
if (nvme_nvm_register(ns, disk->disk_name, node)) {
dev_warn(ctrl->device, "LightNVM init failure\n");
goto out_put_disk;
}
}
down_write(&ctrl->namespaces_rwsem);
list_add_tail(&ns->list, &ctrl->namespaces);
up_write(&ctrl->namespaces_rwsem);

View File

@ -342,9 +342,7 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
case NVME_IOCTL_IO64_CMD:
return nvme_user_cmd64(ns->ctrl, ns, argp);
default:
if (!ns->ndev)
return -ENOTTY;
return nvme_nvm_ioctl(ns, cmd, argp);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,6 @@
#include <linux/pci.h>
#include <linux/kref.h>
#include <linux/blk-mq.h>
#include <linux/lightnvm.h>
#include <linux/sed-opal.h>
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
@ -92,11 +91,6 @@ enum nvme_quirks {
*/
NVME_QUIRK_NO_DEEPEST_PS = (1 << 5),
/*
* Supports the LightNVM command set if indicated in vs[1].
*/
NVME_QUIRK_LIGHTNVM = (1 << 6),
/*
* Set MEDIUM priority on SQ creation
*/
@ -823,26 +817,6 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
}
#endif
#ifdef CONFIG_NVM
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
extern const struct attribute_group nvme_nvm_attr_group;
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp);
#else
static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
int node)
{
return 0;
}
static inline void nvme_nvm_unregister(struct nvme_ns *ns) {};
static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
void __user *argp)
{
return -ENOTTY;
}
#endif /* CONFIG_NVM */
static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
{
return dev_to_disk(dev)->private_data;

View File

@ -3243,12 +3243,6 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */

View File

@ -1,697 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef NVM_H
#define NVM_H
#include <linux/blkdev.h>
#include <linux/types.h>
#include <uapi/linux/lightnvm.h>
/*
 * Two unrelated constant groups share this anonymous enum: NVM_IO_* (0-3)
 * and NVM_IOTYPE_* (0-1). Their numeric ranges overlap, so a bare value does
 * not tell which group it came from.
 */
enum {
NVM_IO_OK = 0,
NVM_IO_REQUEUE = 1,
NVM_IO_DONE = 2,
NVM_IO_ERR = 3,
NVM_IOTYPE_NONE = 0,
NVM_IOTYPE_GC = 1,
};
/*
 * Bit-field widths used by struct ppa_addr. Each layout adds up to 64 bits:
 * common 8 + 8 + 16 + 32, where the 32 reserved bits are subdivided as
 * 16 + 4 + 4 + 8 (1.2 format) or 24 + 8 (2.0 format).
 */
/* common format */
#define NVM_GEN_CH_BITS (8)
#define NVM_GEN_LUN_BITS (8)
#define NVM_GEN_BLK_BITS (16)
#define NVM_GEN_RESERVED (32)
/* 1.2 format */
#define NVM_12_PG_BITS (16)
#define NVM_12_PL_BITS (4)
#define NVM_12_SEC_BITS (4)
#define NVM_12_RESERVED (8)
/* 2.0 format */
#define NVM_20_SEC_BITS (24)
#define NVM_20_RESERVED (8)
/*
 * Spec revision identifiers. The address helpers in this header compare
 * nvm_geo.version against NVM_OCSSD_SPEC_12 to choose between the 1.2 and
 * 2.0 address layouts.
 */
enum {
NVM_OCSSD_SPEC_12 = 12,
NVM_OCSSD_SPEC_20 = 20,
};
/*
 * 64-bit address with several overlapping views of the same storage.
 * Every view below is a member of one anonymous union, so writing through one
 * changes what the others read; .ppa is the raw 64-bit value.
 */
struct ppa_addr {
/* Generic structure for all addresses */
union {
/* generic device format */
struct {
u64 ch : NVM_GEN_CH_BITS;
u64 lun : NVM_GEN_LUN_BITS;
u64 blk : NVM_GEN_BLK_BITS;
u64 reserved : NVM_GEN_RESERVED;
} a;
/* 1.2 device format */
struct {
u64 ch : NVM_GEN_CH_BITS;
u64 lun : NVM_GEN_LUN_BITS;
u64 blk : NVM_GEN_BLK_BITS;
u64 pg : NVM_12_PG_BITS;
u64 pl : NVM_12_PL_BITS;
u64 sec : NVM_12_SEC_BITS;
u64 reserved : NVM_12_RESERVED;
} g;
/* 2.0 device format */
struct {
u64 grp : NVM_GEN_CH_BITS;
u64 pu : NVM_GEN_LUN_BITS;
u64 chk : NVM_GEN_BLK_BITS;
u64 sec : NVM_20_SEC_BITS;
u64 reserved : NVM_20_RESERVED;
} m;
/* cache view: 63-bit line number plus a one-bit "is cached" flag */
struct {
u64 line : 63;
u64 is_cached : 1;
} c;
/* raw value; ADDR_EMPTY (all ones) is tested against this member */
u64 ppa;
};
};
/* Forward declarations: the callback types below only need pointers. */
struct nvm_rq;
struct nvm_id;
struct nvm_dev;
struct nvm_tgt_dev;
struct nvm_chk_meta;
/*
 * Function types for the entries of struct nvm_dev_ops. These are function
 * types, not pointer types; the ops table declares pointers to them.
 */
typedef int (nvm_id_fn)(struct nvm_dev *);
typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *);
typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int);
typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int,
struct nvm_chk_meta *);
typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *, void *);
typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int);
typedef void (nvm_destroy_dma_pool_fn)(void *);
typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
dma_addr_t *);
typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
/*
 * Callback table a backend driver supplies; struct nvm_dev points at one
 * through its ->ops member. One pointer per callback type declared above.
 */
struct nvm_dev_ops {
nvm_id_fn *identity;
nvm_op_bb_tbl_fn *get_bb_tbl;
nvm_op_set_bb_fn *set_bb_tbl;
nvm_get_chk_meta_fn *get_chk_meta;
nvm_submit_io_fn *submit_io;
nvm_create_dma_pool_fn *create_dma_pool;
nvm_destroy_dma_pool_fn *destroy_dma_pool;
nvm_dev_dma_alloc_fn *dev_dma_alloc;
nvm_dev_dma_free_fn *dev_dma_free;
};
#ifdef CONFIG_NVM
#include <linux/file.h>
#include <linux/dmapool.h>
enum {
/* HW Responsibilities */
NVM_RSP_L2P = 1 << 0,
NVM_RSP_ECC = 1 << 1,
/* Physical Adressing Mode */
NVM_ADDRMODE_LINEAR = 0,
NVM_ADDRMODE_CHANNEL = 1,
/* Plane programming mode for LUN */
NVM_PLANE_SINGLE = 1,
NVM_PLANE_DOUBLE = 2,
NVM_PLANE_QUAD = 4,
/* Status codes */
NVM_RSP_SUCCESS = 0x0,
NVM_RSP_NOT_CHANGEABLE = 0x1,
NVM_RSP_ERR_FAILWRITE = 0x40ff,
NVM_RSP_ERR_EMPTYPAGE = 0x42ff,
NVM_RSP_ERR_FAILECC = 0x4281,
NVM_RSP_ERR_FAILCRC = 0x4004,
NVM_RSP_WARN_HIGHECC = 0x4700,
/* Device opcodes */
NVM_OP_PWRITE = 0x91,
NVM_OP_PREAD = 0x92,
NVM_OP_ERASE = 0x90,
/* PPA Command Flags */
NVM_IO_SNGL_ACCESS = 0x0,
NVM_IO_DUAL_ACCESS = 0x1,
NVM_IO_QUAD_ACCESS = 0x2,
/* NAND Access Modes */
NVM_IO_SUSPEND = 0x80,
NVM_IO_SLC_MODE = 0x100,
NVM_IO_SCRAMBLE_ENABLE = 0x200,
/* Block Types */
NVM_BLK_T_FREE = 0x0,
NVM_BLK_T_BAD = 0x1,
NVM_BLK_T_GRWN_BAD = 0x2,
NVM_BLK_T_DEV = 0x4,
NVM_BLK_T_HOST = 0x8,
/* Memory capabilities */
NVM_ID_CAP_SLC = 0x1,
NVM_ID_CAP_CMD_SUSPEND = 0x2,
NVM_ID_CAP_SCRAMBLE = 0x4,
NVM_ID_CAP_ENCRYPT = 0x8,
/* Memory types */
NVM_ID_FMTYPE_SLC = 0,
NVM_ID_FMTYPE_MLC = 1,
/* Device capabilities */
NVM_ID_DCAP_BBLKMGMT = 0x1,
NVM_UD_DCAP_ECC = 0x2,
};
/*
 * Pair table embedded in struct nvm_id_lp_tbl: a count followed by a
 * fixed-size byte array.
 * NOTE(review): presumably num_pairs is the number of valid entries in
 * pairs[] -- confirm against the code that fills this in.
 */
struct nvm_id_lp_mlc {
u16 num_pairs;
u8 pairs[886];
};
/* An 8-byte identifier followed by the MLC pair table. */
struct nvm_id_lp_tbl {
__u8 id[8];
struct nvm_id_lp_mlc mlc;
};
/*
 * Address format for 1.2 devices: per-field bit length, bit offset and mask.
 * It has the same shape as struct nvm_addrf (twelve u8 followed by six u64),
 * and the 1.2 code paths in this header obtain it by casting
 * &nvm_geo.addrf to this type.
 */
struct nvm_addrf_12 {
u8 ch_len;
u8 lun_len;
u8 blk_len;
u8 pg_len;
u8 pln_len;
u8 sec_len;
u8 ch_offset;
u8 lun_offset;
u8 blk_offset;
u8 pg_offset;
u8 pln_offset;
u8 sec_offset;
u64 ch_mask;
u64 lun_mask;
u64 blk_mask;
u64 pg_mask;
u64 pln_mask;
u64 sec_mask;
};
/*
 * Generic address format stored in nvm_geo.addrf: per-field bit length, bit
 * offset and mask. The rsv_* slots keep the member count identical to
 * struct nvm_addrf_12 (twelve u8 followed by six u64), which the 1.2 code
 * paths rely on when they cast this struct to that type.
 */
struct nvm_addrf {
u8 ch_len;
u8 lun_len;
u8 chk_len;
u8 sec_len;
u8 rsv_len[2];
u8 ch_offset;
u8 lun_offset;
u8 chk_offset;
u8 sec_offset;
u8 rsv_off[2];
u64 ch_mask;
u64 lun_mask;
u64 chk_mask;
u64 sec_mask;
u64 rsv_mask[2];
};
/*
 * Bit flags for chunks. States and types are separate groups that reuse the
 * low bits.
 * NOTE(review): presumably stored in nvm_chk_meta.state and .type
 * respectively -- confirm against the users.
 */
enum {
/* Chunk states */
NVM_CHK_ST_FREE = 1 << 0,
NVM_CHK_ST_CLOSED = 1 << 1,
NVM_CHK_ST_OPEN = 1 << 2,
NVM_CHK_ST_OFFLINE = 1 << 3,
/* Chunk types */
NVM_CHK_TP_W_SEQ = 1 << 0,
NVM_CHK_TP_W_RAN = 1 << 1,
NVM_CHK_TP_SZ_SPEC = 1 << 4,
};
/*
 * Per-chunk metadata: eight single-byte members followed by three u64.
 *
 * Note: The structure size is linked to nvme_nvm_chk_meta such that the same
 * buffer can be used when converting from little endian to cpu addressing.
 * Do not add, remove or reorder members without updating that structure.
 */
struct nvm_chk_meta {
u8 state;
u8 type;
u8 wi;
u8 rsvd[5];
u64 slba;
u64 cnlb;
u64 wp;
};
/*
 * One target instance: ties a target device view and a target type to the
 * gendisk it is associated with. ->list is the list linkage.
 * NOTE(review): presumably linked on nvm_dev.targets -- confirm.
 */
struct nvm_target {
struct list_head list;
struct nvm_tgt_dev *dev;
struct nvm_tgt_type *type;
struct gendisk *disk;
};
/* All-ones 64-bit value; the ppa32/ppa64 helpers map it to and from ~0U. */
#define ADDR_EMPTY (~0ULL)
/*
 * NOTE(review): the default (101) lies outside the MIN..MAX range below, so
 * it looks like a "not specified" sentinel rather than a usable value --
 * confirm with the code that consumes it.
 */
#define NVM_TARGET_DEFAULT_OP (101)
#define NVM_TARGET_MIN_OP (3)
#define NVM_TARGET_MAX_OP (80)
#define NVM_VERSION_MAJOR 1
#define NVM_VERSION_MINOR 0
#define NVM_VERSION_PATCH 0
#define NVM_MAX_VLBA (64) /* max logical blocks in a vector command */
struct nvm_rq;
/* Completion callback type stored in nvm_rq.end_io. */
typedef void (nvm_end_io_fn)(struct nvm_rq *);
/*
 * One request. nvm_rq_to_pdu()/nvm_rq_from_pdu() treat the memory directly
 * after this struct as a caller-owned per-request area, so the struct is
 * expected to be allocated with that extra space behind it.
 */
struct nvm_rq {
struct nvm_tgt_dev *dev;
struct bio *bio;
/*
 * Shares storage: nvm_rq_to_ppa_list() reads ppa_addr when nr_ppas <= 1
 * and ppa_list otherwise.
 */
union {
struct ppa_addr ppa_addr;
dma_addr_t dma_ppa_list;
};
struct ppa_addr *ppa_list;
void *meta_list;
dma_addr_t dma_meta_list;
nvm_end_io_fn *end_io;
uint8_t opcode;
uint16_t nr_ppas;
uint16_t flags;
u64 ppa_status; /* ppa media status */
int error;
int is_seq; /* Sequential hint flag. 1.2 only */
void *private;
};
static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu)
{
return pdu - sizeof(struct nvm_rq);
}
/*
 * Return the address of the first byte past @rqdata, i.e. the per-request
 * area that follows the struct in memory.
 */
static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
{
	return &rqdata[1];
}
/*
 * Return a pointer to the request's address(es): the external ppa_list when
 * the request carries more than one address, otherwise the single address
 * embedded in the request.
 */
static inline struct ppa_addr *nvm_rq_to_ppa_list(struct nvm_rq *rqd)
{
	if (rqd->nr_ppas > 1)
		return rqd->ppa_list;

	return &rqd->ppa_addr;
}
/* Block state flags. Note the gap: 0x4 is not assigned here. */
enum {
NVM_BLK_ST_FREE = 0x1, /* Free block */
NVM_BLK_ST_TGT = 0x2, /* Block in use by target */
NVM_BLK_ST_BAD = 0x8, /* Bad block */
};
/*
 * Instance geometry.
 *
 * Embedded by value in both struct nvm_dev and struct nvm_tgt_dev. The
 * address helpers in this header read ->version to pick the 1.2 or 2.0
 * layout and ->addrf for the field offsets and masks.
 */
struct nvm_geo {
/* device reported version */
u8 major_ver_id;
u8 minor_ver_id;
/* kernel short version */
u8 version;
/* instance specific geometry */
int num_ch;
int num_lun; /* per channel */
/* calculated values */
int all_luns; /* across channels */
int all_chunks; /* across channels */
int op; /* over-provision in instance */
sector_t total_secs; /* across channels */
/* chunk geometry */
u32 num_chk; /* chunks per lun */
u32 clba; /* sectors per chunk */
u16 csecs; /* sector size */
u16 sos; /* out-of-band area size */
bool ext; /* metadata in extended data buffer */
u32 mdts; /* Max data transfer size */
/* device write constraints */
u32 ws_min; /* minimum write size */
u32 ws_opt; /* optimal write size */
u32 mw_cunits; /* distance required for successful read */
u32 maxoc; /* maximum open chunks */
u32 maxocpu; /* maximum open chunks per parallel unit */
/* device capabilities */
u32 mccap;
/* device timings */
u32 trdt; /* Avg. Tread (ns) */
u32 trdm; /* Max Tread (ns) */
u32 tprt; /* Avg. Tprog (ns) */
u32 tprm; /* Max Tprog (ns) */
u32 tbet; /* Avg. Terase (ns) */
u32 tbem; /* Max Terase (ns) */
/* generic address format */
struct nvm_addrf addrf;
/* 1.2 compatibility */
u8 vmnt;
u32 cap;
u32 dom;
u8 mtype;
u8 fmtype;
u16 cpar;
u32 mpos;
u8 num_pln;
u8 pln_mode;
u16 num_pg;
u16 fpg_sz;
};
/*
 * sub-device structure
 *
 * A target's view of a device: it carries its own copy of the geometry and
 * a pointer back to the owning struct nvm_dev.
 */
struct nvm_tgt_dev {
/* Device information */
struct nvm_geo geo;
/* Base ppas for target LUNs */
struct ppa_addr *luns;
struct request_queue *q;
struct nvm_dev *parent;
void *map;
};
/*
 * One device: backend callbacks, geometry, and bookkeeping for the targets
 * created on top of it.
 */
struct nvm_dev {
struct nvm_dev_ops *ops;
struct list_head devices;
/* Device information */
struct nvm_geo geo;
unsigned long *lun_map;
void *dma_pool;
/* Backend device */
struct request_queue *q;
char name[DISK_NAME_LEN];
void *private_data;
struct kref ref;
void *rmap;
/* NOTE(review): what mlock and lock each protect is not visible here */
struct mutex mlock;
spinlock_t lock;
/* target management */
struct list_head area_list;
struct list_head targets;
};
static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
struct ppa_addr r)
{
struct nvm_geo *geo = &dev->geo;
struct ppa_addr l;
if (geo->version == NVM_OCSSD_SPEC_12) {
struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf;
l.ppa = ((u64)r.g.ch) << ppaf->ch_offset;
l.ppa |= ((u64)r.g.lun) << ppaf->lun_offset;
l.ppa |= ((u64)r.g.blk) << ppaf->blk_offset;
l.ppa |= ((u64)r.g.pg) << ppaf->pg_offset;
l.ppa |= ((u64)r.g.pl) << ppaf->pln_offset;
l.ppa |= ((u64)r.g.sec) << ppaf->sec_offset;
} else {
struct nvm_addrf *lbaf = &geo->addrf;
l.ppa = ((u64)r.m.grp) << lbaf->ch_offset;
l.ppa |= ((u64)r.m.pu) << lbaf->lun_offset;
l.ppa |= ((u64)r.m.chk) << lbaf->chk_offset;
l.ppa |= ((u64)r.m.sec) << lbaf->sec_offset;
}
return l;
}
static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
struct ppa_addr r)
{
struct nvm_geo *geo = &dev->geo;
struct ppa_addr l;
l.ppa = 0;
if (geo->version == NVM_OCSSD_SPEC_12) {
struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf;
l.g.ch = (r.ppa & ppaf->ch_mask) >> ppaf->ch_offset;
l.g.lun = (r.ppa & ppaf->lun_mask) >> ppaf->lun_offset;
l.g.blk = (r.ppa & ppaf->blk_mask) >> ppaf->blk_offset;
l.g.pg = (r.ppa & ppaf->pg_mask) >> ppaf->pg_offset;
l.g.pl = (r.ppa & ppaf->pln_mask) >> ppaf->pln_offset;
l.g.sec = (r.ppa & ppaf->sec_mask) >> ppaf->sec_offset;
} else {
struct nvm_addrf *lbaf = &geo->addrf;
l.m.grp = (r.ppa & lbaf->ch_mask) >> lbaf->ch_offset;
l.m.pu = (r.ppa & lbaf->lun_mask) >> lbaf->lun_offset;
l.m.chk = (r.ppa & lbaf->chk_mask) >> lbaf->chk_offset;
l.m.sec = (r.ppa & lbaf->sec_mask) >> lbaf->sec_offset;
}
return l;
}
static inline u64 dev_to_chunk_addr(struct nvm_dev *dev, void *addrf,
struct ppa_addr p)
{
struct nvm_geo *geo = &dev->geo;
u64 caddr;
if (geo->version == NVM_OCSSD_SPEC_12) {
struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)addrf;
caddr = (u64)p.g.pg << ppaf->pg_offset;
caddr |= (u64)p.g.pl << ppaf->pln_offset;
caddr |= (u64)p.g.sec << ppaf->sec_offset;
} else {
caddr = p.m.sec;
}
return caddr;
}
/*
 * Expand a 32-bit packed address into a 64-bit struct ppa_addr.
 *
 * Three encodings are recognised, in this order:
 *  - all ones        -> ADDR_EMPTY;
 *  - bit 31 set      -> cache view: the low 31 bits become c.line and
 *                       c.is_cached is set;
 *  - anything else   -> fields are extracted with the masks and offsets in
 *                       @addrf (1.2 or generic layout, chosen by the device
 *                       geometry version).
 */
static inline struct ppa_addr nvm_ppa32_to_ppa64(struct nvm_dev *dev,
						 void *addrf, u32 ppa32)
{
	struct ppa_addr wide;

	wide.ppa = 0;

	if (ppa32 == ~0U) {
		wide.ppa = ADDR_EMPTY;
		return wide;
	}

	if (ppa32 & (1U << 31)) {
		wide.c.line = ppa32 & ((~0U) >> 1);
		wide.c.is_cached = 1;
		return wide;
	}

	if (dev->geo.version == NVM_OCSSD_SPEC_12) {
		struct nvm_addrf_12 *fmt = addrf;

		wide.g.ch = (ppa32 & fmt->ch_mask) >> fmt->ch_offset;
		wide.g.lun = (ppa32 & fmt->lun_mask) >> fmt->lun_offset;
		wide.g.blk = (ppa32 & fmt->blk_mask) >> fmt->blk_offset;
		wide.g.pg = (ppa32 & fmt->pg_mask) >> fmt->pg_offset;
		wide.g.pl = (ppa32 & fmt->pln_mask) >> fmt->pln_offset;
		wide.g.sec = (ppa32 & fmt->sec_mask) >> fmt->sec_offset;
	} else {
		struct nvm_addrf *fmt = addrf;

		wide.m.grp = (ppa32 & fmt->ch_mask) >> fmt->ch_offset;
		wide.m.pu = (ppa32 & fmt->lun_mask) >> fmt->lun_offset;
		wide.m.chk = (ppa32 & fmt->chk_mask) >> fmt->chk_offset;
		wide.m.sec = (ppa32 & fmt->sec_mask) >> fmt->sec_offset;
	}

	return wide;
}
static inline u32 nvm_ppa64_to_ppa32(struct nvm_dev *dev,
void *addrf, struct ppa_addr ppa64)
{
u32 ppa32 = 0;
if (ppa64.ppa == ADDR_EMPTY) {
ppa32 = ~0U;
} else if (ppa64.c.is_cached) {
ppa32 |= ppa64.c.line;
ppa32 |= 1U << 31;
} else {
struct nvm_geo *geo = &dev->geo;
if (geo->version == NVM_OCSSD_SPEC_12) {
struct nvm_addrf_12 *ppaf = addrf;
ppa32 |= ppa64.g.ch << ppaf->ch_offset;
ppa32 |= ppa64.g.lun << ppaf->lun_offset;
ppa32 |= ppa64.g.blk << ppaf->blk_offset;
ppa32 |= ppa64.g.pg << ppaf->pg_offset;
ppa32 |= ppa64.g.pl << ppaf->pln_offset;
ppa32 |= ppa64.g.sec << ppaf->sec_offset;
} else {
struct nvm_addrf *lbaf = addrf;
ppa32 |= ppa64.m.grp << lbaf->ch_offset;
ppa32 |= ppa64.m.pu << lbaf->lun_offset;
ppa32 |= ppa64.m.chk << lbaf->chk_offset;
ppa32 |= ppa64.m.sec << lbaf->sec_offset;
}
}
return ppa32;
}
/*
 * Advance @ppa by one sector within its chunk, updating it in place.
 *
 * 1.2 layout: the sector counter wraps at geo->ws_min and carries into the
 * page counter, which wraps at geo->num_pg and carries into the plane
 * counter. 2.0 layout: the sector field is simply incremented.
 *
 * Returns 1 when the step reached the end of the chunk (plane count hit
 * geo->num_pln, or sector hit geo->clba), 0 otherwise.
 */
static inline int nvm_next_ppa_in_chk(struct nvm_tgt_dev *dev,
				      struct ppa_addr *ppa)
{
	struct nvm_geo *geo = &dev->geo;
	int sec, pg, pl;

	if (geo->version != NVM_OCSSD_SPEC_12) {
		ppa->m.sec++;
		return (ppa->m.sec == geo->clba) ? 1 : 0;
	}

	sec = ppa->g.sec + 1;
	if (sec != geo->ws_min) {
		/* no carry: only the sector field changes */
		ppa->g.sec = sec;
		return 0;
	}

	/* sector wrapped, carry into the page */
	sec = 0;
	pg = ppa->g.pg + 1;
	if (pg != geo->num_pg) {
		ppa->g.pg = pg;
		ppa->g.sec = sec;
		return 0;
	}

	/* page wrapped, carry into the plane */
	pg = 0;
	pl = ppa->g.pl + 1;
	ppa->g.pl = pl;
	ppa->g.pg = pg;
	ppa->g.sec = sec;

	return (pl == geo->num_pln) ? 1 : 0;
}
/*
 * Function types for the entry points of struct nvm_tgt_type. These are
 * function types, not pointer types; the struct declares pointers to them.
 */
typedef sector_t (nvm_tgt_capacity_fn)(void *);
typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
int flags);
typedef void (nvm_tgt_exit_fn)(void *, bool);
typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *);
typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *);
/*
 * Target flag values. NVM_TGT_F_DEV_L2P is 0, i.e. the absence of
 * NVM_TGT_F_HOST_L2P, so it cannot be tested with a bitwise AND.
 * NOTE(review): presumably stored in nvm_tgt_type.flags -- confirm.
 */
enum {
NVM_TGT_F_DEV_L2P = 0,
NVM_TGT_F_HOST_L2P = 1 << 0,
};
/*
 * Description of a target type, passed to nvm_register_tgt_type() and
 * nvm_unregister_tgt_type().
 */
struct nvm_tgt_type {
const char *name;
/* three-component version number */
unsigned int version[3];
int flags;
/* target entry points */
const struct block_device_operations *bops;
nvm_tgt_capacity_fn *capacity;
/* module-specific init/teardown */
nvm_tgt_init_fn *init;
nvm_tgt_exit_fn *exit;
/* sysfs */
nvm_tgt_sysfs_init_fn *sysfs_init;
nvm_tgt_sysfs_exit_fn *sysfs_exit;
/* For internal use */
struct list_head list;
struct module *owner;
};
/*
 * Functions available when CONFIG_NVM is set; their definitions are not in
 * this header. nvm_alloc_dev(), nvm_register() and nvm_unregister() have
 * inline fallbacks in the !CONFIG_NVM branch below; the others do not.
 */
extern int nvm_register_tgt_type(struct nvm_tgt_type *);
extern void nvm_unregister_tgt_type(struct nvm_tgt_type *);
extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *);
extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t);
extern struct nvm_dev *nvm_alloc_dev(int);
extern int nvm_register(struct nvm_dev *);
extern void nvm_unregister(struct nvm_dev *);
extern int nvm_get_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr,
int, struct nvm_chk_meta *);
extern int nvm_set_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr *,
int, int);
extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *, void *);
extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *, void *);
extern void nvm_end_io(struct nvm_rq *);
#else /* CONFIG_NVM */
/*
 * Fallbacks for kernels built without CONFIG_NVM: allocation and
 * registration always fail with -EINVAL, unregistration does nothing.
 */
struct nvm_dev_ops;
/* Always fails: returns ERR_PTR(-EINVAL), never NULL. */
static inline struct nvm_dev *nvm_alloc_dev(int node)
{
return ERR_PTR(-EINVAL);
}
/* Always fails with -EINVAL. */
static inline int nvm_register(struct nvm_dev *dev)
{
return -EINVAL;
}
/* Nothing to undo when the subsystem is compiled out. */
static inline void nvm_unregister(struct nvm_dev *dev) {}
#endif /* CONFIG_NVM */
#endif /* NVM_H */

View File

@ -1,224 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (C) 2015 CNEX Labs. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
* USA.
*/
#ifndef _UAPI_LINUX_LIGHTNVM_H
#define _UAPI_LINUX_LIGHTNVM_H
#ifdef __KERNEL__
#include <linux/const.h>
#else /* __KERNEL__ */
#include <stdio.h>
#include <sys/ioctl.h>
#define DISK_NAME_LEN 32
#endif /* __KERNEL__ */
#include <linux/types.h>
#include <linux/ioctl.h>
/* Size, in bytes, of the target type name buffers in the ioctl structs */
#define NVM_TTYPE_NAME_MAX 48
/* Number of entries in nvm_ioctl_info.tgts[] */
#define NVM_TTYPE_MAX 63
/* Size, in bytes, of nvm_ioctl_dev_init.mmtype */
#define NVM_MMTYPE_LEN 8
/* Path of the control device node */
#define NVM_CTRL_FILE "/dev/lightnvm/control"
/* One target type entry in nvm_ioctl_info.tgts[]; 64 bytes. */
struct nvm_ioctl_info_tgt {
__u32 version[3];
__u32 reserved;
char tgtname[NVM_TTYPE_NAME_MAX];
};
/*
 * Argument of the NVM_INFO ioctl. The reserved members pad the header to
 * 64 bytes; with 63 target entries of 64 bytes each the struct is 4096 bytes.
 */
struct nvm_ioctl_info {
__u32 version[3]; /* in/out - major, minor, patch */
__u16 tgtsize; /* number of targets */
__u16 reserved16; /* pad to 4K page */
__u32 reserved[12];
struct nvm_ioctl_info_tgt tgts[NVM_TTYPE_MAX];
};
/* NOTE(review): presumably a bit for nvm_ioctl_device_info.flags -- confirm */
enum {
NVM_DEVICE_ACTIVE = 1 << 0,
};
/* One device entry in nvm_ioctl_get_devices.info[]. */
struct nvm_ioctl_device_info {
char devname[DISK_NAME_LEN];
char bmname[NVM_TTYPE_NAME_MAX];
__u32 bmversion[3];
__u32 flags;
__u32 reserved[8];
};
/*
 * Argument of the NVM_GET_DEVICES ioctl: a count, padding, and room for
 * 31 device entries.
 * NOTE(review): presumably nr_devices is the number of valid info[] entries
 * -- confirm against the ioctl handler.
 */
struct nvm_ioctl_get_devices {
__u32 nr_devices;
__u32 reserved[31];
struct nvm_ioctl_device_info info[31];
};
/*
 * "Simple" member of the nvm_ioctl_create_conf union: a LUN range with
 * 32-bit bounds.
 */
struct nvm_ioctl_create_simple {
__u32 lun_begin;
__u32 lun_end;
};
/*
 * "Extended" member of the nvm_ioctl_create_conf union: a LUN range with
 * 16-bit bounds plus an op value. Same 8-byte size as the simple variant.
 */
struct nvm_ioctl_create_extended {
__u16 lun_begin;
__u16 lun_end;
__u16 op;
__u16 rsv;
};
/*
 * NOTE(review): presumably the values of nvm_ioctl_create_conf.type,
 * selecting the .s or .e union member -- confirm against the ioctl handler.
 */
enum {
NVM_CONFIG_TYPE_SIMPLE = 0,
NVM_CONFIG_TYPE_EXTENDED = 1,
};
/*
 * Target creation parameters: a type tag followed by a union of the simple
 * and extended layouts, which share the same storage.
 */
struct nvm_ioctl_create_conf {
__u32 type;
union {
struct nvm_ioctl_create_simple s;
struct nvm_ioctl_create_extended e;
};
};
/* NOTE(review): presumably a bit for nvm_ioctl_create.flags -- confirm */
enum {
NVM_TARGET_FACTORY = 1 << 0, /* Init target in factory mode */
};
/* Argument of the NVM_DEV_CREATE ioctl. */
struct nvm_ioctl_create {
char dev[DISK_NAME_LEN]; /* open-channel SSD device */
char tgttype[NVM_TTYPE_NAME_MAX]; /* target type name */
char tgtname[DISK_NAME_LEN]; /* dev to expose target as */
__u32 flags;
struct nvm_ioctl_create_conf conf;
};
/* Argument of the NVM_DEV_REMOVE ioctl: names the target to remove. */
struct nvm_ioctl_remove {
char tgtname[DISK_NAME_LEN];
__u32 flags;
};
/* Argument of the NVM_DEV_INIT ioctl. */
struct nvm_ioctl_dev_init {
char dev[DISK_NAME_LEN]; /* open-channel SSD device */
char mmtype[NVM_MMTYPE_LEN]; /* register to media manager */
__u32 flags;
};
/*
 * Factory reset flag bits. NVM_FACTORY_NR_BITS is the first unused bit and
 * marks the end of the valid flags rather than being a flag itself.
 */
enum {
NVM_FACTORY_ERASE_ONLY_USER = 1 << 0, /* erase only blocks used as
* host blks or grown blks */
NVM_FACTORY_RESET_HOST_BLKS = 1 << 1, /* remove host blk marks */
NVM_FACTORY_RESET_GRWN_BBLKS = 1 << 2, /* remove grown blk marks */
NVM_FACTORY_NR_BITS = 1 << 3, /* stops here */
};
/* Argument of the NVM_DEV_FACTORY ioctl: device name plus flag bits. */
struct nvm_ioctl_dev_factory {
char dev[DISK_NAME_LEN];
__u32 flags;
};
/*
 * Argument of the NVME_NVM_IOCTL_SUBMIT_VIO ioctl.
 * NOTE(review): metadata, addr and ppa_list are presumably user-space
 * pointers carried as 64-bit integers -- confirm against the handler.
 */
struct nvm_user_vio {
__u8 opcode;
__u8 flags;
__u16 control;
__u16 nppas;
__u16 rsvd;
__u64 metadata;
__u64 addr;
__u64 ppa_list;
__u32 metadata_len;
__u32 data_len;
__u64 status;
__u32 result;
__u32 rsvd3[3];
};
/*
 * Argument of the NVME_NVM_IOCTL_IO_VIO and NVME_NVM_IOCTL_ADMIN_VIO ioctls.
 * NOTE(review): metadata, addr and ppa_list are presumably user-space
 * pointers carried as 64-bit integers -- confirm against the handler.
 */
struct nvm_passthru_vio {
__u8 opcode;
__u8 flags;
__u8 rsvd[2];
__u32 nsid;
__u32 cdw2;
__u32 cdw3;
__u64 metadata;
__u64 addr;
__u32 metadata_len;
__u32 data_len;
__u64 ppa_list;
__u16 nppas;
__u16 control;
__u32 cdw13;
__u32 cdw14;
__u32 cdw15;
__u64 status;
__u32 result;
__u32 timeout_ms;
};
/*
 * The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt
 *
 * The first six commands number consecutively from 0x20 (0x20-0x25).
 * NOTE(review): the vector I/O commands use 0x41-0x43, which is outside the
 * 0x20-0x2F range stated above.
 */
enum {
/* top level cmds */
NVM_INFO_CMD = 0x20,
NVM_GET_DEVICES_CMD,
/* device level cmds */
NVM_DEV_CREATE_CMD,
NVM_DEV_REMOVE_CMD,
/* Init a device to support LightNVM media managers */
NVM_DEV_INIT_CMD,
/* Factory reset device */
NVM_DEV_FACTORY_CMD,
/* Vector user I/O */
NVM_DEV_VIO_ADMIN_CMD = 0x41,
NVM_DEV_VIO_CMD = 0x42,
NVM_DEV_VIO_USER_CMD = 0x43,
};
/*
 * ioctl request codes. Each combines the 'L' type, a command number from the
 * enum above and the argument struct; _IOR/_IOW/_IOWR encode the direction
 * of the data transfer.
 */
#define NVM_IOCTL 'L' /* 0x4c */
#define NVM_INFO _IOWR(NVM_IOCTL, NVM_INFO_CMD, \
struct nvm_ioctl_info)
#define NVM_GET_DEVICES _IOR(NVM_IOCTL, NVM_GET_DEVICES_CMD, \
struct nvm_ioctl_get_devices)
#define NVM_DEV_CREATE _IOW(NVM_IOCTL, NVM_DEV_CREATE_CMD, \
struct nvm_ioctl_create)
#define NVM_DEV_REMOVE _IOW(NVM_IOCTL, NVM_DEV_REMOVE_CMD, \
struct nvm_ioctl_remove)
#define NVM_DEV_INIT _IOW(NVM_IOCTL, NVM_DEV_INIT_CMD, \
struct nvm_ioctl_dev_init)
#define NVM_DEV_FACTORY _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \
struct nvm_ioctl_dev_factory)
#define NVME_NVM_IOCTL_IO_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_USER_CMD, \
struct nvm_passthru_vio)
#define NVME_NVM_IOCTL_ADMIN_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_ADMIN_CMD,\
struct nvm_passthru_vio)
#define NVME_NVM_IOCTL_SUBMIT_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_CMD,\
struct nvm_user_vio)
/*
 * Interface version (major, minor, patch level).
 * NOTE(review): the kernel-internal header uses the name NVM_VERSION_PATCH
 * for the third component, this one NVM_VERSION_PATCHLEVEL.
 */
#define NVM_VERSION_MAJOR 1
#define NVM_VERSION_MINOR 0
#define NVM_VERSION_PATCHLEVEL 0
#endif