2605257a26
Add support for the Dataset Management command and the Deallocate attribute. Deallocation results in discards being sent to the underlying block device. Whether of not the blocks are actually deallocated is affected by the same factors as Write Zeroes (see previous commit). format | discard | dsm (512B) dsm (4KiB) dsm (64KiB) -------------------------------------------------------- qcow2 ignore n n n qcow2 unmap n n y raw ignore n n n raw unmap n y y Again, a raw format and 4KiB LBAs are preferable. In order to set the Namespace Preferred Deallocate Granularity and Alignment fields (NPDG and NPDA), choose a sane minimum discard granularity of 4KiB. If we are using a passthru device supporting discard at a 512B granularity, user should set the discard_granularity property explicitly. NPDG and NPDA will also account for the cluster_size of the block driver if required (i.e. for QCOW2). See NVM Express 1.3d, Section 6.7 ("Dataset Management command"). Signed-off-by: Klaus Jensen <k.jensen@samsung.com> Reviewed-by: Keith Busch <kbusch@kernel.org>
194 lines
4.5 KiB
C
194 lines
4.5 KiB
C
/*
|
|
* QEMU NVM Express Virtual Namespace
|
|
*
|
|
* Copyright (c) 2019 CNEX Labs
|
|
* Copyright (c) 2020 Samsung Electronics
|
|
*
|
|
* Authors:
|
|
* Klaus Jensen <k.jensen@samsung.com>
|
|
*
|
|
* This work is licensed under the terms of the GNU GPL, version 2. See the
|
|
* COPYING file in the top-level directory.
|
|
*
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "qemu/units.h"
|
|
#include "qemu/cutils.h"
|
|
#include "qemu/log.h"
|
|
#include "hw/block/block.h"
|
|
#include "hw/pci/pci.h"
|
|
#include "sysemu/sysemu.h"
|
|
#include "sysemu/block-backend.h"
|
|
#include "qapi/error.h"
|
|
|
|
#include "hw/qdev-properties.h"
|
|
#include "hw/qdev-core.h"
|
|
|
|
#include "nvme.h"
|
|
#include "nvme-ns.h"
|
|
|
|
#define MIN_DISCARD_GRANULARITY (4 * KiB)
|
|
|
|
static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
|
|
{
|
|
BlockDriverInfo bdi;
|
|
NvmeIdNs *id_ns = &ns->id_ns;
|
|
int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
|
|
int npdg;
|
|
|
|
ns->id_ns.dlfeat = 0x9;
|
|
|
|
id_ns->lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size);
|
|
|
|
id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns));
|
|
|
|
/* no thin provisioning */
|
|
id_ns->ncap = id_ns->nsze;
|
|
id_ns->nuse = id_ns->ncap;
|
|
|
|
/* support DULBE and I/O optimization fields */
|
|
id_ns->nsfeat |= (0x4 | 0x10);
|
|
|
|
npdg = ns->blkconf.discard_granularity / ns->blkconf.logical_block_size;
|
|
|
|
if (bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi) >= 0 &&
|
|
bdi.cluster_size > ns->blkconf.discard_granularity) {
|
|
npdg = bdi.cluster_size / ns->blkconf.logical_block_size;
|
|
}
|
|
|
|
id_ns->npda = id_ns->npdg = npdg - 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
|
|
{
|
|
bool read_only;
|
|
|
|
if (!blkconf_blocksizes(&ns->blkconf, errp)) {
|
|
return -1;
|
|
}
|
|
|
|
read_only = !blk_supports_write_perm(ns->blkconf.blk);
|
|
if (!blkconf_apply_backend_options(&ns->blkconf, read_only, false, errp)) {
|
|
return -1;
|
|
}
|
|
|
|
if (ns->blkconf.discard_granularity == -1) {
|
|
ns->blkconf.discard_granularity =
|
|
MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY);
|
|
}
|
|
|
|
ns->size = blk_getlength(ns->blkconf.blk);
|
|
if (ns->size < 0) {
|
|
error_setg_errno(errp, -ns->size, "could not get blockdev size");
|
|
return -1;
|
|
}
|
|
|
|
if (blk_enable_write_cache(ns->blkconf.blk)) {
|
|
n->features.vwc = 0x1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
|
|
{
|
|
if (!ns->blkconf.blk) {
|
|
error_setg(errp, "block backend not configured");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
|
|
{
|
|
if (nvme_ns_check_constraints(ns, errp)) {
|
|
return -1;
|
|
}
|
|
|
|
if (nvme_ns_init_blk(n, ns, errp)) {
|
|
return -1;
|
|
}
|
|
|
|
if (nvme_ns_init(ns, errp)) {
|
|
return -1;
|
|
}
|
|
|
|
if (nvme_register_namespace(n, ns, errp)) {
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void nvme_ns_drain(NvmeNamespace *ns)
|
|
{
|
|
blk_drain(ns->blkconf.blk);
|
|
}
|
|
|
|
void nvme_ns_flush(NvmeNamespace *ns)
|
|
{
|
|
blk_flush(ns->blkconf.blk);
|
|
}
|
|
|
|
static void nvme_ns_realize(DeviceState *dev, Error **errp)
|
|
{
|
|
NvmeNamespace *ns = NVME_NS(dev);
|
|
BusState *s = qdev_get_parent_bus(dev);
|
|
NvmeCtrl *n = NVME(s->parent);
|
|
Error *local_err = NULL;
|
|
|
|
if (nvme_ns_setup(n, ns, &local_err)) {
|
|
error_propagate_prepend(errp, local_err,
|
|
"could not setup namespace: ");
|
|
return;
|
|
}
|
|
}
|
|
|
|
static Property nvme_ns_props[] = {
|
|
DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
|
|
DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
|
|
DEFINE_PROP_END_OF_LIST(),
|
|
};
|
|
|
|
static void nvme_ns_class_init(ObjectClass *oc, void *data)
|
|
{
|
|
DeviceClass *dc = DEVICE_CLASS(oc);
|
|
|
|
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
|
|
|
|
dc->bus_type = TYPE_NVME_BUS;
|
|
dc->realize = nvme_ns_realize;
|
|
device_class_set_props(dc, nvme_ns_props);
|
|
dc->desc = "Virtual NVMe namespace";
|
|
}
|
|
|
|
static void nvme_ns_instance_init(Object *obj)
|
|
{
|
|
NvmeNamespace *ns = NVME_NS(obj);
|
|
char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid);
|
|
|
|
device_add_bootindex_property(obj, &ns->bootindex, "bootindex",
|
|
bootindex, DEVICE(obj));
|
|
|
|
g_free(bootindex);
|
|
}
|
|
|
|
static const TypeInfo nvme_ns_info = {
|
|
.name = TYPE_NVME_NS,
|
|
.parent = TYPE_DEVICE,
|
|
.class_init = nvme_ns_class_init,
|
|
.instance_size = sizeof(NvmeNamespace),
|
|
.instance_init = nvme_ns_instance_init,
|
|
};
|
|
|
|
static void nvme_ns_register_types(void)
|
|
{
|
|
type_register_static(&nvme_ns_info);
|
|
}
|
|
|
|
type_init(nvme_ns_register_types)
|