2015-12-02 08:20:56 +01:00
|
|
|
/*
|
|
|
|
* Non-Volatile Dual In-line Memory Module Virtualization Implementation
|
|
|
|
*
|
|
|
|
* Copyright(C) 2015 Intel Corporation.
|
|
|
|
*
|
|
|
|
* Author:
|
|
|
|
* Xiao Guangrong <guangrong.xiao@linux.intel.com>
|
|
|
|
*
|
|
|
|
* Currently, it only supports PMEM Virtualization.
|
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
2020-10-23 14:44:24 +02:00
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
2015-12-02 08:20:56 +01:00
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library; if not, see <http://www.gnu.org/licenses/>
|
|
|
|
*/
|
|
|
|
|
2016-01-26 19:17:30 +01:00
|
|
|
#include "qemu/osdep.h"
|
2019-05-23 16:35:07 +02:00
|
|
|
#include "qemu/module.h"
|
2018-07-18 09:48:01 +02:00
|
|
|
#include "qemu/pmem.h"
|
2016-06-07 14:21:58 +02:00
|
|
|
#include "qapi/error.h"
|
|
|
|
#include "qapi/visitor.h"
|
2015-12-02 08:20:56 +01:00
|
|
|
#include "hw/mem/nvdimm.h"
|
2019-08-12 07:23:51 +02:00
|
|
|
#include "hw/qdev-properties.h"
|
2018-10-05 11:20:18 +02:00
|
|
|
#include "hw/mem/memory-device.h"
|
2019-08-12 07:23:54 +02:00
|
|
|
#include "sysemu/hostmem.h"
|
2015-12-02 08:20:56 +01:00
|
|
|
|
2016-06-07 14:21:58 +02:00
|
|
|
/*
 * Property getter registered for NVDIMM_LABEL_SIZE_PROP: hands the device's
 * current label_size to the visitor as a size value.
 */
static void nvdimm_get_label_size(Object *obj, Visitor *v, const char *name,
                                  void *opaque, Error **errp)
{
    uint64_t label_size = NVDIMM(obj)->label_size;

    visit_type_size(v, name, &label_size, errp);
}
|
|
|
|
|
|
|
|
/*
 * Property setter registered for NVDIMM_LABEL_SIZE_PROP.
 *
 * Fails with an error when the NVDIMM memory region already exists, when the
 * visitor cannot produce a size, or when the requested size is smaller than
 * MIN_NAMESPACE_LABEL_SIZE.  Otherwise the value is stored in label_size.
 */
static void nvdimm_set_label_size(Object *obj, Visitor *v, const char *name,
                                  void *opaque, Error **errp)
{
    NVDIMMDevice *nvdimm = NVDIMM(obj);
    uint64_t new_size;

    /* The region layout depends on label_size, so it is frozen afterwards. */
    if (nvdimm->nvdimm_mr) {
        error_setg(errp, "cannot change property value");
        return;
    }

    if (!visit_type_size(v, name, &new_size, errp)) {
        return;
    }

    if (new_size >= MIN_NAMESPACE_LABEL_SIZE) {
        nvdimm->label_size = new_size;
        return;
    }

    error_setg(errp, "Property '%s.%s' (0x%" PRIx64 ") is required"
               " at least 0x%lx", object_get_typename(obj), name, new_size,
               MIN_NAMESPACE_LABEL_SIZE);
}
|
|
|
|
|
2020-02-10 05:56:13 +01:00
|
|
|
/*
 * Property getter registered for NVDIMM_UUID_PROP: formats the device UUID
 * into a freshly allocated string, passes it to the visitor, then frees it.
 */
static void nvdimm_get_uuid(Object *obj, Visitor *v, const char *name,
                            void *opaque, Error **errp)
{
    char *uuid_str = qemu_uuid_unparse_strdup(&NVDIMM(obj)->uuid);

    visit_type_str(v, name, &uuid_str, errp);
    g_free(uuid_str);
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Property setter registered for NVDIMM_UUID_PROP: reads a string from the
 * visitor and parses it into the device's uuid field.  A string that does
 * not parse is reported through @errp.
 */
static void nvdimm_set_uuid(Object *obj, Visitor *v, const char *name,
                            void *opaque, Error **errp)
{
    NVDIMMDevice *nvdimm = NVDIMM(obj);
    char *uuid_str;

    if (!visit_type_str(v, name, &uuid_str, errp)) {
        return;
    }

    if (qemu_uuid_parse(uuid_str, &nvdimm->uuid)) {
        error_setg(errp, "Property '%s.%s' has invalid value",
                   object_get_typename(obj), name);
    }

    /* The visited string is released on success and on parse failure alike. */
    g_free(uuid_str);
}
|
|
|
|
|
|
|
|
|
2016-06-07 14:21:58 +02:00
|
|
|
/*
 * Instance initializer: registers the label-size and UUID properties together
 * with their accessor callbacks.
 */
static void nvdimm_init(Object *obj)
{
    object_property_add(obj, NVDIMM_LABEL_SIZE_PROP, "int",
                        nvdimm_get_label_size,
                        nvdimm_set_label_size,
                        NULL, NULL);

    object_property_add(obj, NVDIMM_UUID_PROP, "QemuUUID",
                        nvdimm_get_uuid,
                        nvdimm_set_uuid,
                        NULL, NULL);
}
|
|
|
|
|
2018-06-19 15:41:39 +02:00
|
|
|
/*
 * Instance finalizer: releases the MemoryRegion structure allocated by
 * nvdimm_prepare_memory_region(), if any.
 */
static void nvdimm_finalize(Object *obj)
{
    g_free(NVDIMM(obj)->nvdimm_mr);
}
|
|
|
|
|
2018-06-19 15:41:40 +02:00
|
|
|
/*
 * Build the alias memory region exposed for @nvdimm on top of its host
 * memory backend.
 *
 * The last label_size bytes of the backend are used as the label area
 * (label_data points at its start).  The alias "nvdimm-memory" starts at
 * offset 0 of the backend and spans the remaining size rounded down to the
 * backend's alignment.
 *
 * Must only be called while nvdimm_mr is still unset.  On failure an error
 * is stored in @errp and the device state (label_data, readonly, nvdimm_mr)
 * is left untouched.
 */
static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
{
    PCDIMMDevice *dimm = PC_DIMM(nvdimm);
    uint64_t align, pmem_size = 0, size;
    MemoryRegion *mr;

    g_assert(!nvdimm->nvdimm_mr);

    if (!dimm->hostmem) {
        error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property must be set");
        return;
    }

    mr = host_memory_backend_get_memory(dimm->hostmem);
    align = memory_region_get_alignment(mr);
    size = memory_region_size(mr);

    /*
     * Subtract only once the backend is known to be larger than the label
     * area; otherwise the unsigned difference would wrap around.
     */
    if (size > nvdimm->label_size) {
        pmem_size = QEMU_ALIGN_DOWN(size - nvdimm->label_size, align);
    }

    if (!pmem_size) {
        HostMemoryBackend *hostmem = dimm->hostmem;

        error_setg(errp, "the size of memdev %s (0x%" PRIx64 ") is too "
                   "small to contain nvdimm label (0x%" PRIx64 ") and "
                   "aligned PMEM (0x%" PRIx64 ")",
                   object_get_canonical_path_component(OBJECT(hostmem)),
                   memory_region_size(mr), nvdimm->label_size, align);
        return;
    }

    if (!nvdimm->unarmed && memory_region_is_rom(mr)) {
        HostMemoryBackend *hostmem = dimm->hostmem;

        error_setg(errp, "'unarmed' property must be 'on' since memdev %s "
                   "is read-only",
                   object_get_canonical_path_component(OBJECT(hostmem)));
        return;
    }

    /* Remembered so that label writes to a ROM backend can be refused. */
    if (memory_region_is_rom(mr)) {
        nvdimm->readonly = true;
    }

    /*
     * All checks passed: the label area offset is now known to lie inside
     * the backend, so the pointer can be derived safely.
     */
    nvdimm->label_data = memory_region_get_ram_ptr(mr) +
                         (size - nvdimm->label_size);

    nvdimm->nvdimm_mr = g_new(MemoryRegion, 1);
    memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm),
                             "nvdimm-memory", mr, 0, pmem_size);
    memory_region_set_nonvolatile(nvdimm->nvdimm_mr, true);
    nvdimm->nvdimm_mr->align = align;
}
|
|
|
|
|
2018-10-05 11:20:18 +02:00
|
|
|
static MemoryRegion *nvdimm_md_get_memory_region(MemoryDeviceState *md,
|
|
|
|
Error **errp)
|
2018-06-19 15:41:40 +02:00
|
|
|
{
|
2018-10-05 11:20:18 +02:00
|
|
|
NVDIMMDevice *nvdimm = NVDIMM(md);
|
2018-06-19 15:41:40 +02:00
|
|
|
Error *local_err = NULL;
|
|
|
|
|
|
|
|
if (!nvdimm->nvdimm_mr) {
|
|
|
|
nvdimm_prepare_memory_region(nvdimm, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nvdimm->nvdimm_mr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * PCDIMMDeviceClass::realize implementation.
 *
 * Makes sure the NVDIMM memory region exists, then invokes the optional
 * NVDIMMClass realize hook.  If the memory region cannot be prepared, the
 * error is propagated to @errp and the class hook is not called.
 */
static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp)
{
    NVDIMMDevice *nvdimm = NVDIMM(dimm);
    NVDIMMClass *ndc = NVDIMM_GET_CLASS(nvdimm);
    Error *local_err = NULL;

    if (!nvdimm->nvdimm_mr) {
        nvdimm_prepare_memory_region(nvdimm, &local_err);
        if (local_err) {
            /*
             * Stop here: handing an @errp that already holds an error to
             * the class hook would let it be set a second time.
             */
            error_propagate(errp, local_err);
            return;
        }
    }

    if (ndc->realize) {
        ndc->realize(nvdimm, errp);
    }
}
|
|
|
|
|
|
|
|
/*
 * PCDIMMDeviceClass::unrealize implementation: forwards to the optional
 * NVDIMMClass unrealize hook when one is installed.
 */
static void nvdimm_unrealize(PCDIMMDevice *dimm)
{
    NVDIMMDevice *nvdimm = NVDIMM(dimm);
    NVDIMMClass *klass = NVDIMM_GET_CLASS(nvdimm);

    if (!klass->unrealize) {
        return;
    }

    klass->unrealize(nvdimm);
}
|
|
|
|
|
2016-06-07 14:21:58 +02:00
|
|
|
/*
|
|
|
|
* the caller should check the input parameters before calling
|
|
|
|
* label read/write functions.
|
|
|
|
*/
|
|
|
|
static void nvdimm_validate_rw_label_data(NVDIMMDevice *nvdimm, uint64_t size,
|
nvdimm: Reject writing label data to ROM instead of crashing QEMU
Currently, when using a true R/O NVDIMM (ROM memory backend) with a label
area, the VM can easily crash QEMU by trying to write to the label area,
because the ROM memory is mmap'ed without PROT_WRITE.
[root@vm-0 ~]# ndctl disable-region region0
disabled 1 region
[root@vm-0 ~]# ndctl zero-labels nmem0
-> QEMU segfaults
Let's remember whether we have a ROM memory backend and properly
reject the write request:
[root@vm-0 ~]# ndctl disable-region region0
disabled 1 region
[root@vm-0 ~]# ndctl zero-labels nmem0
zeroed 0 nmem
In comparison, on a system with a R/W NVDIMM:
[root@vm-0 ~]# ndctl disable-region region0
disabled 1 region
[root@vm-0 ~]# ndctl zero-labels nmem0
zeroed 1 nmem
For ACPI, just return "unsupported", like if no label exists. For spapr,
return "H_P2", similar to when no label area exists.
Could we rely on the "unarmed" property? Maybe, but it looks cleaner to
only disallow what certainly cannot work.
After all "unarmed=on" primarily means: cannot accept persistent writes. In
theory, there might be setups where devices with "unarmed=on" set could
be used to host non-persistent data (temporary files, system RAM, ...); for
example, in Linux, admins can overwrite the "readonly" setting and still
write to the device -- which will work as long as we're not using ROM.
Allowing writing label data in such configurations can make sense.
Message-ID: <20230906120503.359863-2-david@redhat.com>
Fixes: dbd730e85987 ("nvdimm: check -object memory-backend-file, readonly=on option")
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2023-09-06 14:04:53 +02:00
|
|
|
uint64_t offset, bool is_write)
|
2016-06-07 14:21:58 +02:00
|
|
|
{
|
|
|
|
assert((nvdimm->label_size >= size + offset) && (offset + size > offset));
|
nvdimm: Reject writing label data to ROM instead of crashing QEMU
Currently, when using a true R/O NVDIMM (ROM memory backend) with a label
area, the VM can easily crash QEMU by trying to write to the label area,
because the ROM memory is mmap'ed without PROT_WRITE.
[root@vm-0 ~]# ndctl disable-region region0
disabled 1 region
[root@vm-0 ~]# ndctl zero-labels nmem0
-> QEMU segfaults
Let's remember whether we have a ROM memory backend and properly
reject the write request:
[root@vm-0 ~]# ndctl disable-region region0
disabled 1 region
[root@vm-0 ~]# ndctl zero-labels nmem0
zeroed 0 nmem
In comparison, on a system with a R/W NVDIMM:
[root@vm-0 ~]# ndctl disable-region region0
disabled 1 region
[root@vm-0 ~]# ndctl zero-labels nmem0
zeroed 1 nmem
For ACPI, just return "unsupported", like if no label exists. For spapr,
return "H_P2", similar to when no label area exists.
Could we rely on the "unarmed" property? Maybe, but it looks cleaner to
only disallow what certainly cannot work.
After all "unarmed=on" primarily means: cannot accept persistent writes. In
theory, there might be setups where devices with "unarmed=on" set could
be used to host non-persistent data (temporary files, system RAM, ...); for
example, in Linux, admins can overwrite the "readonly" setting and still
write to the device -- which will work as long as we're not using ROM.
Allowing writing label data in such configurations can make sense.
Message-ID: <20230906120503.359863-2-david@redhat.com>
Fixes: dbd730e85987 ("nvdimm: check -object memory-backend-file, readonly=on option")
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2023-09-06 14:04:53 +02:00
|
|
|
assert(!is_write || !nvdimm->readonly);
|
2016-06-07 14:21:58 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * NVDIMMClass::read_label_data implementation: copies @size bytes starting
 * at @offset within the label area into @buf, after validating the range.
 */
static void nvdimm_read_label_data(NVDIMMDevice *nvdimm, void *buf,
                                   uint64_t size, uint64_t offset)
{
    const void *src;

    nvdimm_validate_rw_label_data(nvdimm, size, offset, false);

    src = nvdimm->label_data + offset;
    memcpy(buf, src, size);
}
|
|
|
|
|
|
|
|
/*
 * NVDIMMClass::write_label_data implementation.
 *
 * Copies @size bytes from @buf to @offset within the label area, using the
 * persistent copy routine when the backend's "pmem" property is true, and
 * then marks the corresponding range of the backend region dirty.
 */
static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
                                    uint64_t size, uint64_t offset)
{
    PCDIMMDevice *dimm = PC_DIMM(nvdimm);
    bool backend_is_pmem = object_property_get_bool(OBJECT(dimm->hostmem),
                                                    "pmem", NULL);
    MemoryRegion *backend_mr;
    uint64_t dirty_start;

    nvdimm_validate_rw_label_data(nvdimm, size, offset, true);

    if (backend_is_pmem) {
        pmem_memcpy_persist(nvdimm->label_data + offset, buf, size);
    } else {
        memcpy(nvdimm->label_data + offset, buf, size);
    }

    /* The label area sits at the end of the backend region. */
    backend_mr = host_memory_backend_get_memory(dimm->hostmem);
    dirty_start = memory_region_size(backend_mr) - nvdimm->label_size + offset;
    memory_region_set_dirty(backend_mr, dirty_start, size);
}
|
|
|
|
|
2018-06-19 15:41:38 +02:00
|
|
|
static Property nvdimm_properties[] = {
|
|
|
|
DEFINE_PROP_BOOL(NVDIMM_UNARMED_PROP, NVDIMMDevice, unarmed, false),
|
|
|
|
DEFINE_PROP_END_OF_LIST(),
|
|
|
|
};
|
|
|
|
|
2015-12-02 08:20:56 +01:00
|
|
|
/*
 * Class initializer: installs the NVDIMM callbacks on each class view of
 * @oc, registers the qdev properties and sets the device category.
 */
static void nvdimm_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *device_class = DEVICE_CLASS(oc);
    MemoryDeviceClass *memdev_class = MEMORY_DEVICE_CLASS(oc);
    PCDIMMDeviceClass *dimm_class = PC_DIMM_CLASS(oc);
    NVDIMMClass *nvdimm_class = NVDIMM_CLASS(oc);

    /* PC-DIMM lifecycle hooks. */
    dimm_class->realize = nvdimm_realize;
    dimm_class->unrealize = nvdimm_unrealize;

    /* Memory device interface. */
    memdev_class->get_memory_region = nvdimm_md_get_memory_region;

    /* Label area accessors. */
    nvdimm_class->read_label_data = nvdimm_read_label_data;
    nvdimm_class->write_label_data = nvdimm_write_label_data;

    /* Generic device setup. */
    device_class_set_props(device_class, nvdimm_properties);
    set_bit(DEVICE_CATEGORY_STORAGE, device_class->categories);
}
|
|
|
|
|
2022-01-17 15:58:04 +01:00
|
|
|
static const TypeInfo nvdimm_info = {
|
2015-12-02 08:20:56 +01:00
|
|
|
.name = TYPE_NVDIMM,
|
|
|
|
.parent = TYPE_PC_DIMM,
|
2016-06-07 14:21:58 +02:00
|
|
|
.class_size = sizeof(NVDIMMClass),
|
2015-12-02 08:20:56 +01:00
|
|
|
.class_init = nvdimm_class_init,
|
2016-06-07 14:21:58 +02:00
|
|
|
.instance_size = sizeof(NVDIMMDevice),
|
|
|
|
.instance_init = nvdimm_init,
|
2018-06-19 15:41:39 +02:00
|
|
|
.instance_finalize = nvdimm_finalize,
|
2015-12-02 08:20:56 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
static void nvdimm_register_types(void)
|
|
|
|
{
|
|
|
|
type_register_static(&nvdimm_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
type_init(nvdimm_register_types)
|