/*
 * QEMU Block driver for iSCSI images
 *
 * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
 * Copyright (c) 2012-2017 Peter Lieven <pl@kamp.de>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

#include <poll.h>
#include <math.h>
#include <arpa/inet.h>
#include "sysemu/sysemu.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "block/block-io.h"
#include "block/block_int.h"
#include "block/qdict.h"
#include "scsi/constants.h"
#include "qemu/iov.h"
#include "qemu/module.h"
#include "qemu/option.h"
#include "qemu/uuid.h"
#include "sysemu/replay.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qstring.h"
#include "crypto/secret.h"
#include "scsi/utils.h"
#include "trace.h"

/* Conflict between scsi/utils.h and libiscsi! :( */
#define SCSI_XFER_NONE ISCSI_XFER_NONE
#include <iscsi/iscsi.h>
#define inline __attribute__((gnu_inline))  /* required for libiscsi v1.9.0 */
#include <iscsi/scsi-lowlevel.h>
#undef inline
#undef SCSI_XFER_NONE
QEMU_BUILD_BUG_ON((int)SCSI_XFER_NONE != (int)ISCSI_XFER_NONE);

#ifdef __linux__
#include <scsi/sg.h>
#endif
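
/*
 * Note: the SG_IO passthrough support guarded by __linux__ below exists
 * because the SG_IO emulation relies on definitions from the Linux kernel's
 * scsi-generic layer. A sketch of an invocation that exposes an iSCSI LUN
 * as a scsi-generic device (taken from the original passthrough commit
 * message; the target address and IQN are examples only):
 *
 *   -device lsi -device scsi-generic,drive=MyISCSI \
 *   -drive file=iscsi://10.1.1.125/iqn.ronnie.test/1,if=none,id=MyISCSI
 */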

typedef struct IscsiLun {
    struct iscsi_context *iscsi;
    AioContext *aio_context;
    int lun;
    enum scsi_inquiry_peripheral_device_type type;
    int block_size;
    uint64_t num_blocks;
    int events;
    QEMUTimer *nop_timer;
    QEMUTimer *event_timer;
    QemuMutex mutex;
    struct scsi_inquiry_logical_block_provisioning lbp;
    struct scsi_inquiry_block_limits bl;
    struct scsi_inquiry_device_designator *dd;
    unsigned char *zeroblock;
    /* The allocmap tracks which clusters (pages) on the iSCSI target are
     * allocated and which are not. In case a target returns zeros for
     * unallocated pages (iscsilun->lbprz) we can directly return zeros instead
     * of reading zeros over the wire if a read request falls within an
     * unallocated block. As there are 3 possible states we need 2 bitmaps to
     * track them. allocmap_valid keeps track of whether QEMU's information
     * about a page is valid. allocmap tracks whether a page is allocated or
     * not. In case QEMU has no valid information about a page, the
     * corresponding allocmap entry should be switched to unallocated as well,
     * to force a new lookup of the allocation status, as lookups are generally
     * skipped if a page is suspected to be allocated. If an iSCSI target is
     * opened with cache.direct = on, allocmap_valid does not exist, turning
     * all cached information invalid so that a fresh lookup is made for any
     * page even if the allocmap entry says it is unallocated. */
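    /*
     * The three per-cluster states encoded by the two bitmaps described
     * above, roughly:
     *
     *   allocmap_valid  allocmap   meaning
     *   0               0          unknown; block status must be looked up
     *   1               0          known unallocated; reads can return
     *                              zeroes locally when lbprz is set
     *   x               1          allocated (or suspected allocated);
     *                              read over the wire
     */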
    unsigned long *allocmap;
    unsigned long *allocmap_valid;
    long allocmap_size;
    int cluster_size;
    bool use_16_for_rw;
    bool write_protected;
    bool lbpme;
    bool lbprz;
    bool dpofua;
    bool has_write_same;
    bool request_timed_out;
} IscsiLun;

typedef struct IscsiTask {
    int status;
    int complete;
    int retries;
    int do_retry;
    struct scsi_task *task;
    Coroutine *co;
    IscsiLun *iscsilun;
    QEMUTimer retry_timer;
    int err_code;
    char *err_str;
} IscsiTask;

typedef struct IscsiAIOCB {
    BlockAIOCB common;
    QEMUBH *bh;
    IscsiLun *iscsilun;
    struct scsi_task *task;
    int status;
    int64_t sector_num;
    int nb_sectors;
    int ret;
#ifdef __linux__
    sg_io_hdr_t *ioh;
#endif
    bool cancelled;
} IscsiAIOCB;

/* libiscsi uses time_t so it's enough to process events every second */
#define EVENT_INTERVAL 1000
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
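
/*
 * Failed commands are retried with randomized exponential backoff: the i-th
 * retry sleeps an exponentially distributed time (see exp_random() below)
 * whose mean in milliseconds is iscsi_retry_times[i - 1], i.e. on average
 * ~8 ms for the first retry up to ~32.8 s for the seventh and last one
 * (ISCSI_CMD_RETRIES evaluates to 7 via ARRAY_SIZE).
 */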

/* this threshold is a trade-off knob to choose between
 * the potential additional overhead of an extra GET_LBA_STATUS request
 * vs. unnecessarily reading a lot of zero sectors over the wire.
 * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
 * sectors, we first check the allocation status of the area covered by
 * the request if the allocmap indicates that the area might be
 * unallocated. */
#define ISCSI_CHECKALLOC_THRES 64
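
/*
 * With QEMU's fixed 512-byte sector size this threshold corresponds to
 * requests of 64 * 512 bytes = 32 KiB and larger.
 */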

#ifdef __linux__

static void
iscsi_bh_cb(void *p)
{
    IscsiAIOCB *acb = p;

    qemu_bh_delete(acb->bh);

    acb->common.cb(acb->common.opaque, acb->status);

    if (acb->task != NULL) {
        scsi_free_scsi_task(acb->task);
        acb->task = NULL;
    }

    qemu_aio_unref(acb);
}

static void
iscsi_schedule_bh(IscsiAIOCB *acb)
{
    if (acb->bh) {
        return;
    }
    acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
    qemu_bh_schedule(acb->bh);
}

#endif

static void iscsi_co_generic_bh_cb(void *opaque)
{
    struct IscsiTask *iTask = opaque;

    iTask->complete = 1;
    aio_co_wake(iTask->co);
}

static void iscsi_retry_timer_expired(void *opaque)
{
    struct IscsiTask *iTask = opaque;

    iTask->complete = 1;
    if (iTask->co) {
        aio_co_wake(iTask->co);
    }
}

static inline unsigned exp_random(double mean)
{
    return -mean * log((double)rand() / RAND_MAX);
}
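
/*
 * Inverse-transform sampling: for U uniform on (0, 1], -mean * ln(U) is
 * exponentially distributed with expectation `mean`, which gives the
 * randomized backoff delays used for command retries.
 */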

/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
 * libiscsi 1.10.0, together with other constants we need. Use it as
 * a hint that we have to define them ourselves if needed, to keep the
 * minimum required libiscsi version at 1.9.0. We use an ASCQ macro for
 * the test because SCSI_STATUS_* is an enum.
 *
 * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
 * an enum, check against the LIBISCSI_API_VERSION macro, which was
 * introduced in 1.11.0. If it is present, there is no need to define
 * anything.
 */
#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
    !defined(LIBISCSI_API_VERSION)
#define SCSI_STATUS_TASK_SET_FULL                          0x28
#define SCSI_STATUS_TIMEOUT                                0x0f000002
#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST    0x2600
#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR        0x1a00
#endif

#ifndef LIBISCSI_API_VERSION
#define LIBISCSI_API_VERSION 20130701
#endif

static int iscsi_translate_sense(struct scsi_sense *sense)
{
    return scsi_sense_to_errno(sense->key,
                               (sense->ascq & 0xFF00) >> 8,
                               sense->ascq & 0xFF);
}
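
/*
 * libiscsi packs the additional sense code (ASC) into the high byte of
 * sense->ascq and the additional sense code qualifier into the low byte, as
 * the decomposition above assumes; e.g. the combined value 0x2600 defined
 * earlier decodes to ASC 0x26 / ASCQ 0x00 ("invalid field in parameter
 * list").
 */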

/* Called (via iscsi_service) with QemuMutex held. */
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                    void *command_data, void *opaque)
{
    struct IscsiTask *iTask = opaque;
    struct scsi_task *task = command_data;

    iTask->status = status;
    iTask->do_retry = 0;
    iTask->err_code = 0;
    iTask->task = task;

    if (status != SCSI_STATUS_GOOD) {
        iTask->err_code = -EIO;
        if (iTask->retries++ < ISCSI_CMD_RETRIES) {
            if (status == SCSI_STATUS_BUSY ||
                status == SCSI_STATUS_TIMEOUT ||
                status == SCSI_STATUS_TASK_SET_FULL) {
                unsigned retry_time =
                    exp_random(iscsi_retry_times[iTask->retries - 1]);
                if (status == SCSI_STATUS_TIMEOUT) {
                    /* make sure the request is rescheduled AFTER the
                     * reconnect is initiated */
                    retry_time = EVENT_INTERVAL * 2;
                    iTask->iscsilun->request_timed_out = true;
                }
                error_report("iSCSI Busy/TaskSetFull/TimeOut"
                             " (retry #%u in %u ms): %s",
                             iTask->retries, retry_time,
                             iscsi_get_error(iscsi));
                aio_timer_init(iTask->iscsilun->aio_context,
                               &iTask->retry_timer, QEMU_CLOCK_REALTIME,
                               SCALE_MS, iscsi_retry_timer_expired, iTask);
                timer_mod(&iTask->retry_timer,
                          qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
                iTask->do_retry = 1;
                return;
            } else if (status == SCSI_STATUS_CHECK_CONDITION) {
                int error = iscsi_translate_sense(&task->sense);
                if (error == EAGAIN) {
                    error_report("iSCSI CheckCondition: %s",
                                 iscsi_get_error(iscsi));
                    iTask->do_retry = 1;
                } else {
                    iTask->err_code = -error;
                    iTask->err_str = g_strdup(iscsi_get_error(iscsi));
                }
            }
        }
    }

    if (iTask->co) {
        replay_bh_schedule_oneshot_event(iTask->iscsilun->aio_context,
                                         iscsi_co_generic_bh_cb, iTask);
    } else {
        iTask->complete = 1;
    }
}

static void coroutine_fn
iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
{
    *iTask = (struct IscsiTask) {
        .co         = qemu_coroutine_self(),
        .iscsilun   = iscsilun,
    };
}

#ifdef __linux__

/* Called (via iscsi_service) with QemuMutex held. */
static void
iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
                    void *private_data)
{
    IscsiAIOCB *acb = private_data;

    /* If the command callback hasn't been called yet, drop the task */
    if (!acb->bh) {
        /* Call iscsi_aio_ioctl_cb() with SCSI_STATUS_CANCELLED */
        iscsi_scsi_cancel_task(iscsi, acb->task);
    }

    qemu_aio_unref(acb); /* acquired in iscsi_aio_cancel() */
}

static void
iscsi_aio_cancel(BlockAIOCB *blockacb)
{
    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
    IscsiLun *iscsilun = acb->iscsilun;

    WITH_QEMU_LOCK_GUARD(&iscsilun->mutex) {

        /* If it was cancelled or completed already, our work is done here */
        if (acb->cancelled || acb->status != -EINPROGRESS) {
            return;
        }

        acb->cancelled = true;

        qemu_aio_ref(acb); /* released in iscsi_abort_task_cb() */

        /* send a task mgmt call to the target to cancel the task on the
         * target */
        if (iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
                                             iscsi_abort_task_cb, acb) < 0) {
            qemu_aio_unref(acb); /* since iscsi_abort_task_cb() won't be called */
        }
    }
}

static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel_async       = iscsi_aio_cancel,
};

#endif

static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);

/* Called with QemuMutex held. */
static void
iscsi_set_events(IscsiLun *iscsilun)
{
    struct iscsi_context *iscsi = iscsilun->iscsi;
    int ev = iscsi_which_events(iscsi);

    if (ev != iscsilun->events) {
        aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
                           false,
                           (ev & POLLIN) ? iscsi_process_read : NULL,
                           (ev & POLLOUT) ? iscsi_process_write : NULL,
                           NULL, NULL,
                           iscsilun);
        iscsilun->events = ev;
    }
}

static void iscsi_timed_check_events(void *opaque)
{
    IscsiLun *iscsilun = opaque;

    WITH_QEMU_LOCK_GUARD(&iscsilun->mutex) {
        /* check for timed out requests */
        iscsi_service(iscsilun->iscsi, 0);

        if (iscsilun->request_timed_out) {
            iscsilun->request_timed_out = false;
            iscsi_reconnect(iscsilun->iscsi);
        }

        /*
         * newer versions of libiscsi may return zero events. Ensure we are
         * able to return to service once this situation changes.
         */
        iscsi_set_events(iscsilun);
    }

    timer_mod(iscsilun->event_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
}

static void
iscsi_process_read(void *arg)
{
    IscsiLun *iscsilun = arg;
    struct iscsi_context *iscsi = iscsilun->iscsi;

    qemu_mutex_lock(&iscsilun->mutex);
    iscsi_service(iscsi, POLLIN);
    iscsi_set_events(iscsilun);
    qemu_mutex_unlock(&iscsilun->mutex);
}

static void
iscsi_process_write(void *arg)
{
    IscsiLun *iscsilun = arg;
    struct iscsi_context *iscsi = iscsilun->iscsi;

    qemu_mutex_lock(&iscsilun->mutex);
    iscsi_service(iscsi, POLLOUT);
    iscsi_set_events(iscsilun);
    qemu_mutex_unlock(&iscsilun->mutex);
}

static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
{
    return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
}

static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
{
    return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
}
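
/*
 * Worked example for the conversions above, assuming a hypothetical LUN
 * with a 4096-byte block size: sector_qemu2lun(8, iscsilun) maps QEMU
 * sector 8 (byte offset 8 * 512 = 4096) to LUN block 1, and
 * sector_lun2qemu(1, iscsilun) maps LUN block 1 back to QEMU sector 8.
 */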

static bool is_byte_request_lun_aligned(int64_t offset, int64_t bytes,
                                        IscsiLun *iscsilun)
{
    if (offset % iscsilun->block_size || bytes % iscsilun->block_size) {
        error_report("iSCSI misaligned request: "
                     "iscsilun->block_size %u, offset %" PRIi64
                     ", bytes %" PRIi64,
                     iscsilun->block_size, offset, bytes);
        return false;
    }
    return true;
}

static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
                                          IscsiLun *iscsilun)
{
    assert(nb_sectors <= BDRV_REQUEST_MAX_SECTORS);
    return is_byte_request_lun_aligned(sector_num << BDRV_SECTOR_BITS,
                                       nb_sectors << BDRV_SECTOR_BITS,
                                       iscsilun);
}

static void iscsi_allocmap_free(IscsiLun *iscsilun)
{
    g_free(iscsilun->allocmap);
    g_free(iscsilun->allocmap_valid);
    iscsilun->allocmap = NULL;
    iscsilun->allocmap_valid = NULL;
}

static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
{
    iscsi_allocmap_free(iscsilun);

    assert(iscsilun->cluster_size);
    iscsilun->allocmap_size =
        DIV_ROUND_UP(iscsilun->num_blocks * iscsilun->block_size,
                     iscsilun->cluster_size);

    iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
    if (!iscsilun->allocmap) {
        return -ENOMEM;
    }

    if (open_flags & BDRV_O_NOCACHE) {
        /* when cache.direct = on all allocmap entries are
         * treated as invalid to force a relookup of the block
         * status on every read request */
        return 0;
    }

    iscsilun->allocmap_valid = bitmap_try_new(iscsilun->allocmap_size);
    if (!iscsilun->allocmap_valid) {
        /* if we are under memory pressure free the allocmap as well */
        iscsi_allocmap_free(iscsilun);
        return -ENOMEM;
    }

    return 0;
}

static void
iscsi_allocmap_update(IscsiLun *iscsilun, int64_t offset,
                      int64_t bytes, bool allocated, bool valid)
{
    int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;

    if (iscsilun->allocmap == NULL) {
        return;
    }
    /* expand to entirely contain all affected clusters */
    assert(iscsilun->cluster_size);
    cl_num_expanded = offset / iscsilun->cluster_size;
    nb_cls_expanded = DIV_ROUND_UP(offset + bytes,
                                   iscsilun->cluster_size) - cl_num_expanded;
    /* shrink to touch only completely contained clusters */
    cl_num_shrunk = DIV_ROUND_UP(offset, iscsilun->cluster_size);
    nb_cls_shrunk = (offset + bytes) / iscsilun->cluster_size - cl_num_shrunk;
    if (allocated) {
        bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
    } else {
        if (nb_cls_shrunk > 0) {
            bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
        }
    }

    if (iscsilun->allocmap_valid == NULL) {
        return;
    }
    if (valid) {
        if (nb_cls_shrunk > 0) {
            bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
        }
    } else {
        bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
                     nb_cls_expanded);
    }
}
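
/*
 * Worked example for the expand/shrink ranges above, assuming a
 * hypothetical cluster_size of 4096: for offset = 1000 and bytes = 5000
 * (i.e. bytes 1000..5999), the expanded range covers clusters 0..1
 * (cl_num_expanded = 0, nb_cls_expanded = 2), while the shrunk range is
 * empty (cl_num_shrunk = 1, nb_cls_shrunk = 0) because no cluster lies
 * completely inside the request.
 */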

static void
iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t offset,
                             int64_t bytes)
{
    iscsi_allocmap_update(iscsilun, offset, bytes, true, true);
}

static void
iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t offset,
                               int64_t bytes)
{
    /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
     * is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
     */
    iscsi_allocmap_update(iscsilun, offset, bytes, false, true);
}

static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t offset,
                                       int64_t bytes)
{
    iscsi_allocmap_update(iscsilun, offset, bytes, false, false);
}

static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
{
    if (iscsilun->allocmap) {
        bitmap_zero(iscsilun->allocmap, iscsilun->allocmap_size);
    }
    if (iscsilun->allocmap_valid) {
        bitmap_zero(iscsilun->allocmap_valid, iscsilun->allocmap_size);
    }
}

static inline bool
iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t offset,
                            int64_t bytes)
{
    unsigned long size;
    if (iscsilun->allocmap == NULL) {
        return true;
    }
    assert(iscsilun->cluster_size);
    size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size);
    return !(find_next_bit(iscsilun->allocmap, size,
                           offset / iscsilun->cluster_size) == size);
}

static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
                                           int64_t offset, int64_t bytes)
{
    unsigned long size;
    if (iscsilun->allocmap_valid == NULL) {
        return false;
    }
    assert(iscsilun->cluster_size);
    size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size);
    return (find_next_zero_bit(iscsilun->allocmap_valid, size,
                               offset / iscsilun->cluster_size) == size);
}
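
/*
 * Wait for an in-flight task to complete: re-arm the fd handlers, drop the
 * LUN mutex so that the completion callback can run, and yield the
 * coroutine until iscsi_co_generic_bh_cb() or the retry timer wakes it up
 * again.
 */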
static void coroutine_fn iscsi_co_wait_for_task(IscsiTask *iTask,
                                                IscsiLun *iscsilun)
{
    while (!iTask->complete) {
        iscsi_set_events(iscsilun);
        qemu_mutex_unlock(&iscsilun->mutex);
        qemu_coroutine_yield();
        qemu_mutex_lock(&iscsilun->mutex);
    }
}
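
/*
 * The request functions below all follow the same pattern: initialize an
 * IscsiTask, take the LUN mutex, issue the libiscsi task, wait for it via
 * iscsi_co_wait_for_task(), loop back to the retry label while
 * iscsi_co_generic_cb() requests a retry, and finally translate the SCSI
 * status into an errno value.
 */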

static int coroutine_fn
iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                QEMUIOVector *iov, int flags)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    uint64_t lba;
    uint32_t num_sectors;
    bool fua = flags & BDRV_REQ_FUA;
    int r = 0;

    if (fua) {
        assert(iscsilun->dpofua);
    }
    if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        return -EINVAL;
    }

    if (bs->bl.max_transfer) {
        assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
    }

    lba = sector_qemu2lun(sector_num, iscsilun);
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
    iscsi_co_init_iscsitask(iscsilun, &iTask);
    qemu_mutex_lock(&iscsilun->mutex);
retry:
    if (iscsilun->use_16_for_rw) {
#if LIBISCSI_API_VERSION >= (20160603)
        iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            NULL, num_sectors * iscsilun->block_size,
                                            iscsilun->block_size, 0, 0, fua, 0, 0,
                                            iscsi_co_generic_cb, &iTask,
                                            (struct scsi_iovec *)iov->iov, iov->niov);
    } else {
        iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            NULL, num_sectors * iscsilun->block_size,
                                            iscsilun->block_size, 0, 0, fua, 0, 0,
                                            iscsi_co_generic_cb, &iTask,
                                            (struct scsi_iovec *)iov->iov, iov->niov);
    }
#else
        iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                        NULL, num_sectors * iscsilun->block_size,
                                        iscsilun->block_size, 0, 0, fua, 0, 0,
                                        iscsi_co_generic_cb, &iTask);
    } else {
        iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                        NULL, num_sectors * iscsilun->block_size,
                                        iscsilun->block_size, 0, 0, fua, 0, 0,
                                        iscsi_co_generic_cb, &iTask);
    }
#endif
    if (iTask.task == NULL) {
        qemu_mutex_unlock(&iscsilun->mutex);
        return -ENOMEM;
    }
#if LIBISCSI_API_VERSION < (20160603)
    scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
                          iov->niov);
#endif
    iscsi_co_wait_for_task(&iTask, iscsilun);

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        iscsi_allocmap_set_invalid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
        error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba,
                     iTask.err_str);
        r = iTask.err_code;
        goto out_unlock;
    }

    iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
                                 nb_sectors * BDRV_SECTOR_SIZE);

out_unlock:
    qemu_mutex_unlock(&iscsilun->mutex);
    g_free(iTask.err_str);
    return r;
}

static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs,
                                              bool want_zero, int64_t offset,
                                              int64_t bytes, int64_t *pnum,
                                              int64_t *map,
                                              BlockDriverState **file)
{
    IscsiLun *iscsilun = bs->opaque;
    struct scsi_get_lba_status *lbas = NULL;
    struct scsi_lba_status_descriptor *lbasd = NULL;
    struct IscsiTask iTask;
    uint64_t lba, max_bytes;
    int ret;

    iscsi_co_init_iscsitask(iscsilun, &iTask);

    assert(QEMU_IS_ALIGNED(offset | bytes, iscsilun->block_size));

    /* default to all sectors allocated */
    ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
    if (map) {
        *map = offset;
    }
    *pnum = bytes;

    /* LUN does not support logical block provisioning */
    if (!iscsilun->lbpme) {
        goto out;
    }

    lba = offset / iscsilun->block_size;
    max_bytes = (iscsilun->num_blocks - lba) * iscsilun->block_size;

    qemu_mutex_lock(&iscsilun->mutex);
retry:
    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
                                  lba, 8 + 16, iscsi_co_generic_cb,
                                  &iTask) == NULL) {
        ret = -ENOMEM;
        goto out_unlock;
    }
    iscsi_co_wait_for_task(&iTask, iscsilun);

    if (iTask.do_retry) {
        if (iTask.task != NULL) {
            scsi_free_scsi_task(iTask.task);
            iTask.task = NULL;
        }
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        /* in case the get_lba_status_callout fails (e.g.
         * because the device is busy or the cmd is not
         * supported) we pretend all blocks are allocated
         * for backwards compatibility */
        error_report("iSCSI GET_LBA_STATUS failed at lba %" PRIu64 ": %s",
                     lba, iTask.err_str);
        goto out_unlock;
    }

    lbas = scsi_datain_unmarshall(iTask.task);
    if (lbas == NULL || lbas->num_descriptors == 0) {
        ret = -EIO;
        goto out_unlock;
    }

    lbasd = &lbas->descriptors[0];

    if (lba != lbasd->lba) {
        ret = -EIO;
        goto out_unlock;
    }

    *pnum = MIN((int64_t) lbasd->num_blocks * iscsilun->block_size, max_bytes);

    if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
        lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
        ret &= ~BDRV_BLOCK_DATA;
        if (iscsilun->lbprz) {
            ret |= BDRV_BLOCK_ZERO;
        }
    }

    if (ret & BDRV_BLOCK_ZERO) {
        iscsi_allocmap_set_unallocated(iscsilun, offset, *pnum);
    } else {
        iscsi_allocmap_set_allocated(iscsilun, offset, *pnum);
    }

out_unlock:
    qemu_mutex_unlock(&iscsilun->mutex);
    g_free(iTask.err_str);
out:
    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
    }
    if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) {
        *file = bs;
    }
    return ret;
}
|
|
|
|
|
2013-12-05 16:47:17 +01:00
|
|
|
static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, int nb_sectors,
|
|
|
|
QEMUIOVector *iov)
|
2011-10-25 10:24:24 +02:00
|
|
|
{
|
2013-12-05 16:47:17 +01:00
|
|
|
IscsiLun *iscsilun = bs->opaque;
|
|
|
|
struct IscsiTask iTask;
|
2013-02-21 16:15:54 +01:00
|
|
|
uint64_t lba;
|
|
|
|
uint32_t num_sectors;
|
2017-12-08 12:51:08 +01:00
|
|
|
int r = 0;
|
2011-10-25 10:24:24 +02:00
|
|
|
|
2016-06-01 23:10:05 +02:00
|
|
|
if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
|
2013-12-05 16:47:17 +01:00
|
|
|
return -EINVAL;
|
2012-05-22 12:10:05 +02:00
|
|
|
}
|
|
|
|
|
2016-07-15 20:32:04 +02:00
|
|
|
if (bs->bl.max_transfer) {
|
|
|
|
assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
|
2014-10-27 10:18:48 +01:00
|
|
|
}
|
|
|
|
|
2016-07-18 10:52:20 +02:00
|
|
|
/* if cache.direct is off and we have a valid entry in our allocation map
|
|
|
|
* we can skip checking the block status and directly return zeroes if
|
|
|
|
* the request falls within an unallocated area */
|
2018-02-13 21:26:47 +01:00
|
|
|
if (iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
|
|
|
|
nb_sectors * BDRV_SECTOR_SIZE) &&
|
|
|
|
!iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
|
|
|
|
nb_sectors * BDRV_SECTOR_SIZE)) {
|
2016-07-18 10:52:20 +02:00
|
|
|
qemu_iovec_memset(iov, 0, 0x00, iov->size);
|
|
|
|
return 0;
|
|
|
|
}
    if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
        !iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
                                 nb_sectors * BDRV_SECTOR_SIZE) &&
        !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
                                     nb_sectors * BDRV_SECTOR_SIZE)) {
        int64_t pnum;
        /* check the block status from the beginning of the cluster
         * containing the start sector */
        int64_t head;
        int ret;

        assert(iscsilun->cluster_size);
        head = (sector_num * BDRV_SECTOR_SIZE) % iscsilun->cluster_size;
        ret = iscsi_co_block_status(bs, true,
                                    sector_num * BDRV_SECTOR_SIZE - head,
                                    BDRV_REQUEST_MAX_BYTES, &pnum, NULL, NULL);
        if (ret < 0) {
            return ret;
        }
        /* if the whole request falls into an unallocated area we can avoid
         * reading and directly return zeroes instead */
        if (ret & BDRV_BLOCK_ZERO &&
            pnum >= nb_sectors * BDRV_SECTOR_SIZE + head) {
            qemu_iovec_memset(iov, 0, 0x00, iov->size);
            return 0;
        }
    }
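
    /*
     * READ10 carries only a 32-bit LBA, so large LUNs need READ16;
     * use_16_for_rw is chosen up front based on the size of the LUN.
     */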
    lba = sector_qemu2lun(sector_num, iscsilun);
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);

    iscsi_co_init_iscsitask(iscsilun, &iTask);
    qemu_mutex_lock(&iscsilun->mutex);
retry:
    if (iscsilun->use_16_for_rw) {
#if LIBISCSI_API_VERSION >= (20160603)
        iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                           num_sectors * iscsilun->block_size,
                                           iscsilun->block_size, 0, 0, 0, 0, 0,
                                           iscsi_co_generic_cb, &iTask,
                                           (struct scsi_iovec *)iov->iov, iov->niov);
    } else {
        iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                           num_sectors * iscsilun->block_size,
                                           iscsilun->block_size,
                                           0, 0, 0, 0, 0,
                                           iscsi_co_generic_cb, &iTask,
                                           (struct scsi_iovec *)iov->iov, iov->niov);
    }
#else
        iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                       num_sectors * iscsilun->block_size,
                                       iscsilun->block_size, 0, 0, 0, 0, 0,
                                       iscsi_co_generic_cb, &iTask);
    } else {
        iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                       num_sectors * iscsilun->block_size,
                                       iscsilun->block_size,
                                       0, 0, 0, 0, 0,
                                       iscsi_co_generic_cb, &iTask);
    }
#endif
    if (iTask.task == NULL) {
        qemu_mutex_unlock(&iscsilun->mutex);
        return -ENOMEM;
    }
#if LIBISCSI_API_VERSION < (20160603)
    scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
#endif

    iscsi_co_wait_for_task(&iTask, iscsilun);
    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        error_report("iSCSI READ10/16 failed at lba %" PRIu64 ": %s",
                     lba, iTask.err_str);
        r = iTask.err_code;
    }

    qemu_mutex_unlock(&iscsilun->mutex);
    g_free(iTask.err_str);
    return r;
}
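
/*
 * Flush maps to SCSI SYNCHRONIZE CACHE (10), which asks the target to
 * commit its volatile write cache for the whole LUN to stable storage.
 */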
static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    int r = 0;

    iscsi_co_init_iscsitask(iscsilun, &iTask);
    qemu_mutex_lock(&iscsilun->mutex);
retry:
    if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
                                      0, iscsi_co_generic_cb, &iTask) == NULL) {
        qemu_mutex_unlock(&iscsilun->mutex);
        return -ENOMEM;
    }

    iscsi_co_wait_for_task(&iTask, iscsilun);

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask.err_str);
        r = iTask.err_code;
    }

    qemu_mutex_unlock(&iscsilun->mutex);
    g_free(iTask.err_str);
    return r;
}

/*
 * SCSI passthrough via scsi-generic:
 *
 * SG_IO ioctls are emulated here and the embedded SCSI commands are passed
 * through libiscsi to the iSCSI target, giving end-to-end SCSI passthrough
 * from the guest, via scsi-generic, all the way to the target.  To activate
 * it, the iSCSI LUN must be exposed as a scsi-generic device, e.g.:
 *
 *   -device lsi -device scsi-generic,drive=MyISCSI \
 *   -drive file=iscsi://10.1.1.125/iqn.ronnie.test/1,if=none,id=MyISCSI
 *
 * Note that a guest currently cannot boot from a scsi-generic device.
 *
 * This only works when the host is Linux, since the emulation relies on the
 * SG_IO definitions from the scsi-generic implementation in the Linux
 * kernel.  Porting to other hosts would mean re-implementing similar enough
 * structures in a fake scsi-generic layer on top of libiscsi.
 */
#ifdef __linux__

/* Called (via iscsi_service) with QemuMutex held. */
static void
iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
                   void *command_data, void *opaque)
{
    IscsiAIOCB *acb = opaque;

    if (status == SCSI_STATUS_CANCELLED) {
        if (!acb->bh) {
            acb->status = -ECANCELED;
            iscsi_schedule_bh(acb);
        }
        return;
    }

    acb->status = 0;
    if (status < 0) {
        error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
                     iscsi_get_error(iscsi));
        acb->status = -iscsi_translate_sense(&acb->task->sense);
    }

    acb->ioh->driver_status = 0;
    acb->ioh->host_status   = 0;
    acb->ioh->resid         = 0;
    acb->ioh->status        = status;
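
    /*
     * On CHECK CONDITION, copy the sense data back into the sg_io_hdr
     * below.  The data-in buffer libiscsi hands us carries a two-byte
     * sense-length header in front of the actual sense bytes, hence the
     * "- 2" and "data[2]".
     */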
#define SG_ERR_DRIVER_SENSE 0x08

    if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
        int ss;

        acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;

        acb->ioh->sb_len_wr = acb->task->datain.size - 2;
        ss = MIN(acb->ioh->mx_sb_len, acb->ioh->sb_len_wr);
        memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
    }

    iscsi_schedule_bh(acb);
}

static void iscsi_ioctl_bh_completion(void *opaque)
{
    IscsiAIOCB *acb = opaque;

    qemu_bh_delete(acb->bh);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_aio_unref(acb);
}

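/*
 * Emulate the trivial SG ioctls (protocol version, SCSI id) locally instead
 * of talking to the target.  Completion still goes through a bottom half so
 * the caller's callback is always invoked asynchronously, as with SG_IO.
 */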
static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
{
    BlockDriverState *bs = acb->common.bs;
    IscsiLun *iscsilun = bs->opaque;
    int ret = 0;

    switch (req) {
    case SG_GET_VERSION_NUM:
        *(int *)buf = 30000;
        break;
    case SG_GET_SCSI_ID:
        ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
        break;
    default:
        ret = -EINVAL;
    }
    assert(!acb->bh);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
                         iscsi_ioctl_bh_completion, acb);
    acb->ret = ret;
    qemu_bh_schedule(acb->bh);
}

static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
                                   unsigned long int req, void *buf,
                                   BlockCompletionFunc *cb, void *opaque)
{
    IscsiLun *iscsilun = bs->opaque;
    struct iscsi_context *iscsi = iscsilun->iscsi;
    struct iscsi_data data;
    IscsiAIOCB *acb;

    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);

    acb->iscsilun = iscsilun;
    acb->bh = NULL;
    acb->status = -EINPROGRESS;
    acb->ioh = buf;
    acb->cancelled = false;

    if (req != SG_IO) {
        iscsi_ioctl_handle_emulated(acb, req, buf);
        return &acb->common;
    }

    if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
        error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
                     acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
        qemu_aio_unref(acb);
        return NULL;
    }

    acb->task = malloc(sizeof(struct scsi_task));
    if (acb->task == NULL) {
        error_report("iSCSI: Failed to allocate task for scsi command. %s",
                     iscsi_get_error(iscsi));
        qemu_aio_unref(acb);
        return NULL;
    }
    memset(acb->task, 0, sizeof(struct scsi_task));

    switch (acb->ioh->dxfer_direction) {
    case SG_DXFER_TO_DEV:
        acb->task->xfer_dir = SCSI_XFER_WRITE;
        break;
    case SG_DXFER_FROM_DEV:
        acb->task->xfer_dir = SCSI_XFER_READ;
        break;
    default:
        acb->task->xfer_dir = SCSI_XFER_NONE;
        break;
    }

    acb->task->cdb_size = acb->ioh->cmd_len;
    memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
    acb->task->expxferlen = acb->ioh->dxfer_len;

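    /*
     * Write payloads arrive from scsi-generic either as a flat buffer
     * (iovec_count == 0) or as an array of iovecs in dxferp; hand whichever
     * form we got straight to libiscsi to avoid a bounce copy.
     */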
    data.size = 0;
    qemu_mutex_lock(&iscsilun->mutex);
    if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
        if (acb->ioh->iovec_count == 0) {
            data.data = acb->ioh->dxferp;
            data.size = acb->ioh->dxfer_len;
        } else {
            scsi_task_set_iov_out(acb->task,
                                  (struct scsi_iovec *) acb->ioh->dxferp,
                                  acb->ioh->iovec_count);
        }
    }

    if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
                                 iscsi_aio_ioctl_cb,
                                 (data.size > 0) ? &data : NULL,
                                 acb) != 0) {
        qemu_mutex_unlock(&iscsilun->mutex);
        scsi_free_scsi_task(acb->task);
        qemu_aio_unref(acb);
        return NULL;
    }

    /* tell libiscsi to read straight into the buffer we got from ioctl */
    if (acb->task->xfer_dir == SCSI_XFER_READ) {
        if (acb->ioh->iovec_count == 0) {
            scsi_task_add_data_in_buffer(acb->task,
                                         acb->ioh->dxfer_len,
                                         acb->ioh->dxferp);
        } else {
            scsi_task_set_iov_in(acb->task,
                                 (struct scsi_iovec *) acb->ioh->dxferp,
                                 acb->ioh->iovec_count);
        }
    }

    iscsi_set_events(iscsilun);
    qemu_mutex_unlock(&iscsilun->mutex);

    return &acb->common;
}

#endif

static int64_t coroutine_fn
iscsi_co_getlength(BlockDriverState *bs)
{
    IscsiLun *iscsilun = bs->opaque;
    int64_t len;

    len = iscsilun->num_blocks;
    len *= iscsilun->block_size;

    return len;
}

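/*
 * Discard maps to SCSI UNMAP.  UNMAP is only a hint to the target, so a
 * request on a LUN that does not advertise LBPU support can simply report
 * success without doing anything.
 */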
static int
coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset,
                               int64_t bytes)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    struct unmap_list list;
    int r = 0;

    if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
        return -ENOTSUP;
    }

    if (!iscsilun->lbp.lbpu) {
        /* UNMAP is not supported by the target */
        return 0;
    }

    /*
     * We don't want to overflow list.num which is uint32_t.
     * We rely on our max_pdiscard.
     */
    assert(bytes / iscsilun->block_size <= UINT32_MAX);

    list.lba = offset / iscsilun->block_size;
    list.num = bytes / iscsilun->block_size;

    iscsi_co_init_iscsitask(iscsilun, &iTask);
    qemu_mutex_lock(&iscsilun->mutex);
retry:
    if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
                         iscsi_co_generic_cb, &iTask) == NULL) {
        r = -ENOMEM;
        goto out_unlock;
    }

    iscsi_co_wait_for_task(&iTask, iscsilun);

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

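    /*
     * UNMAP is advisory: the target may deallocate all, part, or none of
     * the range.  We therefore no longer know the allocation status and
     * must invalidate the map rather than mark the range unallocated.
     */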
    iscsi_allocmap_set_invalid(iscsilun, offset, bytes);

    if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
        /* The target might fail with a check condition if it is not happy
         * with the alignment of the UNMAP request; we silently ignore the
         * failure in this case. */
        goto out_unlock;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        error_report("iSCSI UNMAP failed at lba %" PRIu64 ": %s",
                     list.lba, iTask.err_str);
        r = iTask.err_code;
        goto out_unlock;
    }

out_unlock:
    qemu_mutex_unlock(&iscsilun->mutex);
    g_free(iTask.err_str);
    return r;
}

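/*
 * Write-zeroes is implemented with WRITE SAME (10/16) replicating a single
 * zero block.  With BDRV_REQ_MAY_UNMAP set, the UNMAP bit is used as well
 * when the target advertises support for it (lbpws/lbpws10).
 */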
static int
coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
                                    int64_t bytes, BdrvRequestFlags flags)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    uint64_t lba;
    uint64_t nb_blocks;
    bool use_16_for_ws = iscsilun->use_16_for_rw;
    int r = 0;

    if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
        return -ENOTSUP;
    }

    if (flags & BDRV_REQ_MAY_UNMAP) {
        if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
            /* WRITESAME10 with UNMAP is unsupported, try WRITESAME16 */
            use_16_for_ws = true;
        }
        if (use_16_for_ws && !iscsilun->lbp.lbpws) {
            /* WRITESAME16 with UNMAP is not supported by the target,
             * fall back and try WRITESAME10/16 without UNMAP */
            flags &= ~BDRV_REQ_MAY_UNMAP;
            use_16_for_ws = iscsilun->use_16_for_rw;
        }
    }

    if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
        /* WRITESAME without UNMAP is not supported by the target */
        return -ENOTSUP;
    }

    lba = offset / iscsilun->block_size;
    nb_blocks = bytes / iscsilun->block_size;

    if (iscsilun->zeroblock == NULL) {
        iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
        if (iscsilun->zeroblock == NULL) {
            return -ENOMEM;
        }
    }

    qemu_mutex_lock(&iscsilun->mutex);
    iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
    if (use_16_for_ws) {
2021-09-03 12:28:03 +02:00
|
|
|
/*
|
|
|
|
* iscsi_writesame16_task num_blocks argument is uint32_t. We rely here
|
|
|
|
* on our max_pwrite_zeroes limit.
|
|
|
|
*/
|
|
|
|
assert(nb_blocks <= UINT32_MAX);
|
2014-06-04 15:47:39 +02:00
|
|
|
iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
|
|
|
|
iscsilun->zeroblock, iscsilun->block_size,
|
|
|
|
nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
|
|
|
|
0, 0, iscsi_co_generic_cb, &iTask);
|
|
|
|
} else {
|
2021-09-03 12:28:03 +02:00
|
|
|
/*
|
|
|
|
* iscsi_writesame10_task num_blocks argument is uint16_t. We rely here
|
|
|
|
* on our max_pwrite_zeroes limit.
|
|
|
|
*/
|
|
|
|
assert(nb_blocks <= UINT16_MAX);
|
2014-06-04 15:47:39 +02:00
|
|
|
iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
|
|
|
|
iscsilun->zeroblock, iscsilun->block_size,
|
|
|
|
nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
|
|
|
|
0, 0, iscsi_co_generic_cb, &iTask);
|
|
|
|
}
|
|
|
|
if (iTask.task == NULL) {
|
2017-03-03 16:23:36 +01:00
|
|
|
qemu_mutex_unlock(&iscsilun->mutex);
|
2013-12-20 10:02:47 +01:00
|
|
|
return -ENOMEM;
|
2013-10-24 12:07:02 +02:00
|
|
|
}
|
|
|
|
|
2018-06-01 11:26:45 +02:00
|
|
|
iscsi_co_wait_for_task(&iTask, iscsilun);
|
2013-10-24 12:07:02 +02:00
|
|
|
|
2014-02-22 13:17:24 +01:00
|
|
|
if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
|
|
|
|
iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
|
2014-04-02 12:12:50 +02:00
|
|
|
(iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
|
|
|
|
iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
|
2014-02-22 13:17:24 +01:00
|
|
|
/* WRITE SAME is not supported by the target */
|
|
|
|
iscsilun->has_write_same = false;
|
|
|
|
scsi_free_scsi_task(iTask.task);
|
2017-02-22 19:07:25 +01:00
|
|
|
r = -ENOTSUP;
|
|
|
|
goto out_unlock;
|
2014-02-22 13:17:24 +01:00
|
|
|
}
|
|
|
|
|
2013-10-24 12:07:02 +02:00
|
|
|
if (iTask.task != NULL) {
|
|
|
|
scsi_free_scsi_task(iTask.task);
|
|
|
|
iTask.task = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (iTask.do_retry) {
|
2014-02-18 13:08:39 +01:00
|
|
|
iTask.complete = 0;
|
2013-10-24 12:07:02 +02:00
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (iTask.status != SCSI_STATUS_GOOD) {
|
2018-02-13 21:26:47 +01:00
|
|
|
iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
|
2017-12-08 12:51:08 +01:00
|
|
|
error_report("iSCSI WRITESAME10/16 failed at lba %" PRIu64 ": %s",
|
|
|
|
lba, iTask.err_str);
|
2017-02-22 19:07:25 +01:00
|
|
|
r = iTask.err_code;
|
|
|
|
goto out_unlock;
|
2013-10-24 12:07:02 +02:00
|
|
|
}
|
|
|
|
|
2014-04-28 13:11:32 +02:00
|
|
|
if (flags & BDRV_REQ_MAY_UNMAP) {
|
2018-02-13 21:26:47 +01:00
|
|
|
iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
|
2014-04-28 13:11:32 +02:00
|
|
|
} else {
|
2018-02-13 21:26:47 +01:00
|
|
|
iscsi_allocmap_set_allocated(iscsilun, offset, bytes);
|
2014-04-28 13:11:32 +02:00
|
|
|
}
|
|
|
|
|
2017-02-22 19:07:25 +01:00
|
|
|
out_unlock:
|
|
|
|
qemu_mutex_unlock(&iscsilun->mutex);
|
2017-12-08 12:51:08 +01:00
|
|
|
g_free(iTask.err_str);
|
2017-02-22 19:07:25 +01:00
|
|
|
return r;
|
2013-10-24 12:07:02 +02:00
|
|
|
}
|
|
|
|
|
2016-12-08 14:23:07 +01:00
|
|
|
static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
|
2014-02-17 14:43:53 +01:00
|
|
|
Error **errp)
|
2012-01-25 23:39:02 +01:00
|
|
|
{
|
|
|
|
const char *user = NULL;
|
|
|
|
const char *password = NULL;
|
2016-01-21 15:19:21 +01:00
|
|
|
const char *secretid;
|
|
|
|
char *secret = NULL;
|
2012-01-25 23:39:02 +01:00
|
|
|
|
|
|
|
user = qemu_opt_get(opts, "user");
|
|
|
|
if (!user) {
|
2014-02-17 14:43:53 +01:00
|
|
|
return;
|
2012-01-25 23:39:02 +01:00
|
|
|
}
|
|
|
|
|
2016-01-21 15:19:21 +01:00
|
|
|
secretid = qemu_opt_get(opts, "password-secret");
|
2012-01-25 23:39:02 +01:00
|
|
|
password = qemu_opt_get(opts, "password");
|
2016-01-21 15:19:21 +01:00
|
|
|
if (secretid && password) {
|
|
|
|
error_setg(errp, "'password' and 'password-secret' properties are "
|
|
|
|
"mutually exclusive");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (secretid) {
|
|
|
|
secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
|
|
|
|
if (!secret) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
password = secret;
|
|
|
|
} else if (!password) {
|
2014-02-17 14:43:53 +01:00
|
|
|
error_setg(errp, "CHAP username specified but no password was given");
|
|
|
|
return;
|
2022-12-01 10:08:07 +01:00
|
|
|
} else {
|
|
|
|
warn_report("iSCSI block driver 'password' option is deprecated, "
|
|
|
|
"use 'password-secret' instead");
|
2012-01-25 23:39:02 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
|
2014-02-17 14:43:53 +01:00
|
|
|
error_setg(errp, "Failed to set initiator username and password");
|
2012-01-25 23:39:02 +01:00
|
|
|
}
|
2016-01-21 15:19:21 +01:00
|
|
|
|
|
|
|
g_free(secret);
|
2012-01-25 23:39:02 +01:00
|
|
|
}
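/*
 * Usage sketch (hypothetical IDs and paths): instead of a plaintext
 * "password", the CHAP secret would normally come from a secret object
 * referenced through "password-secret", e.g.:
 *
 *   -object secret,id=chap0,file=/etc/qemu/chap.password
 *   -blockdev driver=iscsi,...,user=admin,password-secret=chap0
 */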
|
|
|
|
|
2016-12-08 14:23:09 +01:00
|
|
|
static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts,
|
2014-02-17 14:43:53 +01:00
|
|
|
Error **errp)
|
2012-01-25 23:39:02 +01:00
|
|
|
{
|
|
|
|
const char *digest = NULL;
|
|
|
|
|
|
|
|
digest = qemu_opt_get(opts, "header-digest");
|
|
|
|
if (!digest) {
|
2016-12-08 14:23:09 +01:00
|
|
|
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
|
2016-12-08 14:23:11 +01:00
|
|
|
} else if (!strcmp(digest, "crc32c")) {
|
2012-01-25 23:39:02 +01:00
|
|
|
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
|
2016-12-08 14:23:11 +01:00
|
|
|
} else if (!strcmp(digest, "none")) {
|
2012-01-25 23:39:02 +01:00
|
|
|
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
|
2016-12-08 14:23:11 +01:00
|
|
|
} else if (!strcmp(digest, "crc32c-none")) {
|
2012-01-25 23:39:02 +01:00
|
|
|
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
|
2016-12-08 14:23:11 +01:00
|
|
|
} else if (!strcmp(digest, "none-crc32c")) {
|
2012-01-25 23:39:02 +01:00
|
|
|
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
|
|
|
|
} else {
|
2014-02-17 14:43:53 +01:00
|
|
|
error_setg(errp, "Invalid header-digest setting : %s", digest);
|
2012-01-25 23:39:02 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-08 14:23:08 +01:00
|
|
|
static char *get_initiator_name(QemuOpts *opts)
|
2012-01-25 23:39:02 +01:00
|
|
|
{
|
2013-08-02 17:02:01 +02:00
|
|
|
const char *name;
|
|
|
|
char *iscsi_name;
|
|
|
|
UuidInfo *uuid_info;
|
2012-01-25 23:39:02 +01:00
|
|
|
|
2016-12-08 14:23:08 +01:00
|
|
|
name = qemu_opt_get(opts, "initiator-name");
|
|
|
|
if (name) {
|
|
|
|
return g_strdup(name);
|
2012-01-25 23:39:02 +01:00
|
|
|
}
|
|
|
|
|
2013-08-02 17:02:01 +02:00
|
|
|
uuid_info = qmp_query_uuid(NULL);
|
|
|
|
if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
|
|
|
|
name = qemu_get_vm_name();
|
2012-08-06 10:54:41 +02:00
|
|
|
} else {
|
2013-08-02 17:02:01 +02:00
|
|
|
name = uuid_info->UUID;
|
2012-01-25 23:39:02 +01:00
|
|
|
}
|
2013-08-02 17:02:01 +02:00
|
|
|
iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
|
|
|
|
name ? ":" : "", name ? name : "");
|
|
|
|
qapi_free_UuidInfo(uuid_info);
|
|
|
|
return iscsi_name;
|
2012-01-25 23:39:02 +01:00
|
|
|
}
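/*
 * Example (illustrative): with a VM UUID of
 * 550e8400-e29b-41d4-a716-446655440000 and no "initiator-name" option,
 * the code above produces
 * "iqn.2008-11.org.linux-kvm:550e8400-e29b-41d4-a716-446655440000".
 */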
|
|
|
|
|
2012-12-06 10:46:47 +01:00
|
|
|
static void iscsi_nop_timed_event(void *opaque)
|
|
|
|
{
|
|
|
|
IscsiLun *iscsilun = opaque;
|
|
|
|
|
2020-04-04 06:21:08 +02:00
|
|
|
QEMU_LOCK_GUARD(&iscsilun->mutex);
|
2015-06-16 13:45:07 +02:00
|
|
|
if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
|
2012-12-06 10:46:47 +01:00
|
|
|
error_report("iSCSI: NOP timeout. Reconnecting...");
|
2015-06-16 13:45:07 +02:00
|
|
|
iscsilun->request_timed_out = true;
|
|
|
|
} else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
|
2012-12-06 10:46:47 +01:00
|
|
|
error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
|
2020-04-04 06:21:08 +02:00
|
|
|
return;
|
2012-12-06 10:46:47 +01:00
|
|
|
}
|
|
|
|
|
2013-08-21 17:03:08 +02:00
|
|
|
timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
|
2012-12-06 10:46:47 +01:00
|
|
|
iscsi_set_events(iscsilun);
|
|
|
|
}
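/*
 * Timing note: a NOP-Out is issued every NOP_INTERVAL milliseconds; once
 * MAX_NOP_FAILURES of them are outstanding without a reply, the session
 * is flagged as timed out and a reconnect is triggered.
 */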
|
|
|
|
|
2014-02-17 14:43:53 +01:00
|
|
|
static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
|
2013-02-18 14:50:46 +01:00
|
|
|
{
|
|
|
|
struct scsi_task *task = NULL;
|
|
|
|
struct scsi_readcapacity10 *rc10 = NULL;
|
|
|
|
struct scsi_readcapacity16 *rc16 = NULL;
|
|
|
|
int retries = ISCSI_CMD_RETRIES;
|
|
|
|
|
2013-05-31 13:56:24 +02:00
|
|
|
do {
|
|
|
|
if (task != NULL) {
|
|
|
|
scsi_free_scsi_task(task);
|
|
|
|
task = NULL;
|
2013-02-18 14:50:46 +01:00
|
|
|
}
|
2013-05-31 13:56:24 +02:00
|
|
|
|
|
|
|
switch (iscsilun->type) {
|
|
|
|
case TYPE_DISK:
|
|
|
|
task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
|
|
|
|
if (task != NULL && task->status == SCSI_STATUS_GOOD) {
|
|
|
|
rc16 = scsi_datain_unmarshall(task);
|
|
|
|
if (rc16 == NULL) {
|
2014-02-17 14:43:53 +01:00
|
|
|
error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
|
2013-05-31 13:56:24 +02:00
|
|
|
} else {
|
|
|
|
iscsilun->block_size = rc16->block_length;
|
|
|
|
iscsilun->num_blocks = rc16->returned_lba + 1;
|
2015-04-16 16:08:26 +02:00
|
|
|
iscsilun->lbpme = !!rc16->lbpme;
|
|
|
|
iscsilun->lbprz = !!rc16->lbprz;
|
2014-06-04 15:47:39 +02:00
|
|
|
iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
|
2013-05-31 13:56:24 +02:00
|
|
|
}
|
2015-12-29 04:32:14 +01:00
|
|
|
break;
|
2013-05-31 13:56:24 +02:00
|
|
|
}
|
2015-12-29 04:32:14 +01:00
|
|
|
if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
|
|
|
|
&& task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* Fall through and try READ CAPACITY(10) instead. */
|
2013-05-31 13:56:24 +02:00
|
|
|
case TYPE_ROM:
|
|
|
|
task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
|
|
|
|
if (task != NULL && task->status == SCSI_STATUS_GOOD) {
|
|
|
|
rc10 = scsi_datain_unmarshall(task);
|
|
|
|
if (rc10 == NULL) {
|
2014-02-17 14:43:53 +01:00
|
|
|
error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
|
2013-05-31 13:56:24 +02:00
|
|
|
} else {
|
|
|
|
iscsilun->block_size = rc10->block_size;
|
|
|
|
if (rc10->lba == 0) {
|
|
|
|
/* blank disk loaded */
|
|
|
|
iscsilun->num_blocks = 0;
|
|
|
|
} else {
|
|
|
|
iscsilun->num_blocks = rc10->lba + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
2014-02-17 14:43:53 +01:00
|
|
|
return;
|
2013-02-18 14:50:46 +01:00
|
|
|
}
|
2013-05-31 13:56:24 +02:00
|
|
|
} while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
|
|
|
|
&& task->sense.key == SCSI_SENSE_UNIT_ATTENTION
|
|
|
|
&& retries-- > 0);
|
2013-02-18 14:50:46 +01:00
|
|
|
|
2013-05-31 13:56:24 +02:00
|
|
|
if (task == NULL || task->status != SCSI_STATUS_GOOD) {
|
2015-12-16 05:40:40 +01:00
|
|
|
error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
|
2015-08-14 13:33:36 +02:00
|
|
|
} else if (!iscsilun->block_size ||
|
|
|
|
iscsilun->block_size % BDRV_SECTOR_SIZE) {
|
|
|
|
error_setg(errp, "iSCSI: the target returned an invalid "
|
|
|
|
"block size of %d.", iscsilun->block_size);
|
2013-05-31 13:56:24 +02:00
|
|
|
}
|
2013-02-18 14:50:46 +01:00
|
|
|
if (task) {
|
|
|
|
scsi_free_scsi_task(task);
|
|
|
|
}
|
|
|
|
}
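/*
 * Note: the geometry probed above gives the LUN size in bytes as
 *     num_blocks * (uint64_t)block_size
 * which iscsi_open() later converts to 512-byte sectors via
 * sector_lun2qemu() when setting bs->total_sectors.
 */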
|
|
|
|
|
2014-02-17 14:43:52 +01:00
|
|
|
static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
|
2014-02-17 18:34:08 +01:00
|
|
|
int evpd, int pc, void **inq, Error **errp)
|
2014-02-17 14:43:52 +01:00
|
|
|
{
|
|
|
|
int full_size;
|
|
|
|
struct scsi_task *task = NULL;
|
|
|
|
task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
|
|
|
|
if (task == NULL || task->status != SCSI_STATUS_GOOD) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
full_size = scsi_datain_getfullsize(task);
|
|
|
|
if (full_size > task->datain.size) {
|
|
|
|
scsi_free_scsi_task(task);
|
|
|
|
|
|
|
|
/* we need more data for the full list */
|
|
|
|
task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
|
2013-07-19 09:19:39 +02:00
|
|
|
if (task == NULL || task->status != SCSI_STATUS_GOOD) {
|
|
|
|
goto fail;
|
|
|
|
}
|
2014-02-17 14:43:52 +01:00
|
|
|
}
|
2013-07-19 09:19:39 +02:00
|
|
|
|
2014-02-17 18:34:08 +01:00
|
|
|
*inq = scsi_datain_unmarshall(task);
|
|
|
|
if (*inq == NULL) {
|
|
|
|
error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
|
2014-04-25 16:50:35 +02:00
|
|
|
goto fail_with_err;
|
2014-02-17 18:34:08 +01:00
|
|
|
}
|
|
|
|
|
2014-02-17 14:43:52 +01:00
|
|
|
return task;
|
2013-07-19 09:19:39 +02:00
|
|
|
|
|
|
|
fail:
|
2014-04-25 16:50:35 +02:00
|
|
|
error_setg(errp, "iSCSI: Inquiry command failed : %s",
|
|
|
|
iscsi_get_error(iscsi));
|
|
|
|
fail_with_err:
|
2014-02-17 18:34:08 +01:00
|
|
|
if (task != NULL) {
|
2014-02-17 14:43:52 +01:00
|
|
|
scsi_free_scsi_task(task);
|
|
|
|
}
|
|
|
|
return NULL;
|
2013-07-19 09:19:39 +02:00
|
|
|
}
|
|
|
|
|
2014-05-08 16:34:42 +02:00
|
|
|
static void iscsi_detach_aio_context(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
IscsiLun *iscsilun = bs->opaque;
|
|
|
|
|
2015-10-23 05:08:05 +02:00
|
|
|
aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
|
aio-posix: split poll check from ready handler
Adaptive polling measures the execution time of the polling check plus
handlers called when a polled event becomes ready. Handlers can take a
significant amount of time, making it look like polling was running for
a long time when in fact the event handler was running for a long time.
For example, on Linux the io_submit(2) syscall invoked when a virtio-blk
device's virtqueue becomes ready can take 10s of microseconds. This
can exceed the default polling interval (32 microseconds) and cause
adaptive polling to stop polling.
By excluding the handler's execution time from the polling check we make
the adaptive polling calculation more accurate. As a result, the event
loop now stays in polling mode where previously it would have fallen
back to file descriptor monitoring.
The following data was collected with virtio-blk num-queues=2
event_idx=off using an IOThread. Before:
168k IOPS, IOThread syscalls:
9837.115 ( 0.020 ms): IO iothread1/620155 io_submit(ctx_id: 140512552468480, nr: 16, iocbpp: 0x7fcb9f937db0) = 16
9837.158 ( 0.002 ms): IO iothread1/620155 write(fd: 103, buf: 0x556a2ef71b88, count: 8) = 8
9837.161 ( 0.001 ms): IO iothread1/620155 write(fd: 104, buf: 0x556a2ef71b88, count: 8) = 8
9837.163 ( 0.001 ms): IO iothread1/620155 ppoll(ufds: 0x7fcb90002800, nfds: 4, tsp: 0x7fcb9f1342d0, sigsetsize: 8) = 3
9837.164 ( 0.001 ms): IO iothread1/620155 read(fd: 107, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.174 ( 0.001 ms): IO iothread1/620155 read(fd: 105, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.176 ( 0.001 ms): IO iothread1/620155 read(fd: 106, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.209 ( 0.035 ms): IO iothread1/620155 io_submit(ctx_id: 140512552468480, nr: 32, iocbpp: 0x7fca7d0cebe0) = 32
174k IOPS (+3.6%), IOThread syscalls:
9809.566 ( 0.036 ms): IO iothread1/623061 io_submit(ctx_id: 140539805028352, nr: 32, iocbpp: 0x7fd0cdd62be0) = 32
9809.625 ( 0.001 ms): IO iothread1/623061 write(fd: 103, buf: 0x5647cfba5f58, count: 8) = 8
9809.627 ( 0.002 ms): IO iothread1/623061 write(fd: 104, buf: 0x5647cfba5f58, count: 8) = 8
9809.663 ( 0.036 ms): IO iothread1/623061 io_submit(ctx_id: 140539805028352, nr: 32, iocbpp: 0x7fd0d0388b50) = 32
Notice that ppoll(2) and eventfd read(2) syscalls are eliminated because
the IOThread stays in polling mode instead of falling back to file
descriptor monitoring.
As usual, polling is not implemented on Windows so this patch ignores
the new io_poll_read() callback in aio-win32.c.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20211207132336.36627-2-stefanha@redhat.com
[Fixed up aio_set_event_notifier() calls in
tests/unit/test-fdmon-epoll.c added after this series was queued.
--Stefan]
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2021-12-07 14:23:31 +01:00
|
|
|
false, NULL, NULL, NULL, NULL, NULL);
|
2014-05-08 16:34:42 +02:00
|
|
|
iscsilun->events = 0;
|
|
|
|
|
|
|
|
if (iscsilun->nop_timer) {
|
|
|
|
timer_free(iscsilun->nop_timer);
|
|
|
|
iscsilun->nop_timer = NULL;
|
|
|
|
}
|
2015-04-07 22:08:15 +02:00
|
|
|
if (iscsilun->event_timer) {
|
|
|
|
timer_free(iscsilun->event_timer);
|
|
|
|
iscsilun->event_timer = NULL;
|
|
|
|
}
|
2014-05-08 16:34:42 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void iscsi_attach_aio_context(BlockDriverState *bs,
|
|
|
|
AioContext *new_context)
|
|
|
|
{
|
|
|
|
IscsiLun *iscsilun = bs->opaque;
|
|
|
|
|
|
|
|
iscsilun->aio_context = new_context;
|
|
|
|
iscsi_set_events(iscsilun);
|
|
|
|
|
|
|
|
/* Set up a timer for sending out iSCSI NOPs */
|
|
|
|
iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
|
|
|
|
QEMU_CLOCK_REALTIME, SCALE_MS,
|
|
|
|
iscsi_nop_timed_event, iscsilun);
|
|
|
|
timer_mod(iscsilun->nop_timer,
|
|
|
|
qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
|
2015-04-07 22:08:15 +02:00
|
|
|
|
2015-06-16 13:45:07 +02:00
|
|
|
/* Set up a timer for periodic calls to iscsi_set_events and to
|
|
|
|
* scan for command timeout */
|
2015-04-07 22:08:15 +02:00
|
|
|
iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
|
|
|
|
QEMU_CLOCK_REALTIME, SCALE_MS,
|
2015-06-16 13:45:07 +02:00
|
|
|
iscsi_timed_check_events, iscsilun);
|
|
|
|
timer_mod(iscsilun->event_timer,
|
|
|
|
qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
|
2014-05-08 16:34:42 +02:00
|
|
|
}
|
|
|
|
|
2015-04-16 16:08:27 +02:00
|
|
|
static void iscsi_modesense_sync(IscsiLun *iscsilun)
|
2014-10-30 12:23:46 +01:00
|
|
|
{
|
|
|
|
struct scsi_task *task;
|
|
|
|
struct scsi_mode_sense *ms = NULL;
|
2015-04-16 16:08:27 +02:00
|
|
|
iscsilun->write_protected = false;
|
2015-04-16 16:08:28 +02:00
|
|
|
iscsilun->dpofua = false;
|
2014-10-30 12:23:46 +01:00
|
|
|
|
|
|
|
task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
|
|
|
|
1, SCSI_MODESENSE_PC_CURRENT,
|
|
|
|
0x3F, 0, 255);
|
|
|
|
if (task == NULL) {
|
|
|
|
error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
|
|
|
|
iscsi_get_error(iscsilun->iscsi));
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (task->status != SCSI_STATUS_GOOD) {
|
|
|
|
error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
ms = scsi_datain_unmarshall(task);
|
|
|
|
if (!ms) {
|
|
|
|
error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
|
|
|
|
iscsi_get_error(iscsilun->iscsi));
|
|
|
|
goto out;
|
|
|
|
}
|
2015-04-16 16:08:27 +02:00
|
|
|
iscsilun->write_protected = ms->device_specific_parameter & 0x80;
|
2015-04-16 16:08:28 +02:00
|
|
|
iscsilun->dpofua = ms->device_specific_parameter & 0x10;
|
2014-10-30 12:23:46 +01:00
|
|
|
|
|
|
|
out:
|
|
|
|
if (task) {
|
|
|
|
scsi_free_scsi_task(task);
|
|
|
|
}
|
|
|
|
}
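/*
 * Note: in the MODE SENSE(6) parameter header the device-specific
 * parameter byte carries WP in bit 7 (0x80) and DPOFUA in bit 4 (0x10),
 * which is what the two masks above extract.
 */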
|
|
|
|
|
2016-12-08 14:23:07 +01:00
|
|
|
static void iscsi_parse_iscsi_option(const char *target, QDict *options)
|
|
|
|
{
|
|
|
|
QemuOptsList *list;
|
|
|
|
QemuOpts *opts;
|
2016-12-08 14:23:09 +01:00
|
|
|
const char *user, *password, *password_secret, *initiator_name,
|
2016-12-08 14:23:10 +01:00
|
|
|
*header_digest, *timeout;
|
2016-12-08 14:23:07 +01:00
|
|
|
|
|
|
|
list = qemu_find_opts("iscsi");
|
|
|
|
if (!list) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
opts = qemu_opts_find(list, target);
|
|
|
|
if (opts == NULL) {
|
|
|
|
opts = QTAILQ_FIRST(&list->head);
|
|
|
|
if (!opts) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
user = qemu_opt_get(opts, "user");
|
|
|
|
if (user) {
|
|
|
|
qdict_set_default_str(options, "user", user);
|
|
|
|
}
|
|
|
|
|
|
|
|
password = qemu_opt_get(opts, "password");
|
|
|
|
if (password) {
|
|
|
|
qdict_set_default_str(options, "password", password);
|
|
|
|
}
|
|
|
|
|
|
|
|
password_secret = qemu_opt_get(opts, "password-secret");
|
|
|
|
if (password_secret) {
|
|
|
|
qdict_set_default_str(options, "password-secret", password_secret);
|
|
|
|
}
|
2016-12-08 14:23:08 +01:00
|
|
|
|
|
|
|
initiator_name = qemu_opt_get(opts, "initiator-name");
|
|
|
|
if (initiator_name) {
|
|
|
|
qdict_set_default_str(options, "initiator-name", initiator_name);
|
|
|
|
}
|
2016-12-08 14:23:09 +01:00
|
|
|
|
|
|
|
header_digest = qemu_opt_get(opts, "header-digest");
|
|
|
|
if (header_digest) {
|
2016-12-08 14:23:11 +01:00
|
|
|
/* -iscsi takes upper case values, but QAPI only supports lower case
|
|
|
|
* enum constant names, so we have to convert here. */
|
|
|
|
char *qapi_value = g_ascii_strdown(header_digest, -1);
|
|
|
|
qdict_set_default_str(options, "header-digest", qapi_value);
|
|
|
|
g_free(qapi_value);
|
2016-12-08 14:23:09 +01:00
|
|
|
}
|
2016-12-08 14:23:10 +01:00
|
|
|
|
|
|
|
timeout = qemu_opt_get(opts, "timeout");
|
|
|
|
if (timeout) {
|
|
|
|
qdict_set_default_str(options, "timeout", timeout);
|
|
|
|
}
|
2016-12-08 14:23:07 +01:00
|
|
|
}
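/*
 * Usage sketch (hypothetical values): legacy global defaults such as
 *
 *   -iscsi id=iqn.2001-04.com.example:disk1,user=admin,password-secret=chap0
 *
 * are merged into the per-drive options dict here, with per-drive
 * settings taking precedence.
 */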
|
|
|
|
|
2011-10-25 10:24:24 +02:00
|
|
|
/*
|
|
|
|
* We support iSCSI URLs of the form
|
|
|
|
* iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
|
|
|
|
*/
|
2016-12-08 14:23:06 +01:00
|
|
|
static void iscsi_parse_filename(const char *filename, QDict *options,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
struct iscsi_url *iscsi_url;
|
|
|
|
const char *transport_name;
|
|
|
|
char *lun_str;
|
|
|
|
|
|
|
|
iscsi_url = iscsi_parse_full_url(NULL, filename);
|
|
|
|
if (iscsi_url == NULL) {
|
|
|
|
error_setg(errp, "Failed to parse URL : %s", filename);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if LIBISCSI_API_VERSION >= (20160603)
|
|
|
|
switch (iscsi_url->transport) {
|
|
|
|
case TCP_TRANSPORT:
|
|
|
|
transport_name = "tcp";
|
|
|
|
break;
|
|
|
|
case ISER_TRANSPORT:
|
|
|
|
transport_name = "iser";
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
error_setg(errp, "Unknown transport type (%d)",
|
|
|
|
iscsi_url->transport);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
transport_name = "tcp";
|
|
|
|
#endif
|
|
|
|
|
|
|
|
qdict_set_default_str(options, "transport", transport_name);
|
|
|
|
qdict_set_default_str(options, "portal", iscsi_url->portal);
|
|
|
|
qdict_set_default_str(options, "target", iscsi_url->target);
|
|
|
|
|
|
|
|
lun_str = g_strdup_printf("%d", iscsi_url->lun);
|
|
|
|
qdict_set_default_str(options, "lun", lun_str);
|
|
|
|
g_free(lun_str);
|
|
|
|
|
2016-12-08 14:23:07 +01:00
|
|
|
/* User/password from -iscsi take precedence over those from the URL */
|
|
|
|
iscsi_parse_iscsi_option(iscsi_url->target, options);
|
|
|
|
|
2016-12-08 14:23:06 +01:00
|
|
|
if (iscsi_url->user[0] != '\0') {
|
|
|
|
qdict_set_default_str(options, "user", iscsi_url->user);
|
|
|
|
qdict_set_default_str(options, "password", iscsi_url->passwd);
|
|
|
|
}
|
|
|
|
|
|
|
|
iscsi_destroy_url(iscsi_url);
|
|
|
|
}
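/*
 * Example (illustrative): parsing
 *
 *   iscsi://192.0.2.1:3260/iqn.2001-04.com.example:disk1/0
 *
 * yields transport=tcp, portal=192.0.2.1:3260,
 * target=iqn.2001-04.com.example:disk1 and lun=0 in the options dict.
 */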
|
|
|
|
|
|
|
|
static QemuOptsList runtime_opts = {
|
|
|
|
.name = "iscsi",
|
|
|
|
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
|
|
|
|
.desc = {
|
|
|
|
{
|
|
|
|
.name = "transport",
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "portal",
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "target",
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "user",
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "password",
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
},
|
2016-12-08 14:23:07 +01:00
|
|
|
{
|
|
|
|
.name = "password-secret",
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
},
|
2016-12-08 14:23:06 +01:00
|
|
|
{
|
|
|
|
.name = "lun",
|
|
|
|
.type = QEMU_OPT_NUMBER,
|
|
|
|
},
|
2016-12-08 14:23:08 +01:00
|
|
|
{
|
|
|
|
.name = "initiator-name",
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
},
|
2016-12-08 14:23:09 +01:00
|
|
|
{
|
|
|
|
.name = "header-digest",
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
},
|
2016-12-08 14:23:10 +01:00
|
|
|
{
|
|
|
|
.name = "timeout",
|
|
|
|
.type = QEMU_OPT_NUMBER,
|
|
|
|
},
|
2016-12-08 14:23:06 +01:00
|
|
|
{ /* end of list */ }
|
|
|
|
},
|
|
|
|
};
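/*
 * Illustrative -blockdev line exercising the runtime options above
 * (hypothetical addresses and IDs):
 *
 *   -blockdev driver=iscsi,node-name=lun0,transport=tcp,\
 *             portal=192.0.2.1:3260,target=iqn.2001-04.com.example:disk1,\
 *             lun=0,user=admin,password-secret=chap0
 */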
|
|
|
|
|
2018-06-01 11:26:44 +02:00
|
|
|
static void iscsi_save_designator(IscsiLun *lun,
|
|
|
|
struct scsi_inquiry_device_identification *inq_di)
|
|
|
|
{
|
|
|
|
struct scsi_inquiry_device_designator *desig, *copy = NULL;
|
|
|
|
|
|
|
|
for (desig = inq_di->designators; desig; desig = desig->next) {
|
|
|
|
if (desig->association ||
|
|
|
|
desig->designator_type > SCSI_DESIGNATOR_TYPE_NAA) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
/* NAA works better than T10 vendor ID based designator. */
|
|
|
|
if (!copy || copy->designator_type < desig->designator_type) {
|
|
|
|
copy = desig;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (copy) {
|
|
|
|
lun->dd = g_new(struct scsi_inquiry_device_designator, 1);
|
|
|
|
*lun->dd = *copy;
|
|
|
|
lun->dd->next = NULL;
|
|
|
|
lun->dd->designator = g_malloc(copy->designator_length);
|
|
|
|
memcpy(lun->dd->designator, copy->designator, copy->designator_length);
|
|
|
|
}
|
|
|
|
}
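/*
 * Note: only LUN-associated designators (association == 0) are
 * considered above, and a numerically higher designator type wins, so
 * NAA (type 3) is preferred over a T10 vendor ID (type 1).
 */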
|
|
|
|
|
2013-09-05 14:22:29 +02:00
|
|
|
static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
|
|
|
|
Error **errp)
|
2011-10-25 10:24:24 +02:00
|
|
|
{
|
|
|
|
IscsiLun *iscsilun = bs->opaque;
|
|
|
|
struct iscsi_context *iscsi = NULL;
|
2012-11-17 14:37:39 +01:00
|
|
|
struct scsi_task *task = NULL;
|
|
|
|
struct scsi_inquiry_standard *inq = NULL;
|
2014-02-17 18:34:08 +01:00
|
|
|
struct scsi_inquiry_supported_pages *inq_vpd;
|
2012-01-25 23:39:02 +01:00
|
|
|
char *initiator_name = NULL;
|
2013-04-12 17:59:59 +02:00
|
|
|
QemuOpts *opts;
|
|
|
|
Error *local_err = NULL;
|
2018-06-14 21:14:27 +02:00
|
|
|
const char *transport_name, *portal, *target;
|
2016-12-08 14:23:06 +01:00
|
|
|
#if LIBISCSI_API_VERSION >= (20160603)
|
|
|
|
enum iscsi_transport_type transport;
|
|
|
|
#endif
|
|
|
|
int i, ret = 0, timeout = 0, lun;
|
2011-10-25 10:24:24 +02:00
|
|
|
|
2014-01-02 03:49:17 +01:00
|
|
|
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
|
2020-07-07 18:06:03 +02:00
|
|
|
if (!qemu_opts_absorb_qdict(opts, options, errp)) {
|
2013-04-12 17:59:59 +02:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2016-12-08 14:23:06 +01:00
|
|
|
transport_name = qemu_opt_get(opts, "transport");
|
|
|
|
portal = qemu_opt_get(opts, "portal");
|
|
|
|
target = qemu_opt_get(opts, "target");
|
|
|
|
lun = qemu_opt_get_number(opts, "lun", 0);
|
2013-04-12 17:59:59 +02:00
|
|
|
|
2016-12-08 14:23:06 +01:00
|
|
|
if (!transport_name || !portal || !target) {
|
|
|
|
error_setg(errp, "Need all of transport, portal and target options");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!strcmp(transport_name, "tcp")) {
|
|
|
|
#if LIBISCSI_API_VERSION >= (20160603)
|
|
|
|
transport = TCP_TRANSPORT;
|
|
|
|
} else if (!strcmp(transport_name, "iser")) {
|
|
|
|
transport = ISER_TRANSPORT;
|
|
|
|
#else
|
|
|
|
/* TCP is what older libiscsi versions always use */
|
|
|
|
#endif
|
|
|
|
} else {
|
|
|
|
error_setg(errp, "Unknown transport: %s", transport_name);
|
2011-10-25 10:24:24 +02:00
|
|
|
ret = -EINVAL;
|
2012-08-06 10:52:22 +02:00
|
|
|
goto out;
|
2011-10-25 10:24:24 +02:00
|
|
|
}
|
|
|
|
|
2012-01-25 23:39:02 +01:00
|
|
|
memset(iscsilun, 0, sizeof(IscsiLun));
|
|
|
|
|
2016-12-08 14:23:08 +01:00
|
|
|
initiator_name = get_initiator_name(opts);
|
2012-01-25 23:39:02 +01:00
|
|
|
|
|
|
|
iscsi = iscsi_create_context(initiator_name);
|
|
|
|
if (iscsi == NULL) {
|
2014-02-17 14:43:53 +01:00
|
|
|
error_setg(errp, "iSCSI: Failed to create iSCSI context.");
|
2012-01-25 23:39:02 +01:00
|
|
|
ret = -ENOMEM;
|
2012-08-06 10:52:22 +02:00
|
|
|
goto out;
|
2012-01-25 23:39:02 +01:00
|
|
|
}
|
2016-10-09 10:14:56 +02:00
|
|
|
#if LIBISCSI_API_VERSION >= (20160603)
|
2016-12-08 14:23:06 +01:00
|
|
|
if (iscsi_init_transport(iscsi, transport)) {
|
2016-10-09 10:14:56 +02:00
|
|
|
error_setg(errp, "Error initializing transport.");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
#endif
|
2016-12-08 14:23:06 +01:00
|
|
|
if (iscsi_set_targetname(iscsi, target)) {
|
2014-02-17 14:43:53 +01:00
|
|
|
error_setg(errp, "iSCSI: Failed to set target name.");
|
2011-10-25 10:24:24 +02:00
|
|
|
ret = -EINVAL;
|
2012-08-06 10:52:22 +02:00
|
|
|
goto out;
|
2011-10-25 10:24:24 +02:00
|
|
|
}
|
|
|
|
|
2012-01-25 23:39:02 +01:00
|
|
|
/* check if we got CHAP username/password via the options */
|
2016-12-08 14:23:07 +01:00
|
|
|
apply_chap(iscsi, opts, &local_err);
|
2014-02-17 14:43:53 +01:00
|
|
|
if (local_err != NULL) {
|
|
|
|
error_propagate(errp, local_err);
|
2012-01-25 23:39:02 +01:00
|
|
|
ret = -EINVAL;
|
2012-08-06 10:52:22 +02:00
|
|
|
goto out;
|
2012-01-25 23:39:02 +01:00
|
|
|
}
|
|
|
|
|
2011-10-25 10:24:24 +02:00
|
|
|
if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
|
2014-02-17 14:43:53 +01:00
|
|
|
error_setg(errp, "iSCSI: Failed to set session type to normal.");
|
2011-10-25 10:24:24 +02:00
|
|
|
ret = -EINVAL;
|
2012-08-06 10:52:22 +02:00
|
|
|
goto out;
|
2011-10-25 10:24:24 +02:00
|
|
|
}
|
|
|
|
|
2012-01-25 23:39:02 +01:00
|
|
|
/* check if we got HEADER_DIGEST via the options */
|
2016-12-08 14:23:09 +01:00
|
|
|
apply_header_digest(iscsi, opts, &local_err);
|
2014-02-17 14:43:53 +01:00
|
|
|
if (local_err != NULL) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
2012-01-25 23:39:02 +01:00
|
|
|
|
2015-06-26 12:18:01 +02:00
|
|
|
/* timeout handling is broken in libiscsi before 1.15.0 */
|
2016-12-08 14:23:10 +01:00
|
|
|
timeout = qemu_opt_get_number(opts, "timeout", 0);
|
2016-10-09 10:14:55 +02:00
|
|
|
#if LIBISCSI_API_VERSION >= 20150621
|
2015-06-26 12:18:01 +02:00
|
|
|
iscsi_set_timeout(iscsi, timeout);
|
|
|
|
#else
|
|
|
|
if (timeout) {
|
2018-10-17 10:26:27 +02:00
|
|
|
warn_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
|
2015-06-26 12:18:01 +02:00
|
|
|
}
|
|
|
|
#endif
|
2015-06-16 13:45:07 +02:00
|
|
|
|
2016-12-08 14:23:06 +01:00
|
|
|
if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
|
2014-02-17 14:43:53 +01:00
|
|
|
error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
|
2012-11-17 14:37:39 +01:00
|
|
|
iscsi_get_error(iscsi));
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
2011-10-25 10:24:24 +02:00
|
|
|
|
|
|
|
iscsilun->iscsi = iscsi;
|
2014-05-08 16:34:42 +02:00
|
|
|
iscsilun->aio_context = bdrv_get_aio_context(bs);
|
2016-12-08 14:23:06 +01:00
|
|
|
iscsilun->lun = lun;
|
2014-02-17 18:34:08 +01:00
|
|
|
iscsilun->has_write_same = true;
|
2011-10-25 10:24:24 +02:00
|
|
|
|
2014-02-17 18:34:08 +01:00
|
|
|
task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
|
|
|
|
(void **) &inq, errp);
|
|
|
|
if (task == NULL) {
|
2011-10-25 10:24:24 +02:00
|
|
|
ret = -EINVAL;
|
2012-08-06 10:52:22 +02:00
|
|
|
goto out;
|
2011-10-25 10:24:24 +02:00
|
|
|
}
|
2012-11-17 14:37:39 +01:00
|
|
|
iscsilun->type = inq->periperal_device_type;
|
2014-02-17 18:34:08 +01:00
|
|
|
scsi_free_scsi_task(task);
|
|
|
|
task = NULL;
|
2012-11-17 14:37:39 +01:00
|
|
|
|
2015-04-16 16:08:27 +02:00
|
|
|
iscsi_modesense_sync(iscsilun);
|
2016-05-04 00:39:06 +02:00
|
|
|
if (iscsilun->dpofua) {
|
|
|
|
bs->supported_write_flags = BDRV_REQ_FUA;
|
|
|
|
}
|
2015-04-16 16:08:27 +02:00
|
|
|
|
2014-10-30 12:23:46 +01:00
|
|
|
/* Check the write protect flag of the LUN if we want to write */
|
|
|
|
if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
|
2015-02-25 05:40:08 +01:00
|
|
|
iscsilun->write_protected) {
|
2018-10-08 17:27:18 +02:00
|
|
|
ret = bdrv_apply_auto_read_only(bs, "LUN is write protected", errp);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
flags &= ~BDRV_O_RDWR;
|
2014-10-30 12:23:46 +01:00
|
|
|
}
|
|
|
|
|
2014-02-17 14:43:53 +01:00
|
|
|
iscsi_readcapacity_sync(iscsilun, &local_err);
|
|
|
|
if (local_err != NULL) {
|
|
|
|
error_propagate(errp, local_err);
|
2014-04-10 03:33:55 +02:00
|
|
|
ret = -EINVAL;
|
2013-02-18 14:50:46 +01:00
|
|
|
goto out;
|
2012-11-17 14:37:39 +01:00
|
|
|
}
|
2013-07-11 14:16:25 +02:00
|
|
|
bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
|
2012-11-17 14:37:39 +01:00
|
|
|
|
2014-03-05 15:45:00 +01:00
|
|
|
/* We don't have any emulation for devices other than disks and CD-ROMs, so
|
|
|
|
* this must be sg ioctl compatible. We force it to be sg, otherwise qemu
|
|
|
|
* will try to read from the device to guess the image format.
|
2012-05-26 06:56:41 +02:00
|
|
|
*/
|
2014-03-05 15:45:00 +01:00
|
|
|
if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
|
2016-06-24 00:37:26 +02:00
|
|
|
bs->sg = true;
|
2012-05-26 06:56:41 +02:00
|
|
|
}
|
|
|
|
|
2014-02-17 18:34:08 +01:00
|
|
|
task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
|
|
|
|
SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
|
|
|
|
(void **) &inq_vpd, errp);
|
|
|
|
if (task == NULL) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
2013-07-19 09:19:39 +02:00
|
|
|
}
|
2014-02-17 18:34:08 +01:00
|
|
|
for (i = 0; i < inq_vpd->num_pages; i++) {
|
|
|
|
struct scsi_task *inq_task;
|
|
|
|
struct scsi_inquiry_logical_block_provisioning *inq_lbp;
|
2013-07-19 09:19:39 +02:00
|
|
|
struct scsi_inquiry_block_limits *inq_bl;
|
2018-06-01 11:26:44 +02:00
|
|
|
struct scsi_inquiry_device_identification *inq_di;
|
2014-02-17 18:34:08 +01:00
|
|
|
switch (inq_vpd->pages[i]) {
|
|
|
|
case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
|
|
|
|
inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
|
|
|
|
SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
|
|
|
|
(void **) &inq_lbp, errp);
|
|
|
|
if (inq_task == NULL) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
memcpy(&iscsilun->lbp, inq_lbp,
|
|
|
|
sizeof(struct scsi_inquiry_logical_block_provisioning));
|
|
|
|
scsi_free_scsi_task(inq_task);
|
|
|
|
break;
|
|
|
|
case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
|
|
|
|
inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
|
|
|
|
SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
|
|
|
|
(void **) &inq_bl, errp);
|
|
|
|
if (inq_task == NULL) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
memcpy(&iscsilun->bl, inq_bl,
|
|
|
|
sizeof(struct scsi_inquiry_block_limits));
|
|
|
|
scsi_free_scsi_task(inq_task);
|
|
|
|
break;
|
2018-06-01 11:26:44 +02:00
|
|
|
case SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION:
|
|
|
|
inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
|
|
|
|
SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION,
|
|
|
|
(void **) &inq_di, errp);
|
|
|
|
if (inq_task == NULL) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
iscsi_save_designator(iscsilun, inq_di);
|
|
|
|
scsi_free_scsi_task(inq_task);
|
|
|
|
break;
|
2014-02-17 18:34:08 +01:00
|
|
|
default:
|
|
|
|
break;
|
2013-07-19 09:19:39 +02:00
|
|
|
}
|
|
|
|
}
|
2014-02-17 18:34:08 +01:00
|
|
|
scsi_free_scsi_task(task);
|
|
|
|
task = NULL;
|
2013-07-19 09:19:39 +02:00
|
|
|
|
2017-02-22 19:07:25 +01:00
|
|
|
qemu_mutex_init(&iscsilun->mutex);
|
2014-05-08 16:34:42 +02:00
|
|
|
iscsi_attach_aio_context(bs, iscsilun->aio_context);
|
2012-12-06 10:46:47 +01:00
|
|
|
|
2014-04-28 13:11:32 +02:00
|
|
|
/* Guess the internal cluster (page) size of the iscsi target by means
|
|
|
|
* of opt_unmap_gran. Transfer the unmap granularity only if it has a
|
|
|
|
* reasonable size */
|
2014-04-28 17:18:32 +02:00
|
|
|
if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
|
2014-04-28 13:11:32 +02:00
|
|
|
iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
|
2018-02-13 21:26:46 +01:00
|
|
|
iscsilun->cluster_size = iscsilun->bl.opt_unmap_gran *
|
|
|
|
iscsilun->block_size;
|
2015-04-16 16:08:33 +02:00
|
|
|
if (iscsilun->lbprz) {
|
2020-03-11 04:29:27 +01:00
|
|
|
ret = iscsi_allocmap_init(iscsilun, flags);
|
2014-04-28 13:11:32 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
block: Simplify bdrv_can_write_zeroes_with_unmap()
We don't need the can_write_zeroes_with_unmap field in
BlockDriverInfo, because it is redundant information with
supported_zero_flags & BDRV_REQ_MAY_UNMAP. Note that
BlockDriverInfo and supported_zero_flags are both per-device
settings, rather than global state about the driver as a
whole, which means one or both of these bits of information
can already be conditional. Let's audit how they were set:
crypto: always setting can_write_ to false is pointless (the
struct starts life zero-initialized), no use of supported_
nbd: just recently fixed to set can_write_ if supported_
includes MAY_UNMAP (thus this commit effectively reverts
bca80059e and solves the problem mentioned there in a more
global way)
file-posix, iscsi, qcow2: can_write_ is conditional, while
supported_ was unconditional; but passing MAY_UNMAP would
fail with ENOTSUP if the condition wasn't met
qed: can_write_ is unconditional, but pwrite_zeroes lacks
support for MAY_UNMAP and supported_ is not set. Perhaps
support can be added later (since it would be similar to
qcow2), but for now claiming false is no real loss
all other drivers: can_write_ is not set, and supported_ is
either unset or a passthrough
Simplify the code by moving the conditional into
supported_zero_flags for all drivers, then dropping the
now-unused BDI field. For callers that relied on
bdrv_can_write_zeroes_with_unmap(), we return the same
per-device settings for drivers that had conditions (no
observable change in behavior there); and can now return
true (instead of false) for drivers that support passthrough
(for example, the commit driver) which gives those drivers
the same fix as nbd just got in bca80059e. For callers that
relied on supported_zero_flags, we now have a few more places
that can avoid a wasted call to pwrite_zeroes() that will
just fail with ENOTSUP.
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180126193439.20219-1-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-01-26 20:34:39 +01:00
|
|
|
if (iscsilun->lbprz && iscsilun->lbp.lbpws) {
|
|
|
|
bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
|
|
|
|
}
|
|
|
|
|
2012-08-06 10:52:22 +02:00
|
|
|
out:
|
2013-04-12 17:59:59 +02:00
|
|
|
qemu_opts_del(opts);
|
2014-06-06 18:25:12 +02:00
|
|
|
g_free(initiator_name);
|
2012-11-17 14:37:39 +01:00
|
|
|
if (task != NULL) {
|
|
|
|
scsi_free_scsi_task(task);
|
|
|
|
}
|
2012-08-06 10:52:22 +02:00
|
|
|
|
|
|
|
if (ret) {
|
|
|
|
if (iscsi != NULL) {
|
2015-04-16 16:08:25 +02:00
|
|
|
if (iscsi_is_logged_in(iscsi)) {
|
|
|
|
iscsi_logout_sync(iscsi);
|
|
|
|
}
|
2012-08-06 10:52:22 +02:00
|
|
|
iscsi_destroy_context(iscsi);
|
|
|
|
}
|
|
|
|
memset(iscsilun, 0, sizeof(IscsiLun));
|
2011-10-25 10:24:24 +02:00
|
|
|
}
|
2018-06-14 21:14:27 +02:00
|
|
|
|
2011-10-25 10:24:24 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void iscsi_close(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
IscsiLun *iscsilun = bs->opaque;
|
|
|
|
struct iscsi_context *iscsi = iscsilun->iscsi;
|
|
|
|
|
2014-05-08 16:34:42 +02:00
|
|
|
iscsi_detach_aio_context(bs);
|
2015-04-16 16:08:25 +02:00
|
|
|
if (iscsi_is_logged_in(iscsi)) {
|
|
|
|
iscsi_logout_sync(iscsi);
|
|
|
|
}
|
2011-10-25 10:24:24 +02:00
|
|
|
iscsi_destroy_context(iscsi);
|
2018-06-01 11:26:44 +02:00
|
|
|
if (iscsilun->dd) {
|
|
|
|
g_free(iscsilun->dd->designator);
|
|
|
|
g_free(iscsilun->dd);
|
|
|
|
}
|
2013-10-24 12:07:02 +02:00
|
|
|
g_free(iscsilun->zeroblock);
|
2016-07-18 10:52:20 +02:00
|
|
|
iscsi_allocmap_free(iscsilun);
|
2017-02-22 19:07:25 +01:00
|
|
|
qemu_mutex_destroy(&iscsilun->mutex);
|
2011-10-25 10:24:24 +02:00
|
|
|
memset(iscsilun, 0, sizeof(IscsiLun));
|
|
|
|
}
|
|
|
|
|
2014-10-27 10:18:45 +01:00
|
|
|
static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
|
|
|
|
{
|
2013-12-11 19:26:16 +01:00
|
|
|
/* We don't actually refresh here, but just return data queried in
|
|
|
|
* iscsi_open(): iscsi targets don't change their limits. */
|
2014-10-27 10:18:45 +01:00
|
|
|
|
|
|
|
IscsiLun *iscsilun = bs->opaque;
|
2016-06-24 00:37:19 +02:00
|
|
|
uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
|
iscsi: Fix divide-by-zero regression on raw SG devices
When qemu uses iscsi devices in sg mode, iscsilun->block_size
is left at 0. Prior to commits cf081fca and similar, when
block limits were tracked in sectors, this did not matter:
various block limits were just left at 0. But when we started
scaling by block size, this caused SIGFPE.
Then, in a later patch, commit a5b8dd2c added an assertion to
bdrv_open_common() that request_alignment is always non-zero;
which was not true for SG mode. Rather than relax that assertion,
we can just provide a sane value (we don't know of any SG device
with a block size smaller than qemu's default sizing of 512 bytes).
One possible solution for SG mode is to just blindly skip ALL
of iscsi_refresh_limits(), since we already short circuit so
many other things in sg mode. But this patch takes a slightly
more conservative approach, and merely guarantees that scaling
will succeed, while still using multiples of the original size
where possible. Resulting limits may still be zero in SG mode
(that is, we mostly only fix block_size used as a denominator
or which affect assertions, not all uses).
Reported-by: Holger Schranz <holger@fam-schranz.de>
Signed-off-by: Eric Blake <eblake@redhat.com>
CC: qemu-stable@nongnu.org
Message-Id: <1473283640-15756-1-git-send-email-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-09-07 23:27:20 +02:00
|
|
|
unsigned int block_size = MAX(BDRV_SECTOR_SIZE, iscsilun->block_size);
|
2014-10-27 10:18:45 +01:00
|
|
|
|
2022-08-17 10:37:35 +02:00
|
|
|
assert(iscsilun->block_size >= BDRV_SECTOR_SIZE || bdrv_is_sg(bs));
|
2016-09-07 23:27:20 +02:00
|
|
|
|
|
|
|
bs->bl.request_alignment = block_size;
|
2016-06-24 00:37:14 +02:00
|
|
|
|
2014-10-27 10:18:45 +01:00
|
|
|
if (iscsilun->bl.max_xfer_len) {
|
|
|
|
max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
|
|
|
|
}
|
|
|
|
|
2016-09-07 23:27:20 +02:00
|
|
|
if (max_xfer_len * block_size < INT_MAX) {
|
2016-06-24 00:37:19 +02:00
|
|
|
bs->bl.max_transfer = max_xfer_len * iscsilun->block_size;
|
|
|
|
}
|
2014-10-27 10:18:45 +01:00
|
|
|
|
2014-04-02 15:30:29 +02:00
|
|
|
if (iscsilun->lbp.lbpu) {
|
block: use int64_t instead of int in driver discard handlers
We are generally moving to int64_t for both offset and bytes parameters
on all io paths.
Main motivation is realization of 64-bit write_zeroes operation for
fast zeroing large disk chunks, up to the whole disk.
We chose signed type, to be consistent with off_t (which is signed) and
with possibility for signed return type (where negative value means
error).
So, convert driver discard handlers bytes parameter to int64_t.
The only caller of all updated function is bdrv_co_pdiscard in
block/io.c. It is already prepared to work with 64bit requests, but
pass at most max(bs->bl.max_pdiscard, INT_MAX) to the driver.
Let's look at all updated functions:
blkdebug: all calculations are still OK, thanks to
bdrv_check_qiov_request().
both rule_check and bdrv_co_pdiscard are 64bit
blklogwrites: pass to blk_log_writes_co_log which is 64bit
blkreplay, copy-on-read, filter-compress: pass to bdrv_co_pdiscard, OK
copy-before-write: pass to bdrv_co_pdiscard which is 64bit and to
cbw_do_copy_before_write which is 64bit
file-posix: one handler calls raw_account_discard() is 64bit and both
handlers calls raw_do_pdiscard(). Update raw_do_pdiscard, which pass
to RawPosixAIOData::aio_nbytes, which is 64bit (and calls
raw_account_discard())
gluster: somehow, third argument of glfs_discard_async is size_t.
Let's set max_pdiscard accordingly.
iscsi: iscsi_allocmap_set_invalid is 64bit,
!is_byte_request_lun_aligned is 64bit.
list.num is uint32_t. Let's clarify max_pdiscard and
pdiscard_alignment.
mirror_top: pass to bdrv_mirror_top_do_write() which is
64bit
nbd: protocol limitation. max_pdiscard is already set strictly enough,
keep it as is for now.
nvme: buf.nlb is uint32_t and we do shift. So, add corresponding limits
to nvme_refresh_limits().
preallocate: pass to bdrv_co_pdiscard() which is 64bit.
rbd: pass to qemu_rbd_start_co() which is 64bit.
qcow2: calculations are still OK, thanks to bdrv_check_qiov_request(),
qcow2_cluster_discard() is 64bit.
raw-format: raw_adjust_offset() is 64bit, bdrv_co_pdiscard too.
throttle: pass to bdrv_co_pdiscard() which is 64bit and to
throttle_group_co_io_limits_intercept() which is 64bit as well.
test-block-iothread: bytes argument is unused
Great! Now all drivers are prepared to handle 64bit discard requests,
or else have explicit max_pdiscard limits.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20210903102807.27127-11-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2021-09-03 12:28:06 +02:00
|
|
|
bs->bl.max_pdiscard =
|
|
|
|
MIN_NON_ZERO(iscsilun->bl.max_unmap * iscsilun->block_size,
|
|
|
|
(uint64_t)UINT32_MAX * iscsilun->block_size);
|
2016-06-24 00:37:21 +02:00
|
|
|
bs->bl.pdiscard_alignment =
|
|
|
|
iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
|
2016-06-01 23:10:01 +02:00
|
|
|
} else {
|
2016-06-24 00:37:21 +02:00
|
|
|
bs->bl.pdiscard_alignment = iscsilun->block_size;
|
2014-04-02 15:30:29 +02:00
|
|
|
}
|
2013-12-11 19:26:16 +01:00
|
|
|
|
block: use int64_t instead of int in driver write_zeroes handlers
We are generally moving to int64_t for both offset and bytes parameters
on all io paths.
The main motivation is the realization of a 64-bit write_zeroes
operation for fast zeroing of large disk chunks, up to the whole disk.
We chose a signed type, to be consistent with off_t (which is signed)
and to allow a signed return type (where a negative value means error).
So, convert the bytes parameter of the driver write_zeroes handlers to
int64_t.
The only caller of all updated functions is bdrv_co_do_pwrite_zeroes().
bdrv_co_do_pwrite_zeroes() itself is of course OK with widening of the
callee parameter type. Also, bdrv_co_do_pwrite_zeroes()'s
max_write_zeroes is limited to INT_MAX. So, the updated functions are
all safe; they will not get "bytes" larger than before.
Still, let's look through all updated functions, and add assertions to
the ones which are actually unprepared for values larger than INT_MAX.
For these drivers, also set an explicit max_pwrite_zeroes limit.
Let's go:
blkdebug: calculations can't overflow, thanks to
bdrv_check_qiov_request() in the generic layer. rule_check() and
bdrv_co_pwrite_zeroes() both have a 64bit argument.
blklogwrites: pass to blk_log_writes_co_log() with a 64bit argument.
blkreplay, copy-on-read, filter-compress: pass to
bdrv_co_pwrite_zeroes() which is OK
copy-before-write: calls cbw_do_copy_before_write() and
bdrv_co_pwrite_zeroes(), both of which have a 64bit argument.
file-posix: both handlers call raw_do_pwrite_zeroes(), which is
updated. In raw_do_pwrite_zeroes() the calculations are OK due to
bdrv_check_qiov_request(), and bytes go to RawPosixAIOData::aio_nbytes
which is uint64_t.
Check also where that uint64_t gets handed off:
handle_aiocb_write_zeroes_block() passes a uint64_t[2] to
ioctl(BLKZEROOUT), handle_aiocb_write_zeroes() calls do_fallocate()
which takes off_t (and we compile to always have 64-bit off_t), as
does handle_aiocb_write_zeroes_unmap(). All look safe.
gluster: bytes go to GlusterAIOCB::size, which is int64_t, and
glfs_zerofill_async() works with off_t.
iscsi: aha, here we deal with iscsi_writesame16_task(), which has a
uint32_t num_blocks argument, while iscsi_writesame10_task() has a
uint16_t one. Add comments and assertions, and clarify the
max_pwrite_zeroes calculation.
The iscsi_allocmap_*() functions already have an int64_t argument;
is_byte_request_lun_aligned() is simple to update, so do it.
mirror_top: pass to bdrv_mirror_top_do_write(), which has a uint64_t
argument
nbd: aha, here we have a protocol limitation, and NBDRequest::len is
uint32_t. max_pwrite_zeroes is cleanly set to a 32bit value, so we are
OK for now.
nvme: again, a protocol limitation, and no inherent limit for
write-zeroes at all. But from the code that calculates cdw12 it's
obvious that we do have a limit and an alignment. Let's clarify them.
Also, the code is obviously not prepared to handle bytes=0; let's
handle this case too.
The trace events are already 64bit.
preallocate: pass to handle_write() and bdrv_co_pwrite_zeroes(), both
64bit.
rbd: pass to qemu_rbd_start_co() which is 64bit.
qcow2: offset + bytes and alignment still work fine (thanks to
bdrv_check_qiov_request()), so the tail calculation is OK.
qcow2_subcluster_zeroize() has a 64bit argument, so it should be OK.
Trace events updated.
qed: qed_co_request() wants int nb_sectors. Also, the code uses size_t
for the request length, which may be 32bit. So, let's just keep
INT_MAX as the limit (aligned down to pwrite_zeroes_alignment) and
not worry further.
raw-format: is OK. raw_adjust_offset() and bdrv_co_pwrite_zeroes() are
both 64bit.
throttle: both throttle_group_co_io_limits_intercept() and
bdrv_co_pwrite_zeroes() are 64bit.
vmdk: pass to vmdk_pwritev() which is 64bit
quorum: pass to quorum_co_pwritev() which is 64bit
Hooray!
At this point all block drivers are prepared to support 64bit
write-zero requests, or have explicitly set max_pwrite_zeroes.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20210903102807.27127-8-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
[eblake: use <= rather than < in assertions relying on max_pwrite_zeroes]
Signed-off-by: Eric Blake <eblake@redhat.com>
2021-09-03 12:28:03 +02:00
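As a standalone sketch of the iscsi point above (the helper name is hypothetical, and a non-zero block size is assumed): the handler may narrow the 64-bit byte count to WRITE SAME(16)'s uint32_t block count only because the generic layer has already fragmented the request to max_pwrite_zeroes, and an assertion documents that contract:

/* Sketch only: narrow a 64-bit byte count to a 32-bit block count,
 * relying on the advertised max_pwrite_zeroes limit. */
#include <assert.h>
#include <stdint.h>

static uint32_t sketch_ws16_num_blocks(int64_t bytes, uint32_t block_size,
                                       int64_t max_pwrite_zeroes)
{
    int64_t nb_blocks = bytes / block_size;   /* block_size != 0 assumed */

    /* bdrv_co_do_pwrite_zeroes() fragments requests to the advertised
     * limit, so the narrowed value must still fit the CDB field. */
    assert(bytes <= max_pwrite_zeroes);
    assert(nb_blocks <= UINT32_MAX);
    return (uint32_t)nb_blocks;
}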
|
|
|
bs->bl.max_pwrite_zeroes =
|
|
|
|
MIN_NON_ZERO(iscsilun->bl.max_ws_len * iscsilun->block_size,
|
|
|
|
max_xfer_len * iscsilun->block_size);
|
|
|
|
|
2014-04-02 15:30:29 +02:00
|
|
|
if (iscsilun->lbp.lbpws) {
|
2016-06-01 23:10:02 +02:00
|
|
|
bs->bl.pwrite_zeroes_alignment =
|
|
|
|
iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
|
2016-06-01 23:10:01 +02:00
|
|
|
} else {
|
2016-06-01 23:10:02 +02:00
|
|
|
bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
|
2013-12-11 19:26:16 +01:00
|
|
|
}
|
2016-06-24 00:37:19 +02:00
|
|
|
if (iscsilun->bl.opt_xfer_len &&
|
iscsi: Fix divide-by-zero regression on raw SG devices
When qemu uses iscsi devices in sg mode, iscsilun->block_size
is left at 0. Prior to commits cf081fca and similar, when
block limits were tracked in sectors, this did not matter:
various block limits were just left at 0. But when we started
scaling by block size, this caused SIGFPE.
Then, in a later patch, commit a5b8dd2c added an assertion to
bdrv_open_common() that request_alignment is always non-zero;
which was not true for SG mode. Rather than relax that assertion,
we can just provide a sane value (we don't know of any SG device
with a block size smaller than qemu's default sizing of 512 bytes).
One possible solution for SG mode is to just blindly skip ALL
of iscsi_refresh_limits(), since we already short-circuit so
many other things in sg mode. But this patch takes a slightly
more conservative approach, and merely guarantees that scaling
will succeed, while still using multiples of the original size
where possible. Resulting limits may still be zero in SG mode
(that is, we mostly only fix block_size used as a denominator
or which affect assertions, not all uses).
Reported-by: Holger Schranz <holger@fam-schranz.de>
Signed-off-by: Eric Blake <eblake@redhat.com>
CC: qemu-stable@nongnu.org
Message-Id: <1473283640-15756-1-git-send-email-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-09-07 23:27:20 +02:00
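The fix boils down to substituting a sane default before the reported block size is ever used as a divisor; a minimal sketch (assuming qemu's default 512-byte sector size as the fallback, per the reasoning above):

/* Sketch: guard against a zero block size reported by SG-mode LUNs
 * before it is used as a divisor or request alignment. */
#include <stdint.h>

#define SKETCH_DEFAULT_BLOCK_SIZE 512u  /* qemu's default sector size */

static uint32_t sketch_safe_block_size(uint32_t reported)
{
    /* SG-mode LUNs leave block_size at 0; scaling by it would SIGFPE. */
    return reported ? reported : SKETCH_DEFAULT_BLOCK_SIZE;
}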
|
|
|
iscsilun->bl.opt_xfer_len < INT_MAX / block_size) {
|
2016-06-24 00:37:19 +02:00
|
|
|
bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len *
|
|
|
|
iscsilun->block_size);
|
|
|
|
}
|
2014-01-25 00:50:14 +01:00
|
|
|
}
|
2013-12-11 19:26:16 +01:00
|
|
|
|
2015-02-25 05:40:08 +01:00
|
|
|
/* Note that this will not re-establish a connection with an iSCSI target - it
|
|
|
|
* is effectively a NOP. */
|
2014-01-14 19:10:24 +01:00
|
|
|
static int iscsi_reopen_prepare(BDRVReopenState *state,
|
|
|
|
BlockReopenQueue *queue, Error **errp)
|
|
|
|
{
|
2015-02-25 05:40:08 +01:00
|
|
|
IscsiLun *iscsilun = state->bs->opaque;
|
|
|
|
|
|
|
|
if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
|
|
|
|
error_setg(errp, "Cannot open a write protected LUN as read-write");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2013-12-11 19:26:16 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-07-18 10:52:20 +02:00
|
|
|
static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
|
|
|
|
{
|
|
|
|
IscsiLun *iscsilun = reopen_state->bs->opaque;
|
|
|
|
|
|
|
|
/* the cache.direct status might have changed */
|
|
|
|
if (iscsilun->allocmap != NULL) {
|
|
|
|
iscsi_allocmap_init(iscsilun, reopen_state->flags);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 17:54:35 +02:00
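For orientation, the shape of the interface change is sketched below with stub types so the fragment stands alone (coroutine_fn is qemu's annotation for functions that may yield; the member names mirror BlockDriver, but everything else here is illustrative):

/* Sketch of the callback signature change, with local stand-ins for
 * the qemu types involved. */
#include <stdint.h>

typedef struct BlockDriverState BlockDriverState;
typedef struct Error Error;
typedef int PreallocMode;
#define coroutine_fn  /* marker: may yield, must run in a coroutine */

struct SketchBlockDriver {
    /* before: may block the caller for the whole preallocation */
    int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset,
                         PreallocMode prealloc, Error **errp);
    /* after: runs in a coroutine, so long preallocations can yield */
    int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs,
                                         int64_t offset,
                                         PreallocMode prealloc,
                                         Error **errp);
};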
|
|
|
static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset,
|
2019-09-18 11:51:40 +02:00
|
|
|
bool exact, PreallocMode prealloc,
|
2020-04-24 14:54:39 +02:00
|
|
|
BdrvRequestFlags flags, Error **errp)
|
2013-02-18 14:50:46 +01:00
|
|
|
{
|
|
|
|
IscsiLun *iscsilun = bs->opaque;
|
2019-09-18 11:51:41 +02:00
|
|
|
int64_t cur_length;
|
2014-02-17 14:43:53 +01:00
|
|
|
Error *local_err = NULL;
|
2013-02-18 14:50:46 +01:00
|
|
|
|
2017-06-13 22:20:52 +02:00
|
|
|
if (prealloc != PREALLOC_MODE_OFF) {
|
|
|
|
error_setg(errp, "Unsupported preallocation mode '%s'",
|
2017-08-24 10:46:08 +02:00
|
|
|
PreallocMode_str(prealloc));
|
2017-06-13 22:20:52 +02:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2013-02-18 14:50:46 +01:00
|
|
|
if (iscsilun->type != TYPE_DISK) {
|
2017-03-28 22:51:29 +02:00
|
|
|
error_setg(errp, "Cannot resize non-disk iSCSI devices");
|
2013-02-18 14:50:46 +01:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2014-02-17 14:43:53 +01:00
|
|
|
iscsi_readcapacity_sync(iscsilun, &local_err);
|
|
|
|
if (local_err != NULL) {
|
2017-03-28 22:51:28 +02:00
|
|
|
error_propagate(errp, local_err);
|
2014-02-17 14:43:53 +01:00
|
|
|
return -EIO;
|
2013-02-18 14:50:46 +01:00
|
|
|
}
|
|
|
|
|
2023-01-13 21:42:04 +01:00
|
|
|
cur_length = iscsi_co_getlength(bs);
|
2019-09-18 11:51:41 +02:00
|
|
|
if (offset != cur_length && exact) {
|
|
|
|
error_setg(errp, "Cannot resize iSCSI devices");
|
|
|
|
return -ENOTSUP;
|
|
|
|
} else if (offset > cur_length) {
|
2017-03-28 22:51:29 +02:00
|
|
|
error_setg(errp, "Cannot grow iSCSI devices");
|
2013-02-18 14:50:46 +01:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2016-07-18 10:52:20 +02:00
|
|
|
if (iscsilun->allocmap != NULL) {
|
|
|
|
iscsi_allocmap_init(iscsilun, bs->open_flags);
|
2014-04-28 13:11:32 +02:00
|
|
|
}
|
|
|
|
|
2013-02-18 14:50:46 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-01-13 21:42:08 +01:00
|
|
|
static int coroutine_fn
|
|
|
|
iscsi_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
|
2013-10-24 12:06:55 +02:00
|
|
|
{
|
|
|
|
IscsiLun *iscsilun = bs->opaque;
|
2018-02-13 21:26:46 +01:00
|
|
|
bdi->cluster_size = iscsilun->cluster_size;
|
2013-10-24 12:06:55 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-03-01 17:36:18 +01:00
|
|
|
static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs,
|
|
|
|
Error **errp)
|
2016-07-18 10:52:20 +02:00
|
|
|
{
|
|
|
|
IscsiLun *iscsilun = bs->opaque;
|
|
|
|
iscsi_allocmap_invalidate(iscsilun);
|
|
|
|
}
|
|
|
|
|
2023-02-03 16:21:53 +01:00
|
|
|
static int coroutine_fn GRAPH_RDLOCK
|
|
|
|
iscsi_co_copy_range_from(BlockDriverState *bs,
|
|
|
|
BdrvChild *src, int64_t src_offset,
|
|
|
|
BdrvChild *dst, int64_t dst_offset,
|
|
|
|
int64_t bytes, BdrvRequestFlags read_flags,
|
|
|
|
BdrvRequestFlags write_flags)
|
2018-06-01 11:26:46 +02:00
|
|
|
{
|
2018-07-09 18:37:17 +02:00
|
|
|
return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
|
|
|
|
read_flags, write_flags);
|
2018-06-01 11:26:46 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct scsi_task *iscsi_xcopy_task(int param_len)
|
|
|
|
{
|
|
|
|
struct scsi_task *task;
|
|
|
|
|
|
|
|
task = g_new0(struct scsi_task, 1);
|
|
|
|
|
|
|
|
task->cdb[0] = EXTENDED_COPY;
|
|
|
|
task->cdb[10] = (param_len >> 24) & 0xFF;
|
|
|
|
task->cdb[11] = (param_len >> 16) & 0xFF;
|
|
|
|
task->cdb[12] = (param_len >> 8) & 0xFF;
|
|
|
|
task->cdb[13] = param_len & 0xFF;
|
|
|
|
task->cdb_size = 16;
|
|
|
|
task->xfer_dir = SCSI_XFER_WRITE;
|
|
|
|
task->expxferlen = param_len;
|
|
|
|
|
|
|
|
return task;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun)
|
|
|
|
{
|
|
|
|
struct scsi_inquiry_device_designator *dd = lun->dd;
|
|
|
|
|
|
|
|
memset(desc, 0, 32);
|
|
|
|
desc[0] = 0xE4; /* IDENT_DESCR_TGT_DESCR */
|
|
|
|
desc[4] = dd->code_set;
|
|
|
|
desc[5] = (dd->designator_type & 0xF)
|
|
|
|
| ((dd->association & 3) << 4);
|
|
|
|
desc[7] = dd->designator_length;
|
2018-06-29 08:03:27 +02:00
|
|
|
memcpy(desc + 8, dd->designator, MIN(dd->designator_length, 20));
|
2018-06-01 11:26:46 +02:00
|
|
|
|
|
|
|
desc[28] = 0;
|
|
|
|
desc[29] = (lun->block_size >> 16) & 0xFF;
|
|
|
|
desc[30] = (lun->block_size >> 8) & 0xFF;
|
|
|
|
desc[31] = lun->block_size & 0xFF;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void iscsi_xcopy_desc_hdr(uint8_t *hdr, int dc, int cat, int src_index,
|
|
|
|
int dst_index)
|
|
|
|
{
|
|
|
|
hdr[0] = 0x02; /* BLK_TO_BLK_SEG_DESCR */
|
|
|
|
hdr[1] = ((dc << 1) | cat) & 0xFF;
|
|
|
|
hdr[2] = (XCOPY_BLK2BLK_SEG_DESC_SIZE >> 8) & 0xFF;
|
|
|
|
/* don't account for the first 4 bytes in the descriptor header */
|
|
|
|
hdr[3] = (XCOPY_BLK2BLK_SEG_DESC_SIZE - 4 /* SEG_DESC_SRC_INDEX_OFFSET */) & 0xFF;
|
|
|
|
hdr[4] = (src_index >> 8) & 0xFF;
|
|
|
|
hdr[5] = src_index & 0xFF;
|
|
|
|
hdr[6] = (dst_index >> 8) & 0xFF;
|
|
|
|
hdr[7] = dst_index & 0xFF;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void iscsi_xcopy_populate_desc(uint8_t *desc, int dc, int cat,
|
|
|
|
int src_index, int dst_index, int num_blks,
|
|
|
|
uint64_t src_lba, uint64_t dst_lba)
|
|
|
|
{
|
|
|
|
iscsi_xcopy_desc_hdr(desc, dc, cat, src_index, dst_index);
|
|
|
|
|
|
|
|
/* The caller should verify the request size */
|
|
|
|
assert(num_blks < 65536);
|
|
|
|
desc[10] = (num_blks >> 8) & 0xFF;
|
|
|
|
desc[11] = num_blks & 0xFF;
|
|
|
|
desc[12] = (src_lba >> 56) & 0xFF;
|
|
|
|
desc[13] = (src_lba >> 48) & 0xFF;
|
|
|
|
desc[14] = (src_lba >> 40) & 0xFF;
|
|
|
|
desc[15] = (src_lba >> 32) & 0xFF;
|
|
|
|
desc[16] = (src_lba >> 24) & 0xFF;
|
|
|
|
desc[17] = (src_lba >> 16) & 0xFF;
|
|
|
|
desc[18] = (src_lba >> 8) & 0xFF;
|
|
|
|
desc[19] = src_lba & 0xFF;
|
|
|
|
desc[20] = (dst_lba >> 56) & 0xFF;
|
|
|
|
desc[21] = (dst_lba >> 48) & 0xFF;
|
|
|
|
desc[22] = (dst_lba >> 40) & 0xFF;
|
|
|
|
desc[23] = (dst_lba >> 32) & 0xFF;
|
|
|
|
desc[24] = (dst_lba >> 24) & 0xFF;
|
|
|
|
desc[25] = (dst_lba >> 16) & 0xFF;
|
|
|
|
desc[26] = (dst_lba >> 8) & 0xFF;
|
|
|
|
desc[27] = dst_lba & 0xFF;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void iscsi_xcopy_populate_header(unsigned char *buf, int list_id, int str,
|
|
|
|
int list_id_usage, int prio,
|
|
|
|
int tgt_desc_len,
|
|
|
|
int seg_desc_len, int inline_data_len)
|
|
|
|
{
|
|
|
|
buf[0] = list_id;
|
|
|
|
buf[1] = ((str & 1) << 5) | ((list_id_usage & 3) << 3) | (prio & 7);
|
|
|
|
buf[2] = (tgt_desc_len >> 8) & 0xFF;
|
|
|
|
buf[3] = tgt_desc_len & 0xFF;
|
|
|
|
buf[8] = (seg_desc_len >> 24) & 0xFF;
|
|
|
|
buf[9] = (seg_desc_len >> 16) & 0xFF;
|
|
|
|
buf[10] = (seg_desc_len >> 8) & 0xFF;
|
|
|
|
buf[11] = seg_desc_len & 0xFF;
|
|
|
|
buf[12] = (inline_data_len >> 24) & 0xFF;
|
|
|
|
buf[13] = (inline_data_len >> 16) & 0xFF;
|
|
|
|
buf[14] = (inline_data_len >> 8) & 0xFF;
|
|
|
|
buf[15] = inline_data_len & 0xFF;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void iscsi_xcopy_data(struct iscsi_data *data,
|
|
|
|
IscsiLun *src, int64_t src_lba,
|
|
|
|
IscsiLun *dst, int64_t dst_lba,
|
|
|
|
uint16_t num_blocks)
|
|
|
|
{
|
|
|
|
uint8_t *buf;
|
|
|
|
const int src_offset = XCOPY_DESC_OFFSET;
|
|
|
|
const int dst_offset = XCOPY_DESC_OFFSET + IDENT_DESCR_TGT_DESCR_SIZE;
|
|
|
|
const int seg_offset = dst_offset + IDENT_DESCR_TGT_DESCR_SIZE;
|
|
|
|
|
|
|
|
data->size = XCOPY_DESC_OFFSET +
|
|
|
|
IDENT_DESCR_TGT_DESCR_SIZE * 2 +
|
|
|
|
XCOPY_BLK2BLK_SEG_DESC_SIZE;
|
|
|
|
data->data = g_malloc0(data->size);
|
|
|
|
buf = data->data;
|
|
|
|
|
|
|
|
/* Initialise the parameter list header */
|
|
|
|
iscsi_xcopy_populate_header(buf, 1, 0, 2 /* LIST_ID_USAGE_DISCARD */,
|
|
|
|
0, 2 * IDENT_DESCR_TGT_DESCR_SIZE,
|
|
|
|
XCOPY_BLK2BLK_SEG_DESC_SIZE,
|
|
|
|
0);
|
|
|
|
|
|
|
|
/* Initialise CSCD list with one src + one dst descriptor */
|
|
|
|
iscsi_populate_target_desc(&buf[src_offset], src);
|
|
|
|
iscsi_populate_target_desc(&buf[dst_offset], dst);
|
|
|
|
|
|
|
|
/* Initialise one segment descriptor */
|
|
|
|
iscsi_xcopy_populate_desc(&buf[seg_offset], 0, 0, 0, 1, num_blocks,
|
|
|
|
src_lba, dst_lba);
|
|
|
|
}
|
|
|
|
|
2023-02-03 16:21:53 +01:00
|
|
|
static int coroutine_fn GRAPH_RDLOCK
|
|
|
|
iscsi_co_copy_range_to(BlockDriverState *bs,
|
|
|
|
BdrvChild *src, int64_t src_offset,
|
|
|
|
BdrvChild *dst, int64_t dst_offset,
|
|
|
|
int64_t bytes, BdrvRequestFlags read_flags,
|
|
|
|
BdrvRequestFlags write_flags)
|
2018-06-01 11:26:46 +02:00
|
|
|
{
|
|
|
|
IscsiLun *dst_lun = dst->bs->opaque;
|
|
|
|
IscsiLun *src_lun;
|
|
|
|
struct IscsiTask iscsi_task;
|
|
|
|
struct iscsi_data data;
|
|
|
|
int r = 0;
|
|
|
|
int block_size;
|
|
|
|
|
|
|
|
if (src->bs->drv->bdrv_co_copy_range_to != iscsi_co_copy_range_to) {
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
src_lun = src->bs->opaque;
|
|
|
|
|
|
|
|
if (!src_lun->dd || !dst_lun->dd) {
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
if (!is_byte_request_lun_aligned(dst_offset, bytes, dst_lun)) {
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
if (!is_byte_request_lun_aligned(src_offset, bytes, src_lun)) {
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
if (dst_lun->block_size != src_lun->block_size ||
|
|
|
|
!dst_lun->block_size) {
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
|
|
|
block_size = dst_lun->block_size;
|
|
|
|
if (bytes / block_size > 65535) {
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
|
|
|
iscsi_xcopy_data(&data,
|
|
|
|
src_lun, src_offset / block_size,
|
|
|
|
dst_lun, dst_offset / block_size,
|
|
|
|
bytes / block_size);
|
|
|
|
|
|
|
|
iscsi_co_init_iscsitask(dst_lun, &iscsi_task);
|
|
|
|
|
|
|
|
qemu_mutex_lock(&dst_lun->mutex);
|
|
|
|
iscsi_task.task = iscsi_xcopy_task(data.size);
|
|
|
|
retry:
|
|
|
|
if (iscsi_scsi_command_async(dst_lun->iscsi, dst_lun->lun,
|
|
|
|
iscsi_task.task, iscsi_co_generic_cb,
|
|
|
|
&data,
|
|
|
|
&iscsi_task) != 0) {
|
|
|
|
r = -EIO;
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
|
|
|
|
|
|
|
iscsi_co_wait_for_task(&iscsi_task, dst_lun);
|
|
|
|
|
|
|
|
if (iscsi_task.do_retry) {
|
|
|
|
iscsi_task.complete = 0;
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (iscsi_task.status != SCSI_STATUS_GOOD) {
|
|
|
|
r = iscsi_task.err_code;
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
|
|
|
|
|
|
|
out_unlock:
|
2018-07-10 08:31:16 +02:00
|
|
|
|
|
|
|
trace_iscsi_xcopy(src_lun, src_offset, dst_lun, dst_offset, bytes, r);
|
2018-06-01 11:26:46 +02:00
|
|
|
g_free(iscsi_task.task);
|
|
|
|
qemu_mutex_unlock(&dst_lun->mutex);
|
|
|
|
g_free(iscsi_task.err_str);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2012-11-17 16:13:24 +01:00
|
|
|
|
2019-02-01 20:29:25 +01:00
|
|
|
static const char *const iscsi_strong_runtime_opts[] = {
|
|
|
|
"transport",
|
|
|
|
"portal",
|
|
|
|
"target",
|
|
|
|
"user",
|
|
|
|
"password",
|
|
|
|
"password-secret",
|
|
|
|
"lun",
|
|
|
|
"initiator-name",
|
|
|
|
"header-digest",
|
|
|
|
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
2011-10-25 10:24:24 +02:00
|
|
|
static BlockDriver bdrv_iscsi = {
|
|
|
|
.format_name = "iscsi",
|
|
|
|
.protocol_name = "iscsi",
|
|
|
|
|
2016-12-08 14:23:06 +01:00
|
|
|
.instance_size = sizeof(IscsiLun),
|
|
|
|
.bdrv_parse_filename = iscsi_parse_filename,
|
|
|
|
.bdrv_file_open = iscsi_open,
|
|
|
|
.bdrv_close = iscsi_close,
|
2020-03-26 02:12:18 +01:00
|
|
|
.bdrv_co_create_opts = bdrv_co_create_opts_simple,
|
|
|
|
.create_opts = &bdrv_create_opts_simple,
|
2016-12-08 14:23:06 +01:00
|
|
|
.bdrv_reopen_prepare = iscsi_reopen_prepare,
|
|
|
|
.bdrv_reopen_commit = iscsi_reopen_commit,
|
2018-03-01 17:36:18 +01:00
|
|
|
.bdrv_co_invalidate_cache = iscsi_co_invalidate_cache,
|
2011-10-25 10:24:24 +02:00
|
|
|
|
2023-01-13 21:42:04 +01:00
|
|
|
.bdrv_co_getlength = iscsi_co_getlength,
|
2023-01-13 21:42:08 +01:00
|
|
|
.bdrv_co_get_info = iscsi_co_get_info,
|
2018-06-21 17:54:35 +02:00
|
|
|
.bdrv_co_truncate = iscsi_co_truncate,
|
2013-12-11 19:26:16 +01:00
|
|
|
.bdrv_refresh_limits = iscsi_refresh_limits,
|
2011-10-25 10:24:24 +02:00
|
|
|
|
2018-02-13 21:26:48 +01:00
|
|
|
.bdrv_co_block_status = iscsi_co_block_status,
|
2016-07-16 01:23:01 +02:00
|
|
|
.bdrv_co_pdiscard = iscsi_co_pdiscard,
|
2018-06-01 11:26:46 +02:00
|
|
|
.bdrv_co_copy_range_from = iscsi_co_copy_range_from,
|
|
|
|
.bdrv_co_copy_range_to = iscsi_co_copy_range_to,
|
2016-06-01 23:10:05 +02:00
|
|
|
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
|
2013-12-05 16:47:17 +01:00
|
|
|
.bdrv_co_readv = iscsi_co_readv,
|
2018-04-25 00:01:57 +02:00
|
|
|
.bdrv_co_writev = iscsi_co_writev,
|
2013-12-05 16:47:17 +01:00
|
|
|
.bdrv_co_flush_to_disk = iscsi_co_flush,
|
2012-04-24 08:29:04 +02:00
|
|
|
|
ISCSI: Add SCSI passthrough via scsi-generic to libiscsi
Update iscsi to allow passthrough of SG_IO scsi commands when the iscsi
device is forced to be scsi-generic.
Implement both bdrv_ioctl() and bdrv_aio_ioctl() in the iscsi backend,
emulate the SG_IO ioctl and pass the SCSI commands across to the
iscsi target.
This allows end-to-end passthrough of SCSI all the way from the guest,
to qemu, via scsi-generic, then libiscsi all the way to the iscsi target.
To activate this you need to specify that the iscsi lun should be treated
as a scsi-generic device.
Example:
-device lsi -device scsi-generic,drive=MyISCSI \
-drive file=iscsi://10.1.1.125/iqn.ronnie.test/1,if=none,id=MyISCSI
Note that you currently cannot boot a qemu guest from a scsi device.
Note that this only works when the host is Linux, since the emulation
relies on definitions of SG_IO from the scsi-generic implementation in
the Linux kernel.
It should be fairly easy to re-implement some structures similar enough
for non-Linux hosts to do the same style of passthrough via a fake
scsi-generic layer and libiscsi if need be.
Signed-off-by: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2012-05-25 13:59:01 +02:00
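A compact, standalone sketch of the guest-visible interface being emulated (Linux only; the device path and timeout are placeholders): SG_IO packs the CDB, data buffer and sense buffer into one ioctl, which this backend then replays over libiscsi:

/* Sketch: the SG_IO ioctl shape that the iscsi backend emulates.
 * Linux only; the caller supplies a placeholder node such as /dev/sg0. */
#include <fcntl.h>
#include <scsi/sg.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int sketch_test_unit_ready(const char *sg_dev)
{
    unsigned char cdb[6] = { 0x00 };    /* TEST UNIT READY */
    unsigned char sense[32];
    struct sg_io_hdr io = { 0 };
    int fd = open(sg_dev, O_RDWR);
    int ret;

    if (fd < 0) {
        return -1;
    }
    io.interface_id = 'S';              /* mandatory magic for SG_IO */
    io.cmdp = cdb;
    io.cmd_len = sizeof(cdb);
    io.sbp = sense;
    io.mx_sb_len = sizeof(sense);
    io.dxfer_direction = SG_DXFER_NONE; /* no data phase */
    io.timeout = 5000;                  /* milliseconds */
    ret = ioctl(fd, SG_IO, &io);
    close(fd);
    return ret;
}

In the passthrough path described above, a request of this shape originates in the guest's scsi-generic driver and is decoded by the backend rather than issued against a local device node.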
|
|
|
#ifdef __linux__
|
|
|
|
.bdrv_aio_ioctl = iscsi_aio_ioctl,
|
|
|
|
#endif
|
2014-05-08 16:34:42 +02:00
|
|
|
|
|
|
|
.bdrv_detach_aio_context = iscsi_detach_aio_context,
|
|
|
|
.bdrv_attach_aio_context = iscsi_attach_aio_context,
|
2019-02-01 20:29:25 +01:00
|
|
|
|
|
|
|
.strong_runtime_opts = iscsi_strong_runtime_opts,
|
2011-10-25 10:24:24 +02:00
|
|
|
};
|
|
|
|
|
2016-10-09 10:14:56 +02:00
|
|
|
#if LIBISCSI_API_VERSION >= (20160603)
|
|
|
|
static BlockDriver bdrv_iser = {
|
|
|
|
.format_name = "iser",
|
|
|
|
.protocol_name = "iser",
|
|
|
|
|
2016-12-08 14:23:06 +01:00
|
|
|
.instance_size = sizeof(IscsiLun),
|
|
|
|
.bdrv_parse_filename = iscsi_parse_filename,
|
|
|
|
.bdrv_file_open = iscsi_open,
|
|
|
|
.bdrv_close = iscsi_close,
|
2020-03-26 02:12:18 +01:00
|
|
|
.bdrv_co_create_opts = bdrv_co_create_opts_simple,
|
|
|
|
.create_opts = &bdrv_create_opts_simple,
|
2016-12-08 14:23:06 +01:00
|
|
|
.bdrv_reopen_prepare = iscsi_reopen_prepare,
|
|
|
|
.bdrv_reopen_commit = iscsi_reopen_commit,
|
2018-03-15 15:30:56 +01:00
|
|
|
.bdrv_co_invalidate_cache = iscsi_co_invalidate_cache,
|
2016-10-09 10:14:56 +02:00
|
|
|
|
2023-01-13 21:42:04 +01:00
|
|
|
.bdrv_co_getlength = iscsi_co_getlength,
|
2023-01-13 21:42:08 +01:00
|
|
|
.bdrv_co_get_info = iscsi_co_get_info,
|
2018-06-21 17:54:35 +02:00
|
|
|
.bdrv_co_truncate = iscsi_co_truncate,
|
2016-10-09 10:14:56 +02:00
|
|
|
.bdrv_refresh_limits = iscsi_refresh_limits,
|
|
|
|
|
2018-02-13 21:26:48 +01:00
|
|
|
.bdrv_co_block_status = iscsi_co_block_status,
|
2016-10-09 10:14:56 +02:00
|
|
|
.bdrv_co_pdiscard = iscsi_co_pdiscard,
|
2018-06-01 11:26:46 +02:00
|
|
|
.bdrv_co_copy_range_from = iscsi_co_copy_range_from,
|
|
|
|
.bdrv_co_copy_range_to = iscsi_co_copy_range_to,
|
2016-10-09 10:14:56 +02:00
|
|
|
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
|
|
|
|
.bdrv_co_readv = iscsi_co_readv,
|
2018-04-25 00:01:57 +02:00
|
|
|
.bdrv_co_writev = iscsi_co_writev,
|
2016-10-09 10:14:56 +02:00
|
|
|
.bdrv_co_flush_to_disk = iscsi_co_flush,
|
|
|
|
|
|
|
|
#ifdef __linux__
|
|
|
|
.bdrv_aio_ioctl = iscsi_aio_ioctl,
|
|
|
|
#endif
|
|
|
|
|
|
|
|
.bdrv_detach_aio_context = iscsi_detach_aio_context,
|
|
|
|
.bdrv_attach_aio_context = iscsi_attach_aio_context,
|
2019-02-01 20:29:25 +01:00
|
|
|
|
|
|
|
.strong_runtime_opts = iscsi_strong_runtime_opts,
|
2016-10-09 10:14:56 +02:00
|
|
|
};
|
|
|
|
#endif
|
|
|
|
|
2011-10-25 10:24:24 +02:00
|
|
|
static void iscsi_block_init(void)
|
|
|
|
{
|
|
|
|
bdrv_register(&bdrv_iscsi);
|
2016-10-09 10:14:56 +02:00
|
|
|
#if LIBISCSI_API_VERSION >= (20160603)
|
|
|
|
bdrv_register(&bdrv_iser);
|
|
|
|
#endif
|
2011-10-25 10:24:24 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
block_init(iscsi_block_init);
|