qemu-e2k/block/iscsi.c
Ronnie Sahlberg fa6acb0c2f ISCSI: Add support for thin-provisioning via discard/UNMAP and bigger LUNs
Update the configure test for libiscsi support to detect version 1.3
or later.  Version 1.3 of libiscsi provides both READCAPACITY16 as well
as UNMAP commands.

Update the iscsi block layer to use READCAPACITY16 to detect the size of
the LUN instead of READCAPACITY10. This allows support for LUNs larger
than 2TB.

Update to implement bdrv_aio_discard() using the UNMAP command.
This allows us to use thin-provisioned LUNs from TGTD and other iSCSI
targets that support thin-provisioning.

Signed-off-by: Ronnie Sahlberg <ronniesahlberg@gmail.com>
[squashed in subsequent patch from Ronnie to fix off-by-one in LBA count]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2012-05-04 10:39:18 +02:00

771 lines
20 KiB
C

/*
* QEMU Block driver for iSCSI images
*
* Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "config-host.h"
#include <poll.h>
#include "qemu-common.h"
#include "qemu-error.h"
#include "block_int.h"
#include "trace.h"
#include <iscsi/iscsi.h>
#include <iscsi/scsi-lowlevel.h>
typedef struct IscsiLun {
struct iscsi_context *iscsi;
int lun;
int block_size;
unsigned long num_blocks;
} IscsiLun;
typedef struct IscsiAIOCB {
BlockDriverAIOCB common;
QEMUIOVector *qiov;
QEMUBH *bh;
IscsiLun *iscsilun;
struct scsi_task *task;
uint8_t *buf;
int status;
int canceled;
size_t read_size;
size_t read_offset;
} IscsiAIOCB;
struct IscsiTask {
IscsiLun *iscsilun;
BlockDriverState *bs;
int status;
int complete;
};
static void
iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
void *private_data)
{
}
static void
iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
{
IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
IscsiLun *iscsilun = acb->iscsilun;
acb->common.cb(acb->common.opaque, -ECANCELED);
acb->canceled = 1;
/* send a task mgmt call to the target to cancel the task on the target */
iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
iscsi_abort_task_cb, NULL);
/* then also cancel the task locally in libiscsi */
iscsi_scsi_task_cancel(iscsilun->iscsi, acb->task);
}
static AIOPool iscsi_aio_pool = {
.aiocb_size = sizeof(IscsiAIOCB),
.cancel = iscsi_aio_cancel,
};
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
static int iscsi_process_flush(void *arg)
{
IscsiLun *iscsilun = arg;
return iscsi_queue_length(iscsilun->iscsi) > 0;
}
static void
iscsi_set_events(IscsiLun *iscsilun)
{
struct iscsi_context *iscsi = iscsilun->iscsi;
qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), iscsi_process_read,
(iscsi_which_events(iscsi) & POLLOUT)
? iscsi_process_write : NULL,
iscsi_process_flush, iscsilun);
}
static void
iscsi_process_read(void *arg)
{
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
iscsi_service(iscsi, POLLIN);
iscsi_set_events(iscsilun);
}
static void
iscsi_process_write(void *arg)
{
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
iscsi_service(iscsi, POLLOUT);
iscsi_set_events(iscsilun);
}
static int
iscsi_schedule_bh(QEMUBHFunc *cb, IscsiAIOCB *acb)
{
acb->bh = qemu_bh_new(cb, acb);
if (!acb->bh) {
error_report("oom: could not create iscsi bh");
return -EIO;
}
qemu_bh_schedule(acb->bh);
return 0;
}
static void
iscsi_readv_writev_bh_cb(void *p)
{
IscsiAIOCB *acb = p;
qemu_bh_delete(acb->bh);
if (acb->canceled == 0) {
acb->common.cb(acb->common.opaque, acb->status);
}
qemu_aio_release(acb);
}
static void
iscsi_aio_write10_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
{
IscsiAIOCB *acb = opaque;
trace_iscsi_aio_write10_cb(iscsi, status, acb, acb->canceled);
g_free(acb->buf);
if (acb->canceled != 0) {
qemu_aio_release(acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
return;
}
acb->status = 0;
if (status < 0) {
error_report("Failed to write10 data to iSCSI lun. %s",
iscsi_get_error(iscsi));
acb->status = -EIO;
}
iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
}
static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
{
return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
}
static BlockDriverAIOCB *
iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb,
void *opaque)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = iscsilun->iscsi;
IscsiAIOCB *acb;
size_t size;
int fua = 0;
/* set FUA on writes when cache mode is write through */
if (!(bs->open_flags & BDRV_O_CACHE_WB)) {
fua = 1;
}
acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
trace_iscsi_aio_writev(iscsi, sector_num, nb_sectors, opaque, acb);
acb->iscsilun = iscsilun;
acb->qiov = qiov;
acb->canceled = 0;
/* XXX we should pass the iovec to write10 to avoid the extra copy */
/* this will allow us to get rid of 'buf' completely */
size = nb_sectors * BDRV_SECTOR_SIZE;
acb->buf = g_malloc(size);
qemu_iovec_to_buffer(acb->qiov, acb->buf);
acb->task = iscsi_write10_task(iscsi, iscsilun->lun, acb->buf, size,
sector_qemu2lun(sector_num, iscsilun),
fua, 0, iscsilun->block_size,
iscsi_aio_write10_cb, acb);
if (acb->task == NULL) {
error_report("iSCSI: Failed to send write10 command. %s",
iscsi_get_error(iscsi));
g_free(acb->buf);
qemu_aio_release(acb);
return NULL;
}
iscsi_set_events(iscsilun);
return &acb->common;
}
static void
iscsi_aio_read10_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
{
IscsiAIOCB *acb = opaque;
trace_iscsi_aio_read10_cb(iscsi, status, acb, acb->canceled);
if (acb->canceled != 0) {
qemu_aio_release(acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
return;
}
acb->status = 0;
if (status != 0) {
error_report("Failed to read10 data from iSCSI lun. %s",
iscsi_get_error(iscsi));
acb->status = -EIO;
}
iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
}
static BlockDriverAIOCB *
iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb,
void *opaque)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = iscsilun->iscsi;
IscsiAIOCB *acb;
size_t qemu_read_size, lun_read_size;
int i;
qemu_read_size = BDRV_SECTOR_SIZE * (size_t)nb_sectors;
acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
trace_iscsi_aio_readv(iscsi, sector_num, nb_sectors, opaque, acb);
acb->iscsilun = iscsilun;
acb->qiov = qiov;
acb->canceled = 0;
acb->read_size = qemu_read_size;
acb->buf = NULL;
/* If LUN blocksize is bigger than BDRV_BLOCK_SIZE a read from QEMU
* may be misaligned to the LUN, so we may need to read some extra
* data.
*/
acb->read_offset = 0;
if (iscsilun->block_size > BDRV_SECTOR_SIZE) {
uint64_t bdrv_offset = BDRV_SECTOR_SIZE * sector_num;
acb->read_offset = bdrv_offset % iscsilun->block_size;
}
lun_read_size = (qemu_read_size + iscsilun->block_size
+ acb->read_offset - 1)
/ iscsilun->block_size * iscsilun->block_size;
acb->task = iscsi_read10_task(iscsi, iscsilun->lun,
sector_qemu2lun(sector_num, iscsilun),
lun_read_size, iscsilun->block_size,
iscsi_aio_read10_cb, acb);
if (acb->task == NULL) {
error_report("iSCSI: Failed to send read10 command. %s",
iscsi_get_error(iscsi));
qemu_aio_release(acb);
return NULL;
}
for (i = 0; i < acb->qiov->niov; i++) {
scsi_task_add_data_in_buffer(acb->task,
acb->qiov->iov[i].iov_len,
acb->qiov->iov[i].iov_base);
}
iscsi_set_events(iscsilun);
return &acb->common;
}
static void
iscsi_synccache10_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
{
IscsiAIOCB *acb = opaque;
if (acb->canceled != 0) {
qemu_aio_release(acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
return;
}
acb->status = 0;
if (status < 0) {
error_report("Failed to sync10 data on iSCSI lun. %s",
iscsi_get_error(iscsi));
acb->status = -EIO;
}
iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
}
static BlockDriverAIOCB *
iscsi_aio_flush(BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = iscsilun->iscsi;
IscsiAIOCB *acb;
acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
acb->iscsilun = iscsilun;
acb->canceled = 0;
acb->task = iscsi_synchronizecache10_task(iscsi, iscsilun->lun,
0, 0, 0, 0,
iscsi_synccache10_cb,
acb);
if (acb->task == NULL) {
error_report("iSCSI: Failed to send synchronizecache10 command. %s",
iscsi_get_error(iscsi));
qemu_aio_release(acb);
return NULL;
}
iscsi_set_events(iscsilun);
return &acb->common;
}
static void
iscsi_unmap_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
{
IscsiAIOCB *acb = opaque;
if (acb->canceled != 0) {
qemu_aio_release(acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
return;
}
acb->status = 0;
if (status < 0) {
error_report("Failed to unmap data on iSCSI lun. %s",
iscsi_get_error(iscsi));
acb->status = -EIO;
}
iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
scsi_free_scsi_task(acb->task);
acb->task = NULL;
}
static BlockDriverAIOCB *
iscsi_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = iscsilun->iscsi;
IscsiAIOCB *acb;
struct unmap_list list[1];
acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
acb->iscsilun = iscsilun;
acb->canceled = 0;
list[0].lba = sector_qemu2lun(sector_num, iscsilun);
list[0].num = nb_sectors * BDRV_SECTOR_SIZE / iscsilun->block_size;
acb->task = iscsi_unmap_task(iscsi, iscsilun->lun,
0, 0, &list[0], 1,
iscsi_unmap_cb,
acb);
if (acb->task == NULL) {
error_report("iSCSI: Failed to send unmap command. %s",
iscsi_get_error(iscsi));
qemu_aio_release(acb);
return NULL;
}
iscsi_set_events(iscsilun);
return &acb->common;
}
static int64_t
iscsi_getlength(BlockDriverState *bs)
{
IscsiLun *iscsilun = bs->opaque;
int64_t len;
len = iscsilun->num_blocks;
len *= iscsilun->block_size;
return len;
}
static void
iscsi_readcapacity16_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
{
struct IscsiTask *itask = opaque;
struct scsi_readcapacity16 *rc16;
struct scsi_task *task = command_data;
if (status != 0) {
error_report("iSCSI: Failed to read capacity of iSCSI lun. %s",
iscsi_get_error(iscsi));
itask->status = 1;
itask->complete = 1;
scsi_free_scsi_task(task);
return;
}
rc16 = scsi_datain_unmarshall(task);
if (rc16 == NULL) {
error_report("iSCSI: Failed to unmarshall readcapacity16 data.");
itask->status = 1;
itask->complete = 1;
scsi_free_scsi_task(task);
return;
}
itask->iscsilun->block_size = rc16->block_length;
itask->iscsilun->num_blocks = rc16->returned_lba + 1;
itask->bs->total_sectors = itask->iscsilun->num_blocks *
itask->iscsilun->block_size / BDRV_SECTOR_SIZE ;
itask->status = 0;
itask->complete = 1;
scsi_free_scsi_task(task);
}
static void
iscsi_connect_cb(struct iscsi_context *iscsi, int status, void *command_data,
void *opaque)
{
struct IscsiTask *itask = opaque;
struct scsi_task *task;
if (status != 0) {
itask->status = 1;
itask->complete = 1;
return;
}
task = iscsi_readcapacity16_task(iscsi, itask->iscsilun->lun,
iscsi_readcapacity16_cb, opaque);
if (task == NULL) {
error_report("iSCSI: failed to send readcapacity16 command.");
itask->status = 1;
itask->complete = 1;
return;
}
}
static int parse_chap(struct iscsi_context *iscsi, const char *target)
{
QemuOptsList *list;
QemuOpts *opts;
const char *user = NULL;
const char *password = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
return 0;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return 0;
}
}
user = qemu_opt_get(opts, "user");
if (!user) {
return 0;
}
password = qemu_opt_get(opts, "password");
if (!password) {
error_report("CHAP username specified but no password was given");
return -1;
}
if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
error_report("Failed to set initiator username and password");
return -1;
}
return 0;
}
static void parse_header_digest(struct iscsi_context *iscsi, const char *target)
{
QemuOptsList *list;
QemuOpts *opts;
const char *digest = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
return;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return;
}
}
digest = qemu_opt_get(opts, "header-digest");
if (!digest) {
return;
}
if (!strcmp(digest, "CRC32C")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
} else if (!strcmp(digest, "NONE")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
} else if (!strcmp(digest, "CRC32C-NONE")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
} else if (!strcmp(digest, "NONE-CRC32C")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
} else {
error_report("Invalid header-digest setting : %s", digest);
}
}
static char *parse_initiator_name(const char *target)
{
QemuOptsList *list;
QemuOpts *opts;
const char *name = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
return g_strdup("iqn.2008-11.org.linux-kvm");
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return g_strdup("iqn.2008-11.org.linux-kvm");
}
}
name = qemu_opt_get(opts, "initiator-name");
if (!name) {
return g_strdup("iqn.2008-11.org.linux-kvm");
}
return g_strdup(name);
}
/*
* We support iscsi url's on the form
* iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
*/
static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = NULL;
struct iscsi_url *iscsi_url = NULL;
struct IscsiTask task;
char *initiator_name = NULL;
int ret;
if ((BDRV_SECTOR_SIZE % 512) != 0) {
error_report("iSCSI: Invalid BDRV_SECTOR_SIZE. "
"BDRV_SECTOR_SIZE(%lld) is not a multiple "
"of 512", BDRV_SECTOR_SIZE);
return -EINVAL;
}
iscsi_url = iscsi_parse_full_url(iscsi, filename);
if (iscsi_url == NULL) {
error_report("Failed to parse URL : %s %s", filename,
iscsi_get_error(iscsi));
ret = -EINVAL;
goto failed;
}
memset(iscsilun, 0, sizeof(IscsiLun));
initiator_name = parse_initiator_name(iscsi_url->target);
iscsi = iscsi_create_context(initiator_name);
if (iscsi == NULL) {
error_report("iSCSI: Failed to create iSCSI context.");
ret = -ENOMEM;
goto failed;
}
if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
error_report("iSCSI: Failed to set target name.");
ret = -EINVAL;
goto failed;
}
if (iscsi_url->user != NULL) {
ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
iscsi_url->passwd);
if (ret != 0) {
error_report("Failed to set initiator username and password");
ret = -EINVAL;
goto failed;
}
}
/* check if we got CHAP username/password via the options */
if (parse_chap(iscsi, iscsi_url->target) != 0) {
error_report("iSCSI: Failed to set CHAP user/password");
ret = -EINVAL;
goto failed;
}
if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
error_report("iSCSI: Failed to set session type to normal.");
ret = -EINVAL;
goto failed;
}
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
/* check if we got HEADER_DIGEST via the options */
parse_header_digest(iscsi, iscsi_url->target);
task.iscsilun = iscsilun;
task.status = 0;
task.complete = 0;
task.bs = bs;
iscsilun->iscsi = iscsi;
iscsilun->lun = iscsi_url->lun;
if (iscsi_full_connect_async(iscsi, iscsi_url->portal, iscsi_url->lun,
iscsi_connect_cb, &task)
!= 0) {
error_report("iSCSI: Failed to start async connect.");
ret = -EINVAL;
goto failed;
}
while (!task.complete) {
iscsi_set_events(iscsilun);
qemu_aio_wait();
}
if (task.status != 0) {
error_report("iSCSI: Failed to connect to LUN : %s",
iscsi_get_error(iscsi));
ret = -EINVAL;
goto failed;
}
if (iscsi_url != NULL) {
iscsi_destroy_url(iscsi_url);
}
return 0;
failed:
if (initiator_name != NULL) {
g_free(initiator_name);
}
if (iscsi_url != NULL) {
iscsi_destroy_url(iscsi_url);
}
if (iscsi != NULL) {
iscsi_destroy_context(iscsi);
}
memset(iscsilun, 0, sizeof(IscsiLun));
return ret;
}
static void iscsi_close(BlockDriverState *bs)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = iscsilun->iscsi;
qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL);
iscsi_destroy_context(iscsi);
memset(iscsilun, 0, sizeof(IscsiLun));
}
static BlockDriver bdrv_iscsi = {
.format_name = "iscsi",
.protocol_name = "iscsi",
.instance_size = sizeof(IscsiLun),
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_getlength = iscsi_getlength,
.bdrv_aio_readv = iscsi_aio_readv,
.bdrv_aio_writev = iscsi_aio_writev,
.bdrv_aio_flush = iscsi_aio_flush,
.bdrv_aio_discard = iscsi_aio_discard,
};
static void iscsi_block_init(void)
{
bdrv_register(&bdrv_iscsi);
}
block_init(iscsi_block_init);