Max Reitz d9efe9384e iotests: Fix intermittent failure in 219
In 219, we wait for the job to make progress before we emit its status.
This makes the output reliable.  We do not wait for any more progress if
the job's current-progress already matches its total-progress.

Unfortunately, there is a bug: Right after the job has been started,
it's possible that total-progress is still 0.  In that case, we may skip
the first progress-making step and keep ending up 64 kB short.

To fix that bug, we can simply wait for total-progress to reach 4 MB
(the image size) after starting the job.

Reported-by: Karen Mezick <kmezick@redhat.com>
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1686651
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20190516161114.27596-1-mreitz@redhat.com
Reviewed-by: John Snow <jsnow@redhat.com>
[mreitz: Adjusted commit message as per John's proposal]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2019-06-14 14:16:57 +02:00

231 lines
8.8 KiB
Python
Executable File

#!/usr/bin/env python
#
# Copyright (C) 2018 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
#
# Check using the job-* QMP commands with block jobs
import iotests
iotests.verify_image_format(supported_fmts=['qcow2'])
img_size = 4 * 1024 * 1024
def pause_wait(vm, job_id):
with iotests.Timeout(3, "Timeout waiting for job to pause"):
while True:
result = vm.qmp('query-jobs')
for job in result['return']:
if job['id'] == job_id and job['status'] in ['paused', 'standby']:
return job
# Test that block-job-pause/resume and job-pause/resume can be mixed
def test_pause_resume(vm):
for pause_cmd, pause_arg in [('block-job-pause', 'device'),
('job-pause', 'id')]:
for resume_cmd, resume_arg in [('block-job-resume', 'device'),
('job-resume', 'id')]:
iotests.log('=== Testing %s/%s ===' % (pause_cmd, resume_cmd))
iotests.log(vm.qmp(pause_cmd, **{pause_arg: 'job0'}))
pause_wait(vm, 'job0')
iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE')))
result = vm.qmp('query-jobs')
iotests.log(result)
old_progress = result['return'][0]['current-progress']
total_progress = result['return'][0]['total-progress']
iotests.log(vm.qmp(resume_cmd, **{resume_arg: 'job0'}))
iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE')))
if old_progress < total_progress:
# Wait for the job to advance
while result['return'][0]['current-progress'] == old_progress:
result = vm.qmp('query-jobs')
iotests.log(result)
else:
# Already reached the end, so the job cannot advance
# any further; therefore, the query-jobs result can be
# logged immediately
iotests.log(vm.qmp('query-jobs'))
def test_job_lifecycle(vm, job, job_args, has_ready=False):
global img_size
iotests.log('')
iotests.log('')
iotests.log('Starting block job: %s (auto-finalize: %s; auto-dismiss: %s)' %
(job,
job_args.get('auto-finalize', True),
job_args.get('auto-dismiss', True)))
iotests.log(vm.qmp(job, job_id='job0', **job_args))
# Depending on the storage, the first request may or may not have completed
# yet (and the total progress may not have been fully determined yet), so
# filter out the progress. Later query-job calls don't need the filtering
# because the progress is made deterministic by the block job speed
result = vm.qmp('query-jobs')
for j in result['return']:
j['current-progress'] = 'FILTERED'
j['total-progress'] = 'FILTERED'
iotests.log(result)
# undefined -> created -> running
iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE')))
iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE')))
# Wait for total-progress to stabilize
while vm.qmp('query-jobs')['return'][0]['total-progress'] < img_size:
pass
# RUNNING state:
# pause/resume should work, complete/finalize/dismiss should error out
iotests.log('')
iotests.log('Pause/resume in RUNNING')
test_pause_resume(vm)
iotests.log(vm.qmp('job-complete', id='job0'))
iotests.log(vm.qmp('job-finalize', id='job0'))
iotests.log(vm.qmp('job-dismiss', id='job0'))
iotests.log(vm.qmp('block-job-complete', device='job0'))
iotests.log(vm.qmp('block-job-finalize', id='job0'))
iotests.log(vm.qmp('block-job-dismiss', id='job0'))
# Let the job complete (or transition to READY if it supports that)
iotests.log(vm.qmp('block-job-set-speed', device='job0', speed=0))
if has_ready:
iotests.log('')
iotests.log('Waiting for READY state...')
vm.event_wait('BLOCK_JOB_READY')
iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE')))
iotests.log(vm.qmp('query-jobs'))
# READY state:
# pause/resume/complete should work, finalize/dismiss should error out
iotests.log('')
iotests.log('Pause/resume in READY')
test_pause_resume(vm)
iotests.log(vm.qmp('job-finalize', id='job0'))
iotests.log(vm.qmp('job-dismiss', id='job0'))
iotests.log(vm.qmp('block-job-finalize', id='job0'))
iotests.log(vm.qmp('block-job-dismiss', id='job0'))
# Transition to WAITING
iotests.log(vm.qmp('job-complete', id='job0'))
# Move to WAITING and PENDING state
iotests.log('')
iotests.log('Waiting for PENDING state...')
iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE')))
iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE')))
if not job_args.get('auto-finalize', True):
# PENDING state:
# finalize should work, pause/complete/dismiss should error out
iotests.log(vm.qmp('query-jobs'))
iotests.log(vm.qmp('job-pause', id='job0'))
iotests.log(vm.qmp('job-complete', id='job0'))
iotests.log(vm.qmp('job-dismiss', id='job0'))
iotests.log(vm.qmp('block-job-pause', device='job0'))
iotests.log(vm.qmp('block-job-complete', device='job0'))
iotests.log(vm.qmp('block-job-dismiss', id='job0'))
# Transition to CONCLUDED
iotests.log(vm.qmp('job-finalize', id='job0'))
# Move to CONCLUDED state
iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE')))
if not job_args.get('auto-dismiss', True):
# CONCLUDED state:
# dismiss should work, pause/complete/finalize should error out
iotests.log(vm.qmp('query-jobs'))
iotests.log(vm.qmp('job-pause', id='job0'))
iotests.log(vm.qmp('job-complete', id='job0'))
iotests.log(vm.qmp('job-finalize', id='job0'))
iotests.log(vm.qmp('block-job-pause', device='job0'))
iotests.log(vm.qmp('block-job-complete', device='job0'))
iotests.log(vm.qmp('block-job-finalize', id='job0'))
# Transition to NULL
iotests.log(vm.qmp('job-dismiss', id='job0'))
# Move to NULL state
iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE')))
iotests.log(vm.qmp('query-jobs'))
with iotests.FilePath('disk.img') as disk_path, \
iotests.FilePath('copy.img') as copy_path, \
iotests.VM() as vm:
iotests.qemu_img_create('-f', iotests.imgfmt, disk_path, str(img_size))
iotests.qemu_io('-c', 'write 0 %i' % (img_size),
'-f', iotests.imgfmt, disk_path)
iotests.log('Launching VM...')
vm.add_blockdev(vm.qmp_to_opts({
'driver': iotests.imgfmt,
'node-name': 'drive0-node',
'file': {
'driver': 'file',
'filename': disk_path,
},
}))
vm.launch()
# In order to keep things deterministic (especially progress in query-job,
# but related to this also automatic state transitions like job
# completion), but still get pause points often enough to avoid making this
# test very slow, it's important to have the right ratio between speed and
# buf_size.
#
# For backup, buf_size is hard-coded to the source image cluster size (64k),
# so we'll pick the same for mirror. The slice time, i.e. the granularity
# of the rate limiting is 100ms. With a speed of 256k per second, we can
# get four pause points per second. This gives us 250ms per iteration,
# which should be enough to stay deterministic.
test_job_lifecycle(vm, 'drive-mirror', has_ready=True, job_args={
'device': 'drive0-node',
'target': copy_path,
'sync': 'full',
'speed': 262144,
'buf_size': 65536,
})
for auto_finalize in [True, False]:
for auto_dismiss in [True, False]:
test_job_lifecycle(vm, 'drive-backup', job_args={
'device': 'drive0-node',
'target': copy_path,
'sync': 'full',
'speed': 262144,
'auto-finalize': auto_finalize,
'auto-dismiss': auto_dismiss,
})
vm.shutdown()