@ -323,7 +323,7 @@ struct vfio_region_info_cap_type {
# define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
# define VFIO_REGION_TYPE_GFX (1)
# define VFIO_REGION_TYPE_CCW (2)
# define VFIO_REGION_TYPE_MIGRATION (3)
# define VFIO_REGION_TYPE_MIGRATION_DEPRECATED (3)
/* sub-types for VFIO_REGION_TYPE_PCI_* */
@ -405,225 +405,29 @@ struct vfio_region_gfx_edid {
# define VFIO_REGION_SUBTYPE_CCW_CRW (3)
/* sub-types for VFIO_REGION_TYPE_MIGRATION */
# define VFIO_REGION_SUBTYPE_MIGRATION (1)
/*
* The structure vfio_device_migration_info is placed at the 0 th offset of
* the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related
* migration information . Field accesses from this structure are only supported
* at their native width and alignment . Otherwise , the result is undefined and
* vendor drivers should return an error .
*
* device_state : ( read / write )
* - The user application writes to this field to inform the vendor driver
* about the device state to be transitioned to .
* - The vendor driver should take the necessary actions to change the
* device state . After successful transition to a given state , the
* vendor driver should return success on write ( device_state , state )
* system call . If the device state transition fails , the vendor driver
* should return an appropriate - errno for the fault condition .
* - On the user application side , if the device state transition fails ,
* that is , if write ( device_state , state ) returns an error , read
* device_state again to determine the current state of the device from
* the vendor driver .
* - The vendor driver should return previous state of the device unless
* the vendor driver has encountered an internal error , in which case
* the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR .
* - The user application must use the device reset ioctl to recover the
* device from VFIO_DEVICE_STATE_ERROR state . If the device is
* indicated to be in a valid device state by reading device_state , the
* user application may attempt to transition the device to any valid
* state reachable from the current state or terminate itself .
*
* device_state consists of 3 bits :
* - If bit 0 is set , it indicates the _RUNNING state . If bit 0 is clear ,
* it indicates the _STOP state . When the device state is changed to
* _STOP , driver should stop the device before write ( ) returns .
* - If bit 1 is set , it indicates the _SAVING state , which means that the
* driver should start gathering device state information that will be
* provided to the VFIO user application to save the device ' s state .
* - If bit 2 is set , it indicates the _RESUMING state , which means that
* the driver should prepare to resume the device . Data provided through
* the migration region should be used to resume the device .
* Bits 3 - 31 are reserved for future use . To preserve them , the user
* application should perform a read - modify - write operation on this
* field when modifying the specified bits .
*
* + - - - - - - - _RESUMING
* | + - - - - - - _SAVING
* | | + - - - - - _RUNNING
* | | |
* 000 b = > Device Stopped , not saving or resuming
* 001 b = > Device running , which is the default state
* 010 b = > Stop the device & save the device state , stop - and - copy state
* 011 b = > Device running and save the device state , pre - copy state
* 100 b = > Device stopped and the device state is resuming
* 101 b = > Invalid state
* 110 b = > Error state
* 111 b = > Invalid state
*
* State transitions :
*
* _RESUMING _RUNNING Pre - copy Stop - and - copy _STOP
* ( 100 b ) ( 001 b ) ( 011 b ) ( 010 b ) ( 000 b )
* 0. Running or default state
* |
*
* 1. Normal Shutdown ( optional )
* | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > |
*
* 2. Save the state or suspend
* | - - - - - - - - - - - - - - - - - - - - - - - - - > | - - - - - - - - - - > |
*
* 3. Save the state during live migration
* | - - - - - - - - - - - > | - - - - - - - - - - - - > | - - - - - - - - - - > |
*
* 4. Resuming
* | < - - - - - - - - - |
*
* 5. Resumed
* | - - - - - - - - - > |
*
* 0. Default state of VFIO device is _RUNNING when the user application starts .
* 1. During normal shutdown of the user application , the user application may
* optionally change the VFIO device state from _RUNNING to _STOP . This
* transition is optional . The vendor driver must support this transition but
* must not require it .
* 2. When the user application saves state or suspends the application , the
* device state transitions from _RUNNING to stop - and - copy and then to _STOP .
* On state transition from _RUNNING to stop - and - copy , driver must stop the
* device , save the device state and send it to the application through the
* migration region . The sequence to be followed for such transition is given
* below .
* 3. In live migration of user application , the state transitions from _RUNNING
* to pre - copy , to stop - and - copy , and to _STOP .
* On state transition from _RUNNING to pre - copy , the driver should start
* gathering the device state while the application is still running and send
* the device state data to application through the migration region .
* On state transition from pre - copy to stop - and - copy , the driver must stop
* the device , save the device state and send it to the user application
* through the migration region .
* Vendor drivers must support the pre - copy state even for implementations
* where no data is provided to the user before the stop - and - copy state . The
* user must not be required to consume all migration data before the device
* transitions to a new state , including the stop - and - copy state .
* The sequence to be followed for above two transitions is given below .
* 4. To start the resuming phase , the device state should be transitioned from
* the _RUNNING to the _RESUMING state .
* In the _RESUMING state , the driver should use the device state data
* received through the migration region to resume the device .
* 5. After providing saved device data to the driver , the application should
* change the state from _RESUMING to _RUNNING .
*
* reserved :
* Reads on this field return zero and writes are ignored .
*
* pending_bytes : ( read only )
* The number of pending bytes still to be migrated from the vendor driver .
*
* data_offset : ( read only )
* The user application should read data_offset field from the migration
* region . The user application should read the device data from this
* offset within the migration region during the _SAVING state or write
* the device data during the _RESUMING state . See below for details of
* sequence to be followed .
*
* data_size : ( read / write )
* The user application should read data_size to get the size in bytes of
* the data copied in the migration region during the _SAVING state and
* write the size in bytes of the data copied in the migration region
* during the _RESUMING state .
*
* The format of the migration region is as follows :
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* | vfio_device_migration_info | data section |
* | | /////////////////////////////// |
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* ^ ^
* offset 0 - trapped part data_offset
*
* The structure vfio_device_migration_info is always followed by the data
* section in the region , so data_offset will always be nonzero . The offset
* from where the data is copied is decided by the kernel driver . The data
* section can be trapped , mmapped , or partitioned , depending on how the kernel
* driver defines the data section . The data section partition can be defined
* as mapped by the sparse mmap capability . If mmapped , data_offset must be
* page aligned , whereas initial section which contains the
* vfio_device_migration_info structure , might not end at the offset , which is
* page aligned . The user is not required to access through mmap regardless
* of the capabilities of the region mmap .
* The vendor driver should determine whether and how to partition the data
* section . The vendor driver should return data_offset accordingly .
*
* The sequence to be followed while in pre - copy state and stop - and - copy state
* is as follows :
* a . Read pending_bytes , indicating the start of a new iteration to get device
* data . Repeated read on pending_bytes at this stage should have no side
* effects .
* If pending_bytes = = 0 , the user application should not iterate to get data
* for that device .
* If pending_bytes > 0 , perform the following steps .
* b . Read data_offset , indicating that the vendor driver should make data
* available through the data section . The vendor driver should return this
* read operation only after data is available from ( region + data_offset )
* to ( region + data_offset + data_size ) .
* c . Read data_size , which is the amount of data in bytes available through
* the migration region .
* Read on data_offset and data_size should return the offset and size of
* the current buffer if the user application reads data_offset and
* data_size more than once here .
* d . Read data_size bytes of data from ( region + data_offset ) from the
* migration region .
* e . Process the data .
* f . Read pending_bytes , which indicates that the data from the previous
* iteration has been read . If pending_bytes > 0 , go to step b .
*
* The user application can transition from the _SAVING | _RUNNING
* ( pre - copy state ) to the _SAVING ( stop - and - copy ) state regardless of the
* number of pending bytes . The user application should iterate in _SAVING
* ( stop - and - copy ) until pending_bytes is 0.
*
* The sequence to be followed while _RESUMING device state is as follows :
* While data for this device is available , repeat the following steps :
* a . Read data_offset from where the user application should write data .
* b . Write migration data starting at the migration region + data_offset for
* the length determined by data_size from the migration source .
* c . Write data_size , which indicates to the vendor driver that data is
* written in the migration region . Vendor driver must return this write
* operations on consuming data . Vendor driver should apply the
* user - provided migration region data to the device resume state .
*
* If an error occurs during the above sequences , the vendor driver can return
* an error code for next read ( ) or write ( ) operation , which will terminate the
* loop . The user application should then take the next necessary action , for
* example , failing migration or terminating the user application .
*
* For the user application , data is opaque . The user application should write
* data in the same order as the data is received and the data should be of
* same transaction size at the source .
*/
# define VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED (1)
struct vfio_device_migration_info {
__u32 device_state ; /* VFIO device state */
# define VFIO_DEVICE_STATE_STOP (0)
# define VFIO_DEVICE_STATE_RUNNING (1 << 0)
# define VFIO_DEVICE_STATE_SAVING (1 << 1)
# define VFIO_DEVICE_STATE_RESUMING (1 << 2)
# define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \
VFIO_DEVICE_STATE_SAVING | \
VFIO_DEVICE_STATE_RESUMING )
# define VFIO_DEVICE_STATE_V1_STOP (0)
# define VFIO_DEVICE_STATE_V1_RUNNING (1 << 0)
# define VFIO_DEVICE_STATE_V1_SAVING (1 << 1)
# define VFIO_DEVICE_STATE_V1_RESUMING (1 << 2)
# define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_V1_RUNNING | \
VFIO_DEVICE_STATE_V1_SAVING | \
VFIO_DEVICE_STATE_V1_RESUMING )
# define VFIO_DEVICE_STATE_VALID(state) \
( state & VFIO_DEVICE_STATE_RESUMING ? \
( state & VFIO_DEVICE_STATE_MASK ) = = VFIO_DEVICE_STATE_RESUMING : 1 )
( state & VFIO_DEVICE_STATE_V1_RESUMING ? \
( state & VFIO_DEVICE_STATE_MASK ) = = VFIO_DEVICE_STATE_V1_RESUMING : 1 )
# define VFIO_DEVICE_STATE_IS_ERROR(state) \
( ( state & VFIO_DEVICE_STATE_MASK ) = = ( VFIO_DEVICE_STATE_SAVING | \
VFIO_DEVICE_STATE_RESUMING ) )
( ( state & VFIO_DEVICE_STATE_MASK ) = = ( VFIO_DEVICE_STATE_V1_SAVING | \
VFIO_DEVICE_STATE_V1_RESUMING ) )
# define VFIO_DEVICE_STATE_SET_ERROR(state) \
( ( state & ~ VFIO_DEVICE_STATE_MASK ) | VFIO_DEVICE_SATE_SAVING | \
VFIO_DEVICE_STATE_RESUMING )
( ( state & ~ VFIO_DEVICE_STATE_MASK ) | VFIO_DEVICE_STATE_V1_SAVING | \
VFIO_DEVICE_STATE_V1_RESUMING )
__u32 reserved ;
__u64 pending_bytes ;
@ -1002,6 +806,186 @@ struct vfio_device_feature {
*/
# define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0)
/*
* Indicates the device can support the migration API through
* VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE . If this GET succeeds , the RUNNING and
* ERROR states are always supported . Support for additional states is
* indicated via the flags field ; at least VFIO_MIGRATION_STOP_COPY must be
* set .
*
* VFIO_MIGRATION_STOP_COPY means that STOP , STOP_COPY and
* RESUMING are supported .
*
* VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P
* is supported in addition to the STOP_COPY states .
*
* Other combinations of flags have behavior to be defined in the future .
*/
struct vfio_device_feature_migration {
__aligned_u64 flags ;
# define VFIO_MIGRATION_STOP_COPY (1 << 0)
# define VFIO_MIGRATION_P2P (1 << 1)
} ;
# define VFIO_DEVICE_FEATURE_MIGRATION 1
/*
* Upon VFIO_DEVICE_FEATURE_SET , execute a migration state change on the VFIO
* device . The new state is supplied in device_state , see enum
* vfio_device_mig_state for details
*
* The kernel migration driver must fully transition the device to the new state
* value before the operation returns to the user .
*
* The kernel migration driver must not generate asynchronous device state
* transitions outside of manipulation by the user or the VFIO_DEVICE_RESET
* ioctl as described above .
*
* If this function fails then current device_state may be the original
* operating state or some other state along the combination transition path .
* The user can then decide if it should execute a VFIO_DEVICE_RESET , attempt
* to return to the original state , or attempt to return to some other state
* such as RUNNING or STOP .
*
* If the new_state starts a new data transfer session then the FD associated
* with that session is returned in data_fd . The user is responsible to close
* this FD when it is finished . The user must consider the migration data stream
* carried over the FD to be opaque and must preserve the byte order of the
* stream . The user is not required to preserve buffer segmentation when writing
* the data stream during the RESUMING operation .
*
* Upon VFIO_DEVICE_FEATURE_GET , get the current migration state of the VFIO
* device , data_fd will be - 1.
*/
struct vfio_device_feature_mig_state {
__u32 device_state ; /* From enum vfio_device_mig_state */
__s32 data_fd ;
} ;
# define VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE 2
/*
* The device migration Finite State Machine is described by the enum
* vfio_device_mig_state . Some of the FSM arcs will create a migration data
* transfer session by returning a FD , in this case the migration data will
* flow over the FD using read ( ) and write ( ) as discussed below .
*
* There are 5 states to support VFIO_MIGRATION_STOP_COPY :
* RUNNING - The device is running normally
* STOP - The device does not change the internal or external state
* STOP_COPY - The device internal state can be read out
* RESUMING - The device is stopped and is loading a new internal state
* ERROR - The device has failed and must be reset
*
* And 1 optional state to support VFIO_MIGRATION_P2P :
* RUNNING_P2P - RUNNING , except the device cannot do peer to peer DMA
*
* The FSM takes actions on the arcs between FSM states . The driver implements
* the following behavior for the FSM arcs :
*
* RUNNING_P2P - > STOP
* STOP_COPY - > STOP
* While in STOP the device must stop the operation of the device . The device
* must not generate interrupts , DMA , or any other change to external state .
* It must not change its internal state . When stopped the device and kernel
* migration driver must accept and respond to interaction to support external
* subsystems in the STOP state , for example PCI MSI - X and PCI config space .
* Failure by the user to restrict device access while in STOP must not result
* in error conditions outside the user context ( ex . host system faults ) .
*
* The STOP_COPY arc will terminate a data transfer session .
*
* RESUMING - > STOP
* Leaving RESUMING terminates a data transfer session and indicates the
* device should complete processing of the data delivered by write ( ) . The
* kernel migration driver should complete the incorporation of data written
* to the data transfer FD into the device internal state and perform
* final validity and consistency checking of the new device state . If the
* user provided data is found to be incomplete , inconsistent , or otherwise
* invalid , the migration driver must fail the SET_STATE ioctl and
* optionally go to the ERROR state as described below .
*
* While in STOP the device has the same behavior as other STOP states
* described above .
*
* To abort a RESUMING session the device must be reset .
*
* RUNNING_P2P - > RUNNING
* While in RUNNING the device is fully operational , the device may generate
* interrupts , DMA , respond to MMIO , all vfio device regions are functional ,
* and the device may advance its internal state .
*
* RUNNING - > RUNNING_P2P
* STOP - > RUNNING_P2P
* While in RUNNING_P2P the device is partially running in the P2P quiescent
* state defined below .
*
* STOP - > STOP_COPY
* This arc begin the process of saving the device state and will return a
* new data_fd .
*
* While in the STOP_COPY state the device has the same behavior as STOP
* with the addition that the data transfers session continues to stream the
* migration state . End of stream on the FD indicates the entire device
* state has been transferred .
*
* The user should take steps to restrict access to vfio device regions while
* the device is in STOP_COPY or risk corruption of the device migration data
* stream .
*
* STOP - > RESUMING
* Entering the RESUMING state starts a process of restoring the device state
* and will return a new data_fd . The data stream fed into the data_fd should
* be taken from the data transfer output of a single FD during saving from
* a compatible device . The migration driver may alter / reset the internal
* device state for this arc if required to prepare the device to receive the
* migration data .
*
* any - > ERROR
* ERROR cannot be specified as a device state , however any transition request
* can be failed with an errno return and may then move the device_state into
* ERROR . In this case the device was unable to execute the requested arc and
* was also unable to restore the device to any valid device_state .
* To recover from ERROR VFIO_DEVICE_RESET must be used to return the
* device_state back to RUNNING .
*
* The optional peer to peer ( P2P ) quiescent state is intended to be a quiescent
* state for the device for the purposes of managing multiple devices within a
* user context where peer - to - peer DMA between devices may be active . The
* RUNNING_P2P states must prevent the device from initiating
* any new P2P DMA transactions . If the device can identify P2P transactions
* then it can stop only P2P DMA , otherwise it must stop all DMA . The migration
* driver must complete any such outstanding operations prior to completing the
* FSM arc into a P2P state . For the purpose of specification the states
* behave as though the device was fully running if not supported . Like while in
* STOP or STOP_COPY the user must not touch the device , otherwise the state
* can be exited .
*
* The remaining possible transitions are interpreted as combinations of the
* above FSM arcs . As there are multiple paths through the FSM arcs the path
* should be selected based on the following rules :
* - Select the shortest path .
* Refer to vfio_mig_get_next_state ( ) for the result of the algorithm .
*
* The automatic transit through the FSM arcs that make up the combination
* transition is invisible to the user . When working with combination arcs the
* user may see any step along the path in the device_state if SET_STATE
* fails . When handling these types of errors users should anticipate future
* revisions of this protocol using new states and those states becoming
* visible in this case .
*
* The optional states cannot be used with SET_STATE if the device does not
* support them . The user can discover if these states are supported by using
* VFIO_DEVICE_FEATURE_MIGRATION . By using combination transitions the user can
* avoid knowing about these optional states if the kernel driver supports them .
*/
enum vfio_device_mig_state {
VFIO_DEVICE_STATE_ERROR = 0 ,
VFIO_DEVICE_STATE_STOP = 1 ,
VFIO_DEVICE_STATE_RUNNING = 2 ,
VFIO_DEVICE_STATE_STOP_COPY = 3 ,
VFIO_DEVICE_STATE_RESUMING = 4 ,
VFIO_DEVICE_STATE_RUNNING_P2P = 5 ,
} ;
/* -------- API for Type1 VFIO IOMMU -------- */
/**