| #LyX 2.0 created this file. For more info see http://www.lyx.org/ |
| \lyxformat 413 |
| \begin_document |
| \begin_header |
| \textclass report |
| \use_default_options false |
| \maintain_unincluded_children false |
| \language english |
| \language_package default |
| \inputencoding auto |
| \fontencoding global |
| \font_roman default |
| \font_sans default |
| \font_typewriter default |
| \font_default_family default |
| \use_non_tex_fonts false |
| \font_sc false |
| \font_osf false |
| \font_sf_scale 100 |
| \font_tt_scale 100 |
| |
| \graphics default |
| \default_output_format default |
| \output_sync 0 |
| \bibtex_command default |
| \index_command default |
| \paperfontsize default |
| \spacing single |
| \use_hyperref false |
| \papersize default |
| \use_geometry false |
| \use_amsmath 1 |
| \use_esint 1 |
| \use_mhchem 1 |
| \use_mathdots 1 |
| \cite_engine basic |
| \use_bibtopic false |
| \use_indices false |
| \paperorientation portrait |
| \suppress_date false |
| \use_refstyle 0 |
| \index Index |
| \shortcut idx |
| \color #008000 |
| \end_index |
| \secnumdepth 3 |
| \tocdepth 3 |
| \paragraph_separation skip |
| \defskip medskip |
| \quotes_language english |
| \papercolumns 1 |
| \papersides 1 |
| \paperpagestyle default |
| \tracking_changes true |
| \output_changes true |
| \html_math_output 0 |
| \html_css_as_file 0 |
| \html_be_strict false |
| \end_header |
| |
| \begin_body |
| |
| \begin_layout Title |
| Virtio PCI Card Specification |
| \begin_inset Newline newline |
| \end_inset |
| |
| v0.9.5 DRAFT |
| \begin_inset Newline newline |
| \end_inset |
| |
| - |
| \end_layout |
| |
| \begin_layout Author |
| Rusty Russell <rusty@rustcorp.com.au> IBM Corporation (Editor) |
| \end_layout |
| |
| \begin_layout Date |
| 2012 May 7. |
| \end_layout |
| |
| \begin_layout Chapter |
| Purpose and Description |
| \end_layout |
| |
| \begin_layout Standard |
| This document describes the specifications of the |
| \begin_inset Quotes eld |
| \end_inset |
| |
| virtio |
| \begin_inset Quotes erd |
| \end_inset |
| |
| family of |
| \emph on |
| PCI |
| \emph default |
| |
| \begin_inset CommandInset nomenclature |
| LatexCommand nomenclature |
| symbol "PCI" |
| description "Peripheral Component Interconnect; a common device bus. See\\\\http://en.wikipedia.org/wiki/Peripheral Component Interconnect" |
| |
| \end_inset |
| |
| devices. |
| These devices are found in |
| \emph on |
| virtual |
| \emph default |
| |
| \emph on |
| environments |
| \begin_inset CommandInset nomenclature |
| LatexCommand nomenclature |
| symbol "virtualized" |
| description "Environments where access to hardware is restricted (and often emulated) by a hypervisor." |
| |
| \end_inset |
| |
| |
| \emph default |
| , yet by design they are not all that different from physical PCI devices, |
| and this document treats them as such. |
| This allows the guest to use standard PCI drivers and discovery mechanisms. |
| \end_layout |
| |
| \begin_layout Standard |
| The purpose of virtio and this specification is that virtual environments |
| and guests should have a straightforward, efficient, standard and extensible |
| mechanism for virtual devices, rather than boutique per-environment or |
| per-OS mechanisms. |
| \end_layout |
| |
| \begin_layout Description |
| Straightforward: Virtio PCI devices use normal PCI mechanisms of interrupts |
| and DMA which should be familiar to any device driver author. |
| There is no exotic page-flipping or COW mechanism: it's just a PCI device. |
| \begin_inset Foot |
| status open |
| |
| \begin_layout Plain Layout |
| This lack of page-sharing implies that the implementation of the device |
| (e.g. |
| the hypervisor or host) needs full access to the guest memory. |
| Communication with untrusted parties (i.e. |
| inter-guest communication) requires copying. |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Description |
| Efficient: Virtio PCI devices consist of rings of descriptors for input |
| and output, which are neatly separated to avoid cache effects from both |
| guest and device writing to the same cache lines. |
| \end_layout |
| |
| \begin_layout Description |
| Standard: Virtio PCI makes no assumptions about the environment in which |
| it operates, beyond supporting PCI. |
| In fact the virtio devices specified in the appendices do not require PCI |
| at all: they have been implemented on non-PCI buses. |
| \begin_inset Foot |
| status open |
| |
| \begin_layout Plain Layout |
| The Linux implementation further separates the PCI virtio code from the |
| specific virtio drivers: these drivers are shared with the non-PCI implementati |
| ons (currently lguest and S/390). |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Description |
| Extensible: Virtio PCI devices contain feature bits which are acknowledged |
| by the guest operating system during device setup. |
| This allows forwards and backwards compatibility: the device offers all |
| the features it knows about, and the driver acknowledges those it understands |
| and wishes to use. |
| \end_layout |
| |
| \begin_layout Section |
| Virtqueues |
| \end_layout |
| |
| \begin_layout Standard |
| The mechanism for bulk data transport on virtio PCI devices is pretentiously |
| called a virtqueue. |
| Each device can have zero or more virtqueues: for example, the network |
| device has one for transmit and one for receive. |
| \end_layout |
| |
| \begin_layout Standard |
| Each virtqueue occupies two or more physically-contiguous pages (defined, |
| for the purposes of this specification, as 4096 bytes), and consists of |
| three parts: |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset Tabular |
| <lyxtabular version="3" rows="1" columns="4"> |
| <features tabularvalignment="middle"> |
| <column alignment="center" valignment="top" width="0"> |
| <column alignment="center" valignment="top" width="0"> |
| <column alignment="center" valignment="top" width="0"> |
| <column alignment="center" valignment="top" width="0"> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Descriptor Table |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell multicolumn="1" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Available Ring |
| \begin_inset space ~ |
| \end_inset |
| |
| |
| \begin_inset space ~ |
| \end_inset |
| |
| |
| \begin_inset space ~ |
| \end_inset |
| |
| |
| \begin_inset space ~ |
| \end_inset |
| |
| |
| \begin_inset space ~ |
| \end_inset |
| |
| |
| \emph on |
| (padding) |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell multicolumn="2" alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Used Ring |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| </lyxtabular> |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| When the driver wants to send a buffer to the device, it fills in a slot |
| in the descriptor table (or chains several together), and writes the descriptor |
| index into the available ring. |
| It then notifies the device. |
| When the device has finished a buffer, it writes the descriptor index into the |
| used ring, and sends an interrupt. |
| \end_layout |
| |
| \begin_layout Chapter |
| Specification |
| \end_layout |
| |
| \begin_layout Section |
| PCI Discovery |
| \end_layout |
| |
| \begin_layout Standard |
| Any PCI device with Vendor ID 0x1AF4, and Device ID 0x1000 through 0x103F |
| inclusive is a virtio device |
| \begin_inset Foot |
| status open |
| |
| \begin_layout Plain Layout |
| The actual value within this range is ignored. |
| \end_layout |
| |
| \end_inset |
| |
| . |
| The device must also have a Revision ID of 0 to match this specification. |
| \end_layout |
| |
| \begin_layout Standard |
| The Subsystem Device ID indicates which virtio device is supported by the |
| device. |
| The Subsystem Vendor ID should reflect the PCI Vendor ID of the environment |
| (it's currently only used for informational purposes by the guest). |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset Tabular |
| <lyxtabular version="3" rows="11" columns="3"> |
| <features tabularvalignment="bottom"> |
| <column alignment="center" valignment="top" width="0"> |
| <column alignment="center" valignment="top" width="0"> |
| <column alignment="center" valignment="bottom" width="0"> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Subsystem Device ID |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Virtio Device |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Specification |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 1 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| network card |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Appendix C |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 2 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| block device |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Appendix D |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 3 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| console |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Appendix E |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 4 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| entropy source |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Appendix F |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 5 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| memory ballooning |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Appendix G |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 6 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| ioMemory |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| - |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 7 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| rpmsg |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Appendix H |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 8 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| SCSI host |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Appendix I |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 9 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 9P transport |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| - |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 10 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| mac80211 wlan |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| - |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| </lyxtabular> |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Section |
| Device Configuration |
| \end_layout |
| |
| \begin_layout Standard |
| To configure the device, we use the first I/O region of the PCI device. |
| This contains a |
| \emph on |
| virtio header |
| \emph default |
| followed by a |
| \emph on |
| device-specific region. |
| \end_layout |
| |
| \begin_layout Standard |
| There may be different widths of accesses to the I/O region; the |
| \begin_inset Quotes eld |
| \end_inset |
| |
| natural |
| \begin_inset Quotes erd |
| \end_inset |
| |
| access method for each field in the virtio header must be used (i.e. |
| 32-bit accesses for 32-bit fields, etc), but the device-specific region |
| can be accessed using any width accesses, and should obtain the same results. |
| \end_layout |
| |
| \begin_layout Standard |
| Note that this is possible because while the virtio header is PCI (i.e. |
| little) endian, the device-specific region is encoded in the native endian |
| of the guest (where such distinction is applicable). |
| \end_layout |
| |
| \begin_layout Subsection |
| Device Initialization Sequence |
| \begin_inset CommandInset label |
| LatexCommand label |
| name "sub:Device-Initialization-Sequence" |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| We start with an overview of device initialization, then expand on the details |
| of the device and how each step is performed. |
| \end_layout |
| |
| \begin_layout Enumerate |
| Reset the device. |
| This is not required on initial start up. |
| \end_layout |
| |
| \begin_layout Enumerate |
| The ACKNOWLEDGE status bit is set: we have noticed the device. |
| \end_layout |
| |
| \begin_layout Enumerate |
| The DRIVER status bit is set: we know how to drive the device. |
| \end_layout |
| |
| \begin_layout Enumerate |
| Device-specific setup, including reading the Device Feature Bits, discovery |
| of virtqueues for the device, optional MSI-X setup, and reading and possibly |
| writing the virtio configuration space. |
| \end_layout |
| |
| \begin_layout Enumerate |
| The subset of Device Feature Bits understood by the driver is written to |
| the device. |
| \end_layout |
| |
| \begin_layout Enumerate |
| The DRIVER_OK status bit is set. |
| \end_layout |
| |
| \begin_layout Enumerate |
| The device can now be used (i.e. |
| buffers added to the virtqueues) |
| \begin_inset Foot |
| status open |
| |
| \begin_layout Plain Layout |
| Historically, drivers have used the device before steps 5 and 6. |
| This is only allowed if the driver does not use any features which would |
| alter this early use of the device. |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| If any of these steps go irrecoverably wrong, the guest should set the FAILED |
| status bit to indicate that it has given up on the device (it can reset |
| the device later to restart if desired). |
| \end_layout |
| |
| \begin_layout Standard |
| We now cover the fields required for general setup in detail. |
| \end_layout |
| |
| \begin_layout Subsection |
| Virtio Header |
| \end_layout |
| |
| \begin_layout Standard |
| The virtio header looks as follows: |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset Tabular |
| <lyxtabular version="3" rows="4" columns="9"> |
| <features tabularvalignment="middle"> |
| <column alignment="left" valignment="top" width="0"> |
| <column alignment="left" valignment="top" width="0"> |
| <column alignment="left" valignment="top" width="0"> |
| <column alignment="left" valignment="top" width="0"> |
| <column alignment="left" valignment="top" width="0"> |
| <column alignment="left" valignment="top" width="0"> |
| <column alignment="left" valignment="top" width="0"> |
| <column alignment="left" valignment="top" width="0"> |
| <column alignment="left" valignment="top" width="0"> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Bits |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 32 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 32 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 32 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 16 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 16 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 16 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 8 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 8 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Read/Write |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| R |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| R+W |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| R+W |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| R |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| R+W |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| R+W |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| R+W |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| R |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Purpose |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Device |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Guest |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Queue |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Queue |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Queue |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Queue |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Device |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| ISR |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Features bits 0:31 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Features bits 0:31 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Address |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Size |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Select |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Notify |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Status |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Status |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| </lyxtabular> |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| If MSI-X is enabled for the device, two additional fields immediately follow |
| this header: |
| \begin_inset Foot |
| status collapsed |
| |
| \begin_layout Plain Layout |
| i.e. |
| once you enable MSI-X on the device, the other fields move. |
| If you turn it off again, they move back! |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset Tabular |
| <lyxtabular version="3" rows="4" columns="3"> |
| <features tabularvalignment="middle"> |
| <column alignment="left" valignment="top" width="0"> |
| <column alignment="left" valignment="top" width="0"> |
| <column alignment="left" valignment="top" width="0"> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Bits |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 16 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| 16 |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Read/Write |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| R+W |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| R+W |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Purpose |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Configuration |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Queue |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| (MSI-X) |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Vector |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Vector |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| </lyxtabular> |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| Immediately following these general headers, there may be device-specific |
| headers: |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset Tabular |
| <lyxtabular version="3" rows="4" columns="2"> |
| <features tabularvalignment="middle"> |
| <column alignment="left" valignment="top" width="0"> |
| <column alignment="left" valignment="top" width="0"> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Bits |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Device Specific |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Read/Write |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Device Specific |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| Purpose |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \size footnotesize |
| Device Specific... |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| <row> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \end_inset |
| </cell> |
| <cell alignment="center" valignment="top" bottomline="true" leftline="true" rightline="true" usebox="none"> |
| \begin_inset Text |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \end_inset |
| </cell> |
| </row> |
| </lyxtabular> |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Subsubsection |
| Device Status |
| \end_layout |
| |
| \begin_layout Standard |
| The Device Status field is updated by the guest to indicate its progress. |
| This provides a simple low-level diagnostic: it's most useful to imagine |
| the status bits hooked up to traffic lights on the console indicating the |
| status of each device. |
| \end_layout |
| |
| \begin_layout Standard |
| The device can be reset by writing a 0 to this field, otherwise at least |
| one bit should be set: |
| \end_layout |
| |
| \begin_layout Description |
| ACKNOWLEDGE |
| \begin_inset space ~ |
| \end_inset |
| |
| (1) Indicates that the guest OS has found the device and recognized it as |
| a valid virtio device. |
| \end_layout |
| |
| \begin_layout Description |
| DRIVER |
| \begin_inset space ~ |
| \end_inset |
| |
| (2) Indicates that the guest OS knows how to drive the device. |
| Under Linux, drivers can be loadable modules so there may be a significant |
| (or infinite) delay before setting this bit. |
| \end_layout |
| |
| \begin_layout Description |
| DRIVER_OK |
| \begin_inset space ~ |
| \end_inset |
| |
| (4) Indicates that the driver is set up and ready to drive the device. |
| \end_layout |
| |
| \begin_layout Description |
| FAILED |
| \begin_inset space ~ |
| \end_inset |
| |
| (128) Indicates that something went wrong in the guest, and it has given |
| up on the device. |
| This could be an internal error, or the driver didn't like the device for |
| some reason, or even a fatal error during device operation. |
| The device must be reset before attempting to re-initialize. |
| \end_layout |
| |
| \begin_layout Subsubsection |
| Feature Bits |
| \begin_inset CommandInset label |
| LatexCommand label |
| name "sub:Feature-Bits" |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| The first configuration field indicates the features that the device supports. |
| The bits are allocated as follows: |
| \end_layout |
| |
| \begin_layout Description |
| 0 |
| \begin_inset space ~ |
| \end_inset |
| |
| to |
| \begin_inset space ~ |
| \end_inset |
| |
| 23 Feature bits for the specific device type |
| \end_layout |
| |
| \begin_layout Description |
| 24 |
| \begin_inset space \space{} |
| \end_inset |
| |
| to |
| \begin_inset space ~ |
| \end_inset |
| |
| 32 Feature bits reserved for extensions to the queue and feature negotiation |
| mechanisms |
| \end_layout |
| |
| \begin_layout Standard |
| For example, feature bit 0 for a network device (i.e. |
| Subsystem Device ID 1) indicates that the device supports checksumming |
| of packets. |
| \end_layout |
| |
| \begin_layout Standard |
| The feature bits are |
| \emph on |
| negotiated: |
| \emph default |
| the device lists all the features it understands in the Device Features |
| field, and the guest writes the subset that it understands into the Guest |
| Features field. |
| The only way to renegotiate is to reset the device. |
| \end_layout |
| |
| \begin_layout Standard |
| In particular, new fields in the device configuration header are indicated |
| by offering a feature bit, so the guest can check before accessing that |
| part of the configuration space. |
| \end_layout |
| |
| \begin_layout Standard |
| This allows for forwards and backwards compatibility: if the device is enhanced |
| with a new feature bit, older guests will not write that feature bit back |
| to the Guest Features field and it can go into backwards compatibility |
| mode. |
| Similarly, if a guest is enhanced with a feature that the device doesn't |
| support, it will not see that feature bit in the Device Features field |
| and can go into backwards compatibility mode (or, for poor implementations, |
| set the FAILED Device Status bit). |
| \end_layout |
| |
| \begin_layout Subsubsection |
| Configuration/Queue Vectors |
| \end_layout |
| |
| \begin_layout Standard |
| When MSI-X capability is present and enabled in the device (through standard |
| PCI configuration space) 4 bytes at byte offset 20 are used to map configuratio |
| n change and queue interrupts to MSI-X vectors. |
| In this case, the ISR Status field is unused, and device specific configuration |
| starts at byte offset 24 in virtio header structure. |
| When MSI-X capability is not enabled, device specific configuration starts |
| at byte offset 20 in virtio header. |
| \end_layout |
| |
| \begin_layout Standard |
| Writing a valid MSI-X Table entry number, 0 to 0x7FF, to one of Configuration/Qu |
| eue Vector registers, |
| \emph on |
| maps |
| \emph default |
| interrupts triggered by the configuration change/selected queue events |
| respectively to the corresponding MSI-X vector. |
| To disable interrupts for a specific event type, unmap it by writing a |
| special NO_VECTOR value: |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| /* Vector value used to disable MSI for queue */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| #define VIRTIO_MSI_NO_VECTOR 0xffff |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| Reading these registers returns vector mapped to a given event, or NO_VECTOR |
| if unmapped. |
| All queue and configuration change events are unmapped by default. |
| \end_layout |
| |
| \begin_layout Standard |
| Note that mapping an event to vector might require allocating internal device |
| resources, and might fail. |
| Devices report such failures by returning the NO_VECTOR value when the |
| relevant Vector field is read. |
| After mapping an event to vector, the driver must verify success by reading |
| the Vector field value: on success, the previously written value is returned, |
| and on failure, NO_VECTOR is returned. |
| If a mapping failure is detected, the driver can retry mapping with fewer |
| vectors, or disable MSI-X. |
| \end_layout |
| |
| \begin_layout Section |
| Virtqueue Configuration |
| \begin_inset CommandInset label |
| LatexCommand label |
| name "sec:Virtqueue-Configuration" |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| As a device can have zero or more virtqueues for bulk data transport (for |
| example, the network driver has two), the driver needs to configure them |
| as part of the device-specific configuration. |
| \end_layout |
| |
| \begin_layout Standard |
| This is done as follows, for each virtqueue a device has: |
| \end_layout |
| |
| \begin_layout Enumerate |
| Write the virtqueue index (first queue is 0) to the Queue Select field. |
| \end_layout |
| |
| \begin_layout Enumerate |
| Read the virtqueue size from the Queue Size field, which is always a power |
| of 2. |
| This controls how big the virtqueue is (see below). |
| If this field is 0, the virtqueue does not exist. |
| |
| \end_layout |
| |
| \begin_layout Enumerate |
| Allocate and zero virtqueue in contiguous physical memory, on a 4096 byte |
| alignment. |
| Write the physical address, divided by 4096, to the Queue Address field. |
| \begin_inset Foot |
| status open |
| |
| \begin_layout Plain Layout |
| The 4096 is based on the x86 page size, but it's also large enough to ensure |
| that the separate parts of the virtqueue are on separate cache lines. |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Enumerate |
| Optionally, if MSI-X capability is present and enabled on the device, select |
| a vector to use to request interrupts triggered by virtqueue events. |
| Write the MSI-X Table entry number corresponding to this vector in Queue |
| Vector field. |
| Read the Queue Vector field: on success, previously written value is returned; |
| on failure, NO_VECTOR value is returned. |
| \end_layout |
| |
| \begin_layout Standard |
| The Queue Size field controls the total number of bytes required for the |
| virtqueue according to the following formula: |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| #define ALIGN(x) (((x) + 4095) & ~4095) |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| static inline unsigned vring_size(unsigned int qsz) |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| return ALIGN(sizeof(struct vring_desc)*qsz + sizeof(u16)*(3 + qsz)) |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| + ALIGN(sizeof(u16)*3 + sizeof(struct vring_used_elem)*qsz); |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| } |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| This currently wastes some space with padding, but also allows future extensions. |
| The virtqueue layout structure looks like this (qsz is the Queue Size field, |
| which is a variable, so this code won't compile): |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| struct vring { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* The actual descriptors (16 bytes each) */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_desc desc[qsz]; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* A ring of available descriptor heads with free-running index. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_avail avail; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| // Padding to the next 4096 boundary. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| char pad[]; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| // A ring of used descriptor heads with free-running index. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_used used; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| }; |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Subsection |
| A Note on Virtqueue Endianness |
| \end_layout |
| |
| \begin_layout Standard |
| Note that the |
| \emph on |
| endian |
| \emph default |
| of these fields and everything else in the virtqueue is the native endian |
| of the guest, not little-endian as PCI normally is. |
| This makes for simpler guest code, and it is assumed that the host already |
| has to be deeply aware of the guest endian so such an |
| \begin_inset Quotes eld |
| \end_inset |
| |
| endian-aware |
| \begin_inset Quotes erd |
| \end_inset |
| |
| device is not a significant issue. |
| \end_layout |
| |
| \begin_layout Subsection |
| Descriptor Table |
| \end_layout |
| |
| \begin_layout Standard |
| The descriptor table refers to the buffers the guest is using for the device. |
| The addresses are physical addresses, and the buffers can be chained via |
| the next field. |
| Each descriptor describes a buffer which is read-only or write-only, but |
| a chain of descriptors can contain both read-only and write-only buffers. |
| \end_layout |
| |
| \begin_layout Standard |
| No descriptor chain may be more than 2^32 bytes long in total. |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| struct vring_desc { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* Address (guest-physical). |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| u64 addr; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* Length. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| u32 len; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* This marks a buffer as continuing via the next field. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| #define VRING_DESC_F_NEXT 1 |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* This marks a buffer as write-only (otherwise read-only). |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| #define VRING_DESC_F_WRITE 2 |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* This means the buffer contains a list of buffer descriptors. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| #define VRING_DESC_F_INDIRECT 4 |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* The flags as indicated above. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| u16 flags; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* Next field if flags & NEXT */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| u16 next; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| }; |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| The number of descriptors in the table is specified by the Queue Size field |
| for this virtqueue. |
| \end_layout |
| |
| \begin_layout Subsection |
| \begin_inset CommandInset label |
| LatexCommand label |
| name "sub:Indirect-Descriptors" |
| |
| \end_inset |
| |
| Indirect Descriptors |
| \end_layout |
| |
| \begin_layout Standard |
| Some devices benefit by concurrently dispatching a large number of large |
| requests. |
| The VIRTIO_RING_F_INDIRECT_DESC feature can be used to allow this (see |
| |
| \begin_inset CommandInset ref |
| LatexCommand ref |
| reference "cha:Reserved-Feature-Bits" |
| |
| \end_inset |
| |
| ). |
| To increase ring capacity it is possible to store a table of |
| \emph on |
| indirect descriptors |
| \emph default |
| anywhere in memory, and insert a descriptor in main virtqueue (with flags&INDIR |
| ECT on) that refers to memory buffer containing this |
| \emph on |
| indirect descriptor table |
| \emph default |
| ; fields |
| \emph on |
| addr |
| \emph default |
| and |
| \emph on |
| len |
| \emph default |
| refer to the indirect table address and length in bytes, respectively. |
| The indirect table layout structure looks like this (len is the length |
| of the descriptor that refers to this table, which is a variable, so this |
| code won't compile): |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| struct indirect_descriptor_table { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* The actual descriptors (16 bytes each) */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_desc desc[len / 16]; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| }; |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| The first indirect descriptor is located at the start of the indirect descriptor |
| table (index 0); additional indirect descriptors are chained by the next field. |
| An indirect descriptor without next field (with flags&NEXT off) signals |
| the end of the indirect descriptor table, and transfers control back to |
| the main virtqueue. |
| An indirect descriptor can not refer to another indirect descriptor table |
| (flags&INDIRECT must be off). |
| A single indirect descriptor table can include both read-only and write-only |
| descriptors; write-only flag (flags&WRITE) in the descriptor that refers |
| to it is ignored. |
| \end_layout |
| |
| \begin_layout Subsection |
| Available Ring |
| \end_layout |
| |
| \begin_layout Standard |
| The available ring refers to what descriptors we are offering the device: |
| it refers to the head of a descriptor chain. |
| The |
| \begin_inset Quotes eld |
| \end_inset |
| |
| flags |
| \begin_inset Quotes erd |
| \end_inset |
| |
| field is currently 0 or 1: 1 indicating that we do not need an interrupt |
| when the device consumes a descriptor from the available ring. |
| Alternatively, the guest can ask the device to delay interrupts until an |
| entry with an index specified by the |
| \begin_inset Quotes eld |
| \end_inset |
| |
| used_event |
| \begin_inset Quotes erd |
| \end_inset |
| |
| field is written in the used ring (equivalently, until the |
| \emph on |
| idx |
| \emph default |
| field in the used ring will reach the value |
| \emph on |
| used_event + 1 |
| \emph default |
| ). |
| The method employed by the device is controlled by the VIRTIO_RING_F_EVENT_IDX |
| feature bit (see |
| \begin_inset CommandInset ref |
| LatexCommand ref |
| reference "cha:Reserved-Feature-Bits" |
| |
| \end_inset |
| |
| ). |
| This interrupt suppression is merely an optimization; it may not suppress |
| interrupts entirely. |
| \end_layout |
| |
| \begin_layout Standard |
| The |
| \begin_inset Quotes eld |
| \end_inset |
| |
| idx |
| \begin_inset Quotes erd |
| \end_inset |
| |
| field indicates where we would put the |
| \emph on |
| next |
| \emph default |
| descriptor entry (modulo the ring size). |
| This starts at 0, and increases. |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| struct vring_avail { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| #define VRING_AVAIL_F_NO_INTERRUPT 1 |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| u16 flags; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| u16 idx; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| u16 ring[qsz]; /* qsz is the Queue Size field read from device */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| u16 used_event; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| }; |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Subsection |
| Used Ring |
| \end_layout |
| |
| \begin_layout Standard |
| The used ring is where the device returns buffers once it is done with them. |
| The flags field can be used by the device to hint that no notification |
| is necessary when the guest adds to the |
| \emph on |
| available |
| \emph default |
| ring. |
| Alternatively, the |
| \begin_inset Quotes eld |
| \end_inset |
| |
| avail_event |
| \begin_inset Quotes erd |
| \end_inset |
| |
| field can be used by the device to hint that no notification is necessary |
| until an entry with an index specified by the |
| \begin_inset Quotes eld |
| \end_inset |
| |
| avail_event |
| \begin_inset Quotes erd |
| \end_inset |
| |
| is written in the available ring (equivalently, until the |
| \emph on |
| idx |
| \emph default |
| field in the available ring will reach the value |
| \emph on |
| avail_event + 1 |
| \emph default |
| ). |
| The method employed by the device is controlled by the guest through the |
| VIRTIO_RING_F_EVENT_IDX feature bit (see |
| \begin_inset CommandInset ref |
| LatexCommand ref |
| reference "cha:Reserved-Feature-Bits" |
| |
| \end_inset |
| |
| ). |
| |
| \begin_inset Foot |
| status open |
| |
| \begin_layout Plain Layout |
| These fields are kept here because this is the only part of the virtqueue |
| written by the device. |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| Each entry in the ring is a pair: the head entry of the descriptor chain |
| describing the buffer (this matches an entry placed in the available ring |
| by the guest earlier), and the total number of bytes written into the buffer. |
| The latter is extremely useful for guests using untrusted buffers: if you |
| do not know exactly how much has been written by the device, you usually |
| have to zero the buffer to ensure no data leakage occurs. |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| /* u32 is used here for ids for padding reasons. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_used_elem { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* Index of start of used descriptor chain. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| u32 id; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* Total length of the descriptor chain which was used (written to) |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| u32 len; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| }; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_used { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| #define VRING_USED_F_NO_NOTIFY 1 |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| u16 flags; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| u16 idx; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_used_elem ring[qsz]; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| u16 avail_event; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| }; |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Subsection |
| Helpers for Managing Virtqueues |
| \end_layout |
| |
| \begin_layout Standard |
| The Linux Kernel Source code contains the definitions above and helper routines |
| in a more usable form, in include/linux/virtio_ring.h. |
| This was explicitly licensed by IBM and Red Hat under the (3-clause) BSD |
| license so that it can be freely used by all other projects, and is reproduced |
| (with slight variation to remove Linux assumptions) in Appendix A. |
| \end_layout |
| |
| \begin_layout Section |
| Device Operation |
| \begin_inset CommandInset label |
| LatexCommand label |
| name "sec:Device-Operation" |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| There are two parts to device operation: supplying new buffers to the device, |
| and processing used buffers from the device. |
| As an example, the virtio network device has two virtqueues: the transmit |
| virtqueue and the receive virtqueue. |
| The driver adds outgoing (read-only) packets to the transmit virtqueue, |
| and then frees them after they are used. |
| Similarly, incoming (write-only) buffers are added to the receive virtqueue, |
| and processed after they are used. |
| \end_layout |
| |
| \begin_layout Subsection |
| Supplying Buffers to The Device |
| \end_layout |
| |
| \begin_layout Standard |
| Actual transfer of buffers from the guest OS to the device operates as follows: |
| \end_layout |
| |
| \begin_layout Enumerate |
| Place the buffer(s) into free descriptor(s). |
| \end_layout |
| |
| \begin_deeper |
| \begin_layout Enumerate |
| If there are no free descriptors, the guest may choose to notify the device |
| even if notifications are suppressed (to reduce latency). |
| \begin_inset Foot |
| status open |
| |
| \begin_layout Plain Layout |
| The Linux drivers do this only for read-only buffers: for write-only buffers, |
| it is assumed that the driver is merely trying to keep the receive buffer |
| ring full, and no notification of this expected condition is necessary. |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \end_deeper |
| \begin_layout Enumerate |
| Place the id of the buffer in the next ring entry of the available ring. |
| \end_layout |
| |
| \begin_layout Enumerate |
| The steps (1) and (2) may be performed repeatedly if batching is possible. |
| \end_layout |
| |
| \begin_layout Enumerate |
| A memory barrier should be executed to ensure the device sees the updated |
| descriptor table and available ring before the next step. |
| \end_layout |
| |
| \begin_layout Enumerate |
| The available |
| \begin_inset Quotes eld |
| \end_inset |
| |
| idx |
| \begin_inset Quotes erd |
| \end_inset |
| |
| field should be increased by the number of entries added to the available |
| ring. |
| \end_layout |
| |
| \begin_layout Enumerate |
| A memory barrier should be executed to ensure that we update the idx field |
| before checking for notification suppression. |
| \end_layout |
| |
| \begin_layout Enumerate |
| If notifications are not suppressed, the device should be notified of the |
| new buffers. |
| \end_layout |
| |
| \begin_layout Standard |
| Note that the above code does not take precautions against the available |
| ring buffer wrapping around: this is not possible since the ring buffer |
| is the same size as the descriptor table, so step (1) will prevent such |
| a condition. |
| \end_layout |
| |
| \begin_layout Standard |
| In addition, the maximum queue size is 32768 (it must be a power of 2 which |
| fits in 16 bits), so the 16-bit |
| \begin_inset Quotes eld |
| \end_inset |
| |
| idx |
| \begin_inset Quotes erd |
| \end_inset |
| |
| value can always distinguish between a full and empty buffer. |
| \end_layout |
| |
| \begin_layout Standard |
| Here is a description of each stage in more detail. |
| \end_layout |
| |
| \begin_layout Subsubsection |
| Placing Buffers Into The Descriptor Table |
| \end_layout |
| |
| \begin_layout Standard |
| A buffer consists of zero or more read-only physically-contiguous elements |
| followed by zero or more physically-contiguous write-only elements (it |
| must have at least one element). |
| This algorithm maps it into the descriptor table: |
| \end_layout |
| |
| \begin_layout Enumerate |
| for each buffer element, |
| \family typewriter |
| b |
| \family default |
| : |
| \end_layout |
| |
| \begin_deeper |
| \begin_layout Enumerate |
| Get the next free descriptor table entry, |
| \family typewriter |
| d |
| \end_layout |
| |
| \begin_layout Enumerate |
| Set |
| \family typewriter |
| d.addr |
| \family default |
| to the physical address of the start of |
| \family typewriter |
| b |
| \end_layout |
| |
| \begin_layout Enumerate |
| Set |
| \family typewriter |
| d.len |
| \family default |
| to the length of |
| \family typewriter |
| b |
| \family default |
| . |
| \end_layout |
| |
| \begin_layout Enumerate |
| If |
| \family typewriter |
| b |
| \family default |
| is write-only, set |
| \family typewriter |
| d.flags |
| \family default |
| to VRING_DESC_F_WRITE, otherwise 0. |
| \end_layout |
| |
| \begin_layout Enumerate |
| If there is a buffer element after this: |
| \end_layout |
| |
| \begin_deeper |
| \begin_layout Enumerate |
| Set |
| \family typewriter |
| d.next |
| \family default |
| to the index of the next free descriptor element. |
| \end_layout |
| |
| \begin_layout Enumerate |
| Set the VRING_DESC_F_NEXT bit in |
| \family typewriter |
| d.flags |
| \family default |
| . |
| \end_layout |
| |
| \end_deeper |
| \end_deeper |
| \begin_layout Standard |
| In practice, the d.next fields are usually used to chain free descriptors, |
| and a separate count kept to check there are enough free descriptors before |
| beginning the mappings. |
| \end_layout |
| |
| \begin_layout Subsubsection |
| Updating The Available Ring |
| \end_layout |
| |
| \begin_layout Standard |
| The head of the buffer we mapped is the first |
| \family typewriter |
| d |
| \family default |
| in the algorithm above. |
| A naive implementation would do the following: |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| avail->ring[avail->idx % qsz] = head; |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| However, in general we can add many descriptors before we update the |
| \begin_inset Quotes eld |
| \end_inset |
| |
| idx |
| \begin_inset Quotes erd |
| \end_inset |
| |
| field (at which point they become visible to the device), so we keep a |
| counter of how many we've added: |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| avail->ring[(avail->idx + added++) % qsz] = head; |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Subsubsection |
| Updating The Index Field |
| \end_layout |
| |
| \begin_layout Standard |
| Once the idx field of the virtqueue is updated, the device will be able |
| to access the descriptor entries we've created and the memory they refer |
| to. |
| This is why a memory barrier is generally used before the idx update, to |
| ensure it sees the most up-to-date copy. |
| \end_layout |
| |
| \begin_layout Standard |
| The idx field always increments, and we let it wrap naturally at 65536: |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| avail->idx += added; |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Subsubsection |
| \begin_inset CommandInset label |
| LatexCommand label |
| name "sub:Notifying-The-Device" |
| |
| \end_inset |
| |
| Notifying The Device |
| \end_layout |
| |
| \begin_layout Standard |
| Device notification occurs by writing the 16-bit virtqueue index of this |
| virtqueue to the Queue Notify field of the virtio header in the first I/O |
| region of the PCI device. |
| This can be expensive, however, so the device can suppress such notifications |
| if it doesn't need them. |
| We have to be careful to expose the new idx value |
| \emph on |
| before |
| \emph default |
| checking the suppression flag: it's OK to notify gratuitously, but not |
| to omit a required notification. |
| So again, we use a memory barrier here before reading the flags or the |
| avail_event field. |
| \end_layout |
| |
| \begin_layout Standard |
| If the VIRTIO_F_RING_EVENT_IDX feature is not negotiated, and if the VRING_USED_ |
| F_NO_NOTIFY flag is not set, we go ahead and write to the PCI configuration |
| space. |
| \end_layout |
| |
| \begin_layout Standard |
| If the VIRTIO_F_RING_EVENT_IDX feature is negotiated, we read the avail_event |
| field in the available ring structure. |
| If the available index crossed the |
| \emph on |
| avail_event |
| \emph default |
| field value since the last notification, we go ahead and write to the PCI |
| configuration space. |
| The |
| \emph on |
| avail_event |
| \emph default |
| field wraps naturally at 65536 as well: |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| (u16)(new_idx - avail_event - 1) < (u16)(new_idx - old_idx) |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Subsection |
| \begin_inset CommandInset label |
| LatexCommand label |
| name "sub:Receiving-Used-Buffers" |
| |
| \end_inset |
| |
| Receiving Used Buffers From The Device |
| \end_layout |
| |
| \begin_layout Standard |
| Once the device has used a buffer (read from or written to it, or parts |
| of both, depending on the nature of the virtqueue and the device), it sends |
| an interrupt, following an algorithm very similar to the algorithm used |
| for the driver to send the device a buffer: |
| \end_layout |
| |
| \begin_layout Enumerate |
| Write the head descriptor number to the next field in the used ring. |
| \end_layout |
| |
| \begin_layout Enumerate |
| Update the used ring idx. |
| \end_layout |
| |
| \begin_layout Enumerate |
| Determine whether an interrupt is necessary: |
| \end_layout |
| |
| \begin_deeper |
| \begin_layout Enumerate |
| If the VIRTIO_F_RING_EVENT_IDX feature is not negotiated: check if the |
| VRING_AVAIL_F_NO_INTERRUPT flag is not set in avail\SpecialChar \nobreakdash- |
| >flags |
| \end_layout |
| |
| \begin_layout Enumerate |
| If the VIRTIO_F_RING_EVENT_IDX feature is negotiated: check whether the |
| used index crossed the |
| \emph on |
| used_event |
| \emph default |
| field value since the last update. |
| The |
| \emph on |
| used_event |
| \emph default |
| field wraps naturally at 65536 as well: |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| (u16)(new_idx - used_event - 1) < (u16)(new_idx - old_idx) |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \end_deeper |
| \begin_layout Enumerate |
| If an interrupt is necessary: |
| \end_layout |
| |
| \begin_deeper |
| \begin_layout Enumerate |
| If MSI-X capability is disabled: |
| \end_layout |
| |
| \begin_deeper |
| \begin_layout Enumerate |
| Set the lower bit of the ISR Status field for the device. |
| \end_layout |
| |
| \begin_layout Enumerate |
| Send the appropriate PCI interrupt for the device. |
| \end_layout |
| |
| \end_deeper |
| \begin_layout Enumerate |
| If MSI-X capability is enabled: |
| \end_layout |
| |
| \begin_deeper |
| \begin_layout Enumerate |
| Request the appropriate MSI-X interrupt message for the device; the Queue Vector |
| field sets the MSI-X Table entry number. |
| \end_layout |
| |
| \begin_layout Enumerate |
| If Queue Vector field value is NO_VECTOR, no interrupt message is requested |
| for this event. |
| \end_layout |
| |
| \end_deeper |
| \end_deeper |
| \begin_layout Standard |
| The guest interrupt handler should: |
| \end_layout |
| |
| \begin_layout Enumerate |
| If MSI-X capability is disabled: read the ISR Status field, which will reset |
| it to zero. |
| If the lower bit is zero, the interrupt was not for this device. |
| Otherwise, the guest driver should look through the used rings of each |
| virtqueue for the device, to see if any progress has been made by the device |
| which requires servicing. |
| \end_layout |
| |
| \begin_layout Enumerate |
| If MSI-X capability is enabled: look through the used rings of each virtqueue |
| mapped to the specific MSI-X vector for the device, to see if any progress |
| has been made by the device which requires servicing. |
| \end_layout |
| |
| \begin_layout Standard |
| For each ring, guest should then disable interrupts by writing VRING_AVAIL_F_NO_ |
| INTERRUPT flag in avail structure, if required. |
| It can then process used ring entries, finally enabling interrupts by clearing |
| the VRING_AVAIL_F_NO_INTERRUPT flag or updating the EVENT_IDX field in |
| the available structure. |
| The guest should then execute a memory barrier, and then recheck the ring |
| empty condition. |
| This is necessary to handle the case where, after the last check and before |
| enabling interrupts, an interrupt has been suppressed by the device: |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| vring_disable_interrupts(vq); |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| for (;;) { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| if (vq->last_seen_used == vring->used.idx) { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| vring_enable_interrupts(vq); |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| mb(); |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| if (vq->last_seen_used == vring->used.idx) |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| break; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| } |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_used_elem *e = vring.used->ring[vq->last_seen_used%vsz]; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| process_buffer(e); |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| vq->last_seen_used++; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| } |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Subsection |
| Dealing With Configuration Changes |
| \begin_inset CommandInset label |
| LatexCommand label |
| name "sub:Dealing-With-Configuration" |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Standard |
| Some virtio PCI devices can change the device configuration state, as reflected |
| in the virtio header in the PCI configuration space. |
| In this case: |
| \end_layout |
| |
| \begin_layout Enumerate |
| If MSI-X capability is disabled: an interrupt is delivered and the second |
| lowest bit is set in the ISR Status field to indicate that the driver |
| should re-examine the configuration space. |
| Note that a single interrupt can indicate both that one or more virtqueues |
| have been used and that the configuration space has changed: even if the config |
| bit is set, virtqueues must be scanned. |
| \end_layout |
| |
| \begin_layout Enumerate |
| If MSI-X capability is enabled: an interrupt message is requested. |
| The Configuration Vector field sets the MSI-X Table entry number to use. |
| If Configuration Vector field value is NO_VECTOR, no interrupt message |
| is requested for this event. |
| \end_layout |
| |
| \begin_layout Chapter |
| Creating New Device Types |
| \end_layout |
| |
| \begin_layout Standard |
| Various considerations are necessary when creating a new device type: |
| \end_layout |
| |
| \begin_layout Section* |
| How Many Virtqueues? |
| \end_layout |
| |
| \begin_layout Standard |
| It is possible that a very simple device will operate entirely through its |
| configuration space, but most will need at least one virtqueue in which |
| it will place requests. |
| A device with both input and output (eg. |
| console and network devices described here) needs two queues: one which |
| the driver fills with buffers to receive input, and one in which the driver |
| places buffers to transmit output. |
| \end_layout |
| |
| \begin_layout Section* |
| What Configuration Space Layout? |
| \end_layout |
| |
| \begin_layout Standard |
| Configuration space is generally used for rarely-changing or initialization-time |
| parameters. |
| But it is a limited resource, so it might be better to use a virtqueue |
| to update configuration information (the network device does this for filtering |
| , otherwise the table in the config space could potentially be very large). |
| \end_layout |
| |
| \begin_layout Standard |
| Note that this space is generally the guest's native endian, rather than |
| PCI's little-endian. |
| \end_layout |
| |
| \begin_layout Section* |
| What Device Number? |
| \end_layout |
| |
| \begin_layout Standard |
| Currently device numbers are assigned quite freely: a simple request mail |
| to the author of this document or the Linux virtualization mailing list |
| \begin_inset Foot |
| status open |
| |
| \begin_layout Plain Layout |
| https://lists.linux-foundation.org/mailman/listinfo/virtualization |
| \end_layout |
| |
| \end_inset |
| |
| will be sufficient to secure a unique one. |
| \end_layout |
| |
| \begin_layout Standard |
| Meanwhile for experimental drivers, use 65535 and work backwards. |
| \end_layout |
| |
| \begin_layout Section* |
| How many MSI-X vectors? |
| \end_layout |
| |
| \begin_layout Standard |
| Using the optional MSI-X capability devices can speed up interrupt processing |
| by removing the need to read ISR Status register by guest driver (which |
| might be an expensive operation), reducing interrupt sharing between devices |
| and queues within the device, and handling interrupts from multiple CPUs. |
| However, some systems impose a limit (which might be as low as 256) on |
| the total number of MSI-X vectors that can be allocated to all devices. |
| Devices and/or device drivers should take this into account, limiting the |
| number of vectors used unless the device is expected to cause a high volume |
| of interrupts. |
| Devices can control the number of vectors used by limiting the MSI-X Table |
| Size or not presenting MSI-X capability in PCI configuration space. |
| Drivers can control this by mapping events to as small a number of vectors |
| as possible, or disabling MSI-X capability altogether. |
| \end_layout |
| |
| \begin_layout Section* |
| Message Framing |
| \end_layout |
| |
| \begin_layout Standard |
| The descriptors used for a buffer should not affect the semantics of the |
| message, except for the total length of the buffer. |
| For example, a network buffer consists of a 10 byte header followed by |
| the network packet. |
| Whether this is presented in the ring descriptor chain as (say) a 10 byte |
| buffer and a 1514 byte buffer, or a single 1524 byte buffer, or even three |
| buffers, should have no effect. |
| \end_layout |
| |
| \begin_layout Standard |
| In particular, no implementation should use the descriptor boundaries to |
| determine the size of any header in a request. |
| \begin_inset Foot |
| status open |
| |
| \begin_layout Plain Layout |
| The current qemu device implementations mistakenly insist that the first |
| descriptor cover the header in these cases exactly, so a cautious driver |
| should arrange it so. |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Section* |
| Device Improvements |
| \end_layout |
| |
| \begin_layout Standard |
| Any change to configuration space, or new virtqueues, or behavioural changes, |
| should be indicated by negotiation of a new feature bit. |
| This establishes clarity |
| \begin_inset Foot |
| status open |
| |
| \begin_layout Plain Layout |
| Even if it does mean documenting design or implementation mistakes! |
| \end_layout |
| |
| \end_inset |
| |
| and avoids future expansion problems. |
| \end_layout |
| |
| \begin_layout Standard |
| Clusters of functionality which are always implemented together can use |
| a single bit, but if one feature makes sense without the others they should |
| not be gratuitously grouped together to conserve feature bits. |
| We can always extend the spec when the first person needs more than 24 |
| feature bits for their device. |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset CommandInset nomencl_print |
| LatexCommand printnomenclature |
| set_width "none" |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Chapter* |
| Appendix A: virtio_ring.h |
| \end_layout |
| |
| \begin_layout Standard |
| \begin_inset listings |
| inline false |
| status open |
| |
| \begin_layout Plain Layout |
| |
| #ifndef VIRTIO_RING_H |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| #define VIRTIO_RING_H |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* An interface for efficient virtio implementation. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * This header is BSD licensed so anyone can use the definitions |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * to implement compatible drivers/servers. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * Copyright 2007, 2009, IBM Corporation |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * Copyright 2011, Red Hat, Inc |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * All rights reserved. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * Redistribution and use in source and binary forms, with or without |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * modification, are permitted provided that the following conditions |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * are met: |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * 1. |
| Redistributions of source code must retain the above copyright |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * notice, this list of conditions and the following disclaimer. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * 2. |
| Redistributions in binary form must reproduce the above copyright |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * notice, this list of conditions and the following disclaimer in the |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * documentation and/or other materials provided with the distribution. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * 3. |
| Neither the name of IBM nor the names of its contributors |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * may be used to endorse or promote products derived from this software |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * without specific prior written permission. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS |
| IS'' AND |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * ARE DISCLAIMED. |
| IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY |
| WAY |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * SUCH DAMAGE. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* This marks a buffer as continuing via the next field. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| #define VRING_DESC_F_NEXT 1 |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* This marks a buffer as write-only (otherwise read-only). |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| #define VRING_DESC_F_WRITE 2 |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* The Host uses this in used->flags to advise the Guest: don't kick me |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * when you add a buffer. |
| It's unreliable, so it's simply an |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * optimization. |
| Guest will still kick if it's out of buffers. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| #define VRING_USED_F_NO_NOTIFY 1 |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* The Guest uses this in avail->flags to advise the Host: don't |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * interrupt me when you consume a buffer. |
| It's unreliable, so it's |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * simply an optimization. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| #define VRING_AVAIL_F_NO_INTERRUPT 1 |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* Virtio ring descriptors: 16 bytes. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * These can chain together via "next". |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_desc { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* Address (guest-physical). |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| uint64_t addr; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* Length. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| uint32_t len; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* The flags as indicated above. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| uint16_t flags; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* We chain unused descriptors via this, too */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| uint16_t next; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| }; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_avail { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| uint16_t flags; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| uint16_t idx; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| uint16_t ring[]; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| uint16_t used_event; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| }; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* u32 is used here for ids for padding reasons. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_used_elem { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* Index of start of used descriptor chain. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| uint32_t id; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* Total length of the descriptor chain which was written to. |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| uint32_t len; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| }; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_used { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| uint16_t flags; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| uint16_t idx; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_used_elem ring[]; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| uint16_t avail_event; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| }; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| unsigned int num; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_desc *desc; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_avail *avail; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| struct vring_used *used; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| }; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| /* The standard layout for the ring is a continuous chunk of memory which |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * looks like this. |
| We assume num is a power of 2. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * struct vring { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * // The actual descriptors (16 bytes each) |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * struct vring_desc desc[num]; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * // A ring of available descriptor heads with free-running index. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * __u16 avail_flags; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * __u16 avail_idx; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * __u16 available[num]; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * // Padding to the next align boundary. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * char pad[]; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * // A ring of used descriptor heads with free-running index. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * __u16 used_flags; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * __u16 used_idx; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * struct vring_used_elem used[num]; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * }; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| * Note: for virtio PCI, align is 4096. |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| */ |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| static inline void vring_init(struct vring *vr, unsigned int num, void *p, |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| unsigned long align) |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| vr->num = num; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| vr->desc = p; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| vr->avail = p + num*sizeof(struct vring_desc); |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| vr->used = (void *)(((unsigned long)&vr->avail->ring[num] |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| + align-1) |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| & ~(align - 1)); |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| } |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| static inline unsigned vring_size(unsigned int num, unsigned long align) |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| return ((sizeof(struct vring_desc)*num + sizeof(uint16_t)*(2+num) |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| + align - 1) & ~(align - 1)) |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| + sizeof(uint16_t)*3 + sizeof(struct vring_used_elem)*num; |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| } |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx, |
| uint16_t old_idx) |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| { |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx |
| - old_idx); |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| } |
| \end_layout |
| |
| \begin_layout Plain Layout |
| |
| #endif /* VIRTIO_RING_H */ |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Chapter* |
| \begin_inset CommandInset label |
| LatexCommand label |
| name "cha:Reserved-Feature-Bits" |
| |
| \end_inset |
| |
| Appendix B: Reserved Feature Bits |
| \end_layout |
| |
| \begin_layout Standard |
| Currently there are three device-independent feature bits defined: |
| \end_layout |
| |
| \begin_layout Description |
| VIRTIO_F_NOTIFY_ON_EMPTY |
| \begin_inset space ~ |
| \end_inset |
| |
| (24) Negotiating this feature indicates that the driver wants an interrupt |
| if the device runs out of available descriptors on a virtqueue, even though |
| interrupts are suppressed using the VRING_AVAIL_F_NO_INTERRUPT flag or |
| the used_event field. |
| An example of this is the networking driver: it doesn't need to know every |
| time a packet is transmitted, but it does need to free the transmitted |
| packets a finite time after they are transmitted. |
| It can avoid using a timer if the device interrupts it when all the packets |
| are transmitted. |
| \end_layout |
| |
| \begin_layout Description |
| VIRTIO_F_RING_INDIRECT_DESC |
| \begin_inset space ~ |
| \end_inset |
| |
| (28) Negotiating this feature indicates that the driver can use descriptors |
| with the VRING_DESC_F_INDIRECT flag set, as described in |
| \begin_inset CommandInset ref |
| LatexCommand ref |
| reference "sub:Indirect-Descriptors" |
| |
| \end_inset |
| |
| . |
| \end_layout |
| |
| \begin_layout Description |
| VIRTIO_F_RING_EVENT_IDX |
| \begin_inset space ~ |
| \end_inset |
| |
| (29) This feature enables the |
| \emph on |
| used_event |
| \emph default |
| and the |
| \emph on |
| avail_event |
| \emph default |
| fields. |
| If set, it indicates that the device should ignore the |
| \emph on |
| flags |
| \emph default |
| field in the available ring structure. |
| Instead, the |
| \emph on |
| used_event |
| \emph default |
| field in this structure is used by guest to suppress device interrupts. |
| Further, the driver should ignore the |
| \emph on |
| flags |
| \emph default |
| field in the used ring structure. |
| Instead, the |
| \emph on |
| avail_event |
| \emph default |
| field in this structure is used by the device to suppress notifications. |
| If unset, the driver should ignore the |
| \emph on |
| used_event |
| \emph default |
| field; the device should ignore the |
| \emph on |
| avail_event |
| \emph default |
| field; the |
| \emph on |
| flags |
| \emph default |
| field is used. |
| \end_layout |
| |
| \begin_layout Chapter* |
| Appendix C: Network Device |
| \end_layout |
| |
| \begin_layout Standard |
| The virtio network device is a virtual ethernet card, and is the most complex |
| of the devices supported so far by virtio. |
| It has been enhanced rapidly and demonstrates clearly how support for new features |
| should be added to an existing device. |
| Empty buffers are placed in one virtqueue for receiving packets, and outgoing |
| packets are enqueued into another for transmission in that order. |
| A third command queue is used to control advanced filtering features. |
| \end_layout |
| |
| \begin_layout Section* |
| Configuration |
| \end_layout |
| |
| \begin_layout Description |
| Subsystem |
| \begin_inset space ~ |
| \end_inset |
| |
| Device |
| \begin_inset space ~ |
| \end_inset |
| |
| ID 1 |
| \end_layout |
| |
| \begin_layout Description |
| Virtqueues 0:receiveq. |
| 1:transmitq. |
| 2:controlq |
| \begin_inset Foot |
| status open |
| |
| \begin_layout Plain Layout |
| Only if VIRTIO_NET_F_CTRL_VQ set |
| \end_layout |
| |
| \end_inset |
| |
| |
| \end_layout |
| |
| \begin_layout Description |
| Feature |
| \begin_inset space ~ |
| \end_inset |
| |
| bits |
| \end_layout |
| |
| \begin_deeper |
| \begin_layout Description |
| VIRTIO_NET_F_CSUM |
| \begin_inset space ~ |
| \end_inset |
| |
| (0) Device handles packets with partial checksum |
| \end_layout |
| |
| \begin_layout Description |
| VIRTIO_NET_F_GUEST_CSUM |
| \begin_inset space ~ |
| \end_inset |
| |
| (1) Guest handles packets with partial checksum |
| \end_layout |
| |
| \begin_layout Description |
| VIRTIO_NET_F_MAC |
| \begin_inset space ~ |
| \end_inset |
| |
| (5) Device has given MAC address. |
| \end_layout |
| |
| \begin_layout Description |
| VIRTIO_NET_F_GSO |
| \begin_inset space ~ |
| \end_inset |
| |
| (6) (Deprecated) device handles packets with any GSO type. |
| \begin_inset Foot |
| status open |
| |
| \begin_layout Plain Layout |
| It was supposed to indicate segmentation offload support, but upon further |
| investigation it became clear that multiple bits were required. |
| \end_layout |
| |
| \end_inset |
|