diff options
-rw-r--r-- | REVISION | 2 | ||||
-rw-r--r-- | REVISION-DATE | 2 | ||||
-rw-r--r-- | acknowledgements.tex | 6 | ||||
-rw-r--r-- | changelog.tex | 2 | ||||
-rw-r--r-- | cl-cs02.tex | 52 | ||||
-rw-r--r-- | cl-os.tex | 338 | ||||
-rw-r--r-- | commands.tex | 1 | ||||
-rw-r--r-- | conformance.tex | 41 | ||||
-rw-r--r-- | content.tex | 739 | ||||
-rw-r--r-- | diffpreamble.tex | 7 | ||||
-rw-r--r-- | fixupdiff.pl | 77 | ||||
-rwxr-xr-x | makediff.sh | 30 | ||||
-rw-r--r-- | newdevice.tex | 5 | ||||
-rw-r--r-- | title.tex | 6 | ||||
-rw-r--r-- | virtio-ring.h | 2 |
15 files changed, 1123 insertions, 187 deletions
@@ -1 +1 @@ -virtio-v1.0-cs02 +virtio-v1.0-cs03 diff --git a/REVISION-DATE b/REVISION-DATE index 83b910b..86ffb4e 100644 --- a/REVISION-DATE +++ b/REVISION-DATE @@ -1 +1 @@ -18 January 2015 +02 August 2015 diff --git a/acknowledgements.tex b/acknowledgements.tex index 3866a0c..6c86d12 100644 --- a/acknowledgements.tex +++ b/acknowledgements.tex @@ -20,6 +20,7 @@ Luiz Capitulino, Red Hat \newline Michael S. Tsirkin, Red Hat \newline Paolo Bonzini, Red Hat \newline Pawel Moll, ARM \newline +Richard Sohn, Alcatel-Lucent \newline Rusty Russell, IBM \newline Sasha Levin, Oracle \newline Sergey Tverdyshev, Thales e-Security \newline @@ -33,12 +34,13 @@ specification and are gratefully acknowledged: \begin{oasistitlesection}{Reviewers} Andrew Thornton, Google \newline Arun Subbarao, LynuxWorks \newline +Brian Foley, ARM \newline +David Alan Gilbert, Red Hat \newline Fam Zheng, Red Hat \newline Gerd Hoffmann, Red Hat \newline +Jason Wang, Red Hat \newline Laura Novich, Red Hat \newline Patrick Durusau, Technical Advisory Board, OASIS \newline Thomas Huth, IBM \newline Yan Vugenfirer, Red Hat / Daynix \newline -Brian Foley, ARM \newline -David Alan Gilbert, Red Hat \newline \end{oasistitlesection} diff --git a/changelog.tex b/changelog.tex index fbe2dd5..4b89e97 100644 --- a/changelog.tex +++ b/changelog.tex @@ -2,6 +2,7 @@ The following changes have been made since the previous version of this specification: +\begin{DIFnomarkup} \begin{longtable}{ | c | c | c | p{0.4\textwidth} | } \hline \textbf{Revision} & \textbf{Date} & \textbf{Editor} & \textbf{Changes Made} \\ @@ -11,3 +12,4 @@ of this specification: %\hline \input{cl-os.tex} \end{longtable} +\end{DIFnomarkup} diff --git a/cl-cs02.tex b/cl-cs02.tex new file mode 100644 index 0000000..61aa6fd --- /dev/null +++ b/cl-cs02.tex @@ -0,0 +1,52 @@ +448 & 22 Dec 2014 & Michael S. Tsirkin & {VIRTIO-120: virtio: +fix used element size + +General ring description lists size for +used ring elements as 4, it must be 8. + +See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues}. + } \\ +\hline +449 & 22 Doc 2014 & Cornelia Huck & {VIRTIO-125: block: fixup section levels + The specification for the configuration layout for block devices + should be its own subsection as for all other devices and not be + hidden beneath "Feature bits". + + The normative sections for device operation should appear under + the device operation section. +See \ref{sec:Device Types / Block Device / Device configuration +layout}. + } \\ +\hline +450 & 22 Dec 2014 & Cornelia Huck & {VIRTIO-127: ccw: two-stage +indicators for legacy devices + + Some legacy devices will support two-stage queue indicators +and therefore + won't reject CCW_CMD_SET_IND_ADAPTER. Note this. + +See \ref{sec:Virtio Transport Options / Virtio over channel I/O / +Device Initialization / Setting Up Indicators / Legacy +Interfaces: A Note on Setting Up Indicators}. + } \\ +\hline +452 & 22 Dec 2014 & Michael S. Tsirkin & {VIRTIO-115: +formatting: escape {\textbackslash}ldots in lstlisting + + {\textbackslash}ldots does not work within lstlisting, the result is + {\textbackslash}ldots verbatim in the PDF output. + + To fix, make \$ an escape character, and escape the sequence: + \${\textbackslash}ldots\$ + +See \ref{sec:Device Types / SCSI Host Device / Device Operation / +Device Operation: controlq}. +} \\ +\hline +455,457 & 23 Dec 2014 & Michael S. Tsirkin & {acknowledgements: acknowledge dgilbert + + Acknowledge David Alan Gilbert for reporting VIRTIO-120. + +See \ref{chap:Acknowledgements}. +} \\ +\hline @@ -1,52 +1,328 @@ -448 & 22 Dec 2014 & Michael S. Tsirkin & {VIRTIO-120: virtio: -fix used element size +478 & 15 Mar 2015 & Cornelia Huck & {VIRTIO-129: legacy: +clean up virtqueue layout definitions -General ring description lists size for -used ring elements as 4, it must be 8. +Generalize "Legacy Interfaces: A Note on Virtqueue Layout" to allow +for different alignment requirements. Have pci and ccw refer to that +section for legacy devices. Remove the double definition of virtqueue +alignment (which referred to legacy, but was not tagged as such) from +the ccw section. +See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / +Legacy Interfaces: A Note on Virtqueue Layout}, \ref{sec:Virtio +Transport Options / Virtio Over PCI Bus / PCI-specific +Initialization And Device Operation / Device Initialization / +Virtqueue Configuration / Legacy Interface: A Note on Virtqueue +Configuration} and \ref{sec:Virtio Transport Options / Virtio +over channel I/O / Device Initialization / Configuring a +Virtqueue / Legacy Interface: A Note on Configuring a Virtqueue}. + } \\ +\hline +479 & 15 Mar 2015 & Cornelia Huck & {VIRTIO-118: +ccw: clarify basic channel commands -See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues}. +"Basic channel commands" seems to be not as clear as it +could, so let's spell out which channel commands we refer to. +See \ref{sec:Virtio Transport Options / Virtio over channel I/O / +Basic Concepts}. +} \\ +\hline +479 & 15 Mar 2015 & Cornelia Huck & {VIRTIO-116: +ccw: allow WRITE_STATUS to fail + +We want to be able to fail setting a status on the device +(e.g. FEATURES_OK if the device can't work with the features +negotiated). +The easiest way to do that is to allow the device to fail the +WRITE_STATUS command by posting a command reject. +See \ref{sec:Virtio Transport Options / Virtio over channel I/O / +Device Initialization / Communicating Status Information}. } \\ \hline -449 & 22 Doc 2014 & Cornelia Huck & {VIRTIO-125: block: fixup section levels - The specification for the configuration layout for block devices - should be its own subsection as for all other devices and not be - hidden beneath "Feature bits". +485 & 15 Mar 2015 & Jason Wang & {VIRTIO-135: +virtio-ring: comment fixup - The normative sections for device operation should appear under - the device operation section. -See \ref{sec:Device Types / Block Device / Device configuration -layout}. +virtio_ring.h included with spec has this text: +/* Support for avail_idx and used_idx fields */ +it should really refer to avail_event and used_event. +See Appendix \ref{sec:virtio-ring.h}. } \\ \hline -450 & 22 Dec 2014 & Cornelia Huck & {VIRTIO-127: ccw: two-stage -indicators for legacy devices +486 & 15 Mar 2015 & Jason Wang & {VIRTIO-136: +document idx field in virtqueue used ring - Some legacy devices will support two-stage queue indicators -and therefore - won't reject CCW_CMD_SET_IND_ADAPTER. Note this. +Section \ref{sec:Basic Facilities of a Virtio Device / Virtqueues +/ The Virtqueue Used Ring} The Virtqueue Used Ring +listed the idx field, but never documented it. +See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / +The Virtqueue Used Ring}. + } \\ +\hline +487 & 15 Mar 2015 & Rusty Russell & {VIRTIO-130: +ISR status: Fix incorrect diagram -See \ref{sec:Virtio Transport Options / Virtio over channel I/O / -Device Initialization / Setting Up Indicators / Legacy -Interfaces: A Note on Setting Up Indicators}. +ISR status capability diagram has the "Device Configuration +Interrupt " as bit 0, and the "Queue Interrupt" as bit 1. This is +the wrong way around: it disagrees with the legacy +implementations, as well as the spec elsewhere. + +All current guests correctly follow the text, fix +up the diagram to match. +See \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI +Device Layout / ISR status capability}. + } \\ +\hline +488 & 15 Mar 2015 & Rusty Russell & {VIRTIO-133: +Change 4.1.5.1.2.1 to device requirement + +4.1.5.1.2.1 is incorrectly labelled as a driver requirement; it's +self-evidently referring to the device. +See \ref{sec:Conformance / Driver Conformance / PCI Driver +Conformance}, \ref{sec:Conformance / Device Conformance / PCI +Device Conformance} and \ref{devicenormative:Virtio +Transport Options / Virtio Over PCI Bus / PCI-specific +Initialization And Device Operation / Device Initialization / +Non-transitional Device With Legacy Driver}. + } \\ +\hline +504 & 22 Apr 2015 & Rusty Russell & {VIRTIO-137: +define the meaning and requirements of the len field. + +We said what it was for, and noted why. We didn't place any +requirements on it, nor clearly spell out the implications of its use. + +This clarification comes particularly from noticing that QEMU +didn't set len correctly, and philosophising over the correct value +when an error has occurred. +See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / +The Virtqueue Used Ring}, \ref{devicenormative:Basic Facilities +of a Virtio Device / Virtqueues / The Virtqueue Used Ring} and +\ref{sec:Basic Facilities of a Virtio Device / Virtqueues / The +Virtqueue Used Ring}. } \\ \hline -452 & 22 Dec 2014 & Michael S. Tsirkin & {VIRTIO-115: -formatting: escape {\textbackslash}ldots in lstlisting +506 & 22 Apr 2015 & Michael S. Tsirkin & {VIRTIO-138: +multiple errors: Non-transitional With Legacy + +virtio 1.0 has two sections titled "Non-transitional Device With +Legacy Driver" the first says devices SHOULD fail, the second +says devices MUST fail. Clearly a mistake. - {\textbackslash}ldots does not work within lstlisting, the result is - {\textbackslash}ldots verbatim in the PDF output. +Other issues: devices don't really fail - they cause drivers to +fail. second section seems to be in the wrong place, and also +have a section followed by subsection with no explanatory text in +between, which is ugly. +Finally, this text was originally ritten to handle buggy windows +drivers gracefully, but later we changed device IDs so it's not +really required there. Might be handy for some other buggy legacy +drivers, though no such drivers are known. - To fix, make \$ an escape character, and escape the sequence: - \${\textbackslash}ldots\$ +To fix, drop the duplicate section variant, add some explanatory +text, clarify what does "same ID" mean here, and clarify +that the work-around is only needed if a buggy driver +is known to bind to a transitional device. -See \ref{sec:Device Types / SCSI Host Device / Device Operation / -Device Operation: controlq}. +See \ref{sec:Virtio Transport Options / Virtio +Over PCI Bus / PCI Device Layout / Non-transitional Device With +Legacy Driver: A Note on PCI Device Layout}, +\ref{devicenormative:Virtio Transport Options / Virtio Over PCI +Bus / PCI-specific Initialization And Device Operation / Device +Initialization / Non-transitional Device With Legacy Driver} and +\ref{sec:Virtio Transport Options / Virtio Over PCI Bus / +PCI-specific Initialization And Device Operation / Device +Initialization}. } \\ \hline -455,457 & 23 Dec 2014 & Michael S. Tsirkin & {acknowledgements: acknowledge dgilbert +508 & 22 Apr 2015 & Michael S. Tsirkin & {VIRTIO-139: +pci: missing documentation for dealing with 64 bit config fields - Acknowledge David Alan Gilbert for reporting VIRTIO-120. +pci spec says what width access to use for 32, 16 and 8 +bit fields, but does not explicitly say what to do for +32 bit fields. As we have text that says driver must +treat 64 bit accesses as non-atomic, this seems +to imply driver should always do two 32 bit wide accesses. + +Let's make this an explicit requirement, and require +devices to support this. + +See \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI +Device Layout}, \ref{drivernormative:Virtio Transport Options / +Virtio Over PCI Bus / PCI Device Layout}, +\ref{devicenormative:Virtio Transport Options / Virtio Over PCI +Bus / PCI Device Layout} and \ref{sec:Conformance / Driver +Conformance / PCI Driver Conformance}. + } \\ +\hline +509 & 22 Apr 2015 & Michael S. Tsirkin & {balloon: +MUST -> has to + +MUST shouldn't be used outside normative statements, +that's confusing. Replace with "has to". + +See \ref{sec:Device Types / Memory Balloon Device / Feature +bits}. + } \\ +\hline +510 & 22 Apr 2015 & Michael S. Tsirkin & {conformance: +add VIRTIO-137 statement links + +Add links to new conformance statements added to +resolve VIRTIO-137 (describing used ring entry len usage). + +See \ref{sec:Conformance / Device Conformance} +and \ref{sec:Conformance / Driver Conformance}. + } \\ +\hline +517 & 22 Apr 2015 & Michael S. Tsirkin & {acknowledgements: +contributors+minor fixup + +acknowledge feedback by Jason Wang, add Richard Sohn who +joined the TC, sort acknowledged reviewers alphabetically. See \ref{chap:Acknowledgements}. } \\ \hline +520 & 30 Apr 2015 & James Bottomley & {VIRTIO-140: +give explicit guidance on the use of 64 bit fields + +Just saying 64 bit fields may not be atomic is true, but less +helpful than it might be. Add explicit guidance about what the +consequences of non-atomicity are. + +See \ref{sec:Creating New Device Types / What Device +Configuration Space Layout?} +} \\ +\hline +521 & 30 Apr 2015 & Rusty Russell & {VIRTIO-134: +Spell out details of indirect elements in chains + +1) It's implied that a chain terminates with an indirect descriptor (since +VIRTIO-15) but we didn't spell out that a device MUST NOT +continue it. + +2) We allow [direct]->[direct]->[indirect], and qemu and +bhyve both accept it. Make it clear that this is valid, thus devices MUST +handle it. + +See \ref{drivernormative:Basic Facilities of a Virtio Device / +Virtqueues / The Virtqueue Descriptor Table / Indirect +Descriptors} and \ref{devicenormative:Basic Facilities of a +Virtio Device / Virtqueues / The Virtqueue Descriptor Table / +Indirect Descriptors} +} \\ +\hline +522 & 30 Apr 2015 & Michael S. Tsirkin & {VIRTIO-141: +used ring: specify legacy behaviour for len field + +many hypervisors implemented len field incorrectly. +Document existing bugs in the legacy sections. + +See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues +/ The Virtqueue Used Ring/ Legacy Interface: The Virtqueue Used +Ring}, \ref{sec:Device Types / Network Device / Device Operation +/ Legacy Interface: Device Operation}, \ref{sec:Device Types / +Block Device / Device Operation / Legacy Interface: Device +Operation}, \ref{sec:Device Types / Console Device / Device +Operation / Legacy Interface: Device Operation}, \ref{sec:Device +Types / Memory Balloon Device / Device Operation / Legacy +Interface: Device Operation}, \ref{sec:Device +Types / SCSI Host Device / Device Operation / Legacy +Interface: Device Operation} and \ref{sec:Conformance / Legacy +Interface: Transitional Device and Transitional Driver +Conformance}. +} \\ +\hline +523 & 30 Apr 2015 & Michael S. Tsirkin & {VIRTIO-142: +entropy device: typo fix + +Current text: "The driver MUST examine the length written by the +driver" makes no sense. length is written by the device. + +See \ref{drivernormative:Device Types / Entropy Device / Device +Operation}. +} \\ +\hline +526 & 18 May 2015 & Michael S. Tsirkin & {VIRTIO-143: +balloon: transitional device support + +Support a transitional balloon device: this has the advantage of supporting +existing drivers, transparently, as well as transports that don't allow mixing +virtio 0 and virtio 1 devices. And balloon is an easy device to test, so it's +also useful for people to test virtio core handling of transitional devices. + +Three issues with legacy hypervisors have been identified: +\begin{enumerate} +\item +Actual value is actually used, and is necessary for management +to work. Luckily 4 byte config space writes are now atomic. +When using old guests, hypervisors can detect access to the last byte. +When using old hypervisors, drivers can use atomic 4-byte accesses. +\item Hypervisors actually didn't ignore the stats from the first +buffer supplied. This means the values there would be +incorrect until hypervisor resends the request. +Add a note suggesting hypervisors ignore the 1st buffer. +\item QEMU simply over-writes stats from each buffer it gets. +Thus if driver supplies a different subset of stats +on each request, stale values will be there. +Require drivers to supply the same subset on each +request. This also gives us a simple way to figure out +which stats are supported. +\end{enumerate} + +See +\ref{sec:Device Types / Memory Balloon Device}, +\ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Discovery}, +\ref{sec:Conformance / Driver Conformance / Traditional Memory Balloon Driver Conformance}, +\ref{sec:Conformance / Device Conformance / Traditional Memory Balloon Device Conformance}, +\ref{sec:Conformance / Legacy Interface: Transitional Device and Transitional Driver Conformance}, +\ref{sec:Conformance / Device Conformance} and \ref{sec:Conformance / Driver Conformance}. +} \\ +\hline +527 & 18 May 2015 & Michael S. Tsirkin & {VIRTIO-126: +document deflate on oom + +Document the new option, and also clarify behaviour +without it. + +In particular, actual field is not the +actual number of pages in the balloon as +driver might do inflate followed by deflate. + +Also, device isn't always driven by interrupts, +driver can inflate/deflate in response to e.g. +memory compaction. + +See \ref{sec:Device Types / Memory Balloon Device / Feature bits}, +\ref{sec:Device Types / Memory Balloon Device / Device Operation} and +\ref{drivernormative:Device Types / Memory Balloon Device / Device Operation}. +} \\ +\hline +528 & 18 May 2015 & Michael S. Tsirkin & {VIRTIO-123: +network device: xmit/receive cleanup + +Fix up multiple issues in xmit/receive sections: +\begin{itemize} + \item drop MAY/MUST/SHOULD outside normative statements + \item spell out conformance requirements for both drivers and + devices, for xmit and receive paths + \item document the missing VIRTIO_NET_HDR_F_DATA_VALID + \item document handling of unrecognized flag bits so we can extend + flags in the future, similar to VIRTIO_NET_HDR_F_DATA_VALID +\end{itemize} + +\ref{sec:Device Types / Network Device / Device Initialization}, +\ref{drivernormative:Device Types / Network Device / Device Operation / Packet Transmission}, +\ref{devicenormative:Device Types / Network Device / Device Operation / Packet Transmission}, +\ref{sec:Device Types / Network Device / Device Operation / Processing of Incoming Packets}, +\ref{sec:Conformance / Driver Conformance / Network Driver Conformance} and +\ref{sec:Conformance / Device Conformance / Network Device Conformance}. +} \\ +\hline +529 & 18 May 2015 & Michael S. Tsirkin & {VIRTIO-124: +network device: document VIRTIO_NET_F_CTRL_RX_EXTRA + +See +\ref{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering}, +\ref{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering}, +\ref{sec:Conformance / Driver Conformance / Network Driver Conformance} and +\ref{sec:Conformance / Device Conformance / Network Device Conformance}. +} \\ +\hline diff --git a/commands.tex b/commands.tex index 59c3da3..871f416 100644 --- a/commands.tex +++ b/commands.tex @@ -14,3 +14,4 @@ \newcommand{\devicenormative}[3]{#1{Device Requirements: #2}\label{devicenormative:#3}} \providecommand{\DIFaddtextcstwo}[1]{#1} \providecommand{\DIFdeltextcstwo}[1]{} +\newenvironment{DIFnomarkup}{}{} diff --git a/conformance.tex b/conformance.tex index 29c6ba8..7b7df32 100644 --- a/conformance.tex +++ b/conformance.tex @@ -15,13 +15,13 @@ Conformance targets: \begin{itemize} \item Clause \ref{sec:Conformance / Driver Conformance}, \item One of clauses \ref{sec:Conformance / Driver Conformance / PCI Driver Conformance}, \ref{sec:Conformance / Driver Conformance / MMIO Driver Conformance} or \ref{sec:Conformance / Driver Conformance / Channel I/O Driver Conformance}. - \item One of clauses \ref{sec:Conformance / Driver Conformance / Network Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Block Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Console Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Entropy Driver Conformance} or \ref{sec:Conformance / Driver Conformance / SCSI Host Driver Conformance}. + \item One of clauses \ref{sec:Conformance / Driver Conformance / Network Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Block Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Console Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Entropy Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Traditional Memory Balloon Driver Conformance} or \ref{sec:Conformance / Driver Conformance / SCSI Host Driver Conformance}. \end{itemize} \item[Device] A device MUST conform to three conformance clauses: \begin{itemize} \item Clause \ref{sec:Conformance / Device Conformance}, \item One of clauses \ref{sec:Conformance / Device Conformance / PCI Device Conformance}, \ref{sec:Conformance / Device Conformance / MMIO Device Conformance} or \ref{sec:Conformance / Device Conformance / Channel I/O Device Conformance}. - \item One of clauses \ref{sec:Conformance / Device Conformance / Network Device Conformance}, \ref{sec:Conformance / Device Conformance / Block Device Conformance}, \ref{sec:Conformance / Device Conformance / Console Device Conformance}, \ref{sec:Conformance / Device Conformance / Entropy Device Conformance} or \ref{sec:Conformance / Device Conformance / SCSI Host Device Conformance}. + \item One of clauses \ref{sec:Conformance / Device Conformance / Network Device Conformance}, \ref{sec:Conformance / Device Conformance / Block Device Conformance}, \ref{sec:Conformance / Device Conformance / Console Device Conformance}, \ref{sec:Conformance / Device Conformance / Entropy Device Conformance}, \ref{sec:Conformance / Device Conformance / Traditional Memory Balloon Device Conformance} or \ref{sec:Conformance / Device Conformance / SCSI Host Device Conformance}. \end{itemize} \end{description} @@ -38,6 +38,7 @@ A driver MUST conform to the following normative statements: \item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table} \item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table / Indirect Descriptors} \item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Interrupt Suppression} +\item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring} \item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Notification Suppression} \item \ref{drivernormative:General Initialization And Device Operation / Device Initialization} \item \ref{drivernormative:General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device / Updating idx} @@ -57,7 +58,6 @@ A PCI driver MUST conform to the following normative statements: \item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Common configuration structure layout} \item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / ISR status capability} \item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / PCI configuration access capability} -\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Non-transitional Device With Legacy Driver} \item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / MSI-X Vector Configuration} \item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Notification of Device Configuration Changes} \end{itemize} @@ -79,6 +79,7 @@ A Channel I/O driver MUST conform to the following normative statements: \begin{itemize} \item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Basic Concepts} \item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting the Virtio Revision} +\item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information} \item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Device Operation / Host->Guest Notification / Notification via Adapter I/O Interrupts} \item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Device Operation / Guest->Host Notification} \end{itemize} @@ -91,6 +92,8 @@ A network driver MUST conform to the following normative statements: \item \ref{drivernormative:Device Types / Network Device / Device configuration layout} \item \ref{drivernormative:Device Types / Network Device / Device Operation / Packet Transmission} \item \ref{drivernormative:Device Types / Network Device / Device Operation / Setting Up Receive Buffers} +\item \ref{drivernormative:Device Types / Network Device / Device Operation / Processing of Incoming Packets} +\item \ref{drivernormative:Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering} \item \ref{drivernormative:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering} \item \ref{drivernormative:Device Types / Network Device / Device Operation / Control Virtqueue / Gratuitous Packet Sending} \item \ref{drivernormative:Device Types / Network Device / Device Operation / Control Virtqueue / Automatic receive steering in multiqueue mode} @@ -122,6 +125,16 @@ An entropy driver MUST conform to the following normative statements: \item \ref{drivernormative:Device Types / Entropy Device / Device Operation} \end{itemize} +\subsection{Traditional Memory Balloon Driver Conformance}\label{sec:Conformance / Driver Conformance / Traditional Memory Balloon Driver Conformance} + +A traditional memory balloon driver MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{drivernormative:Device Types / Memory Balloon Device / Feature bits} +\item \ref{drivernormative:Device Types / Memory Balloon Device / Device Operation} +\item \ref{drivernormative:Device Types / Memory Balloon Device / Device Operation / Memory Statistics} +\end{itemize} + \subsection{SCSI Host Driver Conformance}\label{sec:Conformance / Driver Conformance / SCSI Host Driver Conformance} An SCSI host driver MUST conform to the following normative statements: @@ -144,6 +157,7 @@ A device MUST conform to the following normative statements: \item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table} \item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table / Indirect Descriptors} \item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Interrupt Suppression} +\item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring} \item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Notification Suppression} \item \ref{devicenormative:Reserved Feature Bits} \end{itemize} @@ -155,12 +169,14 @@ A PCI device MUST conform to the following normative statements: \begin{itemize} \item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus} \item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Discovery} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout} \item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / Virtio Structure PCI Capabilities} \item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Common configuration structure layout} \item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Notification capability} \item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / ISR status capability} \item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Device-specific configuration} \item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / PCI configuration access capability} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Non-transitional Device With Legacy Driver} \item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / MSI-X Vector Configuration} \item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Virtqueue Interrupts From The Device} \item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Notification of Device Configuration Changes} @@ -182,6 +198,7 @@ A Channel I/O device MUST conform to the following normative statements: \item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Basic Concepts} \item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting the Virtio Revision} \item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Configuring a Virtqueue} +\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information} \item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting Up Indicators / Setting Up Two-Stage Queue Indicators} \item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Operation / Host->Guest Notification / Notification via Adapter I/O Interrupts} \item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Operation / Guest->Host Notification} @@ -193,8 +210,10 @@ A network device MUST conform to the following normative statements: \begin{itemize} \item \ref{devicenormative:Device Types / Network Device / Device configuration layout} +\item \ref{devicenormative:Device Types / Network Device / Device Operation / Packet Transmission} \item \ref{devicenormative:Device Types / Network Device / Device Operation / Setting Up Receive Buffers} -\item \ref{devicenormative:Device Types / Network Device / Device Operation / Processing of Packets} +\item \ref{devicenormative:Device Types / Network Device / Device Operation / Processing of Incoming Packets} +\item \ref{devicenormative:Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering} \item \ref{devicenormative:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering} \item \ref{devicenormative:Device Types / Network Device / Device Operation / Control Virtqueue / Gratuitous Packet Sending} \item \ref{devicenormative:Device Types / Network Device / Device Operation / Control Virtqueue / Automatic receive steering in multiqueue mode} @@ -225,6 +244,16 @@ An entropy device MUST conform to the following normative statements: \item \ref{devicenormative:Device Types / Entropy Device / Device Operation} \end{itemize} +\subsection{Traditional Memory Balloon Device Conformance}\label{sec:Conformance / Device Conformance / Traditional Memory Balloon Device Conformance} + +A traditional memory balloon device MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{devicenormative:Device Types / Memory Balloon Device / Feature bits} +\item \ref{devicenormative:Device Types / Memory Balloon Device / Device Operation} +\item \ref{devicenormative:Device Types / Memory Balloon Device / Device Operation / Memory Statistics} +\end{itemize} + \subsection{SCSI Host Device Conformance}\label{sec:Conformance / Device Conformance / SCSI Host Device Conformance} An SCSI host device MUST conform to the following normative statements: @@ -287,9 +316,11 @@ Feature Bits / Legacy Interface: A Note on Feature Bits} \item Section \ref{sec:Device Types / Block Device / Device Operation / Legacy Interface: Device Operation} \item Section \ref{sec:Device Types / Console Device / Device configuration layout / Legacy Interface: Device configuration layout} \item Section \ref{sec:Device Types / Console Device / Device Operation / Legacy Interface: Device Operation} -\item Section \ref{drivernormative:Device Types / Memory Balloon Device / Device Operation} +\item Section \ref{sec:Device Types / Memory Balloon Device / Feature bits / Legacy Interface: Feature bits} +\item Section \ref{sec:Device Types / Memory Balloon Device / Device Operation / Legacy Interface: Device Operation} \item Section \ref{sec:Device Types / Memory Balloon Device / Device Operation / Memory Statistics / Legacy Interface: Memory Statistics} \item Section \ref{sec:Device Types / SCSI Host Device / Device configuration layout / Legacy Interface: Device configuration layout} +\item Section \ref{sec:Device Types / SCSI Host Device / Device Operation / Legacy Interface: Device Operation} \item Section \ref{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: Request Queues / Legacy Interface: Device Operation: Request Queues} \item Section \ref{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: controlq / Legacy Interface: Device Operation: controlq} \item Section \ref{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: eventq / Legacy Interface: Device Operation: eventq} diff --git a/content.tex b/content.tex index 1efdcc8..d989d98 100644 --- a/content.tex +++ b/content.tex @@ -288,7 +288,8 @@ For Legacy Interfaces, several additional restrictions are placed on the virtqueue layout: Each virtqueue occupies two or more physically-contiguous pages -(usually defined as 4096 bytes, but depending on the transport) +(usually defined as 4096 bytes, but depending on the transport; +henceforth referred to as Queue Align) and consists of three parts: \begin{tabular}{|l|l|l|} @@ -301,11 +302,12 @@ The bus-specific Queue Size field controls the total number of bytes for the virtqueue. When using the legacy interface, the transitional driver MUST retrieve the Queue Size field from the device -and MUST allocate the total number of bytes for the virtuqueue -according to the following formula: +and MUST allocate the total number of bytes for the virtqueue +according to the following formula (Queue Align given in qalign and +Queue Size given in qsz): \begin{lstlisting} -#define ALIGN(x) (((x) + PAGE_SIZE) & ~PAGE_SIZE) +#define ALIGN(x) (((x) + qalign) & ~qalign) static inline unsigned virtq_size(unsigned int qsz) { return ALIGN(sizeof(struct virtq_desc)*qsz + sizeof(u16)*(3 + qsz)) @@ -326,7 +328,7 @@ struct virtq { // A ring of available descriptor heads with free-running index. struct virtq_avail avail; - // Padding to the next PAGE_SIZE boundary. + // Padding to the next Queue Align boundary. u8 pad[ Padding ]; // A ring of used descriptor heads with free-running index. @@ -475,9 +477,21 @@ one table per descriptor). A driver MUST NOT create a descriptor chain longer than the Queue Size of the device. +A driver MUST NOT set both VIRTQ_DESC_F_INDIRECT and VIRTQ_DESC_F_NEXT +in \field{flags}. + \devicenormative{\paragraph}{Indirect Descriptors}{Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table / Indirect Descriptors} The device MUST ignore the write-only flag (\field{flags}\&VIRTQ_DESC_F_WRITE) in the descriptor that refers to an indirect table. +The device MUST handle the case of zero or more normal chained +descriptors followed by a single descriptor with \field{flags}\&VIRTQ_DESC_F_INDIRECT. + +\begin{note} +While unusual (most implementations either create a chain solely using +non-indirect descriptors, or use a single indirect element), such a +layout is valid. +\end{note} + \subsection{The Virtqueue Available Ring}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Available Ring} \begin{lstlisting} @@ -588,10 +602,22 @@ them: it is only written to by the device, and read by the driver. Each entry in the ring is a pair: \field{id} indicates the head entry of the descriptor chain describing the buffer (this matches an entry placed in the available ring by the guest earlier), and \field{len} the total -of bytes written into the buffer. The latter is extremely useful -for drivers using untrusted buffers: if you do not know exactly -how much has been written by the device, you usually have to zero -the buffer to ensure no data leakage occurs. +of bytes written into the buffer. + +\begin{note} +\field{len} is particularly useful +for drivers using untrusted buffers: if a driver does not know exactly +how much has been written by the device, the driver would have to zero +the buffer in advance to ensure no data leakage occurs. + +For example, a network driver may hand a received buffer directly to +an unprivileged userspace application. If the network device has not +overwritten the bytes which were in that buffer, this could leak the +contents of freed memory from other processes to the application. +\end{note} + +\field{idx} field indicates where the driver would put the next descriptor +entry in the ring (modulo the queue size). This starts at 0, and increases. \begin{note} The legacy \hyperref[intro:Virtio PCI Draft]{[Virtio PCI Draft]} @@ -600,6 +626,39 @@ the constant as VRING_USED_F_NO_NOTIFY, but the layout and value were identical. \end{note} +\subsubsection{Legacy Interface: The Virtqueue Used +Ring}\label{sec:Basic Facilities of a Virtio Device / Virtqueues +/ The Virtqueue Used Ring/ Legacy Interface: The Virtqueue Used +Ring} + +Historically, many drivers ignored the \field{len} value, as a +result, many devices set \field{len} incorrectly. Thus, when +using the legacy interface, it is generally a good idea to ignore +the \field{len} value in used ring entries if possible. Specific +known issues are listed per device type. + +\devicenormative{\subsubsection}{The Virtqueue Used Ring}{Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring} + +The device MUST set \field{len} prior to updating the used \field{idx}. + +The device MUST write at least \field{len} bytes to descriptor, +beginning at the first device-writable buffer, +prior to updating the used \field{idx}. + +The device MAY write more than \field{len} bytes to descriptor. + +\begin{note} +There are potential error cases where a device might not know what +parts of the buffers have been written. This is why \field{len} is +permitted to be an underestimate: that's preferable to the driver believing +that uninitialized memory has been overwritten when it has not. +\end{note} + +\drivernormative{\subsubsection}{The Virtqueue Used Ring}{Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring} + +The driver MUST NOT make assumptions about data in device-writable buffers +beyond the first \field{len} bytes, and SHOULD ignore this data. + \subsection{Virtqueue Notification Suppression}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Notification Suppression} The device can suppress notifications in a manner analogous to the way @@ -987,7 +1046,7 @@ Transitional PCI Device ID & Virtio Device \\ \hline 0x1001 & block device \\ \hline -0x1002 & memory ballooning (legacy) \\ +0x1002 & memory ballooning (traditional) \\ \hline 0x1003 & console \\ \hline @@ -1047,14 +1106,23 @@ Structure PCI Capabilities. Fields of different sizes are present in the device configuration regions. -All 32-bit and 16-bit fields are little-endian. +All 64-bit, 32-bit and 16-bit fields are little-endian. +64-bit fields are to be treated as two 32-bit fields, +with low 32 bit part followed by the high 32 bit part. \drivernormative{\subsubsection}{PCI Device Layout}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout} -The driver -MUST access each field using the ``natural'' access method, i.e. -32-bit accesses for 32-bit fields, 16-bit accesses for 16-bit -fields and 8-bit accesses for 8-bit fields. +For device configuration access, the driver MUST use 8-bit wide +accesses for 8-bit wide fields, 16-bit wide and aligned accesses +for 16-bit wide fields and 32-bit wide and aligned accesses for +32-bit and 64-bit wide fields. For 64-bit fields, the driver MAY +access each of the high and low 32-bit parts of the field +independently. + +\devicenormative{\subsubsection}{PCI Device Layout}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout} + +For 64-bit device configuration fields, the device MUST allow driver +independent access to high and low 32-bit parts of the field. \subsection{Virtio Structure PCI Capabilities}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / Virtio Structure PCI Capabilities} @@ -1410,7 +1478,7 @@ change interrupts and normal virtqueue interrupts: \hline Bits & 0 & 1 & 2 to 31 \\ \hline -Purpose & Device Configuration Interrupt & Queue Interrupt & Reserved \\ +Purpose & Queue Interrupt & Device Configuration Interrupt & Reserved \\ \hline \end{tabular} @@ -1600,10 +1668,37 @@ on PCI Device Layout}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Non-transitional Device With Legacy Driver: A Note on PCI Device Layout} +All known legacy drivers check either the PCI Revision or the +Device and Vendor IDs, and thus won't attempt to drive a +non-transitional device. + +A buggy legacy driver might mistakenly attempt to drive a +non-transitional device. If support for such drivers is required +(as opposed to fixing the bug), the following would be the +recommended way to detect and handle them. +\begin{note} +Such buggy drivers are not currently known to be used in +production. +\end{note} + +\subparagraph{ +\DIFdeltextcstwo{Driver Requirements: Non-transitional Device With Legacy Driver} +\DIFaddtextcstwo{Device Requirements: Non-transitional Device With Legacy Driver} +} +\label{drivernormative:Virtio Transport Options / Virtio Over PCI +Bus / PCI-specific Initialization And Device Operation / +Device Initialization / Non-transitional Device With Legacy +Driver} +\label{devicenormative:Virtio Transport Options / Virtio Over PCI +Bus / PCI-specific Initialization And Device Operation / +Device Initialization / Non-transitional Device With Legacy +Driver} + Non-transitional devices, on a platform where a legacy driver for -a legacy device with the same ID might have previously existed, -SHOULD take the following steps to fail gracefully when a legacy -driver attempts to drive them: +a legacy device with the same ID (including PCI Revision, Device +and Vendor IDs) is known to have previously existed, +SHOULD take the following steps to cause the legacy driver to +fail gracefully when it attempts to drive them: \begin{enumerate} \item Present an I/O BAR in BAR0, and @@ -1624,22 +1719,6 @@ As a prerequisite to device initialization, the driver scans the PCI capability list, detecting virtio configuration layout using Virtio Structure PCI capabilities as detailed in \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / Virtio Structure PCI Capabilities} -\paragraph{Non-transitional Device With Legacy Driver}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Non-transitional Device With Legacy Driver} - -\drivernormative{\subparagraph}{Non-transitional Device With Legacy Driver}{Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Non-transitional Device With Legacy Driver} - -Non-transitional devices, on a platform where a legacy driver for -a legacy device with the same ID might have previously existed, -MUST take the following steps to fail gracefully when a legacy -driver attempts to drive them: - -\begin{enumerate} -\item Present an I/O BAR in BAR0, and -\item Respond to a single-byte zero write to offset 18 - (corresponding to Device Status register in the legacy layout) - of BAR0 by presenting zeroes on every BAR and ignoring writes. -\end{enumerate} - \subparagraph{Legacy Interface: A Note on Device Layout Detection}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Virtio Device Configuration Layout Detection / Legacy Interface: A Note on Device Layout Detection} Legacy drivers skipped the Device Layout Detection step, assuming legacy @@ -1761,8 +1840,8 @@ The driver typically does this as follows, for each virtqueue a device has: \end{enumerate} \subparagraph{Legacy Interface: A Note on Virtqueue Configuration}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Virtqueue Configuration / Legacy Interface: A Note on Virtqueue Configuration} -When using the legacy interface, the page size for a virtqueue on a PCI virtio -device is defined as 4096 bytes. Driver writes the physical address, divided +When using the legacy interface, the queue layout follows \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout}~\nameref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout} with an alignment of 4096. +Driver writes the physical address, divided by 4096 to the Queue Address field\footnote{The 4096 is based on the x86 page size, but it's also large enough to ensure that the separate parts of the virtqueue are on separate cache lines. @@ -2379,8 +2458,14 @@ model corresponding to the attached virtio device's subsystem device ID, accessed via a virtual I/O subchannel and a virtual channel path of type 0x32. This proxy device is discoverable via normal channel subsystem device discovery (usually a STORE -SUBCHANNEL loop) and answers to the basic channel commands, most -importantly SENSE ID. +SUBCHANNEL loop) and answers to the basic channel commands: + +\begin{itemize} +\item NO-OPERATION (0x03) +\item BASIC SENSE (0x04) +\item TRANSFER IN CHANNEL (0x08) +\item SENSE ID (0xe4) +\end{itemize} For a virtio-ccw proxy device, SENSE ID will return the following information: @@ -2585,36 +2670,30 @@ struct vq_info_block_legacy { \end{lstlisting} \field{queue} contains the guest address for queue \field{index}, \field{num} the number of buffers -and \field{align} the alignment. +and \field{align} the alignment. The queue layout follows \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout}~\nameref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout}. -\subsubsection{Virtqueue Layout}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Virtqueue Layout} +\subsubsection{Communicating Status Information}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information} -The virtqueue is physically contiguous, with padding added to make the -used ring meet the align value: +The driver changes the status of a device via the +CCW_CMD_WRITE_STATUS command, which transmits an 8 bit status +value. -\begin{tabular}{|l|l|l|} -\hline -Descriptor Table & Available Ring (\ldots padding\ldots) & Used Ring \\ -\hline -\end{tabular} +As described in +\ref{devicenormative:Basic Facilities of a Virtio Device / Feature Bits}, +a device sometimes fails to set the \field{status} field: For example, it +might fail to accept the FEATURES_OK status bit during device initialization. -The calculation for total size is as follows: +\drivernormative{\paragraph}{Communicating Status Information}{Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information} -\begin{lstlisting} -#define ALIGN(x) (((x) + align) & ~align) -static inline unsigned virtq_size(unsigned int num) -{ - return ALIGN(sizeof(struct virtq_desc)*num - + sizeof(u16)*(3 + num)) - + ALIGN(sizeof(u16)*3 + sizeof(struct virtq_used_elem)*num); -} -\end{lstlisting} +If the device posts a unit check with command reject in response to the +CCW_CMD_WRITE_STATUS command, the driver MUST assume that the device failed +to set the status and the \field{status} field retained its previous value. -\subsubsection{Communicating Status Information}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information} +\devicenormative{\paragraph}{Communicating Status Information}{Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information} -The driver changes the status of a device via the -CCW_CMD_WRITE_STATUS command, which transmits an 8 bit status -value. +If the device fails to set the \field{status} field to the value written by +the driver, the device MUST assure that the \field{status} field is left +unchanged and MUST post a unit check with command reject. \subsubsection{Handling Device Features}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Handling Device Features} @@ -2887,7 +2966,7 @@ Device ID & Virtio Device \\ \hline 4 & entropy source \\ \hline -5 & memory ballooning (legacy) \\ +5 & memory ballooning (traditional) \\ \hline 6 & ioMemory \\ \hline @@ -3136,7 +3215,12 @@ if both guests are amenable.} the VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO and VIRTIO_NET_F_GUEST_ECN are the input equivalents of the features described above. - See \ref{sec:Device Types / Network Device / Device Operation / Setting Up Receive Buffers}~\nameref{sec:Device Types / Network Device / Device Operation / Setting Up Receive Buffers} and \ref{sec:Device Types / Network Device / Device Operation / Processing of Packets}~\nameref{sec:Device Types / Network Device / Device Operation / Processing of Packets} below. + See \ref{sec:Device Types / Network Device / Device Operation / +Setting Up Receive Buffers}~\nameref{sec:Device Types / Network +Device / Device Operation / Setting Up Receive Buffers} and +\ref{sec:Device Types / Network Device / Device Operation / +Processing of Incoming Packets}~\nameref{sec:Device Types / +Network Device / Device Operation / Processing of Incoming Packets} below. \end{enumerate} A truly minimal driver would only accept VIRTIO_NET_F_MAC and ignore @@ -3180,16 +3264,25 @@ The legacy driver only presented \field{num_buffers} in the struct virtio_net_hd when VIRTIO_NET_F_MRG_RXBUF was not negotiated; without that feature the structure was 2 bytes shorter. +When using the legacy interface, the driver SHOULD ignore the +\field{len} value in used ring entries for the transmit queues +and the controlq queue. +\begin{note} +Historically, some devices put +the total descriptor length there, even though no data was +actually written. +\end{note} + \subsubsection{Packet Transmission}\label{sec:Device Types / Network Device / Device Operation / Packet Transmission} Transmitting a single packet is simple, but varies depending on the different features the driver negotiated. \begin{enumerate} -\item The driver MAY send a completely checksummed packet. In this case, +\item The driver can send a completely checksummed packet. In this case, \field{flags} will be zero, and \field{gso_type} will be VIRTIO_NET_HDR_GSO_NONE. -\item If the driver negotiated VIRTIO_NET_F_CSUM, it MAY skip +\item If the driver negotiated VIRTIO_NET_F_CSUM, it can skip checksumming the packet: \begin{itemize} \item \field{flags} has the VIRTIO_NET_HDR_F_NEEDS_CSUM set, @@ -3248,15 +3341,83 @@ specifically in the protocol.}. \drivernormative{\paragraph}{Packet Transmission}{Device Types / Network Device / Device Operation / Packet Transmission} -If a driver has not negotiated VIRTIO_NET_F_CSUM, \field{flags} MUST be zero and -the packet MUST be fully checksummed. - The driver MUST set \field{num_buffers} to zero. -A driver SHOULD NOT send TCP packets requiring segmentation offload which have the Explicit Congestion Notification bit set, unless the VIRTIO_NET_F_HOST_ECN feature is -negotiated\footnote{This is a common restriction in real, older network cards.}, in -which case it MUST set the VIRTIO_NET_HDR_GSO_ECN bit in \field{gso_type}. +If VIRTIO_NET_F_CSUM is not negotiated, the driver MUST set +\field{flags} to zero and SHOULD supply a fully checksummed +packet to the device. +If VIRTIO_NET_F_HOST_TSO4 is negotiated, the driver MAY set +\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV4 to request TCPv4 +segmentation, otherwise the driver MUST NOT set +\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV4. + +If VIRTIO_NET_F_HOST_TSO6 is negotiated, the driver MAY set +\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV6 to request TCPv6 +segmentation, otherwise the driver MUST NOT set +\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV6. + +If VIRTIO_NET_F_HOST_UFO is negotiated, the driver MAY set +\field{gso_type} to VIRTIO_NET_HDR_GSO_UDP to request UDP +segmentation, otherwise the driver MUST NOT set +\field{gso_type} to VIRTIO_NET_HDR_GSO_UDP. + +The driver SHOULD NOT send to the device TCP packets requiring segmentation offload +which have the Explicit Congestion Notification bit set, unless the +VIRTIO_NET_F_HOST_ECN feature is negotiated, in which case the +driver MUST set the VIRTIO_NET_HDR_GSO_ECN bit in +\field{gso_type}. + +If the VIRTIO_NET_F_CSUM feature has been negotiated, the +driver MAY set the VIRTIO_NET_HDR_F_NEEDS_CSUM bit in +\field{flags}, if so: +\begin{enumerate} +\item the driver MUST validate the packet checksum at + offset \field{csum_offset} from \field{csum_start} as well as all + preceding offsets; +\item the driver MUST set the packet checksum stored in the + buffer to the TCP/UDP pseudo header; +\item the driver MUST set \field{csum_start} and + \field{csum_offset} such that calculating a ones' + complement checksum from \field{csum_start} up until the end of + the packet and storing the result at offset \field{csum_offset} + from \field{csum_start} will result in a fully checksummed + packet; +\end{enumerate} + +If none of the VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO options have +been negotiated, the driver MUST set \field{gso_type} to +VIRTIO_NET_HDR_GSO_NONE. + +If \field{gso_type} differs from VIRTIO_NET_HDR_GSO_NONE, then +the driver MUST also set the VIRTIO_NET_HDR_F_NEEDS_CSUM bit in +\field{flags} and MUST set \field{gso_size} to indicate the +desired MSS. + +If one of the VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO options have +been negotiated, the driver SHOULD set \field{hdr_len} to a value +not less than the length of the headers, including the transport +header. + +The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID bit in +\field{flags}. + +\devicenormative{\paragraph}{Packet Transmission}{Device Types / Network Device / Device Operation / Packet Transmission} +The device MUST ignore \field{flag} bits that it does not recognize. + +If VIRTIO_NET_HDR_F_NEEDS_CSUM bit in \field{flags} is not set, the +device MUST NOT use the \field{csum_start} and \field{csum_offset}. + +If one of the VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO options have +been negotiated, the device MAY use \field{hdr_len} only as a hint about the +transport header size. +The device MUST NOT rely on \field{hdr_len} to be correct. +\begin{note} +This is due to various bugs in implementations. +\end{note} + +If VIRTIO_NET_HDR_F_NEEDS_CSUM is not set, the device MUST NOT +rely on the packet checksum being correct. \paragraph{Packet Transmission Interrupt}\label{sec:Device Types / Network Device / Device Operation / Packet Transmission / Packet Transmission Interrupt} Often a driver will suppress transmission interrupts using the @@ -3310,17 +3471,21 @@ The device MUST set \field{num_buffers} to the number of descriptors used to hold the incoming packet. The device MUST use only a single descriptor if VIRTIO_NET_F_MRG_RXBUF -was not negotiated. \note{This means that \field{num_buffers} will always be 1 +was not negotiated. +\begin{note} +{This means that \field{num_buffers} will always be 1 if VIRTIO_NET_F_MRG_RXBUF is not negotiated.} +\end{note} -\subsubsection{Processing of Packets}\label{sec:Device Types / Network Device / Device Operation / Processing of Packets} +\subsubsection{Processing of Incoming Packets}\label{sec:Device Types / Network Device / Device Operation / Processing of Incoming Packets} +\label{sec:Device Types / Network Device / Device Operation / Processing of Packets}%old label for latexdiff When a packet is copied into a buffer in the receiveq, the optimal path is to disable further interrupts for the receiveq (see \ref{sec:General Initialization And Device Operation / Device Operation / Receiving Used Buffers From The Device}~\nameref{sec:General Initialization And Device Operation / Device Operation / Receiving Used Buffers From The Device}) and process packets until no more are found, then re-enable them. -Processing packet involves: +Processing incoming packets involves: \begin{enumerate} \item \field{num_buffers} indicates how many descriptors @@ -3336,10 +3501,25 @@ Processing packet involves: \field{num_buffers} is one, then the entire packet will be contained within this buffer, immediately following the struct virtio_net_hdr. +\item If the VIRTIO_NET_F_GUEST_CSUM feature was negotiated, the + VIRTIO_NET_HDR_F_DATA_VALID bit in \field{flags} can be + set: if so, device has validated the packet checksum. + In case of multiple encapsulated protocols, one level of checksums + has been validated. +\end{enumerate} +Additionally, VIRTIO_NET_F_GUEST_CSUM, TSO4, TSO6, UDP and ECN +features enable receive checksum, large receive offload and ECN +support which are the input equivalents of the transmit checksum, +transmit segmentation offloading and ECN features, as described +in \ref{sec:Device Types / Network Device / Device Operation / +Packet Transmission}: +\begin{enumerate} \item If the VIRTIO_NET_F_GUEST_CSUM feature was negotiated, the - VIRTIO_NET_HDR_F_NEEDS_CSUM bit in \field{flags} MAY be - set: if so, the checksum on the packet is incomplete and + VIRTIO_NET_HDR_F_NEEDS_CSUM bit in \field{flags} can be + set: if so, the packet checksum at offset \field{csum_offset} + from \field{csum_start} and any preceding checksums + have been validated. The checksum on the packet is incomplete and \field{csum_start} and \field{csum_offset} indicate how to calculate it (see Packet Transmission point 1). @@ -3349,10 +3529,23 @@ Processing packet involves: desired MSS (see Packet Transmission point 2). \end{enumerate} -\devicenormative{\paragraph}{Processing of Packets}{Device Types / Network Device / Device Operation / Processing of Packets} +\devicenormative{\paragraph}{Processing of Incoming Packets}{Device Types / Network Device / Device Operation / Processing of Incoming Packets} +\label{devicenormative:Device Types / Network Device / Device Operation / Processing of Packets}%old label for latexdiff + +If VIRTIO_NET_F_MRG_RXBUF has not been negotiated, the device MUST set +\field{num_buffers} to 1. + +If VIRTIO_NET_F_MRG_RXBUF has been negotiated, the device MUST set +\field{num_buffers} to indicate the number of descriptors +the packet (including the header) is spread over. -If VIRTIO_NET_F_CSUM is not negotiated, the device MUST set -\field{flags} to zero and the packet MUST be fully checksummed. +The device MUST use all descriptors used by a single receive +packet together, by atomically incrementing \field{idx} in the +used ring by the \field{num_buffers} value. + +If VIRTIO_NET_F_GUEST_CSUM is not negotiated, the device MUST set +\field{flags} to zero and SHOULD supply a fully checksummed +packet to the driver. If VIRTIO_NET_F_GUEST_TSO4 is not negotiated, the device MUST NOT set \field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV4. @@ -3363,11 +3556,68 @@ If VIRTIO_NET_F_GUEST_UDP is not negotiated, the device MUST NOT set If VIRTIO_NET_F_GUEST_TSO6 is not negotiated, the device MUST NOT set \field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV6. -A device SHOULD NOT send TCP packets requiring segmentation offload +The device SHOULD NOT send to the driver TCP packets requiring segmentation offload which have the Explicit Congestion Notification bit set, unless the -VIRTIO_NET_F_GUEST_ECN feature is negotiated, in which case it MUST set -the VIRTIO_NET_HDR_GSO_ECN bit in \field{gso_type}. +VIRTIO_NET_F_GUEST_ECN feature is negotiated, in which case the +device MUST set the VIRTIO_NET_HDR_GSO_ECN bit in +\field{gso_type}. + +If the VIRTIO_NET_F_GUEST_CSUM feature has been negotiated, the +device MAY set the VIRTIO_NET_HDR_F_NEEDS_CSUM bit in +\field{flags}, if so: +\begin{enumerate} +\item the device MUST validate the packet checksum at + offset \field{csum_offset} from \field{csum_start} as well as all + preceding offsets; +\item the device MUST set the packet checksum stored in the + receive buffer to the TCP/UDP pseudo header; +\item the device MUST set \field{csum_start} and + \field{csum_offset} such that calculating a ones' + complement checksum from \field{csum_start} up until the + end of the packet and storing the result at offset + \field{csum_offset} from \field{csum_start} will result in a + fully checksummed packet; +\end{enumerate} + +If none of the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options have +been negotiated, the device MUST set \field{gso_type} to +VIRTIO_NET_HDR_GSO_NONE. +If \field{gso_type} differs from VIRTIO_NET_HDR_GSO_NONE, then +the device MUST also set the VIRTIO_NET_HDR_F_NEEDS_CSUM bit in +\field{flags} MUST set \field{gso_size} to indicate the desired MSS. + +If one of the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options have +been negotiated, the device SHOULD set \field{hdr_len} to a value +not less than the length of the headers, including the transport +header. + +If the VIRTIO_NET_F_GUEST_CSUM feature has been negotiated, the +device MAY set the VIRTIO_NET_HDR_F_DATA_VALID bit in +\field{flags}, if so, the device MUST validate the packet +checksum (in case of multiple encapsulated protocols, one level +of checksums is validated). + +\drivernormative{\paragraph}{Processing of Incoming +Packets}{Device Types / Network Device / Device Operation / +Processing of Incoming Packets} + +The driver MUST ignore \field{flag} bits that it does not recognize. + +If VIRTIO_NET_HDR_F_NEEDS_CSUM bit in \field{flags} is not set, the +driver MUST NOT use the \field{csum_start} and \field{csum_offset}. + +If one of the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options have +been negotiated, the driver MAY use \field{hdr_len} only as a hint about the +transport header size. +The driver MUST NOT rely on \field{hdr_len} to be correct. +\begin{note} +This is due to various bugs in implementations. +\end{note} + +If neither VIRTIO_NET_HDR_F_NEEDS_CSUM nor +VIRTIO_NET_HDR_F_DATA_VALID is set, the driver MUST NOT +rely on the packet checksum being correct. \subsubsection{Control Virtqueue}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue} The driver uses the control virtqueue (if VIRTIO_NET_F_CTRL_VQ is @@ -3396,32 +3646,93 @@ do except issue a diagnostic if \field{ack} is not VIRTIO_NET_OK. \paragraph{Packet Receive Filtering}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering} +\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Setting Promiscuous Mode}%old label for latexdiff -If the VIRTIO_NET_F_CTRL_RX feature is negotiated, the driver can -send control commands for promiscuous mode, multicast receiving, -and filtering of MAC addresses. +If the VIRTIO_NET_F_CTRL_RX and VIRTIO_NET_F_CTRL_RX_EXTRA +features are negotiated, the driver can send control commands for +promiscuous mode, multicast, unicast and broadcast receiving. \begin{note} In general, these commands are best-effort: unwanted packets could still arrive. \end{note} -\paragraph{Setting Promiscuous Mode}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Setting Promiscuous Mode} - \begin{lstlisting} #define VIRTIO_NET_CTRL_RX 0 #define VIRTIO_NET_CTRL_RX_PROMISC 0 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1 + #define VIRTIO_NET_CTRL_RX_ALLUNI 2 + #define VIRTIO_NET_CTRL_RX_NOMULTI 3 + #define VIRTIO_NET_CTRL_RX_NOUNI 4 + #define VIRTIO_NET_CTRL_RX_NOBCAST 5 \end{lstlisting} -The class VIRTIO_NET_CTRL_RX has two commands: -VIRTIO_NET_CTRL_RX_PROMISC turns promiscuous mode on and off, and -VIRTIO_NET_CTRL_RX_ALLMULTI turns all-multicast receive on and + +\devicenormative{\subparagraph}{Packet Receive Filtering}{Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering} + +If the VIRTIO_NET_F_CTRL_RX feature has been negotiated, +the device MUST support the following VIRTIO_NET_CTRL_RX class +commands: +\begin{itemize} +\item VIRTIO_NET_CTRL_RX_PROMISC turns promiscuous mode on and off. The command-specific-data is one byte containing 0 (off) or -1 (on). +1 (on). If promiscous mode is on, the device SHOULD receive all +incoming packets. +This SHOULD take effect even if one of the other modes set by +a VIRTIO_NET_CTRL_RX class command is on. +\item VIRTIO_NET_CTRL_RX_ALLMULTI turns all-multicast receive on and +off. The command-specific-data is one byte containing 0 (off) or +1 (on). When all-multicast receive is on the device SHOULD allow +all incoming multicast packets. +\end{itemize} + +If the VIRTIO_NET_F_CTRL_RX_EXTRA feature has been negotiated, +the device MUST support the following VIRTIO_NET_CTRL_RX class +commands: +\begin{itemize} +\item VIRTIO_NET_CTRL_RX_ALLUNI turns all-unicast receive on and +off. The command-specific-data is one byte containing 0 (off) or +1 (on). When all-unicast receive is on the device SHOULD allow +all incoming unicast packets. +\item VIRTIO_NET_CTRL_RX_NOMULTI suppresses multicast receive. +The command-specific-data is one byte containing 0 (multicast +receive allowed) or 1 (multicast receive suppressed). +When multicast receive is suppressed, the device SHOULD NOT +send multicast packets to the driver. +This SHOULD take effect even if VIRTIO_NET_CTRL_RX_ALLMULTI is on. +This filter SHOULD NOT apply to broadcast packets. +\item VIRTIO_NET_CTRL_RX_NOUNI suppresses unicast receive. +The command-specific-data is one byte containing 0 (unicast +receive allowed) or 1 (unicast receive suppressed). +When unicast receive is suppressed, the device SHOULD NOT +send unicast packets to the driver. +This SHOULD take effect even if VIRTIO_NET_CTRL_RX_ALLUNI is on. +\item VIRTIO_NET_CTRL_RX_NOBCAST suppresses broadcast receive. +The command-specific-data is one byte containing 0 (broadcast +receive allowed) or 1 (broadcast receive suppressed). +When broadcast receive is suppressed, the device SHOULD NOT +send broadcast packets to the driver. +This SHOULD take effect even if VIRTIO_NET_CTRL_RX_ALLMULTI is on. +\end{itemize} + +\drivernormative{\subparagraph}{Packet Receive Filtering}{Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering} + +If the VIRTIO_NET_F_CTRL_RX feature has not been negotiated, +the driver MUST NOT issue commands VIRTIO_NET_CTRL_RX_PROMISC or +VIRTIO_NET_CTRL_RX_ALLMULTI. + +If the VIRTIO_NET_F_CTRL_RX_EXTRA feature has not been negotiated, +the driver MUST NOT issue commands + VIRTIO_NET_CTRL_RX_ALLUNI, + VIRTIO_NET_CTRL_RX_NOMULTI, + VIRTIO_NET_CTRL_RX_NOUNI or + VIRTIO_NET_CTRL_RX_NOBCAST. \paragraph{Setting MAC Address Filtering}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering} +If the VIRTIO_NET_F_CTRL_RX feature is negotiated, the driver can +send control commands for MAC address filtering. + \begin{lstlisting} struct virtio_net_ctrl_mac { le32 entries; @@ -3468,6 +3779,13 @@ nor the MAC filtering table. \drivernormative{\subparagraph}{Setting MAC Address Filtering}{Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering} +If VIRTIO_NET_F_CTRL_RX has not been negotiated, +the driver MUST NOT issue VIRTIO_NET_CTRL_MAC class commands. + +If VIRTIO_NET_F_CTRL_RX has been negotiated, +the driver SHOULD issue VIRTIO_NET_CTRL_MAC_ADDR_SET +to set the default mac if it is different from \field{mac}. + The driver MUST follow the VIRTIO_NET_CTRL_MAC_TABLE_SET command by a le32 number, followed by that number of non-multicast MAC addresses, followed by another le32 number, followed by @@ -3880,6 +4198,14 @@ MUST format the fields in struct virtio_blk_req according to the native endian of the guest rather than (necessarily when not using the legacy interface) little-endian. +When using the legacy interface, transitional drivers +SHOULD ignore the \field{len} value in used ring entries. +\begin{note} +Historically, some devices put the total descriptor length, +or the total length of device-writable buffers there, +even when only the status byte was actually written. +\end{note} + The \field{reserved} field was previously called \field{ioprio}. \field{ioprio} is a hint about the relative priorities of requests to the device: higher numbers indicate more important requests. @@ -4203,6 +4529,14 @@ MUST format the fields in struct virtio_console_control according to the native endian of the guest rather than (necessarily when not using the legacy interface) little-endian. +When using the legacy interface, the driver SHOULD ignore the +\field{len} value in used ring entries for the transmit queues +and the control transmitq. +\begin{note} +Historically, some devices put the total descriptor length there, +even though no data was actually written. +\end{note} + \subsubsection{Legacy Interface: Framing Requirements}\label{sec:Device Types / Console Device / Legacy Interface: Framing Requirements} @@ -4245,7 +4579,7 @@ by random data by the device. The driver MUST NOT place driver-readable buffers into the queue. -The driver MUST examine the length written by the driver to determine +The driver MUST examine the length written by the device to determine how many random bytes were received. \devicenormative{\subsubsection}{Device Operation}{Device Types / Entropy Device / Device Operation} @@ -4253,14 +4587,13 @@ how many random bytes were received. The device MUST place one or more random bytes into the buffer, but it MAY use less than the entire buffer length. -\section{Legacy Interface: Memory Balloon Device}\label{sec:Device Types / Memory Balloon Device} +\section{Traditional Memory Balloon Device}\label{sec:Device Types / Memory Balloon Device} -This device is deprecated, and thus only exists as a legacy device -illustrated here for reference. The device number 13 is reserved for -a new memory balloon interface which is expected in a future version -of the standard. +This is the traditional balloon device. The device number 13 is +reserved for a new memory balloon interface, with different +semantics, which is expected in a future version of the standard. -The virtio memory balloon device is a primitive device for +The traditional virtio memory balloon device is a primitive device for managing guest memory: the device asks for a certain amount of memory, and the driver supplies it (or withdraws it, if the device has more than it asks for). This allows the guest to adapt to @@ -4282,13 +4615,35 @@ guest memory statistics to the host. \subsection{Feature bits}\label{sec:Device Types / Memory Balloon Device / Feature bits} \begin{description} -\item[VIRTIO_BALLOON_F_MUST_TELL_HOST (0)] Host MUST be told before +\item[VIRTIO_BALLOON_F_MUST_TELL_HOST (0)] Host has to be told before pages from the balloon are used. \item[VIRTIO_BALLOON_F_STATS_VQ (1)] A virtqueue for reporting guest memory statistics is present. +\item[VIRTIO_BALLOON_F_DEFLATE_ON_OOM (2) ] Deflate balloon on + guest out of memory condition. + \end{description} +\drivernormative{\subsubsection}{Feature bits}{Device Types / Memory Balloon Device / Feature bits} +The driver SHOULD accept the VIRTIO_BALLOON_F_MUST_TELL_HOST +feature if offered by the device. + +\devicenormative{\subsubsection}{Feature bits}{Device Types / Memory Balloon Device / Feature bits} +If the device offers the VIRTIO_BALLOON_F_MUST_TELL_HOST feature +bit, and if the driver did not accept this feature bit, the +device MAY signal failure by failing to set FEATURES_OK +\field{device status} bit when the driver writes it. +\subparagraph{Legacy Interface: Feature bits}\label{sec:Device +Types / Memory Balloon Device / Feature bits / Legacy Interface: +Feature bits} +As the legacy interface does not have a way to gracefully report feature +negotiation failure, when using the legacy interface, +transitional devices MUST support guests which do not negotiate +VIRTIO_BALLOON_F_MUST_TELL_HOST feature, and SHOULD +allow guest to use memory before notifying host if +VIRTIO_BALLOON_F_MUST_TELL_HOST is not negotiated. + \subsection{Device configuration layout}\label{sec:Device Types / Memory Balloon Device / Device configuration layout} Both fields of this configuration are always available. @@ -4300,29 +4655,37 @@ struct virtio_balloon_config { }; \end{lstlisting} -Note that these fields are always little endian, despite convention -that legacy device fields are guest endian. +\subparagraph{Legacy Interface: Device configuration layout}\label{sec:Device Types / Memory Balloon Device / Device +configuration layout / Legacy Interface: Device configuration layout} +When using the legacy interface, transitional devices and drivers +MUST format the fields in struct virtio_balloon_config +according to the little-endian format. +\begin{note} +This is unlike the usual convention that legacy device fields are guest endian. +\end{note} \subsection{Device Initialization}\label{sec:Device Types / Memory Balloon Device / Device Initialization} +The device initialization process is outlined below: + \begin{enumerate} \item The inflate and deflate virtqueues are identified. \item If the VIRTIO_BALLOON_F_STATS_VQ feature bit is negotiated: \begin{enumerate} \item Identify the stats virtqueue. - - \item Add one empty buffer to the stats virtqueue and notify the - device. + \item Add one empty buffer to the stats virtqueue. + \item DRIVER_OK is set: device operation begins. + \item Notify the device about the stats virtqueue buffer. \end{enumerate} \end{enumerate} -Device operation begins immediately. - \subsection{Device Operation}\label{sec:Device Types / Memory Balloon Device / Device Operation} -The device is driven by the receipt of a -configuration change interrupt. +The device is driven either by the receipt of a configuration +change interrupt, or by changing guest memory needs, such as +performing memory compaction or responding to out of memory +conditions. \begin{enumerate} \item \field{num_pages} configuration field is examined. If this is @@ -4347,35 +4710,106 @@ configuration change interrupt. \item If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is negotiated, the guest informs the device of pages before it uses them. - \item Otherwise, the guest MAY begin to re-use pages previously + \item Otherwise, the guest is allowed to re-use pages previously given to the balloon before the device has acknowledged their withdrawal\footnote{In this case, deflation advice is merely a courtesy. }. \end{enumerate} -\item In either case, once the device has completed the inflation or - deflation, the driver updates \field{actual} to reflect the new number of pages in the balloon\footnote{As updates to device-specific configuration space are not atomic, this field -isn't particularly reliable, but can be used to diagnose buggy guests. -}. +\item In either case, the device acknowledges inflate and deflate +requests by using the descriptor. +\item Once the device has acknowledged the inflation or + deflation, the driver updates \field{actual} to reflect the new number of pages in the balloon. \end{enumerate} \drivernormative{\subsubsection}{Device Operation}{Device Types / Memory Balloon Device / Device Operation} The driver SHOULD supply pages to the balloon when \field{num_pages} is -greater than \field{actual}. +greater than the actual number of pages in the balloon. The driver MAY use pages from the balloon when \field{num_pages} is -less than \field{actual}. +less than the actual number of pages in the balloon. + +The driver MAY supply pages to the balloon when \field{num_pages} is +greater than or equal to the actual number of pages in the balloon. + +If VIRTIO_BALLOON_F_DEFLATE_ON_OOM has not been negotiated, the +driver MUST NOT use pages from the balloon when \field{num_pages} +is less than or equal to the actual number of pages in the +balloon. + +If VIRTIO_BALLOON_F_DEFLATE_ON_OOM has been negotiated, the +driver MAY use pages from the balloon when \field{num_pages} +is less than or equal to the actual number of pages in the +balloon if this is required for system stability +(e.g. if memory is required by applications running within + the guest). The driver MUST use the deflateq to inform the device of pages that it wants to use from the balloon. If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is negotiated, the -driver MUST wait until the device has used the deflateq descriptor -before using the pages. +driver MUST NOT use pages from the balloon until +the device has acknowledged the deflate request. + +Otherwise, if the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is not +negotiated, the driver MAY begin to re-use pages previously +given to the balloon before the device has acknowledged the +deflate request. + +In any case, the driver MUST NOT use pages from the balloon +after adding the pages to the balloon, but before the device has +acknowledged the inflate request. + +The driver MUST NOT request deflation of pages in +the balloon before the device has acknowledged the inflate +request. The driver MUST update \field{actual} after changing the number of pages in the balloon. +The driver MAY update \field{actual} once after multiple +inflate and deflate operations. + +\devicenormative{\subsubsection}{Device Operation}{Device Types / Memory Balloon Device / Device Operation} + +The device MAY modify the contents of a page in the balloon +after detecting its physical number in an inflate request +and before acknowledging the inflate request by using the inflateq +descriptor. + +If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is negotiated, the +device MAY modify the contents of a page in the balloon +after detecting its physical number in an inflate request +and before detecting its physical number in a deflate request +and acknowledging the deflate request. + +\paragraph{Legacy Interface: Device Operation}\label{sec:Device +Types / Memory Balloon Device / Device Operation / Legacy +Interface: Device Operation} +When using the legacy interface, the driver SHOULD ignore the \field{len} value in used ring entries. +\begin{note} +Historically, some devices put the total descriptor length there, +even though no data was actually written. +\end{note} +When using the legacy interface, the driver MUST write out all +4 bytes each time it updates the \field{actual} value in the +configuration space, using a single atomic operation. + +When using the legacy interface, the device SHOULD NOT use the +\field{actual} value written by the driver in the configuration +space, until the last, most-significant byte of the value has been +written. +\begin{note} +Historically, devices used the \field{actual} value, even though +when using Virtio Over PCI Bus the device-specific configuration +space was not guaranteed to be atomic. Using intermediate +values during update by driver is best avoided, except for +debugging. + +Historically, drivers using Virtio Over PCI Bus wrote the +\field{actual} value by using multiple single-byte writes in +order, from the least-significant to the most-significant value. +\end{note} \subsubsection{Memory Statistics}\label{sec:Device Types / Memory Balloon Device / Device Operation / Memory Statistics} The stats virtqueue is atypical because communication is driven @@ -4400,10 +4834,11 @@ as follows: subsequent request) and consumes the statistics. \end{enumerate} + Within the buffer, statistics are an array of 6-byte entries. Each statistic consists of a 16 bit tag and a 64 bit value. All statistics are optional and the driver chooses which ones to supply. To guarantee backwards - compatibility, the driver SHOULD omit unsupported statistics. + compatibility, devices omit unsupported statistics. \begin{lstlisting} struct virtio_balloon_stat { @@ -4413,17 +4848,67 @@ struct virtio_balloon_stat { #define VIRTIO_BALLOON_S_MINFLT 3 #define VIRTIO_BALLOON_S_MEMFREE 4 #define VIRTIO_BALLOON_S_MEMTOT 5 - u16 tag; - u64 val; + le16 tag; + le64 val; } __attribute__((packed)); \end{lstlisting} +\drivernormative{\paragraph}{Memory Statistics}{Device Types / Memory Balloon Device / Device Operation / Memory Statistics} +Normative statements in this section apply if and only if the +VIRTIO_BALLOON_F_STATS_VQ feature has been negotiated. + +The driver MUST make at most one buffer available to the device +in the statsq, at all times. + +After initializing the device, the driver MUST make an output +buffer available in the statsq. + +Upon detecting that device has used a buffer in the statsq, the +driver MUST make an output buffer available in the statsq. + +Before making an output buffer available in the statsq, the +driver MUST initialize it, including one struct +virtio_balloon_stat entry for each statistic that it supports. + +Driver MUST use an output buffer size which is a multiple of 6 +bytes for all buffers submitted to the statsq. + +Driver MAY supply struct virtio_balloon_stat entries in the +output buffer submitted to the statsq in any order, without +regard to \field{tag} values. + +Driver MAY supply a subset of all statistics in the output buffer +submitted to the statsq. + +Driver MUST supply the same subset of statistics in all buffers +submitted to the statsq. + +\devicenormative{\paragraph}{Memory Statistics}{Device Types / Memory Balloon Device / Device Operation / Memory Statistics} +Normative statements in this section apply if and only if the +VIRTIO_BALLOON_F_STATS_VQ feature has been negotiated. + +Within an output buffer submitted to the statsq, +the device MUST ignore entries with \field{tag} values that it does not recognize. + +Within an output buffer submitted to the statsq, +the device MUST accept struct virtio_balloon_stat entries in any +order without regard to \field{tag} values. + \paragraph{Legacy Interface: Memory Statistics}\label{sec:Device Types / Memory Balloon Device / Device Operation / Memory Statistics / Legacy Interface: Memory Statistics} + When using the legacy interface, transitional devices and drivers MUST format the fields in struct virtio_balloon_stat according to the native endian of the guest rather than (necessarily when not using the legacy interface) little-endian. +When using the legacy interface, +the device SHOULD ignore all values in the first buffer in the +statsq supplied by the driver after device initialization. +\begin{note} +Historically, drivers supplied an uninitialized buffer in the +first buffer. +\end{note} + \subsubsection{Memory Statistics Tags}\label{sec:Device Types / Memory Balloon Device / Device Operation / Memory Statistics Tags} \begin{description} @@ -4593,6 +5078,16 @@ RESET.}. Device operation consists of operating request queues, the control queue and the event queue. +\paragraph{Legacy Interface: Device Operation}\label{sec:Device +Types / SCSI Host Device / Device Operation / Legacy +Interface: Device Operation} +When using the legacy interface, the driver SHOULD ignore the \field{len} value in used ring entries. +\begin{note} +Historically, devices put the total descriptor length, +or the total length of device-writable buffers there, +even when only part of the buffers were actually written. +\end{note} + \subsubsection{Device Operation: Request Queues}\label{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: Request Queues} The driver queues requests to an arbitrary request queue, and @@ -4793,7 +5288,7 @@ Requests have the following format: \begin{lstlisting} struct virtio_scsi_ctrl { le32 type; -$\DIFdeltextcstwo{{\textbackslash}ldots}\DIFaddtextcstwo{\ldots}$ +$\ldots$ u8 response; }; diff --git a/diffpreamble.tex b/diffpreamble.tex index 8162004..9b5b7e5 100644 --- a/diffpreamble.tex +++ b/diffpreamble.tex @@ -27,6 +27,13 @@ \providecommand{\DIFaddendFL}{} \providecommand{\DIFdelbeginFL}{} \providecommand{\DIFdelendFL}{} +\renewenvironment{DIFnomarkup} +{ +\begingroup\color{black}\hypersetup{linkcolor=blue,urlcolor=blue} +} +{ +\endgroup +} %DIF END FLOATSAFE PREAMBLE %% diffpreable.tex end diff --git a/fixupdiff.pl b/fixupdiff.pl new file mode 100644 index 0000000..e557e2b --- /dev/null +++ b/fixupdiff.pl @@ -0,0 +1,77 @@ +my $bufferdiff=""; +my $diff=""; +my $buffer=""; +while (<>) { + my $line = $_; + if (m/%DIFDELCMD\s+<\s+\\begin{lstlisting}/) { + $lstlisting=1; + $line =~s/%DIFDELCMD\s+</{\\lstset{escapechar=\\\$} /; + } + if ($lstlisting) { + $line =~ s/%DIFDELCMD\s+< //; + if (not $line =~ m/\\(?:begin|end){lstlisting}/) { + $line =~ s/([#&{} ])/\\$1/g; + $line =~ s/(.*)/\$\\DIFdel{$1}\$/; + } + #print "%FIXED BY RULE 1\n"; + } + #In section headings, replace begin/end with begin/endFL, + #but be careful in case some tag spills over to the next + #line + if (m/\\(section|subsection|subsubsection|paragraph)/ and m/DIF/) { + my @list = split(/(\\DIF(?:add|del)(?:begin|end)(?:FL)?)/, $line, -1); + #if there's only one tag, don't touch it: + #matching one is on the other line + if ($#list >= 5) { + #if first tag is end, don't touch it - matching + #begin is on the previous line + if ($list[1] =~ m/begin$/) { + $list[1] .= "FL"; + } + #if last tag is begin, don't touch it - matching + #end is on the next line + if ($list[$#list - 1] =~ m/end$/) { + $list[$#list - 1] .= "FL"; + } + } + for (my $i = 3; $i <= $#list - 3; $i += 2) { + if (not $list[$i] =~ m/FL$/) { + $list[$i] .= "FL"; + } + } + $line = join("", @list); + #print "%FIXED BY RULE 2\n"; + } + #detect where we have DIFbegin/end cross + #enumerate/itemize environments and fix up + if (m/\\DIF(?:add|del)(?:begin|end)/) { + my @list = split(/(\\DIF(?:add|del)(?:begin|end)(?:FL)?)/, $line, -1); + $diff = $list[$#list - 1]; + if ($diff =~ m/begin/) { + $diff =~ s/begin/end/; + } else { + $diff = ""; + } + } + if ($diff ne "" and m/\\(?:begin|end){(?:enumerate|itemize)}$/ and not m/\\DIF/) { + $buffer = $line; + $bufferdiff = $diff; + $line = ""; + #print "%BUFFERED BY RULE 3: $bufferdiff\n"; + } + if ($buffer ne "" and $line ne "") { + if (m/^(\\DIF(?:add|del)end(?:FL)?)/ and $bufferdiff ne $1) { + $line =~ s/^(\\DIF(?:add|del)end(?:FL)?)//; + $buffer =~ s/(\\(?:begin|end){(?:enumerate|itemize)})$/$bufferdiff$1/; + #print "%FIXED BY RULE 3: $bufferdiff\n"; + } + print $buffer; + $buffer = ""; + $bufferdiff = ""; + } + print $line; + if (m/%DIFDELCMD\s+<\s+\\end{lstlisting}/) { + print "}\n"; + $lstlisting=0; + } +} diff --git a/makediff.sh b/makediff.sh index 16eba2d..7d64c93 100755 --- a/makediff.sh +++ b/makediff.sh @@ -9,31 +9,21 @@ export DATESTR=${DATESTR:-`cat REVISION-DATE`} MAIN=$1 PATH=.:${PATH} cur="$PWD" -oldrev=`git rev-list -1 origin/tags/v1.0-cs01` +oldrev=`git rev-list -1 origin/tags/v1.0-cs02` newrev=`git rev-list -1 HEAD` rm -fr old new git clone $PWD old cd "${cur}/old" git checkout $oldrev -##suppress diff of title -#git cherry-pick 0adee486ab987c3e98c5f31b51cc963d8bb6fff4 -##suppress diff of changelog -#git cherry-pick a41f3813a748e7d279cb6eb82f3c0afde4a3243a -#git cherry-pick fbfb402e69cdd9279c44b7684612e6f81df99e6d -#git cherry-pick 9f240fe0e718bf9b1e502e02916db9d8fede304b -#git cherry-pick a02605f9945f450ecaadf86736741de2e2c2e788 -#git cherry-pick 175e797beede8aea840102bee9b70bb08190153d while read -r rev; do echo "Applying $rev" git cherry-pick `git rev-list -1 -F --grep "$rev" $newrev` || exit 1 done << 'EOF' -formatting: escape \ldots in lstlisting -formatting: mark change manually as changed in cs02 -cl: remove changelog for cs01 -cl-os: prepare changelog for v1.0 cs02 -title: update previous version to cs01 -changelog: list acknowledgement change -changelog: typo fixup: formatting: formatting +Revert: formatting: mark change manually as changed in cs02 +cl: move out cs02 changelog +cl: drop contents temporarily +changelog: comment out header +changelog: disable markup EOF #mv specvars.tex specvars-orig.tex @@ -64,6 +54,8 @@ sed 's/\\footnote{/\\footnote {/' new/flat.tex > new/flat-fixed.tex #wget http://mirror.math.ku.edu/tex-archive/support/latexdiff/latexdiff-fast #chmod +x latexdiff-fast latexdiff-fast --config \ -"FLOATENV=(?:figure|longtable|table|tabular|plate)[\w\d*@]*" \ - --append-safecmd=field --append-textcmd=mmioreg --ignore-warnings -p diffpreamble.tex old/flat-fixed.tex new/flat-fixed.tex > virtio-diff.tex -#perl -pi fixupdiff.pl virtio-diff.tex +"FLOATENV=(?:figure|longtable|table|tabular|plate|lstlisting|note|enumerate|itemize)[\w\d*@]*,PICTUREENV=(?:picture|DIFdeltextcstwo|DIFnomarkup|lstlisting)[\w\d*@]*" \ + --append-safecmd=field --append-textcmd=mmioreg \ +--ignore-warnings -p diffpreamble.tex old/flat-fixed.tex \ +new/flat-fixed.tex > virtio-diff-tofix.tex +perl fixupdiff.pl virtio-diff-tofix.tex > virtio-diff.tex diff --git a/newdevice.tex b/newdevice.tex index c7e6221..28e1c7b 100644 --- a/newdevice.tex +++ b/newdevice.tex @@ -22,8 +22,9 @@ configuration information (the network device does this for filtering, otherwise the table in the config space could potentially be very large). -Remember that configuration fields over 32 bits wide might not be -atomically writable by the driver. +Remember that configuration fields over 32 bits wide might not be atomically +writable by the driver. Therefore, no writeable field which triggers an +action ought to be wider than 32 bits. \section{What Device Number?}\label{sec:Creating New Device Types / What Device Number?} @@ -20,10 +20,10 @@ \end{oasistitlesection} \begin{oasistitlesection}{Previous version} -\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs01/tex/} +\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs02/tex/} {}(Authoritative)\newline -\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs01/virtio-v1.0-cs01.pdf}\newline -\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs01/virtio-v1.0-cs01.html} +\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs02/virtio-v1.0-cs02.pdf}\newline +\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs02/virtio-v1.0-cs02.html} \end{oasistitlesection} \begin{oasistitlesection}{Latest version} diff --git a/virtio-ring.h b/virtio-ring.h index aa01d92..5a1e87d 100644 --- a/virtio-ring.h +++ b/virtio-ring.h @@ -53,7 +53,7 @@ /* Support for indirect descriptors */ #define VIRTIO_F_INDIRECT_DESC 28 -/* Support for avail_idx and used_idx fields */ +/* Support for avail_event and used_event fields */ #define VIRTIO_F_EVENT_IDX 29 /* Arbitrary descriptor layouts. */ |