summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--REVISION2
-rw-r--r--REVISION-DATE2
-rw-r--r--acknowledgements.tex6
-rw-r--r--changelog.tex2
-rw-r--r--cl-cs02.tex52
-rw-r--r--cl-os.tex338
-rw-r--r--commands.tex1
-rw-r--r--conformance.tex41
-rw-r--r--content.tex739
-rw-r--r--diffpreamble.tex7
-rw-r--r--fixupdiff.pl77
-rwxr-xr-xmakediff.sh30
-rw-r--r--newdevice.tex5
-rw-r--r--title.tex6
-rw-r--r--virtio-ring.h2
15 files changed, 1123 insertions, 187 deletions
diff --git a/REVISION b/REVISION
index 9ae2867..376c82e 100644
--- a/REVISION
+++ b/REVISION
@@ -1 +1 @@
-virtio-v1.0-cs02
+virtio-v1.0-cs03
diff --git a/REVISION-DATE b/REVISION-DATE
index 83b910b..86ffb4e 100644
--- a/REVISION-DATE
+++ b/REVISION-DATE
@@ -1 +1 @@
-18 January 2015
+02 August 2015
diff --git a/acknowledgements.tex b/acknowledgements.tex
index 3866a0c..6c86d12 100644
--- a/acknowledgements.tex
+++ b/acknowledgements.tex
@@ -20,6 +20,7 @@ Luiz Capitulino, Red Hat \newline
Michael S. Tsirkin, Red Hat \newline
Paolo Bonzini, Red Hat \newline
Pawel Moll, ARM \newline
+Richard Sohn, Alcatel-Lucent \newline
Rusty Russell, IBM \newline
Sasha Levin, Oracle \newline
Sergey Tverdyshev, Thales e-Security \newline
@@ -33,12 +34,13 @@ specification and are gratefully acknowledged:
\begin{oasistitlesection}{Reviewers}
Andrew Thornton, Google \newline
Arun Subbarao, LynuxWorks \newline
+Brian Foley, ARM \newline
+David Alan Gilbert, Red Hat \newline
Fam Zheng, Red Hat \newline
Gerd Hoffmann, Red Hat \newline
+Jason Wang, Red Hat \newline
Laura Novich, Red Hat \newline
Patrick Durusau, Technical Advisory Board, OASIS \newline
Thomas Huth, IBM \newline
Yan Vugenfirer, Red Hat / Daynix \newline
-Brian Foley, ARM \newline
-David Alan Gilbert, Red Hat \newline
\end{oasistitlesection}
diff --git a/changelog.tex b/changelog.tex
index fbe2dd5..4b89e97 100644
--- a/changelog.tex
+++ b/changelog.tex
@@ -2,6 +2,7 @@
The following changes have been made since the previous version
of this specification:
+\begin{DIFnomarkup}
\begin{longtable}{ | c | c | c | p{0.4\textwidth} | }
\hline
\textbf{Revision} & \textbf{Date} & \textbf{Editor} & \textbf{Changes Made} \\
@@ -11,3 +12,4 @@ of this specification:
%\hline
\input{cl-os.tex}
\end{longtable}
+\end{DIFnomarkup}
diff --git a/cl-cs02.tex b/cl-cs02.tex
new file mode 100644
index 0000000..61aa6fd
--- /dev/null
+++ b/cl-cs02.tex
@@ -0,0 +1,52 @@
+448 & 22 Dec 2014 & Michael S. Tsirkin & {VIRTIO-120: virtio:
+fix used element size
+
+General ring description lists size for
+used ring elements as 4, it must be 8.
+
+See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues}.
+ } \\
+\hline
+449 & 22 Dec 2014 & Cornelia Huck & {VIRTIO-125: block: fixup section levels
+ The specification for the configuration layout for block devices
+ should be its own subsection as for all other devices and not be
+ hidden beneath "Feature bits".
+
+ The normative sections for device operation should appear under
+ the device operation section.
+See \ref{sec:Device Types / Block Device / Device configuration
+layout}.
+ } \\
+\hline
+450 & 22 Dec 2014 & Cornelia Huck & {VIRTIO-127: ccw: two-stage
+indicators for legacy devices
+
+ Some legacy devices will support two-stage queue indicators
+and therefore
+ won't reject CCW_CMD_SET_IND_ADAPTER. Note this.
+
+See \ref{sec:Virtio Transport Options / Virtio over channel I/O /
+Device Initialization / Setting Up Indicators / Legacy
+Interfaces: A Note on Setting Up Indicators}.
+ } \\
+\hline
+452 & 22 Dec 2014 & Michael S. Tsirkin & {VIRTIO-115:
+formatting: escape {\textbackslash}ldots in lstlisting
+
+ {\textbackslash}ldots does not work within lstlisting, the result is
+ {\textbackslash}ldots verbatim in the PDF output.
+
+ To fix, make \$ an escape character, and escape the sequence:
+ \${\textbackslash}ldots\$
+
+See \ref{sec:Device Types / SCSI Host Device / Device Operation /
+Device Operation: controlq}.
+} \\
+\hline
+455,457 & 23 Dec 2014 & Michael S. Tsirkin & {acknowledgements: acknowledge dgilbert
+
+ Acknowledge David Alan Gilbert for reporting VIRTIO-120.
+
+See \ref{chap:Acknowledgements}.
+} \\
+\hline
diff --git a/cl-os.tex b/cl-os.tex
index 61aa6fd..ab26d64 100644
--- a/cl-os.tex
+++ b/cl-os.tex
@@ -1,52 +1,328 @@
-448 & 22 Dec 2014 & Michael S. Tsirkin & {VIRTIO-120: virtio:
-fix used element size
+478 & 15 Mar 2015 & Cornelia Huck & {VIRTIO-129: legacy:
+clean up virtqueue layout definitions
-General ring description lists size for
-used ring elements as 4, it must be 8.
+Generalize "Legacy Interfaces: A Note on Virtqueue Layout" to allow
+for different alignment requirements. Have pci and ccw refer to that
+section for legacy devices. Remove the double definition of virtqueue
+alignment (which referred to legacy, but was not tagged as such) from
+the ccw section.
+See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues /
+Legacy Interfaces: A Note on Virtqueue Layout}, \ref{sec:Virtio
+Transport Options / Virtio Over PCI Bus / PCI-specific
+Initialization And Device Operation / Device Initialization /
+Virtqueue Configuration / Legacy Interface: A Note on Virtqueue
+Configuration} and \ref{sec:Virtio Transport Options / Virtio
+over channel I/O / Device Initialization / Configuring a
+Virtqueue / Legacy Interface: A Note on Configuring a Virtqueue}.
+ } \\
+\hline
+479 & 15 Mar 2015 & Cornelia Huck & {VIRTIO-118:
+ccw: clarify basic channel commands
-See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues}.
+"Basic channel commands" seems to be not as clear as it
+could be, so let's spell out which channel commands we refer to.
+See \ref{sec:Virtio Transport Options / Virtio over channel I/O /
+Basic Concepts}.
+} \\
+\hline
+479 & 15 Mar 2015 & Cornelia Huck & {VIRTIO-116:
+ccw: allow WRITE_STATUS to fail
+
+We want to be able to fail setting a status on the device
+(e.g. FEATURES_OK if the device can't work with the features
+negotiated).
+The easiest way to do that is to allow the device to fail the
+WRITE_STATUS command by posting a command reject.
+See \ref{sec:Virtio Transport Options / Virtio over channel I/O /
+Device Initialization / Communicating Status Information}.
} \\
\hline
-449 & 22 Doc 2014 & Cornelia Huck & {VIRTIO-125: block: fixup section levels
- The specification for the configuration layout for block devices
- should be its own subsection as for all other devices and not be
- hidden beneath "Feature bits".
+485 & 15 Mar 2015 & Jason Wang & {VIRTIO-135:
+virtio-ring: comment fixup
- The normative sections for device operation should appear under
- the device operation section.
-See \ref{sec:Device Types / Block Device / Device configuration
-layout}.
+virtio_ring.h included with spec has this text:
+/* Support for avail_idx and used_idx fields */
+it should really refer to avail_event and used_event.
+See Appendix \ref{sec:virtio-ring.h}.
} \\
\hline
-450 & 22 Dec 2014 & Cornelia Huck & {VIRTIO-127: ccw: two-stage
-indicators for legacy devices
+486 & 15 Mar 2015 & Jason Wang & {VIRTIO-136:
+document idx field in virtqueue used ring
- Some legacy devices will support two-stage queue indicators
-and therefore
- won't reject CCW_CMD_SET_IND_ADAPTER. Note this.
+Section \ref{sec:Basic Facilities of a Virtio Device / Virtqueues
+/ The Virtqueue Used Ring} The Virtqueue Used Ring
+listed the idx field, but never documented it.
+See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues /
+The Virtqueue Used Ring}.
+ } \\
+\hline
+487 & 15 Mar 2015 & Rusty Russell & {VIRTIO-130:
+ISR status: Fix incorrect diagram
-See \ref{sec:Virtio Transport Options / Virtio over channel I/O /
-Device Initialization / Setting Up Indicators / Legacy
-Interfaces: A Note on Setting Up Indicators}.
+ISR status capability diagram has the "Device Configuration
+Interrupt" as bit 0, and the "Queue Interrupt" as bit 1. This is
+the wrong way around: it disagrees with the legacy
+implementations, as well as the spec elsewhere.
+
+All current guests correctly follow the text, fix
+up the diagram to match.
+See \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI
+Device Layout / ISR status capability}.
+ } \\
+\hline
+488 & 15 Mar 2015 & Rusty Russell & {VIRTIO-133:
+Change 4.1.5.1.2.1 to device requirement
+
+4.1.5.1.2.1 is incorrectly labelled as a driver requirement; it's
+self-evidently referring to the device.
+See \ref{sec:Conformance / Driver Conformance / PCI Driver
+Conformance}, \ref{sec:Conformance / Device Conformance / PCI
+Device Conformance} and \ref{devicenormative:Virtio
+Transport Options / Virtio Over PCI Bus / PCI-specific
+Initialization And Device Operation / Device Initialization /
+Non-transitional Device With Legacy Driver}.
+ } \\
+\hline
+504 & 22 Apr 2015 & Rusty Russell & {VIRTIO-137:
+define the meaning and requirements of the len field.
+
+We said what it was for, and noted why. We didn't place any
+requirements on it, nor clearly spell out the implications of its use.
+
+This clarification comes particularly from noticing that QEMU
+didn't set len correctly, and philosophising over the correct value
+when an error has occurred.
+See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues /
+The Virtqueue Used Ring}, \ref{devicenormative:Basic Facilities
+of a Virtio Device / Virtqueues / The Virtqueue Used Ring} and
+\ref{sec:Basic Facilities of a Virtio Device / Virtqueues / The
+Virtqueue Used Ring}.
} \\
\hline
-452 & 22 Dec 2014 & Michael S. Tsirkin & {VIRTIO-115:
-formatting: escape {\textbackslash}ldots in lstlisting
+506 & 22 Apr 2015 & Michael S. Tsirkin & {VIRTIO-138:
+multiple errors: Non-transitional With Legacy
+
+virtio 1.0 has two sections titled "Non-transitional Device With
+Legacy Driver" the first says devices SHOULD fail, the second
+says devices MUST fail. Clearly a mistake.
- {\textbackslash}ldots does not work within lstlisting, the result is
- {\textbackslash}ldots verbatim in the PDF output.
+Other issues: devices don't really fail - they cause drivers to
+fail. second section seems to be in the wrong place, and also
+have a section followed by subsection with no explanatory text in
+between, which is ugly.
+Finally, this text was originally written to handle buggy windows
+drivers gracefully, but later we changed device IDs so it's not
+really required there. Might be handy for some other buggy legacy
+drivers, though no such drivers are known.
- To fix, make \$ an escape character, and escape the sequence:
- \${\textbackslash}ldots\$
+To fix, drop the duplicate section variant, add some explanatory
+text, clarify what "same ID" means here, and clarify
+that the work-around is only needed if a buggy driver
+is known to bind to a transitional device.
-See \ref{sec:Device Types / SCSI Host Device / Device Operation /
-Device Operation: controlq}.
+See \ref{sec:Virtio Transport Options / Virtio
+Over PCI Bus / PCI Device Layout / Non-transitional Device With
+Legacy Driver: A Note on PCI Device Layout},
+\ref{devicenormative:Virtio Transport Options / Virtio Over PCI
+Bus / PCI-specific Initialization And Device Operation / Device
+Initialization / Non-transitional Device With Legacy Driver} and
+\ref{sec:Virtio Transport Options / Virtio Over PCI Bus /
+PCI-specific Initialization And Device Operation / Device
+Initialization}.
} \\
\hline
-455,457 & 23 Dec 2014 & Michael S. Tsirkin & {acknowledgements: acknowledge dgilbert
+508 & 22 Apr 2015 & Michael S. Tsirkin & {VIRTIO-139:
+pci: missing documentation for dealing with 64 bit config fields
- Acknowledge David Alan Gilbert for reporting VIRTIO-120.
+pci spec says what width access to use for 32, 16 and 8
+bit fields, but does not explicitly say what to do for
+64 bit fields. As we have text that says driver must
+treat 64 bit accesses as non-atomic, this seems
+to imply driver should always do two 32 bit wide accesses.
+
+Let's make this an explicit requirement, and require
+devices to support this.
+
+See \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI
+Device Layout}, \ref{drivernormative:Virtio Transport Options /
+Virtio Over PCI Bus / PCI Device Layout},
+\ref{devicenormative:Virtio Transport Options / Virtio Over PCI
+Bus / PCI Device Layout} and \ref{sec:Conformance / Driver
+Conformance / PCI Driver Conformance}.
+ } \\
+\hline
+509 & 22 Apr 2015 & Michael S. Tsirkin & {balloon:
+MUST -> has to
+
+MUST shouldn't be used outside normative statements,
+that's confusing. Replace with "has to".
+
+See \ref{sec:Device Types / Memory Balloon Device / Feature
+bits}.
+ } \\
+\hline
+510 & 22 Apr 2015 & Michael S. Tsirkin & {conformance:
+add VIRTIO-137 statement links
+
+Add links to new conformance statements added to
+resolve VIRTIO-137 (describing used ring entry len usage).
+
+See \ref{sec:Conformance / Device Conformance}
+and \ref{sec:Conformance / Driver Conformance}.
+ } \\
+\hline
+517 & 22 Apr 2015 & Michael S. Tsirkin & {acknowledgements:
+contributors+minor fixup
+
+acknowledge feedback by Jason Wang, add Richard Sohn who
+joined the TC, sort acknowledged reviewers alphabetically.
See \ref{chap:Acknowledgements}.
} \\
\hline
+520 & 30 Apr 2015 & James Bottomley & {VIRTIO-140:
+give explicit guidance on the use of 64 bit fields
+
+Just saying 64 bit fields may not be atomic is true, but less
+helpful than it might be. Add explicit guidance about what the
+consequences of non-atomicity are.
+
+See \ref{sec:Creating New Device Types / What Device
+Configuration Space Layout?}.
+} \\
+\hline
+521 & 30 Apr 2015 & Rusty Russell & {VIRTIO-134:
+Spell out details of indirect elements in chains
+
+1) It's implied that a chain terminates with an indirect descriptor (since
+VIRTIO-15) but we didn't spell out that a device MUST NOT
+continue it.
+
+2) We allow [direct]->[direct]->[indirect], and qemu and
+bhyve both accept it. Make it clear that this is valid, thus devices MUST
+handle it.
+
+See \ref{drivernormative:Basic Facilities of a Virtio Device /
+Virtqueues / The Virtqueue Descriptor Table / Indirect
+Descriptors} and \ref{devicenormative:Basic Facilities of a
+Virtio Device / Virtqueues / The Virtqueue Descriptor Table /
+Indirect Descriptors}.
+} \\
+\hline
+522 & 30 Apr 2015 & Michael S. Tsirkin & {VIRTIO-141:
+used ring: specify legacy behaviour for len field
+
+many hypervisors implemented len field incorrectly.
+Document existing bugs in the legacy sections.
+
+See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues
+/ The Virtqueue Used Ring/ Legacy Interface: The Virtqueue Used
+Ring}, \ref{sec:Device Types / Network Device / Device Operation
+/ Legacy Interface: Device Operation}, \ref{sec:Device Types /
+Block Device / Device Operation / Legacy Interface: Device
+Operation}, \ref{sec:Device Types / Console Device / Device
+Operation / Legacy Interface: Device Operation}, \ref{sec:Device
+Types / Memory Balloon Device / Device Operation / Legacy
+Interface: Device Operation}, \ref{sec:Device
+Types / SCSI Host Device / Device Operation / Legacy
+Interface: Device Operation} and \ref{sec:Conformance / Legacy
+Interface: Transitional Device and Transitional Driver
+Conformance}.
+} \\
+\hline
+523 & 30 Apr 2015 & Michael S. Tsirkin & {VIRTIO-142:
+entropy device: typo fix
+
+Current text: "The driver MUST examine the length written by the
+driver" makes no sense. length is written by the device.
+
+See \ref{drivernormative:Device Types / Entropy Device / Device
+Operation}.
+} \\
+\hline
+526 & 18 May 2015 & Michael S. Tsirkin & {VIRTIO-143:
+balloon: transitional device support
+
+Support a transitional balloon device: this has the advantage of supporting
+existing drivers, transparently, as well as transports that don't allow mixing
+virtio 0 and virtio 1 devices. And balloon is an easy device to test, so it's
+also useful for people to test virtio core handling of transitional devices.
+
+Three issues with legacy hypervisors have been identified:
+\begin{enumerate}
+\item
+Actual value is actually used, and is necessary for management
+to work. Luckily 4 byte config space writes are now atomic.
+When using old guests, hypervisors can detect access to the last byte.
+When using old hypervisors, drivers can use atomic 4-byte accesses.
+\item Hypervisors actually didn't ignore the stats from the first
+buffer supplied. This means the values there would be
+incorrect until hypervisor resends the request.
+Add a note suggesting hypervisors ignore the 1st buffer.
+\item QEMU simply over-writes stats from each buffer it gets.
+Thus if driver supplies a different subset of stats
+on each request, stale values will be there.
+Require drivers to supply the same subset on each
+request. This also gives us a simple way to figure out
+which stats are supported.
+\end{enumerate}
+
+See
+\ref{sec:Device Types / Memory Balloon Device},
+\ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Discovery},
+\ref{sec:Conformance / Driver Conformance / Traditional Memory Balloon Driver Conformance},
+\ref{sec:Conformance / Device Conformance / Traditional Memory Balloon Device Conformance},
+\ref{sec:Conformance / Legacy Interface: Transitional Device and Transitional Driver Conformance},
+\ref{sec:Conformance / Device Conformance} and \ref{sec:Conformance / Driver Conformance}.
+} \\
+\hline
+527 & 18 May 2015 & Michael S. Tsirkin & {VIRTIO-126:
+document deflate on oom
+
+Document the new option, and also clarify behaviour
+without it.
+
+In particular, actual field is not the
+actual number of pages in the balloon as
+driver might do inflate followed by deflate.
+
+Also, device isn't always driven by interrupts,
+driver can inflate/deflate in response to e.g.
+memory compaction.
+
+See \ref{sec:Device Types / Memory Balloon Device / Feature bits},
+\ref{sec:Device Types / Memory Balloon Device / Device Operation} and
+\ref{drivernormative:Device Types / Memory Balloon Device / Device Operation}.
+} \\
+\hline
+528 & 18 May 2015 & Michael S. Tsirkin & {VIRTIO-123:
+network device: xmit/receive cleanup
+
+Fix up multiple issues in xmit/receive sections:
+\begin{itemize}
+ \item drop MAY/MUST/SHOULD outside normative statements
+ \item spell out conformance requirements for both drivers and
+ devices, for xmit and receive paths
+ \item document the missing VIRTIO_NET_HDR_F_DATA_VALID
+ \item document handling of unrecognized flag bits so we can extend
+ flags in the future, similar to VIRTIO_NET_HDR_F_DATA_VALID
+\end{itemize}
+
+See \ref{sec:Device Types / Network Device / Device Initialization},
+\ref{drivernormative:Device Types / Network Device / Device Operation / Packet Transmission},
+\ref{devicenormative:Device Types / Network Device / Device Operation / Packet Transmission},
+\ref{sec:Device Types / Network Device / Device Operation / Processing of Incoming Packets},
+\ref{sec:Conformance / Driver Conformance / Network Driver Conformance} and
+\ref{sec:Conformance / Device Conformance / Network Device Conformance}.
+} \\
+\hline
+529 & 18 May 2015 & Michael S. Tsirkin & {VIRTIO-124:
+network device: document VIRTIO_NET_F_CTRL_RX_EXTRA
+
+See
+\ref{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering},
+\ref{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering},
+\ref{sec:Conformance / Driver Conformance / Network Driver Conformance} and
+\ref{sec:Conformance / Device Conformance / Network Device Conformance}.
+} \\
+\hline
diff --git a/commands.tex b/commands.tex
index 59c3da3..871f416 100644
--- a/commands.tex
+++ b/commands.tex
@@ -14,3 +14,4 @@
\newcommand{\devicenormative}[3]{#1{Device Requirements: #2}\label{devicenormative:#3}}
\providecommand{\DIFaddtextcstwo}[1]{#1}
\providecommand{\DIFdeltextcstwo}[1]{}
+\newenvironment{DIFnomarkup}{}{}
diff --git a/conformance.tex b/conformance.tex
index 29c6ba8..7b7df32 100644
--- a/conformance.tex
+++ b/conformance.tex
@@ -15,13 +15,13 @@ Conformance targets:
\begin{itemize}
\item Clause \ref{sec:Conformance / Driver Conformance},
\item One of clauses \ref{sec:Conformance / Driver Conformance / PCI Driver Conformance}, \ref{sec:Conformance / Driver Conformance / MMIO Driver Conformance} or \ref{sec:Conformance / Driver Conformance / Channel I/O Driver Conformance}.
- \item One of clauses \ref{sec:Conformance / Driver Conformance / Network Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Block Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Console Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Entropy Driver Conformance} or \ref{sec:Conformance / Driver Conformance / SCSI Host Driver Conformance}.
+ \item One of clauses \ref{sec:Conformance / Driver Conformance / Network Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Block Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Console Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Entropy Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Traditional Memory Balloon Driver Conformance} or \ref{sec:Conformance / Driver Conformance / SCSI Host Driver Conformance}.
\end{itemize}
\item[Device] A device MUST conform to three conformance clauses:
\begin{itemize}
\item Clause \ref{sec:Conformance / Device Conformance},
\item One of clauses \ref{sec:Conformance / Device Conformance / PCI Device Conformance}, \ref{sec:Conformance / Device Conformance / MMIO Device Conformance} or \ref{sec:Conformance / Device Conformance / Channel I/O Device Conformance}.
- \item One of clauses \ref{sec:Conformance / Device Conformance / Network Device Conformance}, \ref{sec:Conformance / Device Conformance / Block Device Conformance}, \ref{sec:Conformance / Device Conformance / Console Device Conformance}, \ref{sec:Conformance / Device Conformance / Entropy Device Conformance} or \ref{sec:Conformance / Device Conformance / SCSI Host Device Conformance}.
+ \item One of clauses \ref{sec:Conformance / Device Conformance / Network Device Conformance}, \ref{sec:Conformance / Device Conformance / Block Device Conformance}, \ref{sec:Conformance / Device Conformance / Console Device Conformance}, \ref{sec:Conformance / Device Conformance / Entropy Device Conformance}, \ref{sec:Conformance / Device Conformance / Traditional Memory Balloon Device Conformance} or \ref{sec:Conformance / Device Conformance / SCSI Host Device Conformance}.
\end{itemize}
\end{description}
@@ -38,6 +38,7 @@ A driver MUST conform to the following normative statements:
\item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table}
\item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table / Indirect Descriptors}
\item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Interrupt Suppression}
+\item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring}
\item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Notification Suppression}
\item \ref{drivernormative:General Initialization And Device Operation / Device Initialization}
\item \ref{drivernormative:General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device / Updating idx}
@@ -57,7 +58,6 @@ A PCI driver MUST conform to the following normative statements:
\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Common configuration structure layout}
\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / ISR status capability}
\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / PCI configuration access capability}
-\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Non-transitional Device With Legacy Driver}
\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / MSI-X Vector Configuration}
\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Notification of Device Configuration Changes}
\end{itemize}
@@ -79,6 +79,7 @@ A Channel I/O driver MUST conform to the following normative statements:
\begin{itemize}
\item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Basic Concepts}
\item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting the Virtio Revision}
+\item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information}
\item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Device Operation / Host->Guest Notification / Notification via Adapter I/O Interrupts}
\item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Device Operation / Guest->Host Notification}
\end{itemize}
@@ -91,6 +92,8 @@ A network driver MUST conform to the following normative statements:
\item \ref{drivernormative:Device Types / Network Device / Device configuration layout}
\item \ref{drivernormative:Device Types / Network Device / Device Operation / Packet Transmission}
\item \ref{drivernormative:Device Types / Network Device / Device Operation / Setting Up Receive Buffers}
+\item \ref{drivernormative:Device Types / Network Device / Device Operation / Processing of Incoming Packets}
+\item \ref{drivernormative:Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering}
\item \ref{drivernormative:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering}
\item \ref{drivernormative:Device Types / Network Device / Device Operation / Control Virtqueue / Gratuitous Packet Sending}
\item \ref{drivernormative:Device Types / Network Device / Device Operation / Control Virtqueue / Automatic receive steering in multiqueue mode}
@@ -122,6 +125,16 @@ An entropy driver MUST conform to the following normative statements:
\item \ref{drivernormative:Device Types / Entropy Device / Device Operation}
\end{itemize}
+\subsection{Traditional Memory Balloon Driver Conformance}\label{sec:Conformance / Driver Conformance / Traditional Memory Balloon Driver Conformance}
+
+A traditional memory balloon driver MUST conform to the following normative statements:
+
+\begin{itemize}
+\item \ref{drivernormative:Device Types / Memory Balloon Device / Feature bits}
+\item \ref{drivernormative:Device Types / Memory Balloon Device / Device Operation}
+\item \ref{drivernormative:Device Types / Memory Balloon Device / Device Operation / Memory Statistics}
+\end{itemize}
+
\subsection{SCSI Host Driver Conformance}\label{sec:Conformance / Driver Conformance / SCSI Host Driver Conformance}
An SCSI host driver MUST conform to the following normative statements:
@@ -144,6 +157,7 @@ A device MUST conform to the following normative statements:
\item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table}
\item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table / Indirect Descriptors}
\item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Interrupt Suppression}
+\item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring}
\item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Notification Suppression}
\item \ref{devicenormative:Reserved Feature Bits}
\end{itemize}
@@ -155,12 +169,14 @@ A PCI device MUST conform to the following normative statements:
\begin{itemize}
\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus}
\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Discovery}
+\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout}
\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / Virtio Structure PCI Capabilities}
\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Common configuration structure layout}
\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Notification capability}
\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / ISR status capability}
\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Device-specific configuration}
\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / PCI configuration access capability}
+\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Non-transitional Device With Legacy Driver}
\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / MSI-X Vector Configuration}
\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Virtqueue Interrupts From The Device}
\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Notification of Device Configuration Changes}
@@ -182,6 +198,7 @@ A Channel I/O device MUST conform to the following normative statements:
\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Basic Concepts}
\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting the Virtio Revision}
\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Configuring a Virtqueue}
+\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information}
\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting Up Indicators / Setting Up Two-Stage Queue Indicators}
\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Operation / Host->Guest Notification / Notification via Adapter I/O Interrupts}
\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Operation / Guest->Host Notification}
@@ -193,8 +210,10 @@ A network device MUST conform to the following normative statements:
\begin{itemize}
\item \ref{devicenormative:Device Types / Network Device / Device configuration layout}
+\item \ref{devicenormative:Device Types / Network Device / Device Operation / Packet Transmission}
\item \ref{devicenormative:Device Types / Network Device / Device Operation / Setting Up Receive Buffers}
-\item \ref{devicenormative:Device Types / Network Device / Device Operation / Processing of Packets}
+\item \ref{devicenormative:Device Types / Network Device / Device Operation / Processing of Incoming Packets}
+\item \ref{devicenormative:Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering}
\item \ref{devicenormative:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering}
\item \ref{devicenormative:Device Types / Network Device / Device Operation / Control Virtqueue / Gratuitous Packet Sending}
\item \ref{devicenormative:Device Types / Network Device / Device Operation / Control Virtqueue / Automatic receive steering in multiqueue mode}
@@ -225,6 +244,16 @@ An entropy device MUST conform to the following normative statements:
\item \ref{devicenormative:Device Types / Entropy Device / Device Operation}
\end{itemize}
+\subsection{Traditional Memory Balloon Device Conformance}\label{sec:Conformance / Device Conformance / Traditional Memory Balloon Device Conformance}
+
+A traditional memory balloon device MUST conform to the following normative statements:
+
+\begin{itemize}
+\item \ref{devicenormative:Device Types / Memory Balloon Device / Feature bits}
+\item \ref{devicenormative:Device Types / Memory Balloon Device / Device Operation}
+\item \ref{devicenormative:Device Types / Memory Balloon Device / Device Operation / Memory Statistics}
+\end{itemize}
+
\subsection{SCSI Host Device Conformance}\label{sec:Conformance / Device Conformance / SCSI Host Device Conformance}
An SCSI host device MUST conform to the following normative statements:
@@ -287,9 +316,11 @@ Feature Bits / Legacy Interface: A Note on Feature Bits}
\item Section \ref{sec:Device Types / Block Device / Device Operation / Legacy Interface: Device Operation}
\item Section \ref{sec:Device Types / Console Device / Device configuration layout / Legacy Interface: Device configuration layout}
\item Section \ref{sec:Device Types / Console Device / Device Operation / Legacy Interface: Device Operation}
-\item Section \ref{drivernormative:Device Types / Memory Balloon Device / Device Operation}
+\item Section \ref{sec:Device Types / Memory Balloon Device / Feature bits / Legacy Interface: Feature bits}
+\item Section \ref{sec:Device Types / Memory Balloon Device / Device Operation / Legacy Interface: Device Operation}
\item Section \ref{sec:Device Types / Memory Balloon Device / Device Operation / Memory Statistics / Legacy Interface: Memory Statistics}
\item Section \ref{sec:Device Types / SCSI Host Device / Device configuration layout / Legacy Interface: Device configuration layout}
+\item Section \ref{sec:Device Types / SCSI Host Device / Device Operation / Legacy Interface: Device Operation}
\item Section \ref{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: Request Queues / Legacy Interface: Device Operation: Request Queues}
\item Section \ref{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: controlq / Legacy Interface: Device Operation: controlq}
\item Section \ref{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: eventq / Legacy Interface: Device Operation: eventq}
diff --git a/content.tex b/content.tex
index 1efdcc8..d989d98 100644
--- a/content.tex
+++ b/content.tex
@@ -288,7 +288,8 @@ For Legacy Interfaces, several additional
restrictions are placed on the virtqueue layout:
Each virtqueue occupies two or more physically-contiguous pages
-(usually defined as 4096 bytes, but depending on the transport)
+(usually defined as 4096 bytes, but depending on the transport;
+henceforth referred to as Queue Align)
and consists of three parts:
\begin{tabular}{|l|l|l|}
@@ -301,11 +302,12 @@ The bus-specific Queue Size field controls the total number of bytes
for the virtqueue.
When using the legacy interface, the transitional
driver MUST retrieve the Queue Size field from the device
-and MUST allocate the total number of bytes for the virtuqueue
-according to the following formula:
+and MUST allocate the total number of bytes for the virtqueue
+according to the following formula (Queue Align given in qalign and
+Queue Size given in qsz):
\begin{lstlisting}
-#define ALIGN(x) (((x) + PAGE_SIZE) & ~PAGE_SIZE)
+#define ALIGN(x) (((x) + qalign) & ~qalign)
static inline unsigned virtq_size(unsigned int qsz)
{
return ALIGN(sizeof(struct virtq_desc)*qsz + sizeof(u16)*(3 + qsz))
@@ -326,7 +328,7 @@ struct virtq {
// A ring of available descriptor heads with free-running index.
struct virtq_avail avail;
- // Padding to the next PAGE_SIZE boundary.
+ // Padding to the next Queue Align boundary.
u8 pad[ Padding ];
// A ring of used descriptor heads with free-running index.
@@ -475,9 +477,21 @@ one table per descriptor).
A driver MUST NOT create a descriptor chain longer than the Queue Size of
the device.
+A driver MUST NOT set both VIRTQ_DESC_F_INDIRECT and VIRTQ_DESC_F_NEXT
+in \field{flags}.
+
\devicenormative{\paragraph}{Indirect Descriptors}{Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table / Indirect Descriptors}
The device MUST ignore the write-only flag (\field{flags}\&VIRTQ_DESC_F_WRITE) in the descriptor that refers to an indirect table.
+The device MUST handle the case of zero or more normal chained
+descriptors followed by a single descriptor with \field{flags}\&VIRTQ_DESC_F_INDIRECT.
+
+\begin{note}
+While unusual (most implementations either create a chain solely using
+non-indirect descriptors, or use a single indirect element), such a
+layout is valid.
+\end{note}
+
\subsection{The Virtqueue Available Ring}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Available Ring}
\begin{lstlisting}
@@ -588,10 +602,22 @@ them: it is only written to by the device, and read by the driver.
Each entry in the ring is a pair: \field{id} indicates the head entry of the
descriptor chain describing the buffer (this matches an entry
placed in the available ring by the guest earlier), and \field{len} the total
-of bytes written into the buffer. The latter is extremely useful
-for drivers using untrusted buffers: if you do not know exactly
-how much has been written by the device, you usually have to zero
-the buffer to ensure no data leakage occurs.
+of bytes written into the buffer.
+
+\begin{note}
+\field{len} is particularly useful
+for drivers using untrusted buffers: if a driver does not know exactly
+how much has been written by the device, the driver would have to zero
+the buffer in advance to ensure no data leakage occurs.
+
+For example, a network driver may hand a received buffer directly to
+an unprivileged userspace application. If the network device has not
+overwritten the bytes which were in that buffer, this could leak the
+contents of freed memory from other processes to the application.
+\end{note}
+
+The \field{idx} field indicates where the device would put the next descriptor
+entry in the ring (modulo the queue size). This starts at 0, and increases.
\begin{note}
The legacy \hyperref[intro:Virtio PCI Draft]{[Virtio PCI Draft]}
@@ -600,6 +626,39 @@ the constant as VRING_USED_F_NO_NOTIFY, but the layout and value were
identical.
\end{note}
+\subsubsection{Legacy Interface: The Virtqueue Used
+Ring}\label{sec:Basic Facilities of a Virtio Device / Virtqueues
+/ The Virtqueue Used Ring/ Legacy Interface: The Virtqueue Used
+Ring}
+
+Historically, many drivers ignored the \field{len} value; as a
+result, many devices set \field{len} incorrectly. Thus, when
+using the legacy interface, it is generally a good idea to ignore
+the \field{len} value in used ring entries if possible. Specific
+known issues are listed per device type.
+
+\devicenormative{\subsubsection}{The Virtqueue Used Ring}{Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring}
+
+The device MUST set \field{len} prior to updating the used \field{idx}.
+
+The device MUST write at least \field{len} bytes to the descriptor,
+beginning at the first device-writable buffer,
+prior to updating the used \field{idx}.
+
+The device MAY write more than \field{len} bytes to the descriptor.
+
+\begin{note}
+There are potential error cases where a device might not know what
+parts of the buffers have been written. This is why \field{len} is
+permitted to be an underestimate: that's preferable to the driver believing
+that uninitialized memory has been overwritten when it has not.
+\end{note}
+
+\drivernormative{\subsubsection}{The Virtqueue Used Ring}{Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring}
+
+The driver MUST NOT make assumptions about data in device-writable buffers
+beyond the first \field{len} bytes, and SHOULD ignore this data.
+
\subsection{Virtqueue Notification Suppression}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Notification Suppression}
The device can suppress notifications in a manner analogous to the way
@@ -987,7 +1046,7 @@ Transitional PCI Device ID & Virtio Device \\
\hline
0x1001 & block device \\
\hline
-0x1002 & memory ballooning (legacy) \\
+0x1002 & memory ballooning (traditional) \\
\hline
0x1003 & console \\
\hline
@@ -1047,14 +1106,23 @@ Structure PCI Capabilities.
Fields of different sizes are present in the device
configuration regions.
-All 32-bit and 16-bit fields are little-endian.
+All 64-bit, 32-bit and 16-bit fields are little-endian.
+64-bit fields are to be treated as two 32-bit fields,
+with the low 32-bit part followed by the high 32-bit part.
\drivernormative{\subsubsection}{PCI Device Layout}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout}
-The driver
-MUST access each field using the ``natural'' access method, i.e.
-32-bit accesses for 32-bit fields, 16-bit accesses for 16-bit
-fields and 8-bit accesses for 8-bit fields.
+For device configuration access, the driver MUST use 8-bit wide
+accesses for 8-bit wide fields, 16-bit wide and aligned accesses
+for 16-bit wide fields and 32-bit wide and aligned accesses for
+32-bit and 64-bit wide fields. For 64-bit fields, the driver MAY
+access each of the high and low 32-bit parts of the field
+independently.
+
+\devicenormative{\subsubsection}{PCI Device Layout}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout}
+
+For 64-bit device configuration fields, the device MUST allow the driver
+independent access to the high and low 32-bit parts of the field.
\subsection{Virtio Structure PCI Capabilities}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / Virtio Structure PCI Capabilities}
@@ -1410,7 +1478,7 @@ change interrupts and normal virtqueue interrupts:
\hline
Bits & 0 & 1 & 2 to 31 \\
\hline
-Purpose & Device Configuration Interrupt & Queue Interrupt & Reserved \\
+Purpose & Queue Interrupt & Device Configuration Interrupt & Reserved \\
\hline
\end{tabular}
@@ -1600,10 +1668,37 @@ on PCI Device Layout}\label{sec:Virtio Transport Options / Virtio
Over PCI Bus / PCI Device Layout / Non-transitional Device With
Legacy Driver: A Note on PCI Device Layout}
+All known legacy drivers check either the PCI Revision or the
+Device and Vendor IDs, and thus won't attempt to drive a
+non-transitional device.
+
+A buggy legacy driver might mistakenly attempt to drive a
+non-transitional device. If support for such drivers is required
+(as opposed to fixing the bug), the following would be the
+recommended way to detect and handle them.
+\begin{note}
+Such buggy drivers are not currently known to be used in
+production.
+\end{note}
+
+\subparagraph{
+\DIFdeltextcstwo{Driver Requirements: Non-transitional Device With Legacy Driver}
+\DIFaddtextcstwo{Device Requirements: Non-transitional Device With Legacy Driver}
+}
+\label{drivernormative:Virtio Transport Options / Virtio Over PCI
+Bus / PCI-specific Initialization And Device Operation /
+Device Initialization / Non-transitional Device With Legacy
+Driver}
+\label{devicenormative:Virtio Transport Options / Virtio Over PCI
+Bus / PCI-specific Initialization And Device Operation /
+Device Initialization / Non-transitional Device With Legacy
+Driver}
+
Non-transitional devices, on a platform where a legacy driver for
-a legacy device with the same ID might have previously existed,
-SHOULD take the following steps to fail gracefully when a legacy
-driver attempts to drive them:
+a legacy device with the same ID (including PCI Revision, Device
+and Vendor IDs) is known to have previously existed,
+SHOULD take the following steps to cause the legacy driver to
+fail gracefully when it attempts to drive them:
\begin{enumerate}
\item Present an I/O BAR in BAR0, and
@@ -1624,22 +1719,6 @@ As a prerequisite to device initialization, the driver scans the
PCI capability list, detecting virtio configuration layout using Virtio
Structure PCI capabilities as detailed in \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / Virtio Structure PCI Capabilities}
-\paragraph{Non-transitional Device With Legacy Driver}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Non-transitional Device With Legacy Driver}
-
-\drivernormative{\subparagraph}{Non-transitional Device With Legacy Driver}{Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Non-transitional Device With Legacy Driver}
-
-Non-transitional devices, on a platform where a legacy driver for
-a legacy device with the same ID might have previously existed,
-MUST take the following steps to fail gracefully when a legacy
-driver attempts to drive them:
-
-\begin{enumerate}
-\item Present an I/O BAR in BAR0, and
-\item Respond to a single-byte zero write to offset 18
- (corresponding to Device Status register in the legacy layout)
- of BAR0 by presenting zeroes on every BAR and ignoring writes.
-\end{enumerate}
-
\subparagraph{Legacy Interface: A Note on Device Layout Detection}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Virtio Device Configuration Layout Detection / Legacy Interface: A Note on Device Layout Detection}
Legacy drivers skipped the Device Layout Detection step, assuming legacy
@@ -1761,8 +1840,8 @@ The driver typically does this as follows, for each virtqueue a device has:
\end{enumerate}
\subparagraph{Legacy Interface: A Note on Virtqueue Configuration}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Virtqueue Configuration / Legacy Interface: A Note on Virtqueue Configuration}
-When using the legacy interface, the page size for a virtqueue on a PCI virtio
-device is defined as 4096 bytes. Driver writes the physical address, divided
+When using the legacy interface, the queue layout follows \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout}~\nameref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout} with an alignment of 4096.
+Driver writes the physical address, divided
by 4096 to the Queue Address field\footnote{The 4096 is based on the x86 page size, but it's also large
enough to ensure that the separate parts of the virtqueue are on
separate cache lines.
@@ -2379,8 +2458,14 @@ model corresponding to the attached virtio device's subsystem
device ID, accessed via a virtual I/O subchannel and a virtual
channel path of type 0x32. This proxy device is discoverable via
normal channel subsystem device discovery (usually a STORE
-SUBCHANNEL loop) and answers to the basic channel commands, most
-importantly SENSE ID.
+SUBCHANNEL loop) and answers to the basic channel commands:
+
+\begin{itemize}
+\item NO-OPERATION (0x03)
+\item BASIC SENSE (0x04)
+\item TRANSFER IN CHANNEL (0x08)
+\item SENSE ID (0xe4)
+\end{itemize}
For a virtio-ccw proxy device, SENSE ID will return the following
information:
@@ -2585,36 +2670,30 @@ struct vq_info_block_legacy {
\end{lstlisting}
\field{queue} contains the guest address for queue \field{index}, \field{num} the number of buffers
-and \field{align} the alignment.
+and \field{align} the alignment. The queue layout follows \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout}~\nameref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout}.
-\subsubsection{Virtqueue Layout}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Virtqueue Layout}
+\subsubsection{Communicating Status Information}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information}
-The virtqueue is physically contiguous, with padding added to make the
-used ring meet the align value:
+The driver changes the status of a device via the
+CCW_CMD_WRITE_STATUS command, which transmits an 8 bit status
+value.
-\begin{tabular}{|l|l|l|}
-\hline
-Descriptor Table & Available Ring (\ldots padding\ldots) & Used Ring \\
-\hline
-\end{tabular}
+As described in
+\ref{devicenormative:Basic Facilities of a Virtio Device / Feature Bits},
+a device sometimes fails to set the \field{status} field: For example, it
+might fail to accept the FEATURES_OK status bit during device initialization.
-The calculation for total size is as follows:
+\drivernormative{\paragraph}{Communicating Status Information}{Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information}
-\begin{lstlisting}
-#define ALIGN(x) (((x) + align) & ~align)
-static inline unsigned virtq_size(unsigned int num)
-{
- return ALIGN(sizeof(struct virtq_desc)*num
- + sizeof(u16)*(3 + num))
- + ALIGN(sizeof(u16)*3 + sizeof(struct virtq_used_elem)*num);
-}
-\end{lstlisting}
+If the device posts a unit check with command reject in response to the
+CCW_CMD_WRITE_STATUS command, the driver MUST assume that the device failed
+to set the status and the \field{status} field retained its previous value.
-\subsubsection{Communicating Status Information}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information}
+\devicenormative{\paragraph}{Communicating Status Information}{Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information}
-The driver changes the status of a device via the
-CCW_CMD_WRITE_STATUS command, which transmits an 8 bit status
-value.
+If the device fails to set the \field{status} field to the value written by
+the driver, the device MUST ensure that the \field{status} field is left
+unchanged and MUST post a unit check with command reject.
\subsubsection{Handling Device Features}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Handling Device Features}
@@ -2887,7 +2966,7 @@ Device ID & Virtio Device \\
\hline
4 & entropy source \\
\hline
-5 & memory ballooning (legacy) \\
+5 & memory ballooning (traditional) \\
\hline
6 & ioMemory \\
\hline
@@ -3136,7 +3215,12 @@ if both guests are amenable.}
the VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
VIRTIO_NET_F_GUEST_UFO and VIRTIO_NET_F_GUEST_ECN are the input
equivalents of the features described above.
- See \ref{sec:Device Types / Network Device / Device Operation / Setting Up Receive Buffers}~\nameref{sec:Device Types / Network Device / Device Operation / Setting Up Receive Buffers} and \ref{sec:Device Types / Network Device / Device Operation / Processing of Packets}~\nameref{sec:Device Types / Network Device / Device Operation / Processing of Packets} below.
+ See \ref{sec:Device Types / Network Device / Device Operation /
+Setting Up Receive Buffers}~\nameref{sec:Device Types / Network
+Device / Device Operation / Setting Up Receive Buffers} and
+\ref{sec:Device Types / Network Device / Device Operation /
+Processing of Incoming Packets}~\nameref{sec:Device Types /
+Network Device / Device Operation / Processing of Incoming Packets} below.
\end{enumerate}
A truly minimal driver would only accept VIRTIO_NET_F_MAC and ignore
@@ -3180,16 +3264,25 @@ The legacy driver only presented \field{num_buffers} in the struct virtio_net_hd
when VIRTIO_NET_F_MRG_RXBUF was not negotiated; without that feature the
structure was 2 bytes shorter.
+When using the legacy interface, the driver SHOULD ignore the
+\field{len} value in used ring entries for the transmit queues
+and the controlq queue.
+\begin{note}
+Historically, some devices put
+the total descriptor length there, even though no data was
+actually written.
+\end{note}
+
\subsubsection{Packet Transmission}\label{sec:Device Types / Network Device / Device Operation / Packet Transmission}
Transmitting a single packet is simple, but varies depending on
the different features the driver negotiated.
\begin{enumerate}
-\item The driver MAY send a completely checksummed packet. In this case,
+\item The driver can send a completely checksummed packet. In this case,
\field{flags} will be zero, and \field{gso_type} will be VIRTIO_NET_HDR_GSO_NONE.
-\item If the driver negotiated VIRTIO_NET_F_CSUM, it MAY skip
+\item If the driver negotiated VIRTIO_NET_F_CSUM, it can skip
checksumming the packet:
\begin{itemize}
\item \field{flags} has the VIRTIO_NET_HDR_F_NEEDS_CSUM set,
@@ -3248,15 +3341,83 @@ specifically in the protocol.}.
\drivernormative{\paragraph}{Packet Transmission}{Device Types / Network Device / Device Operation / Packet Transmission}
-If a driver has not negotiated VIRTIO_NET_F_CSUM, \field{flags} MUST be zero and
-the packet MUST be fully checksummed.
-
The driver MUST set \field{num_buffers} to zero.
-A driver SHOULD NOT send TCP packets requiring segmentation offload which have the Explicit Congestion Notification bit set, unless the VIRTIO_NET_F_HOST_ECN feature is
-negotiated\footnote{This is a common restriction in real, older network cards.}, in
-which case it MUST set the VIRTIO_NET_HDR_GSO_ECN bit in \field{gso_type}.
+If VIRTIO_NET_F_CSUM is not negotiated, the driver MUST set
+\field{flags} to zero and SHOULD supply a fully checksummed
+packet to the device.
+If VIRTIO_NET_F_HOST_TSO4 is negotiated, the driver MAY set
+\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV4 to request TCPv4
+segmentation, otherwise the driver MUST NOT set
+\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV4.
+
+If VIRTIO_NET_F_HOST_TSO6 is negotiated, the driver MAY set
+\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV6 to request TCPv6
+segmentation, otherwise the driver MUST NOT set
+\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV6.
+
+If VIRTIO_NET_F_HOST_UFO is negotiated, the driver MAY set
+\field{gso_type} to VIRTIO_NET_HDR_GSO_UDP to request UDP
+segmentation, otherwise the driver MUST NOT set
+\field{gso_type} to VIRTIO_NET_HDR_GSO_UDP.
+
+The driver SHOULD NOT send to the device TCP packets requiring segmentation offload
+which have the Explicit Congestion Notification bit set, unless the
+VIRTIO_NET_F_HOST_ECN feature is negotiated, in which case the
+driver MUST set the VIRTIO_NET_HDR_GSO_ECN bit in
+\field{gso_type}.
+
+If the VIRTIO_NET_F_CSUM feature has been negotiated, the
+driver MAY set the VIRTIO_NET_HDR_F_NEEDS_CSUM bit in
+\field{flags}, if so:
+\begin{enumerate}
+\item the driver MUST validate the packet checksum at
+ offset \field{csum_offset} from \field{csum_start} as well as all
+ preceding offsets;
+\item the driver MUST set the packet checksum stored in the
+ buffer to the TCP/UDP pseudo header;
+\item the driver MUST set \field{csum_start} and
+ \field{csum_offset} such that calculating a ones'
+ complement checksum from \field{csum_start} up until the end of
+ the packet and storing the result at offset \field{csum_offset}
+ from \field{csum_start} will result in a fully checksummed
+ packet;
+\end{enumerate}
+
+If none of the VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO options have
+been negotiated, the driver MUST set \field{gso_type} to
+VIRTIO_NET_HDR_GSO_NONE.
+
+If \field{gso_type} differs from VIRTIO_NET_HDR_GSO_NONE, then
+the driver MUST also set the VIRTIO_NET_HDR_F_NEEDS_CSUM bit in
+\field{flags} and MUST set \field{gso_size} to indicate the
+desired MSS.
+
+If one of the VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO options have
+been negotiated, the driver SHOULD set \field{hdr_len} to a value
+not less than the length of the headers, including the transport
+header.
+
+The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID bit in
+\field{flags}.
+
+\devicenormative{\paragraph}{Packet Transmission}{Device Types / Network Device / Device Operation / Packet Transmission}
+The device MUST ignore \field{flags} bits that it does not recognize.
+
+If VIRTIO_NET_HDR_F_NEEDS_CSUM bit in \field{flags} is not set, the
+device MUST NOT use the \field{csum_start} and \field{csum_offset}.
+
+If one of the VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO options have
+been negotiated, the device MAY use \field{hdr_len} only as a hint about the
+transport header size.
+The device MUST NOT rely on \field{hdr_len} to be correct.
+\begin{note}
+This is due to various bugs in implementations.
+\end{note}
+
+If VIRTIO_NET_HDR_F_NEEDS_CSUM is not set, the device MUST NOT
+rely on the packet checksum being correct.
\paragraph{Packet Transmission Interrupt}\label{sec:Device Types / Network Device / Device Operation / Packet Transmission / Packet Transmission Interrupt}
Often a driver will suppress transmission interrupts using the
@@ -3310,17 +3471,21 @@ The device MUST set \field{num_buffers} to the number of descriptors used to
hold the incoming packet.
The device MUST use only a single descriptor if VIRTIO_NET_F_MRG_RXBUF
-was not negotiated. \note{This means that \field{num_buffers} will always be 1
+was not negotiated.
+\begin{note}
+{This means that \field{num_buffers} will always be 1
if VIRTIO_NET_F_MRG_RXBUF is not negotiated.}
+\end{note}
-\subsubsection{Processing of Packets}\label{sec:Device Types / Network Device / Device Operation / Processing of Packets}
+\subsubsection{Processing of Incoming Packets}\label{sec:Device Types / Network Device / Device Operation / Processing of Incoming Packets}
+\label{sec:Device Types / Network Device / Device Operation / Processing of Packets}%old label for latexdiff
When a packet is copied into a buffer in the receiveq, the
optimal path is to disable further interrupts for the receiveq
(see \ref{sec:General Initialization And Device Operation / Device Operation / Receiving Used Buffers From The Device}~\nameref{sec:General Initialization And Device Operation / Device Operation / Receiving Used Buffers From The Device}) and process
packets until no more are found, then re-enable them.
-Processing packet involves:
+Processing incoming packets involves:
\begin{enumerate}
\item \field{num_buffers} indicates how many descriptors
@@ -3336,10 +3501,25 @@ Processing packet involves:
\field{num_buffers} is one, then the entire packet will be
contained within this buffer, immediately following the struct
virtio_net_hdr.
+\item If the VIRTIO_NET_F_GUEST_CSUM feature was negotiated, the
+ VIRTIO_NET_HDR_F_DATA_VALID bit in \field{flags} can be
+ set: if so, the device has validated the packet checksum.
+ In case of multiple encapsulated protocols, one level of checksums
+ has been validated.
+\end{enumerate}
+Additionally, VIRTIO_NET_F_GUEST_CSUM, TSO4, TSO6, UFO and ECN
+features enable receive checksum, large receive offload and ECN
+support which are the input equivalents of the transmit checksum,
+transmit segmentation offloading and ECN features, as described
+in \ref{sec:Device Types / Network Device / Device Operation /
+Packet Transmission}:
+\begin{enumerate}
\item If the VIRTIO_NET_F_GUEST_CSUM feature was negotiated, the
- VIRTIO_NET_HDR_F_NEEDS_CSUM bit in \field{flags} MAY be
- set: if so, the checksum on the packet is incomplete and
+ VIRTIO_NET_HDR_F_NEEDS_CSUM bit in \field{flags} can be
+ set: if so, the packet checksum at offset \field{csum_offset}
+ from \field{csum_start} and any preceding checksums
+ have been validated. The checksum on the packet is incomplete and
\field{csum_start} and \field{csum_offset} indicate how to calculate
it (see Packet Transmission point 1).
@@ -3349,10 +3529,23 @@ Processing packet involves:
desired MSS (see Packet Transmission point 2).
\end{enumerate}
-\devicenormative{\paragraph}{Processing of Packets}{Device Types / Network Device / Device Operation / Processing of Packets}
+\devicenormative{\paragraph}{Processing of Incoming Packets}{Device Types / Network Device / Device Operation / Processing of Incoming Packets}
+\label{devicenormative:Device Types / Network Device / Device Operation / Processing of Packets}%old label for latexdiff
+
+If VIRTIO_NET_F_MRG_RXBUF has not been negotiated, the device MUST set
+\field{num_buffers} to 1.
+
+If VIRTIO_NET_F_MRG_RXBUF has been negotiated, the device MUST set
+\field{num_buffers} to indicate the number of descriptors
+the packet (including the header) is spread over.
-If VIRTIO_NET_F_CSUM is not negotiated, the device MUST set
-\field{flags} to zero and the packet MUST be fully checksummed.
+The device MUST use all descriptors used by a single receive
+packet together, by atomically incrementing \field{idx} in the
+used ring by the \field{num_buffers} value.
+
+If VIRTIO_NET_F_GUEST_CSUM is not negotiated, the device MUST set
+\field{flags} to zero and SHOULD supply a fully checksummed
+packet to the driver.
If VIRTIO_NET_F_GUEST_TSO4 is not negotiated, the device MUST NOT set
\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV4.
@@ -3363,11 +3556,68 @@ If VIRTIO_NET_F_GUEST_UDP is not negotiated, the device MUST NOT set
If VIRTIO_NET_F_GUEST_TSO6 is not negotiated, the device MUST NOT set
\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV6.
-A device SHOULD NOT send TCP packets requiring segmentation offload
+The device SHOULD NOT send to the driver TCP packets requiring segmentation offload
which have the Explicit Congestion Notification bit set, unless the
-VIRTIO_NET_F_GUEST_ECN feature is negotiated, in which case it MUST set
-the VIRTIO_NET_HDR_GSO_ECN bit in \field{gso_type}.
+VIRTIO_NET_F_GUEST_ECN feature is negotiated, in which case the
+device MUST set the VIRTIO_NET_HDR_GSO_ECN bit in
+\field{gso_type}.
+
+If the VIRTIO_NET_F_GUEST_CSUM feature has been negotiated, the
+device MAY set the VIRTIO_NET_HDR_F_NEEDS_CSUM bit in
+\field{flags}, if so:
+\begin{enumerate}
+\item the device MUST validate the packet checksum at
+ offset \field{csum_offset} from \field{csum_start} as well as all
+ preceding offsets;
+\item the device MUST set the packet checksum stored in the
+ receive buffer to the TCP/UDP pseudo header;
+\item the device MUST set \field{csum_start} and
+ \field{csum_offset} such that calculating a ones'
+ complement checksum from \field{csum_start} up until the
+ end of the packet and storing the result at offset
+ \field{csum_offset} from \field{csum_start} will result in a
+ fully checksummed packet;
+\end{enumerate}
+
+If none of the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options have
+been negotiated, the device MUST set \field{gso_type} to
+VIRTIO_NET_HDR_GSO_NONE.
+If \field{gso_type} differs from VIRTIO_NET_HDR_GSO_NONE, then
+the device MUST also set the VIRTIO_NET_HDR_F_NEEDS_CSUM bit in
+\field{flags} and MUST set \field{gso_size} to indicate the desired MSS.
+
+If one of the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options have
+been negotiated, the device SHOULD set \field{hdr_len} to a value
+not less than the length of the headers, including the transport
+header.
+
+If the VIRTIO_NET_F_GUEST_CSUM feature has been negotiated, the
+device MAY set the VIRTIO_NET_HDR_F_DATA_VALID bit in
+\field{flags}, if so, the device MUST validate the packet
+checksum (in case of multiple encapsulated protocols, one level
+of checksums is validated).
+
+\drivernormative{\paragraph}{Processing of Incoming
+Packets}{Device Types / Network Device / Device Operation /
+Processing of Incoming Packets}
+
+The driver MUST ignore \field{flags} bits that it does not recognize.
+
+If VIRTIO_NET_HDR_F_NEEDS_CSUM bit in \field{flags} is not set, the
+driver MUST NOT use the \field{csum_start} and \field{csum_offset}.
+
+If one of the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options have
+been negotiated, the driver MAY use \field{hdr_len} only as a hint about the
+transport header size.
+The driver MUST NOT rely on \field{hdr_len} to be correct.
+\begin{note}
+This is due to various bugs in implementations.
+\end{note}
+
+If neither VIRTIO_NET_HDR_F_NEEDS_CSUM nor
+VIRTIO_NET_HDR_F_DATA_VALID is set, the driver MUST NOT
+rely on the packet checksum being correct.
\subsubsection{Control Virtqueue}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue}
The driver uses the control virtqueue (if VIRTIO_NET_F_CTRL_VQ is
@@ -3396,32 +3646,93 @@ do except issue a diagnostic if \field{ack} is not
VIRTIO_NET_OK.
\paragraph{Packet Receive Filtering}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering}
+\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Setting Promiscuous Mode}%old label for latexdiff
-If the VIRTIO_NET_F_CTRL_RX feature is negotiated, the driver can
-send control commands for promiscuous mode, multicast receiving,
-and filtering of MAC addresses.
+If the VIRTIO_NET_F_CTRL_RX and VIRTIO_NET_F_CTRL_RX_EXTRA
+features are negotiated, the driver can send control commands for
+promiscuous mode, multicast, unicast and broadcast receiving.
\begin{note}
In general, these commands are best-effort: unwanted
packets could still arrive.
\end{note}
-\paragraph{Setting Promiscuous Mode}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Setting Promiscuous Mode}
-
\begin{lstlisting}
#define VIRTIO_NET_CTRL_RX 0
#define VIRTIO_NET_CTRL_RX_PROMISC 0
#define VIRTIO_NET_CTRL_RX_ALLMULTI 1
+ #define VIRTIO_NET_CTRL_RX_ALLUNI 2
+ #define VIRTIO_NET_CTRL_RX_NOMULTI 3
+ #define VIRTIO_NET_CTRL_RX_NOUNI 4
+ #define VIRTIO_NET_CTRL_RX_NOBCAST 5
\end{lstlisting}
-The class VIRTIO_NET_CTRL_RX has two commands:
-VIRTIO_NET_CTRL_RX_PROMISC turns promiscuous mode on and off, and
-VIRTIO_NET_CTRL_RX_ALLMULTI turns all-multicast receive on and
+
+\devicenormative{\subparagraph}{Packet Receive Filtering}{Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering}
+
+If the VIRTIO_NET_F_CTRL_RX feature has been negotiated,
+the device MUST support the following VIRTIO_NET_CTRL_RX class
+commands:
+\begin{itemize}
+\item VIRTIO_NET_CTRL_RX_PROMISC turns promiscuous mode on and
off. The command-specific-data is one byte containing 0 (off) or
-1 (on).
+1 (on). If promiscuous mode is on, the device SHOULD receive all
+incoming packets.
+This SHOULD take effect even if one of the other modes set by
+a VIRTIO_NET_CTRL_RX class command is on.
+\item VIRTIO_NET_CTRL_RX_ALLMULTI turns all-multicast receive on and
+off. The command-specific-data is one byte containing 0 (off) or
+1 (on). When all-multicast receive is on the device SHOULD allow
+all incoming multicast packets.
+\end{itemize}
+
+If the VIRTIO_NET_F_CTRL_RX_EXTRA feature has been negotiated,
+the device MUST support the following VIRTIO_NET_CTRL_RX class
+commands:
+\begin{itemize}
+\item VIRTIO_NET_CTRL_RX_ALLUNI turns all-unicast receive on and
+off. The command-specific-data is one byte containing 0 (off) or
+1 (on). When all-unicast receive is on the device SHOULD allow
+all incoming unicast packets.
+\item VIRTIO_NET_CTRL_RX_NOMULTI suppresses multicast receive.
+The command-specific-data is one byte containing 0 (multicast
+receive allowed) or 1 (multicast receive suppressed).
+When multicast receive is suppressed, the device SHOULD NOT
+send multicast packets to the driver.
+This SHOULD take effect even if VIRTIO_NET_CTRL_RX_ALLMULTI is on.
+This filter SHOULD NOT apply to broadcast packets.
+\item VIRTIO_NET_CTRL_RX_NOUNI suppresses unicast receive.
+The command-specific-data is one byte containing 0 (unicast
+receive allowed) or 1 (unicast receive suppressed).
+When unicast receive is suppressed, the device SHOULD NOT
+send unicast packets to the driver.
+This SHOULD take effect even if VIRTIO_NET_CTRL_RX_ALLUNI is on.
+\item VIRTIO_NET_CTRL_RX_NOBCAST suppresses broadcast receive.
+The command-specific-data is one byte containing 0 (broadcast
+receive allowed) or 1 (broadcast receive suppressed).
+When broadcast receive is suppressed, the device SHOULD NOT
+send broadcast packets to the driver.
+This SHOULD take effect even if VIRTIO_NET_CTRL_RX_ALLMULTI is on.
+\end{itemize}
+
+\drivernormative{\subparagraph}{Packet Receive Filtering}{Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering}
+
+If the VIRTIO_NET_F_CTRL_RX feature has not been negotiated,
+the driver MUST NOT issue commands VIRTIO_NET_CTRL_RX_PROMISC or
+VIRTIO_NET_CTRL_RX_ALLMULTI.
+
+If the VIRTIO_NET_F_CTRL_RX_EXTRA feature has not been negotiated,
+the driver MUST NOT issue commands
+ VIRTIO_NET_CTRL_RX_ALLUNI,
+ VIRTIO_NET_CTRL_RX_NOMULTI,
+ VIRTIO_NET_CTRL_RX_NOUNI or
+ VIRTIO_NET_CTRL_RX_NOBCAST.
\paragraph{Setting MAC Address Filtering}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering}
+If the VIRTIO_NET_F_CTRL_RX feature is negotiated, the driver can
+send control commands for MAC address filtering.
+
\begin{lstlisting}
struct virtio_net_ctrl_mac {
le32 entries;
@@ -3468,6 +3779,13 @@ nor the MAC filtering table.
\drivernormative{\subparagraph}{Setting MAC Address Filtering}{Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering}
+If VIRTIO_NET_F_CTRL_RX has not been negotiated,
+the driver MUST NOT issue VIRTIO_NET_CTRL_MAC class commands.
+
+If VIRTIO_NET_F_CTRL_RX has been negotiated,
+the driver SHOULD issue VIRTIO_NET_CTRL_MAC_ADDR_SET
+to set the default mac if it is different from \field{mac}.
+
The driver MUST follow the VIRTIO_NET_CTRL_MAC_TABLE_SET command
by a le32 number, followed by that number of non-multicast
MAC addresses, followed by another le32 number, followed by
@@ -3880,6 +4198,14 @@ MUST format the fields in struct virtio_blk_req
according to the native endian of the guest rather than
(necessarily when not using the legacy interface) little-endian.
+When using the legacy interface, transitional drivers
+SHOULD ignore the \field{len} value in used ring entries.
+\begin{note}
+Historically, some devices put the total descriptor length,
+or the total length of device-writable buffers there,
+even when only the status byte was actually written.
+\end{note}
+
The \field{reserved} field was previously called \field{ioprio}. \field{ioprio}
is a hint about the relative priorities of requests to the device:
higher numbers indicate more important requests.
@@ -4203,6 +4529,14 @@ MUST format the fields in struct virtio_console_control
according to the native endian of the guest rather than
(necessarily when not using the legacy interface) little-endian.
+When using the legacy interface, the driver SHOULD ignore the
+\field{len} value in used ring entries for the transmit queues
+and the control transmitq.
+\begin{note}
+Historically, some devices put the total descriptor length there,
+even though no data was actually written.
+\end{note}
+
\subsubsection{Legacy Interface: Framing Requirements}\label{sec:Device
Types / Console Device / Legacy Interface: Framing Requirements}
@@ -4245,7 +4579,7 @@ by random data by the device.
The driver MUST NOT place driver-readable buffers into the queue.
-The driver MUST examine the length written by the driver to determine
+The driver MUST examine the length written by the device to determine
how many random bytes were received.
\devicenormative{\subsubsection}{Device Operation}{Device Types / Entropy Device / Device Operation}
@@ -4253,14 +4587,13 @@ how many random bytes were received.
The device MUST place one or more random bytes into the buffer, but it
MAY use less than the entire buffer length.
-\section{Legacy Interface: Memory Balloon Device}\label{sec:Device Types / Memory Balloon Device}
+\section{Traditional Memory Balloon Device}\label{sec:Device Types / Memory Balloon Device}
-This device is deprecated, and thus only exists as a legacy device
-illustrated here for reference. The device number 13 is reserved for
-a new memory balloon interface which is expected in a future version
-of the standard.
+This is the traditional balloon device. The device number 13 is
+reserved for a new memory balloon interface, with different
+semantics, which is expected in a future version of the standard.
-The virtio memory balloon device is a primitive device for
+The traditional virtio memory balloon device is a primitive device for
managing guest memory: the device asks for a certain amount of
memory, and the driver supplies it (or withdraws it, if the device
has more than it asks for). This allows the guest to adapt to
@@ -4282,13 +4615,35 @@ guest memory statistics to the host.
\subsection{Feature bits}\label{sec:Device Types / Memory Balloon Device / Feature bits}
\begin{description}
-\item[VIRTIO_BALLOON_F_MUST_TELL_HOST (0)] Host MUST be told before
+\item[VIRTIO_BALLOON_F_MUST_TELL_HOST (0)] Host has to be told before
pages from the balloon are used.
\item[VIRTIO_BALLOON_F_STATS_VQ (1)] A virtqueue for reporting guest
memory statistics is present.
+\item[VIRTIO_BALLOON_F_DEFLATE_ON_OOM (2)] Deflate balloon on
+ guest out of memory condition.
+
\end{description}
+\drivernormative{\subsubsection}{Feature bits}{Device Types / Memory Balloon Device / Feature bits}
+The driver SHOULD accept the VIRTIO_BALLOON_F_MUST_TELL_HOST
+feature if offered by the device.
+
+\devicenormative{\subsubsection}{Feature bits}{Device Types / Memory Balloon Device / Feature bits}
+If the device offers the VIRTIO_BALLOON_F_MUST_TELL_HOST feature
+bit, and if the driver did not accept this feature bit, the
+device MAY signal failure by failing to set FEATURES_OK
+\field{device status} bit when the driver writes it.
+\subparagraph{Legacy Interface: Feature bits}\label{sec:Device
+Types / Memory Balloon Device / Feature bits / Legacy Interface:
+Feature bits}
+As the legacy interface does not have a way to gracefully report feature
+negotiation failure, when using the legacy interface,
+transitional devices MUST support guests which do not negotiate
+the VIRTIO_BALLOON_F_MUST_TELL_HOST feature, and SHOULD
+allow the guest to use memory before notifying the host if
+VIRTIO_BALLOON_F_MUST_TELL_HOST is not negotiated.
+
\subsection{Device configuration layout}\label{sec:Device Types / Memory Balloon Device / Device configuration layout}
Both fields of this configuration
are always available.
@@ -4300,29 +4655,37 @@ struct virtio_balloon_config {
};
\end{lstlisting}
-Note that these fields are always little endian, despite convention
-that legacy device fields are guest endian.
+\subparagraph{Legacy Interface: Device configuration layout}\label{sec:Device Types / Memory Balloon Device / Device
+configuration layout / Legacy Interface: Device configuration layout}
+When using the legacy interface, transitional devices and drivers
+MUST format the fields in struct virtio_balloon_config
+according to the little-endian format.
+\begin{note}
+This is unlike the usual convention that legacy device fields are guest endian.
+\end{note}
\subsection{Device Initialization}\label{sec:Device Types / Memory Balloon Device / Device Initialization}
+The device initialization process is outlined below:
+
\begin{enumerate}
\item The inflate and deflate virtqueues are identified.
\item If the VIRTIO_BALLOON_F_STATS_VQ feature bit is negotiated:
\begin{enumerate}
\item Identify the stats virtqueue.
-
- \item Add one empty buffer to the stats virtqueue and notify the
- device.
+ \item Add one empty buffer to the stats virtqueue.
+ \item DRIVER_OK is set: device operation begins.
+ \item Notify the device about the stats virtqueue buffer.
\end{enumerate}
\end{enumerate}
-Device operation begins immediately.
-
\subsection{Device Operation}\label{sec:Device Types / Memory Balloon Device / Device Operation}
-The device is driven by the receipt of a
-configuration change interrupt.
+The device is driven either by the receipt of a configuration
+change interrupt, or by changing guest memory needs, such as
+performing memory compaction or responding to out of memory
+conditions.
\begin{enumerate}
\item \field{num_pages} configuration field is examined. If this is
@@ -4347,35 +4710,106 @@ configuration change interrupt.
\item If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is negotiated, the
guest informs the device of pages before it uses them.
- \item Otherwise, the guest MAY begin to re-use pages previously
+ \item Otherwise, the guest is allowed to re-use pages previously
given to the balloon before the device has acknowledged their
withdrawal\footnote{In this case, deflation advice is merely a courtesy.
}.
\end{enumerate}
-\item In either case, once the device has completed the inflation or
- deflation, the driver updates \field{actual} to reflect the new number of pages in the balloon\footnote{As updates to device-specific configuration space are not atomic, this field
-isn't particularly reliable, but can be used to diagnose buggy guests.
-}.
+\item In either case, the device acknowledges inflate and deflate
+requests by using the descriptor.
+\item Once the device has acknowledged the inflation or
+ deflation, the driver updates \field{actual} to reflect the new number of pages in the balloon.
\end{enumerate}
\drivernormative{\subsubsection}{Device Operation}{Device Types / Memory Balloon Device / Device Operation}
The driver SHOULD supply pages to the balloon when \field{num_pages} is
-greater than \field{actual}.
+greater than the actual number of pages in the balloon.
The driver MAY use pages from the balloon when \field{num_pages} is
-less than \field{actual}.
+less than the actual number of pages in the balloon.
+
+The driver MAY supply pages to the balloon when \field{num_pages} is
+greater than or equal to the actual number of pages in the balloon.
+
+If VIRTIO_BALLOON_F_DEFLATE_ON_OOM has not been negotiated, the
+driver MUST NOT use pages from the balloon when \field{num_pages}
+is less than or equal to the actual number of pages in the
+balloon.
+
+If VIRTIO_BALLOON_F_DEFLATE_ON_OOM has been negotiated, the
+driver MAY use pages from the balloon when \field{num_pages}
+is less than or equal to the actual number of pages in the
+balloon if this is required for system stability
+(e.g. if memory is required by applications running within
+ the guest).
The driver MUST use the deflateq to inform the device of pages that it
wants to use from the balloon.
If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is negotiated, the
-driver MUST wait until the device has used the deflateq descriptor
-before using the pages.
+driver MUST NOT use pages from the balloon until
+the device has acknowledged the deflate request.
+
+Otherwise, if the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is not
+negotiated, the driver MAY begin to re-use pages previously
+given to the balloon before the device has acknowledged the
+deflate request.
+
+In any case, the driver MUST NOT use pages from the balloon
+after adding the pages to the balloon, but before the device has
+acknowledged the inflate request.
+
+The driver MUST NOT request deflation of pages in
+the balloon before the device has acknowledged the inflate
+request.
The driver MUST update \field{actual} after changing the number
of pages in the balloon.
+The driver MAY update \field{actual} once after multiple
+inflate and deflate operations.
+
+\devicenormative{\subsubsection}{Device Operation}{Device Types / Memory Balloon Device / Device Operation}
+
+The device MAY modify the contents of a page in the balloon
+after detecting its physical number in an inflate request
+and before acknowledging the inflate request by using the inflateq
+descriptor.
+
+If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is negotiated, the
+device MAY modify the contents of a page in the balloon
+after detecting its physical number in an inflate request
+and before detecting its physical number in a deflate request
+and acknowledging the deflate request.
+
+\paragraph{Legacy Interface: Device Operation}\label{sec:Device
+Types / Memory Balloon Device / Device Operation / Legacy
+Interface: Device Operation}
+When using the legacy interface, the driver SHOULD ignore the \field{len} value in used ring entries.
+\begin{note}
+Historically, some devices put the total descriptor length there,
+even though no data was actually written.
+\end{note}
+When using the legacy interface, the driver MUST write out all
+4 bytes each time it updates the \field{actual} value in the
+configuration space, using a single atomic operation.
+
+When using the legacy interface, the device SHOULD NOT use the
+\field{actual} value written by the driver in the configuration
+space, until the last, most-significant byte of the value has been
+written.
+\begin{note}
+Historically, devices used the \field{actual} value, even though
+when using Virtio Over PCI Bus the device-specific configuration
+space was not guaranteed to be atomic. Using intermediate
+values during update by driver is best avoided, except for
+debugging.
+
+Historically, drivers using Virtio Over PCI Bus wrote the
+\field{actual} value by using multiple single-byte writes in
+order, from the least-significant to the most-significant value.
+\end{note}
\subsubsection{Memory Statistics}\label{sec:Device Types / Memory Balloon Device / Device Operation / Memory Statistics}
The stats virtqueue is atypical because communication is driven
@@ -4400,10 +4834,11 @@ as follows:
subsequent request) and consumes the statistics.
\end{enumerate}
+ Within the buffer, statistics are an array of 10-byte entries.
Each statistic consists of a 16 bit
tag and a 64 bit value. All statistics are optional and the
driver chooses which ones to supply. To guarantee backwards
- compatibility, the driver SHOULD omit unsupported statistics.
+ compatibility, devices omit unsupported statistics.
\begin{lstlisting}
struct virtio_balloon_stat {
@@ -4413,17 +4848,67 @@ struct virtio_balloon_stat {
#define VIRTIO_BALLOON_S_MINFLT 3
#define VIRTIO_BALLOON_S_MEMFREE 4
#define VIRTIO_BALLOON_S_MEMTOT 5
- u16 tag;
- u64 val;
+ le16 tag;
+ le64 val;
} __attribute__((packed));
\end{lstlisting}
+\drivernormative{\paragraph}{Memory Statistics}{Device Types / Memory Balloon Device / Device Operation / Memory Statistics}
+Normative statements in this section apply if and only if the
+VIRTIO_BALLOON_F_STATS_VQ feature has been negotiated.
+
+The driver MUST make at most one buffer available to the device
+in the statsq, at all times.
+
+After initializing the device, the driver MUST make an output
+buffer available in the statsq.
+
+Upon detecting that the device has used a buffer in the statsq, the
+driver MUST make an output buffer available in the statsq.
+
+Before making an output buffer available in the statsq, the
+driver MUST initialize it, including one struct
+virtio_balloon_stat entry for each statistic that it supports.
+
+Driver MUST use an output buffer size which is a multiple of 10
+bytes for all buffers submitted to the statsq.
+
+Driver MAY supply struct virtio_balloon_stat entries in the
+output buffer submitted to the statsq in any order, without
+regard to \field{tag} values.
+
+Driver MAY supply a subset of all statistics in the output buffer
+submitted to the statsq.
+
+Driver MUST supply the same subset of statistics in all buffers
+submitted to the statsq.
+
+\devicenormative{\paragraph}{Memory Statistics}{Device Types / Memory Balloon Device / Device Operation / Memory Statistics}
+Normative statements in this section apply if and only if the
+VIRTIO_BALLOON_F_STATS_VQ feature has been negotiated.
+
+Within an output buffer submitted to the statsq,
+the device MUST ignore entries with \field{tag} values that it does not recognize.
+
+Within an output buffer submitted to the statsq,
+the device MUST accept struct virtio_balloon_stat entries in any
+order without regard to \field{tag} values.
+
\paragraph{Legacy Interface: Memory Statistics}\label{sec:Device Types / Memory Balloon Device / Device Operation / Memory Statistics / Legacy Interface: Memory Statistics}
+
When using the legacy interface, transitional devices and drivers
MUST format the fields in struct virtio_balloon_stat
according to the native endian of the guest rather than
(necessarily when not using the legacy interface) little-endian.
+When using the legacy interface,
+the device SHOULD ignore all values in the first buffer in the
+statsq supplied by the driver after device initialization.
+\begin{note}
+Historically, drivers supplied an uninitialized buffer in the
+first buffer.
+\end{note}
+
\subsubsection{Memory Statistics Tags}\label{sec:Device Types / Memory Balloon Device / Device Operation / Memory Statistics Tags}
\begin{description}
@@ -4593,6 +5078,16 @@ RESET.}.
Device operation consists of operating request queues, the control
queue and the event queue.
+\paragraph{Legacy Interface: Device Operation}\label{sec:Device
+Types / SCSI Host Device / Device Operation / Legacy
+Interface: Device Operation}
+When using the legacy interface, the driver SHOULD ignore the \field{len} value in used ring entries.
+\begin{note}
+Historically, devices put the total descriptor length,
+or the total length of device-writable buffers there,
+even when only part of the buffers were actually written.
+\end{note}
+
\subsubsection{Device Operation: Request Queues}\label{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: Request Queues}
The driver queues requests to an arbitrary request queue, and
@@ -4793,7 +5288,7 @@ Requests have the following format:
\begin{lstlisting}
struct virtio_scsi_ctrl {
le32 type;
-$\DIFdeltextcstwo{{\textbackslash}ldots}\DIFaddtextcstwo{\ldots}$
+$\ldots$
u8 response;
};
diff --git a/diffpreamble.tex b/diffpreamble.tex
index 8162004..9b5b7e5 100644
--- a/diffpreamble.tex
+++ b/diffpreamble.tex
@@ -27,6 +27,13 @@
\providecommand{\DIFaddendFL}{}
\providecommand{\DIFdelbeginFL}{}
\providecommand{\DIFdelendFL}{}
+\renewenvironment{DIFnomarkup}
+{
+\begingroup\color{black}\hypersetup{linkcolor=blue,urlcolor=blue}
+}
+{
+\endgroup
+}
%DIF END FLOATSAFE PREAMBLE
%% diffpreable.tex end
diff --git a/fixupdiff.pl b/fixupdiff.pl
new file mode 100644
index 0000000..e557e2b
--- /dev/null
+++ b/fixupdiff.pl
@@ -0,0 +1,77 @@
+my $bufferdiff=""; # closing \DIF...end tag remembered for the line held in $buffer (rule 3)
+my $diff=""; # end tag derived from the last \DIF...begin seen; "" when none is pending
+my $buffer=""; # a \begin/\end{enumerate|itemize} line held back until the next line is read
+while (<>) { # filter: read the input line by line and print each (possibly rewritten) line
+ my $line = $_; # edits go to $line; the bare m// tests below still match the unmodified $_
+ if (m/%DIFDELCMD\s+<\s+\\begin{lstlisting}/) { # rule 1: a lstlisting whose lines carry the "%DIFDELCMD <" prefix starts here
+ $lstlisting=1; # stays set until the matching \end{lstlisting} line has been printed
+ $line =~s/%DIFDELCMD\s+</{\\lstset{escapechar=\\\$} /; # swap the prefix for an opening group that sets "$" as the listings escape char
+ }
+ if ($lstlisting) {
+ $line =~ s/%DIFDELCMD\s+< //; # drop the prefix so the line is no longer a LaTeX comment
+ if (not $line =~ m/\\(?:begin|end){lstlisting}/) { # listing body lines only
+ $line =~ s/([#&{} ])/\\$1/g; # backslash-escape '#', '&', braces and spaces
+ $line =~ s/(.*)/\$\\DIFdel{$1}\$/; # wrap the line text in $\DIFdel{...}$
+ }
+ #print "%FIXED BY RULE 1\n";
+ }
+ #In section headings, replace begin/end with begin/endFL,
+ #but be careful in case some tag spills over to the next
+ #line
+ if (m/\\(section|subsection|subsubsection|paragraph)/ and m/DIF/) { # rule 2: line has a sectioning command and a DIF marker
+ my @list = split(/(\\DIF(?:add|del)(?:begin|end)(?:FL)?)/, $line, -1); # capturing split: tags land at odd indices, surrounding text at even ones
+ #if there's only one tag, don't touch it:
+ #matching one is on the other line
+ if ($#list >= 5) {
+ #if first tag is end, don't touch it - matching
+ #begin is on the previous line
+ if ($list[1] =~ m/begin$/) {
+ $list[1] .= "FL";
+ }
+ #if last tag is begin, don't touch it - matching
+ #end is on the next line
+ if ($list[$#list - 1] =~ m/end$/) {
+ $list[$#list - 1] .= "FL";
+ }
+ }
+ for (my $i = 3; $i <= $#list - 3; $i += 2) { # every tag strictly between the first and the last one
+ if (not $list[$i] =~ m/FL$/) {
+ $list[$i] .= "FL";
+ }
+ }
+ $line = join("", @list); # reassemble the line from text pieces and (renamed) tags
+ #print "%FIXED BY RULE 2\n";
+ }
+ #detect where we have DIFbegin/end cross
+ #enumerate/itemize environments and fix up
+ if (m/\\DIF(?:add|del)(?:begin|end)/) {
+ my @list = split(/(\\DIF(?:add|del)(?:begin|end)(?:FL)?)/, $line, -1);
+ $diff = $list[$#list - 1]; # last tag on this line
+ if ($diff =~ m/begin/) {
+ $diff =~ s/begin/end/; # turn it into the end tag that would close it
+ } else {
+ $diff = ""; # last tag was an end tag: nothing is left pending
+ }
+ }
+ if ($diff ne "" and m/\\(?:begin|end){(?:enumerate|itemize)}$/ and not m/\\DIF/) { # list-environment line without a tag of its own while a begin is pending
+ $buffer = $line;
+ $bufferdiff = $diff;
+ $line = ""; # nothing is printed for this line in this iteration
+ #print "%BUFFERED BY RULE 3: $bufferdiff\n";
+ }
+ if ($buffer ne "" and $line ne "") { # a line is being held and the current line is not itself held
+ if (m/^(\\DIF(?:add|del)end(?:FL)?)/ and $bufferdiff ne $1) { # current line opens with an end tag other than the remembered one
+ $line =~ s/^(\\DIF(?:add|del)end(?:FL)?)//; # remove that leading end tag from the current line
+ $buffer =~ s/(\\(?:begin|end){(?:enumerate|itemize)})$/$bufferdiff$1/; # put the remembered end tag in front of the held \begin/\end
+ #print "%FIXED BY RULE 3: $bufferdiff\n";
+ }
+ print $buffer; # release the held line ahead of the current one
+ $buffer = "";
+ $bufferdiff = "";
+ }
+ print $line; # empty string when the line was buffered above
+ if (m/%DIFDELCMD\s+<\s+\\end{lstlisting}/) { # end of a rule-1 listing
+ print "}\n"; # close the group opened on the \begin{lstlisting} line
+ $lstlisting=0;
+ }
+}
diff --git a/makediff.sh b/makediff.sh
index 16eba2d..7d64c93 100755
--- a/makediff.sh
+++ b/makediff.sh
@@ -9,31 +9,21 @@ export DATESTR=${DATESTR:-`cat REVISION-DATE`}
MAIN=$1
PATH=.:${PATH}
cur="$PWD"
-oldrev=`git rev-list -1 origin/tags/v1.0-cs01`
+oldrev=`git rev-list -1 origin/tags/v1.0-cs02`
newrev=`git rev-list -1 HEAD`
rm -fr old new
git clone $PWD old
cd "${cur}/old"
git checkout $oldrev
-##suppress diff of title
-#git cherry-pick 0adee486ab987c3e98c5f31b51cc963d8bb6fff4
-##suppress diff of changelog
-#git cherry-pick a41f3813a748e7d279cb6eb82f3c0afde4a3243a
-#git cherry-pick fbfb402e69cdd9279c44b7684612e6f81df99e6d
-#git cherry-pick 9f240fe0e718bf9b1e502e02916db9d8fede304b
-#git cherry-pick a02605f9945f450ecaadf86736741de2e2c2e788
-#git cherry-pick 175e797beede8aea840102bee9b70bb08190153d
while read -r rev; do
echo "Applying $rev"
git cherry-pick `git rev-list -1 -F --grep "$rev" $newrev` || exit 1
done << 'EOF'
-formatting: escape \ldots in lstlisting
-formatting: mark change manually as changed in cs02
-cl: remove changelog for cs01
-cl-os: prepare changelog for v1.0 cs02
-title: update previous version to cs01
-changelog: list acknowledgement change
-changelog: typo fixup: formatting: formatting
+Revert: formatting: mark change manually as changed in cs02
+cl: move out cs02 changelog
+cl: drop contents temporarily
+changelog: comment out header
+changelog: disable markup
EOF
#mv specvars.tex specvars-orig.tex
@@ -64,6 +54,8 @@ sed 's/\\footnote{/\\footnote {/' new/flat.tex > new/flat-fixed.tex
#wget http://mirror.math.ku.edu/tex-archive/support/latexdiff/latexdiff-fast
#chmod +x latexdiff-fast
latexdiff-fast --config \
-"FLOATENV=(?:figure|longtable|table|tabular|plate)[\w\d*@]*" \
- --append-safecmd=field --append-textcmd=mmioreg --ignore-warnings -p diffpreamble.tex old/flat-fixed.tex new/flat-fixed.tex > virtio-diff.tex
-#perl -pi fixupdiff.pl virtio-diff.tex
+"FLOATENV=(?:figure|longtable|table|tabular|plate|lstlisting|note|enumerate|itemize)[\w\d*@]*,PICTUREENV=(?:picture|DIFdeltextcstwo|DIFnomarkup|lstlisting)[\w\d*@]*" \
+ --append-safecmd=field --append-textcmd=mmioreg \
+--ignore-warnings -p diffpreamble.tex old/flat-fixed.tex \
+new/flat-fixed.tex > virtio-diff-tofix.tex
+perl fixupdiff.pl virtio-diff-tofix.tex > virtio-diff.tex
diff --git a/newdevice.tex b/newdevice.tex
index c7e6221..28e1c7b 100644
--- a/newdevice.tex
+++ b/newdevice.tex
@@ -22,8 +22,9 @@ configuration information (the network device does this for filtering,
otherwise the table in the config space could potentially be very
large).
-Remember that configuration fields over 32 bits wide might not be
-atomically writable by the driver.
+Remember that configuration fields over 32 bits wide might not be atomically
+writable by the driver. Therefore, no writeable field which triggers an
+action ought to be wider than 32 bits.
\section{What Device Number?}\label{sec:Creating New Device Types / What Device Number?}
diff --git a/title.tex b/title.tex
index 89f94b3..00eea75 100644
--- a/title.tex
+++ b/title.tex
@@ -20,10 +20,10 @@
\end{oasistitlesection}
\begin{oasistitlesection}{Previous version}
-\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs01/tex/}
+\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs02/tex/}
{}(Authoritative)\newline
-\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs01/virtio-v1.0-cs01.pdf}\newline
-\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs01/virtio-v1.0-cs01.html}
+\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs02/virtio-v1.0-cs02.pdf}\newline
+\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs02/virtio-v1.0-cs02.html}
\end{oasistitlesection}
\begin{oasistitlesection}{Latest version}
diff --git a/virtio-ring.h b/virtio-ring.h
index aa01d92..5a1e87d 100644
--- a/virtio-ring.h
+++ b/virtio-ring.h
@@ -53,7 +53,7 @@
/* Support for indirect descriptors */
#define VIRTIO_F_INDIRECT_DESC 28
-/* Support for avail_idx and used_idx fields */
+/* Support for avail_event and used_event fields */
#define VIRTIO_F_EVENT_IDX 29
/* Arbitrary descriptor layouts. */