diff options
51 files changed, 10640 insertions, 0 deletions
diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..947f120 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.png binary diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..67381d0 --- /dev/null +++ b/README.txt @@ -0,0 +1,8 @@ +This is one component of a Work Product that also includes:
+
+ Abstract: abstract.tex
+ Introduction: introduction.tex
+ Main specification content: content.tex
+ Conformance targets and clauses: conformance.tex
+ Acknowledgements: acknowledgements.tex
+ TeX source files for generating output in pdf and html format.
diff --git a/REVISION b/REVISION new file mode 100644 index 0000000..6a9f46a --- /dev/null +++ b/REVISION @@ -0,0 +1 @@ +virtio-v1.0-cs04 diff --git a/REVISION-DATE b/REVISION-DATE new file mode 100644 index 0000000..d14a9a6 --- /dev/null +++ b/REVISION-DATE @@ -0,0 +1 @@ +03 March 2016 @@ -0,0 +1,5 @@ +"VIM settings for correct formatting. They are activated by adding the +"following settings (without the " symbol) as last two lines in $HOME/.vimrc: +"set secure +"set exrc +set textwidth=65 diff --git a/abstract.tex b/abstract.tex new file mode 100644 index 0000000..3e87d2f --- /dev/null +++ b/abstract.tex @@ -0,0 +1,11 @@ +This document describes the specifications of the ``virtio'' family of +devices. These devices are found in virtual environments, yet by +design they look like physical devices to the guest +within the virtual machine - and this +document treats them as such. This similarity allows the guest to use standard +drivers and discovery mechanisms. + +The purpose of virtio and this specification is that virtual +environments and guests should have a straightforward, efficient, +standard and extensible mechanism for virtual devices, rather +than boutique per-environment or per-OS mechanisms. 
diff --git a/acknowledgements.tex b/acknowledgements.tex new file mode 100644 index 0000000..53942b0 --- /dev/null +++ b/acknowledgements.tex @@ -0,0 +1,47 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Acknowledgements +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\chapter{Acknowledgements}\label{chap:Acknowledgements} + +The following individuals have participated in the creation of this specification and are gratefully acknowledged: + +\begin{oasistitlesection}{Participants} +Amit Shah, Red Hat \newline +Amos Kong, Red Hat \newline +Anthony Liguori, IBM \newline +Bruce Rogers, Novell \newline +Bryan Venteicher, NetApp \newline +Cornelia Huck, IBM \newline +Daniel Kiper, Oracle \newline +Geoff Brown, Machine-to-Machine Intelligence (M2MI) Corporation \newline +Gershon Janssen, Individual Member \newline +James Bottomley, Parallels IP Holdings GmbH \newline +Luiz Capitulino, Red Hat \newline +Michael S. Tsirkin, Red Hat \newline +Paolo Bonzini, Red Hat \newline +Pawel Moll, ARM \newline +Richard Sohn, Alcatel-Lucent \newline +Rusty Russell, IBM \newline +Sasha Levin, Oracle \newline +Sergey Tverdyshev, Thales e-Security \newline +Stefan Hajnoczi, Red Hat \newline +Tom Lyon, Samya Systems, Inc. 
\newline +\end{oasistitlesection} + +The following non-members have provided valuable feedback on this +specification and are gratefully acknowledged: + +\begin{oasistitlesection}{Reviewers} +Andrew Thornton, Google \newline +Arun Subbarao, LynuxWorks \newline +Brian Foley, ARM \newline +David Alan Gilbert, Red Hat \newline +Fam Zheng, Red Hat \newline +Gerd Hoffmann, Red Hat \newline +Jason Wang, Red Hat \newline +Laura Novich, Red Hat \newline +Patrick Durusau, Technical Advisory Board, OASIS \newline +Thomas Huth, Red Hat \newline +Yan Vugenfirer, Red Hat / Daynix \newline +Kevin Lo, MSI \newline +\end{oasistitlesection} diff --git a/changelog.tex b/changelog.tex new file mode 100644 index 0000000..4b89e97 --- /dev/null +++ b/changelog.tex @@ -0,0 +1,15 @@ +\chapter{Revision History} +The following changes have been made since the previous version +of this specification: + +\begin{DIFnomarkup} +\begin{longtable}{ | c | c | c | p{0.4\textwidth} | } +\hline +\textbf{Revision} & \textbf{Date} & \textbf{Editor} & \textbf{Changes Made} \\ +\hline +\endhead +%\lbrack Rev number \rbrack & \lbrack Rev Date \rbrack & \lbrack Modified By \rbrack & \lbrack Summary of Changes \rbrack \\ +%\hline +\input{cl-os.tex} +\end{longtable} +\end{DIFnomarkup} diff --git a/cl-cs01.tex b/cl-cs01.tex new file mode 100644 index 0000000..234e420 --- /dev/null +++ b/cl-cs01.tex @@ -0,0 +1,86 @@ +418 & 11 Aug 2014 & Michael S. 
Tsirkin & { Acknowledge input from +Brian Foley + +See \ref{chap:Acknowledgements} +} + \\ +417 & 11 Aug 2014 & Pawel Moll & { VIRTIO-110: ARM's feedback for MMIO chapter, clarifications + +\begin{itemize} + \item Extra clarifications for QueueReady and ConfigGeneration + \item Added alignment requirement section, to formalise + hidden assumptions about register accesses +\end{itemize} + +See \ref{sec:Virtio Transport Options / Virtio Over MMIO / MMIO +Device Register Layout}, \ref{devicenormative:Virtio Transport +Options / Virtio Over MMIO / MMIO Device Register Layout} and +\ref{drivernormative:Virtio Transport +Options / Virtio Over MMIO / MMIO Device Register Layout} +} + \\ +\hline +\hline +416 & 05 Aug 2014 & Pawel Moll & { VIRTIO-110: ARM's feedback for MMIO chapter, legacy section + +Make it clear that the legacy section is non-normative, +removing all MUSTs. + +See \ref{sec:Virtio Transport Options / Virtio Over MMIO / Legacy +interface}. + } \\ +\hline +415 & 05 Aug 2014 & Pawel Moll & { VIRTIO-110: ARM's feedback for MMIO chapter, trivial changes +\begin{itemize} +\item Typos and language mistakes in 4.2, 4.2.1, 4.2.2 and 4.2.2.2. +\item Extra clarifications for InterruptACK. +\end{itemize} + } \\ +\hline +414 & 04 Aug 2014 & Michael S. Tsirkin & { legacy: grammar fixup + +Legacy devices are "they" not "it". + +See \ref{sec:General Initialization And Device Operation / Device +Initialization / Legacy Interface: Device Initialization} + +Resolves VIRTIO-113 + } \\ +\hline +413 & 04 Aug 2014 & Michael S. Tsirkin & { legacy: consistently use past tense + +Paragraph with general description of feature negotiation +for legacy devices mixed present and past tense. +As rest of legacy sections all use past tense, +fix the only instance of the present tense: +s/do/did/ for consistency. 
+ +It might be argued that legacy devices still have these +properties so present tense is more appropriate, on the +other hand, using the past tense helps stress the fact +that current spec does not attempt to fully describe the legacy +device/driver behaviour: this text is only here to serve as +motivation for the transitional device/driver requirements. + +See \ref{sec:General Initialization And Device Operation / Device +Initialization / Legacy Interface: Device Initialization} + +Resolves VIRTIO-112 + } \\ +\hline +412 & 30 Jul 2014 & Michael S. Tsirkin & { VIRTIO-111: Fix minor typos + +Fix minor typos as reported in ARM's feedback. + +See \ref{drivernormative:Basic Facilities of a Virtio Device / +Device Configuration Space}, \ref{sec:Basic Facilities of a +Virtio Device / Device Configuration Space / Legacy Interface: +Device Configuration Space}, \ref{sec:General Initialization And +Device Operation / Device Operation / Supplying Buffers to The +Device} and +\ref{drivernormative:Device Types / Network Device / Device +Operation / Control Virtqueue / Offloads State Configuration / +Setting Offloads State} + } + \\ +\hline diff --git a/cl-cs02.tex b/cl-cs02.tex new file mode 100644 index 0000000..61aa6fd --- /dev/null +++ b/cl-cs02.tex @@ -0,0 +1,52 @@ +448 & 22 Dec 2014 & Michael S. Tsirkin & {VIRTIO-120: virtio: +fix used element size + +General ring description lists size for +used ring elements as 4, it must be 8. + +See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues}. + } \\ +\hline +449 & 22 Dec 2014 & Cornelia Huck & {VIRTIO-125: block: fixup section levels + The specification for the configuration layout for block devices + should be its own subsection as for all other devices and not be + hidden beneath "Feature bits". + + The normative sections for device operation should appear under + the device operation section. +See \ref{sec:Device Types / Block Device / Device configuration +layout}. 
+ } \\ +\hline +450 & 22 Dec 2014 & Cornelia Huck & {VIRTIO-127: ccw: two-stage +indicators for legacy devices + + Some legacy devices will support two-stage queue indicators +and therefore + won't reject CCW_CMD_SET_IND_ADAPTER. Note this. + +See \ref{sec:Virtio Transport Options / Virtio over channel I/O / +Device Initialization / Setting Up Indicators / Legacy +Interfaces: A Note on Setting Up Indicators}. + } \\ +\hline +452 & 22 Dec 2014 & Michael S. Tsirkin & {VIRTIO-115: +formatting: escape {\textbackslash}ldots in lstlisting + + {\textbackslash}ldots does not work within lstlisting, the result is + {\textbackslash}ldots verbatim in the PDF output. + + To fix, make \$ an escape character, and escape the sequence: + \${\textbackslash}ldots\$ + +See \ref{sec:Device Types / SCSI Host Device / Device Operation / +Device Operation: controlq}. +} \\ +\hline +455,457 & 23 Dec 2014 & Michael S. Tsirkin & {acknowledgements: acknowledge dgilbert + + Acknowledge David Alan Gilbert for reporting VIRTIO-120. + +See \ref{chap:Acknowledgements}. +} \\ +\hline diff --git a/cl-cs03.tex b/cl-cs03.tex new file mode 100644 index 0000000..72925ca --- /dev/null +++ b/cl-cs03.tex @@ -0,0 +1,328 @@ +478 & 15 Mar 2015 & Cornelia Huck & {VIRTIO-129: legacy: +clean up virtqueue layout definitions + +Generalize "Legacy Interfaces: A Note on Virtqueue Layout" to allow +for different alignment requirements. Have pci and ccw refer to that +section for legacy devices. Remove the double definition of virtqueue +alignment (which referred to legacy, but was not tagged as such) from +the ccw section. 
+See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / +Legacy Interfaces: A Note on Virtqueue Layout}, \ref{sec:Virtio +Transport Options / Virtio Over PCI Bus / PCI-specific +Initialization And Device Operation / Device Initialization / +Virtqueue Configuration / Legacy Interface: A Note on Virtqueue +Configuration} and \ref{sec:Virtio Transport Options / Virtio +over channel I/O / Device Initialization / Configuring a +Virtqueue / Legacy Interface: A Note on Configuring a Virtqueue}. + } \\ +\hline +479 & 15 Mar 2015 & Cornelia Huck & {VIRTIO-118: +ccw: clarify basic channel commands + +"Basic channel commands" seems to be not as clear as it +could, so let's spell out which channel commands we refer to. +See \ref{sec:Virtio Transport Options / Virtio over channel I/O / +Basic Concepts}. +} \\ +\hline +479 & 15 Mar 2015 & Cornelia Huck & {VIRTIO-116: +ccw: allow WRITE_STATUS to fail + +We want to be able to fail setting a status on the device +(e.g. FEATURES_OK if the device can't work with the features +negotiated). +The easiest way to do that is to allow the device to fail the +WRITE_STATUS command by posting a command reject. +See \ref{sec:Virtio Transport Options / Virtio over channel I/O / +Device Initialization / Communicating Status Information}. + } \\ +\hline +485 & 15 Mar 2015 & Jason Wang & {VIRTIO-135: +virtio-ring: comment fixup + +virtio_ring.h included with spec has this text: +/* Support for avail_idx and used_idx fields */ +it should really refer to avail_event and used_event. +See Appendix \ref{sec:virtio-queue.h}. + } \\ +\hline +486 & 15 Mar 2015 & Jason Wang & {VIRTIO-136: +document idx field in virtqueue used ring + +Section \ref{sec:Basic Facilities of a Virtio Device / Virtqueues +/ The Virtqueue Used Ring} The Virtqueue Used Ring +listed the idx field, but never documented it. +See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / +The Virtqueue Used Ring}. 
+ } \\ +\hline +487 & 15 Mar 2015 & Rusty Russell & {VIRTIO-130: +ISR status: Fix incorrect diagram + +ISR status capability diagram has the "Device Configuration +Interrupt " as bit 0, and the "Queue Interrupt" as bit 1. This is +the wrong way around: it disagrees with the legacy +implementations, as well as the spec elsewhere. + +All current guests correctly follow the text, fix +up the diagram to match. +See \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI +Device Layout / ISR status capability}. + } \\ +\hline +488 & 15 Mar 2015 & Rusty Russell & {VIRTIO-133: +Change 4.1.5.1.2.1 to device requirement + +4.1.5.1.2.1 is incorrectly labelled as a driver requirement; it's +self-evidently referring to the device. +See \ref{sec:Conformance / Driver Conformance / PCI Driver +Conformance}, \ref{sec:Conformance / Device Conformance / PCI +Device Conformance} and \ref{devicenormative:Virtio +Transport Options / Virtio Over PCI Bus / PCI-specific +Initialization And Device Operation / Device Initialization / +Non-transitional Device With Legacy Driver}. + } \\ +\hline +504 & 22 Apr 2015 & Rusty Russell & {VIRTIO-137: +define the meaning and requirements of the len field. + +We said what it was for, and noted why. We didn't place any +requirements on it, nor clearly spell out the implications of its use. + +This clarification comes particularly from noticing that QEMU +didn't set len correctly, and philosophising over the correct value +when an error has occurred. +See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / +The Virtqueue Used Ring}, \ref{devicenormative:Basic Facilities +of a Virtio Device / Virtqueues / The Virtqueue Used Ring} and +\ref{sec:Basic Facilities of a Virtio Device / Virtqueues / The +Virtqueue Used Ring}. + } \\ +\hline +506 & 22 Apr 2015 & Michael S. 
Tsirkin & {VIRTIO-138: +multiple errors: Non-transitional With Legacy + +virtio 1.0 has two sections titled "Non-transitional Device With +Legacy Driver" the first says devices SHOULD fail, the second +says devices MUST fail. Clearly a mistake. + +Other issues: devices don't really fail - they cause drivers to +fail. second section seems to be in the wrong place, and also +have a section followed by subsection with no explanatory text in +between, which is ugly. +Finally, this text was originally written to handle buggy windows +drivers gracefully, but later we changed device IDs so it's not +really required there. Might be handy for some other buggy legacy +drivers, though no such drivers are known. + +To fix, drop the duplicate section variant, add some explanatory +text, clarify what does "same ID" mean here, and clarify +that the work-around is only needed if a buggy driver +is known to bind to a transitional device. + +See \ref{sec:Virtio Transport Options / Virtio +Over PCI Bus / PCI Device Layout / Non-transitional Device With +Legacy Driver: A Note on PCI Device Layout}, +\ref{devicenormative:Virtio Transport Options / Virtio Over PCI +Bus / PCI-specific Initialization And Device Operation / Device +Initialization / Non-transitional Device With Legacy Driver} and +\ref{sec:Virtio Transport Options / Virtio Over PCI Bus / +PCI-specific Initialization And Device Operation / Device +Initialization}. +} \\ +\hline +508 & 22 Apr 2015 & Michael S. Tsirkin & {VIRTIO-139: +pci: missing documentation for dealing with 64 bit config fields + +pci spec says what width access to use for 32, 16 and 8 +bit fields, but does not explicitly say what to do for +64 bit fields. As we have text that says driver must +treat 64 bit accesses as non-atomic, this seems +to imply driver should always do two 32 bit wide accesses. + +Let's make this an explicit requirement, and require +devices to support this. 
+ +See \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI +Device Layout}, \ref{drivernormative:Virtio Transport Options / +Virtio Over PCI Bus / PCI Device Layout}, +\ref{devicenormative:Virtio Transport Options / Virtio Over PCI +Bus / PCI Device Layout} and \ref{sec:Conformance / Driver +Conformance / PCI Driver Conformance}. + } \\ +\hline +509 & 22 Apr 2015 & Michael S. Tsirkin & {balloon: +MUST -> has to + +MUST shouldn't be used outside normative statements, +that's confusing. Replace with "has to". + +See \ref{sec:Device Types / Memory Balloon Device / Feature +bits}. + } \\ +\hline +510 & 22 Apr 2015 & Michael S. Tsirkin & {conformance: +add VIRTIO-137 statement links + +Add links to new conformance statements added to +resolve VIRTIO-137 (describing used ring entry len usage). + +See \ref{sec:Conformance / Device Conformance} +and \ref{sec:Conformance / Driver Conformance}. + } \\ +\hline +517 & 22 Apr 2015 & Michael S. Tsirkin & {acknowledgements: +contributors+minor fixup + +acknowledge feedback by Jason Wang, add Richard Sohn who +joined the TC, sort acknowledged reviewers alphabetically. + +See \ref{chap:Acknowledgements}. +} \\ +\hline +520 & 30 Apr 2015 & James Bottomley & {VIRTIO-140: +give explicit guidance on the use of 64 bit fields + +Just saying 64 bit fields may not be atomic is true, but less +helpful than it might be. Add explicit guidance about what the +consequences of non-atomicity are. + +See \ref{sec:Creating New Device Types / What Device +Configuration Space Layout?} +} \\ +\hline +521 & 30 Apr 2015 & Rusty Russell & {VIRTIO-134: +Spell out details of indirect elements in chains + +1) It's implied that a chain terminates with an indirect descriptor (since +VIRTIO-15) but we didn't spell out that a device MUST NOT +continue it. + +2) We allow [direct]->[direct]->[indirect], and qemu and +bhyve both accept it. Make it clear that this is valid, thus devices MUST +handle it. 
+ +See \ref{drivernormative:Basic Facilities of a Virtio Device / +Virtqueues / The Virtqueue Descriptor Table / Indirect +Descriptors} and \ref{devicenormative:Basic Facilities of a +Virtio Device / Virtqueues / The Virtqueue Descriptor Table / +Indirect Descriptors} +} \\ +\hline +522 & 30 Apr 2015 & Michael S. Tsirkin & {VIRTIO-141: +used ring: specify legacy behaviour for len field + +many hypervisors implemented len field incorrectly. +Document existing bugs in the legacy sections. + +See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues +/ The Virtqueue Used Ring/ Legacy Interface: The Virtqueue Used +Ring}, \ref{sec:Device Types / Network Device / Device Operation +/ Legacy Interface: Device Operation}, \ref{sec:Device Types / +Block Device / Device Operation / Legacy Interface: Device +Operation}, \ref{sec:Device Types / Console Device / Device +Operation / Legacy Interface: Device Operation}, \ref{sec:Device +Types / Memory Balloon Device / Device Operation / Legacy +Interface: Device Operation}, \ref{sec:Device +Types / SCSI Host Device / Device Operation / Legacy +Interface: Device Operation} and \ref{sec:Conformance / Legacy +Interface: Transitional Device and Transitional Driver +Conformance}. +} \\ +\hline +523 & 30 Apr 2015 & Michael S. Tsirkin & {VIRTIO-142: +entropy device: typo fix + +Current text: "The driver MUST examine the length written by the +driver" makes no sense. length is written by the device. + +See \ref{drivernormative:Device Types / Entropy Device / Device +Operation}. +} \\ +\hline +526 & 18 May 2015 & Michael S. Tsirkin & {VIRTIO-143: +balloon: transitional device support + +Support a transitional balloon device: this has the advantage of supporting +existing drivers, transparently, as well as transports that don't allow mixing +virtio 0 and virtio 1 devices. And balloon is an easy device to test, so it's +also useful for people to test virtio core handling of transitional devices. 
+ +Three issues with legacy hypervisors have been identified: +\begin{enumerate} +\item +Actual value is actually used, and is necessary for management +to work. Luckily 4 byte config space writes are now atomic. +When using old guests, hypervisors can detect access to the last byte. +When using old hypervisors, drivers can use atomic 4-byte accesses. +\item Hypervisors actually didn't ignore the stats from the first +buffer supplied. This means the values there would be +incorrect until hypervisor resends the request. +Add a note suggesting hypervisors ignore the 1st buffer. +\item QEMU simply over-writes stats from each buffer it gets. +Thus if driver supplies a different subset of stats +on each request, stale values will be there. +Require drivers to supply the same subset on each +request. This also gives us a simple way to figure out +which stats are supported. +\end{enumerate} + +See +\ref{sec:Device Types / Memory Balloon Device}, +\ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Discovery}, +\ref{sec:Conformance / Driver Conformance / Traditional Memory Balloon Driver Conformance}, +\ref{sec:Conformance / Device Conformance / Traditional Memory Balloon Device Conformance}, +\ref{sec:Conformance / Legacy Interface: Transitional Device and Transitional Driver Conformance}, +\ref{sec:Conformance / Device Conformance} and \ref{sec:Conformance / Driver Conformance}. +} \\ +\hline +527 & 18 May 2015 & Michael S. Tsirkin & {VIRTIO-126: +document deflate on oom + +Document the new option, and also clarify behaviour +without it. + +In particular, actual field is not the +actual number of pages in the balloon as +driver might do inflate followed by deflate. + +Also, device isn't always driven by interrupts, +driver can inflate/deflate in response to e.g. +memory compaction. 
+ +See \ref{sec:Device Types / Memory Balloon Device / Feature bits}, +\ref{sec:Device Types / Memory Balloon Device / Device Operation} and +\ref{drivernormative:Device Types / Memory Balloon Device / Device Operation}. +} \\ +\hline +528 & 18 May 2015 & Michael S. Tsirkin & {VIRTIO-123: +network device: xmit/receive cleanup + +Fix up multiple issues in xmit/receive sections: +\begin{itemize} + \item drop MAY/MUST/SHOULD outside normative statements + \item spell out conformance requirements for both drivers and + devices, for xmit and receive paths + \item document the missing VIRTIO_NET_HDR_F_DATA_VALID + \item document handling of unrecognized flag bits so we can extend + flags in the future, similar to VIRTIO_NET_HDR_F_DATA_VALID +\end{itemize} + +\ref{sec:Device Types / Network Device / Device Initialization}, +\ref{drivernormative:Device Types / Network Device / Device Operation / Packet Transmission}, +\ref{devicenormative:Device Types / Network Device / Device Operation / Packet Transmission}, +\ref{sec:Device Types / Network Device / Device Operation / Processing of Incoming Packets}, +\ref{sec:Conformance / Driver Conformance / Network Driver Conformance} and +\ref{sec:Conformance / Device Conformance / Network Device Conformance}. +} \\ +\hline +529 & 18 May 2015 & Michael S. Tsirkin & {VIRTIO-124: +network device: document VIRTIO_NET_F_CTRL_RX_EXTRA + +See +\ref{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering}, +\ref{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering}, +\ref{sec:Conformance / Driver Conformance / Network Driver Conformance} and +\ref{sec:Conformance / Device Conformance / Network Device Conformance}. +} \\ +\hline diff --git a/cl-csprd02.tex b/cl-csprd02.tex new file mode 100644 index 0000000..1e0f53d --- /dev/null +++ b/cl-csprd02.tex @@ -0,0 +1,1043 @@ +316 & 05 Mar 2014 & Michael S. 
Tsirkin & { legacy framing: scsi host + } \\ +\hline +315 & 05 Mar 2014 & Michael S. Tsirkin & { legacy message framing: console device + } \\ +\hline +314 & 05 Mar 2014 & Michael S. Tsirkin & { block: legacy message framing + } \\ +\hline +313 & 05 Mar 2014 & Michael S. Tsirkin & { message framing: rusty's comments + +generic note on message framing + +specific requirements listed for net device only + } \\ +\hline +312 & 05 Mar 2014 & Michael S. Tsirkin & { legacy devices: get rid of MUST assume + +as Rusty points out MUST assume is not very good requirement. + +clarify it. + } \\ +\hline +311 & 05 Mar 2014 & Michael S. Tsirkin & { transitional driver features: fix typos noted by Rusty + } \\ +\hline +310 & 03 Mar 2014 & Rusty Russell & { Formatting: use latex-style quoting everywhere. + +Doesn't look any different, but consistent. + } \\ +\hline +309 & 03 Mar 2014 & Rusty Russell & { Use ellipsis (aka \textbackslash ldots) everywhere. + +And use the ellipsis package, which makes it symmetrical. + } \\ +\hline +308 & 03 Mar 2014 & Rusty Russell & { PCI: Tighten requirements. + +1) make it clear that queue_enable is 0 on reset. + +2) device MUST present a VIRTIO_PCI_CAP_DEVICE_CFG if needed for type. + } \\ +\hline +307 & 02 Mar 2014 & Michael S. Tsirkin & { initialization: minor clarification + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + +"it" could refer to failed bit or the driver. + +clarify. + } \\ +\hline +306 & 02 Mar 2014 & Michael S. Tsirkin & { fix rfc2119 reference + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + +VIRTIO-68 + +Cc: Patrick Durusau <patrick@durusau.net> + } \\ +\hline +305 & 02 Mar 2014 & Michael S. Tsirkin & { VIRTIO-67: fix html redirects + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + +1. oasis switched to https + +2. Red Hat is www.redhat.com + +Cc: Patrick Durusau <patrick@durusau.net> + } \\ +\hline +304 & 02 Mar 2014 & Michael S. 
Tsirkin & { feedback: clarify device status bits + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + +VIRTIO-70 + +Cc: Patrick Durusau <patrick@durusau.net> + } \\ +\hline +303 & 02 Mar 2014 & Michael S. Tsirkin & { legacy interface: move to terminology + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + +VIRTIO-64 + +Cc: Patrick Durusau <patrick@durusau.net> + } \\ +\hline +302 & 02 Mar 2014 & Michael S. Tsirkin & { introduction: add link to 0.9.5 specification + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + +this version replaces it, so it's a non normative reference. + +VIRTIO-69 + +note: the link is added here but isn't used yet: will be used + +when we cleanup terminology definitions, by + +addressing VIRTIO-64 + +Cc: Patrick Durusau <patrick@durusau.net> + } \\ +\hline +301 & 02 Mar 2014 & Michael S. Tsirkin & { non-transitional devices with legacy drivers + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + +weaken hacky requirements helpful for graceful failure + +for non transitional PCI devices from MUST to SHOULD. + +It's nice to have but it's not like it makes things work, and you + +can avoid trouble simply by using the most recent drivers. + +also move them out to a separate section + } \\ +\hline +300 & 02 Mar 2014 & Michael S. Tsirkin & { conformance: document two types of devices + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + +document that there are two conformance levels + } \\ +\hline +299 & 02 Mar 2014 & Michael S. Tsirkin & { legacy device initialization: conformance statements + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + } \\ +\hline +298 & 02 Mar 2014 & Michael S. Tsirkin & { legacy virtqueue layout: conformance + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + } \\ +\hline +297 & 02 Mar 2014 & Michael S. 
Tsirkin & { legacy: make all notes on endian-ness conformance clauses + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + } \\ +\hline +296 & 02 Mar 2014 & Michael S. Tsirkin & { legacy feature bits: conformance statements + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + } \\ +\hline +295 & 02 Mar 2014 & Michael S. Tsirkin & { legacy: layout detection conformance + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + } \\ +\hline +294 & 02 Mar 2014 & Michael S. Tsirkin & { legacy pci layout: extra conformance statement + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + } \\ +\hline +293 & 02 Mar 2014 & Michael S. Tsirkin & { legacy pci layout: conformance statements + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + } \\ +\hline +292 & 02 Mar 2014 & Michael S. Tsirkin & { legacy: make message framing normative + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + +TODO: we really should be more specific + } \\ +\hline +291 & 02 Mar 2014 & Michael S. Tsirkin & { legacy: make note on legacy VQ endian-ness normative + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + } \\ +\hline +290 & 02 Mar 2014 & Michael S. Tsirkin & { Legacy Interface: Device Configuration Space + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + +legacy has no generation field. + +add SHOULD statement to document multi-byte field + +access rules. + } \\ +\hline +289 & 02 Mar 2014 & Michael S. Tsirkin & { legacy: clarify general note on endian-ness + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + +this is a non normative section. + +we merely mention that details are given + +for each device. + } \\ +\hline +288 & 02 Mar 2014 & Michael S. Tsirkin & { content: explain that legacy support is optional + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + } \\ +\hline +287 & 02 Mar 2014 & Michael S. 
Tsirkin & { drop /* LEGACY version was not little endian */ + +Two issues with the comment: + + - it mixes legacy documentation in main part of the spec + + - it says what format *isn't* - instead of what it *is* + +Now that we have documented that LE can mean + +legacy endian, there's no need for the comment. + +Resolves issues: + + VIRTIO-58 + +Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014 + } \\ +\hline +286 & 27 Feb 2014 & Rusty Russell & { Fixed path of linux version of virtio_ring.h + } \\ +\hline +285 & 26 Feb 2014 & Pawel Moll & { mmio: Clarify normative requirement on QueueNum + +Minor fix: add explicit reference to QueueNumMax in the + +normative paragraph describing QueueNum. + } \\ +\hline +284 & 26 Feb 2014 & Pawel Moll & { mmio: Fix double register macro +Minor fix: commit 238 wrapped register names in \textbackslash field\{\}s + +and modified one of the register table macros, but missed + +the other one. + } \\ +\hline +283 & 26 Feb 2014 & Pawel Moll & { mmio: Fix Device Tree example + +Minor fix: the size of 0x100 was obviously wrong, + +as it didn't allow for configuration space. + } \\ +\hline +282 & 26 Feb 2014 & Cornelia Huck & { introduction: typo in terminology section + +s/device/driver/ for the transitional driver description. + } \\ +\hline +281 & 26 Feb 2014 & Rusty Russell & { ccw: Fix requirements for processing adapter interrupts. + +We currently mandate that the driver clears the summary indicator + +before processing the queue indicator; this is bogus, as the requirement + +for interrupt avoidance is rather that the driver unsets the summary + +indicator before it stops looking at the queue indicator. + +In fact, the best way to get a race-free implementation of the interrupt + +handler is to process the queue indicators twice; let's add a recommendation + +to do that. 
+ } \\ +\hline +280 & 26 Feb 2014 & Rusty Russell & { VIRTIO-45: Add a reserved ID for Timer/Clock device + +Just add a reserved ID for Timer/Clock device. There is no work + +on it yet but it is nice to have the ID which could be used safely + +in preliminary implementations. + } \\ +\hline +279 & 26 Feb 2014 & Rusty Russell & { VIRTIO-28: Deprecate balloon device, add number for new one. + } \\ +\hline +278 & 26 Feb 2014 & Rusty Russell & { Feedback: VIRTIO-77 Conformance clause. + +Now we have grouped all the normative statements, the conformance + +clauses for drivers and devices can simply reference them. + } \\ +\hline +277 & 26 Feb 2014 & Rusty Russell & { Feedback: Separate normative requirements for Reserved Feature Bits. + } \\ +\hline +276 & 26 Feb 2014 & Rusty Russell & { Feedback: SCSI: Separate normative and descriptive texts. + +This could use some more rigour, I think: there are still many + +implied requirements which could be called out. + } \\ +\hline +275 & 26 Feb 2014 & Rusty Russell & { Feedback: console \& entropy: separate normative and descriptive texts. + } \\ +\hline +274 & 26 Feb 2014 & Rusty Russell & { Feedback: block: separate normative and descriptive text. + } \\ +\hline +273 & 26 Feb 2014 & Rusty Russell & { Feedback: net: separate normative and instructional text. + } \\ +\hline +272 & 26 Feb 2014 & Rusty Russell & { Feedback: CCW: Separate normative and descriptive sections. + } \\ +\hline +271 & 26 Feb 2014 & Rusty Russell & { Feedback: MMIO: Separate normative and descriptive text. + +The section on initialization is now non-normative. + } \\ +\hline +270 & 26 Feb 2014 & Rusty Russell & { Feedback: PCI: Separate explanatory and normative text. + +Rather than treat selectors 0 and 1 as special, the wording for features + +is made more general (though still the same effect). 
+ +I split the interrupt handler into a separate subsection: it was + +misleading because it didn't handle configuration interrupts until + +the next section. It's also non-normative. + } \\ +\hline +269 & 26 Feb 2014 & Rusty Russell & { Feedback: Separate the rest of chapter 2 into normative vs explanatory. + +The big change here is in introducing new subsections for interrupt and notification + +suppression, and moving all requirements into them. + +The example processing loop is also moved into a note, to show clearly + +that it's not normative. + } \\ +\hline +268 & 26 Feb 2014 & Rusty Russell & { Feedback: Normative split for Basic Facilities of a Virtio Device / Virtqueues / Message Framing + } \\ +\hline +267 & 26 Feb 2014 & Rusty Russell & { Feedback: Normative split in Basic Facilities of a Virtio Device / Virtqueues + } \\ +\hline +266 & 26 Feb 2014 & Rusty Russell & { Feedback: split Basic Facilities feature bits and config space into normative. + +Split text into descriptive and normative. + } \\ +\hline +265 & 26 Feb 2014 & Rusty Russell & { Feedback: add normative marker. +From \url{http://docs.oasis-open.org/templates/TCHandbook/ConformanceGuidelines.html:} + + Normative statements MUST be referenceable so that a statement may be + + referenced from another part of a specification, but more importantly + + so they can be referenced from Conformance Clauses. + } \\ +\hline +264 & 26 Feb 2014 & Rusty Russell & { Feedback: 2.1 Device Status field: Separate description from normative. + +Start with explanation, progress to normative requirements. + } \\ +\hline +263 & 26 Feb 2014 & Rusty Russell & { Feedback: move legacy/transitional definitions into terminology. + } \\ +\hline +262 & 26 Feb 2014 & Rusty Russell & { Feedback: hoist the one legacy-related requirement out of legacy section. + +This requirement applies to any system which *did* have legacy drivers. 
+ } \\ +\hline +261 & 26 Feb 2014 & Rusty Russell & { Feedback: add old draft to normative references (VIRTIO-77) + } \\ +\hline +260 & 26 Feb 2014 & Rusty Russell & { Feedback: use proper list in introduction (VIRTIO-82) + +Also avoid extra spacing before footnote markers. + } \\ +\hline +259 & 26 Feb 2014 & Rusty Russell & { Feedback: move new device design section to Appendix. + +It's non-normative. + } \\ +\hline +258 & 26 Feb 2014 & Rusty Russell & { Feedback: Bug TAB-553 (VIRTIO-76) + +Haven't marked them non-normative yet, but it makes sense to put the header + +in an appendix. + } \\ +\hline +257 & 26 Feb 2014 & Rusty Russell & { Feedback: TAB-555 Bad sub-sectioning (VIRTIO-80) + } \\ +\hline +256 & 26 Feb 2014 & Rusty Russell & { Feedback: TAB-557 Spelling errors, etc (VIRTIO-75) + } \\ +\hline +255 & 26 Feb 2014 & Rusty Russell & { PCI: better document driver and device requirements + +Feedback: + +10) 4.1.3.1.2 Queue Vector Configuration + +Some of the information from section 8.4 needs to be moved to + +here, for example that the device may have an MSI-X table size + +other than 2048. + +Otherwise, this reads as though the MSI-X table must always have + +2048 entries. + +11) Please explicitly describe the device behavior when writing + +a vector value beyond the MSI-X table size. + +Address these comments. + +Cc: Arun Subbarao <asubbarao@lnxw.com> + } \\ +\hline +254 & 26 Feb 2014 & Rusty Russell & { feedback: minor wording cleanups + +We already mention requirement for natural width + +accesses for non device specific configuration. + +Don't repeat this in legacy section. + +Further, mention virtio pci structure in + +preamble to help link sections together. + +Cc: Arun Subbarao <asubbarao@lnxw.com> + +Conflicts: + + content.tex + } \\ +\hline +253 & 26 Feb 2014 & Rusty Russell & { SCSI: fix up more fields. +Some missing \textbackslash field\{\} markings, and a few redundant "the XXX field". 
+ } \\ +\hline +252 & 20 Feb 2014 & Rusty Russell & { SCSI: missing space. + } \\ +\hline +251 & 19 Feb 2014 & Rusty Russell & { Gratuitous Packet Sending: clarify wording. + +The device can ask, not the driver. + } \\ +\hline +250 & 19 Feb 2014 & Rusty Russell & { net: fix incorrect reference. + +It pointed into the block section for some reason. + } \\ +\hline +249 & 13 Feb 2014 & Cornelia Huck & { ccw: padding annotations + +Remove __packed__ annotation from all ccw structures that don't need it, + +and make the length requirements explicit for those that do. + +This is the part of the patch to resolve VIRTIO-56 that had been missed. + } \\ +\hline +248 & 12 Feb 2014 & Michael S. Tsirkin & { PCI: minor wording change + +Since access width requirement is a conformance clause, make it + +explicit that it applies to 4,2 and 1 byte fields. + +Also explain what happens to fields of other widths (such as + +the 6 byte MAC). + } \\ +\hline +247 & 12 Feb 2014 & Michael S. Tsirkin & { content: more strict conformance language + +Correct new language to explicitly use MAY/SHOULD/MUST + +in more places or simply drop the somewhat vague "can" where + +we are describing the only way to operate the device. + +Most of the changes are in the PCI section. + } \\ +\hline +246 & 12 Feb 2014 & Michael S. Tsirkin & { introduction: address lnovich comments + +generally list of buses is out of date, list all supported buses. + +Drop explicit mention of lguest since it's not part of the spec. + } \\ +\hline +245 & 12 Feb 2014 & Michael S. Tsirkin & { abstract: address lnovich comment + +lnovich@redhat.com suggested rewording abstract, + +making the following point: + +. from what point of view is virtio like a physical device? + + it's very different from host POV + +. "the guest" appears out of nowhere. It's the guest that runs + + in the vm of course. + +.
"not all that different" means similar so there's not need to + + be verbose + +Address this comment + } \\ +\hline +244 & 12 Feb 2014 & Rusty Russell & { VIRTIO-55: Add a reserved ID for GPU devices + +As existing work on virtio-gpu is using device ID 16, reflect this in + +the spec. This closes out VIRTIO-55. + +As per minutes: + \url{https://lists.oasis-open.org/archives/virtio/201402/msg00121.html} + } \\ +\hline +243 & 12 Feb 2014 & Rusty Russell & { Fix S390 normative references. + +As pointed out in TAB-539 and TAB-540: + +- Add an URL to the documents. (Unfortunately, there is no link that + + always points to the latest version.) + +- State that we include any future revisions as well. + +As per minutes: + \url{https://lists.oasis-open.org/archives/virtio/201402/msg00121.html} + } \\ +\hline +242 & 12 Feb 2014 & Rusty Russell & { ccw: Further use of RFC2119 language. + +Some more instances of MAY and SHOULD, as reported in TAB comments + +TAB-548 and TAB-550. + +As per minutes: + \url{https://lists.oasis-open.org/archives/virtio/201402/msg00121.html} + } \\ +\hline +241 & 12 Feb 2014 & Rusty Russell & { PCI: explicitly document ISR status field + +Feedback on ISR status register: + + It would be helpful if this section provided the meaning of each + + bit in the register. + +ISR use is scattered all around the place. + +Add a section describing the format and semantics. + +[ Merged to combine with new ISR-specific section --RR ] + +As per minutes: + \url{https://lists.oasis-open.org/archives/virtio/201402/msg00121.html} + +Cc: Arun Subbarao <asubbarao@lnxw.com> + } \\ +\hline +240 & 12 Feb 2014 & Rusty Russell & { PCI: consistent device/PCI configuration space + +Re section: + +4.1.3.4 Notification of Device Configuration Changes + +Feedback: + + Please use "PCI configuration space" and "device configuration + + state" consistently, without abbreviation. 
For example, from the + + first sentence it looks like "device configuration state" can be + + changed, but the first bullet claims it's "configuration space". + + So, which one? Does "configuration space" mean "PCI configuration + + space" or is it a synonym for "device configuration state"? + + Because those are two different things; the driver needs to know + + what exactly to rescan. + +As per minutes: + \url{https://lists.oasis-open.org/archives/virtio/201402/msg00121.html} + +Cc: Arun Subbarao <asubbarao@lnxw.com> + } \\ +\hline +239 & 12 Feb 2014 & Rusty Russell & { Feedback \#8: Applied. + +[ Includes fixup! removing MSI-X ] + +As per minutes: + \url{https://lists.oasis-open.org/archives/virtio/201402/msg00121.html} + } \\ +\hline +238 & 12 Feb 2014 & Rusty Russell & { Feedback \#7: Applied + +Some minor merging required. + +As per minutes: + \url{https://lists.oasis-open.org/archives/virtio/201402/msg00121.html} + } \\ +\hline +237 & 12 Feb 2014 & Rusty Russell & { Feedback \#6: Applied + +As per minutes: + \url{https://lists.oasis-open.org/archives/virtio/201402/msg00121.html} + } \\ +\hline +236 & 12 Feb 2014 & Rusty Russell & { Feedback \#5: Applied. + +As per minutes: + \url{https://lists.oasis-open.org/archives/virtio/201402/msg00121.html} + } \\ +\hline +235 & 12 Feb 2014 & Rusty Russell & { Feedback \#4: applied. + +As per minutes: + \url{https://lists.oasis-open.org/archives/virtio/201402/msg00121.html} + } \\ +\hline +234 & 12 Feb 2014 & Rusty Russell & { PCI: minor changes for previous patch. + } \\ +\hline +233 & 12 Feb 2014 & Rusty Russell & { PCI: rearrange it all + +This is the re-arrangement originally suggested by Rusty, + +except I made some fixes and also tweaked a couple of places + +where behaviour changes where suggested - if we want these, + +they should go in separately. + +Rearrange discovery section to make it clearer what goes on. + +Wording changes MUST/MAY/etc. Clarify cfg gateway use. No + +behavioural changes. 
+ +[ Merged "fixup! PCI: rearrange it all" --RR ] + +As per minutes: + \url{https://lists.oasis-open.org/archives/virtio/201402/msg00121.html} + } \\ +\hline +232 & 12 Feb 2014 & Rusty Russell & { PCI: rearrange it all + +This is the re-arrangement originally suggested by Rusty, + +except I made some fixes and also tweaked a couple of places + +where behaviour changes were suggested - if we want these, + +they should go in separately. + +Rearrange discovery section to make it clearer what goes on. + +Wording changes MUST/MAY/etc. Clarify cfg gateway use. No + +behavioural changes. + } \\ +\hline +231 & 12 Feb 2014 & Rusty Russell & { C struct specifications. + +Explicitly specify that our C struct specifications are without padding, + +and add some definitions for our integer data types. + +[ Rusty - added /* comments */ and removed redundant old le* explanation ] + } \\ +\hline +225 & 10 Feb 2014 & Rusty Russell & { REVERT LAST 15 JUNK COMMITS. + +Back to r211. It's been a long day. + } \\ +\hline +224 & 10 Feb 2014 & Rusty Russell & { patch feedback-8-9.patch + } \\ +\hline +223 & 10 Feb 2014 & Rusty Russell & { patch feedback-8-7.patch + } \\ +\hline +222 & 10 Feb 2014 & Rusty Russell & { patch feedback-8-6.patch + } \\ +\hline +221 & 10 Feb 2014 & Rusty Russell & { patch feedback-8-5.patch + } \\ +\hline +220 & 10 Feb 2014 & Rusty Russell & { feedback: s/virtio header/virtio common configuration/ + +While most places now say virtio common configuration + +structure, some places still use the term virtio header. + +Since it's not necessarily before the + +common configuration anymore, rename it + +to virtio common configuration structure for consistency. + +Cc: Arun Subbarao <asubbarao@lnxw.com> + } \\ +\hline +219 & 10 Feb 2014 & Rusty Russell & { We'll add more non-normative sections with hints for + +implementing registers such as PCI class, status + +and command registers.
+ } \\ +\hline +218 & 10 Feb 2014 & Rusty Russell & { example code does not have to be optimal but it + +seems cleaner to disable interrupts after we + +recheck the ring empty state. + } \\ +\hline +217 & 10 Feb 2014 & Rusty Russell & { patch feedback-7-orig.patch + } \\ +\hline +216 & 10 Feb 2014 & Rusty Russell & { patch feedback-6.patch + } \\ +\hline +215 & 10 Feb 2014 & Rusty Russell & { patch feedback-5.patch + } \\ +\hline +214 & 10 Feb 2014 & Rusty Russell & { patch feedback-4.patch + } \\ +\hline +213 & 10 Feb 2014 & Rusty Russell & { PCI Section Rework + +1) Minor changes from must to MUST etc. + +2) More references using \textbackslash ref. + +3) Move section on capabilities first, before we talk about the common + + config layout. The previous order made sense for legacy. + +4) Make explicit subsections for each type of capability and move more + + information into them. + +5) Make it clear that there must be one or more. + +6) Include 'struct virtio_pci_cap cap;' in struct virtio_pci_cfg_cap to + + match virtio_pci_notify_cap. + +7) Explicitly note there's no way to negotiate the queue size for a + + legacy device. + +8) Fix old language on config change event: config is not in the pci + + configuration space. + +9) Explicitly state what the driver should do to use virtio_pci_cfg_cap. + } \\ +\hline +212 & 10 Feb 2014 & Rusty Russell & { C struct specifications. + +Explicitly specify that our C struct specifications are without padding, + +and add some definitions for our integer data types. + +[ Rusty - added /* comments */ and removed redundant old le* explanation ] + } \\ +\hline +207 & 07 Feb 2014 & Rusty Russell & { Cleanup and setup clarifications + +1) Explicitly allow drivers to read config space during feature + + negotiation. + +2) Add the concept of a "live" virtqueue, and explicitly disallow + + moving it backwards or changing descriptors. + } \\ +\hline +204 & 07 Feb 2014 & Rusty Russell & { block: legacy SCSI command fix. 
+ +When describing the historical layout requirements, it says + + "status field is a separate read-only buffer of size 1 byte, by itself." + +That's clearly wrong, as it says above "The final status byte is written by the device" + } \\ +\hline +203 & 06 Feb 2014 & Rusty Russell & { whitespace: make all examples unindented, and avoid tabs. + +This makes the formatting far nicer. Applying now as it touches almost + +all examples and layouts, so we can rebase future changes on top of + +common ground. + +(Based on feedback from Thomas Huth for one example, and generalized). + } \\ +\hline +201 & 31 Jan 2014 & Rusty Russell & { 3.2.1: Language tightening. + +1) Lots of "we", replace with "the driver". + +2) Use MAY and MUST NOT for spurious notifications. + +3) Don't refer to PCI configuration space for notification. + } \\ +\hline +198 & 29 Jan 2014 & Pawel Moll & { 4.1.2.5: Legacy: PCI Device Layout: fix PCI header fields order + +The order of the fields in the legacy PCI header seems to get + +messed up in the new spec, with the "Queue Address" moved + +behind "Queue Notify". 
According to the 0.9.5 version of the spec + +it should be: + +* Device Features 32 + +* Driver Features 32 + +* Queue Address 32 + +* Queue Size 16 + +* Queue Select 16 + +* Queue Notify 16 + +* Device Status 8 + +* ISR Status 8 + +-- + +1.8.3.2 + } \\ +\hline +197 & 29 Jan 2014 & Rusty Russell & { Feedback \#3: Feedback from Pranavkumar Sawargaonkar (VIRTIO_CONSOLE_F_EMERG_WRITE) + +Document: virtio-v1.0-csprd01 + +Number: 3 + +Date: Tue, 21 Jan 2014 15:09:54 +0530 +Link to Mail: \url{https://lists.oasis-open.org/archives/virtio-comment/201401/msg00037.html} + +Commenter name: Pranavkumar Sawargaonkar <pranavkumar@linaro.org> + +Approved at meeting 2014-01-28: + \url{https://lists.oasis-open.org/archives/virtio/201401/msg00054.html} + } \\ +\hline +196 & 29 Jan 2014 & Rusty Russell & { Feedback \#2: More feedback from Thomas Huth + +Document: virtio-v1.0-csprd01 + +Number: 2 + +Date: Fri, 10 Jan 2014 13:49:49 +0100 +Link to Mail: \url{https://lists.oasis-open.org/archives/virtio-comment/201401/msg00001.html} + +Commenter name: Thomas Huth <thuth@linux.vnet.ibm.com> + +Approved at meeting 2014-01-28: + \url{https://lists.oasis-open.org/archives/virtio/201401/msg00054.html} + } \\ +\hline +195 & 29 Jan 2014 & Rusty Russell & { Feedback \#1: fixes from Thomas Huth + +Document: virtio-v1.0-csprd01 + +Number: 1 + +Date: Fri, 10 Jan 2014 11:01:44 +0100 +Link to Mail: \url{https://lists.oasis-open.org/archives/virtio-comment/201401/msg00000.html} + +Commenter name: Thomas Huth <thuth@linux.vnet.ibm.com> + +Approved at meeting: 2014-01-28 + \url{https://lists.oasis-open.org/archives/virtio/201401/msg00054.html} + } \\ +\hline +194 & 28 Jan 2014 & Pawel Moll & { mmio: Move QueueReady register from offset 0x03c to 0x044 + +Legacy devices have QueueAlign register at 0x03c. To stay + +on the safe side and avoid any potential clashes (also to + +be able to abort any wrong writes), move it to previously + +unused offset 0x044. 
+ } \\ +\hline +193 & 23 Jan 2014 & Cornelia Huck & { virtio-ccw: fix set_revision payload definition + +The members of struct virtio_rev_info are big endian: use be16 types. + } \\ +\hline +191 & 23 Jan 2014 & Rusty Russell & { Formatting: fix feature bits for console device. + +Make them a description list like every other device. + } \\ +\hline +190 & 23 Jan 2014 & Rusty Russell & { Michael's patch adding MQ support added some u16s; they are u16 in + +legacy mode but should be le16 for modern devices. + } \\ +\hline +185 & 17 Jan 2014 & Rusty Russell & { net/multiqueue: tighten wording + } \\ +\hline +184 & 17 Jan 2014 & Rusty Russell & { Fixes for first WD front page. + +Based on feedback from Paul Knight <paul.knight@oasis-open.org>. + } \\ +\hline +179 & 03 Jan 2014 & Pawel Moll & { mmio: Obviously wrong notification register name + +The "4.2.3.3 Notifying The Device" section said "writing + +the index of the updated queue to the QueueNum". This + +is obviously wrong - should read "QueueNotify". + } \\ +\hline +178 & 16 Dec 2013 & Pawel Moll & { title \& acknowledgements: Make ARM less limited + +... by removing the "Limited" bit of the name. + } \\ +\hline +177 & 16 Dec 2013 & Pawel Moll & { 2.3.2 MMIO: Configuration space offset corrected + +The offset in the MMIO configuration space description + +(table 4.1) became wrong at some time (0x0fff). Fixed. + } \\ +\hline +176 & 12 Dec 2013 & Pawel Moll & { 2.3.2 MMIO: Notifications \& interrupts clarifications + +(Hopefully) clarified the way notifications are being + +passed between the device and the driver and about + +the meaning of the interrupt registers. + } \\ +\hline +175 & 12 Dec 2013 & Pawel Moll & { 1. Introduction: Removed left-over "PCI" + +The "Extensible" paragraph of the introduction still + +referred to "Virtio PCI devices". Changed to + +"Virtio devices". 
+ } \\ +\hline +174 & 12 Dec 2013 & Pawel Moll & { 2.3.2 MMIO: Further clarifications + +Clarified driver behaviour for out-of-spec MagicValue, + +Version and DeviceID values. + } \\ +\hline +173 & 12 Dec 2013 & Cornelia Huck & { ccw: feature bit endianness + +In contrast to the other values transmitted in ccw payload, feature bits + +are little endian. Fix it in the structure definition. + } \\ +\hline +172 & 12 Dec 2013 & Cornelia Huck & { ccw: clarify passing of subchannel id + +Make clear that the upper half of the register must be ignored, just + +like normal I/O instructions do. + } \\ +\hline +171 & 12 Dec 2013 & Cornelia Huck & { ccw: Tighten specification language. + +must -> MUST changes, removed inappropriate mays. + } \\ +\hline +170 & 09 Dec 2013 & Pawel Moll & { 2.3.2 MMIO: LaTeXisation + +Converted the register layout descriptions into + +tables. + +Also hardened the specification language, using + +MUSTs and MUST NOTs. + } \\ +\hline +168 & 09 Dec 2013 & Michael S. Tsirkin & { commands-pdf.tex: align title page + +Section titles are currently misaligned on the title + +page. This patch aligns them back. + } \\ +\hline +167 & 09 Dec 2013 & Michael S. Tsirkin & { net: document VIRTIO_NET_F_MAC_ADDR + +VIRTIO-50 + + virtio-spec: set mac address by a new vq command + +Approved Dec 3, 2013 + } \\ +\hline +166 & 09 Dec 2013 & Michael S. Tsirkin & { net: add _F_MQ support + +VIRTIO-49 + +Includes git commits: + + virtio-spec: fix two typos + + virtio-spec: virtio network device multiqueue support + + net: add note that you can defer rx queue init until mq enable. + +Approved Dec 3, 2013 + } \\ +\hline diff --git a/cl-csprd03.tex b/cl-csprd03.tex new file mode 100644 index 0000000..81f9bfb --- /dev/null +++ b/cl-csprd03.tex @@ -0,0 +1,400 @@ +399 & 27 Jun 2014 & Michael S. Tsirkin & { changelog: fill changelog since draft2 + +This will make review easier. + } \\ +\hline +398 & 27 Jun 2014 & Michael S.
Tsirkin & { acknowledgements: add draft 3 reviewers, sort + +Add new reviewers and sort by name. + } \\ +\hline +397 & 27 Jun 2014 & Michael S. Tsirkin & { add draft2 acknowledgements + +List people that provided comments on draft01 in the +acknowledgements section. Might be a nice way to encourage +reviews. + } \\ +396 & 26 Jun 2014 & Michael S. Tsirkin & { diff: back to green for added text + +using blue does not work well for html + + } \\ +\hline +393 & 26 Jun 2014 & Michael S. Tsirkin & { makediff: cleanup using begingroup/endgroup + +Pawel Moll found a way to work around xetex bugs +without mangling latexdiff output using perl: + +- define DIFbegin/DIFFend commands in preamble + +- pass --config FLOATENV= to latexdiff + +Use this in preference to the fixupdiff perl script. + + } \\ +\hline +391 & 26 Jun 2014 & Michael S. Tsirkin & { more latexdiff hacks + +- change link color from green to pinegreen. Looks better to me. + +- split footnotes out from their text, so that latexdiff + does not consider them as a unit + +- mark field command as safe for latexdiff, otherwise it's not shown in red + +- hack adding DIFaddtext within footnotes could not handle + case where latexdiff inserted multiple DIFadd within the + footnote. Instead, detect when footnote is within + DIFaddbegin/DIFdelbegin, add an extra DIFaddbegin/DIFdelbegin + within the footnote. + + } \\ +\hline +390 & 26 Jun 2014 & Michael S. Tsirkin & { diffpreamble: fix colors for links within diff + } \\ +\hline +389 & 26 Jun 2014 & Michael S. Tsirkin & { work around xetex bug + +Too many \textbackslash color directives produce corrupted output +and this warning: + +WARNING ** Color stack overflow. Just ignore. + +Use script to reduce \# of these directives. + + } \\ +\hline +388 & 26 Jun 2014 & Michael S. Tsirkin & { diffpreamble: remove duplicate text + +latexdiff adds some + + } \\ +\hline +387 & 26 Jun 2014 & Michael S.
Tsirkin & { makediffpdf.sh: tool to create marked-up diff + +make pdf diff using latexpand and latexdiff-fast +styles are set in diffpreamble.tex +in diff, links are coloured green instead of blue + +Must be run within a git-svn clone of the spec repository. + +Note: latexdiff has --flatten option, this and options +to select diff style don't seem to work well. + +So flatten by script myself, and add our own preamble. + + } \\ +\hline +386 & 25 Jun 2014 & Michael S. Tsirkin & { pci: minor formatting tweak + +Make table look better. Drop spaces that make +latexdiff stumble. + + } \\ +\hline +385 & 25 Jun 2014 & Michael S. Tsirkin & { fixup pci: switch from subsystem id to device id + +Patch sent to list (and applied by Rusty in + + pci: switch from subsystem id to device id + +) did not actually implement what commit log said +it implements. + +The result is wrong for transitional devices: + +Adding 0xfff works for net+block only; + +for transitional pci devices there is no fixed scheme: +\~{}/projects/qemu/include \# grep VIRTIO_ID hw/virtio/*.h + +hw/virtio/virtio-balloon.h:\#define VIRTIO_ID_BALLOON 5 + +hw/virtio/virtio-blk.h:\#define VIRTIO_ID_BLOCK 2 + +hw/virtio/virtio-net.h:\#define VIRTIO_ID_NET 1 + +hw/virtio/virtio-rng.h:\#define VIRTIO_ID_RNG 4 + +hw/virtio/virtio-scsi.h:\#define VIRTIO_ID_SCSI 8 + +hw/virtio/virtio-serial.h:\#define VIRTIO_ID_CONSOLE 3 + +\~{}/projects/qemu/include \# grep VIRTIO hw/pci/*.h + +hw/pci/pci.h:\#define PCI_DEVICE_ID_VIRTIO_NET 0x1000 + +hw/pci/pci.h:\#define PCI_DEVICE_ID_VIRTIO_BLOCK 0x1001 + +hw/pci/pci.h:\#define PCI_DEVICE_ID_VIRTIO_BALLOON 0x1002 + +hw/pci/pci.h:\#define PCI_DEVICE_ID_VIRTIO_CONSOLE 0x1003 + +hw/pci/pci.h:\#define PCI_DEVICE_ID_VIRTIO_SCSI 0x1004 + +hw/pci/pci.h:\#define PCI_DEVICE_ID_VIRTIO_RNG 0x1005 + +hw/pci/pci.h:\#define PCI_DEVICE_ID_VIRTIO_9P 0x1009 + +I am guessing TC went by commit log when it approved the change, +so fixing it up directly.
+ +Cc: Andrew Thornton <andrewth@google.com> + +Cc: Rusty Russell <rusty@ozlabs.org> + +Cc: Gerd Hoffmann <kraxel@redhat.com> + + } \\ +\hline +384 & 17 Jun 2014 & & { content.tex: VIRTIO-106: mention possibility of failing TMFs + +This completes the review of virtio-scsi based on observations +from Google. + + } \\ +\hline +383 & 16 Jun 2014 & & { fix erroneous reference to Subsystem Device ID + +Subsystem device ID only exists for PCI. + + } \\ +\hline +382 & 16 Jun 2014 & Rusty Russell & { small virtio-serial fix + +nr_ports does not exist in the spec. + + } \\ +\hline +381 & 09 Jun 2014 & & { virtio-scsi: support well-known logical units + +The REPORT LUNS well-known logical unit is useful because it lets you +retrieve information about all targets with a single command. It +also provides an easy way to send a no-op request. + + } \\ +\hline +380 & 09 Jun 2014 & & { consistent formatting of footnotes + +Put the indicator before punctuation, and terminate the footnote with +a period. + + } \\ +\hline +379 & 09 Jun 2014 & & { virtio-scsi: additional SHOULDification + + } \\ +\hline +378 & 09 Jun 2014 & & { virtio-scsi: fixes to protection information + +pi_bytesin is in the device-readable section. Document lack of residual +field. Use le32 instead of u32. + +This matches the new patch series that Nicholas sent for vhost-scsi. + +Cc: <nab@daterainc.com> + + } \\ +\hline +377 & 05 Jun 2014 & Rusty Russell & { PCI: remove duplicate paragraph. + +I chose the one which used the full nomenclature. + + } \\ +\hline +376 & 05 Jun 2014 & Rusty Russell & { pci: switch from subsystem id to device id + +Switch virtio pci to use standard device id instead of using the +subsystem id. + +Unfortunately, there's no system to the way KVM allocated +device IDs to virtio devices, we'll just have to +specify these using a table, and use a new range for +future devices. For existing devices this results in +two possible IDs that all drivers will need to match. 
+Unfortunate, but the cost is small. + +As a nice side effect, this allows us to make non-transitional +devices use IDs 0x40 and up, this reduces even further the +chance that a non transitional device will match legacy drivers. + +And, it's probably a good idea to allow drivers to match +specific subsystem IDs if they + +want to, so relax requirement for drivers to match all +subsystem/vendor ID configurations, but allow them to do so. +To avoid confusion, say "PCI Device ID" and +"PCI Subsystem ID" everywhere, prefix "PCI" +for other standard registers, for consistency. + +VIRTIO-102 + +Note: issue reporter suggested 0x10XX where XX is the virtio +device ID. This would conflict with legacy devices, which seem +to have used 7 IDs in the range 0x1000 to 0x103f without any +system. Let's use a new range 0x1040 to 0x107f for +non-transitional devices, and add a table documenting the +transitional IDs used in practice. + +(Approved at 2014-06-04 meeting: + + \url{https://lists.oasis-open.org/archives/virtio/201406/msg00013.html} ) + +Cc: Andrew Thornton <andrewth@google.com> + + } \\ +\hline +375 & 05 Jun 2014 & Rusty Russell & { pci: set ISR bit on config change with MSI-X + +config changes are slow path anyway, so we +can as well set ISR bit to help drivers detect changes. +This allows sharing config interrupts which is what +issue reporter seems to ask for. + +VIRTIO-104 + +(Approved at 2014-06-04 meeting: + + \url{https://lists.oasis-open.org/archives/virtio/201406/msg00013.html} ) + + } \\ +\hline +374 & 01 Jun 2014 & Michael S. Tsirkin & { NEEDS_RESET: trivial clarification + +If device sets NEEDS_RESET before DRIVER_OK, it +can't send notifications to driver. + +Make this clear. + + } \\ +\hline +373 & 22 May 2014 & Rusty Russell & { Fix build of document + +Error introduced in "VIRTIO-98: Add DEVICE_NEEDS_RESET": +seems that underscores in labels are verboten: + +[133] [134] (./virtio-v1.0-csprd02.aux + +! Missing \textbackslash endcsname inserted.
+ +<to be read again> + + \textbackslash unhbox + +l.45 ...ts: Device Status Field\}\}\{subsection.1\}\{\}\} + + } \\ +\hline +372 & 22 May 2014 & Rusty Russell & { content.tex: virtio-scsi review (VIRTIO-106) + +As prompted by Rusty, add a few more MUST/SHOULD items for both devices +and drivers. Clarify semantics of max_channel/max_id/max_lun, task_attr +and task management functions. + +(As per minutes of meeting 2014-05-20: + + \url{https://lists.oasis-open.org/archives/virtio/201405/msg00034.html} ) + + } \\ +\hline +371 & 22 May 2014 & Rusty Russell & { content.tex: add support for protection information (VIRTIO-108) + +This is a new feature that was suggested by Nicholas Bellinger, who + +also provided a prototype implementation for vhost-scsi. + +(As per minutes of meeting 2014-05-20: + + \url{https://lists.oasis-open.org/archives/virtio/201405/msg00034.html} ) + + } \\ +\hline +370 & 12 May 2014 & Rusty Russell & { VIRTIO-96: Assign device id to virtio input + +Assign device id to virtio input + +As passed at meeting 2014-05-06: + + \url{https://lists.oasis-open.org/archives/virtio/201405/msg00016.html} + + } \\ +\hline +369 & 12 May 2014 & Rusty Russell & { VIRTIO-52: Make mac field read only. + +As passed at meeting 2014-05-06: + + \url{https://lists.oasis-open.org/archives/virtio/201405/msg00016.html} + + } \\ +\hline +368 & 12 May 2014 & Rusty Russell & { VIRTIO-107: Clarify net mac commands. + +As passed at meeting 2014-05-06: + + \url{https://lists.oasis-open.org/archives/virtio/201405/msg00016.html} + + } \\ +\hline +367 & 12 May 2014 & Rusty Russell & { VIRTIO-98: Add DEVICE_NEEDS_RESET. + +As passed at meeting 2014-05-06: + + \url{https://lists.oasis-open.org/archives/virtio/201405/msg00016.html} + + } \\ +\hline +366 & 12 May 2014 & Rusty Russell & { VIRTIO-87: limit descriptor chain length even with INDIRECT. 
+ +As passed at meeting 2014-05-06: + + \url{https://lists.oasis-open.org/archives/virtio/201405/msg00016.html} + + } \\ +\hline +365 & 12 May 2014 & Rusty Russell & { VIRTIO-103: PCI: Note that turning off queue_enable is not supported. + +As passed at meeting 2014-05-06: + + \url{https://lists.oasis-open.org/archives/virtio/201405/msg00016.html} + + } \\ +\hline +364 & 12 May 2014 & Rusty Russell & { VIRTIO-103: PCI: require read-after-write on device_status reset. + +As passed at meeting 2014-05-06: + + \url{https://lists.oasis-open.org/archives/virtio/201405/msg00016.html} + + } \\ +\hline +363 & 12 May 2014 & Rusty Russell & { VIRTIO-99: Typo fixes. + +As passed at meeting 2014-05-06: + + \url{https://lists.oasis-open.org/archives/virtio/201405/msg00016.html} + + } \\ +\hline +362 & 07 May 2014 & Cornelia Huck & { net: fix device conformance sections + +For the network device, we had two device normative sections both called +"setting up receive buffers", neither of which was referenced in the +conformance section. + +Let's rename the second one to "processing of packets" which seems to +better match the actual contents and reference both of them from the +conformance statement for network devices. + +Resolves VIRTIO-97. + +Agreed on the 2014/05/06 TC meeting. + + } \\ +\hline +361 & 07 Apr 2014 & Michael S. Tsirkin & { conformance.tex: fix references to mmio + +Both device and driver conformance referred to ccw twice; let's add the +correct mmio references. + + } \\ +\hline diff --git a/cl-os.tex b/cl-os.tex new file mode 100644 index 0000000..da78bf8 --- /dev/null +++ b/cl-os.tex @@ -0,0 +1,134 @@ +540 & 11 Oct 2015 & Greg Kurz & {virtqueues: fix +trivial typo + +See +\ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Interrupt Suppression}. +} \\ +\hline +541 & 11 Oct 2015 & Paolo Bonzini & {virtio-blk: fix typo +in legacy framing requirements section + +See +\ref{sec:Device Types / Block Device / Legacy Interface: Framing Requirements}. 
+} \\ +\hline +545 & 18 Oct 2015 & Paolo Bonzini & {virtio-blk: restore VIRTIO_BLK_F_FLUSH and VIRTIO_BLK_F_CONFIG_WCE + +VIRTIO_BLK_F_CONFIG_WCE is important in order to achieve good performance +(up to 2x, though more realistically +30-40\%) in latency-bound workloads. +However, it was removed by mistake together with VIRTIO_BLK_F_FLUSH. + +In addition, even removing VIRTIO_BLK_F_FLUSH was probably not a great +idea, because it simplifies simple drivers (e.g. firmware) that are okay +with a writethrough cache but still need data to persist after power loss. +What really should have been removed is just the possibility that devices +not propose VIRTIO_BLK_F_FLUSH, but even that only deserves a "SHOULD" in +the new world of conformance statements. + +Restore these, with the following changes: + +* clarify and use conformance statements in order to define writeback +and writethrough caching according to what is commonly done by high-end +storage. + +* clarify (with conformance statements) the influence of the +VIRTIO_BLK_F_FLUSH feature on caching and how to proceed if only one of +VIRTIO_BLK_F_FLUSH and VIRTIO_BLK_F_CONFIG_WCE is negotiated. + +* strengthen the requirement for persisting writes to MUST after +a VIRTIO_BLK_T_FLUSH request (and in other cases too involving the +new features). + +The suggested behavior upon feature negotiation is okay for the Linux +implementation of virtio1, even after the implementation is modified to +support the two new features. + +This fixes VIRTIO-144. + +See \ref{sec:Device Types / Block Device}, +\ref{sec:Conformance / Driver Conformance / Block Driver Conformance} and +\ref{sec:Conformance / Device Conformance / Block Device Conformance}. +} \\ +\hline +546 & 18 Oct 2015 & Michael S. Tsirkin & {pci: clarify configuration access capability rules + +The point of the configuration access capability is to enable +access to other capabilities. The intent never was to allow +writes to a random place within device BARs. 
+Limiting drivers simplifies devices - and devices can always +add another capability if drivers ever want to access +some other range. + +This resolves VIRTIO-145. + +See \ref{drivernormative:Virtio Transport Options / Virtio Over +PCI Bus / PCI Device Layout / PCI configuration access +capability}. +} \\ +\hline +547 & 18 Oct 2015 & Michael S. Tsirkin & {add advice on transition from legacy interfaces + +Reading legacy chapters gives a hint about what changed, +let's help readers discover this useful shortcut. + +This resolves VIRTIO-146. + +See \ref{sec:Transition from earlier specification drafts}. +} \\ +\hline +554 & 16 Feb 2016 & Thomas Huth & {virtio-net: fix inconsistent legacy header size + + Current text says: + The legacy driver only presented num_buffers in the struct + virtio_net_hdr when VIRTIO_NET_F_MRG_RXBUF was not negotiated; + + Should be: + "\dots was negotiated \dots" instead of "\dots was not negotiated \dots" + + To be consistent with the following: + without that feature the structure was 2 bytes shorter. + +See \ref{sec:Device Types / Network Device / Device Operation / Legacy Interface: Device Operation}. +} \\ +\hline +555 & 16 Feb 2016 & Michael S. Tsirkin & {virtio header: tweak +change motivation + + The changes are not just to remove Linux assumptions, + we have also renamed ring->queue. + Tweak the header description accordingly. + +See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Helpers for Operating Virtqueues}. +} \\ +\hline +558 & 16 Feb 2016 & Michael S. Tsirkin & {rename virtio_ring.h to virtio_queue.h + + Since vring* and VRING* have been replaced with virtq* and VIRTQ* + respectively, rename the header virtio_ring.h to virtio_queue.h. + +See \ref{sec:virtio-queue.h}. +} \\ +\hline +559 & 16 Feb 2016 & Michael S. Tsirkin & {init: sort status bits + + Status bit order is inconsistent: they are neither in increasing + order nor in the order they are likely to be used. 
+ + The second approach seems more useful since there aren't + that many bits, so the numerical order does not help much. + + A typical order of use would be: + +\begin{itemize} +\item ACKNOWLEDGE +\item DRIVER +\item then either FAILED or FEATURES_OK +\item then either FAILED or DRIVER_OK +\item then DEVICE_NEEDS_RESET (if device detects an error) +\end{itemize} + + Sort the bits accordingly. + +See \ref{sec:Basic Facilities of a Virtio Device / Device Status Field}. +} \\ +\hline diff --git a/commands-html.tex b/commands-html.tex new file mode 100644 index 0000000..7831866 --- /dev/null +++ b/commands-html.tex @@ -0,0 +1,182 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% HTML specific commands +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\newcommand{\virtiographics}[3] +{ +\HCode{<div><img src="images/#1.png" alt="#3"/></div>} +} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% VIRTIO title sections +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\newcommand{\oasistitle}[1] +{ +\begin{spacing}{2} +\HCode{<div style="color: \#552681; font-size: 24pt;">} +\textbf{#1} +\HCode{</div>} +\end{spacing} +} + +\newcommand{\oasisstagelabel}[1] +{ +\begin{spacing}{2} +\HCode{<div style="color: \#552681; font-size: 18pt;">} +\textbf{#1} +\HCode{</div>} +\end{spacing} +} + +\newcommand{\oasisdate}[1] +{ +\begin{spacing}{2} +\HCode{<div style="color: \#552681; font-size: 18pt;">} +\textbf{#1} +\HCode{</div>} +\end{spacing} +} + +\newcommand{\oasisnoticelabel}[1] +{ +\hrule +\begin{spacing}{2} +\HCode{<div style="color: \#552681; font-size: 18pt;">} +\textbf{#1} +\HCode{</div>} +\end{spacing} +} + +\newcommand{\oasisspecificationuris}[1] +{ +\HCode{<div style="color: \#552681; font-size: 12pt;">} +\textbf{#1} +\HCode{</div>} +} + +\newenvironment{oasistitlesection}[1] +{ +\HCode{<div style="color: \#552681; font-size: 10pt;">} +\textbf{#1:} +\HCode{</div>} +\HCode{<div style="margin-left: 20px;">} 
+} +{ +\HCode{</div>} +} + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% VIRTIO type attributes +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% new environment: VIRTIO type attribute +\newenvironment{virtioattribute}[2] +{ +\HCode{<div style="width: 200px; float: left;">}\texttt{\textbf{#1}}\HCode{</div>} +\HCode{<span>}#2\HCode{</span>}\\ +\begin{tabular}{p{20px}p{200px}} +&} +{\\ +\end{tabular}\\ +\vspace{0.05in} +} + +% new command: VIRTIO type attribute value +\newcommand{\virtioattributevalue}[2] +{ +\texttt{\textbf{#1}}\\ +\indent +Value: #2\\ +} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% VIRTIO properties +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% new environment: VIRTIO property +\newenvironment{virtioproperty}[2] +{ +\vspace{0.05in} +\begin{tabular}{p{20px}p{200px}p{200px}} +\multicolumn{2}{p{20px}}{\textbf{\texttt{#1}}} & #2\vspace{0.05in}\\ +} +{ +\end{tabular}\\ +\vspace{0.05in}\\ +} + +% new command: VIRTIO property detail +\newcommand{\virtiopropertydetail}[2]{& #1: & #2\\} + +% new command: VIRTIO property comment +\newcommand{\virtiopropertycomment}[1]{\\&\multicolumn{2}{p{20px}}{#1}\\} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% VIRTIO allowable actions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% new environment: VIRTIO allowable action +\newenvironment{virtioallowableaction}[1] +{ +\textbf{#1} +\vspace{0.05in}\\ +\begin{tabular}{p{0.1in}p{1in}p{4.8in}} +} +{ +\end{tabular}\\ +\vspace{0.05in}\\ +} + +% new command: VIRTIO allowable action detail +\newcommand{\virtioallowableactiondetail}[2]{& #1: & #2\\} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% VIRTIO version properties +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% new environment: VIRTIO version property +\newenvironment{virtioversionproperty}[2] +{ +\begin{tabular}{p{3in}l} +\texttt{\textbf{#1}} & #2\\ 
+\end{tabular}\\\ +\begin{tabular}{p{0.3in}p{5.7in}} +&} +{\\ +\end{tabular}\\ +\vspace{0.05in} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% VIRTIO general description +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% new environment: VIRTIO description +\newenvironment{virtiodescription}[1] +{ +\begin{tabular}{p{20px}p{600px}} +\multicolumn{2}{l}{\textbf{#1:}}\\ +&\setlength{\topsep}{0in}\setlength{\parsep}{0in}\setlength{\partopsep}{0in} } +{\\ +\end{tabular}\\ +\vspace{0.05in} +} + + +\newenvironment{virtioexample}[1] +{ +\HCode{<div style="padding: 5px; background-color: \#E6E6E6;">} +\textbf{Example:}\\ +Request: \texttt{#1-request.log}\\ +Response: \texttt{#1-response.log} +\HCode{</div>} +\vspace{0.2in} +} + diff --git a/commands-pdf.tex b/commands-pdf.tex new file mode 100644 index 0000000..8e278be --- /dev/null +++ b/commands-pdf.tex @@ -0,0 +1,198 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% PDF specific commands +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\newcommand{\virtiographics}[3] +{ +\begin{figure}[htb] +\centering +\includegraphics[#2]{images/#1.pdf} +\caption{#3} +\end{figure} +\FloatBarrier +} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% VIRTIO title sections +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\newcommand{\oasistitle}[1] +{ +\begin{spacing}{1.5} +\fontspec[Color=552681]{Arial} +\fontsize{24}{24} +\textbf{\noindent#1} +\end{spacing} +} + +\newcommand{\oasisstagelabel}[1] +{ +\begin{spacing}{1.1} +\fontspec[Color=552681]{Arial} +\fontsize{18}{18} +\textbf{\noindent#1} +\end{spacing} +} + +\newcommand{\oasisdate}[1] +{ +\begin{spacing}{1.1} +\fontspec[Color=552681]{Arial} +\fontsize{18}{18} +\textbf{\noindent#1} +\end{spacing} +} + +\newcommand{\oasisnoticelabel}[1] +{ +\begin{spacing}{1.5} +\fontspec[Color=552681]{Arial} +\fontsize{18}{18} +\textbf{\noindent#1} +\end{spacing} +} + 
+\newcommand{\oasisspecificationuris}[1] +{ +{ +\fontspec[Color=552681]{Arial} +\fontsize{12}{12} +\textbf{\noindent#1} +} +} + +\newenvironment{oasistitlesection}[1] +{ +\setlength{\tabcolsep}{0in}\begin{tabular}{p{0in}p{5.8in}} +\multicolumn{2}{l}{\fontspec[Color=552681]{Arial}\fontsize{10}{10}\textbf{#1:}}\\[-0.05in] +&\setlength{\topsep}{0in}\setlength{\parsep}{0in}\setlength{\partopsep}{0in} } +{ +\end{tabular} +\vspace{-0.11in} +} + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% VIRTIO type attributes +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% new environment: VIRTIO type attribute +\newenvironment{virtioattribute}[2] +{ +\vspace{0.05in} +\begin{tabular}{p{2in}l} +\texttt{\textbf{#1}} & #2\\ +\end{tabular}\\\ +\begin{tabular}{p{0.3in}p{5.7in}} +&} +{\\ +\end{tabular}\\ +\vspace{0.05in} +} + +% new command: VIRTIO type attribute value +\newcommand{\virtioattributevalue}[2] +{ +\vspace{0.05in} +\noindent +\texttt{\textbf{#1}}\\ +\indent +Value: #2 +\vspace{0.05in} +} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% VIRTIO properties +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% new environment: VIRTIO property +\newenvironment{virtioproperty}[2] +{ +\vspace{0.05in} +\begin{tabular}{p{0.1in}p{2.3in}p{3.5in}} +\multicolumn{2}{p{1.8in}}{\textbf{\texttt{#1}}} & #2\vspace{0.05in}\\ +} +{ +\end{tabular}\\ +\vspace{0.05in}\\ +} + +% new command: VIRTIO property detail +\newcommand{\virtiopropertydetail}[2]{& #1: & #2\\} + +% new command: VIRTIO property comment +\newcommand{\virtiopropertycomment}[1]{\\&\multicolumn{2}{p{5.7in}}{#1}\\} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% VIRTIO allowable actions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% new environment: VIRTIO allowable action +\newenvironment{virtioallowableaction}[1] +{ +\vspace{0.05in} +\noindent +\textbf{#1} +\vspace{0.05in}\\ +\begin{tabular}{p{0.1in}p{1in}p{4.8in}} 
+} +{ +\end{tabular}\\ +\vspace{0.05in}\\ +} + +% new command: VIRTIO allowable action detail +\newcommand{\virtioallowableactiondetail}[2]{& #1: & #2\\} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% VIRTIO version properties +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% new environment: VIRTIO version property +\newenvironment{virtioversionproperty}[2] +{ +\vspace{0.05in} +\begin{tabular}{p{3in}l} +\texttt{\textbf{#1}} & #2\\ +\end{tabular}\\\ +\begin{tabular}{p{0.3in}p{5.7in}} +&} +{\\ +\end{tabular}\\ +\vspace{0.05in} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% VIRTIO general description +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% new environment: VIRTIO description +\newenvironment{virtiodescription}[1] +{ +\vspace{0.05in} +\noindent +\begin{tabular}{p{0.1in}p{5.9in}} +\multicolumn{2}{l}{\textbf{#1:}}\\ +&\setlength{\topsep}{0in}\setlength{\parsep}{0in}\setlength{\partopsep}{0in} } +{\\ +\end{tabular}\\ +\vspace{0.05in} +} + + +\newenvironment{virtioexample}[1] +{ +\vspace{0.2in} +\begin{shaded} +\textbf{Example:}\\ +Request: \texttt{#1-request.log}\\ +Response: \texttt{#1-response.log} +\end{shaded} +\vspace{0.2in} +} + diff --git a/commands.tex b/commands.tex new file mode 100644 index 0000000..871f416 --- /dev/null +++ b/commands.tex @@ -0,0 +1,17 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Commands for the VIRTIO specification document +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\definecolor{oasis1}{RGB}{85,38,129} +\definecolor{oasis2}{RGB}{227,175,27} +\definecolor{shadecolor}{RGB}{230,230,230} + +% How we format a field name +\newcommand{\field}[1]{\emph{#1}} + +% Mark a normative section (driver or device) +\newcommand{\drivernormative}[3]{#1{Driver Requirements: #2}\label{drivernormative:#3}} +\newcommand{\devicenormative}[3]{#1{Device Requirements: #2}\label{devicenormative:#3}} 
+\providecommand{\DIFaddtextcstwo}[1]{#1} +\providecommand{\DIFdeltextcstwo}[1]{} +\newenvironment{DIFnomarkup}{}{} diff --git a/conformance.tex b/conformance.tex new file mode 100644 index 0000000..f59e360 --- /dev/null +++ b/conformance.tex @@ -0,0 +1,330 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Conformance +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\chapter{Conformance} + +This chapter lists the conformance targets and clauses for each; this +also forms a useful checklist which authors are asked to consult for their +implementations! + +\section{Conformance Targets}\label{sec:Conformance / Conformance Targets} + +Conformance targets: +\begin{description} +\item[Driver] A driver MUST conform to three conformance clauses: + \begin{itemize} + \item Clause \ref{sec:Conformance / Driver Conformance}, + \item One of clauses \ref{sec:Conformance / Driver Conformance / PCI Driver Conformance}, \ref{sec:Conformance / Driver Conformance / MMIO Driver Conformance} or \ref{sec:Conformance / Driver Conformance / Channel I/O Driver Conformance}. + \item One of clauses \ref{sec:Conformance / Driver Conformance / Network Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Block Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Console Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Entropy Driver Conformance}, \ref{sec:Conformance / Driver Conformance / Traditional Memory Balloon Driver Conformance} or \ref{sec:Conformance / Driver Conformance / SCSI Host Driver Conformance}. + \end{itemize} +\item[Device] A device MUST conform to three conformance clauses: + \begin{itemize} + \item Clause \ref{sec:Conformance / Device Conformance}, + \item One of clauses \ref{sec:Conformance / Device Conformance / PCI Device Conformance}, \ref{sec:Conformance / Device Conformance / MMIO Device Conformance} or \ref{sec:Conformance / Device Conformance / Channel I/O Device Conformance}. 
+ \item One of clauses \ref{sec:Conformance / Device Conformance / Network Device Conformance}, \ref{sec:Conformance / Device Conformance / Block Device Conformance}, \ref{sec:Conformance / Device Conformance / Console Device Conformance}, \ref{sec:Conformance / Device Conformance / Entropy Device Conformance}, \ref{sec:Conformance / Device Conformance / Traditional Memory Balloon Device Conformance} or \ref{sec:Conformance / Device Conformance / SCSI Host Device Conformance}. + \end{itemize} +\end{description} + +\section{Driver Conformance}\label{sec:Conformance / Driver Conformance} + +A driver MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{drivernormative:Basic Facilities of a Virtio Device / Device Status Field} +\item \ref{drivernormative:Basic Facilities of a Virtio Device / Feature Bits} +\item \ref{drivernormative:Basic Facilities of a Virtio Device / Device Configuration Space} +\item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues} +\item \ref{drivernormative:Basic Facilities of a Virtio Device / Message Framing} +\item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table} +\item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table / Indirect Descriptors} +\item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Interrupt Suppression} +\item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring} +\item \ref{drivernormative:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Notification Suppression} +\item \ref{drivernormative:General Initialization And Device Operation / Device Initialization} +\item \ref{drivernormative:General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device / Updating idx} +\item \ref{drivernormative:General Initialization And Device Operation / Device 
Operation / Supplying Buffers to The Device / Notifying The Device} +\item \ref{drivernormative:General Initialization And Device Operation / Device Cleanup} +\item \ref{drivernormative:Reserved Feature Bits} +\end{itemize} + +\subsection{PCI Driver Conformance}\label{sec:Conformance / Driver Conformance / PCI Driver Conformance} + +A PCI driver MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Discovery} +\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout} +\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / Virtio Structure PCI Capabilities} +\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Common configuration structure layout} +\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / ISR status capability} +\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / PCI configuration access capability} +\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / MSI-X Vector Configuration} +\item \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Notification of Device Configuration Changes} +\end{itemize} + +\subsection{MMIO Driver Conformance}\label{sec:Conformance / Driver Conformance / MMIO Driver Conformance} + +An MMIO driver MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{drivernormative:Virtio Transport Options / Virtio Over MMIO / MMIO Device Register Layout} +\item \ref{drivernormative:Virtio Transport Options / Virtio Over MMIO / MMIO-specific Initialization And Device Operation / Device Initialization} +\item \ref{drivernormative:Virtio Transport Options / Virtio 
Over MMIO / MMIO-specific Initialization And Device Operation / Notifications From The Device} +\end{itemize} + +\subsection{Channel I/O Driver Conformance}\label{sec:Conformance / Driver Conformance / Channel I/O Driver Conformance} + +A Channel I/O driver MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Basic Concepts} +\item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting the Virtio Revision} +\item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information} +\item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Device Operation / Host->Guest Notification / Notification via Adapter I/O Interrupts} +\item \ref{drivernormative:Virtio Transport Options / Virtio over channel I/O / Device Operation / Guest->Host Notification} +\end{itemize} + +\subsection{Network Driver Conformance}\label{sec:Conformance / Driver Conformance / Network Driver Conformance} + +A network driver MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{drivernormative:Device Types / Network Device / Device configuration layout} +\item \ref{drivernormative:Device Types / Network Device / Device Operation / Packet Transmission} +\item \ref{drivernormative:Device Types / Network Device / Device Operation / Setting Up Receive Buffers} +\item \ref{drivernormative:Device Types / Network Device / Device Operation / Processing of Incoming Packets} +\item \ref{drivernormative:Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering} +\item \ref{drivernormative:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering} +\item \ref{drivernormative:Device Types / Network Device / Device Operation / Control Virtqueue / Gratuitous Packet 
Sending} +\item \ref{drivernormative:Device Types / Network Device / Device Operation / Control Virtqueue / Automatic receive steering in multiqueue mode} +\item \ref{drivernormative:Device Types / Network Device / Device Operation / Control Virtqueue / Offloads State Configuration / Setting Offloads State} +\end{itemize} + +\subsection{Block Driver Conformance}\label{sec:Conformance / Driver Conformance / Block Driver Conformance} + +A block driver MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{drivernormative:Device Types / Block Device / Device Initialization} +\item \ref{drivernormative:Device Types / Block Device / Device Operation} +\end{itemize} + +\subsection{Console Driver Conformance}\label{sec:Conformance / Driver Conformance / Console Driver Conformance} + +A console driver MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{drivernormative:Device Types / Console Device / Device Operation} +\item \ref{drivernormative:Device Types / Console Device / Device Operation / Multiport Device Operation} +\end{itemize} + +\subsection{Entropy Driver Conformance}\label{sec:Conformance / Driver Conformance / Entropy Driver Conformance} + +An entropy driver MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{drivernormative:Device Types / Entropy Device / Device Operation} +\end{itemize} + +\subsection{Traditional Memory Balloon Driver Conformance}\label{sec:Conformance / Driver Conformance / Traditional Memory Balloon Driver Conformance} + +A traditional memory balloon driver MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{drivernormative:Device Types / Memory Balloon Device / Feature bits} +\item \ref{drivernormative:Device Types / Memory Balloon Device / Device Operation} +\item \ref{drivernormative:Device Types / Memory Balloon Device / Device Operation / Memory Statistics} +\end{itemize} + +\subsection{SCSI Host Driver 
Conformance}\label{sec:Conformance / Driver Conformance / SCSI Host Driver Conformance} + +An SCSI host driver MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{drivernormative:Device Types / SCSI Host Device / Device configuration layout} +\item \ref{drivernormative:Device Types / SCSI Host Device / Device Operation / Device Operation: Request Queues} +\item \ref{drivernormative:Device Types / SCSI Host Device / Device Operation / Device Operation: eventq} +\end{itemize} + +\section{Device Conformance}\label{sec:Conformance / Device Conformance} + +A device MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{devicenormative:Basic Facilities of a Virtio Device / Device Status Field} +\item \ref{devicenormative:Basic Facilities of a Virtio Device / Feature Bits} +\item \ref{devicenormative:Basic Facilities of a Virtio Device / Device Configuration Space} +\item \ref{devicenormative:Basic Facilities of a Virtio Device / Message Framing} +\item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table} +\item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table / Indirect Descriptors} +\item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Interrupt Suppression} +\item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring} +\item \ref{devicenormative:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Notification Suppression} +\item \ref{devicenormative:Reserved Feature Bits} +\end{itemize} + +\subsection{PCI Device Conformance}\label{sec:Conformance / Device Conformance / PCI Device Conformance} + +A PCI device MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI 
Bus / PCI Device Discovery} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / Virtio Structure PCI Capabilities} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Common configuration structure layout} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Notification capability} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / ISR status capability} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Device-specific configuration} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / PCI configuration access capability} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Non-transitional Device With Legacy Driver} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / MSI-X Vector Configuration} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Virtqueue Interrupts From The Device} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Notification of Device Configuration Changes} +\end{itemize} + +\subsection{MMIO Device Conformance}\label{sec:Conformance / Device Conformance / MMIO Device Conformance} + +An MMIO device MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{devicenormative:Virtio Transport Options / Virtio Over MMIO / MMIO Device Register Layout} +\end{itemize} + +\subsection{Channel I/O Device Conformance}\label{sec:Conformance / 
Device Conformance / Channel I/O Device Conformance} + +A Channel I/O device MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Basic Concepts} +\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting the Virtio Revision} +\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Configuring a Virtqueue} +\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information} +\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting Up Indicators / Setting Up Two-Stage Queue Indicators} +\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Operation / Host->Guest Notification / Notification via Adapter I/O Interrupts} +\item \ref{devicenormative:Virtio Transport Options / Virtio over channel I/O / Device Operation / Guest->Host Notification} +\end{itemize} + +\subsection{Network Device Conformance}\label{sec:Conformance / Device Conformance / Network Device Conformance} + +A network device MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{devicenormative:Device Types / Network Device / Device configuration layout} +\item \ref{devicenormative:Device Types / Network Device / Device Operation / Packet Transmission} +\item \ref{devicenormative:Device Types / Network Device / Device Operation / Setting Up Receive Buffers} +\item \ref{devicenormative:Device Types / Network Device / Device Operation / Processing of Incoming Packets} +\item \ref{devicenormative:Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering} +\item \ref{devicenormative:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering} 
+\item \ref{devicenormative:Device Types / Network Device / Device Operation / Control Virtqueue / Gratuitous Packet Sending} +\item \ref{devicenormative:Device Types / Network Device / Device Operation / Control Virtqueue / Automatic receive steering in multiqueue mode} +\end{itemize} + +\subsection{Block Device Conformance}\label{sec:Conformance / Device Conformance / Block Device Conformance} + +A block device MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{devicenormative:Device Types / Block Device / Device Initialization} +\item \ref{devicenormative:Device Types / Block Device / Device Operation} +\end{itemize} + +\subsection{Console Device Conformance}\label{sec:Conformance / Device Conformance / Console Device Conformance} + +A console device MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{devicenormative:Device Types / Console Device / Device Initialization} +\item \ref{devicenormative:Device Types / Console Device / Device Operation / Multiport Device Operation} +\end{itemize} + +\subsection{Entropy Device Conformance}\label{sec:Conformance / Device Conformance / Entropy Device Conformance} + +An entropy device MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{devicenormative:Device Types / Entropy Device / Device Operation} +\end{itemize} + +\subsection{Traditional Memory Balloon Device Conformance}\label{sec:Conformance / Device Conformance / Traditional Memory Balloon Device Conformance} + +A traditional memory balloon device MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{devicenormative:Device Types / Memory Balloon Device / Feature bits} +\item \ref{devicenormative:Device Types / Memory Balloon Device / Device Operation} +\item \ref{devicenormative:Device Types / Memory Balloon Device / Device Operation / Memory Statistics} +\end{itemize} + +\subsection{SCSI Host Device Conformance}\label{sec:Conformance / Device 
Conformance / SCSI Host Device Conformance} + +An SCSI host device MUST conform to the following normative statements: + +\begin{itemize} +\item \ref{devicenormative:Device Types / SCSI Host Device / Device configuration layout} +\item \ref{devicenormative:Device Types / SCSI Host Device / Device Initialization} +\item \ref{devicenormative:Device Types / SCSI Host Device / Device Operation / Device Operation: Request Queues} +\item \ref{devicenormative:Device Types / SCSI Host Device / Device Operation / Device Operation: eventq} +\end{itemize} + +\section{Legacy Interface: Transitional Device and +Transitional Driver Conformance}\label{sec:Conformance / Legacy +Interface: Transitional Device and +Transitional Driver Conformance} +A conformant implementation MUST be either transitional or +non-transitional, see \ref{intro:Legacy +Interface: Terminology}. + +A non-transitional implementation conforms to this specification +if it satisfies all of the MUST or REQUIRED level requirements +defined above. + +An implementation MAY choose to implement OPTIONAL support for the +legacy interface, including support for legacy drivers +or devices, by additionally conforming to all of the MUST or +REQUIRED level requirements for the legacy interface +for the transitional devices and drivers. 
+ +The requirements for the legacy interface for transitional implementations +are located in sections named ``Legacy Interface'' listed below: +\begin{itemize} +\item Section \ref{sec:Basic Facilities of a Virtio Device / +Feature Bits / Legacy Interface: A Note on Feature Bits} +\item Section \ref{sec:Basic Facilities of a Virtio Device / Device Configuration Space / Legacy Interface: A Note on Configuration Space endian-ness} +\item Section \ref{sec:Basic Facilities of a Virtio Device / Device Configuration Space / Legacy Interface: Device Configuration Space} +\item Section \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout} +\item Section \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Endianness} +\item Section \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Message Framing / Legacy Interface: Message Framing} +\item Section \ref{sec:General Initialization And Device Operation / Device Initialization / Legacy Interface: Device Initialization} +\item Section \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Discovery / Legacy Interfaces: A Note on PCI Device Discovery} +\item Section \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Legacy Interfaces: A Note on PCI Device Layout} +\item Section \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Virtio Device Configuration Layout Detection / Legacy Interface: A Note on Device Layout Detection} +\item Section \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Virtqueue Configuration / Legacy Interface: A Note on Virtqueue Configuration} +\item Section \ref{sec:Virtio Transport Options / Virtio Over MMIO / Legacy interface} +\item Section \ref{sec:Virtio Transport Options / Virtio over 
channel I/O / Device Initialization / Setting the Virtio Revision / Legacy Interfaces: A Note on Setting the Virtio Revision} +\item Section \ref{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Configuring a Virtqueue / Legacy Interface: A Note on Configuring a Virtqueue} +\item Section \ref{sec:Virtio Transport Options / Virtio over channel I/O / Device Operation / Host->Guest Notification / Legacy Interfaces: A Note on Host->Guest Notification} +\item Section \ref{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting Up Indicators / Legacy Interfaces: A Note on Setting Up Indicators} +\item Section \ref{sec:Device Types / Network Device / Feature bits / Legacy Interface: Feature bits} +\item Section \ref{sec:Device Types / Network Device / Device configuration layout / Legacy Interface: Device configuration layout} +\item Section \ref{sec:Device Types / Network Device / Device Operation / Legacy Interface: Device Operation} +\item Section \ref{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering / Legacy Interface: Setting MAC Address Filtering} +\item Section \ref{sec:Device Types / Network Device / Device Operation / Control Virtqueue / VLAN Filtering / Legacy Interface: VLAN Filtering} +\item Section \ref{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Automatic receive steering in multiqueue mode / Legacy Interface: Automatic receive steering in multiqueue mode} +\item Section \ref{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Offloads State Configuration / Setting Offloads State / Legacy Interface: Setting Offloads State} +\item Section \ref{sec:Device Types / Block Device / Feature bits / Legacy Interface: Feature bits} +\item Section \ref{sec:Device Types / Block Device / Device configuration layout / Legacy Interface: Device configuration layout} +\item Section \ref{sec:Device Types 
/ Block Device / Device Initialization / Legacy Interface: Device Initialization} +\item Section \ref{sec:Device Types / Block Device / Device Operation / Legacy Interface: Device Operation} +\item Section \ref{sec:Device Types / Console Device / Device configuration layout / Legacy Interface: Device configuration layout} +\item Section \ref{sec:Device Types / Console Device / Device Operation / Legacy Interface: Device Operation} +\item Section \ref{sec:Device Types / Memory Balloon Device / Feature bits / Legacy Interface: Feature bits} +\item Section \ref{sec:Device Types / Memory Balloon Device / Device Operation / Legacy Interface: Device Operation} +\item Section \ref{sec:Device Types / Memory Balloon Device / Device Operation / Memory Statistics / Legacy Interface: Memory Statistics} +\item Section \ref{sec:Device Types / SCSI Host Device / Device configuration layout / Legacy Interface: Device configuration layout} +\item Section \ref{sec:Device Types / SCSI Host Device / Device Operation / Legacy Interface: Device Operation} +\item Section \ref{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: Request Queues / Legacy Interface: Device Operation: Request Queues} +\item Section \ref{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: controlq / Legacy Interface: Device Operation: controlq} +\item Section \ref{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: eventq / Legacy Interface: Device Operation: eventq} +\item Section \ref{sec:Reserved Feature Bits / Legacy Interface: Reserved Feature Bits} +\end{itemize} diff --git a/content.tex b/content.tex new file mode 100644 index 0000000..e57ebc5 --- /dev/null +++ b/content.tex @@ -0,0 +1,5887 @@ +\chapter{Basic Facilities of a Virtio Device}\label{sec:Basic Facilities of a Virtio Device} + +A virtio device is discovered and identified by a bus-specific method +(see the bus specific sections: \ref{sec:Virtio Transport Options / Virtio Over 
PCI Bus}~\nameref{sec:Virtio Transport Options / Virtio Over PCI Bus}, +\ref{sec:Virtio Transport Options / Virtio Over MMIO}~\nameref{sec:Virtio Transport Options / Virtio Over MMIO} and \ref{sec:Virtio Transport Options / Virtio Over Channel I/O}~\nameref{sec:Virtio Transport Options / Virtio Over Channel I/O}). Each +device consists of the following parts: + +\begin{itemize} +\item Device status field +\item Feature bits +\item Device Configuration space +\item One or more virtqueues +\end{itemize} + +\section{\field{Device Status} Field}\label{sec:Basic Facilities of a Virtio Device / Device Status Field} +During device initialization by a driver, +the driver follows the sequence of steps specified in +\ref{sec:General Initialization And Device Operation / Device +Initialization}. + +The \field{device status} field provides a simple low-level +indication of the completed steps of this sequence. +It's most useful to imagine it hooked up to traffic +lights on the console indicating the status of each device. The +following bits are defined (listed below in the order in which +they would be typically set): +\begin{description} +\item[ACKNOWLEDGE (1)] Indicates that the guest OS has found the + device and recognized it as a valid virtio device. + +\item[DRIVER (2)] Indicates that the guest OS knows how to drive the + device. + \begin{note} + There could be a significant (or infinite) delay before setting + this bit. For example, under Linux, drivers can be loadable modules. + \end{note} + +\item[FAILED (128)] Indicates that something went wrong in the guest, + and it has given up on the device. This could be an internal + error, or the driver didn't like the device for some reason, or + even a fatal error during device operation. + +\item[FEATURES_OK (8)] Indicates that the driver has acknowledged all the + features it understands, and feature negotiation is complete. + +\item[DRIVER_OK (4)] Indicates that the driver is set up and ready to + drive the device. 
+ +\item[DEVICE_NEEDS_RESET (64)] Indicates that the device has experienced + an error from which it can't recover. +\end{description} + +\drivernormative{\subsection}{Device Status Field}{Basic Facilities of a Virtio Device / Device Status Field} +The driver MUST update \field{device status}, +setting bits to indicate the completed steps of the driver +initialization sequence specified in +\ref{sec:General Initialization And Device Operation / Device +Initialization}. +The driver MUST NOT clear a +\field{device status} bit. If the driver sets the FAILED bit, +the driver MUST later reset the device before attempting to re-initialize. + +The driver SHOULD NOT rely on completion of operations of a +device if DEVICE_NEEDS_RESET is set. +\begin{note} +For example, the driver can't assume requests in flight will be +completed if DEVICE_NEEDS_RESET is set, nor can it assume that +they have not been completed. A good implementation will try to +recover by issuing a reset. +\end{note} + +\devicenormative{\subsection}{Device Status Field}{Basic Facilities of a Virtio Device / Device Status Field} +The device MUST initialize \field{device status} to 0 upon reset. + +The device MUST NOT consume buffers or notify the driver before DRIVER_OK. + +\label{sec:Basic Facilities of a Virtio Device / Device Status Field / DEVICENEEDSRESET}The device SHOULD set DEVICE_NEEDS_RESET when it enters an error state +that a reset is needed. If DRIVER_OK is set, after it sets DEVICE_NEEDS_RESET, the device +MUST send a device configuration change notification to the driver. + +\section{Feature Bits}\label{sec:Basic Facilities of a Virtio Device / Feature Bits} + +Each virtio device offers all the features it understands. During +device initialization, the driver reads this and tells the device the +subset that it accepts. The only way to renegotiate is to reset +the device. 
+ +This allows for forwards and backwards compatibility: if the device is +enhanced with a new feature bit, older drivers will not write that +feature bit back to the device. Similarly, if a driver is enhanced with a feature +that the device doesn't support, it will see that the new feature is not offered. + +Feature bits are allocated as follows: + +\begin{description} +\item[0 to 23] Feature bits for the specific device type + +\item[24 to 33] Feature bits reserved for extensions to the queue and + feature negotiation mechanisms + +\item[34 and above] Feature bits reserved for future extensions. +\end{description} + +\begin{note} +For example, feature bit 0 for a network device (i.e. +Device ID 1) indicates that the device supports checksumming of +packets. +\end{note} + +In particular, new fields in the device configuration space are +indicated by offering a new feature bit. + +\drivernormative{\subsection}{Feature Bits}{Basic Facilities of a Virtio Device / Feature Bits} +The driver MUST NOT accept a feature which the device did not offer, +and MUST NOT accept a feature which requires another feature which was +not accepted. + +The driver SHOULD go into backwards compatibility mode +if the device does not offer a feature it understands, otherwise MUST +set the FAILED \field{device status} bit and cease initialization. + +\devicenormative{\subsection}{Feature Bits}{Basic Facilities of a Virtio Device / Feature Bits} +The device MUST NOT offer a feature which requires another feature +which was not offered. The device SHOULD accept any valid subset +of features the driver accepts, otherwise it MUST fail to set the +FEATURES_OK \field{device status} bit when the driver writes it. + +\subsection{Legacy Interface: A Note on Feature +Bits}\label{sec:Basic Facilities of a Virtio Device / Feature +Bits / Legacy Interface: A Note on Feature Bits} + +Transitional Drivers MUST detect Legacy Devices by detecting that +the feature bit VIRTIO_F_VERSION_1 is not offered. 
+Transitional devices MUST detect Legacy drivers by detecting that +VIRTIO_F_VERSION_1 has not been acknowledged by the driver. + +In this case the device is used through the legacy interface. + +Legacy interface support is OPTIONAL. +Thus, both transitional and non-transitional devices and +drivers are compliant with this specification. + +Requirements pertaining to transitional devices and drivers +are contained in sections named ``Legacy Interface'' like this one. + +When the device is used through the legacy interface, transitional +devices and transitional drivers MUST operate according to the +requirements documented within these legacy interface sections. +Specification text within these sections generally does not apply +to non-transitional devices. + +\section{Device Configuration Space}\label{sec:Basic Facilities of a Virtio Device / Device Configuration Space} + +Device configuration space is generally used for rarely-changing or +initialization-time parameters. Where configuration fields are +optional, their existence is indicated by feature bits: Future +versions of this specification will likely extend the device +configuration space by adding extra fields at the tail. + +\begin{note} +The device configuration space uses the little-endian format +for multi-byte fields. +\end{note} + +Each transport also provides a generation count for the device configuration +space, which will change whenever there is a possibility that two +accesses to the device configuration space can see different versions of that +space. + +\drivernormative{\subsection}{Device Configuration Space}{Basic Facilities of a Virtio Device / Device Configuration Space} +Drivers MUST NOT assume reads from +fields greater than 32 bits wide are atomic, nor are reads from +multiple fields: drivers SHOULD read device configuration space fields like so: + +\begin{lstlisting} +u32 before, after; +do { + before = get_config_generation(device); + // read config entry/entries. 
+ after = get_config_generation(device); +} while (after != before); +\end{lstlisting} + +For optional configuration space fields, the driver MUST check that the +corresponding feature is offered before accessing that part of the configuration +space. +\begin{note} +See section \ref{sec:General Initialization And Device Operation / Device Initialization} for details on feature negotiation. +\end{note} + +Drivers MUST +NOT limit structure size and device configuration space size. Instead, +drivers SHOULD only check that device configuration space is {\em large enough} to +contain the fields necessary for device operation. + +\begin{note} +For example, if the specification states that device configuration +space 'includes a single 8-bit field' drivers should understand this to mean that +the device configuration space might also include an arbitrary amount of +tail padding, and accept any device configuration space size equal to or +greater than the specified 8-bit size. +\end{note} + +\devicenormative{\subsection}{Device Configuration Space}{Basic Facilities of a Virtio Device / Device Configuration Space} +The device MUST allow reading of any device-specific configuration +field before FEATURES_OK is set by the driver. This includes fields which are +conditional on feature bits, as long as those feature bits are offered +by the device. + +\subsection{Legacy Interface: A Note on Device Configuration Space endian-ness}\label{sec:Basic Facilities of a Virtio Device / Device Configuration Space / Legacy Interface: A Note on Configuration Space endian-ness} + +Note that for legacy interfaces, device configuration space is generally the +guest's native endian, rather than PCI's little-endian. +The correct endian-ness is documented for each device. 
+ +\subsection{Legacy Interface: Device Configuration Space}\label{sec:Basic Facilities of a Virtio Device / Device Configuration Space / Legacy Interface: Device Configuration Space} + +Legacy devices did not have a configuration generation field, thus are +susceptible to race conditions if configuration is updated. This +affects the block \field{capacity} (see \ref{sec:Device Types / +Block Device / Device configuration layout}) and +network \field{mac} (see \ref{sec:Device Types / Network Device / +Device configuration layout}) fields; +when using the legacy interface, drivers SHOULD +read these fields multiple times until two reads generate a consistent +result. + +\section{Virtqueues}\label{sec:Basic Facilities of a Virtio Device / Virtqueues} + +The mechanism for bulk data transport on virtio devices is +pretentiously called a virtqueue. Each device can have zero or more +virtqueues\footnote{For example, the simplest network device has one virtqueue for +transmit and one for receive.}. Each queue has a 16-bit queue size +parameter, which sets the number of entries and implies the total size +of the queue. + +Each virtqueue consists of three parts: + +\begin{itemize} +\item Descriptor Table +\item Available Ring +\item Used Ring +\end{itemize} + +where each part is physically-contiguous in guest memory, +and has different alignment requirements. + +The memory alignment and size requirements, in bytes, of each part of the +virtqueue are summarized in the following table: + +\begin{tabular}{|l|l|l|} +\hline +Virtqueue Part & Alignment & Size \\ +\hline \hline +Descriptor Table & 16 & $16 * $(Queue Size) \\ +\hline +Available Ring & 2 & $6 + 2 * $(Queue Size) \\ + \hline +Used Ring & 4 & $6 + 8 * $(Queue Size) \\ + \hline +\end{tabular} + +The Alignment column gives the minimum alignment for each part +of the virtqueue. + +The Size column gives the total number of bytes for each +part of the virtqueue. 
+ +Queue Size corresponds to the maximum number of buffers in the +virtqueue\footnote{For example, if Queue Size is 4 then at most 4 buffers +can be queued at any given time.}. Queue Size value is always a +power of 2. The maximum Queue Size value is 32768. This value +is specified in a bus-specific way. + +When the driver wants to send a buffer to the device, it fills in +a slot in the descriptor table (or chains several together), and +writes the descriptor index into the available ring. It then +notifies the device. When the device has finished a buffer, it +writes the descriptor index into the used ring, and sends an interrupt. + +\drivernormative{\subsection}{Virtqueues}{Basic Facilities of a Virtio Device / Virtqueues} +The driver MUST ensure that the physical address of the first byte +of each virtqueue part is a multiple of the specified alignment value +in the above table. + +\subsection{Legacy Interfaces: A Note on Virtqueue Layout}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout} + +For Legacy Interfaces, several additional +restrictions are placed on the virtqueue layout: + +Each virtqueue occupies two or more physically-contiguous pages +(usually defined as 4096 bytes, but depending on the transport; +henceforth referred to as Queue Align) +and consists of three parts: + +\begin{tabular}{|l|l|l|} +\hline +Descriptor Table & Available Ring (\ldots padding\ldots) & Used Ring \\ +\hline +\end{tabular} + +The bus-specific Queue Size field controls the total number of bytes +for the virtqueue. 
+When using the legacy interface, the transitional +driver MUST retrieve the Queue Size field from the device +and MUST allocate the total number of bytes for the virtqueue +according to the following formula (Queue Align given in qalign and +Queue Size given in qsz): + +\begin{lstlisting} +#define ALIGN(x) (((x) + qalign) & ~qalign) +static inline unsigned virtq_size(unsigned int qsz) +{ + return ALIGN(sizeof(struct virtq_desc)*qsz + sizeof(u16)*(3 + qsz)) + + ALIGN(sizeof(u16)*3 + sizeof(struct virtq_used_elem)*qsz); +} +\end{lstlisting} + +This wastes some space with padding. +When using the legacy interface, both transitional +devices and drivers MUST use the following virtqueue layout +structure to locate elements of the virtqueue: + +\begin{lstlisting} +struct virtq { + // The actual descriptors (16 bytes each) + struct virtq_desc desc[ Queue Size ]; + + // A ring of available descriptor heads with free-running index. + struct virtq_avail avail; + + // Padding to the next Queue Align boundary. + u8 pad[ Padding ]; + + // A ring of used descriptor heads with free-running index. + struct virtq_used used; +}; +\end{lstlisting} + +\subsection{Legacy Interfaces: A Note on Virtqueue Endianness}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Endianness} + +Note that when using the legacy interface, transitional +devices and drivers MUST use the native +endian of the guest as the endian of fields and in the virtqueue. +This is opposed to little-endian for non-legacy interface as +specified by this standard. +It is assumed that the host is already aware of the guest endian. + +\subsection{Message Framing}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / Message Framing} +The framing of messages with descriptors is +independent of the contents of the buffers. For example, a network +transmit buffer consists of a 12 byte header followed by the network +packet. 
This could be most simply placed in the descriptor table as a +12 byte output descriptor followed by a 1514 byte output descriptor, +but it could also consist of a single 1526 byte output descriptor in +the case where the header and packet are adjacent, or even three or +more descriptors (possibly with loss of efficiency in that case). + +Note that, some device implementations have large-but-reasonable +restrictions on total descriptor size (such as based on IOV_MAX in the +host OS). This has not been a problem in practice: little sympathy +will be given to drivers which create unreasonably-sized descriptors +such as by dividing a network packet into 1500 single-byte +descriptors! + +\devicenormative{\subsubsection}{Message Framing}{Basic Facilities of a Virtio Device / Message Framing} +The device MUST NOT make assumptions about the particular arrangement +of descriptors. The device MAY have a reasonable limit of descriptors +it will allow in a chain. + +\drivernormative{\subsubsection}{Message Framing}{Basic Facilities of a Virtio Device / Message Framing} +The driver MUST place any device-writable descriptor elements after +any device-readable descriptor elements. + +The driver SHOULD NOT use an excessive number of descriptors to +describe a buffer. + +\subsubsection{Legacy Interface: Message Framing}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / Message Framing / Legacy Interface: Message Framing} + +Regrettably, initial driver implementations used simple layouts, and +devices came to rely on it, despite this specification wording. 
In +addition, the specification for virtio_blk SCSI commands required +intuiting field lengths from frame boundaries (see + \ref{sec:Device Types / Block Device / Device Operation / Legacy Interface: Device Operation}~\nameref{sec:Device Types / Block Device / Device Operation / Legacy Interface: Device Operation}) + +Thus when using the legacy interface, the VIRTIO_F_ANY_LAYOUT +feature indicates to both the device and the driver that no +assumptions were made about framing. Requirements for +transitional drivers when this is not negotiated are included in +each device section. + +\subsection{The Virtqueue Descriptor Table}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table} + +The descriptor table refers to the buffers the driver is using for +the device. \field{addr} is a physical address, and the buffers +can be chained via \field{next}. Each descriptor describes a +buffer which is read-only for the device (``device-readable'') or write-only for the device (``device-writable''), but a chain of +descriptors can contain both device-readable and device-writable buffers. + +The actual contents of the memory offered to the device depends on the +device type. Most common is to begin the data with a header +(containing little-endian fields) for the device to read, and postfix +it with a status tailer for the device to write. + +\begin{lstlisting} +struct virtq_desc { + /* Address (guest-physical). */ + le64 addr; + /* Length. */ + le32 len; + +/* This marks a buffer as continuing via the next field. */ +#define VIRTQ_DESC_F_NEXT 1 +/* This marks a buffer as device write-only (otherwise device read-only). */ +#define VIRTQ_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VIRTQ_DESC_F_INDIRECT 4 + /* The flags as indicated above. 
*/ + le16 flags; + /* Next field if flags & NEXT */ + le16 next; +}; +\end{lstlisting} + +The number of descriptors in the table is defined by the queue size +for this virtqueue: this is the maximum possible descriptor chain length. + +\begin{note} +The legacy \hyperref[intro:Virtio PCI Draft]{[Virtio PCI Draft]} +referred to this structure as vring_desc, and the constants as +VRING_DESC_F_NEXT, etc, but the layout and values were identical. +\end{note} + +\devicenormative{\subsubsection}{The Virtqueue Descriptor Table}{Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table} +A device MUST NOT write to a device-readable buffer, and a device SHOULD NOT +read a device-writable buffer (it MAY do so for debugging or diagnostic +purposes). + +\drivernormative{\subsubsection}{The Virtqueue Descriptor Table}{Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table} +Drivers MUST NOT add a descriptor chain longer than $2^{32}$ bytes in total; +this implies that loops in the descriptor chain are forbidden! + +\subsubsection{Indirect Descriptors}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table / Indirect Descriptors} + +Some devices benefit by concurrently dispatching a large number +of large requests. The VIRTIO_F_INDIRECT_DESC feature allows this (see \ref{sec:virtio-queue.h}~\nameref{sec:virtio-queue.h}). To increase +ring capacity the driver can store a table of indirect +descriptors anywhere in memory, and insert a descriptor in the main +virtqueue (with \field{flags}\&VIRTQ_DESC_F_INDIRECT on) that refers to a memory buffer +containing this indirect descriptor table; \field{addr} and \field{len} +refer to the indirect table address and length in bytes, +respectively. 
+ +The indirect table layout structure looks like this +(\field{len} is the length of the descriptor that refers to this table, +which is a variable, so this code won't compile): + +\begin{lstlisting} +struct indirect_descriptor_table { + /* The actual descriptors (16 bytes each) */ + struct virtq_desc desc[len / 16]; +}; +\end{lstlisting} + +The first indirect descriptor is located at start of the indirect +descriptor table (index 0), additional indirect descriptors are +chained by \field{next}. An indirect descriptor without a valid \field{next} +(with \field{flags}\&VIRTQ_DESC_F_NEXT off) signals the end of the descriptor. +A single indirect descriptor +table can include both device-readable and device-writable descriptors. + +\drivernormative{\paragraph}{Indirect Descriptors}{Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table / Indirect Descriptors} +The driver MUST NOT set the VIRTQ_DESC_F_INDIRECT flag unless the +VIRTIO_F_INDIRECT_DESC feature was negotiated. The driver MUST NOT +set the VIRTQ_DESC_F_INDIRECT flag within an indirect descriptor (ie. only +one table per descriptor). + +A driver MUST NOT create a descriptor chain longer than the Queue Size of +the device. + +A driver MUST NOT set both VIRTQ_DESC_F_INDIRECT and VIRTQ_DESC_F_NEXT +in \field{flags}. + +\devicenormative{\paragraph}{Indirect Descriptors}{Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table / Indirect Descriptors} +The device MUST ignore the write-only flag (\field{flags}\&VIRTQ_DESC_F_WRITE) in the descriptor that refers to an indirect table. + +The device MUST handle the case of zero or more normal chained +descriptors followed by a single descriptor with \field{flags}\&VIRTQ_DESC_F_INDIRECT. + +\begin{note} +While unusual (most implementations either create a chain solely using +non-indirect descriptors, or use a single indirect element), such a +layout is valid. 
+\end{note} + +\subsection{The Virtqueue Available Ring}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Available Ring} + +\begin{lstlisting} +struct virtq_avail { +#define VIRTQ_AVAIL_F_NO_INTERRUPT 1 + le16 flags; + le16 idx; + le16 ring[ /* Queue Size */ ]; + le16 used_event; /* Only if VIRTIO_F_EVENT_IDX */ +}; +\end{lstlisting} + +The driver uses the available ring to offer buffers to the +device: each ring entry refers to the head of a descriptor chain. It is only +written by the driver and read by the device. + +\field{idx} field indicates where the driver would put the next descriptor +entry in the ring (modulo the queue size). This starts at 0, and increases. + +\begin{note} +The legacy \hyperref[intro:Virtio PCI Draft]{[Virtio PCI Draft]} +referred to this structure as vring_avail, and the constant as +VRING_AVAIL_F_NO_INTERRUPT, but the layout and value were identical. +\end{note} + +\subsection{Virtqueue Interrupt Suppression}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Interrupt Suppression} + +If the VIRTIO_F_EVENT_IDX feature bit is not negotiated, +the \field{flags} field in the available ring offers a crude mechanism for the driver to inform +the device that it doesn't want interrupts when buffers are used. Otherwise +\field{used_event} is a more performant alternative where the driver +specifies how far the device can progress before interrupting. + +Neither of these interrupt suppression methods are reliable, as they +are not synchronized with the device, but they serve as +useful optimizations. + +\drivernormative{\subsubsection}{Virtqueue Interrupt Suppression}{Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Interrupt Suppression} +If the VIRTIO_F_EVENT_IDX feature bit is not negotiated: +\begin{itemize} +\item The driver MUST set \field{flags} to 0 or 1. +\item The driver MAY set \field{flags} to 1 to advise +the device that interrupts are not needed. 
+\end{itemize} + +Otherwise, if the VIRTIO_F_EVENT_IDX feature bit is negotiated: +\begin{itemize} +\item The driver MUST set \field{flags} to 0. +\item The driver MAY use \field{used_event} to advise the device that interrupts are unnecessary until the device writes entry with an index specified by \field{used_event} into the used ring (equivalently, until \field{idx} in the +used ring will reach the value \field{used_event} + 1). +\end{itemize} + +The driver MUST handle spurious interrupts from the device. + +\devicenormative{\subsubsection}{Virtqueue Interrupt Suppression}{Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Interrupt Suppression} + +If the VIRTIO_F_EVENT_IDX feature bit is not negotiated: +\begin{itemize} +\item The device MUST ignore the \field{used_event} value. +\item After the device writes a descriptor index into the used ring: + \begin{itemize} + \item If \field{flags} is 1, the device SHOULD NOT send an interrupt. + \item If \field{flags} is 0, the device MUST send an interrupt. + \end{itemize} +\end{itemize} + +Otherwise, if the VIRTIO_F_EVENT_IDX feature bit is negotiated: +\begin{itemize} +\item The device MUST ignore the lower bit of \field{flags}. +\item After the device writes a descriptor index into the used ring: + \begin{itemize} + \item If the \field{idx} field in the used ring (which determined + where that descriptor index was placed) was equal to + \field{used_event}, the device MUST send an interrupt. + \item Otherwise the device SHOULD NOT send an interrupt. + \end{itemize} +\end{itemize} + +\begin{note} +For example, if \field{used_event} is 0, then a device using + VIRTIO_F_EVENT_IDX would interrupt after the first buffer is + used (and again after the 65536th buffer, etc). 
+\end{note} + +\subsection{The Virtqueue Used Ring}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring} + +\begin{lstlisting} +struct virtq_used { +#define VIRTQ_USED_F_NO_NOTIFY 1 + le16 flags; + le16 idx; + struct virtq_used_elem ring[ /* Queue Size */]; + le16 avail_event; /* Only if VIRTIO_F_EVENT_IDX */ +}; + +/* le32 is used here for ids for padding reasons. */ +struct virtq_used_elem { + /* Index of start of used descriptor chain. */ + le32 id; + /* Total length of the descriptor chain which was used (written to) */ + le32 len; +}; +\end{lstlisting} + +The used ring is where the device returns buffers once it is done with +them: it is only written to by the device, and read by the driver. + +Each entry in the ring is a pair: \field{id} indicates the head entry of the +descriptor chain describing the buffer (this matches an entry +placed in the available ring by the guest earlier), and \field{len} the total +number of bytes written into the buffer. + +\begin{note} +\field{len} is particularly useful +for drivers using untrusted buffers: if a driver does not know exactly +how much has been written by the device, the driver would have to zero +the buffer in advance to ensure no data leakage occurs. + +For example, a network driver may hand a received buffer directly to +an unprivileged userspace application. If the network device has not +overwritten the bytes which were in that buffer, this could leak the +contents of freed memory from other processes to the application. +\end{note} + +\field{idx} field indicates where the device would put the next descriptor +entry in the ring (modulo the queue size). This starts at 0, and increases. + +\begin{note} +The legacy \hyperref[intro:Virtio PCI Draft]{[Virtio PCI Draft]} +referred to these structures as vring_used and vring_used_elem, and +the constant as VRING_USED_F_NO_NOTIFY, but the layout and value were +identical. 
+\end{note} + +\subsubsection{Legacy Interface: The Virtqueue Used +Ring}\label{sec:Basic Facilities of a Virtio Device / Virtqueues +/ The Virtqueue Used Ring/ Legacy Interface: The Virtqueue Used +Ring} + +Historically, many drivers ignored the \field{len} value, as a +result, many devices set \field{len} incorrectly. Thus, when +using the legacy interface, it is generally a good idea to ignore +the \field{len} value in used ring entries if possible. Specific +known issues are listed per device type. + +\devicenormative{\subsubsection}{The Virtqueue Used Ring}{Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring} + +The device MUST set \field{len} prior to updating the used \field{idx}. + +The device MUST write at least \field{len} bytes to descriptor, +beginning at the first device-writable buffer, +prior to updating the used \field{idx}. + +The device MAY write more than \field{len} bytes to descriptor. + +\begin{note} +There are potential error cases where a device might not know what +parts of the buffers have been written. This is why \field{len} is +permitted to be an underestimate: that's preferable to the driver believing +that uninitialized memory has been overwritten when it has not. +\end{note} + +\drivernormative{\subsubsection}{The Virtqueue Used Ring}{Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring} + +The driver MUST NOT make assumptions about data in device-writable buffers +beyond the first \field{len} bytes, and SHOULD ignore this data. + +\subsection{Virtqueue Notification Suppression}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Notification Suppression} + +The device can suppress notifications in a manner analogous to the way +drivers can suppress interrupts as detailed in section \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Interrupt Suppression}. 
+The device manipulates \field{flags} or \field{avail_event} in the used ring the +same way the driver manipulates \field{flags} or \field{used_event} in the available ring. + +\drivernormative{\subsubsection}{Virtqueue Notification Suppression}{Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Notification Suppression} + +The driver MUST initialize \field{flags} in the used ring to 0 when +allocating the used ring. + +If the VIRTIO_F_EVENT_IDX feature bit is not negotiated: +\begin{itemize} +\item The driver MUST ignore the \field{avail_event} value. +\item After the driver writes a descriptor index into the available ring: + \begin{itemize} + \item If \field{flags} is 1, the driver SHOULD NOT send a notification. + \item If \field{flags} is 0, the driver MUST send a notification. + \end{itemize} +\end{itemize} + +Otherwise, if the VIRTIO_F_EVENT_IDX feature bit is negotiated: +\begin{itemize} +\item The driver MUST ignore the lower bit of \field{flags}. +\item After the driver writes a descriptor index into the available ring: + \begin{itemize} + \item If the \field{idx} field in the available ring (which determined + where that descriptor index was placed) was equal to + \field{avail_event}, the driver MUST send a notification. + \item Otherwise the driver SHOULD NOT send a notification. + \end{itemize} +\end{itemize} + +\devicenormative{\subsubsection}{Virtqueue Notification Suppression}{Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Notification Suppression} +If the VIRTIO_F_EVENT_IDX feature bit is not negotiated: +\begin{itemize} +\item The device MUST set \field{flags} to 0 or 1. +\item The device MAY set \field{flags} to 1 to advise +the driver that notifications are not needed. +\end{itemize} + +Otherwise, if the VIRTIO_F_EVENT_IDX feature bit is negotiated: +\begin{itemize} +\item The device MUST set \field{flags} to 0. 
+\item The device MAY use \field{avail_event} to advise the driver that notifications are unnecessary until the driver writes an entry with an index specified by \field{avail_event} into the available ring (equivalently, until \field{idx} in the +available ring will reach the value \field{avail_event} + 1). +\end{itemize} + +The device MUST handle spurious notifications from the driver. + +\subsection{Helpers for Operating Virtqueues}\label{sec:Basic Facilities of a Virtio Device / Virtqueues / Helpers for Operating Virtqueues} + +The Linux Kernel Source code contains the definitions above and +helper routines in a more usable form, in +include/uapi/linux/virtio_ring.h. This was explicitly licensed by IBM +and Red Hat under the (3-clause) BSD license so that it can be +freely used by all other projects, and is reproduced (with slight +variation) in \ref{sec:virtio-queue.h}~\nameref{sec:virtio-queue.h}. + +\chapter{General Initialization And Device Operation}\label{sec:General Initialization And Device Operation} + +We start with an overview of device initialization, then expand on the +details of the device and how each step is performed. This section +is best read along with the bus-specific section which describes +how to communicate with the specific device. + +\section{Device Initialization}\label{sec:General Initialization And Device Operation / Device Initialization} + +\drivernormative{\subsection}{Device Initialization}{General Initialization And Device Operation / Device Initialization} +The driver MUST follow this sequence to initialize a device: + +\begin{enumerate} +\item Reset the device. + +\item Set the ACKNOWLEDGE status bit: the guest OS has noticed the device. + +\item Set the DRIVER status bit: the guest OS knows how to drive the device. 
+ +\item\label{itm:General Initialization And Device Operation / +Device Initialization / Read feature bits} Read device feature bits, and write the subset of feature bits + understood by the OS and driver to the device. During this step the + driver MAY read (but MUST NOT write) the device-specific configuration fields to check that it can support the device before accepting it. + +\item\label{itm:General Initialization And Device Operation / Device Initialization / Set FEATURES-OK} Set the FEATURES_OK status bit. The driver MUST NOT accept + new feature bits after this step. + +\item\label{itm:General Initialization And Device Operation / Device Initialization / Re-read FEATURES-OK} Re-read \field{device status} to ensure the FEATURES_OK bit is still + set: otherwise, the device does not support our subset of features + and the device is unusable. + +\item\label{itm:General Initialization And Device Operation / Device Initialization / Device-specific Setup} Perform device-specific setup, including discovery of virtqueues for the + device, optional per-bus setup, reading and possibly writing the + device's virtio configuration space, and population of virtqueues. + +\item\label{itm:General Initialization And Device Operation / Device Initialization / Set DRIVER-OK} Set the DRIVER_OK status bit. At this point the device is + ``live''. +\end{enumerate} + +If any of these steps go irrecoverably wrong, the driver SHOULD +set the FAILED status bit to indicate that it has given up on the +device (it can reset the device later to restart if desired). The +driver MUST NOT continue initialization in that case. + +The driver MUST NOT notify the device before setting DRIVER_OK. 
+ +\subsection{Legacy Interface: Device Initialization}\label{sec:General Initialization And Device Operation / Device Initialization / Legacy Interface: Device Initialization} +Legacy devices did not support the FEATURES_OK status bit, and thus did +not have a graceful way for the device to indicate unsupported feature +combinations. They also did not provide a clear mechanism to end +feature negotiation, which meant that devices finalized features on +first-use, and no features could be introduced which radically changed +the initial operation of the device. + +Legacy driver implementations often used the device before setting the +DRIVER_OK bit, and sometimes even before writing the feature bits +to the device. + +The result was the steps \ref{itm:General Initialization And +Device Operation / Device Initialization / Set FEATURES-OK} and +\ref{itm:General Initialization And Device Operation / Device +Initialization / Re-read FEATURES-OK} were omitted, and steps +\ref{itm:General Initialization And Device Operation / +Device Initialization / Read feature bits}, +\ref{itm:General Initialization And Device Operation / Device Initialization / Device-specific Setup} and \ref{itm:General Initialization And Device Operation / Device Initialization / Set DRIVER-OK} +were conflated. + +Therefore, when using the legacy interface: +\begin{itemize} +\item +The transitional driver MUST execute the initialization +sequence as described in \ref{sec:General Initialization And Device +Operation / Device Initialization} +but omitting the steps \ref{itm:General Initialization And Device +Operation / Device Initialization / Set FEATURES-OK} and +\ref{itm:General Initialization And Device Operation / Device +Initialization / Re-read FEATURES-OK}. + +\item +The transitional device MUST support the driver +writing device configuration fields +before the step \ref{itm:General Initialization And Device Operation / +Device Initialization / Read feature bits}. 
+\item +The transitional device MUST support the driver +using the device before the step \ref{itm:General Initialization +And Device Operation / Device Initialization / Set DRIVER-OK}. +\end{itemize} + +\section{Device Operation}\label{sec:General Initialization And Device Operation / Device Operation} + +There are two parts to device operation: supplying new buffers to +the device, and processing used buffers from the device. + +\begin{note} As an +example, the simplest virtio network device has two virtqueues: the +transmit virtqueue and the receive virtqueue. The driver adds +outgoing (device-readable) packets to the transmit virtqueue, and then +frees them after they are used. Similarly, incoming (device-writable) +buffers are added to the receive virtqueue, and processed after +they are used. +\end{note} + +\subsection{Supplying Buffers to The Device}\label{sec:General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device} + +The driver offers buffers to one of the device's virtqueues as follows: + +\begin{enumerate} +\item\label{itm:General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device / Place Buffers} The driver places the buffer into free descriptor(s) in the + descriptor table, chaining as necessary (see \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table}~\nameref{sec:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table}). + +\item\label{itm:General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device / Place Index} The driver places the index of the head of the descriptor chain + into the next ring entry of the available ring. 
+ +\item Steps \ref{itm:General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device / Place Buffers} and \ref{itm:General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device / Place Index} MAY be performed repeatedly if batching + is possible. + +\item The driver performs a suitable memory barrier to ensure the device sees + the updated descriptor table and available ring before the next + step. + +\item The available \field{idx} is increased by the number of + descriptor chain heads added to the available ring. + +\item The driver performs a suitable memory barrier to ensure that it updates + the \field{idx} field before checking for notification suppression. + +\item If notifications are not suppressed, the driver notifies the device + of the new available buffers. +\end{enumerate} + +Note that the above code does not take precautions against the +available ring buffer wrapping around: this is not possible since +the ring buffer is the same size as the descriptor table, so step +(1) will prevent such a condition. + +In addition, the maximum queue size is 32768 (the highest power +of 2 which fits in 16 bits), so the 16-bit \field{idx} value can always +distinguish between a full and empty buffer. + +What follows are the requirements of each stage in more detail. + +\subsubsection{Placing Buffers Into The Descriptor Table}\label{sec:General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device / Placing Buffers Into The Descriptor Table} + +A buffer consists of zero or more device-readable physically-contiguous +elements followed by zero or more physically-contiguous +device-writable elements (each has at least one element). 
This +algorithm maps it into the descriptor table to form a descriptor +chain: + +for each buffer element, b: + +\begin{enumerate} +\item Get the next free descriptor table entry, d +\item Set \field{d.addr} to the physical address of the start of b +\item Set \field{d.len} to the length of b. +\item If b is device-writable, set \field{d.flags} to VIRTQ_DESC_F_WRITE, + otherwise 0. +\item If there is a buffer element after this: + \begin{enumerate} + \item Set \field{d.next} to the index of the next free descriptor + element. + \item Set the VIRTQ_DESC_F_NEXT bit in \field{d.flags}. + \end{enumerate} +\end{enumerate} + +In practice, \field{d.next} is usually used to chain free +descriptors, and a separate count kept to check there are enough +free descriptors before beginning the mappings. + +\subsubsection{Updating The Available Ring}\label{sec:General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device / Updating The Available Ring} + +The descriptor chain head is the first d in the algorithm +above, ie. the index of the descriptor table entry referring to the first +part of the buffer. 
A naive driver implementation MAY do the following (with the +appropriate conversion to-and-from little-endian assumed): + +\begin{lstlisting} +avail->ring[avail->idx % qsz] = head; +\end{lstlisting} + +However, in general the driver MAY add many descriptor chains before it updates +\field{idx} (at which point they become visible to the +device), so it is common to keep a counter of how many the driver has added: + +\begin{lstlisting} +avail->ring[(avail->idx + added++) % qsz] = head; +\end{lstlisting} + +\subsubsection{Updating \field{idx}}\label{sec:General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device / Updating idx} + +\field{idx} always increments, and wraps naturally at +65536: + +\begin{lstlisting} +avail->idx += added; +\end{lstlisting} + +Once available \field{idx} is updated by the driver, this exposes the +descriptor and its contents. The device MAY +access the descriptor chains the driver created and the +memory they refer to immediately. + +\drivernormative{\paragraph}{Updating idx}{General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device / Updating idx} +The driver MUST perform a suitable memory barrier before the \field{idx} update, to ensure the +device sees the most up-to-date copy. + +\subsubsection{Notifying The Device}\label{sec:General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device / Notifying The Device} + +The actual method of device notification is bus-specific, but generally +it can be expensive. So the device MAY suppress such notifications if it +doesn't need them, as detailed in section \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Notification Suppression}. + +The driver has to be careful to expose the new \field{idx} +value before checking if notifications are suppressed. 
+ +\drivernormative{\paragraph}{Notifying The Device}{General Initialization And Device Operation / Device Operation / Supplying Buffers to The Device / Notifying The Device} +The driver MUST perform a suitable memory barrier before reading \field{flags} or +\field{avail_event}, to avoid missing a notification. + +\subsection{Receiving Used Buffers From The Device}\label{sec:General Initialization And Device Operation / Device Operation / Receiving Used Buffers From The Device} + +Once the device has used buffers referred to by a descriptor (read from or written to them, or +parts of both, depending on the nature of the virtqueue and the +device), it interrupts the driver as detailed in section \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Interrupt Suppression}. + +\begin{note} +For optimal performance, a driver MAY disable interrupts while processing +the used ring, but beware the problem of missing interrupts between +emptying the ring and reenabling interrupts. This is usually handled by +re-checking for more used buffers after interrupts are re-enabled: + +\begin{lstlisting} +virtq_disable_interrupts(vq); + +for (;;) { + if (vq->last_seen_used != le16_to_cpu(virtq->used.idx)) { + virtq_enable_interrupts(vq); + mb(); + + if (vq->last_seen_used != le16_to_cpu(virtq->used.idx)) + break; + + virtq_disable_interrupts(vq); + } + + struct virtq_used_elem *e = virtq.used->ring[vq->last_seen_used%vsz]; + process_buffer(e); + vq->last_seen_used++; +} +\end{lstlisting} +\end{note} + +\subsection{Notification of Device Configuration Changes}\label{sec:General Initialization And Device Operation / Device Operation / Notification of Device Configuration Changes} + +For devices where the device-specific configuration information can be changed, an +interrupt is delivered when a device-specific configuration change occurs. 
+ +In addition, this interrupt is triggered by the device setting +DEVICE_NEEDS_RESET (see \ref{sec:Basic Facilities of a Virtio Device / Device Status Field / DEVICENEEDSRESET}). + +\section{Device Cleanup}\label{sec:General Initialization And Device Operation / Device Cleanup} + +Once the driver has set the DRIVER_OK status bit, all the configured +virtqueues of the device are considered live. None of the virtqueues +of a device are live once the device has been reset. + +\drivernormative{\subsection}{Device Cleanup}{General Initialization And Device Operation / Device Cleanup} + +A driver MUST NOT alter descriptor table entries which have been +exposed in the available ring (and not marked consumed by the device +in the used ring) of a live virtqueue. + +A driver MUST NOT decrement the available \field{idx} on a live virtqueue (ie. +there is no way to ``unexpose'' buffers). + +Thus a driver MUST ensure a virtqueue isn't live (by device reset) before removing exposed buffers. + +\chapter{Virtio Transport Options}\label{sec:Virtio Transport Options} + +Virtio can use various different buses, thus the standard is split +into virtio general and bus-specific sections. + +\section{Virtio Over PCI Bus}\label{sec:Virtio Transport Options / Virtio Over PCI Bus} + +Virtio devices are commonly implemented as PCI devices. + +A Virtio device can be implemented as any kind of PCI device: +a Conventional PCI device or a PCI Express +device. To assure designs meet the latest level +requirements, see +the PCI-SIG home page at \url{http://www.pcisig.com} for any +approved changes. + +\devicenormative{\subsection}{Virtio Over PCI Bus}{Virtio Transport Options / Virtio Over PCI Bus} +A Virtio device using Virtio Over PCI Bus MUST expose to +the guest an interface that meets the specification requirements of +the appropriate PCI specification: \hyperref[intro:PCI]{[PCI]} +and \hyperref[intro:PCIe]{[PCIe]} +respectively. 
+ +\subsection{PCI Device Discovery}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Discovery} + +Any PCI device with PCI Vendor ID 0x1AF4, and PCI Device ID 0x1000 through +0x107F inclusive is a virtio device. The actual value within this range +indicates which virtio device is supported by the device. +The PCI Device ID is calculated by adding 0x1040 to the Virtio Device ID, +as indicated in section \ref{sec:Device Types}. +Additionally, devices MAY utilize a Transitional PCI Device ID range, +0x1000 to 0x103F depending on the device type. + +\devicenormative{\subsubsection}{PCI Device Discovery}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Discovery} + +Devices MUST have the PCI Vendor ID 0x1AF4. +Devices MUST either have the PCI Device ID calculated by adding 0x1040 +to the Virtio Device ID, as indicated in section \ref{sec:Device +Types} or have the Transitional PCI Device ID depending on the device type, +as follows: + +\begin{tabular}{|l|c|} +\hline +Transitional PCI Device ID & Virtio Device \\ +\hline \hline +0x1000 & network card \\ +\hline +0x1001 & block device \\ +\hline +0x1002 & memory ballooning (traditional) \\ +\hline +0x1003 & console \\ +\hline +0x1004 & SCSI host \\ +\hline +0x1005 & entropy source \\ +\hline +0x1009 & 9P transport \\ +\hline +\end{tabular} + +For example, the network card device with the Virtio Device ID 1 +has the PCI Device ID 0x1041 or the Transitional PCI Device ID 0x1000. + +The PCI Subsystem Vendor ID and the PCI Subsystem Device ID MAY reflect +the PCI Vendor and Device ID of the environment (for informational purposes by the driver). + +Non-transitional devices SHOULD have a PCI Device ID in the range +0x1040 to 0x107f. +Non-transitional devices SHOULD have a PCI Revision ID of 1 or higher. +Non-transitional devices SHOULD have a PCI Subsystem Device ID of 0x40 or higher. + +This is to reduce the chance of a legacy driver attempting +to drive the device. 
+ +\drivernormative{\subsubsection}{PCI Device Discovery}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Discovery} +Drivers MUST match devices with the PCI Vendor ID 0x1AF4 and +the PCI Device ID in the range 0x1040 to 0x107f, +calculated by adding 0x1040 to the Virtio Device ID, +as indicated in section \ref{sec:Device Types}. +Drivers for device types listed in section \ref{sec:Virtio +Transport Options / Virtio Over PCI Bus / PCI Device Discovery} +MUST match devices with the PCI Vendor ID 0x1AF4 and +the Transitional PCI Device ID indicated in section + \ref{sec:Virtio +Transport Options / Virtio Over PCI Bus / PCI Device Discovery}. + +Drivers MUST match any PCI Revision ID value. +Drivers MAY match any PCI Subsystem Vendor ID and any +PCI Subsystem Device ID value. + +\subsubsection{Legacy Interfaces: A Note on PCI Device Discovery}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Discovery / Legacy Interfaces: A Note on PCI Device Discovery} +Transitional devices MUST have a PCI Revision ID of 0. +Transitional devices MUST have the PCI Subsystem Device ID +matching the Virtio Device ID, as indicated in section \ref{sec:Device Types}. +Transitional devices MUST have the Transitional PCI Device ID in +the range 0x1000 to 0x103f. + +This is to match legacy drivers. + +\subsection{PCI Device Layout}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout} + +The device is configured via I/O and/or memory regions (though see +\ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / PCI configuration access capability} +for access via the PCI configuration space), as specified by Virtio +Structure PCI Capabilities. + +Fields of different sizes are present in the device +configuration regions. +All 64-bit, 32-bit and 16-bit fields are little-endian. +64-bit fields are to be treated as two 32-bit fields, +with low 32 bit part followed by the high 32 bit part. 
+ +\drivernormative{\subsubsection}{PCI Device Layout}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout} + +For device configuration access, the driver MUST use 8-bit wide +accesses for 8-bit wide fields, 16-bit wide and aligned accesses +for 16-bit wide fields and 32-bit wide and aligned accesses for +32-bit and 64-bit wide fields. For 64-bit fields, the driver MAY +access each of the high and low 32-bit parts of the field +independently. + +\devicenormative{\subsubsection}{PCI Device Layout}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout} + +For 64-bit device configuration fields, the device MUST allow driver +independent access to high and low 32-bit parts of the field. + +\subsection{Virtio Structure PCI Capabilities}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / Virtio Structure PCI Capabilities} + +The virtio device configuration layout includes several structures: +\begin{itemize} +\item Common configuration +\item Notifications +\item ISR Status +\item Device-specific configuration (optional) +\item PCI configuration access +\end{itemize} + +Each structure can be mapped by a Base Address register (BAR) belonging to +the function, or accessed via the special VIRTIO_PCI_CAP_PCI_CFG field in the PCI configuration space. + +The location of each structure is specified using a vendor-specific PCI capability located +on the capability list in PCI configuration space of the device. +This virtio structure capability uses little-endian format; all fields are +read-only for the driver unless stated otherwise: + +\begin{lstlisting} +struct virtio_pci_cap { + u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ + u8 cap_next; /* Generic PCI field: next ptr. */ + u8 cap_len; /* Generic PCI field: capability length */ + u8 cfg_type; /* Identifies the structure. */ + u8 bar; /* Where to find it. */ + u8 padding[3]; /* Pad to full dword. */ + le32 offset; /* Offset within bar. 
*/ + le32 length; /* Length of the structure, in bytes. */ +}; +\end{lstlisting} + +This structure can be followed by extra data, depending on +\field{cfg_type}, as documented below. + +The fields are interpreted as follows: + +\begin{description} +\item[\field{cap_vndr}] + 0x09; Identifies a vendor-specific capability. + +\item[\field{cap_next}] + Link to next capability in the capability list in the PCI configuration space. + +\item[\field{cap_len}] + Length of this capability structure, including the whole of + struct virtio_pci_cap, and extra data if any. + This length MAY include padding, or fields unused by the driver. + +\item[\field{cfg_type}] + identifies the structure, according to the following table: + +\begin{lstlisting} +/* Common configuration */ +#define VIRTIO_PCI_CAP_COMMON_CFG 1 +/* Notifications */ +#define VIRTIO_PCI_CAP_NOTIFY_CFG 2 +/* ISR Status */ +#define VIRTIO_PCI_CAP_ISR_CFG 3 +/* Device specific configuration */ +#define VIRTIO_PCI_CAP_DEVICE_CFG 4 +/* PCI configuration access */ +#define VIRTIO_PCI_CAP_PCI_CFG 5 +\end{lstlisting} + + Any other value is reserved for future use. + + Each structure is detailed individually below. + + The device MAY offer more than one structure of any type - this makes it + possible for the device to expose multiple interfaces to drivers. The order of + the capabilities in the capability list specifies the order of preference + suggested by the device. + \begin{note} + For example, on some hypervisors, notifications using IO accesses are + faster than memory accesses. In this case, the device would expose two + capabilities with \field{cfg_type} set to VIRTIO_PCI_CAP_NOTIFY_CFG: + the first one addressing an I/O BAR, the second one addressing a memory BAR. + In this example, the driver would use the I/O BAR if I/O resources are available, and fall back on + memory BAR when I/O resources are unavailable. 
+ \end{note} + +\item[\field{bar}] + values 0x0 to 0x5 specify a Base Address register (BAR) belonging to + the function located beginning at 10h in PCI Configuration Space + and used to map the structure into Memory or I/O Space. + The BAR is permitted to be either 32-bit or 64-bit, it can map Memory Space + or I/O Space. + + Any other value is reserved for future use. + +\item[\field{offset}] + indicates where the structure begins relative to the base address associated + with the BAR. The alignment requirements of \field{offset} are indicated + in each structure-specific section below. + +\item[\field{length}] + indicates the length of the structure. + + \field{length} MAY include padding, or fields unused by the driver, or + future extensions. + + \begin{note} + For example, a future device might present a large structure size of several + MBytes. + As current devices never utilize structures larger than 4KBytes in size, + driver MAY limit the mapped structure size to e.g. + 4KBytes (thus ignoring parts of structure after the first + 4KBytes) to allow forward compatibility with such devices without loss of + functionality and without wasting resources. + \end{note} +\end{description} + +\drivernormative{\subsubsection}{Virtio Structure PCI Capabilities}{Virtio Transport Options / Virtio Over PCI Bus / Virtio Structure PCI Capabilities} + +The driver MUST ignore any vendor-specific capability structure which has +a reserved \field{cfg_type} value. + +The driver SHOULD use the first instance of each virtio structure type they can +support. + +The driver MUST accept a \field{cap_len} value which is larger than specified here. + +The driver MUST ignore any vendor-specific capability structure which has +a reserved \field{bar} value. + + The drivers SHOULD only map part of configuration structure + large enough for device operation. 
The drivers MUST handle + an unexpectedly large \field{length}, but MAY check that \field{length} + is large enough for device operation. + +The driver MUST NOT write into any field of the capability structure, +with the exception of those with \field{cfg_type} VIRTIO_PCI_CAP_PCI_CFG as +detailed in \ref{drivernormative:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / PCI configuration access capability}. + +\devicenormative{\subsubsection}{Virtio Structure PCI Capabilities}{Virtio Transport Options / Virtio Over PCI Bus / Virtio Structure PCI Capabilities} + +The device MUST include any extra data (from the beginning of the \field{cap_vndr} field +through end of the extra data fields if any) in \field{cap_len}. +The device MAY append extra data +or padding to any structure beyond that. + +If the device presents multiple structures of the same type, it SHOULD order +them from optimal (first) to least-optimal (last). + +\subsubsection{Common configuration structure layout}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Common configuration structure layout} + +The common configuration structure is found at the \field{bar} and \field{offset} within the VIRTIO_PCI_CAP_COMMON_CFG capability; its layout is below. + +\begin{lstlisting} +struct virtio_pci_common_cfg { + /* About the whole device. */ + le32 device_feature_select; /* read-write */ + le32 device_feature; /* read-only for driver */ + le32 driver_feature_select; /* read-write */ + le32 driver_feature; /* read-write */ + le16 config_msix_vector; /* read-write */ + le16 num_queues; /* read-only for driver */ + u8 device_status; /* read-write */ + u8 config_generation; /* read-only for driver */ + + /* About a specific virtqueue. */ + le16 queue_select; /* read-write */ + le16 queue_size; /* read-write, power of 2, or 0. 
*/ + le16 queue_msix_vector; /* read-write */ + le16 queue_enable; /* read-write */ + le16 queue_notify_off; /* read-only for driver */ + le64 queue_desc; /* read-write */ + le64 queue_avail; /* read-write */ + le64 queue_used; /* read-write */ +}; +\end{lstlisting} + +\begin{description} +\item[\field{device_feature_select}] + The driver uses this to select which feature bits \field{device_feature} shows. + Value 0x0 selects Feature Bits 0 to 31, 0x1 selects Feature Bits 32 to 63, etc. + +\item[\field{device_feature}] + The device uses this to report which feature bits it is + offering to the driver: the driver writes to + \field{device_feature_select} to select which feature bits are presented. + +\item[\field{driver_feature_select}] + The driver uses this to select which feature bits \field{driver_feature} shows. + Value 0x0 selects Feature Bits 0 to 31, 0x1 selects Feature Bits 32 to 63, etc. + +\item[\field{driver_feature}] + The driver writes this to accept feature bits offered by the device. + Driver Feature Bits selected by \field{driver_feature_select}. + +\item[\field{config_msix_vector}] + The driver sets the Configuration Vector for MSI-X. + +\item[\field{num_queues}] + The device specifies the maximum number of virtqueues supported here. + +\item[\field{device_status}] + The driver writes the device status here (see \ref{sec:Basic Facilities of a Virtio Device / Device Status Field}). Writing 0 into this + field resets the device. + +\item[\field{config_generation}] + Configuration atomicity value. The device changes this every time the + configuration noticeably changes. + +\item[\field{queue_select}] + Queue Select. The driver selects which virtqueue the following + fields refer to. + +\item[\field{queue_size}] + Queue Size. On reset, specifies the maximum queue size supported by + the hypervisor. This can be modified by driver to reduce memory requirements. + A 0 means the queue is unavailable. 
+ +\item[\field{queue_msix_vector}] + The driver uses this to specify the queue vector for MSI-X. + +\item[\field{queue_enable}] + The driver uses this to selectively prevent the device from executing requests from this virtqueue. + 1 - enabled; 0 - disabled. + +\item[\field{queue_notify_off}] + The driver reads this to calculate the offset from start of Notification structure at + which this virtqueue is located. + \begin{note} this is \emph{not} an offset in bytes. + See \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Notification capability} below. + \end{note} + +\item[\field{queue_desc}] + The driver writes the physical address of Descriptor Table here. See section \ref{sec:Basic Facilities of a Virtio Device / Virtqueues}. + +\item[\field{queue_avail}] + The driver writes the physical address of Available Ring here. See section \ref{sec:Basic Facilities of a Virtio Device / Virtqueues}. + +\item[\field{queue_used}] + The driver writes the physical address of Used Ring here. See section \ref{sec:Basic Facilities of a Virtio Device / Virtqueues}. +\end{description} + +\devicenormative{\paragraph}{Common configuration structure layout}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Common configuration structure layout} +\field{offset} MUST be 4-byte aligned. + +The device MUST present at least one common configuration capability. + +The device MUST present the feature bits it is offering in \field{device_feature}, starting at bit \field{device_feature_select} $*$ 32 for any \field{device_feature_select} written by the driver. +\begin{note} + This means that it will present 0 for any \field{device_feature_select} other than 0 or 1, since no feature defined here exceeds 63. +\end{note} + +The device MUST present any valid feature bits the driver has written in \field{driver_feature}, starting at bit \field{driver_feature_select} $*$ 32 for any \field{driver_feature_select} written by the driver.
Valid feature bits are those which are subset of the corresponding \field{device_feature} bits. The device MAY present invalid bits written by the driver. + +\begin{note} + This means that a device can ignore writes for feature bits it never + offers, and simply present 0 on reads. Or it can just mirror what the driver wrote + (but it will still have to check them when the driver sets FEATURES_OK). +\end{note} + +\begin{note} + A driver shouldn't write invalid bits anyway, as per \ref{drivernormative:General Initialization And Device Operation / Device Initialization}, but this attempts to handle it. +\end{note} + +The device MUST present a changed \field{config_generation} after the +driver has read a device-specific configuration value which has +changed since any part of the device-specific configuration was last +read. +\begin{note} +As \field{config_generation} is an 8-bit value, simply incrementing it +on every configuration change could violate this requirement due to wrap. +Better would be to set an internal flag when it has changed, +and if that flag is set when the driver reads from the device-specific +configuration, increment \field{config_generation} and clear the flag. +\end{note} + +The device MUST reset when 0 is written to \field{device_status}, and +present a 0 in \field{device_status} once that is done. + +The device MUST present a 0 in \field{queue_enable} on reset. + +The device MUST present a 0 in \field{queue_size} if the virtqueue +corresponding to the current \field{queue_select} is unavailable. + +\drivernormative{\paragraph}{Common configuration structure layout}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Common configuration structure layout} + +The driver MUST NOT write to \field{device_feature}, \field{num_queues}, \field{config_generation} or \field{queue_notify_off}. + +The driver MUST NOT write a value which is not a power of 2 to \field{queue_size}. 
+ +The driver MUST configure the other virtqueue fields before enabling the virtqueue +with \field{queue_enable}. + +After writing 0 to \field{device_status}, the driver MUST wait for a read of +\field{device_status} to return 0 before reinitializing the device. + +The driver MUST NOT write a 0 to \field{queue_enable}. + +\subsubsection{Notification structure layout}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Notification capability} + +The notification location is found using the VIRTIO_PCI_CAP_NOTIFY_CFG +capability. This capability is immediately followed by an additional +field, like so: + +\begin{lstlisting} +struct virtio_pci_notify_cap { + struct virtio_pci_cap cap; + le32 notify_off_multiplier; /* Multiplier for queue_notify_off. */ +}; +\end{lstlisting} + +\field{notify_off_multiplier} is combined with the \field{queue_notify_off} to +derive the Queue Notify address within a BAR for a virtqueue: + +\begin{lstlisting} + cap.offset + queue_notify_off * notify_off_multiplier +\end{lstlisting} + +The \field{cap.offset} and \field{notify_off_multiplier} are taken from the +notification capability structure above, and the \field{queue_notify_off} is +taken from the common configuration structure. + +\begin{note} +For example, if \field{notify_off_multiplier} is 0, the device uses +the same Queue Notify address for all queues. +\end{note} + +\devicenormative{\paragraph}{Notification capability}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Notification capability} +The device MUST present at least one notification capability. + +The \field{cap.offset} MUST be 2-byte aligned. + +The device MUST either present \field{notify_off_multiplier} as an even power of 2, +or present \field{notify_off_multiplier} as 0. + +The value \field{cap.length} presented by the device MUST be at least 2 +and MUST be large enough to support queue notification offsets +for all supported queues in all possible configurations.
+ +For all queues, the value \field{cap.length} presented by the device MUST satisfy: +\begin{lstlisting} +cap.length >= queue_notify_off * notify_off_multiplier + 2 +\end{lstlisting} + +\subsubsection{ISR status capability}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / ISR status capability} + +The VIRTIO_PCI_CAP_ISR_CFG capability +refers to at least a single byte, which contains the 8-bit ISR status field +to be used for INT\#x interrupt handling. + +The \field{offset} for the \field{ISR status} has no alignment requirements. + +The ISR bits allow the driver to distinguish between device-specific configuration +change interrupts and normal virtqueue interrupts: + +\begin{tabular}{ |l||l|l|l| } +\hline +Bits & 0 & 1 & 2 to 31 \\ +\hline +Purpose & Queue Interrupt & Device Configuration Interrupt & Reserved \\ +\hline +\end{tabular} + +To avoid an extra access, simply reading this register resets it to 0 and +causes the device to de-assert the interrupt. + +In this way, driver read of ISR status causes the device to de-assert +an interrupt. + +See sections \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Virtqueue Interrupts From The Device} and \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Notification of Device Configuration Changes} for how this is used. + +\devicenormative{\paragraph}{ISR status capability}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / ISR status capability} + +The device MUST present at least one VIRTIO_PCI_CAP_ISR_CFG capability. + +The device MUST set the Device Configuration Interrupt bit +in \field{ISR status} before sending a device configuration +change notification to the driver. + +If MSI-X capability is disabled, the device MUST set the Queue +Interrupt bit in \field{ISR status} before sending a virtqueue +notification to the driver.
+ +If MSI-X capability is disabled, the device MUST set the Interrupt Status +bit in the PCI Status register in the PCI Configuration Header of +the device to the logical OR of all bits in \field{ISR status} of +the device. The device then asserts/deasserts INT\#x interrupts unless masked +according to standard PCI rules \hyperref[intro:PCI]{[PCI]}. + +The device MUST reset \field{ISR status} to 0 on driver read. + +\drivernormative{\paragraph}{ISR status capability}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / ISR status capability} + +If MSI-X capability is enabled, the driver SHOULD NOT access +\field{ISR status} upon detecting a Queue Interrupt. + +\subsubsection{Device-specific configuration}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Device-specific configuration} + +The device MUST present at least one VIRTIO_PCI_CAP_DEVICE_CFG capability for +any device type which has a device-specific configuration. + +\devicenormative{\paragraph}{Device-specific configuration}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Device-specific configuration} + +The \field{offset} for the device-specific configuration MUST be 4-byte aligned. + +\subsubsection{PCI configuration access capability}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / PCI configuration access capability} + +The VIRTIO_PCI_CAP_PCI_CFG capability +creates an alternative (and likely suboptimal) access method to the +common configuration, notification, ISR and device-specific configuration regions. + +The capability is immediately followed by an additional field like so: + +\begin{lstlisting} +struct virtio_pci_cfg_cap { + struct virtio_pci_cap cap; + u8 pci_cfg_data[4]; /* Data for BAR access. */ +}; +\end{lstlisting} + +The fields \field{cap.bar}, \field{cap.length}, \field{cap.offset} and +\field{pci_cfg_data} are read-write (RW) for the driver. 
+ +To access a device region, the driver writes into the capability +structure (i.e. within the PCI configuration space) as follows: + +\begin{itemize} +\item The driver sets the BAR to access by writing to \field{cap.bar}. + +\item The driver sets the size of the access by writing 1, 2 or 4 to + \field{cap.length}. + +\item The driver sets the offset within the BAR by writing to + \field{cap.offset}. +\end{itemize} + +At that point, \field{pci_cfg_data} will provide a window of size +\field{cap.length} into the given \field{cap.bar} at offset \field{cap.offset}. + +\devicenormative{\paragraph}{PCI configuration access capability}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / PCI configuration access capability} + +The device MUST present at least one VIRTIO_PCI_CAP_PCI_CFG capability. + +Upon detecting driver write access +to \field{pci_cfg_data}, the device MUST execute a write access +at offset \field{cap.offset} at BAR selected by \field{cap.bar} using the first \field{cap.length} +bytes from \field{pci_cfg_data}. + +Upon detecting driver read access +to \field{pci_cfg_data}, the device MUST +execute a read access of length \field{cap.length} at offset \field{cap.offset} +at BAR selected by \field{cap.bar} and store the first \field{cap.length} bytes in +\field{pci_cfg_data}. + +\drivernormative{\paragraph}{PCI configuration access capability}{Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / PCI configuration access capability} + +The driver MUST NOT write a \field{cap.offset} which is not +a multiple of \field{cap.length} (i.e. all accesses MUST be aligned). + +The driver MUST NOT read or write \field{pci_cfg_data} +unless \field{cap.bar}, \field{cap.length} and \field{cap.offset} +address \field{cap.length} bytes within a BAR range +specified by some other Virtio Structure PCI Capability +of type other than \field{VIRTIO_PCI_CAP_PCI_CFG}.
+ +\subsubsection{Legacy Interfaces: A Note on PCI Device Layout}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Legacy Interfaces: A Note on PCI Device Layout} + +Transitional devices MUST present part of configuration +registers in a legacy configuration structure in BAR0 in the first I/O +region of the PCI device, as documented below. +When using the legacy interface, transitional drivers +MUST use the legacy configuration structure in BAR0 in the first +I/O region of the PCI device, as documented below. + +When using the legacy interface the driver MAY access +the device-specific configuration region using any width accesses, and +a transitional device MUST present driver with the same results as +when accessed using the ``natural'' access method (i.e. +32-bit accesses for 32-bit fields, etc). + +Note that this is possible because while the virtio common configuration structure is PCI +(i.e. little) endian, when using the legacy interface the device-specific +configuration region is encoded in the native endian of the guest (where such distinction is +applicable). 
+ +When used through the legacy interface, the virtio common configuration structure looks as follows: + +\begin{tabularx}{\textwidth}{ |X||X|X|X|X|X|X|X|X| } +\hline + Bits & 32 & 32 & 32 & 16 & 16 & 16 & 8 & 8 \\ +\hline + Read / Write & R & R+W & R+W & R & R+W & R+W & R+W & R \\ +\hline + Purpose & Device Features bits 0:31 & Driver Features bits 0:31 & + Queue Address & \field{queue_size} & \field{queue_select} & Queue Notify & + Device Status & ISR \newline Status \\ +\hline +\end{tabularx} + +If MSI-X is enabled for the device, two additional fields +immediately follow this header: + +\begin{tabular}{ |l||l|l| } +\hline +Bits & 16 & 16 \\ +\hline +Read/Write & R+W & R+W \\ +\hline +Purpose (MSI-X) & \field{config_msix_vector} & \field{queue_msix_vector} \\ +\hline +\end{tabular} + +Note: When MSI-X capability is enabled, device-specific configuration starts at +byte offset 24 in virtio common configuration structure. When MSI-X capability is not +enabled, device-specific configuration starts at byte offset 20 in virtio +header. ie. once you enable MSI-X on the device, the other fields move. +If you turn it off again, they move back! + +Any device-specific configuration space immediately follows +these general headers: + +\begin{tabular}{|l||l|l|} +\hline +Bits & Device Specific & \multirow{3}{*}{\ldots} \\ +\cline{1-2} +Read / Write & Device Specific & \\ +\cline{1-2} +Purpose & Device Specific & \\ +\hline +\end{tabular} + +When accessing the device-specific configuration space +using the legacy interface, transitional +drivers MUST access the device-specific configuration space +at an offset immediately following the general headers. + +When using the legacy interface, transitional +devices MUST present the device-specific configuration space +if any at an offset immediately following the general headers. + +Note that only Feature Bits 0 to 31 are accessible through the +Legacy Interface.
When used through the Legacy Interface, +Transitional Devices MUST assume that Feature Bits 32 to 63 +are not acknowledged by Driver. + +As legacy devices had no \field{config_generation} field, +see \ref{sec:Basic Facilities of a Virtio Device / Device +Configuration Space / Legacy Interface: Device Configuration +Space}~\nameref{sec:Basic Facilities of a Virtio Device / Device Configuration Space / Legacy Interface: Device Configuration Space} for workarounds. + +\subsubsection{Non-transitional Device With Legacy Driver: A Note +on PCI Device Layout}\label{sec:Virtio Transport Options / Virtio +Over PCI Bus / PCI Device Layout / Non-transitional Device With +Legacy Driver: A Note on PCI Device Layout} + +All known legacy drivers check either the PCI Revision or the +Device and Vendor IDs, and thus won't attempt to drive a +non-transitional device. + +A buggy legacy driver might mistakenly attempt to drive a +non-transitional device. If support for such drivers is required +(as opposed to fixing the bug), the following would be the +recommended way to detect and handle them. +\begin{note} +Such buggy drivers are not currently known to be used in +production. 
+\end{note} + +\subparagraph{ +\DIFdeltextcstwo{Driver Requirements: Non-transitional Device With Legacy Driver} +\DIFaddtextcstwo{Device Requirements: Non-transitional Device With Legacy Driver} +} +\label{drivernormative:Virtio Transport Options / Virtio Over PCI +Bus / PCI-specific Initialization And Device Operation / +Device Initialization / Non-transitional Device With Legacy +Driver} +\label{devicenormative:Virtio Transport Options / Virtio Over PCI +Bus / PCI-specific Initialization And Device Operation / +Device Initialization / Non-transitional Device With Legacy +Driver} + +Non-transitional devices, on a platform where a legacy driver for +a legacy device with the same ID (including PCI Revision, Device +and Vendor IDs) is known to have previously existed, +SHOULD take the following steps to cause the legacy driver to +fail gracefully when it attempts to drive them: + +\begin{enumerate} +\item Present an I/O BAR in BAR0, and +\item Respond to a single-byte zero write to offset 18 + (corresponding to Device Status register in the legacy layout) + of BAR0 by presenting zeroes on every BAR and ignoring writes. +\end{enumerate} + +\subsection{PCI-specific Initialization And Device Operation}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation} + +\subsubsection{Device Initialization}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization} + +This documents PCI-specific steps executed during Device Initialization. 
+ +\paragraph{Virtio Device Configuration Layout Detection}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Virtio Device Configuration Layout Detection} + +As a prerequisite to device initialization, the driver scans the +PCI capability list, detecting virtio configuration layout using Virtio +Structure PCI capabilities as detailed in \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / Virtio Structure PCI Capabilities}. + +\subparagraph{Legacy Interface: A Note on Device Layout Detection}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Virtio Device Configuration Layout Detection / Legacy Interface: A Note on Device Layout Detection} + +Legacy drivers skipped the Device Layout Detection step, assuming legacy +device configuration space in BAR0 in I/O space unconditionally. + +Legacy devices did not have the Virtio PCI Capability in their +capability list. + +Therefore: + +Transitional devices MUST expose the Legacy Interface in I/O +space in BAR0. + +Transitional drivers MUST look for the Virtio PCI +Capabilities on the capability list. +If these are not present, driver MUST assume a legacy device, +and use it through the legacy interface. + +Non-transitional drivers MUST look for the Virtio PCI +Capabilities on the capability list. +If these are not present, driver MUST assume a legacy device, +and fail gracefully. + +\paragraph{MSI-X Vector Configuration}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / MSI-X Vector Configuration} + +When MSI-X capability is present and enabled in the device +(through standard PCI configuration space) \field{config_msix_vector} and \field{queue_msix_vector} are used to map configuration change and queue +interrupts to MSI-X vectors.
In this case, the ISR Status is unused. + +Writing a valid MSI-X Table entry number, 0 to 0x7FF, to +\field{config_msix_vector}/\field{queue_msix_vector} maps interrupts triggered +by the configuration change/selected queue events respectively to +the corresponding MSI-X vector. To disable interrupts for an +event type, the driver unmaps this event by writing a special NO_VECTOR +value: + +\begin{lstlisting} +/* Vector value used to disable MSI for queue */ +#define VIRTIO_MSI_NO_VECTOR 0xffff +\end{lstlisting} + +Note that mapping an event to vector might require device to +allocate internal device resources, and thus could fail. + +\devicenormative{\subparagraph}{MSI-X Vector Configuration}{Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / MSI-X Vector Configuration} + +A device that has an MSI-X capability SHOULD support at least 2 +and at most 0x800 MSI-X vectors. +Device MUST report the number of vectors supported in +\field{Table Size} in the MSI-X Capability as specified in +\hyperref[intro:PCI]{[PCI]}. +The device SHOULD restrict the reported MSI-X Table Size field +to a value that might benefit system performance. +\begin{note} +For example, a device which does not expect to send +interrupts at a high rate might only specify 2 MSI-X vectors. +\end{note} +Device MUST support mapping any event type to any valid +vector 0 to MSI-X \field{Table Size}. +Device MUST support unmapping any event type. + +The device MUST return vector mapped to a given event, +(NO_VECTOR if unmapped) on read of \field{config_msix_vector}/\field{queue_msix_vector}. +The device MUST have all queue and configuration change +events unmapped upon reset. + +Devices SHOULD NOT cause mapping an event to vector to fail +unless it is impossible for the device to satisfy the mapping +request.
Devices MUST report mapping +failures by returning the NO_VECTOR value when the relevant +\field{config_msix_vector}/\field{queue_msix_vector} field is read. + +\drivernormative{\subparagraph}{MSI-X Vector Configuration}{Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / MSI-X Vector Configuration} + +Driver MUST support device with any MSI-X Table Size 0 to 0x7FF. +Driver MAY fall back on using INT\#x interrupts for a device +which only supports one MSI-X vector (MSI-X Table Size = 0). + +Driver MAY interpret the Table Size as a hint from the device +for the suggested number of MSI-X vectors to use. + +Driver MUST NOT attempt to map an event to a vector +outside the MSI-X Table supported by the device, +as reported by \field{Table Size} in the MSI-X Capability. + +After mapping an event to vector, the +driver MUST verify success by reading the Vector field value: on +success, the previously written value is returned, and on +failure, NO_VECTOR is returned. If a mapping failure is detected, +the driver MAY retry mapping with fewer vectors, disable MSI-X +or report device failure. + +\paragraph{Virtqueue Configuration}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Virtqueue Configuration} + +As a device can have zero or more virtqueues for bulk data +transport\footnote{For example, the simplest network device has two virtqueues.}, the driver +needs to configure them as part of the device-specific +configuration. + +The driver typically does this as follows, for each virtqueue a device has: + +\begin{enumerate} +\item Write the virtqueue index (first queue is 0) to \field{queue_select}. + +\item Read the virtqueue size from \field{queue_size}. This controls how big the virtqueue is + (see \ref{sec:Basic Facilities of a Virtio Device / Virtqueues}~\nameref{sec:Basic Facilities of a Virtio Device / Virtqueues}).
If this field is 0, the virtqueue does not exist. + +\item Optionally, select a smaller virtqueue size and write it to \field{queue_size}. + +\item Allocate and zero Descriptor Table, Available and Used rings for the + virtqueue in contiguous physical memory. + +\item Optionally, if MSI-X capability is present and enabled on the + device, select a vector to use to request interrupts triggered + by virtqueue events. Write the MSI-X Table entry number + corresponding to this vector into \field{queue_msix_vector}. Read + \field{queue_msix_vector}: on success, previously written value is + returned; on failure, NO_VECTOR value is returned. +\end{enumerate} + +\subparagraph{Legacy Interface: A Note on Virtqueue Configuration}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Device Initialization / Virtqueue Configuration / Legacy Interface: A Note on Virtqueue Configuration} +When using the legacy interface, the queue layout follows \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout}~\nameref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout} with an alignment of 4096. +Driver writes the physical address, divided +by 4096 to the Queue Address field\footnote{The 4096 is based on the x86 page size, but it's also large +enough to ensure that the separate parts of the virtqueue are on +separate cache lines. +}. There was no mechanism to negotiate the queue size. + +\subsubsection{Notifying The Device}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Notifying The Device} + +The driver notifies the device by writing the 16-bit virtqueue index +of this virtqueue to the Queue Notify address. See \ref{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI Device Layout / Notification capability} for how to calculate this address. 
+ +\subsubsection{Virtqueue Interrupts From The Device}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Virtqueue Interrupts From The Device} + +If an interrupt is necessary for a virtqueue, the device would typically act as follows: + +\begin{itemize} + \item If MSI-X capability is disabled: + \begin{enumerate} + \item Set the lower bit of the ISR Status field for the device. + + \item Send the appropriate PCI interrupt for the device. + \end{enumerate} + + \item If MSI-X capability is enabled: + \begin{enumerate} + \item If \field{queue_msix_vector} is not NO_VECTOR, + request the appropriate MSI-X interrupt message for the + device, \field{queue_msix_vector} sets the MSI-X Table entry + number. + \end{enumerate} +\end{itemize} + +\devicenormative{\paragraph}{Virtqueue Interrupts From The Device}{Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Virtqueue Interrupts From The Device} + +If MSI-X capability is enabled and \field{queue_msix_vector} is +NO_VECTOR for a virtqueue, the device MUST NOT deliver an interrupt +for that virtqueue. + +\subsubsection{Notification of Device Configuration Changes}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Notification of Device Configuration Changes} + +Some virtio PCI devices can change the device configuration +state, as reflected in the device-specific configuration region of the device. In this case: + +\begin{itemize} + \item If MSI-X capability is disabled: + \begin{enumerate} + \item Set the second lower bit of the ISR Status field for the device. + + \item Send the appropriate PCI interrupt for the device. 
+ \end{enumerate} + + \item If MSI-X capability is enabled: + \begin{enumerate} + \item If \field{config_msix_vector} is not NO_VECTOR, + request the appropriate MSI-X interrupt message for the + device, \field{config_msix_vector} sets the MSI-X Table entry + number. + \end{enumerate} +\end{itemize} + +A single interrupt MAY indicate both that one or more virtqueue has +been used and that the configuration space has changed. + +\devicenormative{\paragraph}{Notification of Device Configuration Changes}{Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Notification of Device Configuration Changes} + +If MSI-X capability is enabled and \field{config_msix_vector} is +NO_VECTOR, the device MUST NOT deliver an interrupt +for device configuration space changes. + +\drivernormative{\paragraph}{Notification of Device Configuration Changes}{Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Notification of Device Configuration Changes} + +A driver MUST handle the case where the same interrupt is used to indicate +both device configuration space change and one or more virtqueues being used. + +\subsubsection{Driver Handling Interrupts}\label{sec:Virtio Transport Options / Virtio Over PCI Bus / PCI-specific Initialization And Device Operation / Driver Handling Interrupts} +The driver interrupt handler would typically: + +\begin{itemize} + \item If MSI-X capability is disabled: + \begin{itemize} + \item Read the ISR Status field, which will reset it to zero. + \item If the lower bit is set: + look through the used rings of all virtqueues for the + device, to see if any progress has been made by the device + which requires servicing. + \item If the second lower bit is set: + re-examine the configuration space to see what changed. 
+ \end{itemize} + \item If MSI-X capability is enabled: + \begin{itemize} + \item + Look through the used rings of + all virtqueues mapped to that MSI-X vector for the + device, to see if any progress has been made by the device + which requires servicing. + \item + If the MSI-X vector is equal to \field{config_msix_vector}, + re-examine the configuration space to see what changed. + \end{itemize} +\end{itemize} + +\section{Virtio Over MMIO}\label{sec:Virtio Transport Options / Virtio Over MMIO} + +Virtual environments without PCI support (a common situation in +embedded devices models) might use simple memory mapped device +(``virtio-mmio'') instead of the PCI device. + +The memory mapped virtio device behaviour is based on the PCI +device specification. Therefore most operations including device +initialization, queues configuration and buffer transfers are +nearly identical. Existing differences are described in the +following sections. + +\subsection{MMIO Device Discovery}\label{sec:Virtio Transport Options / Virtio Over MMIO / MMIO Device Discovery} + +Unlike PCI, MMIO provides no generic device discovery mechanism. For each +device, the guest OS will need to know the location of the registers +and interrupt(s) used. The suggested binding for systems using +flattened device trees is shown in this example: + +\begin{lstlisting} +// EXAMPLE: virtio_block device taking 512 bytes at 0x1e000, interrupt 42. +virtio_block@1e000 { + compatible = "virtio,mmio"; + reg = <0x1e000 0x200>; + interrupts = <42>; +} +\end{lstlisting} + +\subsection{MMIO Device Register Layout}\label{sec:Virtio Transport Options / Virtio Over MMIO / MMIO Device Register Layout} + +MMIO virtio devices provide a set of memory mapped control +registers followed by a device-specific configuration space, +described in the table~\ref{tab:Virtio Trasport Options / Virtio Over MMIO / MMIO Device Register Layout}. + +All register values are organized as Little Endian. 
+ +\newcommand{\mmioreg}[5]{% Name Function Offset Direction Description + {\field{#1}} \newline #3 \newline #4 & {\bf#2} \newline #5 \\ +} + +\newcommand{\mmiodreg}[7]{% NameHigh NameLow Function OffsetHigh OffsetLow Direction Description + {\field{#1}} \newline #4 \newline {\field{#2}} \newline #5 \newline #6 & {\bf#3} \newline #7 \\ +} + +\begin{longtable}{p{0.2\textwidth}p{0.7\textwidth}} + \caption {MMIO Device Register Layout} + \label{tab:Virtio Trasport Options / Virtio Over MMIO / MMIO Device Register Layout} \\ + \hline + \mmioreg{Name}{Function}{Offset from base}{Direction}{Description} + \hline + \hline + \endfirsthead + \hline + \mmioreg{Name}{Function}{Offset from the base}{Direction}{Description} + \hline + \hline + \endhead + \endfoot + \endlastfoot + \mmioreg{MagicValue}{Magic value}{0x000}{R}{% + 0x74726976 + (a Little Endian equivalent of the ``virt'' string). + } + \hline + \mmioreg{Version}{Device version number}{0x004}{R}{% + 0x2. + \begin{note} + Legacy devices (see \ref{sec:Virtio Transport Options / Virtio Over MMIO / Legacy interface}~\nameref{sec:Virtio Transport Options / Virtio Over MMIO / Legacy interface}) used 0x1. + \end{note} + } + \hline + \mmioreg{DeviceID}{Virtio Subsystem Device ID}{0x008}{R}{% + See \ref{sec:Device Types}~\nameref{sec:Device Types} for possible values. + Value zero (0x0) is used to + define a system memory map with placeholder devices at static, + well known addresses, assigning functions to them depending + on user's needs. + } + \hline + \mmioreg{VendorID}{Virtio Subsystem Vendor ID}{0x00c}{R}{} + \hline + \mmioreg{DeviceFeatures}{Flags representing features the device supports}{0x010}{R}{% + Reading from this register returns 32 consecutive flag bits, + the least significant bit depending on the last value written to + \field{DeviceFeaturesSel}. Access to this register returns + bits $\field{DeviceFeaturesSel}*32$ to $(\field{DeviceFeaturesSel}*32)+31$, eg. 
+ feature bits 0 to 31 if \field{DeviceFeaturesSel} is set to 0 and + feature bits 32 to 63 if \field{DeviceFeaturesSel} is set to 1. + Also see \ref{sec:Basic Facilities of a Virtio Device / Feature Bits}~\nameref{sec:Basic Facilities of a Virtio Device / Feature Bits}. + } + \hline + \mmioreg{DeviceFeaturesSel}{Device (host) features word selection.}{0x014}{W}{% + Writing to this register selects a set of 32 device feature bits + accessible by reading from \field{DeviceFeatures}. + } + \hline + \mmioreg{DriverFeatures}{Flags representing device features understood and activated by the driver}{0x020}{W}{% + Writing to this register sets 32 consecutive flag bits, the least significant + bit depending on the last value written to \field{DriverFeaturesSel}. + Access to this register sets bits $\field{DriverFeaturesSel}*32$ + to $(\field{DriverFeaturesSel}*32)+31$, eg. feature bits 0 to 31 if + \field{DriverFeaturesSel} is set to 0 and feature bits 32 to 63 if + \field{DriverFeaturesSel} is set to 1. Also see \ref{sec:Basic Facilities of a Virtio Device / Feature Bits}~\nameref{sec:Basic Facilities of a Virtio Device / Feature Bits}. + } + \hline + \mmioreg{DriverFeaturesSel}{Activated (guest) features word selection}{0x024}{W}{% + Writing to this register selects a set of 32 activated feature + bits accessible by writing to \field{DriverFeatures}. + } + \hline + \mmioreg{QueueSel}{Virtual queue index}{0x030}{W}{% + Writing to this register selects the virtual queue that the + following operations on \field{QueueNumMax}, \field{QueueNum}, \field{QueueReady}, + \field{QueueDescLow}, \field{QueueDescHigh}, \field{QueueAvailLow}, \field{QueueAvailHigh}, + \field{QueueUsedLow} and \field{QueueUsedHigh} apply to. The index + number of the first queue is zero (0x0). 
+ } + \hline + \mmioreg{QueueNumMax}{Maximum virtual queue size}{0x034}{R}{% + Reading from the register returns the maximum size (number of + elements) of the queue the device is ready to process or + zero (0x0) if the queue is not available. This applies to the + queue selected by writing to \field{QueueSel}. + } + \hline + \mmioreg{QueueNum}{Virtual queue size}{0x038}{W}{% + Queue size is the number of elements in the queue, therefore in each + of the Descriptor Table, the Available Ring and the Used Ring. + Writing to this register notifies the device what size of the + queue the driver will use. This applies to the queue selected by + writing to \field{QueueSel}. + } + \hline + \mmioreg{QueueReady}{Virtual queue ready bit}{0x044}{RW}{% + Writing one (0x1) to this register notifies the device that it can + execute requests from this virtual queue. Reading from this register + returns the last value written to it. Both read and write + accesses apply to the queue selected by writing to \field{QueueSel}. + } + \hline + \mmioreg{QueueNotify}{Queue notifier}{0x050}{W}{% + Writing a queue index to this register notifies the device that + there are new buffers to process in the queue. + } + \hline + \mmioreg{InterruptStatus}{Interrupt status}{0x060}{R}{% + Reading from this register returns a bit mask of events that + caused the device interrupt to be asserted. + The following events are possible: + \begin{description} + \item[Used Ring Update] - bit 0 - the interrupt was asserted + because the device has updated the Used + Ring in at least one of the active virtual queues. + \item[Configuration Change] - bit 1 - the interrupt was + asserted because the configuration of the device has changed. + \end{description} + } + \hline + \mmioreg{InterruptACK}{Interrupt acknowledge}{0x064}{W}{% + Writing a value with bits set as defined in \field{InterruptStatus} + to this register notifies the device that events causing + the interrupt have been handled. 
+ } + \hline + \mmioreg{Status}{Device status}{0x070}{RW}{% + Reading from this register returns the current device status + flags. + Writing non-zero values to this register sets the status flags, + indicating the driver progress. Writing zero (0x0) to this + register triggers a device reset. + See also p. \ref{sec:Virtio Transport Options / Virtio Over MMIO / MMIO-specific Initialization And Device Operation / Device Initialization}~\nameref{sec:Virtio Transport Options / Virtio Over MMIO / MMIO-specific Initialization And Device Operation / Device Initialization}. + } + \hline + \mmiodreg{QueueDescLow}{QueueDescHigh}{Virtual queue's Descriptor Table 64 bit long physical address}{0x080}{0x084}{W}{% + Writing to these two registers (lower 32 bits of the address + to \field{QueueDescLow}, higher 32 bits to \field{QueueDescHigh}) notifies + the device about location of the Descriptor Table of the queue + selected by writing to \field{QueueSel} register. + } + \hline + \mmiodreg{QueueAvailLow}{QueueAvailHigh}{Virtual queue's Available Ring 64 bit long physical address}{0x090}{0x094}{W}{% + Writing to these two registers (lower 32 bits of the address + to \field{QueueAvailLow}, higher 32 bits to \field{QueueAvailHigh}) notifies + the device about location of the Available Ring of the queue + selected by writing to \field{QueueSel}. + } + \hline + \mmiodreg{QueueUsedLow}{QueueUsedHigh}{Virtual queue's Used Ring 64 bit long physical address}{0x0a0}{0x0a4}{W}{% + Writing to these two registers (lower 32 bits of the address + to \field{QueueUsedLow}, higher 32 bits to \field{QueueUsedHigh}) notifies + the device about location of the Used Ring of the queue + selected by writing to \field{QueueSel}. + } + \hline + \mmioreg{ConfigGeneration}{Configuration atomicity value}{0x0fc}{R}{ + Reading from this register returns a value describing a version of the device-specific configuration space (see \field{Config}). 
+ The driver can then access the configuration space and, when finished, read \field{ConfigGeneration} again. + If no part of the configuration space has changed between these two \field{ConfigGeneration} reads, the returned values are identical. + If the values are different, the configuration space accesses were not atomic and the driver has to perform the operations again. + See also \ref {sec:Basic Facilities of a Virtio Device / Device Configuration Space}. + } + \hline + \mmioreg{Config}{Configuration space}{0x100+}{RW}{ + Device-specific configuration space starts at the offset 0x100 + and is accessed with byte alignment. Its meaning and size + depend on the device and the driver. + } + \hline +\end{longtable} + +\devicenormative{\subsubsection}{MMIO Device Register Layout}{Virtio Transport Options / Virtio Over MMIO / MMIO Device Register Layout} + +The device MUST return 0x74726976 in \field{MagicValue}. + +The device MUST return value 0x2 in \field{Version}. + +The device MUST present each event by setting the corresponding bit in \field{InterruptStatus} from the +moment it takes place, until the driver acknowledges the interrupt +by writing a corresponding bit mask to the \field{InterruptACK} register. Bits which +do not represent events which took place MUST be zero. + +Upon reset, the device MUST clear all bits in \field{InterruptStatus} and ready bits in the +\field{QueueReady} register for all queues in the device. + +The device MUST change value returned in \field{ConfigGeneration} if there is any risk of a +driver seeing an inconsistent configuration state. + +The device MUST NOT access virtual queue contents when \field{QueueReady} is zero (0x0). 
+ +\drivernormative{\subsubsection}{MMIO Device Register Layout}{Virtio Transport Options / Virtio Over MMIO / MMIO Device Register Layout} +The driver MUST NOT access memory locations not described in the +table \ref{tab:Virtio Trasport Options / Virtio Over MMIO / MMIO Device Register Layout} +(or, in case of the configuration space, described in the device specification), +MUST NOT write to the read-only registers (direction R) and +MUST NOT read from the write-only registers (direction W). + +The driver MUST only use 32 bit wide and aligned reads and writes to access the control registers +described in table \ref{tab:Virtio Trasport Options / Virtio Over MMIO / MMIO Device Register Layout}. +For the device-specific configuration space, the driver MUST use 8 bit wide accesses for +8 bit wide fields, 16 bit wide and aligned accesses for 16 bit wide fields and 32 bit wide and +aligned accesses for 32 and 64 bit wide fields. + +The driver MUST ignore a device with \field{MagicValue} which is not 0x74726976, +although it MAY report an error. + +The driver MUST ignore a device with \field{Version} which is not 0x2, +although it MAY report an error. + +The driver MUST ignore a device with \field{DeviceID} 0x0, +but MUST NOT report any error. + +Before reading from \field{DeviceFeatures}, the driver MUST write a value to \field{DeviceFeaturesSel}. + +Before writing to the \field{DriverFeatures} register, the driver MUST write a value to the \field{DriverFeaturesSel} register. + +The driver MUST write a value to \field{QueueNum} which is less than +or equal to the value presented by the device in \field{QueueNumMax}. + +When \field{QueueReady} is not zero, the driver MUST NOT access +\field{QueueNum}, \field{QueueDescLow}, \field{QueueDescHigh}, +\field{QueueAvailLow}, \field{QueueAvailHigh}, \field{QueueUsedLow}, \field{QueueUsedHigh}. 
+ +To stop using the queue the driver MUST write zero (0x0) to +\field{QueueReady} and MUST read the value back to ensure +synchronization. + +The driver MUST ignore undefined bits in \field{InterruptStatus}. + +The driver MUST write a value with a bit mask describing events it handled into \field{InterruptACK} when +it finishes handling an interrupt and MUST NOT set any of the undefined bits in the value. + +\subsection{MMIO-specific Initialization And Device Operation}\label{sec:Virtio Transport Options / Virtio Over MMIO / MMIO-specific Initialization And Device Operation} + +\subsubsection{Device Initialization}\label{sec:Virtio Transport Options / Virtio Over MMIO / MMIO-specific Initialization And Device Operation / Device Initialization} + +\drivernormative{\paragraph}{Device Initialization}{Virtio Transport Options / Virtio Over MMIO / MMIO-specific Initialization And Device Operation / Device Initialization} + +The driver MUST start the device initialization by reading and +checking values from \field{MagicValue} and \field{Version}. +If both values are valid, it MUST read \field{DeviceID} +and if its value is zero (0x0) MUST abort initialization and +MUST NOT access any other register. + +Further initialization MUST follow the procedure described in +\ref{sec:General Initialization And Device Operation / Device Initialization}~\nameref{sec:General Initialization And Device Operation / Device Initialization}. + +\subsubsection{Virtqueue Configuration}\label{sec:Virtio Transport Options / Virtio Over MMIO / MMIO-specific Initialization And Device Operation / Virtqueue Configuration} + +The driver will typically initialize the virtual queue in the following way: + +\begin{enumerate} +\item Select the queue by writing its index (first queue is 0) to + \field{QueueSel}. + +\item Check if the queue is not already in use: read \field{QueueReady}, + and expect a returned value of zero (0x0). 
+ +\item Read maximum queue size (number of elements) from + \field{QueueNumMax}. If the returned value is zero (0x0) the + queue is not available. + +\item Allocate and zero the queue pages, making sure the memory + is physically contiguous. It is recommended to align the + Used Ring to an optimal boundary (usually the page size). + +\item Notify the device about the queue size by writing the size to + \field{QueueNum}. + +\item Write physical addresses of the queue's Descriptor Table, + Available Ring and Used Ring to (respectively) the + \field{QueueDescLow}/\field{QueueDescHigh}, + \field{QueueAvailLow}/\field{QueueAvailHigh} and + \field{QueueUsedLow}/\field{QueueUsedHigh} register pairs. + +\item Write 0x1 to \field{QueueReady}. +\end{enumerate} + +\subsubsection{Notifying The Device}\label{sec:Virtio Transport Options / Virtio Over MMIO / MMIO-specific Initialization And Device Operation / Notifying The Device} + +The driver notifies the device about new buffers being available in +a queue by writing the index of the updated queue to \field{QueueNotify}. + +\subsubsection{Notifications From The Device}\label{sec:Virtio Transport Options / Virtio Over MMIO / MMIO-specific Initialization And Device Operation / Notifications From The Device} + +The memory mapped virtio device is using a single, dedicated +interrupt signal, which is asserted when at least one of the +bits described in the description of \field{InterruptStatus} +is set. This is how the device notifies the +driver about a new used buffer being available in the queue +or about a change in the device configuration. + +\drivernormative{\paragraph}{Notifications From The Device}{Virtio Transport Options / Virtio Over MMIO / MMIO-specific Initialization And Device Operation / Notifications From The Device} +After receiving an interrupt, the driver MUST read +\field{InterruptStatus} to check what caused the interrupt +(see the register description). 
After the interrupt is handled, +the driver MUST acknowledge it by writing a bit mask +corresponding to the handled events to the \field{InterruptACK} register. + +\subsection{Legacy interface}\label{sec:Virtio Transport Options / Virtio Over MMIO / Legacy interface} + +The legacy MMIO transport used page-based addressing, resulting +in a slightly different control register layout, the device +initialization and the virtual queue configuration procedure. + +Table \ref{tab:Virtio Trasport Options / Virtio Over MMIO / MMIO Device Legacy Register Layout} +presents the control register layout, omitting +descriptions of registers which did not change their function +nor behaviour: + +\begin{longtable}{p{0.2\textwidth}p{0.7\textwidth}} + \caption {MMIO Device Legacy Register Layout} + \label{tab:Virtio Trasport Options / Virtio Over MMIO / MMIO Device Legacy Register Layout} \\ + \hline + \mmioreg{Name}{Function}{Offset from base}{Direction}{Description} + \hline + \hline + \endfirsthead + \hline + \mmioreg{Name}{Function}{Offset from base}{Direction}{Description} + \hline + \hline + \endhead + \endfoot + \endlastfoot + \mmioreg{MagicValue}{Magic value}{0x000}{R}{} + \hline + \mmioreg{Version}{Device version number}{0x004}{R}{Legacy device returns value 0x1.} + \hline + \mmioreg{DeviceID}{Virtio Subsystem Device ID}{0x008}{R}{} + \hline + \mmioreg{VendorID}{Virtio Subsystem Vendor ID}{0x00c}{R}{} + \hline + \mmioreg{HostFeatures}{Flags representing features the device supports}{0x010}{R}{} + \hline + \mmioreg{HostFeaturesSel}{Device (host) features word selection.}{0x014}{W}{} + \hline + \mmioreg{GuestFeatures}{Flags representing device features understood and activated by the driver}{0x020}{W}{} + \hline + \mmioreg{GuestFeaturesSel}{Activated (guest) features word selection}{0x024}{W}{} + \hline + \mmioreg{GuestPageSize}{Guest page size}{0x028}{W}{% + The driver writes the guest page size in bytes to the + register during initialization, before any queues are used. 
+ This value should be a power of 2 and is used by the device to + calculate the Guest address of the first queue page + (see \field{QueuePFN}). + } + \hline + \mmioreg{QueueSel}{Virtual queue index}{0x030}{W}{% + Writing to this register selects the virtual queue that the + following operations on the \field{QueueNumMax}, \field{QueueNum}, \field{QueueAlign} + and \field{QueuePFN} registers apply to. The index + number of the first queue is zero (0x0). + } + \hline + \mmioreg{QueueNumMax}{Maximum virtual queue size}{0x034}{R}{% + Reading from the register returns the maximum size of the queue + the device is ready to process or zero (0x0) if the queue is not + available. This applies to the queue selected by writing to + \field{QueueSel} and is allowed only when \field{QueuePFN} is set to zero + (0x0), so when the queue is not actively used. + } + \hline + \mmioreg{QueueNum}{Virtual queue size}{0x038}{W}{% + Queue size is the number of elements in the queue, therefore size + of the descriptor table and both available and used rings. + Writing to this register notifies the device what size of the + queue the driver will use. This applies to the queue selected by + writing to \field{QueueSel}. + } + \hline + \mmioreg{QueueAlign}{Used Ring alignment in the virtual queue}{0x03c}{W}{% + Writing to this register notifies the device about alignment + boundary of the Used Ring in bytes. This value should be a power + of 2 and applies to the queue selected by writing to \field{QueueSel}. + } + \hline + \mmioreg{QueuePFN}{Guest physical page number of the virtual queue}{0x040}{RW}{% + Writing to this register notifies the device about location of the + virtual queue in the Guest's physical address space. This value + is the index number of a page starting with the queue + Descriptor Table. Value zero (0x0) means physical address zero + (0x00000000) and is illegal. When the driver stops using the + queue it writes zero (0x0) to this register. 
+ Reading from this register returns the currently used page + number of the queue, therefore a value other than zero (0x0) + means that the queue is in use. + Both read and write accesses apply to the queue selected by + writing to \field{QueueSel}. + } + \hline + \mmioreg{QueueNotify}{Queue notifier}{0x050}{W}{} + \hline + \mmioreg{InterruptStatus}{Interrupt status}{0x060}{R}{} + \hline + \mmioreg{InterruptACK}{Interrupt acknowledge}{0x064}{W}{} + \hline + \mmioreg{Status}{Device status}{0x070}{RW}{% + Reading from this register returns the current device status + flags. + Writing non-zero values to this register sets the status flags, + indicating the OS/driver progress. Writing zero (0x0) to this + register triggers a device reset. The device + sets \field{QueuePFN} to zero (0x0) for all queues in the device. + Also see \ref{sec:General Initialization And Device Operation / Device Initialization}~\nameref{sec:General Initialization And Device Operation / Device Initialization}. + } + \hline + \mmioreg{Config}{Configuration space}{0x100+}{RW}{} + \hline +\end{longtable} + +The virtual queue page size is defined by writing to \field{GuestPageSize}, +as written by the guest. The driver does this before the +virtual queues are configured. + +The virtual queue layout follows +p. \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout}~\nameref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout}, +with the alignment defined in \field{QueueAlign}. + +The virtual queue is configured as follows: +\begin{enumerate} +\item Select the queue by writing its index (first queue is 0) to + \field{QueueSel}. + +\item Check if the queue is not already in use: read \field{QueuePFN}, + expecting a returned value of zero (0x0). + +\item Read maximum queue size (number of elements) from + \field{QueueNumMax}. If the returned value is zero (0x0) the + queue is not available. 
+ +\item Allocate and zero the queue pages in contiguous virtual + memory, aligning the Used Ring to an optimal boundary (usually + page size). The driver should choose a queue size smaller than or + equal to \field{QueueNumMax}. + +\item Notify the device about the queue size by writing the size to + \field{QueueNum}. + +\item Notify the device about the used alignment by writing its value + in bytes to \field{QueueAlign}. + +\item Write the physical number of the first page of the queue to + the \field{QueuePFN} register. +\end{enumerate} + +Notification mechanisms did not change. + +\section{Virtio Over Channel I/O}\label{sec:Virtio Transport Options / Virtio Over Channel I/O} + +S/390 based virtual machines support neither PCI nor MMIO, so a +different transport is needed there. + +virtio-ccw uses the standard channel I/O based mechanism used for +the majority of devices on S/390. A virtual channel device with a +special control unit type acts as proxy to the virtio device +(similar to the way virtio-pci uses a PCI device) and +configuration and operation of the virtio device is accomplished +(mostly) via channel commands. This means virtio devices are +discoverable via standard operating system algorithms, and adding +virtio support is mainly a question of supporting a new control +unit type. + +As the S/390 is a big endian machine, the data structures transmitted +via channel commands are big-endian: this is made clear by use of +the types be16, be32 and be64. + +\subsection{Basic Concepts}\label{sec:Virtio Transport Options / Virtio over channel I/O / Basic Concepts} + +As a proxy device, virtio-ccw uses a channel-attached I/O control +unit with a special control unit type (0x3832) and a control unit +model corresponding to the attached virtio device's subsystem +device ID, accessed via a virtual I/O subchannel and a virtual +channel path of type 0x32. 
This proxy device is discoverable via +normal channel subsystem device discovery (usually a STORE +SUBCHANNEL loop) and answers to the basic channel commands: + +\begin{itemize} +\item NO-OPERATION (0x03) +\item BASIC SENSE (0x04) +\item TRANSFER IN CHANNEL (0x08) +\item SENSE ID (0xe4) +\end{itemize} + +For a virtio-ccw proxy device, SENSE ID will return the following +information: + +\begin{tabular}{ |l|l|l| } +\hline +Bytes & Description & Contents \\ +\hline \hline +0 & reserved & 0xff \\ +\hline +1-2 & control unit type & 0x3832 \\ +\hline +3 & control unit model & <virtio device id> \\ +\hline +4-5 & device type & zeroes (unset) \\ +\hline +6 & device model & zeroes (unset) \\ +\hline +7-255 & extended SenseId data & zeroes (unset) \\ +\hline +\end{tabular} + +In addition to the basic channel commands, virtio-ccw defines a +set of channel commands related to configuration and operation of +virtio: + +\begin{lstlisting} +#define CCW_CMD_SET_VQ 0x13 +#define CCW_CMD_VDEV_RESET 0x33 +#define CCW_CMD_SET_IND 0x43 +#define CCW_CMD_SET_CONF_IND 0x53 +#define CCW_CMD_SET_IND_ADAPTER 0x73 +#define CCW_CMD_READ_FEAT 0x12 +#define CCW_CMD_WRITE_FEAT 0x11 +#define CCW_CMD_READ_CONF 0x22 +#define CCW_CMD_WRITE_CONF 0x21 +#define CCW_CMD_WRITE_STATUS 0x31 +#define CCW_CMD_READ_VQ_CONF 0x32 +#define CCW_CMD_SET_VIRTIO_REV 0x83 +#define CCW_CMD_READ_STATUS 0x72 +\end{lstlisting} + +\devicenormative{\subsubsection}{Basic Concepts}{Virtio Transport Options / Virtio over channel I/O / Basic Concepts} + +The virtio-ccw device acts like a normal channel device, as specified +in \hyperref[intro:S390 PoP]{[S390 PoP]} and \hyperref[intro:S390 Common I/O]{[S390 Common I/O]}. In particular: + +\begin{itemize} +\item A device MUST post a unit check with command reject for any command + it does not support. 
+ +\item If a driver did not suppress length checks for a channel command, + the device MUST present a subchannel status as detailed in the + architecture when the actual length did not match the expected length. + +\item If a driver did suppress length checks for a channel command, the + device MUST present a check condition if the transmitted data does + not contain enough data to process the command. If the driver submitted + a buffer that was too long, the device SHOULD accept the command. +\end{itemize} + +\drivernormative{\subsubsection}{Basic Concepts}{Virtio Transport Options / Virtio over channel I/O / Basic Concepts} + +A driver for virtio-ccw devices MUST check for a control unit +type of 0x3832 and MUST ignore the device type and model. + +A driver SHOULD attempt to provide the correct length in a channel +command even if it suppresses length checks for that command. + +\subsection{Device Initialization}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization} + +virtio-ccw uses several channel commands to set up a device. + +\subsubsection{Setting the Virtio Revision}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting the Virtio Revision} + +CCW_CMD_SET_VIRTIO_REV is issued by the driver to set the revision of +the virtio-ccw transport it intends to drive the device with. It uses the +following communication structure: + +\begin{lstlisting} +struct virtio_rev_info { + be16 revision; + be16 length; + u8 data[]; +}; +\end{lstlisting} + +\field{revision} contains the desired revision id, \field{length} the length of the +data portion and \field{data} revision-dependent additional desired options. 
+ +The following values are supported: + +\begin{tabular}{ |l|l|l|l| } +\hline +\field{revision} & \field{length} & \field{data} & remarks \\ +\hline \hline +0 & 0 & <empty> & legacy interface; transitional devices only \\ +\hline +1 & 0 & <empty> & Virtio 1.0 \\ +\hline +2 & 0 & <empty> & CCW_CMD_READ_STATUS support \\ +\hline +3-n & & & reserved for later revisions \\ +\hline +\end{tabular} + +Note that a change in the virtio standard does not necessarily +correspond to a change in the virtio-ccw revision. + +\devicenormative{\paragraph}{Setting the Virtio Revision}{Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting the Virtio Revision} + +A device MUST post a unit check with command reject for any \field{revision} +it does not support. For any invalid combination of \field{revision}, \field{length} +and \field{data}, it MUST post a unit check with command reject as well. A +non-transitional device MUST reject revision id 0. + +A device MUST answer with command reject to any virtio-ccw specific +channel command that is not contained in the revision selected by the +driver. + +A device MUST answer with command reject to any attempt to select a different revision +after a revision has been successfully selected by the driver. + +A device MUST treat the revision as unset from the time the associated +subchannel has been enabled until a revision has been successfully set +by the driver. This implies that revisions are not persistent across +disabling and enabling of the associated subchannel. + +\drivernormative{\paragraph}{Setting the Virtio Revision}{Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting the Virtio Revision} + +A driver SHOULD start with trying to set the highest revision it +supports and continue with lower revisions if it gets a command reject. + +A driver MUST NOT issue any other virtio-ccw specific channel commands +prior to setting the revision. 
+ +After a revision has been successfully selected by the driver, it +MUST NOT attempt to select a different revision. + +\paragraph{Legacy Interfaces: A Note on Setting the Virtio Revision}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting the Virtio Revision / Legacy Interfaces: A Note on Setting the Virtio Revision} + +A legacy device will not support the CCW_CMD_SET_VIRTIO_REV and answer +with a command reject. A non-transitional driver MUST stop trying to +operate this device in that case. A transitional driver MUST operate +the device as if it had been able to set revision 0. + +A legacy driver will not issue the CCW_CMD_SET_VIRTIO_REV prior to +issuing other virtio-ccw specific channel commands. A non-transitional +device therefore MUST answer any such attempts with a command reject. +A transitional device MUST assume in this case that the driver is a +legacy driver and continue as if the driver selected revision 0. This +implies that the device MUST reject any command not valid for revision +0, including a subsequent CCW_CMD_SET_VIRTIO_REV. + +\subsubsection{Configuring a Virtqueue}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Configuring a Virtqueue} + +CCW_CMD_READ_VQ_CONF is issued by the driver to obtain information +about a queue. It uses the following structure for communicating: + +\begin{lstlisting} +struct vq_config_block { + be16 index; + be16 max_num; +}; +\end{lstlisting} + +The requested number of buffers for queue \field{index} is returned in +\field{max_num}. + +Afterwards, CCW_CMD_SET_VQ is issued by the driver to inform the +device about the location used for its queue. 
The transmitted +structure is + +\begin{lstlisting} +struct vq_info_block { + be64 desc; + be32 res0; + be16 index; + be16 num; + be64 avail; + be64 used; +}; +\end{lstlisting} + +\field{desc}, \field{avail} and \field{used} contain the guest addresses for the descriptor table, +available ring and used ring for queue \field{index}, respectively. The actual +virtqueue size (number of allocated buffers) is transmitted in \field{num}. + +\devicenormative{\paragraph}{Configuring a Virtqueue}{Virtio Transport Options / Virtio over channel I/O / Device Initialization / Configuring a Virtqueue} + +\field{res0} is reserved and MUST be ignored by the device. + +\paragraph{Legacy Interface: A Note on Configuring a Virtqueue}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Configuring a Virtqueue / Legacy Interface: A Note on Configuring a Virtqueue} + +For a legacy driver or for a driver that selected revision 0, +CCW_CMD_SET_VQ uses the following communication block: + +\begin{lstlisting} +struct vq_info_block_legacy { + be64 queue; + be32 align; + be16 index; + be16 num; +}; +\end{lstlisting} + +\field{queue} contains the guest address for queue \field{index}, \field{num} the number of buffers +and \field{align} the alignment. The queue layout follows \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout}~\nameref{sec:Basic Facilities of a Virtio Device / Virtqueues / Legacy Interfaces: A Note on Virtqueue Layout}. + +\subsubsection{Communicating Status Information}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information} + +The driver changes the status of a device via the +CCW_CMD_WRITE_STATUS command, which transmits an 8 bit status +value. 
+ +As described in +\ref{devicenormative:Basic Facilities of a Virtio Device / Feature Bits}, +a device sometimes fails to set the \field{status} field: For example, it +might fail to accept the FEATURES_OK status bit during device initialization. + +With revision 2, CCW_CMD_READ_STATUS is defined: It reads an 8 bit status +value from the device and acts as a reverse operation to CCW_CMD_WRITE_STATUS. + +\drivernormative{\paragraph}{Communicating Status Information}{Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information} + +If the device posts a unit check with command reject in response to the +CCW_CMD_WRITE_STATUS command, the driver MUST assume that the device failed +to set the status and the \field{status} field retained its previous value. + +If at least revision 2 has been negotiated, the driver SHOULD use the +CCW_CMD_READ_STATUS command to retrieve the \field{status} field after +a configuration change has been detected. + +If revision 2 or higher has not been negotiated, the driver MUST NOT attempt +to issue the CCW_CMD_READ_STATUS command. + +\devicenormative{\paragraph}{Communicating Status Information}{Virtio Transport Options / Virtio over channel I/O / Device Initialization / Communicating Status Information} + +If the device fails to set the \field{status} field to the value written by +the driver, the device MUST ensure that the \field{status} field is left +unchanged and MUST post a unit check with command reject. + +If at least revision 2 has been negotiated, the device MUST return the +current \field{status} field if the CCW_CMD_READ_STATUS command is issued. + +\subsubsection{Handling Device Features}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Handling Device Features} + +Feature bits are arranged in an array of 32 bit values, making +for a total of 8192 feature bits. Feature bits are in +little-endian byte order. 
+ +The CCW commands dealing with features use the following +communication block: + +\begin{lstlisting} +struct virtio_feature_desc { + le32 features; + u8 index; +}; +\end{lstlisting} + +\field{features} are the 32 bits of features currently accessed, while +\field{index} describes which of the feature bit values is to be +accessed. No padding is added at the end of the structure, it is +exactly 5 bytes in length. + +The guest obtains the device's device feature set via the +CCW_CMD_READ_FEAT command. The device stores the features at \field{index} +to \field{features}. + +For communicating its supported features to the device, the driver +uses the CCW_CMD_WRITE_FEAT command, denoting a \field{features}/\field{index} +combination. + +\subsubsection{Device Configuration}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Device Configuration} + +The device's configuration space is located in host memory. + +To obtain information from the configuration space, the driver +uses CCW_CMD_READ_CONF, specifying the guest memory for the device +to write to. + +For changing configuration information, the driver uses +CCW_CMD_WRITE_CONF, specifying the guest memory for the device to +read from. + +In both cases, the complete configuration space is transmitted. This +allows the driver to compare the new configuration space with the old +version, and keep a generation count internally whenever it changes. + +\subsubsection{Setting Up Indicators}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting Up Indicators} + +In order to set up the indicator bits for host->guest notification, +the driver uses different channel commands depending on whether it +wishes to use traditional I/O interrupts tied to a subchannel or +adapter I/O interrupts for virtqueue notifications. For any given +device, the two mechanisms are mutually exclusive. 
+ +For the configuration change indicators, only a mechanism using +traditional I/O interrupts is provided, regardless of whether +traditional or adapter I/O interrupts are used for virtqueue +notifications. + +\paragraph{Setting Up Classic Queue Indicators}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting Up Indicators / Setting Up Classic Queue Indicators} + +Indicators for notification via classic I/O interrupts are contained +in a 64 bit value per virtio-ccw proxy device. + +To communicate the location of the indicator bits for host->guest +notification, the driver uses the CCW_CMD_SET_IND command, +pointing to a location containing the guest address of the +indicators in a 64 bit value. + +If the driver has already set up two-staged queue indicators via the +CCW_CMD_SET_IND_ADAPTER command, the device MUST post a unit check +with command reject to any subsequent CCW_CMD_SET_IND command. + +\paragraph{Setting Up Configuration Change Indicators}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting Up Indicators / Setting Up Configuration Change Indicators} + +Indicators for configuration change host->guest notification are +contained in a 64 bit value per virtio-ccw proxy device. + +To communicate the location of the indicator bits used in the +configuration change host->guest notification, the driver issues the +CCW_CMD_SET_CONF_IND command, pointing to a location containing the +guest address of the indicators in a 64 bit value. 
+ +\paragraph{Setting Up Two-Stage Queue Indicators}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting Up Indicators / Setting Up Two-Stage Queue Indicators} + +Indicators for notification via adapter I/O interrupts consist of +two stages: +\begin{itemize} +\item a summary indicator byte covering the virtqueues for one or more + virtio-ccw proxy devices +\item a set of contiguous indicator bits for the virtqueues for a + virtio-ccw proxy device +\end{itemize} + +To communicate the location of the summary and queue indicator bits, +the driver uses the CCW_CMD_SET_IND_ADAPTER command with the following +payload: + +\begin{lstlisting} +struct virtio_thinint_area { + be64 summary_indicator; + be64 indicator; + be64 bit_nr; + u8 isc; +} __attribute__ ((packed)); +\end{lstlisting} + +\field{summary_indicator} contains the guest address of the 8 bit summary +indicator. +\field{indicator} contains the guest address of an area wherein the indicators +for the devices are contained, starting at \field{bit_nr}, one bit per +virtqueue of the device. Bit numbers start at the left, i.e. the most +significant bit in the first byte is assigned the bit number 0. +\field{isc} contains the I/O interruption subclass to be used for the adapter +I/O interrupt. It MAY be different from the isc used by the proxy +virtio-ccw device's subchannel. +No padding is added at the end of the structure, it is exactly 25 bytes +in length. + + +\devicenormative{\subparagraph}{Setting Up Two-Stage Queue Indicators}{Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting Up Indicators / Setting Up Two-Stage Queue Indicators} +If the driver has already set up classic queue indicators via the +CCW_CMD_SET_IND command, the device MUST post a unit check with +command reject to any subsequent CCW_CMD_SET_IND_ADAPTER command.
+ +\paragraph{Legacy Interfaces: A Note on Setting Up Indicators}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Initialization / Setting Up Indicators / Legacy Interfaces: A Note on Setting Up Indicators} + +In some cases, legacy devices will only support classic queue indicators; +in that case, they will reject CCW_CMD_SET_IND_ADAPTER as they don't know that +command. Some legacy devices will support two-stage queue indicators, though, +and a driver will be able to successfully use CCW_CMD_SET_IND_ADAPTER to set +them up. + +\subsection{Device Operation}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Operation} + +\subsubsection{Host->Guest Notification}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Operation / Host->Guest Notification} + +There are two modes of operation regarding host->guest notification, +classic I/O interrupts and adapter I/O interrupts. The mode to be +used is determined by the driver by using CCW_CMD_SET_IND respectively +CCW_CMD_SET_IND_ADAPTER to set up queue indicators. + +For configuration changes, the driver always uses classic I/O +interrupts. + +\paragraph{Notification via Classic I/O Interrupts}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Operation / Host->Guest Notification / Notification via Classic I/O Interrupts} + +If the driver used the CCW_CMD_SET_IND command to set up queue +indicators, the device will use classic I/O interrupts for +host->guest notification about virtqueue activity. + +For notifying the driver of virtqueue buffers, the device sets the +corresponding bit in the guest-provided indicators. If an +interrupt is not already pending for the subchannel, the device +generates an unsolicited I/O interrupt. + +If the device wants to notify the driver about configuration +changes, it sets bit 0 in the configuration indicators and +generates an unsolicited I/O interrupt, if needed. 
This also +applies if adapter I/O interrupts are used for queue notifications. + +\paragraph{Notification via Adapter I/O Interrupts}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Operation / Host->Guest Notification / Notification via Adapter I/O Interrupts} + +If the driver used the CCW_CMD_SET_IND_ADAPTER command to set up +queue indicators, the device will use adapter I/O interrupts for +host->guest notification about virtqueue activity. + +For notifying the driver of virtqueue buffers, the device sets the +bit in the guest-provided indicator area at the corresponding offset. +The guest-provided summary indicator is set to 0x01. An adapter I/O +interrupt for the corresponding interruption subclass is generated. + +The recommended way to process an adapter I/O interrupt by the driver +is as follows: + +\begin{itemize} +\item Process all queue indicator bits associated with the summary indicator. +\item Clear the summary indicator, performing a synchronization (memory +barrier) afterwards. +\item Process all queue indicator bits associated with the summary indicator +again. +\end{itemize} + +\devicenormative{\subparagraph}{Notification via Adapter I/O Interrupts}{Virtio Transport Options / Virtio over channel I/O / Device Operation / Host->Guest Notification / Notification via Adapter I/O Interrupts} + +The device SHOULD only generate an adapter I/O interrupt if the +summary indicator had not been set prior to notification. + +\drivernormative{\subparagraph}{Notification via Adapter I/O Interrupts}{Virtio Transport Options / Virtio over channel I/O / Device Operation / Host->Guest Notification / Notification via Adapter I/O Interrupts} +The driver +MUST clear the summary indicator after receiving an adapter I/O +interrupt before it processes the queue indicators. 
+ +\paragraph{Legacy Interfaces: A Note on Host->Guest Notification}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Operation / Host->Guest Notification / Legacy Interfaces: A Note on Host->Guest Notification} + +As legacy devices and drivers support only classic queue indicators, +host->guest notification will always be done via classic I/O interrupts. + +\subsubsection{Guest->Host Notification}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Operation / Guest->Host Notification} + +For notifying the device of virtqueue buffers, the driver +unfortunately can't use a channel command (the asynchronous +characteristics of channel I/O interact badly with the host block +I/O backend). Instead, it uses a diagnose 0x500 call with subcode +3 specifying the queue, as follows: + +\begin{tabular}{ |l|l|l| } +\hline +GPR & Input Value & Output Value \\ +\hline \hline + 1 & 0x3 & \\ +\hline + 2 & Subchannel ID & Host Cookie \\ +\hline + 3 & Virtqueue number & \\ +\hline + 4 & Host Cookie & \\ +\hline +\end{tabular} + +\devicenormative{\paragraph}{Guest->Host Notification}{Virtio Transport Options / Virtio over channel I/O / Device Operation / Guest->Host Notification} +The device MUST ignore bits 0-31 (counting from the left) of GPR2. +This aligns passing the subchannel ID with the way it is passed +for the existing I/O instructions. + +The device MAY return a 64-bit host cookie in GPR2 to speed up the +notification execution. + +\drivernormative{\paragraph}{Guest->Host Notification}{Virtio Transport Options / Virtio over channel I/O / Device Operation / Guest->Host Notification} + +For each notification, the driver SHOULD use GPR4 to pass the host cookie received in GPR2 from the previous notification.
+ +\begin{note} +For example: +\begin{lstlisting} +info->cookie = do_notify(schid, + virtqueue_get_queue_index(vq), + info->cookie); +\end{lstlisting} +\end{note} + +\subsubsection{Resetting Devices}\label{sec:Virtio Transport Options / Virtio over channel I/O / Device Operation / Resetting Devices} + +In order to reset a device, a driver sends the +CCW_CMD_VDEV_RESET command. + + +\chapter{Device Types}\label{sec:Device Types} + +On top of the queues, config space and feature negotiation facilities +built into virtio, several devices are defined. + +The following device IDs are used to identify different types of virtio +devices. Some device IDs are reserved for devices which are not currently +defined in this standard. + +Discovering what devices are available and their type is bus-dependent. + +\begin{tabular} { |l|c| } +\hline +Device ID & Virtio Device \\ +\hline \hline +0 & reserved (invalid) \\ +\hline +1 & network card \\ +\hline +2 & block device \\ +\hline +3 & console \\ +\hline +4 & entropy source \\ +\hline +5 & memory ballooning (traditional) \\ +\hline +6 & ioMemory \\ +\hline +7 & rpmsg \\ +\hline +8 & SCSI host \\ +\hline +9 & 9P transport \\ +\hline +10 & mac80211 wlan \\ +\hline +11 & rproc serial \\ +\hline +12 & virtio CAIF \\ +\hline +13 & memory balloon \\ +\hline +16 & GPU device \\ +\hline +17 & Timer/Clock device \\ +\hline +18 & Input device \\ +\hline +19 & Socket device \\ +\hline +20 & Crypto device \\ +\hline +21 & Signal Distribution Module \\ +\hline +22 & pstore device \\ +\hline +\end{tabular} + +Some of the devices above are unspecified by this document, +because they are seen as immature or especially niche. Be warned +that some are only specified by the sole existing implementation; +they could become part of a future specification, be abandoned +entirely, or live on outside this standard. We shall speak of +them no further. 
+ +\section{Network Device}\label{sec:Device Types / Network Device} + +The virtio network device is a virtual ethernet card, and is the +most complex of the devices supported so far by virtio. It has +been enhanced rapidly and demonstrates clearly how support for new +features is added to an existing device. Empty buffers are +placed in one virtqueue for receiving packets, and outgoing +packets are enqueued into another for transmission in that order. +A third command queue is used to control advanced filtering +features. + +\subsection{Device ID}\label{sec:Device Types / Network Device / Device ID} + + 1 + +\subsection{Virtqueues}\label{sec:Device Types / Network Device / Virtqueues} + +\begin{description} +\item[0] receiveq1 +\item[1] transmitq1 +\item[\ldots] +\item[2N] receiveqN +\item[2N+1] transmitqN +\item[2N+2] controlq +\end{description} + + N=1 if VIRTIO_NET_F_MQ is not negotiated, otherwise N is set by + \field{max_virtqueue_pairs}. + + controlq only exists if VIRTIO_NET_F_CTRL_VQ is set. + +\subsection{Feature bits}\label{sec:Device Types / Network Device / Feature bits} + +\begin{description} +\item[VIRTIO_NET_F_CSUM (0)] Device handles packets with partial checksum. This + ``checksum offload'' is a common feature on modern network cards. + +\item[VIRTIO_NET_F_GUEST_CSUM (1)] Driver handles packets with partial checksum. + +\item[VIRTIO_NET_F_CTRL_GUEST_OFFLOADS (2)] Control channel offloads + reconfiguration support. + +\item[VIRTIO_NET_F_MTU(3)] Device maximum MTU reporting is supported. If + offered by the device, device advises driver about the value of + its maximum MTU. If negotiated, the driver uses \field{mtu} as + the maximum MTU value. + +\item[VIRTIO_NET_F_MAC (5)] Device has given MAC address. + +\item[VIRTIO_NET_F_GUEST_TSO4 (7)] Driver can receive TSOv4. + +\item[VIRTIO_NET_F_GUEST_TSO6 (8)] Driver can receive TSOv6. + +\item[VIRTIO_NET_F_GUEST_ECN (9)] Driver can receive TSO with ECN.
+ +\item[VIRTIO_NET_F_GUEST_UFO (10)] Driver can receive UFO. + +\item[VIRTIO_NET_F_HOST_TSO4 (11)] Device can receive TSOv4. + +\item[VIRTIO_NET_F_HOST_TSO6 (12)] Device can receive TSOv6. + +\item[VIRTIO_NET_F_HOST_ECN (13)] Device can receive TSO with ECN. + +\item[VIRTIO_NET_F_HOST_UFO (14)] Device can receive UFO. + +\item[VIRTIO_NET_F_MRG_RXBUF (15)] Driver can merge receive buffers. + +\item[VIRTIO_NET_F_STATUS (16)] Configuration status field is + available. + +\item[VIRTIO_NET_F_CTRL_VQ (17)] Control channel is available. + +\item[VIRTIO_NET_F_CTRL_RX (18)] Control channel RX mode support. + +\item[VIRTIO_NET_F_CTRL_VLAN (19)] Control channel VLAN filtering. + +\item[VIRTIO_NET_F_GUEST_ANNOUNCE(21)] Driver can send gratuitous + packets. + +\item[VIRTIO_NET_F_MQ(22)] Device supports multiqueue with automatic + receive steering. + +\item[VIRTIO_NET_F_CTRL_MAC_ADDR(23)] Set MAC address through control + channel. +\end{description} + +\subsubsection{Feature bit requirements}\label{sec:Device Types / Network Device / Feature bits / Feature bit requirements} + +Some networking feature bits require other networking feature bits +(see \ref{drivernormative:Basic Facilities of a Virtio Device / Feature Bits}): + +\begin{description} +\item[VIRTIO_NET_F_GUEST_TSO4] Requires VIRTIO_NET_F_GUEST_CSUM. +\item[VIRTIO_NET_F_GUEST_TSO6] Requires VIRTIO_NET_F_GUEST_CSUM. +\item[VIRTIO_NET_F_GUEST_ECN] Requires VIRTIO_NET_F_GUEST_TSO4 or VIRTIO_NET_F_GUEST_TSO6. +\item[VIRTIO_NET_F_GUEST_UFO] Requires VIRTIO_NET_F_GUEST_CSUM. + +\item[VIRTIO_NET_F_HOST_TSO4] Requires VIRTIO_NET_F_CSUM. +\item[VIRTIO_NET_F_HOST_TSO6] Requires VIRTIO_NET_F_CSUM. +\item[VIRTIO_NET_F_HOST_ECN] Requires VIRTIO_NET_F_HOST_TSO4 or VIRTIO_NET_F_HOST_TSO6. +\item[VIRTIO_NET_F_HOST_UFO] Requires VIRTIO_NET_F_CSUM. + +\item[VIRTIO_NET_F_CTRL_RX] Requires VIRTIO_NET_F_CTRL_VQ. +\item[VIRTIO_NET_F_CTRL_VLAN] Requires VIRTIO_NET_F_CTRL_VQ. +\item[VIRTIO_NET_F_GUEST_ANNOUNCE] Requires VIRTIO_NET_F_CTRL_VQ. 
+\item[VIRTIO_NET_F_MQ] Requires VIRTIO_NET_F_CTRL_VQ. +\item[VIRTIO_NET_F_CTRL_MAC_ADDR] Requires VIRTIO_NET_F_CTRL_VQ. +\end{description} + +\subsubsection{Legacy Interface: Feature bits}\label{sec:Device Types / Network Device / Feature bits / Legacy Interface: Feature bits} +\begin{description} +\item[VIRTIO_NET_F_GSO (6)] Device handles packets with any GSO type. +\end{description} + +This was supposed to indicate segmentation offload support, but +upon further investigation it became clear that multiple bits +were needed. + +\subsection{Device configuration layout}\label{sec:Device Types / Network Device / Device configuration layout} +\label{sec:Device Types / Block Device / Feature bits / Device configuration layout} + +Three driver-read-only configuration fields are currently defined. The \field{mac} address field +always exists (though is only valid if VIRTIO_NET_F_MAC is set), and +\field{status} only exists if VIRTIO_NET_F_STATUS is set. Two +read-only bits (for the driver) are currently defined for the status field: +VIRTIO_NET_S_LINK_UP and VIRTIO_NET_S_ANNOUNCE. + +\begin{lstlisting} +#define VIRTIO_NET_S_LINK_UP 1 +#define VIRTIO_NET_S_ANNOUNCE 2 +\end{lstlisting} + +The following driver-read-only field, \field{max_virtqueue_pairs} only exists if +VIRTIO_NET_F_MQ is set. This field specifies the maximum number +of each of transmit and receive virtqueues (receiveq1\ldots receiveqN +and transmitq1\ldots transmitqN respectively) that can be configured once VIRTIO_NET_F_MQ +is negotiated. + +The following driver-read-only field, \field{mtu} only exists if +VIRTIO_NET_F_MTU is set. This field specifies the maximum MTU for the driver to +use. 
+ +\begin{lstlisting} +struct virtio_net_config { + u8 mac[6]; + le16 status; + le16 max_virtqueue_pairs; + le16 mtu; +}; +\end{lstlisting} + +\devicenormative{\subsubsection}{Device configuration layout}{Device Types / Network Device / Device configuration layout} + +The device MUST set \field{max_virtqueue_pairs} to between 1 and 0x8000 inclusive, +if it offers VIRTIO_NET_F_MQ. + +The device MUST set \field{mtu} to between 68 and 65535 inclusive, +if it offers VIRTIO_NET_F_MTU. + +The device SHOULD set \field{mtu} to at least 1280, if it offers +VIRTIO_NET_F_MTU. + +The device MUST NOT modify \field{mtu} once it has been set. + +The device MUST NOT pass received packets that exceed \field{mtu} (plus low +level ethernet header length) size with \field{gso_type} NONE or ECN +after VIRTIO_NET_F_MTU has been successfully negotiated. + +The device MUST forward transmitted packets of up to \field{mtu} (plus low +level ethernet header length) size with \field{gso_type} NONE or ECN, and do +so without fragmentation, after VIRTIO_NET_F_MTU has been successfully +negotiated. + +\drivernormative{\subsubsection}{Device configuration layout}{Device Types / Network Device / Device configuration layout} + +A driver SHOULD negotiate VIRTIO_NET_F_MAC if the device offers it. +If the driver negotiates the VIRTIO_NET_F_MAC feature, the driver MUST set +the physical address of the NIC to \field{mac}. Otherwise, it SHOULD +use a locally-administered MAC address (see \hyperref[intro:IEEE 802]{IEEE 802}, +``9.2 48-bit universal LAN MAC addresses''). + +If the driver does not negotiate the VIRTIO_NET_F_STATUS feature, it SHOULD +assume the link is active, otherwise it SHOULD read the link status from +the bottom bit of \field{status}. + +A driver SHOULD negotiate VIRTIO_NET_F_MTU if the device offers it. 
+ +If the driver negotiates VIRTIO_NET_F_MTU, it MUST supply enough receive +buffers to receive at least one receive packet of size \field{mtu} (plus low +level ethernet header length) with \field{gso_type} NONE or ECN. + +If the driver negotiates VIRTIO_NET_F_MTU, it MUST NOT transmit packets of +size exceeding the value of \field{mtu} (plus low level ethernet header length) +with \field{gso_type} NONE or ECN. + +\subsubsection{Legacy Interface: Device configuration layout}\label{sec:Device Types / Network Device / Device configuration layout / Legacy Interface: Device configuration layout} +\label{sec:Device Types / Block Device / Feature bits / Device configuration layout / Legacy Interface: Device configuration layout} +When using the legacy interface, transitional devices and drivers +MUST format \field{status} and +\field{max_virtqueue_pairs} in struct virtio_net_config +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + +When using the legacy interface, \field{mac} is driver-writable +which provided a way for drivers to update the MAC without +negotiating VIRTIO_NET_F_CTRL_MAC_ADDR. + +\subsection{Device Initialization}\label{sec:Device Types / Network Device / Device Initialization} + +A driver would perform a typical initialization routine like so: + +\begin{enumerate} +\item Identify and initialize the receive and + transmission virtqueues, up to N of each kind. If + VIRTIO_NET_F_MQ feature bit is negotiated, + N=\field{max_virtqueue_pairs}, otherwise identify N=1. + +\item If the VIRTIO_NET_F_CTRL_VQ feature bit is negotiated, + identify the control virtqueue. + +\item Fill the receive queues with buffers: see \ref{sec:Device Types / Network Device / Device Operation / Setting Up Receive Buffers}. + +\item Even with VIRTIO_NET_F_MQ, only receiveq1, transmitq1 and + controlq are used by default. 
The driver would send the + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command specifying the + number of the transmit and receive queues to use. + +\item If the VIRTIO_NET_F_MAC feature bit is set, the configuration + space \field{mac} entry indicates the ``physical'' address of the + network card, otherwise the driver would typically generate a random + local MAC address. + +\item If the VIRTIO_NET_F_STATUS feature bit is negotiated, the link + status comes from the bottom bit of \field{status}. + Otherwise, the driver assumes it's active. + +\item A performant driver would indicate that it will generate checksumless + packets by negotiating the VIRTIO_NET_F_CSUM feature. + +\item If that feature is negotiated, a driver can use TCP or UDP + segmentation offload by negotiating the VIRTIO_NET_F_HOST_TSO4 (IPv4 + TCP), VIRTIO_NET_F_HOST_TSO6 (IPv6 TCP) and VIRTIO_NET_F_HOST_UFO + (UDP fragmentation) features. + +\item The converse features are also available: a driver can save + the virtual device some work by negotiating these features.\note{For example, a network packet transported between two guests on +the same system might not need checksumming at all, nor segmentation, +if both guests are amenable.} + The VIRTIO_NET_F_GUEST_CSUM feature indicates that partially + checksummed packets can be received, and if it can do that then + the VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_UFO and VIRTIO_NET_F_GUEST_ECN are the input + equivalents of the features described above. + See \ref{sec:Device Types / Network Device / Device Operation / +Setting Up Receive Buffers}~\nameref{sec:Device Types / Network +Device / Device Operation / Setting Up Receive Buffers} and +\ref{sec:Device Types / Network Device / Device Operation / +Processing of Incoming Packets}~\nameref{sec:Device Types / +Network Device / Device Operation / Processing of Incoming Packets} below.
+\end{enumerate} + +A truly minimal driver would only accept VIRTIO_NET_F_MAC and ignore +everything else. + +\subsection{Device Operation}\label{sec:Device Types / Network Device / Device Operation} + +Packets are transmitted by placing them in the +transmitq1\ldots transmitqN, and buffers for incoming packets are +placed in the receiveq1\ldots receiveqN. In each case, the packet +itself is preceded by a header: + +\begin{lstlisting} +struct virtio_net_hdr { +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 + u8 flags; +#define VIRTIO_NET_HDR_GSO_NONE 0 +#define VIRTIO_NET_HDR_GSO_TCPV4 1 +#define VIRTIO_NET_HDR_GSO_UDP 3 +#define VIRTIO_NET_HDR_GSO_TCPV6 4 +#define VIRTIO_NET_HDR_GSO_ECN 0x80 + u8 gso_type; + le16 hdr_len; + le16 gso_size; + le16 csum_start; + le16 csum_offset; + le16 num_buffers; +}; +\end{lstlisting} + +The controlq is used to control device features such as +filtering. + +\subsubsection{Legacy Interface: Device Operation}\label{sec:Device Types / Network Device / Device Operation / Legacy Interface: Device Operation} +When using the legacy interface, transitional devices and drivers +MUST format the fields in struct virtio_net_hdr +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + +The legacy driver only presented \field{num_buffers} in the struct virtio_net_hdr +when VIRTIO_NET_F_MRG_RXBUF was negotiated; without that feature the +structure was 2 bytes shorter. + +When using the legacy interface, the driver SHOULD ignore the +\field{len} value in used ring entries for the transmit queues +and the controlq queue. +\begin{note} +Historically, some devices put +the total descriptor length there, even though no data was +actually written. +\end{note} + +\subsubsection{Packet Transmission}\label{sec:Device Types / Network Device / Device Operation / Packet Transmission} + +Transmitting a single packet is simple, but varies depending on +the different features the driver negotiated. 
+ +\begin{enumerate} +\item The driver can send a completely checksummed packet. In this case, + \field{flags} will be zero, and \field{gso_type} will be VIRTIO_NET_HDR_GSO_NONE. + +\item If the driver negotiated VIRTIO_NET_F_CSUM, it can skip + checksumming the packet: + \begin{itemize} + \item \field{flags} has the VIRTIO_NET_HDR_F_NEEDS_CSUM set, + + \item \field{csum_start} is set to the offset within the packet to begin checksumming, + and + + \item \field{csum_offset} indicates how many bytes after the csum_start the + new (16 bit ones' complement) checksum is placed by the device. + + \item The TCP checksum field in the packet is set to the sum + of the TCP pseudo header, so that replacing it by the ones' + complement checksum of the TCP header and body will give the + correct result. + \end{itemize} + +\begin{note} +For example, consider a partially checksummed TCP (IPv4) packet. +It will have a 14 byte ethernet header and 20 byte IP header +followed by the TCP header (with the TCP checksum field 16 bytes +into that header). \field{csum_start} will be 14+20 = 34 (the TCP +checksum includes the header), and \field{csum_offset} will be 16. +\end{note} + +\item If the driver negotiated + VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO, and the packet requires + TCP segmentation or UDP fragmentation, then \field{gso_type} + is set to VIRTIO_NET_HDR_GSO_TCPV4, TCPV6 or UDP. + (Otherwise, it is set to VIRTIO_NET_HDR_GSO_NONE). In this + case, packets larger than 1514 bytes can be transmitted: the + metadata indicates how to replicate the packet header to cut it + into smaller packets. The other gso fields are set: + + \begin{itemize} + \item \field{hdr_len} is a hint to the device as to how much of the header + needs to be kept to copy into each packet, usually set to the + length of the headers, including the transport header\footnote{Due to various bugs in implementations, this field is not useful +as a guarantee of the transport header size. +}. 
+ + \item \field{gso_size} is the maximum size of each packet beyond that + header (ie. MSS). + + \item If the driver negotiated the VIRTIO_NET_F_HOST_ECN feature, + the VIRTIO_NET_HDR_GSO_ECN bit in \field{gso_type} + indicates that the TCP packet has the ECN bit set\footnote{This case is not handled by some older hardware, so is called out +specifically in the protocol.}. + \end{itemize} + +\item \field{num_buffers} is set to zero. This field is unused on transmitted packets. + +\item The header and packet are added as one output descriptor to the + transmitq, and the device is notified of the new entry + (see \ref{sec:Device Types / Network Device / Device Initialization}~\nameref{sec:Device Types / Network Device / Device Initialization}). +\end{enumerate} + +\drivernormative{\paragraph}{Packet Transmission}{Device Types / Network Device / Device Operation / Packet Transmission} + +The driver MUST set \field{num_buffers} to zero. + +If VIRTIO_NET_F_CSUM is not negotiated, the driver MUST set +\field{flags} to zero and SHOULD supply a fully checksummed +packet to the device. + +If VIRTIO_NET_F_HOST_TSO4 is negotiated, the driver MAY set +\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV4 to request TCPv4 +segmentation, otherwise the driver MUST NOT set +\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV4. + +If VIRTIO_NET_F_HOST_TSO6 is negotiated, the driver MAY set +\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV6 to request TCPv6 +segmentation, otherwise the driver MUST NOT set +\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV6. + +If VIRTIO_NET_F_HOST_UFO is negotiated, the driver MAY set +\field{gso_type} to VIRTIO_NET_HDR_GSO_UDP to request UDP +segmentation, otherwise the driver MUST NOT set +\field{gso_type} to VIRTIO_NET_HDR_GSO_UDP. 
+ +The driver SHOULD NOT send to the device TCP packets requiring segmentation offload +which have the Explicit Congestion Notification bit set, unless the +VIRTIO_NET_F_HOST_ECN feature is negotiated, in which case the +driver MUST set the VIRTIO_NET_HDR_GSO_ECN bit in +\field{gso_type}. + +If the VIRTIO_NET_F_CSUM feature has been negotiated, the +driver MAY set the VIRTIO_NET_HDR_F_NEEDS_CSUM bit in +\field{flags}, if so: +\begin{enumerate} +\item the driver MUST validate the packet checksum at + offset \field{csum_offset} from \field{csum_start} as well as all + preceding offsets; +\item the driver MUST set the packet checksum stored in the + buffer to the TCP/UDP pseudo header; +\item the driver MUST set \field{csum_start} and + \field{csum_offset} such that calculating a ones' + complement checksum from \field{csum_start} up until the end of + the packet and storing the result at offset \field{csum_offset} + from \field{csum_start} will result in a fully checksummed + packet; +\end{enumerate} + +If none of the VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO options have +been negotiated, the driver MUST set \field{gso_type} to +VIRTIO_NET_HDR_GSO_NONE. + +If \field{gso_type} differs from VIRTIO_NET_HDR_GSO_NONE, then +the driver MUST also set the VIRTIO_NET_HDR_F_NEEDS_CSUM bit in +\field{flags} and MUST set \field{gso_size} to indicate the +desired MSS. + +If one of the VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO options have +been negotiated, the driver SHOULD set \field{hdr_len} to a value +not less than the length of the headers, including the transport +header. + +The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID bit in +\field{flags}. + +\devicenormative{\paragraph}{Packet Transmission}{Device Types / Network Device / Device Operation / Packet Transmission} +The device MUST ignore \field{flag} bits that it does not recognize. + +If VIRTIO_NET_HDR_F_NEEDS_CSUM bit in \field{flags} is not set, the +device MUST NOT use the \field{csum_start} and \field{csum_offset}. 
+ +If one of the VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO options have +been negotiated, the device MAY use \field{hdr_len} only as a hint about the +transport header size. +The device MUST NOT rely on \field{hdr_len} to be correct. +\begin{note} +This is due to various bugs in implementations. +\end{note} + +If VIRTIO_NET_HDR_F_NEEDS_CSUM is not set, the device MUST NOT +rely on the packet checksum being correct. +\paragraph{Packet Transmission Interrupt}\label{sec:Device Types / Network Device / Device Operation / Packet Transmission / Packet Transmission Interrupt} + +Often a driver will suppress transmission interrupts using the +VIRTQ_AVAIL_F_NO_INTERRUPT flag + (see \ref{sec:General Initialization And Device Operation / Device Operation / Receiving Used Buffers From The Device}~\nameref{sec:General Initialization And Device Operation / Device Operation / Receiving Used Buffers From The Device}) +and check for used packets in the transmit path of following +packets. + +The normal behavior in this interrupt handler is to retrieve any +new descriptors from the used ring and free the corresponding +headers and packets. + +\subsubsection{Setting Up Receive Buffers}\label{sec:Device Types / Network Device / Device Operation / Setting Up Receive Buffers} + +It is generally a good idea to keep the receive virtqueue as +fully populated as possible: if it runs out, network performance +will suffer. + +If the VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6 or +VIRTIO_NET_F_GUEST_UFO features are used, the maximum incoming packet +will be 65550 bytes long (the maximum size of a +TCP or UDP packet, plus the 14 byte ethernet header), otherwise +1514 bytes. The 12-byte struct virtio_net_hdr is prepended to this, +making for 65562 or 1526 bytes.
+ +\drivernormative{\paragraph}{Setting Up Receive Buffers}{Device Types / Network Device / Device Operation / Setting Up Receive Buffers} + +\begin{itemize} +\item If VIRTIO_NET_F_MRG_RXBUF is not negotiated: + \begin{itemize} + \item If VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6 or + VIRTIO_NET_F_GUEST_UFO are negotiated, the driver SHOULD populate + the receive queue(s) with buffers of at least 65562 bytes. + \item Otherwise, the driver SHOULD populate the receive queue(s) + with buffers of at least 1526 bytes. + \end{itemize} +\item If VIRTIO_NET_F_MRG_RXBUF is negotiated, each buffer MUST be at + least the size of the struct virtio_net_hdr. +\end{itemize} + +\begin{note} +Obviously each buffer can be split across multiple descriptor elements. +\end{note} + +If VIRTIO_NET_F_MQ is negotiated, each of receiveq1\ldots receiveqN +that will be used SHOULD be populated with receive buffers. + +\devicenormative{\paragraph}{Setting Up Receive Buffers}{Device Types / Network Device / Device Operation / Setting Up Receive Buffers} + +The device MUST set \field{num_buffers} to the number of descriptors used to +hold the incoming packet. + +The device MUST use only a single descriptor if VIRTIO_NET_F_MRG_RXBUF +was not negotiated.
+\begin{note} +{This means that \field{num_buffers} will always be 1 +if VIRTIO_NET_F_MRG_RXBUF is not negotiated.} +\end{note} + +\subsubsection{Processing of Incoming Packets}\label{sec:Device Types / Network Device / Device Operation / Processing of Incoming Packets} +\label{sec:Device Types / Network Device / Device Operation / Processing of Packets}%old label for latexdiff + +When a packet is copied into a buffer in the receiveq, the +optimal path is to disable further interrupts for the receiveq +(see \ref{sec:General Initialization And Device Operation / Device Operation / Receiving Used Buffers From The Device}~\nameref{sec:General Initialization And Device Operation / Device Operation / Receiving Used Buffers From The Device}) and process +packets until no more are found, then re-enable them. + +Processing incoming packets involves: + +\begin{enumerate} +\item \field{num_buffers} indicates how many descriptors + this packet is spread over (including this one): this will + always be 1 if VIRTIO_NET_F_MRG_RXBUF was not negotiated. + This allows receipt of large packets without having to allocate large + buffers. In this case, there will be at least \field{num_buffers} in + the used ring, and the device chains them together to form a + single packet. The other buffers will not begin with a struct + virtio_net_hdr. + +\item If + \field{num_buffers} is one, then the entire packet will be + contained within this buffer, immediately following the struct + virtio_net_hdr. +\item If the VIRTIO_NET_F_GUEST_CSUM feature was negotiated, the + VIRTIO_NET_HDR_F_DATA_VALID bit in \field{flags} can be + set: if so, device has validated the packet checksum. + In case of multiple encapsulated protocols, one level of checksums + has been validated. 
+\end{enumerate} + +Additionally, VIRTIO_NET_F_GUEST_CSUM, TSO4, TSO6, UFO and ECN +features enable receive checksum, large receive offload and ECN +support which are the input equivalents of the transmit checksum, +transmit segmentation offloading and ECN features, as described +in \ref{sec:Device Types / Network Device / Device Operation / +Packet Transmission}: +\begin{enumerate} +\item If the VIRTIO_NET_F_GUEST_CSUM feature was negotiated, the + VIRTIO_NET_HDR_F_NEEDS_CSUM bit in \field{flags} can be + set: if so, the packet checksum at offset \field{csum_offset} + from \field{csum_start} and any preceding checksums + have been validated. The checksum on the packet is incomplete and + \field{csum_start} and \field{csum_offset} indicate how to calculate + it (see Packet Transmission point 1). + +\item If the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options were + negotiated, then \field{gso_type} MAY be something other than + VIRTIO_NET_HDR_GSO_NONE, and \field{gso_size} field indicates the + desired MSS (see Packet Transmission point 2). +\end{enumerate} + +\devicenormative{\paragraph}{Processing of Incoming Packets}{Device Types / Network Device / Device Operation / Processing of Incoming Packets} +\label{devicenormative:Device Types / Network Device / Device Operation / Processing of Packets}%old label for latexdiff + +If VIRTIO_NET_F_MRG_RXBUF has not been negotiated, the device MUST set +\field{num_buffers} to 1. + +If VIRTIO_NET_F_MRG_RXBUF has been negotiated, the device MUST set +\field{num_buffers} to indicate the number of descriptors +the packet (including the header) is spread over. + +The device MUST use all descriptors used by a single receive +packet together, by atomically incrementing \field{idx} in the +used ring by the \field{num_buffers} value. + +If VIRTIO_NET_F_GUEST_CSUM is not negotiated, the device MUST set +\field{flags} to zero and SHOULD supply a fully checksummed +packet to the driver. 
+ +If VIRTIO_NET_F_GUEST_TSO4 is not negotiated, the device MUST NOT set +\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV4. + +If VIRTIO_NET_F_GUEST_UFO is not negotiated, the device MUST NOT set +\field{gso_type} to VIRTIO_NET_HDR_GSO_UDP. + +If VIRTIO_NET_F_GUEST_TSO6 is not negotiated, the device MUST NOT set +\field{gso_type} to VIRTIO_NET_HDR_GSO_TCPV6. + +The device SHOULD NOT send to the driver TCP packets requiring segmentation offload +which have the Explicit Congestion Notification bit set, unless the +VIRTIO_NET_F_GUEST_ECN feature is negotiated, in which case the +device MUST set the VIRTIO_NET_HDR_GSO_ECN bit in +\field{gso_type}. + +If the VIRTIO_NET_F_GUEST_CSUM feature has been negotiated, the +device MAY set the VIRTIO_NET_HDR_F_NEEDS_CSUM bit in +\field{flags}, if so: +\begin{enumerate} +\item the device MUST validate the packet checksum at + offset \field{csum_offset} from \field{csum_start} as well as all + preceding offsets; +\item the device MUST set the packet checksum stored in the + receive buffer to the TCP/UDP pseudo header; +\item the device MUST set \field{csum_start} and + \field{csum_offset} such that calculating a ones' + complement checksum from \field{csum_start} up until the + end of the packet and storing the result at offset + \field{csum_offset} from \field{csum_start} will result in a + fully checksummed packet; +\end{enumerate} + +If none of the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options have +been negotiated, the device MUST set \field{gso_type} to +VIRTIO_NET_HDR_GSO_NONE. + +If \field{gso_type} differs from VIRTIO_NET_HDR_GSO_NONE, then +the device MUST also set the VIRTIO_NET_HDR_F_NEEDS_CSUM bit in +\field{flags} and MUST set \field{gso_size} to indicate the desired MSS. + +If one of the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options have +been negotiated, the device SHOULD set \field{hdr_len} to a value +not less than the length of the headers, including the transport +header. 
+ +If the VIRTIO_NET_F_GUEST_CSUM feature has been negotiated, the +device MAY set the VIRTIO_NET_HDR_F_DATA_VALID bit in +\field{flags}, if so, the device MUST validate the packet +checksum (in case of multiple encapsulated protocols, one level +of checksums is validated). + +\drivernormative{\paragraph}{Processing of Incoming +Packets}{Device Types / Network Device / Device Operation / +Processing of Incoming Packets} + +The driver MUST ignore \field{flags} bits that it does not recognize. + +If VIRTIO_NET_HDR_F_NEEDS_CSUM bit in \field{flags} is not set, the +driver MUST NOT use the \field{csum_start} and \field{csum_offset}. + +If one of the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options have +been negotiated, the driver MAY use \field{hdr_len} only as a hint about the +transport header size. +The driver MUST NOT rely on \field{hdr_len} to be correct. +\begin{note} +This is due to various bugs in implementations. +\end{note} + +If neither VIRTIO_NET_HDR_F_NEEDS_CSUM nor +VIRTIO_NET_HDR_F_DATA_VALID is set, the driver MUST NOT +rely on the packet checksum being correct. +\subsubsection{Control Virtqueue}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue} + +The driver uses the control virtqueue (if VIRTIO_NET_F_CTRL_VQ is +negotiated) to send commands to manipulate various features of +the device which would not easily map into the configuration +space. + +All commands are of the following form: + +\begin{lstlisting} +struct virtio_net_ctrl { + u8 class; + u8 command; + u8 command-specific-data[]; + u8 ack; +}; + +/* ack values */ +#define VIRTIO_NET_OK 0 +#define VIRTIO_NET_ERR 1 +\end{lstlisting} + +The \field{class}, \field{command} and command-specific-data are set by the +driver, and the device sets the \field{ack} byte. There is little the driver can +do except issue a diagnostic if \field{ack} is not +VIRTIO_NET_OK. 
+ +\paragraph{Packet Receive Filtering}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering} +\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Setting Promiscuous Mode}%old label for latexdiff + +If the VIRTIO_NET_F_CTRL_RX and VIRTIO_NET_F_CTRL_RX_EXTRA +features are negotiated, the driver can send control commands for +promiscuous mode, multicast, unicast and broadcast receiving. + +\begin{note} +In general, these commands are best-effort: unwanted +packets could still arrive. +\end{note} + +\begin{lstlisting} +#define VIRTIO_NET_CTRL_RX 0 + #define VIRTIO_NET_CTRL_RX_PROMISC 0 + #define VIRTIO_NET_CTRL_RX_ALLMULTI 1 + #define VIRTIO_NET_CTRL_RX_ALLUNI 2 + #define VIRTIO_NET_CTRL_RX_NOMULTI 3 + #define VIRTIO_NET_CTRL_RX_NOUNI 4 + #define VIRTIO_NET_CTRL_RX_NOBCAST 5 +\end{lstlisting} + + +\devicenormative{\subparagraph}{Packet Receive Filtering}{Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering} + +If the VIRTIO_NET_F_CTRL_RX feature has been negotiated, +the device MUST support the following VIRTIO_NET_CTRL_RX class +commands: +\begin{itemize} +\item VIRTIO_NET_CTRL_RX_PROMISC turns promiscuous mode on and +off. The command-specific-data is one byte containing 0 (off) or +1 (on). If promiscuous mode is on, the device SHOULD receive all +incoming packets. +This SHOULD take effect even if one of the other modes set by +a VIRTIO_NET_CTRL_RX class command is on. +\item VIRTIO_NET_CTRL_RX_ALLMULTI turns all-multicast receive on and +off. The command-specific-data is one byte containing 0 (off) or +1 (on). When all-multicast receive is on the device SHOULD allow +all incoming multicast packets. +\end{itemize} + +If the VIRTIO_NET_F_CTRL_RX_EXTRA feature has been negotiated, +the device MUST support the following VIRTIO_NET_CTRL_RX class +commands: +\begin{itemize} +\item VIRTIO_NET_CTRL_RX_ALLUNI turns all-unicast receive on and +off. 
The command-specific-data is one byte containing 0 (off) or +1 (on). When all-unicast receive is on the device SHOULD allow +all incoming unicast packets. +\item VIRTIO_NET_CTRL_RX_NOMULTI suppresses multicast receive. +The command-specific-data is one byte containing 0 (multicast +receive allowed) or 1 (multicast receive suppressed). +When multicast receive is suppressed, the device SHOULD NOT +send multicast packets to the driver. +This SHOULD take effect even if VIRTIO_NET_CTRL_RX_ALLMULTI is on. +This filter SHOULD NOT apply to broadcast packets. +\item VIRTIO_NET_CTRL_RX_NOUNI suppresses unicast receive. +The command-specific-data is one byte containing 0 (unicast +receive allowed) or 1 (unicast receive suppressed). +When unicast receive is suppressed, the device SHOULD NOT +send unicast packets to the driver. +This SHOULD take effect even if VIRTIO_NET_CTRL_RX_ALLUNI is on. +\item VIRTIO_NET_CTRL_RX_NOBCAST suppresses broadcast receive. +The command-specific-data is one byte containing 0 (broadcast +receive allowed) or 1 (broadcast receive suppressed). +When broadcast receive is suppressed, the device SHOULD NOT +send broadcast packets to the driver. +This SHOULD take effect even if VIRTIO_NET_CTRL_RX_ALLMULTI is on. +\end{itemize} + +\drivernormative{\subparagraph}{Packet Receive Filtering}{Device Types / Network Device / Device Operation / Control Virtqueue / Packet Receive Filtering} + +If the VIRTIO_NET_F_CTRL_RX feature has not been negotiated, +the driver MUST NOT issue commands VIRTIO_NET_CTRL_RX_PROMISC or +VIRTIO_NET_CTRL_RX_ALLMULTI. + +If the VIRTIO_NET_F_CTRL_RX_EXTRA feature has not been negotiated, +the driver MUST NOT issue commands + VIRTIO_NET_CTRL_RX_ALLUNI, + VIRTIO_NET_CTRL_RX_NOMULTI, + VIRTIO_NET_CTRL_RX_NOUNI or + VIRTIO_NET_CTRL_RX_NOBCAST. 
+ +\paragraph{Setting MAC Address Filtering}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering} + +If the VIRTIO_NET_F_CTRL_RX feature is negotiated, the driver can +send control commands for MAC address filtering. + +\begin{lstlisting} +struct virtio_net_ctrl_mac { + le32 entries; + u8 macs[entries][6]; +}; + +#define VIRTIO_NET_CTRL_MAC 1 + #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 + #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1 +\end{lstlisting} + +The device can filter incoming packets by any number of destination +MAC addresses\footnote{Since there are no guarantees, it can use a hash filter or +silently switch to allmulti or promiscuous mode if it is given too +many addresses. +}. This table is set using the class +VIRTIO_NET_CTRL_MAC and the command VIRTIO_NET_CTRL_MAC_TABLE_SET. The +command-specific-data is two variable length tables of 6-byte MAC +addresses (as described in struct virtio_net_ctrl_mac). The first table contains unicast addresses, and the second +contains multicast addresses. + +The VIRTIO_NET_CTRL_MAC_ADDR_SET command is used to set the +default MAC address which rx filtering +accepts (and if VIRTIO_NET_F_MAC_ADDR has been negotiated, +this will be reflected in \field{mac} in config space). + +The command-specific-data for VIRTIO_NET_CTRL_MAC_ADDR_SET is +the 6-byte MAC address. + +\devicenormative{\subparagraph}{Setting MAC Address Filtering}{Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering} + +The device MUST have an empty MAC filtering table on reset. + +The device MUST update the MAC filtering table before it consumes +the VIRTIO_NET_CTRL_MAC_TABLE_SET command. + +The device MUST update \field{mac} in config space before it consumes +the VIRTIO_NET_CTRL_MAC_ADDR_SET command, if VIRTIO_NET_F_MAC_ADDR has +been negotiated. 
+ +The device SHOULD drop incoming packets which have a destination MAC which +matches neither the \field{mac} (or that set with VIRTIO_NET_CTRL_MAC_ADDR_SET) +nor the MAC filtering table. + +\drivernormative{\subparagraph}{Setting MAC Address Filtering}{Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering} + +If VIRTIO_NET_F_CTRL_RX has not been negotiated, +the driver MUST NOT issue VIRTIO_NET_CTRL_MAC class commands. + +If VIRTIO_NET_F_CTRL_RX has been negotiated, +the driver SHOULD issue VIRTIO_NET_CTRL_MAC_ADDR_SET +to set the default mac if it is different from \field{mac}. + +The driver MUST follow the VIRTIO_NET_CTRL_MAC_TABLE_SET command +by a le32 number, followed by that number of non-multicast +MAC addresses, followed by another le32 number, followed by +that number of multicast addresses. Either number MAY be 0. + +\subparagraph{Legacy Interface: Setting MAC Address Filtering}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Setting MAC Address Filtering / Legacy Interface: Setting MAC Address Filtering} +When using the legacy interface, transitional devices and drivers +MUST format \field{entries} in struct virtio_net_ctrl_mac +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + +Legacy drivers that didn't negotiate VIRTIO_NET_F_CTRL_MAC_ADDR +changed \field{mac} in config space when NIC is accepting +incoming packets. These drivers always wrote the mac value from +first to last byte, therefore after detecting such drivers, +a transitional device MAY defer MAC update, or MAY defer +processing incoming packets until driver writes the last byte +of \field{mac} in the config space. 
+ +\paragraph{VLAN Filtering}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / VLAN Filtering} + +If the driver negotiates the VIRTIO_NET_F_CTRL_VLAN feature, it +can control a VLAN filter table in the device. + +\begin{lstlisting} +#define VIRTIO_NET_CTRL_VLAN 2 + #define VIRTIO_NET_CTRL_VLAN_ADD 0 + #define VIRTIO_NET_CTRL_VLAN_DEL 1 +\end{lstlisting} + +Both the VIRTIO_NET_CTRL_VLAN_ADD and VIRTIO_NET_CTRL_VLAN_DEL +commands take a little-endian 16-bit VLAN id as the command-specific-data. + +\subparagraph{Legacy Interface: VLAN Filtering}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / VLAN Filtering / Legacy Interface: VLAN Filtering} +When using the legacy interface, transitional devices and drivers +MUST format the VLAN id +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + +\paragraph{Gratuitous Packet Sending}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Gratuitous Packet Sending} + +If the driver negotiates the VIRTIO_NET_F_GUEST_ANNOUNCE (depends +on VIRTIO_NET_F_CTRL_VQ), the device can ask the driver to send gratuitous +packets; this is usually done after the guest has been physically +migrated, and needs to announce its presence on the new network +links. (As the hypervisor does not have the knowledge of guest +network configuration (eg. tagged vlan) it is simplest to prod +the guest in this way). + +\begin{lstlisting} +#define VIRTIO_NET_CTRL_ANNOUNCE 3 + #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 +\end{lstlisting} + +The driver checks VIRTIO_NET_S_ANNOUNCE bit in the device configuration \field{status} field +when it notices the changes of device configuration. The +command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that +the driver has received the notification and the device clears the +VIRTIO_NET_S_ANNOUNCE bit in \field{status}. 
+ +Processing this notification involves: + +\begin{enumerate} +\item Sending the gratuitous packets (eg. ARP) or marking that there are pending + gratuitous packets to be sent and letting a deferred routine + send them. + +\item Sending VIRTIO_NET_CTRL_ANNOUNCE_ACK command through control + vq. +\end{enumerate} + +\drivernormative{\subparagraph}{Gratuitous Packet Sending}{Device Types / Network Device / Device Operation / Control Virtqueue / Gratuitous Packet Sending} + +If the driver negotiates VIRTIO_NET_F_GUEST_ANNOUNCE, it SHOULD notify +network peers of its new location after it sees the VIRTIO_NET_S_ANNOUNCE bit +in \field{status}. The driver MUST send a command on the command queue +with class VIRTIO_NET_CTRL_ANNOUNCE and command VIRTIO_NET_CTRL_ANNOUNCE_ACK. + +\devicenormative{\subparagraph}{Gratuitous Packet Sending}{Device Types / Network Device / Device Operation / Control Virtqueue / Gratuitous Packet Sending} + +If VIRTIO_NET_F_GUEST_ANNOUNCE is negotiated, the device MUST clear the +VIRTIO_NET_S_ANNOUNCE bit in \field{status} upon receipt of a command buffer +with class VIRTIO_NET_CTRL_ANNOUNCE and command VIRTIO_NET_CTRL_ANNOUNCE_ACK +before marking the buffer as used. + +\paragraph{Automatic receive steering in multiqueue mode}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Automatic receive steering in multiqueue mode} + +If the driver negotiates the VIRTIO_NET_F_MQ feature bit (depends +on VIRTIO_NET_F_CTRL_VQ), it MAY transmit outgoing packets on one +of the multiple transmitq1\ldots transmitqN and ask the device to +queue incoming packets into one of the multiple receiveq1\ldots receiveqN +depending on the packet flow. 
+ +\begin{lstlisting} +struct virtio_net_ctrl_mq { + le16 virtqueue_pairs; +}; + +#define VIRTIO_NET_CTRL_MQ 4 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 +\end{lstlisting} + +Multiqueue is disabled by default. The driver enables multiqueue by +executing the VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command, specifying +the number of the transmit and receive queues to be used up to +\field{max_virtqueue_pairs}; subsequently, +transmitq1\ldots transmitqn and receiveq1\ldots receiveqn where +n=\field{virtqueue_pairs} MAY be used. + +When multiqueue is enabled, the device MUST use automatic receive steering +based on packet flow. Programming of the receive steering +classifier is implicit. After the driver has transmitted a packet of a +flow on transmitqX, the device SHOULD cause incoming packets for that flow to +be steered to receiveqX. For uni-directional protocols, or where +no packets have been transmitted yet, the device MAY steer a packet +to a random queue out of the specified receiveq1\ldots receiveqn. + +Multiqueue is disabled by setting \field{virtqueue_pairs} to 1 (this is +the default) and waiting for the device to use the command buffer. + +\drivernormative{\subparagraph}{Automatic receive steering in multiqueue mode}{Device Types / Network Device / Device Operation / Control Virtqueue / Automatic receive steering in multiqueue mode} + +The driver MUST configure the virtqueues before enabling them with the +VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command. + +The driver MUST NOT request a \field{virtqueue_pairs} of 0 or +greater than \field{max_virtqueue_pairs} in the device configuration space. + +The driver MUST queue packets only on transmitq1 before the +VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command. + +The driver MUST NOT queue packets on transmit queues greater than +\field{virtqueue_pairs} once it has placed the VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command in the available ring. 
+ +\devicenormative{\subparagraph}{Automatic receive steering in multiqueue mode}{Device Types / Network Device / Device Operation / Control Virtqueue / Automatic receive steering in multiqueue mode} + +The device MUST queue packets only on receiveq1 before the +VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command. + +The device MUST NOT queue packets on receive queues greater than +\field{virtqueue_pairs} once it has placed the VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command in the used ring. + +\subparagraph{Legacy Interface: Automatic receive steering in multiqueue mode}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Automatic receive steering in multiqueue mode / Legacy Interface: Automatic receive steering in multiqueue mode} +When using the legacy interface, transitional devices and drivers +MUST format \field{virtqueue_pairs} +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + +\paragraph{Offloads State Configuration}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Offloads State Configuration} + +If the VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature is negotiated, the driver can +send control commands for dynamic offloads state configuration. + +\subparagraph{Setting Offloads State}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Offloads State Configuration / Setting Offloads State} + +\begin{lstlisting} +le64 offloads; + +#define VIRTIO_NET_F_GUEST_CSUM 1 +#define VIRTIO_NET_F_GUEST_TSO4 7 +#define VIRTIO_NET_F_GUEST_TSO6 8 +#define VIRTIO_NET_F_GUEST_ECN 9 +#define VIRTIO_NET_F_GUEST_UFO 10 + +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 + #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 +\end{lstlisting} + +The class VIRTIO_NET_CTRL_GUEST_OFFLOADS has one command: +VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET applies the new offloads configuration. 
+ +The le64 value passed as command data is a bitmask: bits set define +offloads to be enabled, bits cleared define offloads to be disabled. + +There is a corresponding device feature for each offload. Upon feature +negotiation corresponding offload gets enabled to preserve backward +compatibility. + +\drivernormative{\subparagraph}{Setting Offloads State}{Device Types / Network Device / Device Operation / Control Virtqueue / Offloads State Configuration / Setting Offloads State} + +A driver MUST NOT enable an offload for which the appropriate feature +has not been negotiated. + +\subparagraph{Legacy Interface: Setting Offloads State}\label{sec:Device Types / Network Device / Device Operation / Control Virtqueue / Offloads State Configuration / Setting Offloads State / Legacy Interface: Setting Offloads State} +When using the legacy interface, transitional devices and drivers +MUST format \field{offloads} +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + + +\subsubsection{Legacy Interface: Framing Requirements}\label{sec:Device +Types / Network Device / Legacy Interface: Framing Requirements} + +When using legacy interfaces, transitional drivers which have not +negotiated VIRTIO_F_ANY_LAYOUT MUST use a single descriptor for the +struct virtio_net_hdr on both transmit and receive, with the +network data in the following descriptors. + +Additionally, when using the control virtqueue (see \ref{sec:Device +Types / Network Device / Device Operation / Control Virtqueue}), +transitional drivers which have not +negotiated VIRTIO_F_ANY_LAYOUT MUST: +\begin{itemize} +\item for all commands, use a single 2-byte descriptor including the first two +fields: \field{class} and \field{command} +\item for all commands except VIRTIO_NET_CTRL_MAC_TABLE_SET +use a single descriptor including command-specific-data +with no padding. 
+\item for the VIRTIO_NET_CTRL_MAC_TABLE_SET command use exactly +two descriptors including command-specific-data with no padding: +the first of these descriptors MUST include the +virtio_net_ctrl_mac table structure for the unicast addresses with no padding, +the second of these descriptors MUST include the +virtio_net_ctrl_mac table structure for the multicast addresses +with no padding. +\item for all commands, use a single 1-byte descriptor for the +\field{ack} field +\end{itemize} + +See \ref{sec:Basic +Facilities of a Virtio Device / Virtqueues / Message Framing}. + +\section{Block Device}\label{sec:Device Types / Block Device} + +The virtio block device is a simple virtual block device (ie. +disk). Read and write requests (and other exotic requests) are +placed in the queue, and serviced (probably out of order) by the +device except where noted. + +\subsection{Device ID}\label{sec:Device Types / Block Device / Device ID} + 2 + +\subsection{Virtqueues}\label{sec:Device Types / Block Device / Virtqueues} +\begin{description} +\item[0] requestq +\end{description} + +\subsection{Feature bits}\label{sec:Device Types / Block Device / Feature bits} + +\begin{description} +\item[VIRTIO_BLK_F_SIZE_MAX (1)] Maximum size of any single segment is + in \field{size_max}. + +\item[VIRTIO_BLK_F_SEG_MAX (2)] Maximum number of segments in a + request is in \field{seg_max}. + +\item[VIRTIO_BLK_F_GEOMETRY (4)] Disk-style geometry specified in + \field{geometry}. + +\item[VIRTIO_BLK_F_RO (5)] Device is read-only. + +\item[VIRTIO_BLK_F_BLK_SIZE (6)] Block size of disk is in \field{blk_size}. + +\item[VIRTIO_BLK_F_FLUSH (9)] Cache flush command support. + +\item[VIRTIO_BLK_F_TOPOLOGY (10)] Device exports information on optimal I/O + alignment. + +\item[VIRTIO_BLK_F_CONFIG_WCE (11)] Device can toggle its cache between writeback + and writethrough modes. 
+\end{description} + +\subsubsection{Legacy Interface: Feature bits}\label{sec:Device Types / Block Device / Feature bits / Legacy Interface: Feature bits} + +\begin{description} +\item[VIRTIO_BLK_F_BARRIER (0)] Device supports request barriers. + +\item[VIRTIO_BLK_F_SCSI (7)] Device supports scsi packet commands. +\end{description} + +\begin{note} + In the legacy interface, VIRTIO_BLK_F_FLUSH was also + called VIRTIO_BLK_F_WCE. +\end{note} + +\subsection{Device configuration layout}\label{sec:Device Types / Block Device / Device configuration layout} + +The \field{capacity} of the device (expressed in 512-byte sectors) is always +present. The availability of the others all depend on various feature +bits as indicated above. + +\begin{lstlisting} +struct virtio_blk_config { + le64 capacity; + le32 size_max; + le32 seg_max; + struct virtio_blk_geometry { + le16 cylinders; + u8 heads; + u8 sectors; + } geometry; + le32 blk_size; + struct virtio_blk_topology { + // # of logical blocks per physical block (log2) + u8 physical_block_exp; + // offset of first aligned logical block + u8 alignment_offset; + // suggested minimum I/O size in blocks + le16 min_io_size; + // optimal (suggested maximum) I/O size in blocks + le32 opt_io_size; + } topology; + u8 writeback; +}; +\end{lstlisting} + + +\subsubsection{Legacy Interface: Device configuration layout}\label{sec:Device Types / Block Device / Device configuration layout / Legacy Interface: Device configuration layout} +When using the legacy interface, transitional devices and drivers +MUST format the fields in struct virtio_blk_config +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + + +\subsection{Device Initialization}\label{sec:Device Types / Block Device / Device Initialization} + +\begin{enumerate} +\item The device size can be read from \field{capacity}. 
+ +\item If the VIRTIO_BLK_F_BLK_SIZE feature is negotiated, + \field{blk_size} can be read to determine the optimal sector size + for the driver to use. This does not affect the units used in + the protocol (always 512 bytes), but awareness of the correct + value can affect performance. + +\item If the VIRTIO_BLK_F_RO feature is set by the device, any write + requests will fail. + +\item If the VIRTIO_BLK_F_TOPOLOGY feature is negotiated, the fields in the + \field{topology} struct can be read to determine the physical block size and optimal + I/O lengths for the driver to use. This also does not affect the units + in the protocol, only performance. + +\item If the VIRTIO_BLK_F_CONFIG_WCE feature is negotiated, the cache + mode can be read or set through the \field{writeback} field. 0 corresponds + to a writethrough cache, 1 to a writeback cache\footnote{Consistent with + \ref{devicenormative:Device Types / Block Device / Device Operation}, + a writethrough cache can be defined broadly as a cache that commits + writes to persistent device backend storage before reporting their + completion. For example, a battery-backed writeback cache actually + counts as writethrough according to this definition.}. The cache mode + after reset can be either writeback or writethrough. The actual + mode can be determined by reading \field{writeback} after feature + negotiation. +\end{enumerate} + +\drivernormative{\subsubsection}{Device Initialization}{Device Types / Block Device / Device Initialization} + +Drivers SHOULD NOT negotiate VIRTIO_BLK_F_FLUSH if they are incapable of +sending VIRTIO_BLK_T_FLUSH commands. + +If neither VIRTIO_BLK_F_CONFIG_WCE nor VIRTIO_BLK_F_FLUSH are +negotiated, the driver MAY deduce the presence of a writethrough cache. +If VIRTIO_BLK_F_CONFIG_WCE was not negotiated but VIRTIO_BLK_F_FLUSH was, +the driver SHOULD assume presence of a writeback cache. + +The driver MUST NOT read \field{writeback} before setting +the FEATURES_OK \field{status} bit. 
+ +\devicenormative{\subsubsection}{Device Initialization}{Device Types / Block Device / Device Initialization} + +Devices SHOULD always offer VIRTIO_BLK_F_FLUSH, and MUST offer it +if they offer VIRTIO_BLK_F_CONFIG_WCE. + +If VIRTIO_BLK_F_CONFIG_WCE is negotiated but VIRTIO_BLK_F_FLUSH +is not, the device MUST initialize \field{writeback} to 0. + +\subsubsection{Legacy Interface: Device Initialization}\label{sec:Device Types / Block Device / Device Initialization / Legacy Interface: Device Initialization} + +Because legacy devices do not have FEATURES_OK, transitional devices +MUST implement slightly different behavior around feature negotiation +when used through the legacy interface. In particular, when using the +legacy interface: + +\begin{itemize} +\item the driver MAY read or write \field{writeback} before setting + the DRIVER or DRIVER_OK \field{status} bit + +\item the device MUST NOT modify the cache mode (and \field{writeback}) + as a result of a driver setting a status bit, unless + the DRIVER_OK bit is being set and the driver has not set the + VIRTIO_BLK_F_CONFIG_WCE driver feature bit. + +\item the device MUST NOT modify the cache mode (and \field{writeback}) + as a result of a driver modifying the driver feature bits, for example + if the driver sets the VIRTIO_BLK_F_CONFIG_WCE driver feature bit but + does not set the VIRTIO_BLK_F_FLUSH bit. +\end{itemize} + + +\subsection{Device Operation}\label{sec:Device Types / Block Device / Device Operation} + +The driver queues requests to the virtqueue, and they are used by +the device (not necessarily in order). Each request is of form: + +\begin{lstlisting} +struct virtio_blk_req { + le32 type; + le32 reserved; + le64 sector; + u8 data[][512]; + u8 status; +}; +\end{lstlisting} + +The type of the request is either a read (VIRTIO_BLK_T_IN), a write +(VIRTIO_BLK_T_OUT), or a flush (VIRTIO_BLK_T_FLUSH). 
+ +\begin{lstlisting} +#define VIRTIO_BLK_T_IN 0 +#define VIRTIO_BLK_T_OUT 1 +#define VIRTIO_BLK_T_FLUSH 4 +\end{lstlisting} + +The \field{sector} number indicates the offset (multiplied by 512) where +the read or write is to occur. This field is unused and set to 0 +for scsi packet commands and for flush commands. + +The final \field{status} byte is written by the device: either +VIRTIO_BLK_S_OK for success, VIRTIO_BLK_S_IOERR for device or driver +error or VIRTIO_BLK_S_UNSUPP for a request unsupported by device: + +\begin{lstlisting} +#define VIRTIO_BLK_S_OK 0 +#define VIRTIO_BLK_S_IOERR 1 +#define VIRTIO_BLK_S_UNSUPP 2 +\end{lstlisting} + +\drivernormative{\subsubsection}{Device Operation}{Device Types / Block Device / Device Operation} + +A driver MUST NOT submit a request which would cause a read or write +beyond \field{capacity}. + +A driver SHOULD accept the VIRTIO_BLK_F_RO feature if offered. + +A driver MUST set \field{sector} to 0 for a VIRTIO_BLK_T_FLUSH request. +A driver SHOULD NOT include any data in a VIRTIO_BLK_T_FLUSH request. + +If the VIRTIO_BLK_F_CONFIG_WCE feature is negotiated, the driver MAY +switch to writethrough or writeback mode by writing respectively 0 and +1 to the \field{writeback} field. After writing a 0 to \field{writeback}, +the driver MUST NOT assume that any volatile writes have been committed +to persistent device backend storage. + +\devicenormative{\subsubsection}{Device Operation}{Device Types / Block Device / Device Operation} + +A device MUST set the \field{status} byte to VIRTIO_BLK_S_IOERR +for a write request if the VIRTIO_BLK_F_RO feature is offered, and MUST NOT +write any data. + +A write is considered volatile when it is submitted; the contents of +sectors covered by a volatile write are undefined in persistent device +backend storage until the write becomes stable.
A write becomes stable +once it is completed and one or more of the following conditions is true: + +\begin{enumerate} +\item\label{item:flush1} neither VIRTIO_BLK_F_CONFIG_WCE nor + VIRTIO_BLK_F_FLUSH feature were negotiated, but VIRTIO_BLK_F_FLUSH was + offered by the device; + +\item\label{item:flush2} the VIRTIO_BLK_F_CONFIG_WCE feature was negotiated and the + \field{writeback} field in configuration space was 0 \textbf{all the time between + the submission of the write and its completion}; + +\item\label{item:flush3} a VIRTIO_BLK_T_FLUSH request is sent \textbf{after the write is + completed} and is completed itself. +\end{enumerate} + +If the device is backed by persistent storage, the device MUST ensure that +stable writes are committed to it, before reporting completion of the write +(cases~\ref{item:flush1} and~\ref{item:flush2}) or the flush +(case~\ref{item:flush3}). Failure to do so can cause data loss +in case of a crash. + +If the driver changes \field{writeback} between the submission of the write +and its completion, the write could be either volatile or stable when +its completion is reported; in other words, the exact behavior is undefined. + +% According to the device requirements for device initialization: +% Offer(CONFIG_WCE) => Offer(FLUSH). +% +% After reversing the implication: +% not Offer(FLUSH) => not Offer(CONFIG_WCE). + +If VIRTIO_BLK_F_FLUSH was not offered by the + device\footnote{Note that in this case, according to + \ref{devicenormative:Device Types / Block Device / Device Initialization}, + the device will not have offered VIRTIO_BLK_F_CONFIG_WCE either.}, the +device MAY also commit writes to persistent device backend storage before +reporting their completion. Unlike case~\ref{item:flush1}, however, this +is not an absolute requirement of the specification. + +\begin{note} + An implementation that does not offer VIRTIO_BLK_F_FLUSH and does not commit + completed writes will not be resilient to data loss in case of crashes. 
+ Not offering VIRTIO_BLK_F_FLUSH is an absolute requirement + for implementations that do not wish to be safe against such data losses. +\end{note} + +\subsubsection{Legacy Interface: Device Operation}\label{sec:Device Types / Block Device / Device Operation / Legacy Interface: Device Operation} +When using the legacy interface, transitional devices and drivers +MUST format the fields in struct virtio_blk_req +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + +When using the legacy interface, transitional drivers +SHOULD ignore the \field{len} value in used ring entries. +\begin{note} +Historically, some devices put the total descriptor length, +or the total length of device-writable buffers there, +even when only the status byte was actually written. +\end{note} + +The \field{reserved} field was previously called \field{ioprio}. \field{ioprio} +is a hint about the relative priorities of requests to the device: +higher numbers indicate more important requests. + +\begin{lstlisting} +#define VIRTIO_BLK_T_FLUSH_OUT 5 +\end{lstlisting} + +The command VIRTIO_BLK_T_FLUSH_OUT was a synonym for VIRTIO_BLK_T_FLUSH; +a driver MUST treat it as a VIRTIO_BLK_T_FLUSH command. + +\begin{lstlisting} +#define VIRTIO_BLK_T_BARRIER 0x80000000 +\end{lstlisting} + +If the device has VIRTIO_BLK_F_BARRIER +feature the high bit (VIRTIO_BLK_T_BARRIER) indicates that this +request acts as a barrier and that all preceding requests SHOULD be +complete before this one, and all following requests SHOULD NOT be +started until this is complete. + +\begin{note} A barrier does not flush +caches in the underlying backend device in host, and thus does not +serve as data consistency guarantee. Only a VIRTIO_BLK_T_FLUSH request +does that. +\end{note} + +Some older legacy devices did not commit completed writes to persistent +device backend storage when VIRTIO_BLK_F_FLUSH was offered but not +negotiated. 
In order to work around this, the driver MAY set the +\field{writeback} to 0 (if available) or it MAY send an explicit flush +request after every completed write. + +If the device has VIRTIO_BLK_F_SCSI feature, it can also support +scsi packet command requests, each of these requests is of form: + +\begin{lstlisting} +/* All fields are in guest's native endian. */ +struct virtio_scsi_pc_req { + u32 type; + u32 ioprio; + u64 sector; + u8 cmd[]; + u8 data[][512]; +#define SCSI_SENSE_BUFFERSIZE 96 + u8 sense[SCSI_SENSE_BUFFERSIZE]; + u32 errors; + u32 data_len; + u32 sense_len; + u32 residual; + u8 status; +}; +\end{lstlisting} + +A request type can also be a scsi packet command (VIRTIO_BLK_T_SCSI_CMD or +VIRTIO_BLK_T_SCSI_CMD_OUT). The two types are equivalent, the device +does not distinguish between them: + +\begin{lstlisting} +#define VIRTIO_BLK_T_SCSI_CMD 2 +#define VIRTIO_BLK_T_SCSI_CMD_OUT 3 +\end{lstlisting} + +The \field{cmd} field is only present for scsi packet command requests, +and indicates the command to perform. This field MUST reside in a +single, separate device-readable buffer; command length can be derived +from the length of this buffer. + +Note that these first three (four for scsi packet commands) +fields are always device-readable: \field{data} is either device-readable +or device-writable, depending on the request. The size of the read or +write can be derived from the total size of the request buffers. + +\field{sense} is only present for scsi packet command requests, +and indicates the buffer for scsi sense data. + +\field{data_len} is only present for scsi packet command +requests, this field is deprecated, and SHOULD be ignored by the +driver. Historically, devices copied data length there. + +\field{sense_len} is only present for scsi packet command +requests and indicates the number of bytes actually written to +the \field{sense} buffer. 
+ +\field{residual} field is only present for scsi packet command +requests and indicates the residual size, calculated as data +length - number of bytes actually transferred. + +\subsubsection{Legacy Interface: Framing Requirements}\label{sec:Device +Types / Block Device / Legacy Interface: Framing Requirements} + +When using legacy interfaces, transitional drivers which have not +negotiated VIRTIO_F_ANY_LAYOUT: + +\begin{itemize} +\item MUST use a single 16-byte descriptor containing \field{type}, + \field{reserved} and \field{sector}, followed by descriptors + for \field{data}, then finally a separate 1-byte descriptor + for \field{status}. + +\item For SCSI commands there are additional constraints. + \field{errors}, \field{data_len}, \field{sense_len} and + \field{residual} MUST reside in a single, separate + device-writable descriptor, \field{sense} MUST reside in a + single separate device-writable descriptor of size 96 bytes, + and \field{errors}, \field{data_len}, \field{sense_len} and + \field{residual} MUST reside in a single separate + device-writable descriptor. +\end{itemize} + +See \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Message Framing}. + +\section{Console Device}\label{sec:Device Types / Console Device} + +The virtio console device is a simple device for data input and +output. A device MAY have one or more ports. Each port has a pair +of input and output virtqueues. Moreover, a device has a pair of +control IO virtqueues. The control virtqueues are used to +communicate information between the device and the driver about +ports being opened and closed on either side of the connection, +indication from the device about whether a particular port is a +console port, adding new ports, port hot-plug/unplug, etc., and +indication from the driver about whether a port or a device was +successfully added, port open/close, etc.
For data IO, one or +more empty buffers are placed in the receive queue for incoming +data and outgoing characters are placed in the transmit queue. + +\subsection{Device ID}\label{sec:Device Types / Console Device / Device ID} + + 3 + +\subsection{Virtqueues}\label{sec:Device Types / Console Device / Virtqueues} + +\begin{description} +\item[0] receiveq(port0) +\item[1] transmitq(port0) +\item[2] control receiveq +\item[3] control transmitq +\item[4] receiveq(port1) +\item[5] transmitq(port1) +\item[\ldots] +\end{description} + +The port 0 receive and transmit queues always exist: other queues +only exist if VIRTIO_CONSOLE_F_MULTIPORT is set. + +\subsection{Feature bits}\label{sec:Device Types / Console Device / Feature bits} + +\begin{description} +\item[VIRTIO_CONSOLE_F_SIZE (0)] Configuration \field{cols} and \field{rows} + are valid. + +\item[VIRTIO_CONSOLE_F_MULTIPORT (1)] Device has support for multiple + ports; \field{max_nr_ports} is valid and control virtqueues will be used. + +\item[VIRTIO_CONSOLE_F_EMERG_WRITE (2)] Device has support for emergency write. + Configuration field emerg_wr is valid. +\end{description} + +\subsection{Device configuration layout}\label{sec:Device Types / Console Device / Device configuration layout} + + The size of the console is supplied + in the configuration space if the VIRTIO_CONSOLE_F_SIZE feature + is set. Furthermore, if the VIRTIO_CONSOLE_F_MULTIPORT feature + is set, the maximum number of ports supported by the device can + be fetched. + + If VIRTIO_CONSOLE_F_EMERG_WRITE is set then the driver can use emergency write + to output a single character without initializing virtio queues, or even + acknowledging the feature. 
+ +\begin{lstlisting} +struct virtio_console_config { + le16 cols; + le16 rows; + le32 max_nr_ports; + le32 emerg_wr; +}; +\end{lstlisting} + +\subsubsection{Legacy Interface: Device configuration layout}\label{sec:Device Types / Console Device / Device configuration layout / Legacy Interface: Device configuration layout} +When using the legacy interface, transitional devices and drivers +MUST format the fields in struct virtio_console_config +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + +\subsection{Device Initialization}\label{sec:Device Types / Console Device / Device Initialization} + +\begin{enumerate} +\item If the VIRTIO_CONSOLE_F_EMERG_WRITE feature is offered, + \field{emerg_wr} field of the configuration can be written at any time. + Thus it works for very early boot debugging output as well as + catastrophic OS failures (e.g.\ virtio ring corruption). + +\item If the VIRTIO_CONSOLE_F_SIZE feature is negotiated, the driver + can read the console dimensions from \field{cols} and \field{rows}. + +\item If the VIRTIO_CONSOLE_F_MULTIPORT feature is negotiated, the + driver can spawn multiple ports, not all of which are necessarily + attached to a console. Some could be generic ports. In this + case, the control virtqueues are enabled and according to + \field{max_nr_ports}, the appropriate number + of virtqueues are created. A control message indicating the + driver is ready is sent to the device. The device can then send + control messages for adding new ports to the device. After + creating and initializing each port, a + VIRTIO_CONSOLE_PORT_READY control message is sent to the device + for that port so the device can let the driver know of any additional + configuration options set for that port. + +\item The receiveq for each port is populated with one or more + receive buffers.
+\end{enumerate} + +\devicenormative{\subsubsection}{Device Initialization}{Device Types / Console Device / Device Initialization} + +The device MUST allow a write to \field{emerg_wr}, even on an +unconfigured device. + +The device SHOULD transmit the lower byte written to \field{emerg_wr} to +an appropriate log or output method. + +\subsection{Device Operation}\label{sec:Device Types / Console Device / Device Operation} + +\begin{enumerate} +\item For output, a buffer containing the characters is placed in + the port's transmitq\footnote{Because this is high importance and low bandwidth, the current +Linux implementation polls for the buffer to be used, rather than +waiting for an interrupt, simplifying the implementation +significantly. However, for generic serial ports with the +O_NONBLOCK flag set, the polling limitation is relaxed and the +consumed buffers are freed upon the next write or poll call or +when a port is closed or hot-unplugged. +}. + +\item When a buffer is used in the receiveq (signalled by an + interrupt), the contents are the input to the port associated + with the virtqueue for which the notification was received. + +\item If the driver negotiated the VIRTIO_CONSOLE_F_SIZE feature, a + configuration change interrupt indicates that the updated size can + be read from the configuration fields. This size applies to port 0 only. + +\item If the driver negotiated the VIRTIO_CONSOLE_F_MULTIPORT + feature, active ports are announced by the device using the + VIRTIO_CONSOLE_PORT_ADD control message. The same message is + used for port hot-plug as well. +\end{enumerate} + +\drivernormative{\subsubsection}{Device Operation}{Device Types / Console Device / Device Operation} + +The driver MUST NOT put a device-readable buffer in a receiveq. The driver +MUST NOT put a device-writable buffer in a transmitq.
+ +\subsubsection{Multiport Device Operation}\label{sec:Device Types / Console Device / Device Operation / Multiport Device Operation} + +If the driver negotiated the VIRTIO_CONSOLE_F_MULTIPORT feature, the two +control queues are used to manipulate the different console ports: the +control receiveq for messages from the device to the driver, and the +control transmitq for driver-to-device messages. The layout of the +control messages is: + +\begin{lstlisting} +struct virtio_console_control { + le32 id; /* Port number */ + le16 event; /* The kind of control event */ + le16 value; /* Extra information for the event */ +}; +\end{lstlisting} + +The values for \field{event} are: +\begin{description} +\item [VIRTIO_CONSOLE_DEVICE_READY (0)] Sent by the driver at initialization + to indicate that it is ready to receive control messages. A \field{value} of + 1 indicates success, and 0 indicates failure. The port number \field{id} is unused. +\item [VIRTIO_CONSOLE_DEVICE_ADD (1)] Sent by the device, to create a new + port. \field{value} is unused. +\item [VIRTIO_CONSOLE_DEVICE_REMOVE (2)] Sent by the device, to remove an + existing port. \field{value} is unused. +\item [VIRTIO_CONSOLE_PORT_READY (3)] Sent by the driver in response + to the device's VIRTIO_CONSOLE_DEVICE_ADD message, to indicate that + the port is ready to be used. A \field{value} of 1 indicates success, and 0 + indicates failure. +\item [VIRTIO_CONSOLE_CONSOLE_PORT (4)] Sent by the device to nominate + a port as a console port. There MAY be more than one console port. +\item [VIRTIO_CONSOLE_RESIZE (5)] Sent by the device to indicate + a console size change. \field{value} is unused. The buffer is followed by the number of columns and rows: +\begin{lstlisting} +struct virtio_console_resize { + le16 cols; + le16 rows; +}; +\end{lstlisting} +\item [VIRTIO_CONSOLE_PORT_OPEN (6)] This message is sent by both the + device and the driver. \field{value} indicates the state: 0 (port + closed) or 1 (port open).
This allows for ports to be used directly + by guest and host processes to communicate in an application-defined + manner. +\item [VIRTIO_CONSOLE_PORT_NAME (7)] Sent by the device to give a tag + to the port. This control command is immediately + followed by the UTF-8 name of the port for identification + within the guest (without a NUL terminator). +\end{description} + +\devicenormative{\paragraph}{Multiport Device Operation}{Device Types / Console Device / Device Operation / Multiport Device Operation} + +The device MUST NOT specify a port which exists in a +VIRTIO_CONSOLE_DEVICE_ADD message, nor a port which is equal or +greater than \field{max_nr_ports}. + +The device MUST NOT specify a port in VIRTIO_CONSOLE_DEVICE_REMOVE +which has not been created with a previous VIRTIO_CONSOLE_DEVICE_ADD. + +\drivernormative{\paragraph}{Multiport Device Operation}{Device Types / Console Device / Device Operation / Multiport Device Operation} + +The driver MUST send a VIRTIO_CONSOLE_DEVICE_READY message if +VIRTIO_CONSOLE_F_MULTIPORT is negotiated. + +Upon receipt of a VIRTIO_CONSOLE_CONSOLE_PORT message, the driver +SHOULD treat the port in a manner suitable for text console access +and MUST respond with a VIRTIO_CONSOLE_PORT_OPEN message, which MUST +have \field{value} set to 1. + +\subsubsection{Legacy Interface: Device Operation}\label{sec:Device Types / Console Device / Device Operation / Legacy Interface: Device Operation} +When using the legacy interface, transitional devices and drivers +MUST format the fields in struct virtio_console_control +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + +When using the legacy interface, the driver SHOULD ignore the +\field{len} value in used ring entries for the transmit queues +and the control transmitq. +\begin{note} +Historically, some devices put the total descriptor length there, +even though no data was actually written. 
+\end{note} + +\subsubsection{Legacy Interface: Framing Requirements}\label{sec:Device +Types / Console Device / Legacy Interface: Framing Requirements} + +When using legacy interfaces, transitional drivers which have not +negotiated VIRTIO_F_ANY_LAYOUT MUST use only a single +descriptor for all buffers in the control receiveq and control transmitq. + +\section{Entropy Device}\label{sec:Device Types / Entropy Device} + +The virtio entropy device supplies high-quality randomness for +guest use. + +\subsection{Device ID}\label{sec:Device Types / Entropy Device / Device ID} + 4 + +\subsection{Virtqueues}\label{sec:Device Types / Entropy Device / Virtqueues} +\begin{description} +\item[0] requestq +\end{description} + +\subsection{Feature bits}\label{sec:Device Types / Entropy Device / Feature bits} + None currently defined. + +\subsection{Device configuration layout}\label{sec:Device Types / Entropy Device / Device configuration layout} + None currently defined. + +\subsection{Device Initialization}\label{sec:Device Types / Entropy Device / Device Initialization} + +\begin{enumerate} +\item The virtqueue is initialized +\end{enumerate} + +\subsection{Device Operation}\label{sec:Device Types / Entropy Device / Device Operation} + +When the driver requires random bytes, it places the descriptor +of one or more buffers in the queue. It will be filled, completely +or partially, with random data by the device. + +\drivernormative{\subsubsection}{Device Operation}{Device Types / Entropy Device / Device Operation} + +The driver MUST NOT place device-readable buffers into the queue. + +The driver MUST examine the length written by the device to determine +how many random bytes were received. + +\devicenormative{\subsubsection}{Device Operation}{Device Types / Entropy Device / Device Operation} + +The device MUST place one or more random bytes into the buffer, but it +MAY use less than the entire buffer length.
+ +\section{Traditional Memory Balloon Device}\label{sec:Device Types / Memory Balloon Device} + +This is the traditional balloon device. The device number 13 is +reserved for a new memory balloon interface, with different +semantics, which is expected in a future version of the standard. + +The traditional virtio memory balloon device is a primitive device for +managing guest memory: the device asks for a certain amount of +memory, and the driver supplies it (or withdraws it, if the device +has more than it asks for). This allows the guest to adapt to +changes in allowance of underlying physical memory. If the +VIRTIO_BALLOON_F_STATS_VQ feature is negotiated, the device can also be used to communicate +guest memory statistics to the host. + +\subsection{Device ID}\label{sec:Device Types / Memory Balloon Device / Device ID} + 5 + +\subsection{Virtqueues}\label{sec:Device Types / Memory Balloon Device / Virtqueues} +\begin{description} +\item[0] inflateq +\item[1] deflateq +\item[2] statsq. +\end{description} + + Virtqueue 2 only exists if VIRTIO_BALLOON_F_STATS_VQ is set. + +\subsection{Feature bits}\label{sec:Device Types / Memory Balloon Device / Feature bits} +\begin{description} +\item[VIRTIO_BALLOON_F_MUST_TELL_HOST (0)] Host has to be told before + pages from the balloon are used. + +\item[VIRTIO_BALLOON_F_STATS_VQ (1)] A virtqueue for reporting guest + memory statistics is present. +\item[VIRTIO_BALLOON_F_DEFLATE_ON_OOM (2) ] Deflate balloon on + guest out of memory condition. + +\end{description} + +\drivernormative{\subsubsection}{Feature bits}{Device Types / Memory Balloon Device / Feature bits} +The driver SHOULD accept the VIRTIO_BALLOON_F_MUST_TELL_HOST +feature if offered by the device.
+ +\devicenormative{\subsubsection}{Feature bits}{Device Types / Memory Balloon Device / Feature bits} +If the device offers the VIRTIO_BALLOON_F_MUST_TELL_HOST feature +bit, and if the driver did not accept this feature bit, the +device MAY signal failure by failing to set FEATURES_OK +\field{device status} bit when the driver writes it. +\subparagraph{Legacy Interface: Feature bits}\label{sec:Device +Types / Memory Balloon Device / Feature bits / Legacy Interface: +Feature bits} +As the legacy interface does not have a way to gracefully report feature +negotiation failure, when using the legacy interface, +transitional devices MUST support guests which do not negotiate +VIRTIO_BALLOON_F_MUST_TELL_HOST feature, and SHOULD +allow guest to use memory before notifying host if +VIRTIO_BALLOON_F_MUST_TELL_HOST is not negotiated. + +\subsection{Device configuration layout}\label{sec:Device Types / Memory Balloon Device / Device configuration layout} + Both fields of this configuration + are always available. + +\begin{lstlisting} +struct virtio_balloon_config { + le32 num_pages; + le32 actual; +}; +\end{lstlisting} + +\subparagraph{Legacy Interface: Device configuration layout}\label{sec:Device Types / Memory Balloon Device / Device +configuration layout / Legacy Interface: Device configuration layout} +When using the legacy interface, transitional devices and drivers +MUST format the fields in struct virtio_balloon_config +according to the little-endian format. +\begin{note} +This is unlike the usual convention that legacy device fields are guest endian. +\end{note} + +\subsection{Device Initialization}\label{sec:Device Types / Memory Balloon Device / Device Initialization} + +The device initialization process is outlined below: + +\begin{enumerate} +\item The inflate and deflate virtqueues are identified. + +\item If the VIRTIO_BALLOON_F_STATS_VQ feature bit is negotiated: + \begin{enumerate} + \item Identify the stats virtqueue. 
+ \item Add one empty buffer to the stats virtqueue. + \item DRIVER_OK is set: device operation begins. + \item Notify the device about the stats virtqueue buffer. + \end{enumerate} +\end{enumerate} + +\subsection{Device Operation}\label{sec:Device Types / Memory Balloon Device / Device Operation} + +The device is driven either by the receipt of a configuration +change interrupt, or by changing guest memory needs, such as +performing memory compaction or responding to out of memory +conditions. + +\begin{enumerate} +\item \field{num_pages} configuration field is examined. If this is + greater than the \field{actual} number of pages, the balloon wants + more memory from the guest. If it is less than \field{actual}, + the balloon doesn't need it all. + +\item To supply memory to the balloon (aka. inflate): + \begin{enumerate} + \item The driver constructs an array of addresses of unused memory + pages. These addresses are divided by 4096\footnote{This is historical, and independent of the guest page size. +} and the descriptor + describing the resulting 32-bit array is added to the inflateq. + \end{enumerate} + +\item To remove memory from the balloon (aka. deflate): + \begin{enumerate} + \item The driver constructs an array of addresses of memory pages + it has previously given to the balloon, as described above. + This descriptor is added to the deflateq. + + \item If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is negotiated, the + guest informs the device of pages before it uses them. + + \item Otherwise, the guest is allowed to re-use pages previously + given to the balloon before the device has acknowledged their + withdrawal\footnote{In this case, deflation advice is merely a courtesy. +}. + \end{enumerate} + +\item In either case, the device acknowledges inflate and deflate +requests by using the descriptor. +\item Once the device has acknowledged the inflation or + deflation, the driver updates \field{actual} to reflect the new number of pages in the balloon. 
+\end{enumerate} + +\drivernormative{\subsubsection}{Device Operation}{Device Types / Memory Balloon Device / Device Operation} +The driver SHOULD supply pages to the balloon when \field{num_pages} is +greater than the actual number of pages in the balloon. + +The driver MAY use pages from the balloon when \field{num_pages} is +less than the actual number of pages in the balloon. + +The driver MAY supply pages to the balloon when \field{num_pages} is +greater than or equal to the actual number of pages in the balloon. + +If VIRTIO_BALLOON_F_DEFLATE_ON_OOM has not been negotiated, the +driver MUST NOT use pages from the balloon when \field{num_pages} +is less than or equal to the actual number of pages in the +balloon. + +If VIRTIO_BALLOON_F_DEFLATE_ON_OOM has been negotiated, the +driver MAY use pages from the balloon when \field{num_pages} +is less than or equal to the actual number of pages in the +balloon if this is required for system stability +(e.g. if memory is required by applications running within + the guest). + +The driver MUST use the deflateq to inform the device of pages that it +wants to use from the balloon. + +If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is negotiated, the +driver MUST NOT use pages from the balloon until +the device has acknowledged the deflate request. + +Otherwise, if the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is not +negotiated, the driver MAY begin to re-use pages previously +given to the balloon before the device has acknowledged the +deflate request. + +In any case, the driver MUST NOT use pages from the balloon +after adding the pages to the balloon, but before the device has +acknowledged the inflate request. + +The driver MUST NOT request deflation of pages in +the balloon before the device has acknowledged the inflate +request. + +The driver MUST update \field{actual} after changing the number +of pages in the balloon. + +The driver MAY update \field{actual} once after multiple +inflate and deflate operations. 
+ +\devicenormative{\subsubsection}{Device Operation}{Device Types / Memory Balloon Device / Device Operation} + +The device MAY modify the contents of a page in the balloon +after detecting its physical number in an inflate request +and before acknowledging the inflate request by using the inflateq +descriptor. + +If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is negotiated, the +device MAY modify the contents of a page in the balloon +after detecting its physical number in an inflate request +and before detecting its physical number in a deflate request +and acknowledging the deflate request. + +\paragraph{Legacy Interface: Device Operation}\label{sec:Device +Types / Memory Balloon Device / Device Operation / Legacy +Interface: Device Operation} +When using the legacy interface, the driver SHOULD ignore the \field{len} value in used ring entries. +\begin{note} +Historically, some devices put the total descriptor length there, +even though no data was actually written. +\end{note} +When using the legacy interface, the driver MUST write out all +4 bytes each time it updates the \field{actual} value in the +configuration space, using a single atomic operation. + +When using the legacy interface, the device SHOULD NOT use the +\field{actual} value written by the driver in the configuration +space, until the last, most-significant byte of the value has been +written. +\begin{note} +Historically, devices used the \field{actual} value, even though +when using Virtio Over PCI Bus the device-specific configuration +space was not guaranteed to be atomic. Using intermediate +values during update by driver is best avoided, except for +debugging. + +Historically, drivers using Virtio Over PCI Bus wrote the +\field{actual} value by using multiple single-byte writes in +order, from the least-significant to the most-significant value. 
+\end{note} +\subsubsection{Memory Statistics}\label{sec:Device Types / Memory Balloon Device / Device Operation / Memory Statistics} + +The stats virtqueue is atypical because communication is driven +by the device (not the driver). The channel becomes active at +driver initialization time when the driver adds an empty buffer +and notifies the device. A request for memory statistics proceeds +as follows: + +\begin{enumerate} +\item The device pushes the buffer onto the used ring and sends an + interrupt. + +\item The driver pops the used buffer and discards it. + +\item The driver collects memory statistics and writes them into a + new buffer. + +\item The driver adds the buffer to the virtqueue and notifies the + device. + +\item The device pops the buffer (retaining it to initiate a + subsequent request) and consumes the statistics. +\end{enumerate} + + Within the buffer, statistics are an array of 10-byte entries. + Each statistic consists of a 16 bit + tag and a 64 bit value. All statistics are optional and the + driver chooses which ones to supply. To guarantee backwards + compatibility, devices omit unsupported statistics. + +\begin{lstlisting} +struct virtio_balloon_stat { +#define VIRTIO_BALLOON_S_SWAP_IN 0 +#define VIRTIO_BALLOON_S_SWAP_OUT 1 +#define VIRTIO_BALLOON_S_MAJFLT 2 +#define VIRTIO_BALLOON_S_MINFLT 3 +#define VIRTIO_BALLOON_S_MEMFREE 4 +#define VIRTIO_BALLOON_S_MEMTOT 5 + le16 tag; + le64 val; +} __attribute__((packed)); +\end{lstlisting} + +\drivernormative{\paragraph}{Memory Statistics}{Device Types / Memory Balloon Device / Device Operation / Memory Statistics} +Normative statements in this section apply if and only if the +VIRTIO_BALLOON_F_STATS_VQ feature has been negotiated. + +The driver MUST make at most one buffer available to the device +in the statsq, at all times. + +After initializing the device, the driver MUST make an output +buffer available in the statsq.
+ +Upon detecting that device has used a buffer in the statsq, the +driver MUST make an output buffer available in the statsq. + +Before making an output buffer available in the statsq, the +driver MUST initialize it, including one struct +virtio_balloon_stat entry for each statistic that it supports. + +Driver MUST use an output buffer size which is a multiple of 6 +bytes for all buffers submitted to the statsq. + +Driver MAY supply struct virtio_balloon_stat entries in the +output buffer submitted to the statsq in any order, without +regard to \field{tag} values. + +Driver MAY supply a subset of all statistics in the output buffer +submitted to the statsq. + +Driver MUST supply the same subset of statistics in all buffers +submitted to the statsq. + +\devicenormative{\paragraph}{Memory Statistics}{Device Types / Memory Balloon Device / Device Operation / Memory Statistics} +Normative statements in this section apply if and only if the +VIRTIO_BALLOON_F_STATS_VQ feature has been negotiated. + +Within an output buffer submitted to the statsq, +the device MUST ignore entries with \field{tag} values that it does not recognize. + +Within an output buffer submitted to the statsq, +the device MUST accept struct virtio_balloon_stat entries in any +order without regard to \field{tag} values. + +\paragraph{Legacy Interface: Memory Statistics}\label{sec:Device Types / Memory Balloon Device / Device Operation / Memory Statistics / Legacy Interface: Memory Statistics} + +When using the legacy interface, transitional devices and drivers +MUST format the fields in struct virtio_balloon_stat +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + +When using the legacy interface, +the device SHOULD ignore all values in the first buffer in the +statsq supplied by the driver after device initialization. +\begin{note} +Historically, drivers supplied an uninitialized buffer in the +first buffer. 
+\end{note} + +\subsubsection{Memory Statistics Tags}\label{sec:Device Types / Memory Balloon Device / Device Operation / Memory Statistics Tags} + +\begin{description} +\item[VIRTIO_BALLOON_S_SWAP_IN (0)] The amount of memory that has been + swapped in (in bytes). + +\item[VIRTIO_BALLOON_S_SWAP_OUT (1)] The amount of memory that has been + swapped out to disk (in bytes). + +\item[VIRTIO_BALLOON_S_MAJFLT (2)] The number of major page faults that + have occurred. + +\item[VIRTIO_BALLOON_S_MINFLT (3)] The number of minor page faults that + have occurred. + +\item[VIRTIO_BALLOON_S_MEMFREE (4)] The amount of memory not being used + for any purpose (in bytes). + +\item[VIRTIO_BALLOON_S_MEMTOT (5)] The total amount of memory available + (in bytes). +\end{description} + +\section{SCSI Host Device}\label{sec:Device Types / SCSI Host Device} + +The virtio SCSI host device groups together one or more virtual +logical units (such as disks), and allows communicating to them +using the SCSI protocol. An instance of the device represents a +SCSI host to which many targets and LUNs are attached. + +The virtio SCSI device services two kinds of requests: +\begin{itemize} +\item command requests for a logical unit; + +\item task management functions related to a logical unit, target or + command. +\end{itemize} + +The device is also able to send out notifications about added and +removed logical units. Together, these capabilities provide a +SCSI transport protocol that uses virtqueues as the transfer +medium. In the transport protocol, the virtio driver acts as the +initiator, while the virtio SCSI host provides one or more +targets that receive and process the requests. + +This section relies on definitions from \hyperref[intro:SAM]{SAM}. 
+ +\subsection{Device ID}\label{sec:Device Types / SCSI Host Device / Device ID} + 8 + +\subsection{Virtqueues}\label{sec:Device Types / SCSI Host Device / Virtqueues} + +\begin{description} +\item[0] controlq +\item[1] eventq +\item[2\ldots n] request queues +\end{description} + +\subsection{Feature bits}\label{sec:Device Types / SCSI Host Device / Feature bits} + +\begin{description} +\item[VIRTIO_SCSI_F_INOUT (0)] A single request can include both + device-readable and device-writable data buffers. + +\item[VIRTIO_SCSI_F_HOTPLUG (1)] The host SHOULD enable reporting of + hot-plug and hot-unplug events for LUNs and targets on the SCSI bus. + The guest SHOULD handle hot-plug and hot-unplug events. + +\item[VIRTIO_SCSI_F_CHANGE (2)] The host will report changes to LUN + parameters via a VIRTIO_SCSI_T_PARAM_CHANGE event; the guest + SHOULD handle them. + +\item[VIRTIO_SCSI_F_T10_PI (3)] The extended fields for T10 protection + information (DIF/DIX) are included in the SCSI request header. +\end{description} + +\subsection{Device configuration layout}\label{sec:Device Types / SCSI Host Device / Device configuration layout} + + All fields of this configuration are always available. + +\begin{lstlisting} +struct virtio_scsi_config { + le32 num_queues; + le32 seg_max; + le32 max_sectors; + le32 cmd_per_lun; + le32 event_info_size; + le32 sense_size; + le32 cdb_size; + le16 max_channel; + le16 max_target; + le32 max_lun; +}; +\end{lstlisting} + +\begin{description} +\item[\field{num_queues}] is the total number of request virtqueues exposed by + the device. The driver MAY use only one request queue, + or it can use more to achieve better performance. + +\item[\field{seg_max}] is the maximum number of segments that can be in a + command. A bidirectional command can include \field{seg_max} input + segments and \field{seg_max} output segments. + +\item[\field{max_sectors}] is a hint to the driver about the maximum transfer + size to use. 
+ +\item[\field{cmd_per_lun}] tells the driver the maximum number of + linked commands it can send to one LUN. + +\item[\field{event_info_size}] is the maximum size that the device will fill + for buffers that the driver places in the eventq. It is + written by the device depending on the set of negotiated + features. + +\item[\field{sense_size}] is the maximum size of the sense data that the + device will write. The default value is written by the device + and MUST be 96, but the driver can modify it. It is + restored to the default when the device is reset. + +\item[\field{cdb_size}] is the maximum size of the CDB that the driver will + write. The default value is written by the device and MUST + be 32, but the driver can likewise modify it. It is + restored to the default when the device is reset. + +\item[\field{max_channel}, \field{max_target} and \field{max_lun}] can be + used by the driver as hints to constrain scanning the logical units + on the host to channel/target/logical unit numbers that are less than + or equal to the value of the fields. \field{max_channel} SHOULD + be zero. \field{max_target} SHOULD be less than or equal to 255. + \field{max_lun} SHOULD be less than or equal to 16383. +\end{description} + +\drivernormative{\subsubsection}{Device configuration layout}{Device Types / SCSI Host Device / Device configuration layout} + +The driver MUST NOT write to device configuration fields other than +\field{sense_size} and \field{cdb_size}. + +The driver MUST NOT send more than \field{cmd_per_lun} linked commands +to one LUN, and MUST NOT send more than the virtqueue size number of +linked commands to one LUN. + +\devicenormative{\subsubsection}{Device configuration layout}{Device Types / SCSI Host Device / Device configuration layout} + +On reset, the device MUST set \field{sense_size} to 96 and +\field{cdb_size} to 32.
+ +\subsubsection{Legacy Interface: Device configuration layout}\label{sec:Device Types / SCSI Host Device / Device configuration layout / Legacy Interface: Device configuration layout} +When using the legacy interface, transitional devices and drivers +MUST format the fields in struct virtio_scsi_config +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + +\devicenormative{\subsection}{Device Initialization}{Device Types / SCSI Host Device / Device Initialization} + +On initialization the driver SHOULD first discover the +device's virtqueues. + +If the driver uses the eventq, the driver SHOULD place at least one +buffer in the eventq. + +The driver MAY immediately issue requests\footnote{For example, INQUIRY +or REPORT LUNS.} or task management functions\footnote{For example, I_T +RESET.}. + +\subsection{Device Operation}\label{sec:Device Types / SCSI Host Device / Device Operation} + +Device operation consists of operating request queues, the control +queue and the event queue. + +\paragraph{Legacy Interface: Device Operation}\label{sec:Device +Types / SCSI Host Device / Device Operation / Legacy +Interface: Device Operation} +When using the legacy interface, the driver SHOULD ignore the \field{len} value in used ring entries. +\begin{note} +Historically, devices put the total descriptor length, +or the total length of device-writable buffers there, +even when only part of the buffers were actually written. +\end{note} + +\subsubsection{Device Operation: Request Queues}\label{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: Request Queues} + +The driver queues requests to an arbitrary request queue, and +they are used by the device on that same queue. It is the +responsibility of the driver to ensure strict request ordering +for commands placed on different queues, because they will be +consumed with no order constraints. 
+ +Requests have the following format: + +\begin{lstlisting} +struct virtio_scsi_req_cmd { + // Device-readable part + u8 lun[8]; + le64 id; + u8 task_attr; + u8 prio; + u8 crn; + u8 cdb[cdb_size]; + // The next three fields are only present if VIRTIO_SCSI_F_T10_PI + // is negotiated. + le32 pi_bytesout; + le32 pi_bytesin; + u8 pi_out[pi_bytesout]; + u8 dataout[]; + + // Device-writable part + le32 sense_len; + le32 residual; + le16 status_qualifier; + u8 status; + u8 response; + u8 sense[sense_size]; + // The next field is only present if VIRTIO_SCSI_F_T10_PI + // is negotiated + u8 pi_in[pi_bytesin]; + u8 datain[]; +}; + + +/* command-specific response values */ +#define VIRTIO_SCSI_S_OK 0 +#define VIRTIO_SCSI_S_OVERRUN 1 +#define VIRTIO_SCSI_S_ABORTED 2 +#define VIRTIO_SCSI_S_BAD_TARGET 3 +#define VIRTIO_SCSI_S_RESET 4 +#define VIRTIO_SCSI_S_BUSY 5 +#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6 +#define VIRTIO_SCSI_S_TARGET_FAILURE 7 +#define VIRTIO_SCSI_S_NEXUS_FAILURE 8 +#define VIRTIO_SCSI_S_FAILURE 9 + +/* task_attr */ +#define VIRTIO_SCSI_S_SIMPLE 0 +#define VIRTIO_SCSI_S_ORDERED 1 +#define VIRTIO_SCSI_S_HEAD 2 +#define VIRTIO_SCSI_S_ACA 3 +\end{lstlisting} + +\field{lun} addresses the REPORT LUNS well-known logical unit, or +a target and logical unit in the virtio-scsi device's SCSI domain. +When used to address the REPORT LUNS logical unit, \field{lun} is 0xC1, +0x01 and six zero bytes. The virtio-scsi device SHOULD implement the +REPORT LUNS well-known logical unit. + +When used to address a target and logical unit, the only supported format +for \field{lun} is: first byte set to 1, second byte set to target, +third and fourth byte representing a single level LUN structure, followed +by four zero bytes. With this representation, a virtio-scsi device can +serve up to 256 targets and 16384 LUNs per target. The device MAY also +support having well-known logical units in the third and fourth byte. + +\field{id} is the command identifier (``tag'').
+ +\field{task_attr} defines the task attribute as in the table above, but +all task attributes MAY be mapped to SIMPLE by the device. Some commands +are defined by SCSI standards as ``implicit head of queue''; for such +commands, all task attributes MAY also be mapped to HEAD OF QUEUE. +Drivers and applications SHOULD NOT send a command with the ORDERED +task attribute if the command has an implicit HEAD OF QUEUE attribute, +because whether the ORDERED task attribute is honored is vendor-specific. + +\field{crn} may also be provided by clients, but is generally expected +to be 0. The maximum CRN value defined by the protocol is 255, since +CRN is stored in an 8-bit integer. + +The CDB is included in \field{cdb} and its size, \field{cdb_size}, +is taken from the configuration space. + +All of these fields are defined in \hyperref[intro:SAM]{SAM} and are +always device-readable. + +\field{pi_bytesout} determines the size of the \field{pi_out} field +in bytes. If it is nonzero, the \field{pi_out} field contains outgoing +protection information for write operations. \field{pi_bytesin} determines +the size of the \field{pi_in} field in the device-writable section, in bytes. +All three fields are only present if VIRTIO_SCSI_F_T10_PI has been negotiated. + +The remainder of the device-readable part is the data output buffer, +\field{dataout}. + +\field{sense} and subsequent fields are always device-writable. \field{sense_len} +indicates the number of bytes actually written to the sense +buffer. + +\field{residual} indicates the residual size, +calculated as ``data_length - number_of_transferred_bytes'', for +read or write operations. For bidirectional commands, the +number_of_transferred_bytes includes both read and written bytes. +A \field{residual} that is less than the size of \field{datain} means that +\field{dataout} was processed entirely.
A \field{residual} that +exceeds the size of \field{datain} means that \field{dataout} was +processed partially and \field{datain} was not processed at +all. + +If the \field{pi_bytesin} is nonzero, the \field{pi_in} field contains +incoming protection information for read operations. \field{pi_in} is +only present if VIRTIO_SCSI_F_T10_PI has been negotiated\footnote{There + is no separate residual size for \field{pi_bytesout} and + \field{pi_bytesin}. It can be computed from the \field{residual} field, + the size of the data integrity information per sector, and the sizes + of \field{pi_out}, \field{pi_in}, \field{dataout} and \field{datain}.}. + +The remainder of the device-writable part is the data input buffer, +\field{datain}. + + +\devicenormative{\paragraph}{Device Operation: Request Queues}{Device Types / SCSI Host Device / Device Operation / Device Operation: Request Queues} + +The device MUST write the \field{status} byte as the status code as +defined in \hyperref[intro:SAM]{SAM}. + +The device MUST write the \field{response} byte as one of the following: + +\begin{description} + +\item[VIRTIO_SCSI_S_OK] when the request was completed and the \field{status} + byte is filled with a SCSI status code (not necessarily + ``GOOD''). + +\item[VIRTIO_SCSI_S_OVERRUN] if the content of the CDB (such as the + allocation length, parameter length or transfer size) requires + more data than is available in the datain and dataout buffers. + +\item[VIRTIO_SCSI_S_ABORTED] if the request was cancelled due to an + ABORT TASK or ABORT TASK SET task management function. + +\item[VIRTIO_SCSI_S_BAD_TARGET] if the request was never processed + because the target indicated by \field{lun} does not exist. + +\item[VIRTIO_SCSI_S_RESET] if the request was cancelled due to a bus + or device reset (including a task management function). + +\item[VIRTIO_SCSI_S_TRANSPORT_FAILURE] if the request failed due to a + problem in the connection between the host and the target + (severed link). 
+ +\item[VIRTIO_SCSI_S_TARGET_FAILURE] if the target is suffering a + failure and to tell the driver not to retry on other paths. + +\item[VIRTIO_SCSI_S_NEXUS_FAILURE] if the nexus is suffering a failure + but retrying on other paths might yield a different result. + +\item[VIRTIO_SCSI_S_BUSY] if the request failed but retrying on the + same path is likely to work. + +\item[VIRTIO_SCSI_S_FAILURE] for other host or driver error. In + particular, if neither \field{dataout} nor \field{datain} is empty, and the + VIRTIO_SCSI_F_INOUT feature has not been negotiated, the + request will be immediately returned with a response equal to + VIRTIO_SCSI_S_FAILURE. +\end{description} + +All commands must be completed before the virtio-scsi device is +reset or unplugged. The device MAY choose to abort them, or if +it does not do so MUST pick the VIRTIO_SCSI_S_FAILURE response. + +\drivernormative{\paragraph}{Device Operation: Request Queues}{Device Types / SCSI Host Device / Device Operation / Device Operation: Request Queues} + +\field{task_attr}, \field{prio} and \field{crn} SHOULD be zero. + +Upon receiving a VIRTIO_SCSI_S_TARGET_FAILURE response, the driver +SHOULD NOT retry the request on other paths. + +\paragraph{Legacy Interface: Device Operation: Request Queues}\label{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: Request Queues / Legacy Interface: Device Operation: Request Queues} +When using the legacy interface, transitional devices and drivers +MUST format the fields in struct virtio_scsi_req_cmd +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + +\subsubsection{Device Operation: controlq}\label{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: controlq} + +The controlq is used for other SCSI transport operations. 
+Requests have the following format: + +{ +\lstset{escapechar=\$} +\begin{lstlisting} +struct virtio_scsi_ctrl { + le32 type; +$\ldots$ + u8 response; +}; + +/* response values valid for all commands */ +#define VIRTIO_SCSI_S_OK 0 +#define VIRTIO_SCSI_S_BAD_TARGET 3 +#define VIRTIO_SCSI_S_BUSY 5 +#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6 +#define VIRTIO_SCSI_S_TARGET_FAILURE 7 +#define VIRTIO_SCSI_S_NEXUS_FAILURE 8 +#define VIRTIO_SCSI_S_FAILURE 9 +#define VIRTIO_SCSI_S_INCORRECT_LUN 12 +\end{lstlisting} +} + +The \field{type} identifies the remaining fields. + +The following commands are defined: + +\begin{itemize} +\item Task management function. +\begin{lstlisting} +#define VIRTIO_SCSI_T_TMF 0 + +#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0 +#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1 +#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2 +#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3 +#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4 +#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5 +#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6 +#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7 + +struct virtio_scsi_ctrl_tmf +{ + // Device-readable part + le32 type; + le32 subtype; + u8 lun[8]; + le64 id; + // Device-writable part + u8 response; +} + +/* command-specific response values */ +#define VIRTIO_SCSI_S_FUNCTION_COMPLETE 0 +#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10 +#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11 +\end{lstlisting} + + The \field{type} is VIRTIO_SCSI_T_TMF; \field{subtype} defines which + task management function. All + fields except \field{response} are filled by the driver. + + Other fields which are irrelevant for the requested TMF + are ignored but they are still present. \field{lun} + is in the same format specified for request queues; the + single level LUN is ignored when the task management function + addresses a whole I_T nexus. When relevant, the value of \field{id} + is matched against the id values passed on the requestq. 
+ + The outcome of the task management function is written by the + device in \field{response}. The command-specific response + values map 1-to-1 with those defined in \hyperref[intro:SAM]{SAM}. + + Task management functions can affect the response value for commands that + are in the request queue and have not been completed yet. For example, + the device MUST complete all active commands on a logical unit + or target (possibly with a VIRTIO_SCSI_S_RESET response code) + upon receiving a ``logical unit reset'' or ``I_T nexus reset'' TMF. + Similarly, the device MUST complete the selected commands (possibly + with a VIRTIO_SCSI_S_ABORTED response code) upon receiving an ``abort + task'' or ``abort task set'' TMF. Such effects MUST take place before + the TMF itself is successfully completed, and the device MUST use + memory barriers appropriately in order to ensure that the driver sees + these writes in the correct order. + +\item Asynchronous notification query. +\begin{lstlisting} +#define VIRTIO_SCSI_T_AN_QUERY 1 + +struct virtio_scsi_ctrl_an { + // Device-readable part + le32 type; + u8 lun[8]; + le32 event_requested; + // Device-writable part + le32 event_actual; + u8 response; +} + +#define VIRTIO_SCSI_EVT_ASYNC_OPERATIONAL_CHANGE 2 +#define VIRTIO_SCSI_EVT_ASYNC_POWER_MGMT 4 +#define VIRTIO_SCSI_EVT_ASYNC_EXTERNAL_REQUEST 8 +#define VIRTIO_SCSI_EVT_ASYNC_MEDIA_CHANGE 16 +#define VIRTIO_SCSI_EVT_ASYNC_MULTI_HOST 32 +#define VIRTIO_SCSI_EVT_ASYNC_DEVICE_BUSY 64 +\end{lstlisting} + + By sending this command, the driver asks the device which + events the given LUN can report, as described in paragraphs 6.6 + and A.6 of \hyperref[intro:SCSI MMC]{SCSI MMC}. The driver writes the + events it is interested in into \field{event_requested}; the device + responds by writing the events that it supports into + \field{event_actual}. + + The \field{type} is VIRTIO_SCSI_T_AN_QUERY. \field{lun} and \field{event_requested} + are written by the driver.
\field{event_actual} and \field{response} + fields are written by the device. + + No command-specific values are defined for the \field{response} byte. + +\item Asynchronous notification subscription. +\begin{lstlisting} +#define VIRTIO_SCSI_T_AN_SUBSCRIBE 2 + +struct virtio_scsi_ctrl_an { + // Device-readable part + le32 type; + u8 lun[8]; + le32 event_requested; + // Device-writable part + le32 event_actual; + u8 response; +} +\end{lstlisting} + + By sending this command, the driver asks the specified LUN to + report events for its physical interface, again as described in + \hyperref[intro:SCSI MMC]{SCSI MMC}. The driver writes the events it is + interested in into \field{event_requested}; the device responds by + writing the events that it supports into \field{event_actual}. + + Event types are the same as for the asynchronous notification + query message. + + The \field{type} is VIRTIO_SCSI_T_AN_SUBSCRIBE. \field{lun} and + \field{event_requested} are written by the driver. + \field{event_actual} and \field{response} are written by the device. + + No command-specific values are defined for the response byte. +\end{itemize} + +\paragraph{Legacy Interface: Device Operation: controlq}\label{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: controlq / Legacy Interface: Device Operation: controlq} + +When using the legacy interface, transitional devices and drivers +MUST format the fields in struct virtio_scsi_ctrl, struct +virtio_scsi_ctrl_tmf and struct virtio_scsi_ctrl_an +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. + + +\subsubsection{Device Operation: eventq}\label{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: eventq} + +The eventq is populated by the driver for the device to report information on logical +units that are attached to it.
In general, the device will not +queue events to cope with an empty eventq, and will end up +dropping events if it finds no buffer ready. However, when +reporting events for many LUNs (e.g. when a whole target +disappears), the device can throttle events to avoid dropping +them. For this reason, placing 10-15 buffers on the event queue +is sufficient. + +Buffers returned by the device on the eventq will be referred to +as ``events'' in the rest of this section. Events have the +following format: + +\begin{lstlisting} +#define VIRTIO_SCSI_T_EVENTS_MISSED 0x80000000 + +struct virtio_scsi_event { + // Device-writable part + le32 event; + u8 lun[8]; + le32 reason; +} +\end{lstlisting} + +The device sets bit 31 in \field{event} to report lost events +due to missing buffers. + +The meaning of \field{reason} depends on the +contents of \field{event}. The following events are defined: + +\begin{itemize} +\item No event. +\begin{lstlisting} +#define VIRTIO_SCSI_T_NO_EVENT 0 +\end{lstlisting} + + This event is fired in the following cases: + +\begin{itemize} +\item When the device detects in the eventq a buffer that is + shorter than what is indicated in the configuration field, it + MAY use it immediately and put this dummy value in \field{event}. + A well-written driver will never observe this + situation. + +\item When events are dropped, the device MAY signal this event as + soon as the driver makes a buffer available, in order to + request action from the driver. In this case, of course, this + event will be reported with the VIRTIO_SCSI_T_EVENTS_MISSED + flag. +\end{itemize} + +\item Transport reset +\begin{lstlisting} +#define VIRTIO_SCSI_T_TRANSPORT_RESET 1 + +#define VIRTIO_SCSI_EVT_RESET_HARD 0 +#define VIRTIO_SCSI_EVT_RESET_RESCAN 1 +#define VIRTIO_SCSI_EVT_RESET_REMOVED 2 +\end{lstlisting} + + By sending this event, the device signals that a logical unit + on a target has been reset, including the case of a new device + appearing or disappearing on the bus.
The device fills in all + fields. \field{event} is set to + VIRTIO_SCSI_T_TRANSPORT_RESET. \field{lun} addresses a + logical unit in the SCSI host. + + The \field{reason} value is one of the three \#define values appearing + above: + + \begin{description} + \item[VIRTIO_SCSI_EVT_RESET_REMOVED] (``LUN/target removed'') is used + if the target or logical unit is no longer able to receive + commands. + + \item[VIRTIO_SCSI_EVT_RESET_HARD] (``LUN hard reset'') is used if the + logical unit has been reset, but is still present. + + \item[VIRTIO_SCSI_EVT_RESET_RESCAN] (``rescan LUN/target'') is used if + a target or logical unit has just appeared on the device. + \end{description} + + The ``removed'' and ``rescan'' events can happen when the + VIRTIO_SCSI_F_HOTPLUG feature was negotiated; when sent for LUN 0, + they MAY apply to the entire target so the driver can ask the + initiator to rescan the target to detect this. + + Events will also be reported via sense codes (this obviously + does not apply to newly appeared buses or targets, since the + application has never discovered them): + + \begin{itemize} + \item ``LUN/target removed'' maps to sense key ILLEGAL REQUEST, asc + 0x25, ascq 0x00 (LOGICAL UNIT NOT SUPPORTED) + + \item ``LUN hard reset'' maps to sense key UNIT ATTENTION, asc 0x29 + (POWER ON, RESET OR BUS DEVICE RESET OCCURRED) + + \item ``rescan LUN/target'' maps to sense key UNIT ATTENTION, asc + 0x3f, ascq 0x0e (REPORTED LUNS DATA HAS CHANGED) + \end{itemize} + + The preferred way to detect transport reset is always to use + events, because sense codes are only seen by the driver when it + sends a SCSI command to the logical unit or target. However, in + case events are dropped, the initiator will still be able to + synchronize with the actual state of the controller if the + driver asks the initiator to rescan the SCSI bus.
During the + rescan, the initiator will be able to observe the above sense + codes, and it will process them as if the driver had + received the equivalent event. + + \item Asynchronous notification +\begin{lstlisting} +#define VIRTIO_SCSI_T_ASYNC_NOTIFY 2 +\end{lstlisting} + + By sending this event, the device signals that an asynchronous + event was fired from a physical interface. + + All fields are written by the device. \field{event} is set to + VIRTIO_SCSI_T_ASYNC_NOTIFY. \field{lun} addresses a logical + unit in the SCSI host. \field{reason} is a subset of the + events that the driver has subscribed to via the ``Asynchronous + notification subscription'' command. + + \item LUN parameter change +\begin{lstlisting} +#define VIRTIO_SCSI_T_PARAM_CHANGE 3 +\end{lstlisting} + + By sending this event, the device signals a change in the configuration parameters + of a logical unit, for example the capacity or cache mode. + \field{event} is set to VIRTIO_SCSI_T_PARAM_CHANGE. + \field{lun} addresses a logical unit in the SCSI host. + + The same event SHOULD also be reported as a unit attention condition. + \field{reason} contains the additional sense code and additional sense code qualifier, + respectively in bits 0\ldots 7 and 8\ldots 15. + \begin{note} + For example, a change in capacity will be reported as asc 0x2a, ascq 0x09 + (CAPACITY DATA HAS CHANGED). + \end{note} + + For MMC devices (inquiry type 5) there would be some overlap between this + event and the asynchronous notification event, so for simplicity the host never + reports this event for MMC devices. +\end{itemize} + +\drivernormative{\paragraph}{Device Operation: eventq}{Device Types / SCSI Host Device / Device Operation / Device Operation: eventq} + +The driver SHOULD keep the eventq populated with buffers. These +buffers MUST be device-writable, and SHOULD be at least +\field{event_info_size} bytes long, and MUST be at least the size of +struct virtio_scsi_event.
+ +If \field{event} has bit 31 set, the driver SHOULD +poll the logical units for unit attention conditions, and/or do +whatever form of bus scan is appropriate for the guest operating +system and SHOULD poll for asynchronous events manually using SCSI commands. + +When receiving a VIRTIO_SCSI_T_TRANSPORT_RESET message with +\field{reason} set to VIRTIO_SCSI_EVT_RESET_REMOVED or +VIRTIO_SCSI_EVT_RESET_RESCAN for LUN 0, the driver SHOULD ask the +initiator to rescan the target, in order to detect the case when an +entire target has appeared or disappeared. + +\devicenormative{\paragraph}{Device Operation: eventq}{Device Types / SCSI Host Device / Device Operation / Device Operation: eventq} + +The device MUST set bit 31 in \field{event} if events were lost due to +missing buffers, and it MAY use a VIRTIO_SCSI_T_NO_EVENT event to report +this. + +The device MUST NOT send VIRTIO_SCSI_T_TRANSPORT_RESET messages +with \field{reason} set to VIRTIO_SCSI_EVT_RESET_REMOVED or +VIRTIO_SCSI_EVT_RESET_RESCAN unless VIRTIO_SCSI_F_HOTPLUG was negotiated. + +The device MUST NOT report VIRTIO_SCSI_T_PARAM_CHANGE for MMC devices. + +\paragraph{Legacy Interface: Device Operation: eventq}\label{sec:Device Types / SCSI Host Device / Device Operation / Device Operation: eventq / Legacy Interface: Device Operation: eventq} +When using the legacy interface, transitional devices and drivers +MUST format the fields in struct virtio_scsi_event +according to the native endian of the guest rather than +(necessarily when not using the legacy interface) little-endian. 
+ +\subsubsection{Legacy Interface: Framing Requirements}\label{sec:Device +Types / SCSI Host Device / Legacy Interface: Framing Requirements} + +When using legacy interfaces, transitional drivers which have not +negotiated VIRTIO_F_ANY_LAYOUT MUST use a single descriptor for the +\field{lun}, \field{id}, \field{task_attr}, \field{prio}, +\field{crn} and \field{cdb} fields, and MUST only use a single +descriptor for the \field{sense_len}, \field{residual}, +\field{status_qualifier}, \field{status}, \field{response} and +\field{sense} fields. + +\chapter{Reserved Feature Bits}\label{sec:Reserved Feature Bits} + +Currently these device-independent feature bits are defined: + +\begin{description} + \item[VIRTIO_F_RING_INDIRECT_DESC (28)] Negotiating this feature indicates + that the driver can use descriptors with the VIRTQ_DESC_F_INDIRECT + flag set, as described in \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table / Indirect Descriptors}~\nameref{sec:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Descriptor Table / Indirect Descriptors}. + + \item[VIRTIO_F_RING_EVENT_IDX (29)] This feature enables the \field{used_event} + and the \field{avail_event} fields as described in \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Virtqueue Interrupt Suppression} and \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / The Virtqueue Used Ring}. + + \item[VIRTIO_F_VERSION_1 (32)] This indicates compliance with this + specification, giving a simple way to detect legacy devices or drivers. + + \item[VIRTIO_F_IOMMU_PLATFORM (33)] This feature indicates that the device is + behind an IOMMU that translates bus addresses from the device into physical + addresses in memory. If this feature bit is set to 0, then the device emits + physical addresses which are not translated further, even though an IOMMU + may be present.
+\end{description} + +\drivernormative{\section}{Reserved Feature Bits}{Reserved Feature Bits} + +A driver MUST accept VIRTIO_F_VERSION_1 if it is offered. A driver +MAY fail to operate further if VIRTIO_F_VERSION_1 is not offered. + +A driver SHOULD accept VIRTIO_F_IOMMU_PLATFORM if it is offered, and it MUST +then either disable the IOMMU or configure the IOMMU to translate bus addresses +passed to the device into physical addresses in memory. If +VIRTIO_F_IOMMU_PLATFORM is not offered, then a driver MUST pass only physical +addresses to the device. + +\devicenormative{\section}{Reserved Feature Bits}{Reserved Feature Bits} + +A device MUST offer VIRTIO_F_VERSION_1. A device MAY fail to operate further +if VIRTIO_F_VERSION_1 is not accepted. + +A device SHOULD offer VIRTIO_F_IOMMU_PLATFORM if it is behind an IOMMU that +translates bus addresses from the device into physical addresses in memory. +A device MAY fail to operate further if VIRTIO_F_IOMMU_PLATFORM is not +accepted. + +\section{Legacy Interface: Reserved Feature Bits}\label{sec:Reserved Feature Bits / Legacy Interface: Reserved Feature Bits} + +Transitional devices MAY offer the following: +\begin{description} +\item[VIRTIO_F_NOTIFY_ON_EMPTY (24)] If this feature + has been negotiated by driver, the device MUST issue + an interrupt if the device runs + out of available descriptors on a virtqueue, even though + interrupts are suppressed using the VIRTQ_AVAIL_F_NO_INTERRUPT + flag or the \field{used_event} field. +\begin{note} + An example of a driver using this feature is the legacy + networking driver: it doesn't need to know every time a packet + is transmitted, but it does need to free the transmitted + packets a finite time after they are transmitted. It can avoid + using a timer if the device interrupts it when all the packets + are transmitted. 
+\end{note} +\end{description} + +Transitional devices MUST offer, and if offered by the device +transitional drivers MUST accept the following: +\begin{description} +\item[VIRTIO_F_ANY_LAYOUT (27)] This feature indicates that the device + accepts arbitrary descriptor layouts, as described in Section + \ref{sec:Basic Facilities of a Virtio Device / Virtqueues / Message Framing / Legacy Interface: Message Framing}~\nameref{sec:Basic Facilities of a Virtio Device / Virtqueues / Message Framing / Legacy Interface: Message Framing}. + +\item[UNUSED (30)] Bit 30 is used by qemu's implementation to check + for experimental early versions of virtio which did not perform + correct feature negotiation, and SHOULD NOT be negotiated. +\end{description} diff --git a/diffpreamble.tex b/diffpreamble.tex new file mode 100644 index 0000000..f7b5cec --- /dev/null +++ b/diffpreamble.tex @@ -0,0 +1,42 @@ +%% diffpreable.tex start + +%DIF FONTSTRIKE PREAMBLE +\RequirePackage[normalem]{ulem} +\providecommand{\DIFadd}[1]{\protect\textcolor{green}{\sf #1}} +\providecommand{\DIFdel}[1]{\protect\textcolor{red}{\footnotesize \sout{#1}}} +%DIF END FONTSTRIKE PREAMBLE + +%DIF COLOR PREAMBLE +\RequirePackage{color} +\providecommand{\DIFaddbegin}{\protect\color{green}\hypersetup{linkcolor=green,urlcolor=green}} +\providecommand{\DIFaddend}{\protect\color{black}\hypersetup{linkcolor=blue,urlcolor=blue}} +\providecommand{\DIFdelbegin}{\protect\color{red}\hypersetup{linkcolor=red,urlcolor=red}} +\providecommand{\DIFdelend}{\protect\color{black}\hypersetup{linkcolor=blue,urlcolor=blue}} +\providecommand{\DIFaddtext}[1]{\textcolor{blue}{\sf #1}} +\providecommand{\DIFdeltext}[1]{\textcolor{red}{\footnotesize \sout{#1}}} + +%DIF END COLOR PREAMBLE +\providecommand{\DIFaddtext}[1]{\textcolor{green}{\sf #1}} +\providecommand{\DIFdeltext}[1]{\textcolor{red}{\footnotesize \sout{#1}}} + +%manual markup for cs02 +\renewcommand{\DIFaddtextcstwo}[1]{\textcolor{green}{\sf #1}} 
+\renewcommand{\DIFdeltextcstwo}[1]{\textcolor{red}{\footnotesize \sout{#1}}} + +%DIF FLOATSAFE PREAMBLE: SEEMS UNUSED BUT JUST IN CASE +\providecommand{\DIFaddFL}[1]{\DIFadd{#1}} +\providecommand{\DIFdelFL}[1]{\DIFdel{#1}} +\providecommand{\DIFaddbeginFL}{} +\providecommand{\DIFaddendFL}{} +\providecommand{\DIFdelbeginFL}{} +\providecommand{\DIFdelendFL}{} +\renewenvironment{DIFnomarkup} +{ +\begingroup\color{black}\hypersetup{linkcolor=blue,urlcolor=blue} +} +{ +\endgroup +} +%DIF END FLOATSAFE PREAMBLE + +%% diffpreable.tex end diff --git a/fix-numbers.pl b/fix-numbers.pl new file mode 100755 index 0000000..2170856 --- /dev/null +++ b/fix-numbers.pl @@ -0,0 +1,135 @@ +#!/usr/bin/perl + +use strict; + +sub find_footnotes { + my @text = @_; + my @notes = (); + my $found = 0; + my $l; + + for ($l = 0; $l <= $#text; $l++) { + if ($text[$l] =~ m/^FOOTNOTES:$/) { + $found = 1; + } + next unless $found; + if ($text[$l] =~ m/^\[[0-9]+\]\s/) { + push @notes, $l; + } + } + return @notes; +}; + +sub find_sections { + my @text = @_; + my @sections = (); + my $l; + + for ($l = 0; $l <= $#text - 1; $l++) { + next unless (($text[$l + 1] =~ m/^=======*$/) or + ($text[$l + 1] =~ m/^-------*$/)); + + next unless ($text[$l] =~ m/^(([0-9]+\.)+) /); + + push @sections, $l; + + } + return @sections; +}; + +sub get_section { + my ($section, @prevpath) = @_; + my @path = split(/\./, $section); + + #Possible cases: + if ($#path > $#prevpath) { +# Path deeper than parent: just add .1 + my $diff = $#path - $#prevpath; + @path = @prevpath; + for (my $i = 0; $i < $diff; $i++) { + push @path, 1; + } + } elsif ($#path == $#prevpath) { +# Same level as parent + @path = @prevpath; + $path[$#path]++; + } elsif ($#path < $#prevpath) { +# Higher level than parent + @path = @prevpath[0 .. $#path]; + $path[$#path]++; + } + my $newsection = join('.', @path) . '.'; + my $prev = join('.', @prevpath) . 
'.'; + return ($newsection, @path); +}; + +my @text = (); +while (<>) { + push @text, $_; +} + +my @footnotes = find_footnotes(@text); +my @sections = find_sections(@text); + +#Set new numbers for footnotes +my %footnote_by_old_reference = (); +my $f; +for ($f = 0; $f <= $#footnotes; $f++) { + my $l = $footnotes[$f]; + die unless ($text[$l] =~ m/^\[([0-9]+)\]\s/); + my $footnote = $1; + my $newfootnote = $f + 1; + die "duplicate footnote number $footnote" if defined($footnote_by_old_reference{$footnote}); + $footnote_by_old_reference{$footnote} = $newfootnote; +} + +#Find and fix references to footnotes +my $l; +for ($l = 0; $l <= $#text; $l++) { + next unless $text[$l] =~ m/\[[0-9]+\]/; #premature optimization + for my $old (keys(%footnote_by_old_reference)) { + my $new = $footnote_by_old_reference{$old}; + next if $new eq $old; + $text[$l] =~ s/\[$old\]/[XYX$new]/g; + } + $text[$l] =~ s/\[XYX/[/go; +} + +#Set new numbers for sections +my %section_by_old_reference = (); +my $s; +my @path = (); + +for ($s = 0; $s <= $#sections; $s++) { + my $l = $sections[$s]; + die unless ($text[$l] =~ m/^(([0-9]+\.)+)/); + my $section = $1; + my ($newsection, @p) = get_section($section, @path); + @path = @p; + die "duplicate section number $section" if defined($section_by_old_reference{$section}); + $section_by_old_reference{$section} = $newsection; +} + +#Find and fix references to sections +my $l; +for ($l = 0; $l <= $#text; $l++) { + next unless $text[$l] =~ m/^(([0-9]+\.)+)/; #premature optimization + for my $old (keys(%section_by_old_reference)) { + my $new = $section_by_old_reference{$old}; + next if $new eq $old; + + my @p = split(/\./, $old); + my $pattern = join("\\.", @p) . "\\."; + my @s = split(/\./, $new); + my $subst = join("XYX", @s) . 
"XYX"; + $text[$l] =~ s/$pattern/$subst/g; + } + $text[$l] =~ s/XYX/./go; +} + + +for my $line (@text) { + print $line; +} + + diff --git a/fixupdiff.pl b/fixupdiff.pl new file mode 100644 index 0000000..f66eaa3 --- /dev/null +++ b/fixupdiff.pl @@ -0,0 +1,37 @@ +use strict; + +my $lstlisting=0; + +while (<>) { + my $line = $_; + if (m/%DIFDELCMD\s+<\s+\\begin\{lstlisting\}/) { + $lstlisting=1; + $line =~s/%DIFDELCMD\s+</{\\lstset{escapechar=\\\$} /; + } + if ($lstlisting) { + $line =~ s/%DIFDELCMD\s+< //; + if (not $line =~ m/\\(?:begin|end)\{lstlisting\}/) { + $line =~ s/([#&{} ])/\\$1/g; + $line =~ s/(.*)/\$\\DIFdel\{$1\}\$/; + } + #print "%FIXED BY RULE 1\n"; + } + + # Too many \color directives (generated by DIFdel/addbegin/end) + # confuse xetex, producing errors: + # WARNING ** Color stack overflow. Just ignore. + # and resulting in corrupted color in output. + # As a work-around, detect cases where it's safe, and replace \color with + # \textcolor. + # As a result, number of \color directives goes does sufficiently + # enough to avoid the overflow error. 
+ + $line =~ s/\\DIFdelbegin \\DIFdel\{([^}]*)\}\\DIFdelend/\\DIFdeltext{$1}/; + $line =~ s/\\DIFaddbegin \\DIFadd\{([^}]*)\}\\DIFaddend/\\DIFaddtext{$1}/; + + print $line; + if (m/%DIFDELCMD\s+<\s+\\end\{lstlisting\}/) { + print "}\n"; + $lstlisting=0; + } +} diff --git a/getchangelog.pl b/getchangelog.pl new file mode 100755 index 0000000..5619ce4 --- /dev/null +++ b/getchangelog.pl @@ -0,0 +1,114 @@ +#!/usr/bin/perl + +use strict; + +my $rev = undef; +if ($#ARGV >= 0) { + $rev = shift @ARGV; +} else { + open(REV, "git svn log REVISION|") || die; + while (<REV>) { + next unless (m/^(r[0-9]+)/); + #top revision is WD, skip it + if (not defined $rev) { + $rev = $1; + next; + } else { + $rev = $1; + last; + } + } +} + +die unless $rev =~ m/^r([0-9]+)$/; +$rev = $1; + +sub escapelatex { + my $s = shift; + $s =~ s/[\\]/\\textbackslash /go; + $s =~ s/([&#%{}\$])/\\$1/go; + $s =~ s/[~]/\\~{}/go; + $s =~ s/(https?:\S*)/\\url{$1}/go; +#1st line always on a separate paragraph + $s =~ s/\n/\n\n/o; +#Guess where new paragraph starts + $s =~ s/\\.\n/.\n\n/go; + $s =~ s/\n-/\n\n-/go; + return $s; +} + +#map editors to authors +my %editors = {}; +$editors{'rusty'} = 'Rusty Russell <rusty@au1.ibm.com>'; +$editors{'hornet'} = 'Pawel Moll <pawel.moll@arm.com>'; +$editors{'cornelia.huck'} = 'Cornelia Huck <cornelia.huck@de.ibm.com>'; +$editors{'mstsirkin'} = 'Michael S. 
Tsirkin <mst@redhat.com>'; + +my $cl = ""; +my $signoff = undef; +my $editor = undef; +my $date = undef; +my $r = undef; +open(LOG, "git svn log *tex|") || die; +my $line = undef; +while (<LOG>) { + if (m/^------------------------------------------------------------------------$/) { + next if ($cl eq ""); + # act on it + my $author; + if (defined $signoff) { + $author = $signoff; + } else { + $author = $editors{$editor}; + } + #strip mail info + $author =~ s/\s*<.*//; + $cl = escapelatex($cl); + print "$r & $date & $author & { $cl } \\\\\n"; + print "\\hline\n"; + + $cl = ""; + $signoff = undef; + $editor = undef; + $date = undef; + $r = undef; + + $line = 0; + next; + } + $line++; +#r164 | mstsirkin | 2013-12-08 14:30:55 +0200 (Sun, 08 Dec 2013)| 6 lines + + if ($line eq 1) { + die unless (m/^r[0-9]/); + my @rinfo = split(/\s*\Q|\E\s*/, $_); + $r = $rinfo[0]; + + die unless $r =~ m/^r([0-9]+)$/; + $r = $1; + last if ($r <= $rev); + + $editor = $rinfo[1]; + $date = $rinfo[2]; + die unless ($date =~ m/^[^(]*\([^,]*,\s*([^)]+)\)\s*$/); + $date = $1; + next; + } + next if (m/^$/); + + # First signature is the author: needed? 
+ # ignore for now + #if (not defined $signoff and m/^Signed-off-by:\s*(.*)/) { + # $signoff = $1; + #} + # skip signatures + next if (m/^\s*[A-Z][A-Za-z-]*-by:/); + + + # fix bug: wrong date in some commit logs + if (/Change accepted on VIRTIO TC Meeting, 3 December 2013/) { + $_ = "Change accepted on Virtio TC Meeting Minutes: Feb 25, 2014\n"; + } + + $cl .= $_; +} diff --git a/git-svn.txt b/git-svn.txt new file mode 100644 index 0000000..e094217 --- /dev/null +++ b/git-svn.txt @@ -0,0 +1,34 @@ +Using git svn with virtio svn repository: + +Initial clone (fetches all branches, takes a very long time): + git svn clone -s https://tools.oasis-open.org/version-control/svn/virtio +Pull: + git svn rebase +Push: + git svn dcommit + +Tagging 1.0 cs02 to match the released specification: + git branch -t v1.0-cs02 + +Updating the trunk with all changes made on 1.0 branch: + + git config --global svn.pushmergeinfo true + git checkout -b master origin/trunk + git svn fetch + git svn rebase -l + git merge --no-ff origin/v1.0 + [ resolve merge conflicts ] + git svn dcommit + +Faster initial clone from git mirror (example using mst's mirror at kernel.org): + + git clone git://git.kernel.org/pub/scm/virt/kvm/mst/virtio-text.git + cd virtio-text + git config --remove-section remote.origin + git svn init -s https://tools.oasis-open.org/version-control/svn/virtio + git svn rebase + git checkout -b trunk origin/trunk + +Updating a git mirror from git-svn repository (after setting up a +remote named "mirror"): + git push mirror --prune +refs/remotes/origin/*:refs/heads/* diff --git a/headerfile.tex b/headerfile.tex new file mode 100644 index 0000000..767005c --- /dev/null +++ b/headerfile.tex @@ -0,0 +1,8 @@ +\chapter[virtio_queue.h]{virtio_queue.h}\label{sec:virtio-queue.h} +\label{sec:virtio-ring.h} +This file is also available at the link +\virtiourlh. +All definitions in this section are for non-normative reference +only. 
+ +\lstinputlisting{virtio-queue.h} diff --git a/images/oasis.png b/images/oasis.png Binary files differnew file mode 100644 index 0000000..e043aa1 --- /dev/null +++ b/images/oasis.png diff --git a/introduction.tex b/introduction.tex new file mode 100644 index 0000000..979881e --- /dev/null +++ b/introduction.tex @@ -0,0 +1,161 @@ +\chapter{Introduction} + +\input{abstract.tex} + +\begin{description} +\item[Straightforward:] Virtio devices use normal bus mechanisms of + interrupts and DMA which should be familiar to any device driver + author. There is no exotic page-flipping or COW mechanism: it's just + a normal device.\footnote{This lack of page-sharing implies that the implementation of the +device (e.g. the hypervisor or host) needs full access to the +guest memory. Communication with untrusted parties (i.e. +inter-guest communication) requires copying. +} + +\item[Efficient:] Virtio devices consist of rings of descriptors + for both input and output, which are neatly laid out to avoid cache + effects from both driver and device writing to the same cache + lines. + +\item[Standard:] Virtio makes no assumptions about the environment in which + it operates, beyond supporting the bus to which device is attached. + In this specification, virtio + devices are implemented over MMIO, Channel I/O and PCI bus transports +\footnote{The Linux implementation further separates the virtio +transport code from the specific virtio drivers: these drivers are shared +between different transports. +}, earlier drafts + have been implemented on other buses not included here. + +\item[Extensible:] Virtio devices contain feature bits which are + acknowledged by the guest operating system during device setup. + This allows forwards and backwards compatibility: the device + offers all the features it knows about, and the driver + acknowledges those it understands and wishes to use. 
+\end{description} + +\section{Normative References} + +\begin{longtable}{l p{5in}} + \phantomsection\label{intro:rfc2119}\textbf{[RFC2119]} & +Bradner S., ``Key words for use in RFCs to Indicate Requirement +Levels'', BCP 14, RFC 2119, March 1997. \newline\url{http://www.ietf.org/rfc/rfc2119.txt}\\ + \phantomsection\label{intro:S390 PoP}\textbf{[S390 PoP]} & z/Architecture Principles of Operation, IBM Publication SA22-7832, \newline\url{http://publibfi.boulder.ibm.com/epubs/pdf/dz9zr009.pdf}, and any future revisions\\ + \phantomsection\label{intro:S390 Common I/O}\textbf{[S390 Common I/O]} & ESA/390 Common I/O-Device and Self-Description, IBM Publication SA22-7204, \newline\url{http://publibfp.dhe.ibm.com/cgi-bin/bookmgr/BOOKS/dz9ar501/CCONTENTS}, and any future revisions\\ + \phantomsection\label{intro:PCI}\textbf{[PCI]} & + Conventional PCI Specifications, + \newline\url{http://www.pcisig.com/specifications/conventional/}, + PCI-SIG\\ + \phantomsection\label{intro:PCIe}\textbf{[PCIe]} & + PCI Express Specifications + \newline\url{http://www.pcisig.com/specifications/pciexpress/}, + PCI-SIG\\ + \phantomsection\label{intro:IEEE 802}\textbf{[IEEE 802]} & + IEEE Standard for Local and Metropolitan Area Networks: Overview and Architecture, + \newline\url{http://standards.ieee.org/about/get/802/802.html}, + IEEE\\ + \phantomsection\label{intro:SAM}\textbf{[SAM]} & + SCSI Architectural Model, + \newline\url{http://www.t10.org/cgi-bin/ac.pl?t=f&f=sam4r05.pdf}\\ + \phantomsection\label{intro:SCSI MMC}\textbf{[SCSI MMC]} & + SCSI Multimedia Commands, + \newline\url{http://www.t10.org/cgi-bin/ac.pl?t=f&f=mmc6r00.pdf}\\ + +\end{longtable} + +\section{Non-Normative References} + +\begin{longtable}{l p{5in}} + \phantomsection\label{intro:Virtio PCI Draft}\textbf{[Virtio PCI Draft]} & + Virtio PCI Draft Specification + \newline\url{http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf}\\ +\end{longtable} + +\section{Terminology}\label{Terminology} + +The key words ``MUST'', 
``MUST NOT'', ``REQUIRED'', ``SHALL'', ``SHALL NOT'', ``SHOULD'', ``SHOULD NOT'', ``RECOMMENDED'', ``MAY'', and ``OPTIONAL'' in this document are to be interpreted as described in \hyperref[intro:rfc2119]{[RFC2119]}. + +\subsection{Legacy Interface: Terminology}\label{intro:Legacy +Interface: Terminology} + +Earlier drafts of this specification (i.e. revisions before 1.0, +see e.g. \hyperref[intro:Virtio PCI Draft]{[Virtio PCI Draft]}) +defined a similar, but different +interface between the driver and the device. +Since these are widely deployed, this specification +accommodates OPTIONAL features to simplify transition +from these earlier draft interfaces. + +Specifically devices and drivers MAY support: +\begin{description} +\item[Legacy Interface] + is an interface specified by an earlier draft of this specification + (before 1.0) +\item[Legacy Device] + is a device implemented before this specification was released, + and implementing a legacy interface on the host side +\item[Legacy Driver] + is a driver implemented before this specification was released, + and implementing a legacy interface on the guest side +\end{description} + +Legacy devices and legacy drivers are not compliant with this +specification. + +To simplify transition from these earlier draft interfaces, +a device MAY implement: + +\begin{description} +\item[Transitional Device] + a device supporting both drivers conforming to this + specification, and allowing legacy drivers. +\end{description} + +Similarly, a driver MAY implement: +\begin{description} +\item[Transitional Driver] + a driver supporting both devices conforming to this + specification, and legacy devices. +\end{description} + +\begin{note} + Legacy interfaces are not required; i.e. don't implement them unless you + have a need for backwards compatibility! +\end{note} + +Devices or drivers with no legacy compatibility are referred to as +non-transitional devices and drivers, respectively.
+ +\subsection{Transition from earlier specification drafts}\label{sec:Transition from earlier specification drafts} + +For devices and drivers already implementing the legacy +interface, some changes will have to be made to support this +specification. + +In this case, it might be beneficial for the reader to focus on +sections tagged ``Legacy Interface'' in the section title. +These highlight the changes made since the earlier drafts. + +\section{Structure Specifications} + +Many device and driver in-memory structure layouts are documented using +the C struct syntax. All structures are assumed to be without additional +padding. To stress this, cases where common C compilers are known to insert +extra padding within structures are tagged using the GNU C +__attribute__((packed)) syntax. + +For the integer data types used in the structure definitions, the following +conventions are used: + +\begin{description} +\item[u8, u16, u32, u64] An unsigned integer of the specified length in bits. + +\item[le16, le32, le64] An unsigned integer of the specified length in bits, +in little-endian byte order. + +\item[be16, be32, be64] An unsigned integer of the specified length in bits, +in big-endian byte order.
+\end{description} + +\newpage + diff --git a/listings.tex b/listings.tex new file mode 100644 index 0000000..9b88b77 --- /dev/null +++ b/listings.tex @@ -0,0 +1,30 @@ +% listings setup +\definecolor{LISTING_background}{RGB}{245,245,245} +\lstset{ + basicstyle=\footnotesize\ttfamily, + extendedchars=true, + backgroundcolor=\color{LISTING_background}, + showspaces=false, + showstringspaces=false, + showtabs=false, + frame=single, + tabsize=2, + breaklines=true, + breakatwhitespace=true, + postbreak=\raisebox{0ex}[0ex][0ex]{\ensuremath{\hookrightarrow\space}} +} + +\definecolor{XML_string}{RGB}{0,0,0} +\definecolor{XML_identifier}{RGB}{0,0,128} +\definecolor{XML_keyword}{RGB}{200,0,0} +\lstdefinelanguage{XML} +{ + morestring=[b]", + morestring=[s]{>}{<}, + morecomment=[s]{<?}{?>}, + stringstyle=\color{XML_string}, + identifierstyle=\color{XML_identifier}, + keywordstyle=\color{XML_keyword}, + morekeywords={virtio} +} + diff --git a/main.tex b/main.tex new file mode 100644 index 0000000..b1913d6 --- /dev/null +++ b/main.tex @@ -0,0 +1,51 @@ +% section numbers 5 levels deep +\setcounter{secnumdepth}{5} + +\begin{document} +\pagestyle{fancy} + +% start text +\setcounter{page}{1} + +%%% define header and footer +\virtioheaderfooter + +%%% define header an footer for chapter title pages +\fancypagestyle{plain}{\fancyhf{}\virtioheaderfooter} + +% title page +\input{title.tex} + +% ToC +\setcounter{page}{4} +\setcounter{tocdepth}{4} +\renewcommand{\contentsname}{Table of Contents} +\addtocontents{toc}{\protect\virtioheaderfooter} +\tableofcontents +\newpage + +%%% line numbers? 
+%\linenumbers + +% introduction +\input{introduction.tex} + +\input{content.tex} +% Conformance +\input{conformance.tex} + +% appendix +\appendix +\def\@chapapp{\appendixname} + +\input{headerfile.tex} + +\input{newdevice.tex} + +% acknowledgements +\input{acknowledgements.tex} + +% draft change log +\input{changelog.tex} + +\end{document} diff --git a/make-setup-generated.sh b/make-setup-generated.sh new file mode 100755 index 0000000..59e96a3 --- /dev/null +++ b/make-setup-generated.sh @@ -0,0 +1,63 @@ +#! /bin/sh + +VERSION=1.0 +DATESTR=${DATESTR:-`cat REVISION-DATE 2>/dev/null`} +if [ x"$DATESTR" = x ]; then + ISODATE=`git show --format=format:'%cd' --date=iso | head -n 1` + DATESTR=`date -d "$DATE" +'%d %B %Y'` +fi + +case "$1" in + *-wd*) + STAGE=wd + STAGENAME="Working Draft" + WORKINGDRAFT=`basename "$1" | sed 's/.*-wd//'` + ;; + *-os*) + STAGE=os + STAGENAME="OASIS Standard" + WORKINGDRAFT="" + ;; + *-csd*) + STAGE=csd + WORKINGDRAFT=`basename "$1" | sed 's/.*-csd//'` + STAGENAME="Committee Specification Draft $WORKINGDRAFT" + ;; + *-csprd*) + STAGE=csprd + WORKINGDRAFT=`basename "$1" | sed 's/.*-csprd//'` + STAGENAME="Committee Specification Draft $WORKINGDRAFT" + STAGEEXTRATITLE=" / \newline Public Review Draft $WORKINGDRAFT" + STAGEEXTRA=" / Public Review Draft $WORKINGDRAFT" + ;; + *-cs*) + STAGE=cs + WORKINGDRAFT=`basename "$1" | sed 's/.*-cs//'` + STAGENAME="Committee Specification $WORKINGDRAFT" + ;; + *) + echo Unknown doc type >&2 + exit 1 +esac + +#Prepend OASIS unless already there +case "$STAGENAME" in + OASIS*) + OASISSTAGENAME="$STAGENAME" + ;; + *) + OASISSTAGENAME="OASIS $STAGENAME" + ;; +esac + +cat > setup-generated.tex <<EOF +% define VIRTIO Working Draft number and date +\newcommand{\virtiorev}{$VERSION} +\newcommand{\virtioworkingdraftdate}{$DATESTR} +\newcommand{\virtioworkingdraft}{$WORKINGDRAFT} +\newcommand{\virtiodraftstage}{$STAGE} +\newcommand{\virtiodraftstageextra}{$STAGEEXTRA} 
+\newcommand{\virtiodraftstageextratitle}{$STAGEEXTRATITLE} +\newcommand{\virtiodraftstagename}{$STAGENAME} +\newcommand{\virtiodraftoasisstagename}{$OASISSTAGENAME} +EOF diff --git a/makeall.sh b/makeall.sh new file mode 100755 index 0000000..20f568e --- /dev/null +++ b/makeall.sh @@ -0,0 +1,14 @@ +export SPECDOC=${SPECDOC:-`cat REVISION`} +export DATESTR=${DATESTR:-`cat REVISION-DATE`} +./makezip.sh +./makehtml.sh +./makepdf.sh +zip $SPECDOC.zip $SPECDOC.pdf +echo Generated file $SPECDOC.zip +echo To change output file name, set SPECDOC environment variable +echo Examples: +echo SPECDOC=virtio-v1.0-wd01 $0 +echo SPECDOC=virtio-v1.0-csd01 $0 +echo SPECDOC=virtio-v1.0-csprd01 $0 +echo SPECDOC=virtio-v1.0-cs01 $0 +echo SPECDOC=virtio-v1.0-os $0 diff --git a/makediff.sh b/makediff.sh new file mode 100755 index 0000000..1dd75d4 --- /dev/null +++ b/makediff.sh @@ -0,0 +1,58 @@ +#force revision and date in environment +#this way they don't appear in the diff +export SPECDOC=${SPECDOC:-`cat REVISION`} +export DATESTR=${DATESTR:-`cat REVISION-DATE`} + +#make pdf diff using latexpand and latexdiff-fast +#preamble in diffpreamble.tex +#in diff, links are coloured green instead of blue +MAIN=$1 +PATH=.:${PATH} +cur="$PWD" +oldrev=`git rev-list -1 origin/tags/v1.0-cs03` +newrev=`git rev-list -1 HEAD` +rm -fr old new +git clone $PWD old +cd "${cur}/old" +git checkout $oldrev +while read -r rev; do + echo "Applying $rev" + git cherry-pick `git rev-list -1 -F --grep "$rev" $newrev` || exit 1 +done << 'EOF' +headerfile: rename virtio_ring to virtio queue +EOF + +#mv specvars.tex specvars-orig.tex +#make links green to avoid confusion +#sed s/blue/pinegreen/ specvars-orig.tex > specvars.tex +SPECDOC=${SPECDOC:-`cat REVISION`} +./make-setup-generated.sh "$SPECDOC" +#wget http://www.ctan.org/pkg/latexpand +#chmod +x latexpand +latexpand $MAIN -o flat.tex +cd "${cur}" +git clone $PWD new +cd "${cur}/new" +#mv specvars.tex specvars-orig.tex +#make links green to avoid confusion +#sed 
s/blue/pinegreen/ specvars-orig.tex > specvars.tex +SPECDOC=${SPECDOC:-`cat REVISION`} +./make-setup-generated.sh "$SPECDOC" +latexpand $MAIN -o flat.tex +cd "${cur}" +# latexdiff does not do diffs within footnotes +# adding space make it not realize the text is a footnote, +# and treat it normally +sed 's/\\footnote{/\\footnote {/' old/flat.tex > old/flat-fixed.tex +sed 's/\\footnote{/\\footnote {/' new/flat.tex > new/flat-fixed.tex +#cp old/flat.tex old/flat-fixed.tex +#cp new/flat.tex new/flat-fixed.tex +#wget http://mirror.math.ku.edu/tex-archive/support/latexdiff/latexdiff-fast +#chmod +x latexdiff-fast +latexdiff-fast --config \ +"FLOATENV=(?:figure|longtable|table|tabular|plate|lstlisting|note|enumerate|itemize)[\w\d*@]*,PICTUREENV=(?:picture|DIFdeltextcstwo|DIFnomarkup|lstlisting)[\w\d*@]*" \ + --append-safecmd=field --append-textcmd=mmioreg \ + --exclude-textcmd=chapter \ +--ignore-warnings -p diffpreamble.tex old/flat-fixed.tex \ +new/flat-fixed.tex > virtio-diff-tofix.tex +perl fixupdiff.pl virtio-diff-tofix.tex > virtio-diff.tex diff --git a/makediffall.sh b/makediffall.sh new file mode 100755 index 0000000..51a6ec6 --- /dev/null +++ b/makediffall.sh @@ -0,0 +1,16 @@ +export SPECDOC=${SPECDOC:-`cat REVISION`} +export DATESTR=${DATESTR:-`cat REVISION-DATE`} +./makezip.sh +./makehtml.sh +./makediffhtml.sh +./makepdf.sh +./makediffpdf.sh +zip $SPECDOC.zip $SPECDOC.pdf $SPECDOC-diff.html $SPECDOC-diff.css $SPECDOC-diff.pdf +echo Generated file $SPECDOC.zip +echo To change output file name, set SPECDOC environment variable +echo Examples: +echo SPECDOC=virtio-v1.0-wd01 $0 +echo SPECDOC=virtio-v1.0-csd01 $0 +echo SPECDOC=virtio-v1.0-csprd01 $0 +echo SPECDOC=virtio-v1.0-cs01 $0 +echo SPECDOC=virtio-v1.0-os $0 diff --git a/makediffhtml.sh b/makediffhtml.sh new file mode 100755 index 0000000..f8f8783 --- /dev/null +++ b/makediffhtml.sh @@ -0,0 +1,54 @@ +#!/bin/sh + +./makediff.sh virtio-html.tex || exit 3 + +SPECDOC=${SPECDOC:-`cat REVISION`} 
+SPECDOC="${SPECDOC}-diff" + +./make-setup-generated.sh "$SPECDOC" + +cp virtio-diff.tex $SPECDOC.tex + +#uncomment if you have a broken t4ht +#PATH=./t4ht-workaround:${PATH} htlatex $SPECDOC.tex "virtiohtml,info,charset=utf-8" " -cunihtf -utf8" +htlatex $SPECDOC.tex "virtio-html,info,charset=utf-8,fn-in" " -cunihtf -utf8" + +rm $SPECDOC.tex + +rm $SPECDOC.aux +mv $SPECDOC.html $SPECDOC.tmp1 + +sed 's/~~/"/g' $SPECDOC.tmp1 >$SPECDOC.tmp2 +sed 's/>~/>"/g' $SPECDOC.tmp2 >$SPECDOC.tmp3 +sed 's/>=~/>="/g' $SPECDOC.tmp3 >$SPECDOC.tmp4 +sed 's/~</"</g' $SPECDOC.tmp4 >$SPECDOC.tmp5 + +# If font paths are misconfigured, we get ligatures +# (such as 'ff or 'fi') replaced by NULL character in output. +# This in not a valid HTML output, so detect this and warn user. +# For detection, we rely on the fact that file utility +# recognizes files with NULL characters as binary data. +if test "$(file -b $SPECDOC.tmp5)" = 'data'; +then + echo + echo WARNING! + echo + echo NULL characters detected in file output. + echo This is likely due to tex4ht being unable to find font files. + echo If installed, you might need to fix font file paths + echo for tex4ht by locating tex4ht.env file in your setup + echo correcting font file paths there and copying it to + echo tex4ht.env or .tex4ht in your home directory. + echo + echo WARNING! + echo Proceeding but HTML output appears to be malformed. 
+ echo +fi + +mv $SPECDOC.tmp5 $SPECDOC.html +rm $SPECDOC.tmp* + +#uncomment if you have a broken t4ht +#cp ./t4ht-workaround/virtio-v1.0-csd01.css $SPECDOC.css + +zip $SPECDOC.zip $SPECDOC*.html $SPECDOC.css images/*.png diff --git a/makediffpdf.sh b/makediffpdf.sh new file mode 100755 index 0000000..b3031c4 --- /dev/null +++ b/makediffpdf.sh @@ -0,0 +1,8 @@ +#make pdf diff using latexpand and latexdiff-fast +./makediff.sh virtio.tex || exit 3 +SPECDOC=${SPECDOC:-`cat REVISION`} +SPECDOC="${SPECDOC}-diff" +rm $SPECDOC.aux $SPECDOC.pdf $SPECDOC.out +xelatex --jobname $SPECDOC virtio-diff.tex +xelatex --jobname $SPECDOC virtio-diff.tex +xelatex --jobname $SPECDOC virtio-diff.tex diff --git a/makehtml.sh b/makehtml.sh new file mode 100755 index 0000000..e18c350 --- /dev/null +++ b/makehtml.sh @@ -0,0 +1,50 @@ +#!/bin/sh + +SPECDOC=${SPECDOC:-`cat REVISION`} +./make-setup-generated.sh "$SPECDOC" + +cp virtio-html.tex $SPECDOC.tex + +#uncomment if you have a broken t4ht +#PATH=./t4ht-workaround:${PATH} htlatex $SPECDOC.tex "virtiohtml,info,charset=utf-8" " -cunihtf -utf8" +htlatex $SPECDOC.tex "virtio-html,info,charset=utf-8,fn-in" " -cunihtf -utf8" + +rm $SPECDOC.tex + +rm $SPECDOC.aux +mv $SPECDOC.html $SPECDOC.tmp1 + +sed 's/~~/"/g' $SPECDOC.tmp1 >$SPECDOC.tmp2 +sed 's/>~/>"/g' $SPECDOC.tmp2 >$SPECDOC.tmp3 +sed 's/>=~/>="/g' $SPECDOC.tmp3 >$SPECDOC.tmp4 +sed 's/~</"</g' $SPECDOC.tmp4 >$SPECDOC.tmp5 + +# If font paths are misconfigured, we get ligatures +# (such as 'ff or 'fi') replaced by NULL character in output. +# This in not a valid HTML output, so detect this and warn user. +# For detection, we rely on the fact that file utility +# recognizes files with NULL characters as binary data. +if test "$(file -b $SPECDOC.tmp5)" = 'data'; +then + echo + echo WARNING! + echo + echo NULL characters detected in file output. + echo This is likely due to tex4ht being unable to find font files. 
+ echo If installed, you might need to fix font file paths + echo for tex4ht by locating tex4ht.env file in your setup + echo correcting font file paths there and copying it to + echo tex4ht.env or .tex4ht in your home directory. + echo + echo WARNING! + echo Proceeding but HTML output appears to be malformed. + echo +fi + +mv $SPECDOC.tmp5 $SPECDOC.html +rm $SPECDOC.tmp* + +#uncomment if you have a broken t4ht +#cp ./t4ht-workaround/virtio-v1.0-csd01.css $SPECDOC.css + +zip $SPECDOC.zip $SPECDOC.html $SPECDOC.css images/*.png diff --git a/makepdf.sh b/makepdf.sh new file mode 100755 index 0000000..9cae903 --- /dev/null +++ b/makepdf.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +SPECDOC=${SPECDOC:-`cat REVISION`} +./make-setup-generated.sh "$SPECDOC" + +rm $SPECDOC.aux $SPECDOC.pdf $SPECDOC.out +xelatex --jobname $SPECDOC virtio.tex +xelatex --jobname $SPECDOC virtio.tex +xelatex --jobname $SPECDOC virtio.tex diff --git a/maketex.sh b/maketex.sh new file mode 100755 index 0000000..c3b458f --- /dev/null +++ b/maketex.sh @@ -0,0 +1,3 @@ +export SPECDOC=${SPECDOC:-`cat REVISION`}-tex +export DATESTR=${DATESTR:-`cat REVISION-DATE`} +./makezip.sh diff --git a/makezip.sh b/makezip.sh new file mode 100755 index 0000000..3c94f8e --- /dev/null +++ b/makezip.sh @@ -0,0 +1,27 @@ +export SPECDOC=${SPECDOC:-`cat REVISION`} +export DATESTR=${DATESTR:-`cat REVISION-DATE`} +rm -f $SPECDOC.zip +if test -d .git; then + git archive --format=zip --prefix=tex/ -o $SPECDOC.zip HEAD +elif test -d .svn; then + rm -fr export-from-svn + mkdir -p export-from-svn + svn export . export-from-svn/tex + cd export-from-svn/ + zip ../$SPECDOC.zip tex/ + cd .. +else + echo Neither .git nor .svn found. + echo Falling back to generated list. 
+fi +zip -d $SPECDOC.zip tex/.gitattributes +rm -fr listings +mkdir -p listings +cp virtio-queue.h listings/virtio_queue.h +zip $SPECDOC.zip listings/virtio_queue.h +rm -fr tmpfilesforzip +mkdir -p tmpfilesforzip/tex +echo "$SPECDOC" > tmpfilesforzip/tex/REVISION +echo "$DATESTR" > tmpfilesforzip/tex/REVISION-DATE +cd tmpfilesforzip +zip ../$SPECDOC.zip tex/* diff --git a/newdevice.tex b/newdevice.tex new file mode 100644 index 0000000..28e1c7b --- /dev/null +++ b/newdevice.tex @@ -0,0 +1,67 @@ +\chapter{Creating New Device Types}\label{sec:Creating New Device Types} + +Various considerations are necessary when creating a new device +type. + +\section{How Many Virtqueues?}\label{sec:Creating New Device Types / How Many Virtqueues?} + +It is possible that a very simple device will operate entirely +through its device configuration space, but most will need at least one +virtqueue in which it will place requests. A device with both +input and output (e.g.\ console and network devices described here) +needs two queues: one which the driver fills with buffers to +receive input, and one in which the driver places buffers to +transmit output. + +\section{What Device Configuration Space Layout?}\label{sec:Creating New Device Types / What Device Configuration Space Layout?} + +Device configuration space should only be used for initialization-time +parameters. It is a limited resource with no synchronization between +fields written by the driver, so for most uses it is better to use a virtqueue to update +configuration information (the network device does this for filtering, +otherwise the table in the config space could potentially be very +large). + +Remember that configuration fields over 32 bits wide might not be atomically +writable by the driver. Therefore, no writable field which triggers an +action ought to be wider than 32 bits. 
+ +\section{What Device Number?}\label{sec:Creating New Device Types / What Device Number?} + +Device numbers can be reserved by the OASIS committee: email +virtio-dev@lists.oasis-open.org to secure a unique one. + +Meanwhile for experimental drivers, use 65535 and work backwards. + +\section{How many MSI-X vectors? (for PCI)}\label{sec:Creating New Device Types / How many MSI-X vectors? (for PCI)} + +Using the optional MSI-X capability, devices can speed up +interrupt processing by removing the need for the guest driver +to read the ISR Status register (which might be an expensive operation), +reducing interrupt sharing between devices and queues within the +device, and handling interrupts from multiple CPUs. However, some +systems impose a limit (which might be as low as 256) on the +total number of MSI-X vectors that can be allocated to all +devices. Devices and/or drivers should take this into +account, limiting the number of vectors used unless the device is +expected to cause a high volume of interrupts. Devices can +control the number of vectors used by limiting the MSI-X Table +Size or not presenting MSI-X capability in PCI configuration +space. Drivers can control this by mapping events to as small +a number of vectors as possible, or disabling MSI-X capability +altogether. + +\section{Device Improvements}\label{sec:Creating New Device Types / Device Improvements} + +Any change to device configuration space, or new virtqueues, or +behavioural changes, should be indicated by negotiation of a new +feature bit. This establishes clarity\footnote{Even if it does mean documenting design or implementation +mistakes! +} and avoids future expansion problems. + +Clusters of functionality which are always implemented together +can use a single bit, but if one feature makes sense without the +others they should not be gratuitously grouped together to +conserve feature bits. 
+ + diff --git a/specvars.tex b/specvars.tex new file mode 100644 index 0000000..84f8bae --- /dev/null +++ b/specvars.tex @@ -0,0 +1,24 @@ +\input{setup-generated.tex} +\newcommand{\virtioversion}{Virtual I/O Device (VIRTIO) Version \virtiorev} +\newcommand{\virtiospecfile}{virtio-v\virtiorev-\virtiodraftstage\virtioworkingdraft} +\newcommand{\virtiourlbase}{http://docs.oasis-open.org/virtio/virtio/v\virtiorev/\virtiodraftstage\virtioworkingdraft} +%Below must be used to make links work correctly in HTML +\urldef \virtiourllistings\url{\virtiourlbase/listings/} +\urldef \virtiourltex\url{\virtiourlbase/tex/} +\urldef \virtiourlpdf\url{\virtiourlbase/\virtiospecfile.pdf} +\urldef \virtiourlhtml\url{\virtiourlbase/\virtiospecfile.html} +\urldef \virtiourlh\url{\virtiourlbase/listings/virtio_queue.h} +\urldef \virtiourllatestpdf\url{http://docs.oasis-open.org/virtio/virtio/v\virtiorev/virtio-v\virtiorev.pdf} +\urldef \virtiourllatesthtml\url{http://docs.oasis-open.org/virtio/virtio/v\virtiorev/virtio-v\virtiorev.html} +\newcommand{\virtioworkproduct}{Standards Track Work Product} +\newcommand{\virtiooasiscopyright}{Copyright © OASIS Open 2015. All Rights Reserved.} + +% PDF setup +\hypersetup{ +pdfauthor = {OASIS Virtual I/O Device (VIRTIO) TC}, +pdftitle = {Virtual I/O Device (VIRTIO) Version \virtiorev}, +pdfsubject = {Virtual I/O Device (VIRTIO) Version \virtiorev}, +pdfkeywords = {VIRTIO, Virtual I/O Device}, +colorlinks = {true}, +linkcolor=blue, +urlcolor = blue} diff --git a/t4ht-workaround/README b/t4ht-workaround/README new file mode 100644 index 0000000..0e98f2c --- /dev/null +++ b/t4ht-workaround/README @@ -0,0 +1,18 @@ +Recent Fedora and some other distros have broken t4ht +packages, making makehtml fail. The correct fix is to put a +working t4ht somewhere in your path. 
+ +For example, for Fedora one can get an old copy from here: +http://pkgs.fedoraproject.org/repo/pkgs/tetex-tex4ht/tex4ht-1.0.2008_02_28_2058.tar.gz/dc9d78c8ad22acd4b40dfd2cbbe2b8de/tex4ht-1.0.2008_02_28_ +after unpacking, t4ht is under ./bin/linux/t4ht, put it on +path. +tex4ht.env is under texmf/tex4ht/base/unix/tex4ht.env, put it +in $HOME (you can rename it .tex4ht) or under current +directory. Make sure to fix up paths otherwise output +will be malformed! + +Alternatively, you can also put this t4ht-workaround directory first +on your path, it includes an empty stub for t4ht. +You will also need to uncomment a work-around in makehtml.sh. +If you use this work-around you will get an html output though it +won't be properly formatted. diff --git a/t4ht-workaround/t4ht b/t4ht-workaround/t4ht new file mode 100755 index 0000000..2b6e781 --- /dev/null +++ b/t4ht-workaround/t4ht @@ -0,0 +1,2 @@ +#!/bin/sh +echo t4ht is broken on Fedora F19, skip it diff --git a/t4ht-workaround/virtio-v1.0-csd01.css b/t4ht-workaround/virtio-v1.0-csd01.css new file mode 100644 index 0000000..f5342e8 --- /dev/null +++ b/t4ht-workaround/virtio-v1.0-csd01.css @@ -0,0 +1,175 @@ + +/* start css.sty */ +.aebx-10{font-weight: bold;} +.aebx-10{font-weight: bold;} +.aebx-10{font-weight: bold;} +.aebx-10{font-weight: bold;} +.aebx-10{font-weight: bold;} +.aeti-10{ font-style: italic;} +.aeti-10{ font-style: italic;} +.aeti-10{ font-style: italic;} +.aeti-10{ font-style: italic;} +.aeti-10{ font-style: italic;} +.pcrr8t-{font-weight: bold;} +.pcrr8t-{font-weight: bold;} +.pcrr8t-{font-weight: bold;} +.pcrr8t-{font-weight: bold;} +.pcrr8t-{font-weight: bold;} +.pcrb8t-{ font-weight: bold; font-family: monospace;} +.aer-9{font-size:90%;} +.aer-8{font-size:80%;} +.pcrro8t-{font-family: monospace; font-style: oblique;} +.pcrro8t-{font-family: monospace; font-style: oblique;} +.pcrr8t-x-x-80{font-size:80%;font-weight: bold;} +.pcrr8t-x-x-80{font-weight: bold;} +.pcrr8t-x-x-80{font-weight: bold;} 
+.pcrr8t-x-x-80{font-weight: bold;} +.pcrr8t-x-x-80{font-weight: bold;} +.pcrro8t-x-x-80{font-size:80%;font-family: monospace; font-style: oblique;} +.pcrro8t-x-x-80{font-family: monospace; font-style: oblique;} +p.noindent { text-indent: 0em } +td p.noindent { text-indent: 0em; margin-top:0em; } +p.nopar { text-indent: 0em; } +p.indent{ text-indent: 1.5em } +@media print {div.crosslinks {visibility:hidden;}} +a img { border-top: 0; border-left: 0; border-right: 0; } +center { margin-top:1em; margin-bottom:1em; } +td center { margin-top:0em; margin-bottom:0em; } +.Canvas { position:relative; } +img.math{vertical-align:middle;} +li p.indent { text-indent: 0em } +li p:first-child{ margin-top:0em; } +li p:last-child, li div:last-child { margin-bottom:0.5em; } +li p~ul:last-child, li p~ol:last-child{ margin-bottom:0.5em; } +.enumerate1 {list-style-type:decimal;} +.enumerate2 {list-style-type:lower-alpha;} +.enumerate3 {list-style-type:lower-roman;} +.enumerate4 {list-style-type:upper-alpha;} +div.newtheorem { margin-bottom: 2em; margin-top: 2em;} +.obeylines-h,.obeylines-v {white-space: nowrap; } +div.obeylines-v p { margin-top:0; margin-bottom:0; } +.overline{ text-decoration:overline; } +.overline img{ border-top: 1px solid black; } +td.displaylines {text-align:center; white-space:nowrap;} +.centerline {text-align:center;} +.rightline {text-align:right;} +div.verbatim {font-family: monospace; white-space: nowrap; text-align:left; clear:both; } +.fbox {padding-left:3.0pt; padding-right:3.0pt; text-indent:0pt; border:solid black 0.4pt; } +div.fbox {display:table} +div.center div.fbox {text-align:center; clear:both; padding-left:3.0pt; padding-right:3.0pt; text-indent:0pt; border:solid black 0.4pt; } +div.minipage{width:100%;} +div.center, div.center div.center {text-align: center; margin-left:1em; margin-right:1em;} +div.center div {text-align: left;} +div.flushright, div.flushright div.flushright {text-align: right;} +div.flushright div {text-align: left;} 
+div.flushleft {text-align: left;} +.underline{ text-decoration:underline; } +.underline img{ border-bottom: 1px solid black; margin-bottom:1pt; } +.framebox-c, .framebox-l, .framebox-r { padding-left:3.0pt; padding-right:3.0pt; text-indent:0pt; border:solid black 0.4pt; } +.framebox-c {text-align:center;} +.framebox-l {text-align:left;} +.framebox-r {text-align:right;} +span.thank-mark{ vertical-align: super } +span.footnote-mark sup.textsuperscript, span.footnote-mark a sup.textsuperscript{ font-size:80%; } +div.tabular, div.center div.tabular {text-align: center; margin-top:0.5em; margin-bottom:0.5em; } +table.tabular td p{margin-top:0em;} +table.tabular {margin-left: auto; margin-right: auto;} +td p:first-child{ margin-top:0em; } +td p:last-child{ margin-bottom:0em; } +div.td00{ margin-left:0pt; margin-right:0pt; } +div.td01{ margin-left:0pt; margin-right:5pt; } +div.td10{ margin-left:5pt; margin-right:0pt; } +div.td11{ margin-left:5pt; margin-right:5pt; } +table[rules] {border-left:solid black 0.4pt; border-right:solid black 0.4pt; } +td.td00{ padding-left:0pt; padding-right:0pt; } +td.td01{ padding-left:0pt; padding-right:5pt; } +td.td10{ padding-left:5pt; padding-right:0pt; } +td.td11{ padding-left:5pt; padding-right:5pt; } +table[rules] {border-left:solid black 0.4pt; border-right:solid black 0.4pt; } +.hline hr, .cline hr{ height : 1px; margin:0px; } +.tabbing-right {text-align:right;} +span.TEX {letter-spacing: -0.125em; } +span.TEX span.E{ position:relative;top:0.5ex;left:-0.0417em;} +a span.TEX span.E {text-decoration: none; } +span.LATEX span.A{ position:relative; top:-0.5ex; left:-0.4em; font-size:85%;} +span.LATEX span.TEX{ position:relative; left: -0.4em; } +div.float, div.figure {margin-left: auto; margin-right: auto;} +div.float img {text-align:center;} +div.figure img {text-align:center;} +.marginpar {width:20%; float:right; text-align:left; margin-left:auto; margin-top:0.5em; font-size:85%; text-decoration:underline;} +.marginpar 
p{margin-top:0.4em; margin-bottom:0.4em;} +table.equation {width:100%;} +.equation td{text-align:center; } +td.equation { margin-top:1em; margin-bottom:1em; } +td.equation-label { width:5%; text-align:center; } +td.eqnarray4 { width:5%; white-space: normal; } +td.eqnarray2 { width:5%; } +table.eqnarray-star, table.eqnarray {width:100%;} +div.eqnarray{text-align:center;} +div.array {text-align:center;} +div.pmatrix {text-align:center;} +table.pmatrix {width:100%;} +span.pmatrix img{vertical-align:middle;} +div.pmatrix {text-align:center;} +table.pmatrix {width:100%;} +span.bar-css {text-decoration:overline;} +img.cdots{vertical-align:middle;} +.partToc a, .partToc, .likepartToc a, .likepartToc {line-height: 200%; font-weight:bold; font-size:110%;} +.chapterToc a, .chapterToc, .likechapterToc a, .likechapterToc, .appendixToc a, .appendixToc {line-height: 200%; font-weight:bold;} +.index-item, .index-subitem, .index-subsubitem {display:block} +div.caption {text-indent:-2em; margin-left:3em; margin-right:1em; text-align:left;} +div.caption span.id{font-weight: bold; white-space: nowrap; } +h1.partHead{text-align: center} +p.bibitem { text-indent: -2em; margin-left: 2em; margin-top:0.6em; margin-bottom:0.6em; } +p.bibitem-p { text-indent: 0em; margin-left: 2em; margin-top:0.6em; margin-bottom:0.6em; } +.paragraphHead, .likeparagraphHead { margin-top:2em; font-weight: bold;} +.subparagraphHead, .likesubparagraphHead { font-weight: bold;} +.quote {margin-bottom:0.25em; margin-top:0.25em; margin-left:1em; margin-right:1em; text-align:justify;} +.verse{white-space:nowrap; margin-left:2em} +div.maketitle {text-align:center;} +h2.titleHead{text-align:center;} +div.maketitle{ margin-bottom: 2em; } +div.author, div.date {text-align:center;} +div.thanks{text-align:left; margin-left:10%; font-size:85%; font-style:italic; } +div.author{white-space: nowrap;} +.quotation {margin-bottom:0.25em; margin-top:0.25em; margin-left:1em; } +h1.partHead{text-align: center} +.abstract p 
{margin-left:5%; margin-right:5%;} +div.abstract {width:100%;} +.figure img.graphics {margin-left:10%;} +.lstlisting .label{margin-right:0.5em; } +div.lstlisting{font-family: monospace; white-space: nowrap; margin-top:0.5em; margin-bottom:0.5em; } +div.lstinputlisting{ font-family: monospace; white-space: nowrap; } +.lstinputlisting .label{margin-right:0.5em;} +P.fancyvrb {white-space: nowrap; margin:0em;} +div.tabular, div.center div.tabular {text-align: center; margin-top:0.5em; margin-bottom:0.5em; } +table.tabular td p{margin-top:0em;} +table.tabular {margin-left: auto; margin-right: auto;} +td p:first-child{ margin-top:0em; } +td p:last-child{ margin-bottom:0em; } +div.td00{ margin-left:0pt; margin-right:0pt; } +div.td01{ margin-left:0pt; margin-right:5pt; } +div.td10{ margin-left:5pt; margin-right:0pt; } +div.td11{ margin-left:5pt; margin-right:5pt; } +table[rules] {border-left:solid black 0.4pt; border-right:solid black 0.4pt; } +td.td00{ padding-left:0pt; padding-right:0pt; } +td.td01{ padding-left:0pt; padding-right:5pt; } +td.td10{ padding-left:5pt; padding-right:0pt; } +td.td11{ padding-left:5pt; padding-right:5pt; } +table[rules] {border-left:solid black 0.4pt; border-right:solid black 0.4pt; } +.hline hr, .cline hr{ height : 1px; margin:0px; } +div.array {text-align:center;} +table[rules] {border-left:solid black 0.4pt; border-right:solid black 0.4pt; } +div.longtable{text-align:center;} +body { font-family: Arial, Helvetica, sans-serif; font-size: 10pt; } +h1 { color:#552681; font-size:150%; } +h2 { color:#552681; font-size:140%; } +h3 { color:#552681; font-size:130%; } +h4 { color:#552681; font-size:120%; } +h5 { color:#552681; font-size:110%; } +h6 { color:#552681; font-size:100%; } +table.tabular { margin-left: 0em; } +dt { margin-top: 0.5em; } +div.lstlisting, div.lstinputlisting {font-family: Courier New, monospace; white-space: nowrap; margin-top:0.5em; margin-bottom:0.5em; padding: 5px; border: 1px solid black; color: black; background-color: 
#F5F5F5;} +/* end css.sty */ + diff --git a/title.tex b/title.tex new file mode 100644 index 0000000..8138417 --- /dev/null +++ b/title.tex @@ -0,0 +1,190 @@ +\begin{titlepage} +\thispagestyle{fancy} + +\begin{tabular*}{\textwidth}{@{\extracolsep{\fill}}lr} + \includegraphics[height=0.55in]{images/oasis.png}\\ + \hline +\end{tabular*} + +\oasistitle{\virtioversion} +\oasisstagelabel{\virtiodraftstagename\virtiodraftstageextratitle} +\vspace{0.05in} +\oasisstagelabel{\virtioworkingdraftdate} + +\oasisspecificationuris{Specification URIs} + +\begin{oasistitlesection}{This version} +\virtiourltex{ }(Authoritative)\newline +\virtiourlpdf\newline +\virtiourlhtml +\end{oasistitlesection} + +\begin{oasistitlesection}{Previous version} +\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs03/tex/} +{}(Authoritative)\newline +\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs03/virtio-v1.0-cs03.pdf}\newline +\url{http://docs.oasis-open.org/virtio/virtio/v1.0/cs03/virtio-v1.0-cs03.html} +\end{oasistitlesection} + +\begin{oasistitlesection}{Latest version} +\virtiourllatestpdf\newline +\virtiourllatesthtml +\end{oasistitlesection} + +\begin{oasistitlesection}{Technical Committee} +\href{https://www.oasis-open.org/committees/virtio/}{OASIS Virtual I/O Device (VIRTIO) TC} +\end{oasistitlesection} + +\begin{oasistitlesection}{Chairs} +Michael S. Tsirkin (\href{mailto:mst@redhat.com}{mst@redhat.com}), \href{http://www.redhat.com/}{Red Hat}\newline +\end{oasistitlesection} + +\begin{oasistitlesection}{Editors} +Michael S. 
Tsirkin (\href{mailto:mst@redhat.com}{mst@redhat.com}), \href{http://www.redhat.com/}{Red Hat}\newline +Cornelia Huck (\href{mailto:cornelia.huck@de.ibm.com}{cornelia.huck@de.ibm.com}), \href{http://www.ibm.com/}{IBM}\newline +Pawel Moll (\href{mailto:pawel.moll@arm.com}{pawel.moll@arm.com}), \href{http://www.arm.com/}{ARM} +\end{oasistitlesection} + + +\begin{oasistitlesection}{Additional artifacts} +This prose specification is one component of a Work Product that also includes: +\begin{itemize*} + \item Example Driver Listing: \newline + \virtiourllistings +\end{itemize*} +\end{oasistitlesection} +\vspace{-0.2in} + +\begin{oasistitlesection}{Related work} +This specification replaces or supersedes: +\begin{itemize*} + \item Virtio PCI Card Specification Version 0.9.5:\newline + \url{http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf} +\end{itemize*} +% This specification is related to: +% \begin{itemize*} +% \item Related specifications (list) +% \end{itemize*} +\end{oasistitlesection} +\vspace{-0.2in} + +% \begin{oasistitlesection}{Declared XML namespaces} +% \vspace{-0.1in} +% \begin{itemize*} +% \item namespaces which are declared, not just referenced or used (list) +% \end{itemize*} +% \end{oasistitlesection} +\vfill\newpage + +\begin{oasistitlesection}{Abstract} +\input{abstract.tex} +\end{oasistitlesection} + +\begin{oasistitlesection}{Status} +This document was last revised or approved by the Virtual I/O Device +(VIRTIO) TC on the above date. The level of approval is also listed above. Check the ``Latest version'' location noted above for possible later revisions of this document. +Any other numbered Versions and other +technical work produced by the Technical Committee (TC) are +listed at +\url{https://www.oasis-open.org/committees/tc_home.php?wg_abbrev=virtio#technical}. + +Technical Committee members should send comments on this +specification to the Technical Committee’s email list. 
Others +should send comments to the Technical Committee by using the +``\href{https://www.oasis-open.org/committees/comments/form.php?wg_abbrev=virtio}{Send +A Comment}'' button on the Technical Committee’s web page at +\url{https://www.oasis-open.org/committees/virtio/}. + +For information on whether any patents have been disclosed that +may be essential to implementing this specification, and any +offers of patent licensing terms, please refer to the +Intellectual Property Rights section of the Technical Committee +web page (\url{https://www.oasis-open.org/committees/virtio/ipr.php}). +\end{oasistitlesection} + + +\begin{oasistitlesection}{Citation format} +When referencing this specification the following citation format should be used:\newline + +\textbf{[VIRTIO-v\virtiorev]}\newline +\textit{\virtioversion}. Edited by Rusty Russell, Michael S. +Tsirkin, Cornelia Huck, and Pawel Moll. \virtioworkingdraftdate. +\virtiodraftoasisstagename \virtiodraftstageextra . \virtiourlhtml . +Latest version: \virtiourllatesthtml . +\end{oasistitlesection} + +\vfill\newpage + +\oasisnoticelabel{Notices} + +Copyright © OASIS Open 2015. All Rights Reserved. + +All capitalized terms in the following text have the meanings assigned +to them in the OASIS Intellectual Property Rights Policy (the "OASIS +IPR Policy"). The full \href{https://www.oasis-open.org/policies-guidelines/ipr}{Policy} may be found at the OASIS website. + +This document and translations of it may be copied and furnished to +others, and derivative works that comment on or otherwise explain it +or assist in its implementation may be prepared, copied, published, +and distributed, in whole or in part, without restriction of any kind, +provided that the above copyright notice and this section are included +on all such copies and derivative works. 
However, this document itself +may not be modified in any way, including by removing the copyright +notice or references to OASIS, except as needed for the purpose of +developing any document or deliverable produced by an OASIS Technical +Committee (in which case the rules applicable to copyrights, as set +forth in the OASIS IPR Policy, must be followed) or as required to +translate it into languages other than English. + +The limited permissions granted above are perpetual and will not be +revoked by OASIS or its successors or assigns. + +This document and the information contained herein is provided on an +"AS IS" basis and OASIS DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE +INFORMATION HEREIN WILL NOT INFRINGE ANY OWNERSHIP RIGHTS OR ANY +IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR +PURPOSE. + +OASIS requests that any OASIS Party or any other party that believes +it has patent claims that would necessarily be infringed by +implementations of this OASIS Committee Specification or OASIS +Standard, to notify OASIS TC Administrator and provide an indication +of its willingness to grant patent licenses to such patent claims in a +manner consistent with the IPR Mode of the OASIS Technical Committee +that produced this specification. + +OASIS invites any party to contact the OASIS TC Administrator if it is +aware of a claim of ownership of any patent claims that would +necessarily be infringed by implementations of this specification by a +patent holder that is not willing to provide a license to such patent +claims in a manner consistent with the IPR Mode of the OASIS Technical +Committee that produced this specification. OASIS may include such +claims on its website, but disclaims any obligation to do so. 
+ +OASIS takes no position regarding the validity or scope of any +intellectual property or other rights that might be claimed to pertain +to the implementation or use of the technology described in this +document or the extent to which any license under such rights might or +might not be available; neither does it represent that it has made any +effort to identify any such rights. Information on OASIS' procedures +with respect to rights in any document or deliverable produced by an +OASIS Technical Committee can be found on the OASIS website. Copies of +claims of rights made available for publication and any assurances of +licenses to be made available, or the result of an attempt made to +obtain a general license or permission for the use of such proprietary +rights by implementers or users of this OASIS Committee Specification +or OASIS Standard, can be obtained from the OASIS TC Administrator. +OASIS makes no representation that any information or list of +intellectual property rights will at any time be complete, or that any +claims in such list are, in fact, Essential Claims. + +The name "OASIS" is a trademark of \href{https://www.oasis-open.org/}{OASIS}, the owner and developer of +this specification, and should be used only to refer to the +organization and its official outputs. OASIS welcomes reference to, +and implementation and use of, specifications, while reserving the +right to enforce its marks against misleading uses. Please see +\url{https://www.oasis-open.org/policies-guidelines/trademark} for above guidance. 
+\\\\ + +\end{titlepage} diff --git a/txt2latex.pl b/txt2latex.pl new file mode 100755 index 0000000..5732fe5 --- /dev/null +++ b/txt2latex.pl @@ -0,0 +1,184 @@ +#!/usr/bin/perl + +use strict; + +my @depth2latex = ( + '\chapter', + '\section', + '\subsection', + '\subsubsection', + '\paragraph', + '\subparagraph' +); + +my $skip_depth = 1; + +sub find_footnotes { + my @text = @_; + my @notes = (); + my $found = 0; + my $l; + + for ($l = 0; $l <= $#text; $l++) { + if ($text[$l] =~ m/^FOOTNOTES:$/) { + $found = 1; + } + next unless $found; + if ($text[$l] =~ m/^\[[0-9]+\]\s/) { + push @notes, $l; + } + } + return @notes; +}; + +sub find_sections { + my @text = @_; + my @sections = (); + my $l; + + for ($l = 0; $l <= $#text - 1; $l++) { + next unless (($text[$l + 1] =~ m/^=======*$/) or + ($text[$l + 1] =~ m/^-------*$/)); + + next unless ($text[$l] =~ m/^(([0-9]+\.)+) /); + + push @sections, $l; + + } + return @sections; +}; + +my @text = (); +while (<>) { + push @text, $_; +} + +my @footnotes = find_footnotes(@text); +my @sections = find_sections(@text); + +#Format footnotes +my %footnote_by_number = (); +my $f; +for ($f = 0; $f <= $#footnotes; $f++) { + my $l = $footnotes[$f]; + die unless ($text[$l] =~ m/^\[([0-9]+)\]\s+(.*)/); + my $footnote = $1; + my $text = $2; + die "duplicate footnote number $footnote" if defined($footnote_by_number{$footnote}); + $footnote_by_number{$footnote} = "$text\n"; + my $next; + if ($f < $#footnotes) { + $next = $footnotes[$f + 1]; + } else { + $next = $#text + 1; + } + for ($l = $footnotes[$f] + 1; $l < $next; $l++) { + next if ($text[$l] =~ m/^$/); + $footnote_by_number{$footnote} .= $text[$l]; + } +} + +#Format sections +my %label_by_section = (); +my $s; + +my %latest_by_depth = (); + +for ($s = 0; $s <= $#sections; $s++) { + my $l = $sections[$s]; + die unless ($text[$l] =~ m/^(([0-9]+\.)+)\s+(.+)\s*/); + my $section = $1; + my $name = $3; + my @path = split(/\./, $section); + my $depth = $#path - $skip_depth; + if ($depth < 0) 
{ + $depth = 0; + } + if ($depth > $#depth2latex) { + $depth = $#depth2latex; + } + $latest_by_depth{$#path} = $name; + my $type = $depth2latex[$depth]; + my $label = $name; + #Prepend hierarchical path to make name unique + for (my $i = 1; $i <= $#path - $skip_depth; $i++) { + last if (not defined $latest_by_depth{$#path - $i}); + $label = "$latest_by_depth{$#path - $i} / $label"; + } + #It's best to avoid underscore in labels + $label =~ s/_/-/g; + $text[$l] = $type . "{$name}\\label{sec:$label}\n"; + $label_by_section{$section} = $label; +} + +my $ifndef = 0; +my $listing = 0; +my $table = 0; +my $buffer = ""; +for my $line (@text) { + last if ($line =~ m/^FOOTNOTES:$/); + next if (($line =~ m/^=======*$/) or + ($line =~ m/^-------*$/)); + + if ($line =~ m/^#if/) { + print "\\begin{lstlisting}\n"; + $ifndef++; + } + if ($ifndef) { + if ($line =~ m/^#endif/) { + $ifndef--; + } + $buffer .= $line; + if (not $ifndef) { + print $buffer; + print "\\end{lstlisting}\n"; + $buffer = ""; + } + next; + } + if (not $table and $line =~ m/^\+\-/) { + print "\\begin{verbatim}\n"; + $table = 1; + } + if ($table and not $line =~ m/^(\+\-|\|)/) { + print "\\end{verbatim}\n"; + $table = 0; + } + if (not $listing and $line =~ m/^\t/) { + print "\\begin{lstlisting}\n"; + $listing = 1; + } + if ($listing and $line =~ m/^$/) { + $buffer .= $line; + next; + } + if ($listing and not $line =~ m/^\t/) { + print "\\end{lstlisting}\n"; + $listing = 0; + } + + if (not $table and not $listing) { + if ($line =~ m/\S+\s*\^\s*\S+/) { + $line =~ s/(\S+\s*\^\s*)(\S+)/\$$1\{$2\}\$/g; + } else { + $line =~ s/\^/\\^/go; + } + $line =~ s/#/\\#/go; + $line =~ s/&/\\&/go; + if ($line =~ m/\[[0-9]+\]/) { #premature optimization + for my $n (keys(%footnote_by_number)) { + my $txt = $footnote_by_number{$n}; + $line =~ s/\[$n\]/\n\\footnote{$txt}/g; + } + } + } + if ($line =~ m/"(([0-9]+\.)+)[^"]*"/) { + my $section = $1; + $line =~ 
s/"(([0-9]+\.)+)[^"]*"/\\ref{sec:$label_by_section{$section}}~\\nameref{sec:$label_by_section{$section}}/g; + } + print $buffer; + $buffer = ""; + print $line; +} + + diff --git a/unicode.4hf b/unicode.4hf new file mode 100644 index 0000000..b7e69b2 --- /dev/null +++ b/unicode.4hf @@ -0,0 +1,5 @@ +'fi' '' 'fi' '' +'fl' '' 'fl' '' +'ff' '' 'ff' '' +'ffi' '' 'ffi' '' +'ffl' '' 'ffl' '' diff --git a/virtio-html.cfg b/virtio-html.cfg new file mode 100644 index 0000000..bf4c89b --- /dev/null +++ b/virtio-html.cfg @@ -0,0 +1,31 @@ +\Preamble{html} +\begin{document} + \ConfigureMark{chapter}{\HCode{<hr><span class="titlemark">}\thechapter\HCode{</span>}} + \ConfigureMark{paragraph}{\HCode{<span class="titlemark">}\theparagraph\HCode{</span>}} + \ConfigureMark{subparagraph}{\HCode{<span class="titlemark">}\thesubparagraph\HCode{</span>}} + + \Configure{chapter}{}{}{\ShowPar\IgnoreIndent\HCode{<h2 class="chapterHead">}\begingroup\TitleMark\space}{\endgroup\HCode{</h2>}\par\IgnorePar} + \Configure{paragraph}{}{}{\ShowPar\IgnoreIndent\HCode{<h5 class="paragraphHead">}\begingroup\TitleMark\space}{\endgroup\HCode{</h5>}\par\IgnorePar} + \Configure{subparagraph}{}{}{\ShowPar\IgnoreIndent\HCode{<h6 class="subparagraphHead">}\begingroup\TitleMark\space}{\endgroup\HCode{</h6>}\par\IgnorePar} + \Configure{appendix}{}{}{\ShowPar\IgnoreIndent\HCode{<h2 class="appendixHead">}\begingroup\TitleMark.\space}{\endgroup\HCode{</h2>}\par\IgnorePar} + + \Configure{tabular} + {\HCode{<table style="border-collapse:collapse;">}}{\HCode{</table>}} + {\HCode{<tr class="row-\HRow">}}{\HCode{</tr>}} + {\HCode{<td style="text-align: left; min-width: \HColWidth; border-left-style:solid; border-right-style:solid; border-width:thin; padding-left:6pt; padding-right:6pt;" \ifnum \HMultispan>1 colspan="\HMultispan"\fi >}}{\HCode{</td>}} + + + %%\Configure{HColWidth}{\HCode{ style="min-width:\HColWidth"}} + + \Configure{@TITLE}{Virtual I/O Device (VIRTIO) Version 1.0} + \Css{body { font-family: Arial, 
Helvetica, sans-serif; font-size: 10pt; }} + \Css{h1 { color:\#552681; font-size:150\%; }} + \Css{h2 { color:\#552681; font-size:140\%; }} + \Css{h3 { color:\#552681; font-size:130\%; }} + \Css{h4 { color:\#552681; font-size:120\%; }} + \Css{h5 { color:\#552681; font-size:110\%; }} + \Css{h6 { color:\#552681; font-size:100\%; }} + \Css{table.tabular { margin-left: 0em; }} + \Css{dt { margin-top: 0.5em; }} + \Css{div.lstlisting, div.lstinputlisting {font-family: Courier New, monospace; white-space: nowrap; margin-top:0.5em; margin-bottom:0.5em; padding: 5px; border: 1px solid black; color: black; background-color: \#F5F5F5;}} +\EndPreamble diff --git a/virtio-html.tex b/virtio-html.tex new file mode 100644 index 0000000..5adc518 --- /dev/null +++ b/virtio-html.tex @@ -0,0 +1,65 @@ +%!TEX encoding = UTF-8 Unicode +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% VIRTIO 1.0 Specification Draft +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\documentclass[10pt,titlepage]{report} + +\usepackage{parskip} +\usepackage{setspace} +\usepackage[english]{babel} +\usepackage{ae,aecompl} +\usepackage[T1]{fontenc} +\usepackage{graphicx} +\usepackage{framed} +\usepackage{hyperref} +\usepackage{lineno} +\usepackage{listings} +\usepackage{courier} +\usepackage{fancyvrb} +\usepackage{array} +\usepackage{longtable} +\usepackage{multirow} +\usepackage{rotating} +\usepackage{fancyhdr} +\usepackage{lastpage} +\usepackage{todonotes} +\usepackage{titlesec} +\usepackage{mdwlist} +\usepackage{url} +\usepackage{xifthen} +\usepackage{tabularx} +\usepackage{underscore} +\usepackage{xstring} +\usepackage{chngcntr} +\counterwithout{footnote}{chapter} + +\usepackage[utf8x]{inputenc} + +\urlstyle{rm} + +% commands +\input{specvars.tex} +\input{commands.tex} +\input{commands-html.tex} +\input{listings.tex} + +% new command: header and footer +\newcommand{\virtioheaderfooter}{} + +% headers setup 
+\titleformat{\chapter}[block]{\hrulefill\titlerule\normalfont\huge\bfseries}{\thechapter}{0pt}{} +\titleformat{\section}{\normalfont\Large\bfseries}{\thesection}{1em}{} +\titleformat{\subsection}{\normalfont\large\bfseries}{\thesubsection}{1em}{} +\titleformat{\subsubsection}{\normalfont\normalsize\bfseries}{\thesubsubsection}{1em}{} +\titleformat{\paragraph}[hang]{\normalfont\normalsize\bfseries}{\theparagraph}{1em}{} +\titleformat{\subparagraph}[hang]{\normalfont\normalsize\bfseries}{\thesubparagraph}{1em}{} + +%Using texlive on Fedora 21, \url for some reason replaces +%hyphen - with -_ in html output only. +%This seems to go away if we override - within urls inserting an +%mbox around it. \makeatletter is needed for \g@addto@macro; \makeatother restores @ before main.tex. +\makeatletter \g@addto@macro\UrlSpecials{\do\-{\mbox{-}}} \makeatother + +% main document +\input{main.tex} diff --git a/virtio-queue.h b/virtio-queue.h new file mode 100644 index 0000000..5a1e87d --- /dev/null +++ b/virtio-queue.h @@ -0,0 +1,122 @@ +#ifndef VIRTQUEUE_H +#define VIRTQUEUE_H +/* An interface for efficient virtio implementation. + * + * This header is BSD licensed so anyone can use the definitions + * to implement compatible drivers/servers. + * + * Copyright 2007, 2009, IBM Corporation + * Copyright 2011, Red Hat, Inc + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include <stdint.h> + +/* This marks a buffer as continuing via the next field. */ +#define VIRTQ_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VIRTQ_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VIRTQ_DESC_F_INDIRECT 4 + +/* The device uses this in used->flags to advise the driver: don't kick me + * when you add a buffer. It's unreliable, so it's simply an + * optimization. */ +#define VIRTQ_USED_F_NO_NOTIFY 1 +/* The driver uses this in avail->flags to advise the device: don't + * interrupt me when you consume a buffer. It's unreliable, so it's + * simply an optimization. */ +#define VIRTQ_AVAIL_F_NO_INTERRUPT 1 + +/* Support for indirect descriptors */ +#define VIRTIO_F_INDIRECT_DESC 28 + +/* Support for avail_event and used_event fields */ +#define VIRTIO_F_EVENT_IDX 29 + +/* Arbitrary descriptor layouts. */ +#define VIRTIO_F_ANY_LAYOUT 27 + +/* Virtqueue descriptors: 16 bytes. + * These can chain together via "next". */ +struct virtq_desc { + /* Address (guest-physical). */ + le64 addr; + /* Length. */ + le32 len; + /* The flags as indicated above. 
*/ + le16 flags; + /* We chain unused descriptors via this, too */ + le16 next; +}; + +struct virtq_avail { + le16 flags; + le16 idx; + le16 ring[]; + /* Only if VIRTIO_F_EVENT_IDX: le16 used_event; */ +}; + +/* le32 is used here for ids for padding reasons. */ +struct virtq_used_elem { + /* Index of start of used descriptor chain. */ + le32 id; + /* Total length of the descriptor chain which was written to. */ + le32 len; +}; + +struct virtq_used { + le16 flags; + le16 idx; + struct virtq_used_elem ring[]; + /* Only if VIRTIO_F_EVENT_IDX: le16 avail_event; */ +}; + +struct virtq { + /* Used by the event-index helpers as the index just past the ring entries (ring[num]); presumably the queue size in entries -- confirm against the spec text. */ unsigned int num; + + struct virtq_desc *desc; + struct virtq_avail *avail; + struct virtq_used *used; +}; + +/* Returns nonzero iff event_idx is one of old_idx, old_idx + 1, ..., new_idx - 1, counted modulo 2^16: both differences are cast to uint16_t, so the comparison stays correct when the indices wrap. Returns 0 when new_idx == old_idx. */ static inline int virtq_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old_idx) +{ + return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old_idx); +} + +/* Get location of event indices (only with VIRTIO_F_EVENT_IDX) */ +static inline le16 *virtq_used_event(struct virtq *vq) +{ + /* For backwards compat, used event index is at *end* of avail ring. */ + return &vq->avail->ring[vq->num]; +} + +static inline le16 *virtq_avail_event(struct virtq *vq) +{ + /* For backwards compat, avail event index is at *end* of used ring. 
*/ + return (le16 *)&vq->used->ring[vq->num]; +} +#endif /* VIRTQUEUE_H */ diff --git a/virtio.tex b/virtio.tex new file mode 100644 index 0000000..08f06c7 --- /dev/null +++ b/virtio.tex @@ -0,0 +1,97 @@ +%!TEX TS-program = xelatex +%!TEX encoding = UTF-8 Unicode +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% VIRTIO 1.0 Specification Draft +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\documentclass[10pt,titlepage]{report} + +\usepackage[top=1in,left=1in,bottom=0.5in,right=1in,paperwidth=8.5in, paperheight=11in]{geometry} +\usepackage{parskip} +\usepackage{setspace} +\usepackage[english]{babel} +\usepackage{ae,aecompl} +\usepackage[T1]{fontenc} +\usepackage[xetex]{graphicx} +\usepackage{placeins} +\usepackage{framed} +\usepackage[xetex]{hyperref} +\usepackage{lineno} +\usepackage{listings} +\usepackage{courier} +\usepackage{fancyvrb} +\usepackage{array} +\usepackage{longtable} +\usepackage{multirow} +\usepackage{rotating} +\usepackage{fancyhdr} +\usepackage{lastpage} +\usepackage{todonotes} +\usepackage[explicit]{titlesec} +\usepackage{mdwlist} +\usepackage{url} +\usepackage{xifthen} +\usepackage[no-math]{fontspec} +\usepackage{xltxtra} +\usepackage{etoolbox} +\usepackage{tabularx} +\usepackage{underscore} +\usepackage{xstring} +\usepackage{ellipsis} +% This is an alternative to package underscore above +% It makes it possible to disable hyphenation of upper-case +% identifiers with underscore (of which we have many) by +% treating them as acronyms. +% However, the result doesn't look pretty, so don't use this +% for now. 
+%\catcode`\_=\active +%\def_{\_} +% Don't hyphenate acronyms +%\uchyph=0 +\setmainfont[Mapping=tex-text]{Arial} +%\setromanfont{Arial} +\setmonofont{Courier New} + +\urlstyle{rm} + +% commands +\input{specvars.tex} +\input{commands.tex} +\input{commands-pdf.tex} +\input{listings.tex} + +% new command: header and footer +\newcommand{\virtioheaderfooter}{ +\pagestyle{fancy} +\fancyhf{} +\renewcommand{\headrulewidth}{0pt} +\renewcommand{\footrulewidth}{0pt} +\fancyfoot[L]{\footnotesize\virtiospecfile\\\virtioworkproduct} +\fancyfoot[C]{\footnotesize\ \\\virtiooasiscopyright} +\fancyfoot[R]{\footnotesize\virtioworkingdraftdate\\Page \thepage\ of \pageref*{LastPage}} +} + +% page setup +\setlength{\headheight}{0pt} +\setlength{\headsep}{10pt} +\setlength{\extrarowheight}{0.05in} + +% headers setup +\titleformat{\chapter}[block]{\titlerule}{}{0pt}{\color{oasis1}\fontsize{18}{18}\textbf{\ifthenelse{\equal{\@chapapp}{\appendixname}}{Appendix~\thechapter.\ #1}{\ifthenelse{\equal{\thechapter}{0}}{}{\thechapter\ \ }#1}}\vspace{-0.3in}} +\titleformat{\section}{\color{oasis1}\normalfont\Large\bfseries}{\color{oasis1}\thesection}{1em}{#1} +\titleformat{\subsection}{\color{oasis1}\normalfont\large\bfseries}{\color{oasis1}\thesubsection}{1em}{#1} +\titleformat{\subsubsection}{\color{oasis1}\normalfont\normalsize\bfseries}{\color{oasis1}\thesubsubsection}{1em}{#1} +\titleformat{\paragraph}[hang]{\color{oasis1}\normalfont\normalsize\bfseries}{\color{oasis1}\theparagraph}{1em}{#1} +\titleformat{\subparagraph}[hang]{\color{oasis1}\normalfont\normalsize\bfseries}{\color{oasis1}\thesubparagraph}{1em}{#1} + +% toc hacks +\makeatletter +% hyperref package: do not include the chapter/section number in the link +\def\process@contentsline#1#2{#1{#2}\hyper@linkstart{link}{\Hy@tocdestname}} +\patchcmd{\contentsline}{\hyper@linkstart{link}{\Hy@tocdestname}{#2}}{\process@contentsline#2}{}{} +% report style: add dotted lines between chapter names and page numbers 
+\patchcmd{\l@chapter}{\hfil}{\leaders\hbox{\normalfont$\m@th\mkern \@dotsep mu\hbox{.}\mkern \@dotsep mu$}\hfill}{}{} +\makeatother + +% main document +\input{main.tex} |