summarylogtreecommitdiffstats
diff options
context:
space:
mode:
authorRyszard Knop2018-02-08 18:40:13 +0100
committerRyszard Knop2018-02-08 18:40:13 +0100
commite07b610f20ddd6680d1adaab046051370aae1857 (patch)
tree1af239031d54e6d675820e23a1d6d0c1097a8ed0
downloadaur-e07b610f20ddd6680d1adaab046051370aae1857.tar.gz
Initial commit (and hopefully the only one)
-rw-r--r--.SRCINFO119
-rw-r--r--65-kvm.rules1
-rw-r--r--PKGBUILD223
-rw-r--r--allow_elf64.patch27
-rw-r--r--audio-improvements.patch1125
-rw-r--r--cpu-pinning.patch186
-rw-r--r--qemu-ga.service9
-rw-r--r--qemu.install8
-rw-r--r--v2_qemu_zen_smt_cache.patch172
-rw-r--r--v4_ivshmem.patch331
-rw-r--r--vfio-msi-1.patch133
-rw-r--r--vfio-msi-2.patch82
-rw-r--r--vfio-msi-3.patch170
-rw-r--r--vfio-msi-4.patch42
-rw-r--r--vfio-msi-5.patch104
15 files changed, 2732 insertions, 0 deletions
diff --git a/.SRCINFO b/.SRCINFO
new file mode 100644
index 000000000000..d66838720b42
--- /dev/null
+++ b/.SRCINFO
@@ -0,0 +1,119 @@
+pkgbase = qemu-patched-vfiomsitest
+ pkgdesc = A generic and open source machine emulator and virtualizer - Patched for extra functionality
+ pkgver = 2.11.0
+ pkgrel = 3
+ url = http://wiki.qemu.org/
+ arch = x86_64
+ license = GPL2
+ license = LGPL2.1
+ makedepends = spice-protocol
+ makedepends = python2
+ makedepends = ceph
+ makedepends = libiscsi
+ makedepends = glusterfs
+ depends = virglrenderer
+ depends = sdl2
+ depends = vte3
+ depends = libpulse
+ depends = seabios
+ depends = gnutls
+ depends = libpng
+ depends = libaio
+ depends = numactl
+ depends = jemalloc
+ depends = xfsprogs
+ depends = libnfs
+ depends = lzo
+ depends = snappy
+ depends = curl
+ depends = vde2
+ depends = libcap-ng
+ depends = spice
+ depends = libcacard
+ depends = usbredir
+ source = http://wiki.qemu.org//download/qemu-2.11.0.tar.xz
+ source = http://wiki.qemu.org//download/qemu-2.11.0.tar.xz.sig
+ source = qemu-ga.service
+ source = 65-kvm.rules
+ source = allow_elf64.patch
+ source = cpu-pinning.patch
+ source = audio-improvements.patch
+ source = v2_qemu_zen_smt_cache.patch
+ source = v4_ivshmem.patch
+ source = vfio-msi-1.patch
+ source = vfio-msi-2.patch
+ source = vfio-msi-3.patch
+ source = vfio-msi-4.patch
+ source = vfio-msi-5.patch
+ validpgpkeys = CEACC9E15534EBABB82D3FA03353C9CEF108B584
+ sha256sums = c9d34a79024eae080ce3853aa9afe503824520eefb440190383003081ce7f437
+ sha256sums = SKIP
+ sha256sums = c39bcde4a09165e64419fd2033b3532378bba84d509d39e2d51694d44c1f8d88
+ sha256sums = a66f0e791b16b03b91049aac61a25950d93e962e1b2ba64a38c6ad7f609b532c
+ sha256sums = 13a6d9e678bdc9e1f051006cfd0555f5a80582368f54c8a1bb5a78ece3832ac4
+ sha256sums = 8d4a7e35ab1a0a465f737cf60fc0392afc430e22354a40a89505f8766a3a3ee8
+ sha256sums = 23338655345d0ee535f34acc124f1ddd75e5ad4483e2bd87294b7ac4fe3fa859
+ sha256sums = adf3f389849e92c5ea4c4cee0abf1ac5df61a176d296e9263ac773194ba86e57
+ sha256sums = 4acbbd8834dc5782feb86795748f37e1b1aa4f61b54303234ea4f13bd4c0e068
+ sha256sums = 9e7b0c7e54ae0f5a9288b1e65421ecec3f369e5ac34a8660c516897419090c07
+ sha256sums = f6c12889551a22a2a6d78d106022b47a652987b9b0ab3fbd7494a86400491e26
+ sha256sums = bbeacc088f39d5baf42281bac1efa930a8eb3277a455e858a8edafdd021b3446
+ sha256sums = e8935e9e318f4d497ecade9a4b667ed494feba6304b1b4f08f9fd18c9a53c3fa
+ sha256sums = 8f105c549f565f61b1e4f6e2f91109620721cc0d5fd88407aebfdba55440ddf8
+
+pkgname = qemu-patched-vfiomsitest
+ provides = qemu-headless
+ provides = qemu
+ conflicts = qemu-headless
+ conflicts = qemu
+ replaces = qemu-kvm
+
+pkgname = qemu-patched-vfiomsitest-headless
+ pkgdesc = QEMU without GUI
+ depends = seabios
+ depends = gnutls
+ depends = libpng
+ depends = libaio
+ depends = numactl
+ depends = jemalloc
+ depends = xfsprogs
+ depends = libnfs
+ depends = lzo
+ depends = snappy
+ depends = curl
+ depends = vde2
+ depends = libcap-ng
+ depends = spice
+ depends = libcacard
+ depends = usbredir
+ conflicts = qemu-headless
+
+pkgname = qemu-patched-vfiomsitest-block-iscsi
+ pkgdesc = QEMU iSCSI block module
+ depends = glib2
+ depends = libiscsi
+ depends = jemalloc
+ provides = qemu-block-iscsi
+ conflicts = qemu-block-iscsi
+
+pkgname = qemu-patched-vfiomsitest-block-rbd
+ pkgdesc = QEMU RBD block module
+ depends = glib2
+ depends = ceph
+ provides = qemu-block-rbd
+ conflicts = qemu-block-rbd
+
+pkgname = qemu-patched-vfiomsitest-block-gluster
+ pkgdesc = QEMU GlusterFS block module
+ depends = glib2
+ depends = glusterfs
+ provides = qemu-block-gluster
+ conflicts = qemu-block-gluster
+
+pkgname = qemu-patched-vfiomsitest-guest-agent
+ pkgdesc = QEMU Guest Agent
+ depends = gcc-libs
+ depends = glib2
+ provides = qemu-guest-agent
+ conflicts = qemu-guest-agent
+
diff --git a/65-kvm.rules b/65-kvm.rules
new file mode 100644
index 000000000000..fbb0ebb69c9e
--- /dev/null
+++ b/65-kvm.rules
@@ -0,0 +1 @@
+KERNEL=="vhost-net", GROUP="kvm", MODE="0660", TAG+="uaccess", OPTIONS+="static_node=vhost-net"
diff --git a/PKGBUILD b/PKGBUILD
new file mode 100644
index 000000000000..69561b3fd35b
--- /dev/null
+++ b/PKGBUILD
@@ -0,0 +1,223 @@
+# Maintainer: Vaporeon <vaporeon@vaporeon.io>
+# Contributor: Tobias Powalowski <tpowa@archlinux.org>
+# Contributor: Sébastien "Seblu" Luttringer <seblu@seblu.net>
+
+pkgbase=qemu-patched-vfiomsitest
+pkgname=(qemu-patched-vfiomsitest qemu-patched-vfiomsitest-headless
+ qemu-patched-vfiomsitest-block-{iscsi,rbd,gluster} qemu-patched-vfiomsitest-guest-agent)
+_pkgname=qemu # upstream name; used below for the tarball and source directory names
+pkgdesc="A generic and open source machine emulator and virtualizer - Patched for extra functionality"
+pkgver=2.11.0
+pkgrel=3
+arch=(x86_64)
+license=(GPL2 LGPL2.1)
+url="http://wiki.qemu.org/" # NOTE(review): trailing slash + "$url/download/" below expands to "//download/"
+_headlessdeps=(seabios gnutls libpng libaio numactl jemalloc xfsprogs libnfs
+ lzo snappy curl vde2 libcap-ng spice libcacard usbredir)
+depends=(virglrenderer sdl2 vte3 libpulse "${_headlessdeps[@]}") # GUI/audio libs + the headless set
+makedepends=(spice-protocol python2 ceph libiscsi glusterfs)
+source=("$url/download/${_pkgname}-${pkgver}.tar.xz"{,.sig}
+ qemu-ga.service
+ 65-kvm.rules
+ allow_elf64.patch
+ cpu-pinning.patch
+ audio-improvements.patch
+ v2_qemu_zen_smt_cache.patch
+ v4_ivshmem.patch
+ vfio-msi-1.patch
+ vfio-msi-2.patch
+ vfio-msi-3.patch
+ vfio-msi-4.patch
+ vfio-msi-5.patch) # brace expansion above yields two entries: the tarball and its .sig
+sha256sums=('c9d34a79024eae080ce3853aa9afe503824520eefb440190383003081ce7f437'
+ 'SKIP'
+ 'c39bcde4a09165e64419fd2033b3532378bba84d509d39e2d51694d44c1f8d88'
+ 'a66f0e791b16b03b91049aac61a25950d93e962e1b2ba64a38c6ad7f609b532c'
+ '13a6d9e678bdc9e1f051006cfd0555f5a80582368f54c8a1bb5a78ece3832ac4'
+ '8d4a7e35ab1a0a465f737cf60fc0392afc430e22354a40a89505f8766a3a3ee8'
+ '23338655345d0ee535f34acc124f1ddd75e5ad4483e2bd87294b7ac4fe3fa859'
+ 'adf3f389849e92c5ea4c4cee0abf1ac5df61a176d296e9263ac773194ba86e57'
+ '4acbbd8834dc5782feb86795748f37e1b1aa4f61b54303234ea4f13bd4c0e068'
+ '9e7b0c7e54ae0f5a9288b1e65421ecec3f369e5ac34a8660c516897419090c07'
+ 'f6c12889551a22a2a6d78d106022b47a652987b9b0ab3fbd7494a86400491e26'
+ 'bbeacc088f39d5baf42281bac1efa930a8eb3277a455e858a8edafdd021b3446'
+ 'e8935e9e318f4d497ecade9a4b667ed494feba6304b1b4f08f9fd18c9a53c3fa'
+ '8f105c549f565f61b1e4f6e2f91109620721cc0d5fd88407aebfdba55440ddf8') # one entry per source=() item, same order
+validpgpkeys=('CEACC9E15534EBABB82D3FA03353C9CEF108B584')
+
+case $CARCH in # NOTE(review): _corearch is not referenced anywhere else in this PKGBUILD
+ i?86) _corearch=i386 ;;
+ x86_64) _corearch=x86_64 ;;
+esac
+
+prepare() {
+ # Create the two out-of-tree build dirs, then patch the unpacked QEMU tree in place.
+ mkdir -p build-{full,headless} # -p: dirs left by an earlier makepkg run must not abort
+ cd "${_pkgname}-${pkgver}"
+ sed -i 's/vte-2\.90/vte-2.91/g' configure # make configure look for vte-2.91
+ # Feature patches; audio-improvements.patch uses un-prefixed paths, hence -p0.
+ patch -p1 < ../allow_elf64.patch
+ patch -p1 < ../cpu-pinning.patch
+ patch -p0 < ../audio-improvements.patch
+ patch -p1 < ../v2_qemu_zen_smt_cache.patch
+ patch -p1 < ../v4_ivshmem.patch
+
+ # Experimental VFIO MSI interrupt patches, please read before using:
+ # https://redd.it/7vsfv7 and on the QEMU mailing list:
+ # https://lists.gnu.org/archive/html/qemu-devel/2018-02/msg01543.html
+ patch -p1 < ../vfio-msi-1.patch
+ patch -p1 < ../vfio-msi-2.patch
+ patch -p1 < ../vfio-msi-3.patch
+ patch -p1 < ../vfio-msi-4.patch
+ patch -p1 < ../vfio-msi-5.patch
+}
+
+build() {
+ _build full \
+ --audio-drv-list="pa alsa sdl"
+
+ _build headless \
+ --audio-drv-list= \
+ --disable-bluez \
+ --disable-sdl \
+ --disable-gtk \
+ --disable-vte \
+ --disable-opengl \
+ --disable-virglrenderer
+}
+
+_build() (
+ cd build-$1
+
+ # qemu vs. make 4 == bad
+ export ARFLAGS=rv
+
+ # http://permalink.gmane.org/gmane.comp.emulators.qemu/238740
+ export CFLAGS+=" -fPIC"
+
+ ../${_pkgname}-${pkgver}/configure \
+ --prefix=/usr \
+ --sysconfdir=/etc \
+ --localstatedir=/var \
+ --libexecdir=/usr/lib/qemu \
+ --python=/usr/bin/python2 \
+ --smbd=/usr/bin/smbd \
+ --target-list=x86_64-softmmu,x86_64-linux-user \
+ --with-gtkabi=3.0 \
+ --with-sdlabi=2.0 \
+ --enable-modules \
+ --enable-jemalloc \
+ "${@:2}"
+
+ make
+)
+
+package_qemu-patched-vfiomsitest() {
+ # Full flavour: declares itself a stand-in for both qemu and qemu-headless.
+ conflicts=(qemu-headless qemu)
+ provides=(qemu-headless qemu)
+ replaces=(qemu-kvm)
+ _package full
+}
+
+package_qemu-patched-vfiomsitest-headless() {
+ conflicts=(qemu-headless) # NOTE(review): no matching provides=() here -- confirm intended
+ depends=("${_headlessdeps[@]}") # global depends minus virglrenderer, sdl2, vte3, libpulse
+ pkgdesc="QEMU without GUI"
+ _package headless
+}
+
+_package() {
+ # Shared install routine for the full and headless flavours: $1 selects build-$1,
+ # any further arguments are handed to `make install`.
+ optdepends+=('ovmf: Tianocore UEFI firmware for qemu'
+ 'samba: SMB/CIFS server support'
+ 'qemu-patched-vfiomsitest-block-iscsi: iSCSI block support'
+ 'qemu-patched-vfiomsitest-block-rbd: RBD block support'
+ 'qemu-patched-vfiomsitest-block-gluster: glusterfs block support')
+ install=qemu.install
+ options=(!strip)
+
+ make -C build-$1 DESTDIR="$pkgdir" install "${@:2}"
+
+ # udev rule shipped alongside the package sources
+ install -Dm644 65-kvm.rules "$pkgdir/usr/lib/udev/rules.d/65-kvm.rules"
+
+ # remove conflicting /var/run directory
+ cd "$pkgdir"
+ rm -r var
+
+ cd usr/lib
+ tidy_strip
+
+ # bridge_helper needs suid
+ # https://bugs.archlinux.org/task/32565
+ chmod u+s qemu/qemu-bridge-helper
+
+ # remove split block modules
+ rm qemu/block-{iscsi,rbd,gluster}.so
+
+ cd ../bin
+ tidy_strip
+
+ # Delete the firmware blobs that the seabios package already provides.
+ # Every other file in share/qemu is left exactly as `make install` put it.
+ cd ../share/qemu
+ local _seabios_blobs=(
+ bios.bin
+ acpi-dsdt.aml
+ bios-256k.bin
+ vgabios-cirrus.bin
+ vgabios-qxl.bin
+ vgabios-stdvga.bin
+ vgabios-vmware.bin
+ )
+
+ local _blob
+ for _blob in "${_seabios_blobs[@]}"; do
+ # -f guard: a blob missing from this build is skipped, not an error
+ if [[ -f $_blob ]]; then
+ rm "$_blob"
+ fi
+ done
+}
+
+package_qemu-patched-vfiomsitest-block-iscsi() {
+ local _module=block-iscsi.so # the single file this split package ships
+ pkgdesc="QEMU iSCSI block module"
+ depends=(glib2 libiscsi jemalloc)
+ provides=(qemu-block-iscsi)
+ conflicts=(qemu-block-iscsi)
+ install -D "build-full/$_module" "$pkgdir/usr/lib/qemu/$_module"
+}
+
+package_qemu-patched-vfiomsitest-block-rbd() {
+ local _module=block-rbd.so # the single file this split package ships
+ pkgdesc="QEMU RBD block module"
+ depends=(glib2 ceph)
+ provides=(qemu-block-rbd)
+ conflicts=(qemu-block-rbd)
+ install -D "build-full/$_module" "$pkgdir/usr/lib/qemu/$_module"
+}
+
+package_qemu-patched-vfiomsitest-block-gluster() {
+ local _module=block-gluster.so # the single file this split package ships
+ pkgdesc="QEMU GlusterFS block module"
+ depends=(glib2 glusterfs)
+ provides=(qemu-block-gluster)
+ conflicts=(qemu-block-gluster)
+ install -D "build-full/$_module" "$pkgdir/usr/lib/qemu/$_module"
+}
+
+package_qemu-patched-vfiomsitest-guest-agent() {
+ # Installs the qemu-ga binary from the full build, its systemd unit and the fsfreeze hook.
+ pkgdesc="QEMU Guest Agent"
+ depends=(gcc-libs glib2)
+ provides=(qemu-guest-agent)
+ conflicts=(qemu-guest-agent)
+ install -D build-full/qemu-ga "$pkgdir/usr/bin/qemu-ga"
+ install -Dm644 qemu-ga.service "$pkgdir/usr/lib/systemd/system/qemu-ga.service"
+ install -Dm755 "$srcdir/${_pkgname}-${pkgver}/scripts/qemu-guest-agent/fsfreeze-hook" "$pkgdir/etc/qemu/fsfreeze-hook"
+}
+
+# vim:set ts=2 sw=2 et:
diff --git a/allow_elf64.patch b/allow_elf64.patch
new file mode 100644
index 000000000000..07f27a038b1b
--- /dev/null
+++ b/allow_elf64.patch
@@ -0,0 +1,27 @@
+commit 3c72765ec760a51f0e879dc792be82c93141e318
+Author: Anatol Pomozov <anatol.pomozov@gmail.com>
+Date: Tue Jun 6 20:07:03 2017 -0700
+
+ Remove restriction that prevents bootimg elf64 images
+
+ It is possible to create a 64 bit elf image that has valid multiboot header.
+ qemu should be able to boot such images.
+
+ Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
+
+diff --git a/hw/i386/multiboot.c b/hw/i386/multiboot.c
+index 663f35a658..cf1b4f5fb3 100644
+--- a/hw/i386/multiboot.c
++++ b/hw/i386/multiboot.c
+@@ -192,11 +192,6 @@ int load_multiboot(FWCfgState *fw_cfg,
+ int kernel_size;
+ fclose(f);
+
+- if (((struct elf64_hdr*)header)->e_machine == EM_X86_64) {
+- fprintf(stderr, "Cannot load x86-64 image, give a 32bit one.\n");
+- exit(1);
+- }
+-
+ kernel_size = load_elf(kernel_filename, NULL, NULL, &elf_entry,
+ &elf_low, &elf_high, 0, I386_ELF_MACHINE,
+ 0, 0);
diff --git a/audio-improvements.patch b/audio-improvements.patch
new file mode 100644
index 000000000000..47ae53e8d7b1
--- /dev/null
+++ b/audio-improvements.patch
@@ -0,0 +1,1125 @@
+diff --git audio/audio.c audio/audio.c
+index beafed209b..6f42a019b0 100644
+--- audio/audio.c
++++ audio/audio.c
+@@ -2066,3 +2066,8 @@ void AUD_set_volume_in (SWVoiceIn *sw, int mute, uint8_t lvol, uint8_t rvol)
+ }
+ }
+ }
++
++int64_t audio_get_timer_ticks(void)
++{
++ return conf.period.ticks;
++}
+diff --git audio/audio_int.h audio/audio_int.h
+index 5bcb1c60e1..2f7fc4f8ac 100644
+--- audio/audio_int.h
++++ audio/audio_int.h
+@@ -214,6 +214,8 @@ extern struct audio_driver pa_audio_driver;
+ extern struct audio_driver spice_audio_driver;
+ extern const struct mixeng_volume nominal_volume;
+
++int64_t audio_get_timer_ticks(void);
++
+ void audio_pcm_init_info (struct audio_pcm_info *info, struct audsettings *as);
+ void audio_pcm_info_clear_buf (struct audio_pcm_info *info, void *buf, int len);
+
+diff --git audio/paaudio.c audio/paaudio.c
+index 65beb6f010..b46beeea92 100644
+--- audio/paaudio.c
++++ audio/paaudio.c
+@@ -1,16 +1,22 @@
+ /* public domain */
+ #include "qemu/osdep.h"
+-#include "qemu-common.h"
++#include "qemu/timer.h"
+ #include "audio.h"
+
+ #include <pulse/pulseaudio.h>
+
+ #define AUDIO_CAP "pulseaudio"
++#define DEBUG
+ #include "audio_int.h"
+-#include "audio_pt_int.h"
+
+ typedef struct {
+- int samples;
++ int buffer_size_out;
++ int buffer_size_in;
++ int tlength;
++ int fragsize;
++ int maxlength_in;
++ int adjust_latency_out;
++ int adjust_latency_in;
+ char *server;
+ char *sink;
+ char *source;
+@@ -24,28 +30,18 @@ typedef struct {
+
+ typedef struct {
+ HWVoiceOut hw;
+- int done;
+- int live;
+- int decr;
+- int rpos;
+ pa_stream *stream;
+- void *pcm_buf;
+- struct audio_pt pt;
+ paaudio *g;
++ pa_sample_spec ss;
++ pa_buffer_attr ba;
+ } PAVoiceOut;
+
+ typedef struct {
+ HWVoiceIn hw;
+- int done;
+- int dead;
+- int incr;
+- int wpos;
+ pa_stream *stream;
+- void *pcm_buf;
+- struct audio_pt pt;
+- const void *read_data;
+- size_t read_index, read_length;
+ paaudio *g;
++ pa_sample_spec ss;
++ pa_buffer_attr ba;
+ } PAVoiceIn;
+
+ static void qpa_audio_fini(void *opaque);
+@@ -109,182 +105,59 @@ static inline int PA_STREAM_IS_GOOD(pa_stream_state_t x)
+ } \
+ } while (0);
+
+-static int qpa_simple_read (PAVoiceIn *p, void *data, size_t length, int *rerror)
+-{
+- paaudio *g = p->g;
+-
+- pa_threaded_mainloop_lock (g->mainloop);
+-
+- CHECK_DEAD_GOTO (g, p->stream, rerror, unlock_and_fail);
+-
+- while (length > 0) {
+- size_t l;
+-
+- while (!p->read_data) {
+- int r;
+-
+- r = pa_stream_peek (p->stream, &p->read_data, &p->read_length);
+- CHECK_SUCCESS_GOTO (g, rerror, r == 0, unlock_and_fail);
+-
+- if (!p->read_data) {
+- pa_threaded_mainloop_wait (g->mainloop);
+- CHECK_DEAD_GOTO (g, p->stream, rerror, unlock_and_fail);
+- } else {
+- p->read_index = 0;
+- }
+- }
+-
+- l = p->read_length < length ? p->read_length : length;
+- memcpy (data, (const uint8_t *) p->read_data+p->read_index, l);
+-
+- data = (uint8_t *) data + l;
+- length -= l;
+-
+- p->read_index += l;
+- p->read_length -= l;
+-
+- if (!p->read_length) {
+- int r;
+-
+- r = pa_stream_drop (p->stream);
+- p->read_data = NULL;
+- p->read_length = 0;
+- p->read_index = 0;
+-
+- CHECK_SUCCESS_GOTO (g, rerror, r == 0, unlock_and_fail);
+- }
+- }
+-
+- pa_threaded_mainloop_unlock (g->mainloop);
+- return 0;
+-
+-unlock_and_fail:
+- pa_threaded_mainloop_unlock (g->mainloop);
+- return -1;
+-}
+-
+-static int qpa_simple_write (PAVoiceOut *p, const void *data, size_t length, int *rerror)
++static int qpa_run_out(HWVoiceOut *hw, int live)
+ {
+- paaudio *g = p->g;
+-
+- pa_threaded_mainloop_lock (g->mainloop);
+-
+- CHECK_DEAD_GOTO (g, p->stream, rerror, unlock_and_fail);
+-
+- while (length > 0) {
+- size_t l;
+- int r;
+-
+- while (!(l = pa_stream_writable_size (p->stream))) {
+- pa_threaded_mainloop_wait (g->mainloop);
+- CHECK_DEAD_GOTO (g, p->stream, rerror, unlock_and_fail);
+- }
+-
+- CHECK_SUCCESS_GOTO (g, rerror, l != (size_t) -1, unlock_and_fail);
+-
+- if (l > length) {
+- l = length;
+- }
+-
+- r = pa_stream_write (p->stream, data, l, NULL, 0LL, PA_SEEK_RELATIVE);
+- CHECK_SUCCESS_GOTO (g, rerror, r >= 0, unlock_and_fail);
+-
+- data = (const uint8_t *) data + l;
+- length -= l;
+- }
+-
+- pa_threaded_mainloop_unlock (g->mainloop);
+- return 0;
+-
+-unlock_and_fail:
+- pa_threaded_mainloop_unlock (g->mainloop);
+- return -1;
+-}
+-
+-static void *qpa_thread_out (void *arg)
+-{
+- PAVoiceOut *pa = arg;
+- HWVoiceOut *hw = &pa->hw;
+-
+- if (audio_pt_lock (&pa->pt, AUDIO_FUNC)) {
+- return NULL;
+- }
++ PAVoiceOut *pa = (PAVoiceOut *) hw;
++ int rpos, decr, samples;
++ size_t avail_bytes, max_bytes;
++ struct st_sample *src;
++ void *pa_dst;
++ int error = 0;
++ int *rerror = &error;
++ int r;
+
+- for (;;) {
+- int decr, to_mix, rpos;
++ decr = 0;
++ rpos = hw->rpos;
+
+- for (;;) {
+- if (pa->done) {
+- goto exit;
+- }
++ pa_threaded_mainloop_lock(pa->g->mainloop);
++ CHECK_DEAD_GOTO(pa->g, pa->stream, rerror, fail);
+
+- if (pa->live > 0) {
+- break;
+- }
++ avail_bytes = (size_t) live << hw->info.shift;
+
+- if (audio_pt_wait (&pa->pt, AUDIO_FUNC)) {
+- goto exit;
+- }
+- }
++ max_bytes = pa_stream_writable_size(pa->stream);
++ CHECK_SUCCESS_GOTO(pa->g, rerror, max_bytes != -1, fail);
+
+- decr = to_mix = audio_MIN (pa->live, pa->g->conf.samples >> 2);
+- rpos = pa->rpos;
++ samples = (int)(audio_MIN(avail_bytes, max_bytes)) >> hw->info.shift;
++ while (samples) {
++ int convert_samples = audio_MIN(samples, hw->samples - rpos);
++ size_t b_wanted = (size_t) convert_samples << hw->info.shift;
++ size_t b_effective = b_wanted;
+
+- if (audio_pt_unlock (&pa->pt, AUDIO_FUNC)) {
+- return NULL;
+- }
++ r = pa_stream_begin_write(pa->stream, &pa_dst, &b_effective);
++ CHECK_SUCCESS_GOTO(pa->g, rerror, r == 0, fail);
++ CHECK_SUCCESS_GOTO(pa->g, (int *)0, b_effective == b_wanted, fail);
+
+- while (to_mix) {
+- int error;
+- int chunk = audio_MIN (to_mix, hw->samples - rpos);
+- struct st_sample *src = hw->mix_buf + rpos;
++ src = hw->mix_buf + rpos;
++ hw->clip(pa_dst, src, convert_samples);
+
+- hw->clip (pa->pcm_buf, src, chunk);
+-
+- if (qpa_simple_write (pa, pa->pcm_buf,
+- chunk << hw->info.shift, &error) < 0) {
+- qpa_logerr (error, "pa_simple_write failed\n");
+- return NULL;
+- }
++ r = pa_stream_write(pa->stream, pa_dst, b_effective,
++ NULL, 0LL, PA_SEEK_RELATIVE);
++ CHECK_SUCCESS_GOTO(pa->g, rerror, r >= 0, fail);
+
+- rpos = (rpos + chunk) % hw->samples;
+- to_mix -= chunk;
+- }
+-
+- if (audio_pt_lock (&pa->pt, AUDIO_FUNC)) {
+- return NULL;
+- }
+-
+- pa->rpos = rpos;
+- pa->live -= decr;
+- pa->decr += decr;
++ rpos = (rpos + convert_samples) % hw->samples;
++ samples -= convert_samples;
++ decr += convert_samples;
+ }
+
+- exit:
+- audio_pt_unlock (&pa->pt, AUDIO_FUNC);
+- return NULL;
+-}
+-
+-static int qpa_run_out (HWVoiceOut *hw, int live)
+-{
+- int decr;
+- PAVoiceOut *pa = (PAVoiceOut *) hw;
+-
+- if (audio_pt_lock (&pa->pt, AUDIO_FUNC)) {
+- return 0;
+- }
++ bail:
++ pa_threaded_mainloop_unlock(pa->g->mainloop);
+
+- decr = audio_MIN (live, pa->decr);
+- pa->decr -= decr;
+- pa->live = live - decr;
+- hw->rpos = pa->rpos;
+- if (pa->live > 0) {
+- audio_pt_unlock_and_signal (&pa->pt, AUDIO_FUNC);
+- }
+- else {
+- audio_pt_unlock (&pa->pt, AUDIO_FUNC);
+- }
++ hw->rpos = rpos;
+ return decr;
++
++fail:
++ qpa_logerr(error, "qpa_run_out failed\n");
++ goto bail;
+ }
+
+ static int qpa_write (SWVoiceOut *sw, void *buf, int len)
+@@ -292,92 +165,68 @@ static int qpa_write (SWVoiceOut *sw, void *buf, int len)
+ return audio_pcm_sw_write (sw, buf, len);
+ }
+
+-/* capture */
+-static void *qpa_thread_in (void *arg)
++static int qpa_run_in(HWVoiceIn *hw)
+ {
+- PAVoiceIn *pa = arg;
+- HWVoiceIn *hw = &pa->hw;
++ PAVoiceIn *pa = (PAVoiceIn *) hw;
++ int wpos, incr;
++ char *pa_src;
++ int error = 0;
++ int *rerror = &error;
++ int r;
++ size_t pa_avail;
++ incr = 0;
++ wpos = hw->wpos;
+
+- if (audio_pt_lock (&pa->pt, AUDIO_FUNC)) {
+- return NULL;
+- }
++ pa_threaded_mainloop_lock(pa->g->mainloop);
++ CHECK_DEAD_GOTO(pa->g, pa->stream, rerror, fail);
+
+- for (;;) {
+- int incr, to_grab, wpos;
++ size_t bytes_wanted = ((unsigned int)
++ (hw->samples - audio_pcm_hw_get_live_in(hw)) << hw->info.shift);
+
+- for (;;) {
+- if (pa->done) {
+- goto exit;
+- }
++ if (bytes_wanted == 0) {
++ /* no room */
++ goto bail;
++ }
+
+- if (pa->dead > 0) {
+- break;
+- }
++ size_t bytes_avail = pa_stream_readable_size(pa->stream);
+
+- if (audio_pt_wait (&pa->pt, AUDIO_FUNC)) {
+- goto exit;
+- }
+- }
++ if (bytes_wanted > bytes_avail) {
++ bytes_wanted = bytes_avail;
++ }
+
+- incr = to_grab = audio_MIN (pa->dead, pa->g->conf.samples >> 2);
+- wpos = pa->wpos;
++ while (bytes_wanted) {
++ r = pa_stream_peek(pa->stream, (const void **)&pa_src, &pa_avail);
++ CHECK_SUCCESS_GOTO(pa->g, rerror, r == 0, fail);
+
+- if (audio_pt_unlock (&pa->pt, AUDIO_FUNC)) {
+- return NULL;
++ if (pa_avail == 0 || pa_avail > bytes_wanted) {
++ break;
+ }
+
+- while (to_grab) {
+- int error;
+- int chunk = audio_MIN (to_grab, hw->samples - wpos);
+- void *buf = advance (pa->pcm_buf, wpos);
++ bytes_wanted -= pa_avail;
+
+- if (qpa_simple_read (pa, buf,
+- chunk << hw->info.shift, &error) < 0) {
+- qpa_logerr (error, "pa_simple_read failed\n");
+- return NULL;
+- }
+-
+- hw->conv (hw->conv_buf + wpos, buf, chunk);
++ while (pa_avail) {
++ int chunk = audio_MIN(
++ (int)(pa_avail >> hw->info.shift), hw->samples - wpos);
++ hw->conv(hw->conv_buf + wpos, pa_src, chunk);
+ wpos = (wpos + chunk) % hw->samples;
+- to_grab -= chunk;
+- }
+-
+- if (audio_pt_lock (&pa->pt, AUDIO_FUNC)) {
+- return NULL;
++ pa_src += chunk << hw->info.shift;
++ pa_avail -= chunk << hw->info.shift;
++ incr += chunk;
+ }
+
+- pa->wpos = wpos;
+- pa->dead -= incr;
+- pa->incr += incr;
++ r = pa_stream_drop(pa->stream);
++ CHECK_SUCCESS_GOTO(pa->g, rerror, r == 0, fail);
+ }
+
+- exit:
+- audio_pt_unlock (&pa->pt, AUDIO_FUNC);
+- return NULL;
+-}
+-
+-static int qpa_run_in (HWVoiceIn *hw)
+-{
+- int live, incr, dead;
+- PAVoiceIn *pa = (PAVoiceIn *) hw;
+-
+- if (audio_pt_lock (&pa->pt, AUDIO_FUNC)) {
+- return 0;
+- }
++bail:
++ pa_threaded_mainloop_unlock(pa->g->mainloop);
+
+- live = audio_pcm_hw_get_live_in (hw);
+- dead = hw->samples - live;
+- incr = audio_MIN (dead, pa->incr);
+- pa->incr -= incr;
+- pa->dead = dead - incr;
+- hw->wpos = pa->wpos;
+- if (pa->dead > 0) {
+- audio_pt_unlock_and_signal (&pa->pt, AUDIO_FUNC);
+- }
+- else {
+- audio_pt_unlock (&pa->pt, AUDIO_FUNC);
+- }
++ hw->wpos = wpos;
+ return incr;
++
++fail:
++ qpa_logerr(error, "qpa_run_in failed\n");
++ goto bail;
+ }
+
+ static int qpa_read (SWVoiceIn *sw, void *buf, int len)
+@@ -470,13 +319,6 @@ static void stream_state_cb (pa_stream *s, void * userdata)
+ }
+ }
+
+-static void stream_request_cb (pa_stream *s, size_t length, void *userdata)
+-{
+- paaudio *g = userdata;
+-
+- pa_threaded_mainloop_signal (g->mainloop, 0);
+-}
+-
+ static pa_stream *qpa_simple_new (
+ paaudio *g,
+ const char *name,
+@@ -498,23 +340,17 @@ static pa_stream *qpa_simple_new (
+ }
+
+ pa_stream_set_state_callback (stream, stream_state_cb, g);
+- pa_stream_set_read_callback (stream, stream_request_cb, g);
+- pa_stream_set_write_callback (stream, stream_request_cb, g);
+
+ if (dir == PA_STREAM_PLAYBACK) {
+- r = pa_stream_connect_playback (stream, dev, attr,
+- PA_STREAM_INTERPOLATE_TIMING
+-#ifdef PA_STREAM_ADJUST_LATENCY
+- |PA_STREAM_ADJUST_LATENCY
+-#endif
+- |PA_STREAM_AUTO_TIMING_UPDATE, NULL, NULL);
++ r = pa_stream_connect_playback(stream, dev, attr,
++ PA_STREAM_INTERPOLATE_TIMING
++ | (g->conf.adjust_latency_out ? PA_STREAM_ADJUST_LATENCY : 0)
++ | PA_STREAM_AUTO_TIMING_UPDATE, NULL, NULL);
+ } else {
+- r = pa_stream_connect_record (stream, dev, attr,
+- PA_STREAM_INTERPOLATE_TIMING
+-#ifdef PA_STREAM_ADJUST_LATENCY
+- |PA_STREAM_ADJUST_LATENCY
+-#endif
+- |PA_STREAM_AUTO_TIMING_UPDATE);
++ r = pa_stream_connect_record(stream, dev, attr,
++ PA_STREAM_INTERPOLATE_TIMING
++ | (g->conf.adjust_latency_in ? PA_STREAM_ADJUST_LATENCY : 0)
++ | PA_STREAM_AUTO_TIMING_UPDATE);
+ }
+
+ if (r < 0) {
+@@ -541,165 +377,167 @@ static int qpa_init_out(HWVoiceOut *hw, struct audsettings *as,
+ void *drv_opaque)
+ {
+ int error;
+- pa_sample_spec ss;
+- pa_buffer_attr ba;
+ struct audsettings obt_as = *as;
+ PAVoiceOut *pa = (PAVoiceOut *) hw;
+ paaudio *g = pa->g = drv_opaque;
+
+- ss.format = audfmt_to_pa (as->fmt, as->endianness);
+- ss.channels = as->nchannels;
+- ss.rate = as->freq;
+-
+- /*
+- * qemu audio tick runs at 100 Hz (by default), so processing
+- * data chunks worth 10 ms of sound should be a good fit.
+- */
+- ba.tlength = pa_usec_to_bytes (10 * 1000, &ss);
+- ba.minreq = pa_usec_to_bytes (5 * 1000, &ss);
+- ba.maxlength = -1;
+- ba.prebuf = -1;
+-
+- obt_as.fmt = pa_to_audfmt (ss.format, &obt_as.endianness);
+-
+- pa->stream = qpa_simple_new (
+- g,
+- "qemu",
+- PA_STREAM_PLAYBACK,
+- g->conf.sink,
+- &ss,
+- NULL, /* channel map */
+- &ba, /* buffering attributes */
+- &error
+- );
++ int64_t timer_tick_duration =
++ audio_MAX(audio_get_timer_ticks(), 1 * SCALE_MS);
++ int64_t frames_per_tick_x1000 =
++ ((timer_tick_duration * as->freq * 1000LL) / NANOSECONDS_PER_SECOND);
++
++ int64_t tlength = g->conf.tlength;
++ if (tlength == 0) {
++ tlength = (frames_per_tick_x1000) / 400;
++ }
++ int64_t buflen = g->conf.buffer_size_out;
++ if (buflen == 0) {
++ buflen = frames_per_tick_x1000 / 400;
++ }
++
++ ldebug("tick duration: %.2f ms (%.3f frames)\n",
++ ((float)timer_tick_duration) / SCALE_MS,
++ (float)frames_per_tick_x1000 / 1000.0f);
++
++ ldebug("OUT internal buffer: %.2f ms (%"PRId64" frames)\n",
++ buflen * (1000.0f / as->freq),
++ buflen);
++
++ ldebug("OUT tlength: %.2f ms (%"PRId64" frames)\n",
++ tlength * (1000.0f / as->freq),
++ tlength);
++
++ ldebug("OUT adjust latency: %s\n",
++ g->conf.adjust_latency_out ? "yes" : "no");
++
++ pa->ss.format = audfmt_to_pa(as->fmt, as->endianness);
++ pa->ss.channels = as->nchannels;
++ pa->ss.rate = as->freq;
++
++ pa->ba.tlength = tlength * pa_frame_size(&pa->ss);
++ pa->ba.maxlength = -1;
++ pa->ba.minreq = -1;
++ pa->ba.prebuf = -1;
++
++ obt_as.fmt = pa_to_audfmt(pa->ss.format, &obt_as.endianness);
++
++ pa->stream = qpa_simple_new(
++ g,
++ "qemu",
++ PA_STREAM_PLAYBACK,
++ g->conf.sink,
++ &pa->ss,
++ NULL, /* channel map */
++ &pa->ba, /* buffering attributes */
++ &error
++ );
+ if (!pa->stream) {
+ qpa_logerr (error, "pa_simple_new for playback failed\n");
+ goto fail1;
+ }
+
+- audio_pcm_init_info (&hw->info, &obt_as);
+- hw->samples = g->conf.samples;
+- pa->pcm_buf = audio_calloc (AUDIO_FUNC, hw->samples, 1 << hw->info.shift);
+- pa->rpos = hw->rpos;
+- if (!pa->pcm_buf) {
+- dolog ("Could not allocate buffer (%d bytes)\n",
+- hw->samples << hw->info.shift);
+- goto fail2;
+- }
+-
+- if (audio_pt_init (&pa->pt, qpa_thread_out, hw, AUDIO_CAP, AUDIO_FUNC)) {
+- goto fail3;
+- }
++ audio_pcm_init_info(&hw->info, &obt_as);
++ hw->samples = buflen;
+
+ return 0;
+
+- fail3:
+- g_free (pa->pcm_buf);
+- pa->pcm_buf = NULL;
+- fail2:
+- if (pa->stream) {
+- pa_stream_unref (pa->stream);
+- pa->stream = NULL;
+- }
+- fail1:
++fail1:
+ return -1;
+ }
+
+ static int qpa_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
+ {
+ int error;
+- pa_sample_spec ss;
+ struct audsettings obt_as = *as;
+ PAVoiceIn *pa = (PAVoiceIn *) hw;
+ paaudio *g = pa->g = drv_opaque;
+
+- ss.format = audfmt_to_pa (as->fmt, as->endianness);
+- ss.channels = as->nchannels;
+- ss.rate = as->freq;
+-
+- obt_as.fmt = pa_to_audfmt (ss.format, &obt_as.endianness);
+-
+- pa->stream = qpa_simple_new (
+- g,
+- "qemu",
+- PA_STREAM_RECORD,
+- g->conf.source,
+- &ss,
+- NULL, /* channel map */
+- NULL, /* buffering attributes */
+- &error
+- );
++ int64_t timer_tick_duration =
++ audio_MAX(audio_get_timer_ticks(), 1 * SCALE_MS);
++ int64_t frames_per_tick_x1000 =
++ ((timer_tick_duration * as->freq * 1000LL) / NANOSECONDS_PER_SECOND);
++
++ int64_t fragsize = g->conf.fragsize;
++ if (fragsize == 0) {
++ fragsize = frames_per_tick_x1000 / 1000;
++ }
++ int64_t buflen = g->conf.buffer_size_in;
++ if (buflen == 0) {
++ buflen = frames_per_tick_x1000 / 400;
++ }
++ int64_t maxlength = g->conf.maxlength_in;
++ if (maxlength == 0) {
++ maxlength = fragsize * 2;
++ }
++
++ ldebug("IN internal buffer: %.2f ms (%"PRId64" frames)\n",
++ buflen * (1000.0f / as->freq),
++ buflen);
++
++ ldebug("IN fragsize: %.2f ms (%"PRId64" frames)\n",
++ fragsize * (1000.0f / as->freq),
++ fragsize);
++
++ ldebug("IN maxlength: %.2f ms (%"PRId64" frames)\n",
++ maxlength * (1000.0f / as->freq),
++ maxlength);
++
++ ldebug("IN adjust latency: %s\n",
++ g->conf.adjust_latency_in ? "yes" : "no");
++
++ pa->ss.format = audfmt_to_pa(as->fmt, as->endianness);
++ pa->ss.channels = as->nchannels;
++ pa->ss.rate = as->freq;
++
++ pa->ba.fragsize = fragsize * pa_frame_size(&pa->ss);
++ pa->ba.maxlength = maxlength * pa_frame_size(&pa->ss);
++ pa->ba.minreq = -1;
++ pa->ba.prebuf = -1;
++
++ obt_as.fmt = pa_to_audfmt(pa->ss.format, &obt_as.endianness);
++
++ pa->stream = qpa_simple_new(
++ g,
++ "qemu",
++ PA_STREAM_RECORD,
++ g->conf.source,
++ &pa->ss,
++ NULL, /* channel map */
++ &pa->ba, /* buffering attributes */
++ &error
++ );
+ if (!pa->stream) {
+ qpa_logerr (error, "pa_simple_new for capture failed\n");
+ goto fail1;
+ }
+
+- audio_pcm_init_info (&hw->info, &obt_as);
+- hw->samples = g->conf.samples;
+- pa->pcm_buf = audio_calloc (AUDIO_FUNC, hw->samples, 1 << hw->info.shift);
+- pa->wpos = hw->wpos;
+- if (!pa->pcm_buf) {
+- dolog ("Could not allocate buffer (%d bytes)\n",
+- hw->samples << hw->info.shift);
+- goto fail2;
+- }
+-
+- if (audio_pt_init (&pa->pt, qpa_thread_in, hw, AUDIO_CAP, AUDIO_FUNC)) {
+- goto fail3;
+- }
++ audio_pcm_init_info(&hw->info, &obt_as);
++ hw->samples = buflen;
+
+ return 0;
+
+- fail3:
+- g_free (pa->pcm_buf);
+- pa->pcm_buf = NULL;
+- fail2:
+- if (pa->stream) {
+- pa_stream_unref (pa->stream);
+- pa->stream = NULL;
+- }
+- fail1:
++ fail1:
+ return -1;
+ }
+
+ static void qpa_fini_out (HWVoiceOut *hw)
+ {
+- void *ret;
+ PAVoiceOut *pa = (PAVoiceOut *) hw;
+
+- audio_pt_lock (&pa->pt, AUDIO_FUNC);
+- pa->done = 1;
+- audio_pt_unlock_and_signal (&pa->pt, AUDIO_FUNC);
+- audio_pt_join (&pa->pt, &ret, AUDIO_FUNC);
+-
+ if (pa->stream) {
+ pa_stream_unref (pa->stream);
+ pa->stream = NULL;
+ }
+-
+- audio_pt_fini (&pa->pt, AUDIO_FUNC);
+- g_free (pa->pcm_buf);
+- pa->pcm_buf = NULL;
+ }
+
+ static void qpa_fini_in (HWVoiceIn *hw)
+ {
+- void *ret;
+ PAVoiceIn *pa = (PAVoiceIn *) hw;
+
+- audio_pt_lock (&pa->pt, AUDIO_FUNC);
+- pa->done = 1;
+- audio_pt_unlock_and_signal (&pa->pt, AUDIO_FUNC);
+- audio_pt_join (&pa->pt, &ret, AUDIO_FUNC);
+-
+ if (pa->stream) {
+ pa_stream_unref (pa->stream);
+ pa->stream = NULL;
+ }
+-
+- audio_pt_fini (&pa->pt, AUDIO_FUNC);
+- g_free (pa->pcm_buf);
+- pa->pcm_buf = NULL;
+ }
+
+ static int qpa_ctl_out (HWVoiceOut *hw, int cmd, ...)
+@@ -809,7 +647,8 @@ static int qpa_ctl_in (HWVoiceIn *hw, int cmd, ...)
+
+ /* common */
+ static PAConf glob_conf = {
+- .samples = 4096,
++ .adjust_latency_out = 0,
++ .adjust_latency_in = 1,
+ };
+
+ static void *qpa_audio_init (void)
+@@ -897,10 +736,46 @@ static void qpa_audio_fini (void *opaque)
+
+ struct audio_option qpa_options[] = {
+ {
+- .name = "SAMPLES",
++ .name = "BUFFER_SIZE_OUT",
++ .tag = AUD_OPT_INT,
++ .valp = &glob_conf.buffer_size_out,
++ .descr = "internal buffer size in frames for playback device"
++ },
++ {
++ .name = "BUFFER_SIZE_IN",
++ .tag = AUD_OPT_INT,
++ .valp = &glob_conf.buffer_size_in,
++ .descr = "internal buffer size in frames for recording device"
++ },
++ {
++ .name = "TLENGTH",
+ .tag = AUD_OPT_INT,
+- .valp = &glob_conf.samples,
+- .descr = "buffer size in samples"
++ .valp = &glob_conf.tlength,
++ .descr = "playback buffer target length in frames"
++ },
++ {
++ .name = "FRAGSIZE",
++ .tag = AUD_OPT_INT,
++ .valp = &glob_conf.fragsize,
++ .descr = "fragment length of recording device in frames"
++ },
++ {
++ .name = "MAXLENGTH_IN",
++ .tag = AUD_OPT_INT,
++ .valp = &glob_conf.maxlength_in,
++ .descr = "maximum length of PA recording buffer in frames"
++ },
++ {
++ .name = "ADJUST_LATENCY_OUT",
++ .tag = AUD_OPT_BOOL,
++ .valp = &glob_conf.adjust_latency_out,
++ .descr = "instruct PA to adjust latency for playback device"
++ },
++ {
++ .name = "ADJUST_LATENCY_IN",
++ .tag = AUD_OPT_BOOL,
++ .valp = &glob_conf.adjust_latency_in,
++ .descr = "instruct PA to adjust latency for recording device"
+ },
+ {
+ .name = "SERVER",
+diff --git hw/audio/hda-codec.c hw/audio/hda-codec.c
+index 5402cd196c..ab89158bfc 100644
+--- hw/audio/hda-codec.c
++++ hw/audio/hda-codec.c
+@@ -18,6 +18,7 @@
+ */
+
+ #include "qemu/osdep.h"
++#include "qemu/atomic.h"
+ #include "hw/hw.h"
+ #include "hw/pci/pci.h"
+ #include "intel-hda.h"
+@@ -126,6 +127,11 @@ static void hda_codec_parse_fmt(uint32_t format, struct audsettings *as)
+ #define PARAM nomixemu
+ #include "hda-codec-common.h"
+
++#define HDA_TIMER_TICKS (SCALE_MS)
++#define MAX_CORR (SCALE_US * 100)
++#define B_SIZE sizeof(st->buf)
++#define B_MASK (sizeof(st->buf) - 1)
++
+ /* -------------------------------------------------------------------------- */
+
+ static const char *fmt2name[] = {
+@@ -154,8 +160,13 @@ struct HDAAudioStream {
+ SWVoiceIn *in;
+ SWVoiceOut *out;
+ } voice;
+- uint8_t buf[HDA_BUFFER_SIZE];
+- uint32_t bpos;
++ uint8_t compat_buf[HDA_BUFFER_SIZE];
++ uint32_t compat_bpos;
++ uint8_t buf[8192]; /* size must be power of two */
++ int64_t rpos;
++ int64_t wpos;
++ QEMUTimer *buft;
++ int64_t buft_start;
+ };
+
+ #define TYPE_HDA_AUDIO "hda-audio"
+@@ -176,53 +187,146 @@ struct HDAAudioState {
+ bool mixer;
+ };
+
++static inline int64_t hda_bytes_per_second(HDAAudioStream *st)
++{
++ return 2 * st->as.nchannels * st->as.freq;
++}
++
++static inline void hda_timer_sync_adjust(HDAAudioStream *st, int64_t target_pos)
++{
++ int64_t corr =
++ NANOSECONDS_PER_SECOND * target_pos / hda_bytes_per_second(st);
++ if (corr > MAX_CORR) {
++ corr = MAX_CORR;
++ } else if (corr < -MAX_CORR) {
++ corr = -MAX_CORR;
++ }
++ atomic_fetch_add(&st->buft_start, corr);
++}
++
++static void hda_audio_input_timer(void *opaque)
++{
++ HDAAudioStream *st = opaque;
++
++ int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
++
++ int64_t buft_start = atomic_fetch_add(&st->buft_start, 0);
++ int64_t wpos = atomic_fetch_add(&st->wpos, 0);
++ int64_t rpos = atomic_fetch_add(&st->rpos, 0);
++
++ int64_t wanted_rpos = hda_bytes_per_second(st) * (now - buft_start)
++ / NANOSECONDS_PER_SECOND;
++ wanted_rpos &= -4; /* IMPORTANT! clip to frames */
++
++ if (wanted_rpos <= rpos) {
++ /* we already transmitted the data */
++ goto out_timer;
++ }
++
++ int64_t to_transfer = audio_MIN(wpos - rpos, wanted_rpos - rpos);
++ while (to_transfer) {
++ uint32_t start = (rpos & B_MASK);
++ uint32_t chunk = audio_MIN(B_SIZE - start, to_transfer);
++ int rc = hda_codec_xfer(
++ &st->state->hda, st->stream, false, st->buf + start, chunk);
++ if (!rc) {
++ break;
++ }
++ rpos += chunk;
++ to_transfer -= chunk;
++ atomic_fetch_add(&st->rpos, chunk);
++ }
++
++out_timer:
++
++ if (st->running) {
++ timer_mod_anticipate_ns(st->buft, now + HDA_TIMER_TICKS);
++ }
++}
++
+ static void hda_audio_input_cb(void *opaque, int avail)
+ {
+ HDAAudioStream *st = opaque;
+- int recv = 0;
+- int len;
+- bool rc;
+-
+- while (avail - recv >= sizeof(st->buf)) {
+- if (st->bpos != sizeof(st->buf)) {
+- len = AUD_read(st->voice.in, st->buf + st->bpos,
+- sizeof(st->buf) - st->bpos);
+- st->bpos += len;
+- recv += len;
+- if (st->bpos != sizeof(st->buf)) {
+- break;
+- }
++
++ int64_t wpos = atomic_fetch_add(&st->wpos, 0);
++ int64_t rpos = atomic_fetch_add(&st->rpos, 0);
++
++ int64_t to_transfer = audio_MIN(B_SIZE - (wpos - rpos), avail);
++
++ hda_timer_sync_adjust(st, -((wpos - rpos) + to_transfer - (B_SIZE >> 1)));
++
++ while (to_transfer) {
++ uint32_t start = (uint32_t) (wpos & B_MASK);
++ uint32_t chunk = (uint32_t) audio_MIN(B_SIZE - start, to_transfer);
++ uint32_t read = AUD_read(st->voice.in, st->buf + start, chunk);
++ wpos += read;
++ to_transfer -= read;
++ atomic_fetch_add(&st->wpos, read);
++ if (chunk != read) {
++ break;
+ }
+- rc = hda_codec_xfer(&st->state->hda, st->stream, false,
+- st->buf, sizeof(st->buf));
++ }
++}
++
++static void hda_audio_output_timer(void *opaque)
++{
++ HDAAudioStream *st = opaque;
++
++ int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
++
++ int64_t buft_start = atomic_fetch_add(&st->buft_start, 0);
++ int64_t wpos = atomic_fetch_add(&st->wpos, 0);
++ int64_t rpos = atomic_fetch_add(&st->rpos, 0);
++
++ int64_t wanted_wpos = hda_bytes_per_second(st) * (now - buft_start)
++ / NANOSECONDS_PER_SECOND;
++ wanted_wpos &= -4; /* IMPORTANT! clip to frames */
++
++ if (wanted_wpos <= wpos) {
++ /* we already received the data */
++ goto out_timer;
++ }
++
++ int64_t to_transfer = audio_MIN(B_SIZE - (wpos - rpos), wanted_wpos - wpos);
++ while (to_transfer) {
++ uint32_t start = (wpos & B_MASK);
++ uint32_t chunk = audio_MIN(B_SIZE - start, to_transfer);
++ int rc = hda_codec_xfer(
++ &st->state->hda, st->stream, true, st->buf + start, chunk);
+ if (!rc) {
+ break;
+ }
+- st->bpos = 0;
++ wpos += chunk;
++ to_transfer -= chunk;
++ atomic_fetch_add(&st->wpos, chunk);
++ }
++
++out_timer:
++
++ if (st->running) {
++ timer_mod_anticipate_ns(st->buft, now + HDA_TIMER_TICKS);
+ }
+ }
+
+ static void hda_audio_output_cb(void *opaque, int avail)
+ {
+ HDAAudioStream *st = opaque;
+- int sent = 0;
+- int len;
+- bool rc;
+-
+- while (avail - sent >= sizeof(st->buf)) {
+- if (st->bpos == sizeof(st->buf)) {
+- rc = hda_codec_xfer(&st->state->hda, st->stream, true,
+- st->buf, sizeof(st->buf));
+- if (!rc) {
+- break;
+- }
+- st->bpos = 0;
+- }
+- len = AUD_write(st->voice.out, st->buf + st->bpos,
+- sizeof(st->buf) - st->bpos);
+- st->bpos += len;
+- sent += len;
+- if (st->bpos != sizeof(st->buf)) {
++
++ int64_t wpos = atomic_fetch_add(&st->wpos, 0);
++ int64_t rpos = atomic_fetch_add(&st->rpos, 0);
++
++ int64_t to_transfer = audio_MIN(wpos - rpos, avail);
++
++ hda_timer_sync_adjust(st, (wpos - rpos) - to_transfer - (B_SIZE >> 1));
++
++ while (to_transfer) {
++ uint32_t start = (uint32_t) (rpos & B_MASK);
++ uint32_t chunk = (uint32_t) audio_MIN(B_SIZE - start, to_transfer);
++ uint32_t written = AUD_write(st->voice.out, st->buf + start, chunk);
++ rpos += written;
++ to_transfer -= written;
++ atomic_fetch_add(&st->rpos, written);
++ if (chunk != written) {
+ break;
+ }
+ }
+@@ -239,6 +343,15 @@ static void hda_audio_set_running(HDAAudioStream *st, bool running)
+ st->running = running;
+ dprint(st->state, 1, "%s: %s (stream %d)\n", st->node->name,
+ st->running ? "on" : "off", st->stream);
++ if (running) {
++ int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
++ st->rpos = 0;
++ st->wpos = 0;
++ st->buft_start = now;
++ timer_mod_anticipate_ns(st->buft, now + HDA_TIMER_TICKS);
++ } else {
++ timer_del(st->buft);
++ }
+ if (st->output) {
+ AUD_set_active_out(st->voice.out, st->running);
+ } else {
+@@ -286,10 +399,12 @@ static void hda_audio_setup(HDAAudioStream *st)
+ st->voice.out = AUD_open_out(&st->state->card, st->voice.out,
+ st->node->name, st,
+ hda_audio_output_cb, &st->as);
++ st->buft = timer_new_ns(QEMU_CLOCK_VIRTUAL, hda_audio_output_timer, st);
+ } else {
+ st->voice.in = AUD_open_in(&st->state->card, st->voice.in,
+ st->node->name, st,
+ hda_audio_input_cb, &st->as);
++ st->buft = timer_new_ns(QEMU_CLOCK_VIRTUAL, hda_audio_input_timer, st);
+ }
+ }
+
+@@ -505,7 +620,6 @@ static int hda_audio_init(HDACodecDevice *hda, const struct desc_codec *desc)
+ /* unmute output by default */
+ st->gain_left = QEMU_HDA_AMP_STEPS;
+ st->gain_right = QEMU_HDA_AMP_STEPS;
+- st->bpos = sizeof(st->buf);
+ st->output = true;
+ } else {
+ st->output = false;
+@@ -532,6 +646,7 @@ static void hda_audio_exit(HDACodecDevice *hda)
+ if (st->node == NULL) {
+ continue;
+ }
++ timer_del(st->buft);
+ if (st->output) {
+ AUD_close_out(&a->card, st->voice.out);
+ } else {
+@@ -592,8 +707,8 @@ static const VMStateDescription vmstate_hda_audio_stream = {
+ VMSTATE_UINT32(gain_right, HDAAudioStream),
+ VMSTATE_BOOL(mute_left, HDAAudioStream),
+ VMSTATE_BOOL(mute_right, HDAAudioStream),
+- VMSTATE_UINT32(bpos, HDAAudioStream),
+- VMSTATE_BUFFER(buf, HDAAudioStream),
++ VMSTATE_UINT32(compat_bpos, HDAAudioStream),
++ VMSTATE_BUFFER(compat_buf, HDAAudioStream),
+ VMSTATE_END_OF_LIST()
+ }
+ };
+diff --git hw/audio/intel-hda.c hw/audio/intel-hda.c
+index 18a50a8f83..721eba792d 100644
+--- hw/audio/intel-hda.c
++++ hw/audio/intel-hda.c
+@@ -407,13 +407,6 @@ static bool intel_hda_xfer(HDACodecDevice *dev, uint32_t stnr, bool output,
+ if (st->bpl == NULL) {
+ return false;
+ }
+- if (st->ctl & (1 << 26)) {
+- /*
+- * Wait with the next DMA xfer until the guest
+- * has acked the buffer completion interrupt
+- */
+- return false;
+- }
+
+ left = len;
+ s = st->bentries;
diff --git a/cpu-pinning.patch b/cpu-pinning.patch
new file mode 100644
index 000000000000..46efee5d88a5
--- /dev/null
+++ b/cpu-pinning.patch
@@ -0,0 +1,186 @@
+From e392e5516e6ae66db0f05775a22c0abf39f033f0 Mon Sep 17 00:00:00 2001
+From: Saverio Miroddi <saverio.pub2@gmail.com>
+Date: Tue, 31 Oct 2017 20:59:05 +0100
+Subject: [PATCH] Current pinning patch
+
+Changes 2017/10/31:
+
+- Fix: the MAX_VCPUS was arbitrary; it's now set to CPU_SETSIZE
+- Fix: the allowed vcpus were equated to the number of cores, without accounting for sockets and threads
+- Change: removed all the debug information, and a now unneeded warning
+- Change: cleaned spacing
+---
+ cpus.c | 12 +++++++++++
+ qemu-options.hx | 10 ++++++++++
+ vl.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 84 insertions(+)
+
+diff --git a/cpus.c b/cpus.c
+index 9bed61eefcc..7437e3a00c7 100644
+--- a/cpus.c
++++ b/cpus.c
+@@ -55,6 +55,9 @@
+ #ifdef CONFIG_LINUX
+
+ #include <sys/prctl.h>
++#include <unistd.h>
++#include <stdint.h>
++#include <inttypes.h>
+
+ #ifndef PR_MCE_KILL
+ #define PR_MCE_KILL 33
+@@ -1722,9 +1725,11 @@ static void qemu_hax_start_vcpu(CPUState *cpu)
+ }
+ }
+
++extern int vcpu_affinity[];
+ static void qemu_kvm_start_vcpu(CPUState *cpu)
+ {
+ char thread_name[VCPU_THREAD_NAME_SIZE];
++ cpu_set_t cpuset;
+
+ cpu->thread = g_malloc0(sizeof(QemuThread));
+ cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+@@ -1733,6 +1738,13 @@ static void qemu_kvm_start_vcpu(CPUState *cpu)
+ cpu->cpu_index);
+ qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE);
++
++ if (vcpu_affinity[cpu->cpu_index] != -1) {
++ CPU_ZERO(&cpuset);
++ CPU_SET(vcpu_affinity[cpu->cpu_index], &cpuset);
++ pthread_setaffinity_np((cpu->thread)->thread, sizeof(cpu_set_t), &cpuset);
++ }
++
+ while (!cpu->created) {
+ qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
+ }
+diff --git a/qemu-options.hx b/qemu-options.hx
+index 9f6e2adfffb..1d38fc86c81 100644
+--- a/qemu-options.hx
++++ b/qemu-options.hx
+@@ -160,6 +160,16 @@ given, the total number of CPUs @var{n} can be omitted. @var{maxcpus}
+ specifies the maximum number of hotpluggable CPUs.
+ ETEXI
+
++DEF("vcpu", HAS_ARG, QEMU_OPTION_vcpu,
++ "-vcpu [vcpunum=]n[,affinity=affinity]\n"
++ "-vcpu [vcpunum=]n[,affinity=affinity]\n", QEMU_ARCH_ALL)
++STEXI
++@item -vcpu [vcpunum=]@var{n}[,affinity=@var{affinity}]
++@itemx -vcpu [vcpunum=]@var{n}[,affinity=@var{affinity}]
++@findex -vcpu
+Set VCPU affinity: pin the thread of VCPU @var{n} to host CPU @var{affinity}. If used, specify it for all the VCPUs.
++ETEXI
++
+ DEF("numa", HAS_ARG, QEMU_OPTION_numa,
+ "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
+ "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
+diff --git a/vl.c b/vl.c
+index d63269332fe..754a03c9a5f 100644
+--- a/vl.c
++++ b/vl.c
+@@ -135,6 +135,7 @@ int main(int argc, char **argv)
+ #define MAX_VIRTIO_CONSOLES 1
+ #define MAX_SCLP_CONSOLES 1
+
++#define MAX_VCPUS CPU_SETSIZE
+ static const char *data_dir[16];
+ static int data_dir_idx;
+ const char *bios_name = NULL;
+@@ -167,6 +168,8 @@ int smp_cpus = 1;
+ int max_cpus = 1;
+ int smp_cores = 1;
+ int smp_threads = 1;
++int vcpu_affinity[MAX_VCPUS];
++int num_affinity = 0;
+ int acpi_enabled = 1;
+ int no_hpet = 0;
+ int fd_bootchk = 1;
+@@ -1212,6 +1215,57 @@ static QemuOptsList qemu_smp_opts = {
+ },
+ };
+
++static QemuOptsList qemu_vcpu_opts = {
++ .name = "vcpu-opts",
++ .implied_opt_name = "vcpunum",
++ .head = QTAILQ_HEAD_INITIALIZER(qemu_vcpu_opts.head),
++ .desc = {
++ {
++ .name = "vcpunum",
++ .type = QEMU_OPT_NUMBER,
++ }, {
++ .name = "affinity",
++ .type = QEMU_OPT_NUMBER,
++ },
++ { /*End of list */ }
++ },
++};
++
++static int parse_vcpu(void *opaque, QemuOpts *opts, Error **errp)
++{
++ if (opts) {
++ unsigned vcpu = qemu_opt_get_number(opts, "vcpunum", 0);
++ unsigned affinity = qemu_opt_get_number(opts,"affinity", 0);
++
++ if (vcpu < smp_cpus * smp_cores * smp_threads) {
++ if (vcpu_affinity[vcpu] == -1) {
++ vcpu_affinity[vcpu] = affinity;
++ }
++ else {
++ error_report("Duplicate affinity statement for vcpu %d\n", vcpu);
++ return -1;
++ }
++ num_affinity += 1;
++ }
++ else {
++ error_report("VCPU %d is more than allowed %d VCPUs in the system\n", vcpu, smp_cores);
++ return -1;
++ }
++ }
++ return 0;
++}
++
++static void parse_vcpu_opts(MachineClass *mc)
++{
++ int i;
++ for (i = 0; i < MAX_VCPUS; i++)
++ vcpu_affinity[i] = -1;
++
++ if (qemu_opts_foreach(qemu_find_opts("vcpu-opts"), parse_vcpu, NULL, NULL)) {
++ exit(1);
++ }
++}
++
+ static void smp_parse(QemuOpts *opts)
+ {
+ if (opts) {
+@@ -3067,6 +3121,7 @@ int main(int argc, char **argv, char **envp)
+ qemu_add_opts(&qemu_accel_opts);
+ qemu_add_opts(&qemu_mem_opts);
+ qemu_add_opts(&qemu_smp_opts);
++ qemu_add_opts(&qemu_vcpu_opts);
+ qemu_add_opts(&qemu_boot_opts);
+ qemu_add_opts(&qemu_sandbox_opts);
+ qemu_add_opts(&qemu_add_fd_opts);
+@@ -3818,6 +3873,12 @@ int main(int argc, char **argv, char **envp)
+ exit(1);
+ }
+ break;
++ case QEMU_OPTION_vcpu:
++ if (!qemu_opts_parse_noisily(qemu_find_opts("vcpu-opts"),
++ optarg, true)) {
++ exit(1);
++ }
++ break;
+ case QEMU_OPTION_vnc:
+ vnc_parse(optarg, &error_fatal);
+ break;
+@@ -4243,6 +4304,7 @@ int main(int argc, char **argv, char **envp)
+ exit(1);
+ }
+
++ parse_vcpu_opts(machine_class);
+ /*
+ * Get the default machine options from the machine if it is not already
+ * specified either by the configuration file or by the command line.
diff --git a/qemu-ga.service b/qemu-ga.service
new file mode 100644
index 000000000000..abbb6ab9dfe2
--- /dev/null
+++ b/qemu-ga.service
@@ -0,0 +1,9 @@
+[Unit]
+Description=QEMU Guest Agent
+ConditionPathExists=/dev/virtio-ports/org.qemu.guest_agent.0
+
+[Service]
+ExecStart=/usr/bin/qemu-ga
+
+[Install]
+WantedBy=multi-user.target
diff --git a/qemu.install b/qemu.install
new file mode 100644
index 000000000000..d90269ecfecd
--- /dev/null
+++ b/qemu.install
@@ -0,0 +1,8 @@
+# Arg 1: the new package version
+post_install() {
+ # trigger events on modules files when already loaded
+ for _f in /sys/devices/virtual/misc/vhost-net; do
+ [[ -e "$_f" ]] && udevadm trigger "$_f"
+ done
+ :
+}
diff --git a/v2_qemu_zen_smt_cache.patch b/v2_qemu_zen_smt_cache.patch
new file mode 100644
index 000000000000..e5d0a6e7652c
--- /dev/null
+++ b/v2_qemu_zen_smt_cache.patch
@@ -0,0 +1,172 @@
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index ddc45abd70..ebf27ba7e9 100644
+--- a/target/i386/cpu.c
++++ b/target/i386/cpu.c
+@@ -113,7 +113,9 @@
+ /* L1 instruction cache: */
+ #define L1I_LINE_SIZE 64
+ #define L1I_ASSOCIATIVITY 8
++#define L1I_ASSOC_AMD_ZEN 4
+ #define L1I_SETS 64
++#define L1I_SETS_AMD_ZEN 256
+ #define L1I_PARTITIONS 1
+ /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 32KiB */
+ #define L1I_DESCRIPTOR CPUID_2_L1I_32KB_8WAY_64B
+@@ -125,7 +127,9 @@
+ /* Level 2 unified cache: */
+ #define L2_LINE_SIZE 64
+ #define L2_ASSOCIATIVITY 16
++#define L2_ASSOC_AMD_ZEN 8
+ #define L2_SETS 4096
++#define L2_SETS_AMD_ZEN 1024
+ #define L2_PARTITIONS 1
+ /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 4MiB */
+ /*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */
+@@ -142,6 +146,7 @@
+ #define L3_N_LINE_SIZE 64
+ #define L3_N_ASSOCIATIVITY 16
+ #define L3_N_SETS 16384
++#define L3_N_SETS_AMD_ZEN 4096
+ #define L3_N_PARTITIONS 1
+ #define L3_N_DESCRIPTOR CPUID_2_L3_16MB_16WAY_64B
+ #define L3_N_LINES_PER_TAG 1
+@@ -3072,6 +3077,91 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
+ *edx = 0;
+ }
+ break;
++ case 0x8000001D: /* AMD TOPOEXT cache info for ZEN */
++ if (cpu->cache_info_passthrough) {
++ host_cpuid(index, count, eax, ebx, ecx, edx);
++ break;
++ } else if ((env->cpuid_version & 0xFF00F00) == 0x800F00) {
++ *eax = 0;
++ switch (count) {
++ case 0: /* L1 dcache info */
++ *eax |= CPUID_4_TYPE_DCACHE | \
++ CPUID_4_LEVEL(1) | \
++ CPUID_4_SELF_INIT_LEVEL | \
++ ((cs->nr_threads - 1) << 14);
++ *ebx = (L1D_LINE_SIZE - 1) | \
++ ((L1D_PARTITIONS - 1) << 12) | \
++ ((L1D_ASSOCIATIVITY - 1) << 22);
++ *ecx = L1D_SETS - 1;
++ *edx = 0;
++ break;
++ case 1: /* L1 icache info */
++ *eax |= CPUID_4_TYPE_ICACHE | \
++ CPUID_4_LEVEL(1) | \
++ CPUID_4_SELF_INIT_LEVEL | \
++ ((cs->nr_threads - 1) << 14);
++ *ebx = (L1I_LINE_SIZE - 1) | \
++ ((L1I_PARTITIONS - 1) << 12) | \
++ ((L1I_ASSOC_AMD_ZEN - 1) << 22);
++ *ecx = L1I_SETS_AMD_ZEN - 1;
++ *edx = 0;
++ break;
++ case 2: /* L2 cache info */
++ *eax |= CPUID_4_TYPE_UNIFIED | \
++ CPUID_4_LEVEL(2) | \
++ CPUID_4_SELF_INIT_LEVEL | \
++ ((cs->nr_threads - 1) << 14);
++ *ebx = (L2_LINE_SIZE - 1) | \
++ ((L2_PARTITIONS - 1) << 12) | \
++ ((L2_ASSOC_AMD_ZEN - 1) << 22);
++ *ecx = L2_SETS_AMD_ZEN - 1;
++ *edx = CPUID_4_INCLUSIVE;
++ break;
++ case 3: /* L3 cache info */
++ if (!cpu->enable_l3_cache) {
++ *eax = 0;
++ *ebx = 0;
++ *ecx = 0;
++ *edx = 0;
++ break;
++ }
++ *eax |= CPUID_4_TYPE_UNIFIED | \
++ CPUID_4_LEVEL(3) | \
++ CPUID_4_SELF_INIT_LEVEL | \
++ ((cs->nr_cores * cs->nr_threads - 1) << 14);
++ *ebx = (L3_N_LINE_SIZE - 1) | \
++ ((L3_N_PARTITIONS - 1) << 12) | \
++ ((L3_N_ASSOCIATIVITY - 1) << 22);
++ *ecx = L3_N_SETS_AMD_ZEN - 1;
++ *edx = CPUID_4_NO_INVD_SHARING;
++ break;
++ default: /* end of info */
++ *eax = 0;
++ *ebx = 0;
++ *ecx = 0;
++ *edx = 0;
++ break;
++ }
++ } else {
++ *eax = 0;
++ *ebx = 0;
++ *ecx = 0;
++ *edx = 0;
++ }
++ break;
++ case 0x8000001E: /* AMD TOPOEXT cpu topology info for ZEN */
++ if ((env->cpuid_version & 0xFF00F00) == 0x800F00) {
++ *eax = cpu->apic_id;
++ *ebx = (cs->nr_threads - 1) << 8 | cpu->core_id;
++ *ecx = cpu->socket_id;
++ *edx = 0;
++ } else {
++ *eax = 0;
++ *ebx = 0;
++ *ecx = 0;
++ *edx = 0;
++ }
++ break;
+ case 0xC0000000:
+ *eax = env->cpuid_xlevel2;
+ *ebx = 0;
+@@ -3742,7 +3832,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
+ * NOTE: the following code has to follow qemu_init_vcpu(). Otherwise
+ * cs->nr_threads hasn't be populated yet and the checking is incorrect.
+ */
+- if (!IS_INTEL_CPU(env) && cs->nr_threads > 1 && !ht_warned) {
++ if (!IS_INTEL_CPU(env) && cs->nr_threads > 1 && !ht_warned && (env->cpuid_version & 0xFF00F00) != 0x800F00) {
+ error_report("AMD CPU doesn't support hyperthreading. Please configure"
+ " -smp options properly.");
+ ht_warned = true;
+diff --git a/target/i386/kvm.c b/target/i386/kvm.c
+index 6db7783edc..d6b4e1ae74 100644
+--- a/target/i386/kvm.c
++++ b/target/i386/kvm.c
+@@ -869,9 +869,31 @@ int kvm_arch_init_vcpu(CPUState *cs)
+ }
+ c = &cpuid_data.entries[cpuid_i++];
+
+- c->function = i;
+- c->flags = 0;
+- cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
++ switch (i) {
++ case 0x8000001d:
++ for (j = 0; ; j++) {
++ c->function = i;
++ c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
++ c->index = j;
++ cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
++
++ if (c->eax == 0) {
++ break;
++ }
++ if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
++ fprintf(stderr, "cpuid_data is full, no space for "
++ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
++ abort();
++ }
++ c = &cpuid_data.entries[cpuid_i++];
++ }
++ break;
++ default:
++ c->function = i;
++ c->flags = 0;
++ cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
++ break;
++ }
+ }
+
+ /* Call Centaur's CPUID instructions they are supported. */
diff --git a/v4_ivshmem.patch b/v4_ivshmem.patch
new file mode 100644
index 000000000000..65065b6807a2
--- /dev/null
+++ b/v4_ivshmem.patch
@@ -0,0 +1,331 @@
+As of commit 660c97eef6f8 ("ivshmem: use kvm irqfd for msi notifications"),
+QEMU crashes with:
+
+ kvm_irqchip_commit_routes: Assertion `ret == 0' failed.
+
+if the ivshmem device is configured with more vectors than the server
+supports. This is caused by ivshmem_vector_unmask() being called on
+vectors that have not been initialized by ivshmem_add_kvm_msi_virq().
+
+This commit fixes it by adding a simple check to the mask and unmask
+callbacks.
+
+Note that the opposite mismatch, if the server supplies more vectors than
+what the device is configured for, is already handled and leads to output
+like:
+
+ Too many eventfd received, device has 1 vectors
+
+To reproduce the assert, run:
+
+ ivshmem-server -n 0
+
+and QEMU with:
+
+ -device ivshmem-doorbell,chardev=iv
+ -chardev socket,path=/tmp/ivshmem_socket,id=iv
+
+then load the Windows driver, at the time of writing available at:
+
+https://github.com/virtio-win/kvm-guest-drivers-windows/tree/master/ivshmem
+
+The issue is believed to have been masked by other guest drivers, notably
+Linux ones, not enabling MSI-X on the device.
+
+Fixes: 660c97eef6f8 ("ivshmem: use kvm irqfd for msi notifications")
+Signed-off-by: Ladi Prosek <address@hidden>
+Reviewed-by: Marc-André Lureau <address@hidden>
+Reviewed-by: Markus Armbruster <address@hidden>
+---
+ hw/misc/ivshmem.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
+index a5a46827fe..6e46669744 100644
+--- a/hw/misc/ivshmem.c
++++ b/hw/misc/ivshmem.c
+@@ -317,6 +317,10 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
+ int ret;
+
+ IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
++ if (!v->pdev) {
++ error_report("ivshmem: vector %d route does not exist", vector);
++ return -EINVAL;
++ }
+
+ ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
+ if (ret < 0) {
+@@ -331,12 +335,16 @@ static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
+ {
+ IVShmemState *s = IVSHMEM_COMMON(dev);
+ EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
++ MSIVector *v = &s->msi_vectors[vector];
+ int ret;
+
+ IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
++ if (!v->pdev) {
++ error_report("ivshmem: vector %d route does not exist", vector);
++ return;
++ }
+
+- ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n,
+- s->msi_vectors[vector].virq);
++ ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq);
+ if (ret != 0) {
+ error_report("remove_irqfd_notifier_gsi failed");
+ }
+--
+2.13.6
+As of commit 660c97eef6f8 ("ivshmem: use kvm irqfd for msi notifications"),
+QEMU crashes with:
+
+ivshmem: msix_set_vector_notifiers failed
+msix_unset_vector_notifiers: Assertion `dev->msix_vector_use_notifier &&
+dev->msix_vector_release_notifier' failed.
+
+if MSI-X is repeatedly enabled and disabled on the ivshmem device, for example
+by loading and unloading the Windows ivshmem driver. This is because
+msix_unset_vector_notifiers() doesn't call any of the release notifier callbacks
+since MSI-X is already disabled at that point (msix_enabled() returning false
+is how this transition is detected in the first place). Thus
+ivshmem_vector_mask()
+doesn't run and when MSI-X is subsequently enabled again ivshmem_vector_unmask()
+fails.
+
+This is fixed by keeping track of unmasked vectors and making sure that
+ivshmem_vector_mask() always runs on MSI-X disable.
+
+Fixes: 660c97eef6f8 ("ivshmem: use kvm irqfd for msi notifications")
+Signed-off-by: Ladi Prosek <address@hidden>
+Reviewed-by: Markus Armbruster <address@hidden>
+---
+ hw/misc/ivshmem.c | 32 ++++++++++++++++++++++++++------
+ 1 file changed, 26 insertions(+), 6 deletions(-)
+
+diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
+index 6e46669744..91364d8364 100644
+--- a/hw/misc/ivshmem.c
++++ b/hw/misc/ivshmem.c
+@@ -77,6 +77,7 @@ typedef struct Peer {
+ typedef struct MSIVector {
+ PCIDevice *pdev;
+ int virq;
++ bool unmasked;
+ } MSIVector;
+
+ typedef struct IVShmemState {
+@@ -321,6 +322,7 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
+ error_report("ivshmem: vector %d route does not exist", vector);
+ return -EINVAL;
+ }
++ assert(!v->unmasked);
+
+ ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
+ if (ret < 0) {
+@@ -328,7 +330,13 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
+ }
+ kvm_irqchip_commit_routes(kvm_state);
+
+- return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
++ ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
++ if (ret < 0) {
++ return ret;
++ }
++ v->unmasked = true;
++
++ return 0;
+ }
+
+ static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
+@@ -343,11 +351,14 @@ static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
+ error_report("ivshmem: vector %d route does not exist", vector);
+ return;
+ }
++ assert(v->unmasked);
+
+ ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq);
+- if (ret != 0) {
++ if (ret < 0) {
+ error_report("remove_irqfd_notifier_gsi failed");
++ return;
+ }
++ v->unmasked = false;
+ }
+
+ static void ivshmem_vector_poll(PCIDevice *dev,
+@@ -817,11 +828,20 @@ static void ivshmem_disable_irqfd(IVShmemState *s)
+ PCIDevice *pdev = PCI_DEVICE(s);
+ int i;
+
+- for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
+- ivshmem_remove_kvm_msi_virq(s, i);
+- }
+-
+ msix_unset_vector_notifiers(pdev);
++
++ for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
++ /*
++ * MSI-X is already disabled here so msix_unset_vector_notifiers()
++ * didn't call our release notifier. Do it now to keep our masks and
++ * unmasks balanced.
++ */
++ if (s->msi_vectors[i].unmasked) {
++ ivshmem_vector_mask(pdev, i);
++ }
++ ivshmem_remove_kvm_msi_virq(s, i);
++ }
++
+ }
+
+ static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
+--
+2.13.6
+Adds a rollback path to ivshmem_enable_irqfd() and fixes
+ivshmem_disable_irqfd() to bail if irqfd has not been enabled.
+
+To reproduce, run:
+
+ ivshmem-server -n 0
+
+and QEMU with:
+
+ -device ivshmem-doorbell,chardev=iv
+ -chardev socket,path=/tmp/ivshmem_socket,id=iv
+
+then load, unload, and load again the Windows driver, at the time of writing
+available at:
+
+https://github.com/virtio-win/kvm-guest-drivers-windows/tree/master/ivshmem
+
+The issue is believed to have been masked by other guest drivers, notably
+Linux ones, not enabling MSI-X on the device.
+
+Signed-off-by: Ladi Prosek <address@hidden>
+Reviewed-by: Markus Armbruster <address@hidden>
+---
+ hw/misc/ivshmem.c | 37 ++++++++++++++++++++++++-------------
+ 1 file changed, 24 insertions(+), 13 deletions(-)
+
+diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
+index 91364d8364..d1bb246d12 100644
+--- a/hw/misc/ivshmem.c
++++ b/hw/misc/ivshmem.c
+@@ -786,6 +786,20 @@ static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp)
+ return 0;
+ }
+
++static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
++{
++ IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);
++
++ if (s->msi_vectors[vector].pdev == NULL) {
++ return;
++ }
++
++ /* it was cleaned when masked in the frontend. */
++ kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);
++
++ s->msi_vectors[vector].pdev = NULL;
++}
++
+ static void ivshmem_enable_irqfd(IVShmemState *s)
+ {
+ PCIDevice *pdev = PCI_DEVICE(s);
+@@ -797,7 +811,7 @@ static void ivshmem_enable_irqfd(IVShmemState *s)
+ ivshmem_add_kvm_msi_virq(s, i, &err);
+ if (err) {
+ error_report_err(err);
+- /* TODO do we need to handle the error? */
++ goto undo;
+ }
+ }
+
+@@ -806,21 +820,14 @@ static void ivshmem_enable_irqfd(IVShmemState *s)
+ ivshmem_vector_mask,
+ ivshmem_vector_poll)) {
+ error_report("ivshmem: msix_set_vector_notifiers failed");
++ goto undo;
+ }
+-}
++ return;
+
+-static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
+-{
+- IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);
+-
+- if (s->msi_vectors[vector].pdev == NULL) {
+- return;
++undo:
++ while (--i >= 0) {
++ ivshmem_remove_kvm_msi_virq(s, i);
+ }
+-
+- /* it was cleaned when masked in the frontend. */
+- kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);
+-
+- s->msi_vectors[vector].pdev = NULL;
+ }
+
+ static void ivshmem_disable_irqfd(IVShmemState *s)
+@@ -828,6 +835,10 @@ static void ivshmem_disable_irqfd(IVShmemState *s)
+ PCIDevice *pdev = PCI_DEVICE(s);
+ int i;
+
++ if (!pdev->msix_vector_use_notifier) {
++ return;
++ }
++
+ msix_unset_vector_notifiers(pdev);
+
+ for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
+--
+2.13.6
+The effects of ivshmem_enable_irqfd() were not undone on device reset.
+
+This manifested as:
+ivshmem_add_kvm_msi_virq: Assertion `!s->msi_vectors[vector].pdev' failed.
+
+when irqfd was enabled before reset and then enabled again after reset, making
+ivshmem_enable_irqfd() run for the second time.
+
+To reproduce, run:
+
+ ivshmem-server
+
+and QEMU with:
+
+ -device ivshmem-doorbell,chardev=iv
+ -chardev socket,path=/tmp/ivshmem_socket,id=iv
+
+then install the Windows driver, at the time of writing available at:
+
+https://github.com/virtio-win/kvm-guest-drivers-windows/tree/master/ivshmem
+
+and crash-reboot the guest by inducing a BSOD.
+
+Signed-off-by: Ladi Prosek <address@hidden>
+---
+ hw/misc/ivshmem.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
+index d1bb246d12..9c7e74ef12 100644
+--- a/hw/misc/ivshmem.c
++++ b/hw/misc/ivshmem.c
+@@ -758,10 +758,14 @@ static void ivshmem_msix_vector_use(IVShmemState *s)
+ }
+ }
+
++static void ivshmem_disable_irqfd(IVShmemState *s);
++
+ static void ivshmem_reset(DeviceState *d)
+ {
+ IVShmemState *s = IVSHMEM_COMMON(d);
+
++ ivshmem_disable_irqfd(s);
++
+ s->intrstatus = 0;
+ s->intrmask = 0;
+ if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
+--
+2.13.6
diff --git a/vfio-msi-1.patch b/vfio-msi-1.patch
new file mode 100644
index 000000000000..8f0688eeebe4
--- /dev/null
+++ b/vfio-msi-1.patch
@@ -0,0 +1,133 @@
+This will later be used to include list initialization
+
+Signed-off-by: Alex Williamson <address@hidden>
+---
+ hw/vfio/pci-quirks.c | 48 +++++++++++++++++++++---------------------------
+ 1 file changed, 21 insertions(+), 27 deletions(-)
+
+diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
+index e5779a7ad35b..10af23217292 100644
+--- a/hw/vfio/pci-quirks.c
++++ b/hw/vfio/pci-quirks.c
+@@ -275,6 +275,15 @@ static const MemoryRegionOps vfio_ati_3c3_quirk = {
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ };
+
++static VFIOQuirk *vfio_quirk_alloc(int nr_mem)
++{
++ VFIOQuirk *quirk = g_malloc0(sizeof(*quirk));
++ quirk->mem = g_new0(MemoryRegion, nr_mem);
++ quirk->nr_mem = nr_mem;
++
++ return quirk;
++}
++
+ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
+ {
+ VFIOQuirk *quirk;
+@@ -288,9 +297,7 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
+ return;
+ }
+
+- quirk = g_malloc0(sizeof(*quirk));
+- quirk->mem = g_new0(MemoryRegion, 1);
+- quirk->nr_mem = 1;
++ quirk = vfio_quirk_alloc(1);
+
+ memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
+ "vfio-ati-3c3-quirk", 1);
+@@ -323,9 +330,7 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
+ return;
+ }
+
+- quirk = g_malloc0(sizeof(*quirk));
+- quirk->mem = g_new0(MemoryRegion, 2);
+- quirk->nr_mem = 2;
++ quirk = vfio_quirk_alloc(2);
+ window = quirk->data = g_malloc0(sizeof(*window) +
+ sizeof(VFIOConfigWindowMatch));
+ window->vdev = vdev;
+@@ -371,10 +376,9 @@ static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
+ return;
+ }
+
+- quirk = g_malloc0(sizeof(*quirk));
++ quirk = vfio_quirk_alloc(1);
+ mirror = quirk->data = g_malloc0(sizeof(*mirror));
+- mirror->mem = quirk->mem = g_new0(MemoryRegion, 1);
+- quirk->nr_mem = 1;
++ mirror->mem = quirk->mem;
+ mirror->vdev = vdev;
+ mirror->offset = 0x4000;
+ mirror->bar = nr;
+@@ -548,10 +552,8 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
+ return;
+ }
+
+- quirk = g_malloc0(sizeof(*quirk));
++ quirk = vfio_quirk_alloc(2);
+ quirk->data = data = g_malloc0(sizeof(*data));
+- quirk->mem = g_new0(MemoryRegion, 2);
+- quirk->nr_mem = 2;
+ data->vdev = vdev;
+
+ memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
+@@ -667,9 +669,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
+ return;
+ }
+
+- quirk = g_malloc0(sizeof(*quirk));
+- quirk->mem = g_new0(MemoryRegion, 4);
+- quirk->nr_mem = 4;
++ quirk = vfio_quirk_alloc(4);
+ bar5 = quirk->data = g_malloc0(sizeof(*bar5) +
+ (sizeof(VFIOConfigWindowMatch) * 2));
+ window = &bar5->window;
+@@ -762,10 +762,9 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
+ return;
+ }
+
+- quirk = g_malloc0(sizeof(*quirk));
++ quirk = vfio_quirk_alloc(1);
+ mirror = quirk->data = g_malloc0(sizeof(*mirror));
+- mirror->mem = quirk->mem = g_new0(MemoryRegion, 1);
+- quirk->nr_mem = 1;
++ mirror->mem = quirk->mem;
+ mirror->vdev = vdev;
+ mirror->offset = 0x88000;
+ mirror->bar = nr;
+@@ -781,10 +780,9 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
+
+ /* The 0x1800 offset mirror only seems to get used by legacy VGA */
+ if (vdev->vga) {
+- quirk = g_malloc0(sizeof(*quirk));
++ quirk = vfio_quirk_alloc(1);
+ mirror = quirk->data = g_malloc0(sizeof(*mirror));
+- mirror->mem = quirk->mem = g_new0(MemoryRegion, 1);
+- quirk->nr_mem = 1;
++ mirror->mem = quirk->mem;
+ mirror->vdev = vdev;
+ mirror->offset = 0x1800;
+ mirror->bar = nr;
+@@ -945,9 +943,7 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
+ return;
+ }
+
+- quirk = g_malloc0(sizeof(*quirk));
+- quirk->mem = g_new0(MemoryRegion, 2);
+- quirk->nr_mem = 2;
++ quirk = vfio_quirk_alloc(2);
+ quirk->data = rtl = g_malloc0(sizeof(*rtl));
+ rtl->vdev = vdev;
+
+@@ -1507,9 +1503,7 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
+ }
+
+ /* Setup our quirk to munge GTT addresses to the VM allocated buffer */
+- quirk = g_malloc0(sizeof(*quirk));
+- quirk->mem = g_new0(MemoryRegion, 2);
+- quirk->nr_mem = 2;
++ quirk = vfio_quirk_alloc(2);
+ igd = quirk->data = g_malloc0(sizeof(*igd));
+ igd->vdev = vdev;
+ igd->index = ~0;
diff --git a/vfio-msi-2.patch b/vfio-msi-2.patch
new file mode 100644
index 000000000000..69af39e83bd2
--- /dev/null
+++ b/vfio-msi-2.patch
@@ -0,0 +1,82 @@
+We might wish to handle some quirks via ioeventfds, add a list of
+ioeventfds to the quirk.
+
+Signed-off-by: Alex Williamson <address@hidden>
+---
+ hw/vfio/pci-quirks.c | 17 +++++++++++++++++
+ hw/vfio/pci.h | 11 +++++++++++
+ 2 files changed, 28 insertions(+)
+
+diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
+index 10af23217292..e4cf4ea2dd9c 100644
+--- a/hw/vfio/pci-quirks.c
++++ b/hw/vfio/pci-quirks.c
+@@ -12,6 +12,7 @@
+
+ #include "qemu/osdep.h"
+ #include "qemu/error-report.h"
++#include "qemu/main-loop.h"
+ #include "qemu/range.h"
+ #include "qapi/error.h"
+ #include "qapi/visitor.h"
+@@ -278,12 +279,24 @@ static const MemoryRegionOps vfio_ati_3c3_quirk = {
+ static VFIOQuirk *vfio_quirk_alloc(int nr_mem)
+ {
+ VFIOQuirk *quirk = g_malloc0(sizeof(*quirk));
++ QLIST_INIT(&quirk->ioeventfds);
+ quirk->mem = g_new0(MemoryRegion, nr_mem);
+ quirk->nr_mem = nr_mem;
+
+ return quirk;
+ }
+
++static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd)
++{
++ QLIST_REMOVE(ioeventfd, next);
++ memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
++ ioeventfd->match_data, ioeventfd->data,
++ &ioeventfd->e);
++ qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), NULL, NULL, NULL);
++ event_notifier_cleanup(&ioeventfd->e);
++ g_free(ioeventfd);
++}
++
+ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
+ {
+ VFIOQuirk *quirk;
+@@ -1668,6 +1681,10 @@ void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
+ int i;
+
+ QLIST_FOREACH(quirk, &bar->quirks, next) {
++ while (!QLIST_EMPTY(&quirk->ioeventfds)) {
++ vfio_ioeventfd_exit(QLIST_FIRST(&quirk->ioeventfds));
++ }
++
+ for (i = 0; i < quirk->nr_mem; i++) {
+ memory_region_del_subregion(bar->region.mem, &quirk->mem[i]);
+ }
+diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
+index f4aa13e021fa..146065c2f715 100644
+--- a/hw/vfio/pci.h
++++ b/hw/vfio/pci.h
+@@ -24,9 +24,20 @@
+
+ struct VFIOPCIDevice;
+
++typedef struct VFIOIOEventFD {
++ QLIST_ENTRY(VFIOIOEventFD) next;
++ MemoryRegion *mr;
++ hwaddr addr;
++ unsigned size;
++ bool match_data;
++ uint64_t data;
++ EventNotifier e;
++} VFIOIOEventFD;
++
+ typedef struct VFIOQuirk {
+ QLIST_ENTRY(VFIOQuirk) next;
+ void *data;
++ QLIST_HEAD(, VFIOIOEventFD) ioeventfds;
+ int nr_mem;
+ MemoryRegion *mem;
+ } VFIOQuirk;
diff --git a/vfio-msi-3.patch b/vfio-msi-3.patch
new file mode 100644
index 000000000000..6c63a134a870
--- /dev/null
+++ b/vfio-msi-3.patch
@@ -0,0 +1,170 @@
+Record data writes that come through the NVIDIA BAR0 quirk, if we get
+enough in a row that we're only passing through, automatically enable
+an ioeventfd for it. The primary target for this is the MSI-ACK
+that NVIDIA uses to allow the MSI interrupt to re-trigger, which is a
+4-byte write, data value 0x0 to offset 0x704 into the quirk, 0x88704
+into BAR0 MMIO space. For an interrupt latency sensitive micro-
+benchmark, this takes us from 83% of performance versus disabling the
+quirk entirely (which GeForce cannot do), to almost 90%.
+
+Signed-off-by: Alex Williamson <address@hidden>
+---
+ hw/vfio/pci-quirks.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++-
+ hw/vfio/pci.h | 2 +
+ 2 files changed, 89 insertions(+), 2 deletions(-)
+
+diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
+index e4cf4ea2dd9c..e739efe601b1 100644
+--- a/hw/vfio/pci-quirks.c
++++ b/hw/vfio/pci-quirks.c
+@@ -203,6 +203,7 @@ typedef struct VFIOConfigMirrorQuirk {
+ uint32_t offset;
+ uint8_t bar;
+ MemoryRegion *mem;
++ uint8_t data[];
+ } VFIOConfigMirrorQuirk;
+
+ static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
+@@ -297,6 +298,50 @@ static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd)
+ g_free(ioeventfd);
+ }
+
++static void vfio_ioeventfd_handler(void *opaque)
++{
++ VFIOIOEventFD *ioeventfd = opaque;
++
++ if (event_notifier_test_and_clear(&ioeventfd->e)) {
++ vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
++ ioeventfd->data, ioeventfd->size);
++ }
++}
++
++static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
++ MemoryRegion *mr, hwaddr addr,
++ unsigned size, uint64_t data,
++ VFIORegion *region,
++ hwaddr region_addr)
++{
++ VFIOIOEventFD *ioeventfd = g_malloc0(sizeof(*ioeventfd));
++
++ if (event_notifier_init(&ioeventfd->e, 0)) {
++ g_free(ioeventfd);
++ return NULL;
++ }
++
++ ioeventfd->mr = mr;
++ ioeventfd->addr = addr;
++ ioeventfd->size = size;
++ ioeventfd->match_data = true;
++ ioeventfd->data = data;
++ ioeventfd->region = region;
++ ioeventfd->region_addr = region_addr;
++
++ qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
++ vfio_ioeventfd_handler, NULL, ioeventfd);
++ memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr,
++ ioeventfd->size, ioeventfd->match_data,
++ ioeventfd->data, &ioeventfd->e);
++
++ info_report("Enabled automatic ioeventfd acceleration for %s region %d, "
++ "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u",
++ vdev->vbasedev.name, region->nr, region_addr, data, size);
++
++ return ioeventfd;
++}
++
+ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
+ {
+ VFIOQuirk *quirk;
+@@ -732,6 +777,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
+ trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
+ }
+
++typedef struct LastDataSet {
++ hwaddr addr;
++ uint64_t data;
++ unsigned size;
++ int count;
++} LastDataSet;
++
+ /*
+ * Finally, BAR0 itself. We want to redirect any accesses to either
+ * 0x1800 or 0x88000 through the PCI config space access functions.
+@@ -742,6 +794,7 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
+ VFIOConfigMirrorQuirk *mirror = opaque;
+ VFIOPCIDevice *vdev = mirror->vdev;
+ PCIDevice *pdev = &vdev->pdev;
++ LastDataSet *last = (LastDataSet *)&mirror->data;
+
+ vfio_generic_quirk_mirror_write(opaque, addr, data, size);
+
+@@ -756,6 +809,38 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
+ addr + mirror->offset, data, size);
+ trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
+ }
++
++ /*
++ * Automatically add an ioeventfd to handle any repeated write with the
++ * same data and size above the standard PCI config space header. This is
++ * primarily expected to accelerate the MSI-ACK behavior, such as noted
++ * above. Current hardware/drivers should trigger an ioeventfd at config
++ * offset 0x704 (region offset 0x88704), with data 0x0, size 4.
++ */
++ if (addr > PCI_STD_HEADER_SIZEOF) {
++ if (addr != last->addr || data != last->data || size != last->size) {
++ last->addr = addr;
++ last->data = data;
++ last->size = size;
++ last->count = 1;
++ } else if (++last->count > 10) {
++ VFIOIOEventFD *ioeventfd;
++
++ ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size, data,
++ &vdev->bars[mirror->bar].region,
++ mirror->offset + addr);
++ if (ioeventfd) {
++ VFIOQuirk *quirk;
++
++ QLIST_FOREACH(quirk, &vdev->bars[mirror->bar].quirks, next) {
++ if (quirk->data == mirror) {
++ QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next);
++ break;
++ }
++ }
++ }
++ }
++ }
+ }
+
+ static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
+@@ -776,7 +861,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
+ }
+
+ quirk = vfio_quirk_alloc(1);
+- mirror = quirk->data = g_malloc0(sizeof(*mirror));
++ mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
+ mirror->mem = quirk->mem;
+ mirror->vdev = vdev;
+ mirror->offset = 0x88000;
+@@ -794,7 +879,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
+ /* The 0x1800 offset mirror only seems to get used by legacy VGA */
+ if (vdev->vga) {
+ quirk = vfio_quirk_alloc(1);
+- mirror = quirk->data = g_malloc0(sizeof(*mirror));
++ mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
+ mirror->mem = quirk->mem;
+ mirror->vdev = vdev;
+ mirror->offset = 0x1800;
+diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
+index 146065c2f715..ec53b9935725 100644
+--- a/hw/vfio/pci.h
++++ b/hw/vfio/pci.h
+@@ -32,6 +32,8 @@ typedef struct VFIOIOEventFD {
+ bool match_data;
+ uint64_t data;
+ EventNotifier e;
++ VFIORegion *region;
++ hwaddr region_addr;
+ } VFIOIOEventFD;
+
+ typedef struct VFIOQuirk {
diff --git a/vfio-msi-4.patch b/vfio-msi-4.patch
new file mode 100644
index 000000000000..98417a11613c
--- /dev/null
+++ b/vfio-msi-4.patch
@@ -0,0 +1,42 @@
+Update with proposed ioeventfd API.
+
+Signed-off-by: Alex Williamson <address@hidden>
+---
+ linux-headers/linux/vfio.h | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
+index 4312e961ffd3..0921994daa6d 100644
+--- a/linux-headers/linux/vfio.h
++++ b/linux-headers/linux/vfio.h
+@@ -503,6 +503,30 @@ struct vfio_pci_hot_reset {
+
+ #define VFIO_DEVICE_PCI_HOT_RESET _IO(VFIO_TYPE, VFIO_BASE + 13)
+
++/**
++ * VFIO_DEVICE_IOEVENTFD - _IOW(VFIO_TYPE, VFIO_BASE + 14,
++ * struct vfio_device_ioeventfd)
++ *
++ * Perform a write to the device at the specified device fd offset, with
++ * the specified data and width when the provided eventfd is triggered.
++ *
++ * Return: 0 on success, -errno on failure.
++ */
++struct vfio_device_ioeventfd {
++ __u32 argsz;
++ __u32 flags;
++#define VFIO_DEVICE_IOEVENTFD_8 (1 << 0) /* 1-byte write */
++#define VFIO_DEVICE_IOEVENTFD_16 (1 << 1) /* 2-byte write */
++#define VFIO_DEVICE_IOEVENTFD_32 (1 << 2) /* 4-byte write */
++#define VFIO_DEVICE_IOEVENTFD_64 (1 << 3) /* 8-byte write */
++#define VFIO_DEVICE_IOEVENTFD_SIZE_MASK (0xf)
++ __u64 offset; /* device fd offset of write */
++ __u64 data; /* data to be written */
++ __s32 fd; /* -1 for de-assignment */
++};
++
++#define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 14)
++
+ /* -------- API for Type1 VFIO IOMMU -------- */
+
+ /**
diff --git a/vfio-msi-5.patch b/vfio-msi-5.patch
new file mode 100644
index 000000000000..9033b604a7ca
--- /dev/null
+++ b/vfio-msi-5.patch
@@ -0,0 +1,104 @@
+With vfio ioeventfd support, we can program vfio-pci to perform a
+specified BAR write when an eventfd is triggered. This allows the
+KVM ioeventfd to be wired directly to vfio-pci, entirely avoiding
+userspace handling for these events. On the same micro-benchmark
+where the ioeventfd got us to almost 90% of performance versus
+disabling the GeForce quirks, this gets us to within 95%.
+
+Signed-off-by: Alex Williamson <address@hidden>
+---
+ hw/vfio/pci-quirks.c | 42 ++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 36 insertions(+), 6 deletions(-)
+
+diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
+index e739efe601b1..35a4d5197e2d 100644
+--- a/hw/vfio/pci-quirks.c
++++ b/hw/vfio/pci-quirks.c
+@@ -16,6 +16,7 @@
+ #include "qemu/range.h"
+ #include "qapi/error.h"
+ #include "qapi/visitor.h"
++#include <sys/ioctl.h>
+ #include "hw/nvram/fw_cfg.h"
+ #include "pci.h"
+ #include "trace.h"
+@@ -287,13 +288,27 @@ static VFIOQuirk *vfio_quirk_alloc(int nr_mem)
+ return quirk;
+ }
+
+-static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd)
++static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd)
+ {
++ struct vfio_device_ioeventfd vfio_ioeventfd;
++
+ QLIST_REMOVE(ioeventfd, next);
++
+ memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
+ ioeventfd->match_data, ioeventfd->data,
+ &ioeventfd->e);
++
+ qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), NULL, NULL, NULL);
++
++ vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
++ vfio_ioeventfd.flags = ioeventfd->size;
++ vfio_ioeventfd.data = ioeventfd->data;
++ vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
++ ioeventfd->region_addr;
++ vfio_ioeventfd.fd = -1;
++
++ ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
++
+ event_notifier_cleanup(&ioeventfd->e);
+ g_free(ioeventfd);
+ }
+@@ -315,6 +330,8 @@ static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
+ hwaddr region_addr)
+ {
+ VFIOIOEventFD *ioeventfd = g_malloc0(sizeof(*ioeventfd));
++ struct vfio_device_ioeventfd vfio_ioeventfd;
++ char vfio_enabled = '+';
+
+ if (event_notifier_init(&ioeventfd->e, 0)) {
+ g_free(ioeventfd);
+@@ -329,15 +346,28 @@ static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
+ ioeventfd->region = region;
+ ioeventfd->region_addr = region_addr;
+
+- qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
+- vfio_ioeventfd_handler, NULL, ioeventfd);
++ vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
++ vfio_ioeventfd.flags = ioeventfd->size;
++ vfio_ioeventfd.data = ioeventfd->data;
++ vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
++ ioeventfd->region_addr;
++ vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
++
++ if (ioctl(vdev->vbasedev.fd,
++ VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd) != 0) {
++ qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
++ vfio_ioeventfd_handler, NULL, ioeventfd);
++ vfio_enabled = '-';
++ }
++
+ memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr,
+ ioeventfd->size, ioeventfd->match_data,
+ ioeventfd->data, &ioeventfd->e);
+
+ info_report("Enabled automatic ioeventfd acceleration for %s region %d, "
+- "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u",
+- vdev->vbasedev.name, region->nr, region_addr, data, size);
++ "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u, vfio%c",
++ vdev->vbasedev.name, region->nr, region_addr, data, size,
++ vfio_enabled);
+
+ return ioeventfd;
+ }
+@@ -1767,7 +1797,7 @@ void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
+
+ QLIST_FOREACH(quirk, &bar->quirks, next) {
+ while (!QLIST_EMPTY(&quirk->ioeventfds)) {
+- vfio_ioeventfd_exit(QLIST_FIRST(&quirk->ioeventfds));
++ vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds));
+ }
+
+ for (i = 0; i < quirk->nr_mem; i++) {