diff options
author | Ryszard Knop | 2018-02-08 18:40:13 +0100 |
---|---|---|
committer | Ryszard Knop | 2018-02-08 18:40:13 +0100 |
commit | e07b610f20ddd6680d1adaab046051370aae1857 (patch) | |
tree | 1af239031d54e6d675820e23a1d6d0c1097a8ed0 | |
download | aur-e07b610f20ddd6680d1adaab046051370aae1857.tar.gz |
Initial commit (and hopefully the only one)
-rw-r--r-- | .SRCINFO | 119 | ||||
-rw-r--r-- | 65-kvm.rules | 1 | ||||
-rw-r--r-- | PKGBUILD | 223 | ||||
-rw-r--r-- | allow_elf64.patch | 27 | ||||
-rw-r--r-- | audio-improvements.patch | 1125 | ||||
-rw-r--r-- | cpu-pinning.patch | 186 | ||||
-rw-r--r-- | qemu-ga.service | 9 | ||||
-rw-r--r-- | qemu.install | 8 | ||||
-rw-r--r-- | v2_qemu_zen_smt_cache.patch | 172 | ||||
-rw-r--r-- | v4_ivshmem.patch | 331 | ||||
-rw-r--r-- | vfio-msi-1.patch | 133 | ||||
-rw-r--r-- | vfio-msi-2.patch | 82 | ||||
-rw-r--r-- | vfio-msi-3.patch | 170 | ||||
-rw-r--r-- | vfio-msi-4.patch | 42 | ||||
-rw-r--r-- | vfio-msi-5.patch | 104 |
15 files changed, 2732 insertions, 0 deletions
diff --git a/.SRCINFO b/.SRCINFO new file mode 100644 index 000000000000..d66838720b42 --- /dev/null +++ b/.SRCINFO @@ -0,0 +1,119 @@ +pkgbase = qemu-patched-vfiomsitest + pkgdesc = A generic and open source machine emulator and virtualizer - Patched for extra functionality + pkgver = 2.11.0 + pkgrel = 3 + url = http://wiki.qemu.org/ + arch = x86_64 + license = GPL2 + license = LGPL2.1 + makedepends = spice-protocol + makedepends = python2 + makedepends = ceph + makedepends = libiscsi + makedepends = glusterfs + depends = virglrenderer + depends = sdl2 + depends = vte3 + depends = libpulse + depends = seabios + depends = gnutls + depends = libpng + depends = libaio + depends = numactl + depends = jemalloc + depends = xfsprogs + depends = libnfs + depends = lzo + depends = snappy + depends = curl + depends = vde2 + depends = libcap-ng + depends = spice + depends = libcacard + depends = usbredir + source = http://wiki.qemu.org//download/qemu-2.11.0.tar.xz + source = http://wiki.qemu.org//download/qemu-2.11.0.tar.xz.sig + source = qemu-ga.service + source = 65-kvm.rules + source = allow_elf64.patch + source = cpu-pinning.patch + source = audio-improvements.patch + source = v2_qemu_zen_smt_cache.patch + source = v4_ivshmem.patch + source = vfio-msi-1.patch + source = vfio-msi-2.patch + source = vfio-msi-3.patch + source = vfio-msi-4.patch + source = vfio-msi-5.patch + validpgpkeys = CEACC9E15534EBABB82D3FA03353C9CEF108B584 + sha256sums = c9d34a79024eae080ce3853aa9afe503824520eefb440190383003081ce7f437 + sha256sums = SKIP + sha256sums = c39bcde4a09165e64419fd2033b3532378bba84d509d39e2d51694d44c1f8d88 + sha256sums = a66f0e791b16b03b91049aac61a25950d93e962e1b2ba64a38c6ad7f609b532c + sha256sums = 13a6d9e678bdc9e1f051006cfd0555f5a80582368f54c8a1bb5a78ece3832ac4 + sha256sums = 8d4a7e35ab1a0a465f737cf60fc0392afc430e22354a40a89505f8766a3a3ee8 + sha256sums = 23338655345d0ee535f34acc124f1ddd75e5ad4483e2bd87294b7ac4fe3fa859 + sha256sums = 
adf3f389849e92c5ea4c4cee0abf1ac5df61a176d296e9263ac773194ba86e57 + sha256sums = 4acbbd8834dc5782feb86795748f37e1b1aa4f61b54303234ea4f13bd4c0e068 + sha256sums = 9e7b0c7e54ae0f5a9288b1e65421ecec3f369e5ac34a8660c516897419090c07 + sha256sums = f6c12889551a22a2a6d78d106022b47a652987b9b0ab3fbd7494a86400491e26 + sha256sums = bbeacc088f39d5baf42281bac1efa930a8eb3277a455e858a8edafdd021b3446 + sha256sums = e8935e9e318f4d497ecade9a4b667ed494feba6304b1b4f08f9fd18c9a53c3fa + sha256sums = 8f105c549f565f61b1e4f6e2f91109620721cc0d5fd88407aebfdba55440ddf8 + +pkgname = qemu-patched-vfiomsitest + provides = qemu-headless + provides = qemu + conflicts = qemu-headless + conflicts = qemu + replaces = qemu-kvm + +pkgname = qemu-patched-vfiomsitest-headless + pkgdesc = QEMU without GUI + depends = seabios + depends = gnutls + depends = libpng + depends = libaio + depends = numactl + depends = jemalloc + depends = xfsprogs + depends = libnfs + depends = lzo + depends = snappy + depends = curl + depends = vde2 + depends = libcap-ng + depends = spice + depends = libcacard + depends = usbredir + conflicts = qemu-headless + +pkgname = qemu-patched-vfiomsitest-block-iscsi + pkgdesc = QEMU iSCSI block module + depends = glib2 + depends = libiscsi + depends = jemalloc + provides = qemu-block-iscsi + conflicts = qemu-block-iscsi + +pkgname = qemu-patched-vfiomsitest-block-rbd + pkgdesc = QEMU RBD block module + depends = glib2 + depends = ceph + provides = qemu-block-rbd + conflicts = qemu-block-rbd + +pkgname = qemu-patched-vfiomsitest-block-gluster + pkgdesc = QEMU GlusterFS block module + depends = glib2 + depends = glusterfs + provides = qemu-block-gluster + conflicts = qemu-block-gluster + +pkgname = qemu-patched-vfiomsitest-guest-agent + pkgdesc = QEMU Guest Agent + depends = gcc-libs + depends = glib2 + provides = qemu-guest-agent + conflicts = qemu-guest-agent + diff --git a/65-kvm.rules b/65-kvm.rules new file mode 100644 index 000000000000..fbb0ebb69c9e --- /dev/null +++ b/65-kvm.rules 
@@ -0,0 +1 @@ +KERNEL=="vhost-net", GROUP="kvm", MODE="0660", TAG+="uaccess", OPTIONS+="static_node=vhost-net" diff --git a/PKGBUILD b/PKGBUILD new file mode 100644 index 000000000000..69561b3fd35b --- /dev/null +++ b/PKGBUILD @@ -0,0 +1,223 @@ +# Maintainer: Vaporeon <vaporeon@vaporeon.io> +# Contributor: Tobias Powalowski <tpowa@archlinux.org> +# Contributor: Sébastien "Seblu" Luttringer <seblu@seblu.net> + +pkgbase=qemu-patched-vfiomsitest +pkgname=(qemu-patched-vfiomsitest qemu-patched-vfiomsitest-headless + qemu-patched-vfiomsitest-block-{iscsi,rbd,gluster} qemu-patched-vfiomsitest-guest-agent) +_pkgname=qemu +pkgdesc="A generic and open source machine emulator and virtualizer - Patched for extra functionality" +pkgver=2.11.0 +pkgrel=3 +arch=(x86_64) +license=(GPL2 LGPL2.1) +url="http://wiki.qemu.org/" +_headlessdeps=(seabios gnutls libpng libaio numactl jemalloc xfsprogs libnfs + lzo snappy curl vde2 libcap-ng spice libcacard usbredir) +depends=(virglrenderer sdl2 vte3 libpulse "${_headlessdeps[@]}") +makedepends=(spice-protocol python2 ceph libiscsi glusterfs) +source=("$url/download/${_pkgname}-${pkgver}.tar.xz"{,.sig} + qemu-ga.service + 65-kvm.rules + allow_elf64.patch + cpu-pinning.patch + audio-improvements.patch + v2_qemu_zen_smt_cache.patch + v4_ivshmem.patch + vfio-msi-1.patch + vfio-msi-2.patch + vfio-msi-3.patch + vfio-msi-4.patch + vfio-msi-5.patch) +sha256sums=('c9d34a79024eae080ce3853aa9afe503824520eefb440190383003081ce7f437' + 'SKIP' + 'c39bcde4a09165e64419fd2033b3532378bba84d509d39e2d51694d44c1f8d88' + 'a66f0e791b16b03b91049aac61a25950d93e962e1b2ba64a38c6ad7f609b532c' + '13a6d9e678bdc9e1f051006cfd0555f5a80582368f54c8a1bb5a78ece3832ac4' + '8d4a7e35ab1a0a465f737cf60fc0392afc430e22354a40a89505f8766a3a3ee8' + '23338655345d0ee535f34acc124f1ddd75e5ad4483e2bd87294b7ac4fe3fa859' + 'adf3f389849e92c5ea4c4cee0abf1ac5df61a176d296e9263ac773194ba86e57' + '4acbbd8834dc5782feb86795748f37e1b1aa4f61b54303234ea4f13bd4c0e068' + 
'9e7b0c7e54ae0f5a9288b1e65421ecec3f369e5ac34a8660c516897419090c07' + 'f6c12889551a22a2a6d78d106022b47a652987b9b0ab3fbd7494a86400491e26' + 'bbeacc088f39d5baf42281bac1efa930a8eb3277a455e858a8edafdd021b3446' + 'e8935e9e318f4d497ecade9a4b667ed494feba6304b1b4f08f9fd18c9a53c3fa' + '8f105c549f565f61b1e4f6e2f91109620721cc0d5fd88407aebfdba55440ddf8') +validpgpkeys=('CEACC9E15534EBABB82D3FA03353C9CEF108B584') + +case $CARCH in + i?86) _corearch=i386 ;; + x86_64) _corearch=x86_64 ;; +esac + +prepare() { + mkdir build-{full,headless} + + cd ${_pkgname}-${pkgver} + sed -i 's/vte-2\.90/vte-2.91/g' configure + + patch -p1 < ../allow_elf64.patch + patch -p1 < ../cpu-pinning.patch + patch -p0 < ../audio-improvements.patch + patch -p1 < ../v2_qemu_zen_smt_cache.patch + patch -p1 < ../v4_ivshmem.patch + + # Experimental VFIO MSI interrupt patches, please read before using: + # https://redd.it/7vsfv7 and on the QEMU mailing list: + # https://lists.gnu.org/archive/html/qemu-devel/2018-02/msg01543.html + patch -p1 < ../vfio-msi-1.patch + patch -p1 < ../vfio-msi-2.patch + patch -p1 < ../vfio-msi-3.patch + patch -p1 < ../vfio-msi-4.patch + patch -p1 < ../vfio-msi-5.patch +} + +build() { + _build full \ + --audio-drv-list="pa alsa sdl" + + _build headless \ + --audio-drv-list= \ + --disable-bluez \ + --disable-sdl \ + --disable-gtk \ + --disable-vte \ + --disable-opengl \ + --disable-virglrenderer +} + +_build() ( + cd build-$1 + + # qemu vs. 
make 4 == bad + export ARFLAGS=rv + + # http://permalink.gmane.org/gmane.comp.emulators.qemu/238740 + export CFLAGS+=" -fPIC" + + ../${_pkgname}-${pkgver}/configure \ + --prefix=/usr \ + --sysconfdir=/etc \ + --localstatedir=/var \ + --libexecdir=/usr/lib/qemu \ + --python=/usr/bin/python2 \ + --smbd=/usr/bin/smbd \ + --target-list=x86_64-softmmu,x86_64-linux-user \ + --with-gtkabi=3.0 \ + --with-sdlabi=2.0 \ + --enable-modules \ + --enable-jemalloc \ + "${@:2}" + + make +) + +package_qemu-patched-vfiomsitest() { + provides=(qemu-headless qemu) + conflicts=(qemu-headless qemu) + replaces=(qemu-kvm) + + _package full +} + +package_qemu-patched-vfiomsitest-headless() { + pkgdesc="QEMU without GUI" + depends=("${_headlessdeps[@]}") + conflicts=(qemu-headless) + _package headless +} + +_package() { + optdepends+=('ovmf: Tianocore UEFI firmware for qemu' + 'samba: SMB/CIFS server support' + 'qemu-patched-vfiomsitest-block-iscsi: iSCSI block support' + 'qemu-patched-vfiomsitest-block-rbd: RBD block support' + 'qemu-patched-vfiomsitest-block-gluster: glusterfs block support') + install=qemu.install + options=(!strip) + + make -C build-$1 DESTDIR="$pkgdir" install "${@:2}" + + # systemd stuff + install -Dm644 65-kvm.rules "$pkgdir/usr/lib/udev/rules.d/65-kvm.rules" + + # remove conflicting /var/run directory + cd "$pkgdir" + rm -r var + + cd usr/lib + tidy_strip + + # bridge_helper needs suid + # https://bugs.archlinux.org/task/32565 + chmod u+s qemu/qemu-bridge-helper + + # remove split block modules + rm qemu/block-{iscsi,rbd,gluster}.so + + cd ../bin + tidy_strip + + cd ../share/qemu + for _blob in *; do + [[ -f $_blob ]] || continue + + case $_blob in + # provided by seabios package + bios.bin|acpi-dsdt.aml|bios-256k.bin|vgabios-cirrus.bin|vgabios-qxl.bin|\ + vgabios-stdvga.bin|vgabios-vmware.bin) rm "$_blob"; continue ;; + + # iPXE ROMs + efi-*|pxe-*) continue ;; + + # core blobs + kvmvapic.bin|linuxboot*|multiboot.bin|sgabios.bin|vgabios*) continue ;; + + # Trace 
events definitions + trace-events*) continue ;; + + # Logos + *.bmp|*.svg) continue ;; + esac + done +} + +package_qemu-patched-vfiomsitest-block-iscsi() { + pkgdesc="QEMU iSCSI block module" + depends=(glib2 libiscsi jemalloc) + conflicts=(qemu-block-iscsi) + provides=(qemu-block-iscsi) + + install -D build-full/block-iscsi.so "$pkgdir/usr/lib/qemu/block-iscsi.so" +} + +package_qemu-patched-vfiomsitest-block-rbd() { + pkgdesc="QEMU RBD block module" + depends=(glib2 ceph) + conflicts=(qemu-block-rbd) + provides=(qemu-block-rbd) + + install -D build-full/block-rbd.so "$pkgdir/usr/lib/qemu/block-rbd.so" +} + +package_qemu-patched-vfiomsitest-block-gluster() { + pkgdesc="QEMU GlusterFS block module" + depends=(glib2 glusterfs) + conflicts=(qemu-block-gluster) + provides=(qemu-block-gluster) + + install -D build-full/block-gluster.so "$pkgdir/usr/lib/qemu/block-gluster.so" +} + +package_qemu-patched-vfiomsitest-guest-agent() { + pkgdesc="QEMU Guest Agent" + depends=(gcc-libs glib2) + conflicts=(qemu-guest-agent) + provides=(qemu-guest-agent) + + install -D build-full/qemu-ga "$pkgdir/usr/bin/qemu-ga" + install -Dm644 qemu-ga.service "$pkgdir/usr/lib/systemd/system/qemu-ga.service" + install -Dm755 "$srcdir/${_pkgname}-${pkgver}/scripts/qemu-guest-agent/fsfreeze-hook" "$pkgdir/etc/qemu/fsfreeze-hook" +} + +# vim:set ts=2 sw=2 et: diff --git a/allow_elf64.patch b/allow_elf64.patch new file mode 100644 index 000000000000..07f27a038b1b --- /dev/null +++ b/allow_elf64.patch @@ -0,0 +1,27 @@ +commit 3c72765ec760a51f0e879dc792be82c93141e318 +Author: Anatol Pomozov <anatol.pomozov@gmail.com> +Date: Tue Jun 6 20:07:03 2017 -0700 + + Remove restriction that prevents bootimg elf64 images + + It is possible to create a 64 bit elf image that has valid multiboot header. + qemu should be able to boot such images. 
+ + Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com> + +diff --git a/hw/i386/multiboot.c b/hw/i386/multiboot.c +index 663f35a658..cf1b4f5fb3 100644 +--- a/hw/i386/multiboot.c ++++ b/hw/i386/multiboot.c +@@ -192,11 +192,6 @@ int load_multiboot(FWCfgState *fw_cfg, + int kernel_size; + fclose(f); + +- if (((struct elf64_hdr*)header)->e_machine == EM_X86_64) { +- fprintf(stderr, "Cannot load x86-64 image, give a 32bit one.\n"); +- exit(1); +- } +- + kernel_size = load_elf(kernel_filename, NULL, NULL, &elf_entry, + &elf_low, &elf_high, 0, I386_ELF_MACHINE, + 0, 0); diff --git a/audio-improvements.patch b/audio-improvements.patch new file mode 100644 index 000000000000..47ae53e8d7b1 --- /dev/null +++ b/audio-improvements.patch @@ -0,0 +1,1125 @@ +diff --git audio/audio.c audio/audio.c +index beafed209b..6f42a019b0 100644 +--- audio/audio.c ++++ audio/audio.c +@@ -2066,3 +2066,8 @@ void AUD_set_volume_in (SWVoiceIn *sw, int mute, uint8_t lvol, uint8_t rvol) + } + } + } ++ ++int64_t audio_get_timer_ticks(void) ++{ ++ return conf.period.ticks; ++} +diff --git audio/audio_int.h audio/audio_int.h +index 5bcb1c60e1..2f7fc4f8ac 100644 +--- audio/audio_int.h ++++ audio/audio_int.h +@@ -214,6 +214,8 @@ extern struct audio_driver pa_audio_driver; + extern struct audio_driver spice_audio_driver; + extern const struct mixeng_volume nominal_volume; + ++int64_t audio_get_timer_ticks(void); ++ + void audio_pcm_init_info (struct audio_pcm_info *info, struct audsettings *as); + void audio_pcm_info_clear_buf (struct audio_pcm_info *info, void *buf, int len); + +diff --git audio/paaudio.c audio/paaudio.c +index 65beb6f010..b46beeea92 100644 +--- audio/paaudio.c ++++ audio/paaudio.c +@@ -1,16 +1,22 @@ + /* public domain */ + #include "qemu/osdep.h" +-#include "qemu-common.h" ++#include "qemu/timer.h" + #include "audio.h" + + #include <pulse/pulseaudio.h> + + #define AUDIO_CAP "pulseaudio" ++#define DEBUG + #include "audio_int.h" +-#include "audio_pt_int.h" + + typedef struct { +- 
int samples; ++ int buffer_size_out; ++ int buffer_size_in; ++ int tlength; ++ int fragsize; ++ int maxlength_in; ++ int adjust_latency_out; ++ int adjust_latency_in; + char *server; + char *sink; + char *source; +@@ -24,28 +30,18 @@ typedef struct { + + typedef struct { + HWVoiceOut hw; +- int done; +- int live; +- int decr; +- int rpos; + pa_stream *stream; +- void *pcm_buf; +- struct audio_pt pt; + paaudio *g; ++ pa_sample_spec ss; ++ pa_buffer_attr ba; + } PAVoiceOut; + + typedef struct { + HWVoiceIn hw; +- int done; +- int dead; +- int incr; +- int wpos; + pa_stream *stream; +- void *pcm_buf; +- struct audio_pt pt; +- const void *read_data; +- size_t read_index, read_length; + paaudio *g; ++ pa_sample_spec ss; ++ pa_buffer_attr ba; + } PAVoiceIn; + + static void qpa_audio_fini(void *opaque); +@@ -109,182 +105,59 @@ static inline int PA_STREAM_IS_GOOD(pa_stream_state_t x) + } \ + } while (0); + +-static int qpa_simple_read (PAVoiceIn *p, void *data, size_t length, int *rerror) +-{ +- paaudio *g = p->g; +- +- pa_threaded_mainloop_lock (g->mainloop); +- +- CHECK_DEAD_GOTO (g, p->stream, rerror, unlock_and_fail); +- +- while (length > 0) { +- size_t l; +- +- while (!p->read_data) { +- int r; +- +- r = pa_stream_peek (p->stream, &p->read_data, &p->read_length); +- CHECK_SUCCESS_GOTO (g, rerror, r == 0, unlock_and_fail); +- +- if (!p->read_data) { +- pa_threaded_mainloop_wait (g->mainloop); +- CHECK_DEAD_GOTO (g, p->stream, rerror, unlock_and_fail); +- } else { +- p->read_index = 0; +- } +- } +- +- l = p->read_length < length ? 
p->read_length : length; +- memcpy (data, (const uint8_t *) p->read_data+p->read_index, l); +- +- data = (uint8_t *) data + l; +- length -= l; +- +- p->read_index += l; +- p->read_length -= l; +- +- if (!p->read_length) { +- int r; +- +- r = pa_stream_drop (p->stream); +- p->read_data = NULL; +- p->read_length = 0; +- p->read_index = 0; +- +- CHECK_SUCCESS_GOTO (g, rerror, r == 0, unlock_and_fail); +- } +- } +- +- pa_threaded_mainloop_unlock (g->mainloop); +- return 0; +- +-unlock_and_fail: +- pa_threaded_mainloop_unlock (g->mainloop); +- return -1; +-} +- +-static int qpa_simple_write (PAVoiceOut *p, const void *data, size_t length, int *rerror) ++static int qpa_run_out(HWVoiceOut *hw, int live) + { +- paaudio *g = p->g; +- +- pa_threaded_mainloop_lock (g->mainloop); +- +- CHECK_DEAD_GOTO (g, p->stream, rerror, unlock_and_fail); +- +- while (length > 0) { +- size_t l; +- int r; +- +- while (!(l = pa_stream_writable_size (p->stream))) { +- pa_threaded_mainloop_wait (g->mainloop); +- CHECK_DEAD_GOTO (g, p->stream, rerror, unlock_and_fail); +- } +- +- CHECK_SUCCESS_GOTO (g, rerror, l != (size_t) -1, unlock_and_fail); +- +- if (l > length) { +- l = length; +- } +- +- r = pa_stream_write (p->stream, data, l, NULL, 0LL, PA_SEEK_RELATIVE); +- CHECK_SUCCESS_GOTO (g, rerror, r >= 0, unlock_and_fail); +- +- data = (const uint8_t *) data + l; +- length -= l; +- } +- +- pa_threaded_mainloop_unlock (g->mainloop); +- return 0; +- +-unlock_and_fail: +- pa_threaded_mainloop_unlock (g->mainloop); +- return -1; +-} +- +-static void *qpa_thread_out (void *arg) +-{ +- PAVoiceOut *pa = arg; +- HWVoiceOut *hw = &pa->hw; +- +- if (audio_pt_lock (&pa->pt, AUDIO_FUNC)) { +- return NULL; +- } ++ PAVoiceOut *pa = (PAVoiceOut *) hw; ++ int rpos, decr, samples; ++ size_t avail_bytes, max_bytes; ++ struct st_sample *src; ++ void *pa_dst; ++ int error = 0; ++ int *rerror = &error; ++ int r; + +- for (;;) { +- int decr, to_mix, rpos; ++ decr = 0; ++ rpos = hw->rpos; + +- for (;;) { +- if 
(pa->done) { +- goto exit; +- } ++ pa_threaded_mainloop_lock(pa->g->mainloop); ++ CHECK_DEAD_GOTO(pa->g, pa->stream, rerror, fail); + +- if (pa->live > 0) { +- break; +- } ++ avail_bytes = (size_t) live << hw->info.shift; + +- if (audio_pt_wait (&pa->pt, AUDIO_FUNC)) { +- goto exit; +- } +- } ++ max_bytes = pa_stream_writable_size(pa->stream); ++ CHECK_SUCCESS_GOTO(pa->g, rerror, max_bytes != -1, fail); + +- decr = to_mix = audio_MIN (pa->live, pa->g->conf.samples >> 2); +- rpos = pa->rpos; ++ samples = (int)(audio_MIN(avail_bytes, max_bytes)) >> hw->info.shift; ++ while (samples) { ++ int convert_samples = audio_MIN(samples, hw->samples - rpos); ++ size_t b_wanted = (size_t) convert_samples << hw->info.shift; ++ size_t b_effective = b_wanted; + +- if (audio_pt_unlock (&pa->pt, AUDIO_FUNC)) { +- return NULL; +- } ++ r = pa_stream_begin_write(pa->stream, &pa_dst, &b_effective); ++ CHECK_SUCCESS_GOTO(pa->g, rerror, r == 0, fail); ++ CHECK_SUCCESS_GOTO(pa->g, (int *)0, b_effective == b_wanted, fail); + +- while (to_mix) { +- int error; +- int chunk = audio_MIN (to_mix, hw->samples - rpos); +- struct st_sample *src = hw->mix_buf + rpos; ++ src = hw->mix_buf + rpos; ++ hw->clip(pa_dst, src, convert_samples); + +- hw->clip (pa->pcm_buf, src, chunk); +- +- if (qpa_simple_write (pa, pa->pcm_buf, +- chunk << hw->info.shift, &error) < 0) { +- qpa_logerr (error, "pa_simple_write failed\n"); +- return NULL; +- } ++ r = pa_stream_write(pa->stream, pa_dst, b_effective, ++ NULL, 0LL, PA_SEEK_RELATIVE); ++ CHECK_SUCCESS_GOTO(pa->g, rerror, r >= 0, fail); + +- rpos = (rpos + chunk) % hw->samples; +- to_mix -= chunk; +- } +- +- if (audio_pt_lock (&pa->pt, AUDIO_FUNC)) { +- return NULL; +- } +- +- pa->rpos = rpos; +- pa->live -= decr; +- pa->decr += decr; ++ rpos = (rpos + convert_samples) % hw->samples; ++ samples -= convert_samples; ++ decr += convert_samples; + } + +- exit: +- audio_pt_unlock (&pa->pt, AUDIO_FUNC); +- return NULL; +-} +- +-static int qpa_run_out (HWVoiceOut *hw, 
int live) +-{ +- int decr; +- PAVoiceOut *pa = (PAVoiceOut *) hw; +- +- if (audio_pt_lock (&pa->pt, AUDIO_FUNC)) { +- return 0; +- } ++ bail: ++ pa_threaded_mainloop_unlock(pa->g->mainloop); + +- decr = audio_MIN (live, pa->decr); +- pa->decr -= decr; +- pa->live = live - decr; +- hw->rpos = pa->rpos; +- if (pa->live > 0) { +- audio_pt_unlock_and_signal (&pa->pt, AUDIO_FUNC); +- } +- else { +- audio_pt_unlock (&pa->pt, AUDIO_FUNC); +- } ++ hw->rpos = rpos; + return decr; ++ ++fail: ++ qpa_logerr(error, "qpa_run_out failed\n"); ++ goto bail; + } + + static int qpa_write (SWVoiceOut *sw, void *buf, int len) +@@ -292,92 +165,68 @@ static int qpa_write (SWVoiceOut *sw, void *buf, int len) + return audio_pcm_sw_write (sw, buf, len); + } + +-/* capture */ +-static void *qpa_thread_in (void *arg) ++static int qpa_run_in(HWVoiceIn *hw) + { +- PAVoiceIn *pa = arg; +- HWVoiceIn *hw = &pa->hw; ++ PAVoiceIn *pa = (PAVoiceIn *) hw; ++ int wpos, incr; ++ char *pa_src; ++ int error = 0; ++ int *rerror = &error; ++ int r; ++ size_t pa_avail; ++ incr = 0; ++ wpos = hw->wpos; + +- if (audio_pt_lock (&pa->pt, AUDIO_FUNC)) { +- return NULL; +- } ++ pa_threaded_mainloop_lock(pa->g->mainloop); ++ CHECK_DEAD_GOTO(pa->g, pa->stream, rerror, fail); + +- for (;;) { +- int incr, to_grab, wpos; ++ size_t bytes_wanted = ((unsigned int) ++ (hw->samples - audio_pcm_hw_get_live_in(hw)) << hw->info.shift); + +- for (;;) { +- if (pa->done) { +- goto exit; +- } ++ if (bytes_wanted == 0) { ++ /* no room */ ++ goto bail; ++ } + +- if (pa->dead > 0) { +- break; +- } ++ size_t bytes_avail = pa_stream_readable_size(pa->stream); + +- if (audio_pt_wait (&pa->pt, AUDIO_FUNC)) { +- goto exit; +- } +- } ++ if (bytes_wanted > bytes_avail) { ++ bytes_wanted = bytes_avail; ++ } + +- incr = to_grab = audio_MIN (pa->dead, pa->g->conf.samples >> 2); +- wpos = pa->wpos; ++ while (bytes_wanted) { ++ r = pa_stream_peek(pa->stream, (const void **)&pa_src, &pa_avail); ++ CHECK_SUCCESS_GOTO(pa->g, rerror, r == 0, fail); 
+ +- if (audio_pt_unlock (&pa->pt, AUDIO_FUNC)) { +- return NULL; ++ if (pa_avail == 0 || pa_avail > bytes_wanted) { ++ break; + } + +- while (to_grab) { +- int error; +- int chunk = audio_MIN (to_grab, hw->samples - wpos); +- void *buf = advance (pa->pcm_buf, wpos); ++ bytes_wanted -= pa_avail; + +- if (qpa_simple_read (pa, buf, +- chunk << hw->info.shift, &error) < 0) { +- qpa_logerr (error, "pa_simple_read failed\n"); +- return NULL; +- } +- +- hw->conv (hw->conv_buf + wpos, buf, chunk); ++ while (pa_avail) { ++ int chunk = audio_MIN( ++ (int)(pa_avail >> hw->info.shift), hw->samples - wpos); ++ hw->conv(hw->conv_buf + wpos, pa_src, chunk); + wpos = (wpos + chunk) % hw->samples; +- to_grab -= chunk; +- } +- +- if (audio_pt_lock (&pa->pt, AUDIO_FUNC)) { +- return NULL; ++ pa_src += chunk << hw->info.shift; ++ pa_avail -= chunk << hw->info.shift; ++ incr += chunk; + } + +- pa->wpos = wpos; +- pa->dead -= incr; +- pa->incr += incr; ++ r = pa_stream_drop(pa->stream); ++ CHECK_SUCCESS_GOTO(pa->g, rerror, r == 0, fail); + } + +- exit: +- audio_pt_unlock (&pa->pt, AUDIO_FUNC); +- return NULL; +-} +- +-static int qpa_run_in (HWVoiceIn *hw) +-{ +- int live, incr, dead; +- PAVoiceIn *pa = (PAVoiceIn *) hw; +- +- if (audio_pt_lock (&pa->pt, AUDIO_FUNC)) { +- return 0; +- } ++bail: ++ pa_threaded_mainloop_unlock(pa->g->mainloop); + +- live = audio_pcm_hw_get_live_in (hw); +- dead = hw->samples - live; +- incr = audio_MIN (dead, pa->incr); +- pa->incr -= incr; +- pa->dead = dead - incr; +- hw->wpos = pa->wpos; +- if (pa->dead > 0) { +- audio_pt_unlock_and_signal (&pa->pt, AUDIO_FUNC); +- } +- else { +- audio_pt_unlock (&pa->pt, AUDIO_FUNC); +- } ++ hw->wpos = wpos; + return incr; ++ ++fail: ++ qpa_logerr(error, "qpa_run_in failed\n"); ++ goto bail; + } + + static int qpa_read (SWVoiceIn *sw, void *buf, int len) +@@ -470,13 +319,6 @@ static void stream_state_cb (pa_stream *s, void * userdata) + } + } + +-static void stream_request_cb (pa_stream *s, size_t length, void 
*userdata) +-{ +- paaudio *g = userdata; +- +- pa_threaded_mainloop_signal (g->mainloop, 0); +-} +- + static pa_stream *qpa_simple_new ( + paaudio *g, + const char *name, +@@ -498,23 +340,17 @@ static pa_stream *qpa_simple_new ( + } + + pa_stream_set_state_callback (stream, stream_state_cb, g); +- pa_stream_set_read_callback (stream, stream_request_cb, g); +- pa_stream_set_write_callback (stream, stream_request_cb, g); + + if (dir == PA_STREAM_PLAYBACK) { +- r = pa_stream_connect_playback (stream, dev, attr, +- PA_STREAM_INTERPOLATE_TIMING +-#ifdef PA_STREAM_ADJUST_LATENCY +- |PA_STREAM_ADJUST_LATENCY +-#endif +- |PA_STREAM_AUTO_TIMING_UPDATE, NULL, NULL); ++ r = pa_stream_connect_playback(stream, dev, attr, ++ PA_STREAM_INTERPOLATE_TIMING ++ | (g->conf.adjust_latency_out ? PA_STREAM_ADJUST_LATENCY : 0) ++ | PA_STREAM_AUTO_TIMING_UPDATE, NULL, NULL); + } else { +- r = pa_stream_connect_record (stream, dev, attr, +- PA_STREAM_INTERPOLATE_TIMING +-#ifdef PA_STREAM_ADJUST_LATENCY +- |PA_STREAM_ADJUST_LATENCY +-#endif +- |PA_STREAM_AUTO_TIMING_UPDATE); ++ r = pa_stream_connect_record(stream, dev, attr, ++ PA_STREAM_INTERPOLATE_TIMING ++ | (g->conf.adjust_latency_in ? PA_STREAM_ADJUST_LATENCY : 0) ++ | PA_STREAM_AUTO_TIMING_UPDATE); + } + + if (r < 0) { +@@ -541,165 +377,167 @@ static int qpa_init_out(HWVoiceOut *hw, struct audsettings *as, + void *drv_opaque) + { + int error; +- pa_sample_spec ss; +- pa_buffer_attr ba; + struct audsettings obt_as = *as; + PAVoiceOut *pa = (PAVoiceOut *) hw; + paaudio *g = pa->g = drv_opaque; + +- ss.format = audfmt_to_pa (as->fmt, as->endianness); +- ss.channels = as->nchannels; +- ss.rate = as->freq; +- +- /* +- * qemu audio tick runs at 100 Hz (by default), so processing +- * data chunks worth 10 ms of sound should be a good fit. 
+- */ +- ba.tlength = pa_usec_to_bytes (10 * 1000, &ss); +- ba.minreq = pa_usec_to_bytes (5 * 1000, &ss); +- ba.maxlength = -1; +- ba.prebuf = -1; +- +- obt_as.fmt = pa_to_audfmt (ss.format, &obt_as.endianness); +- +- pa->stream = qpa_simple_new ( +- g, +- "qemu", +- PA_STREAM_PLAYBACK, +- g->conf.sink, +- &ss, +- NULL, /* channel map */ +- &ba, /* buffering attributes */ +- &error +- ); ++ int64_t timer_tick_duration = ++ audio_MAX(audio_get_timer_ticks(), 1 * SCALE_MS); ++ int64_t frames_per_tick_x1000 = ++ ((timer_tick_duration * as->freq * 1000LL) / NANOSECONDS_PER_SECOND); ++ ++ int64_t tlength = g->conf.tlength; ++ if (tlength == 0) { ++ tlength = (frames_per_tick_x1000) / 400; ++ } ++ int64_t buflen = g->conf.buffer_size_out; ++ if (buflen == 0) { ++ buflen = frames_per_tick_x1000 / 400; ++ } ++ ++ ldebug("tick duration: %.2f ms (%.3f frames)\n", ++ ((float)timer_tick_duration) / SCALE_MS, ++ (float)frames_per_tick_x1000 / 1000.0f); ++ ++ ldebug("OUT internal buffer: %.2f ms (%"PRId64" frames)\n", ++ buflen * (1000.0f / as->freq), ++ buflen); ++ ++ ldebug("OUT tlength: %.2f ms (%"PRId64" frames)\n", ++ tlength * (1000.0f / as->freq), ++ tlength); ++ ++ ldebug("OUT adjust latency: %s\n", ++ g->conf.adjust_latency_out ? 
"yes" : "no"); ++ ++ pa->ss.format = audfmt_to_pa(as->fmt, as->endianness); ++ pa->ss.channels = as->nchannels; ++ pa->ss.rate = as->freq; ++ ++ pa->ba.tlength = tlength * pa_frame_size(&pa->ss); ++ pa->ba.maxlength = -1; ++ pa->ba.minreq = -1; ++ pa->ba.prebuf = -1; ++ ++ obt_as.fmt = pa_to_audfmt(pa->ss.format, &obt_as.endianness); ++ ++ pa->stream = qpa_simple_new( ++ g, ++ "qemu", ++ PA_STREAM_PLAYBACK, ++ g->conf.sink, ++ &pa->ss, ++ NULL, /* channel map */ ++ &pa->ba, /* buffering attributes */ ++ &error ++ ); + if (!pa->stream) { + qpa_logerr (error, "pa_simple_new for playback failed\n"); + goto fail1; + } + +- audio_pcm_init_info (&hw->info, &obt_as); +- hw->samples = g->conf.samples; +- pa->pcm_buf = audio_calloc (AUDIO_FUNC, hw->samples, 1 << hw->info.shift); +- pa->rpos = hw->rpos; +- if (!pa->pcm_buf) { +- dolog ("Could not allocate buffer (%d bytes)\n", +- hw->samples << hw->info.shift); +- goto fail2; +- } +- +- if (audio_pt_init (&pa->pt, qpa_thread_out, hw, AUDIO_CAP, AUDIO_FUNC)) { +- goto fail3; +- } ++ audio_pcm_init_info(&hw->info, &obt_as); ++ hw->samples = buflen; + + return 0; + +- fail3: +- g_free (pa->pcm_buf); +- pa->pcm_buf = NULL; +- fail2: +- if (pa->stream) { +- pa_stream_unref (pa->stream); +- pa->stream = NULL; +- } +- fail1: ++fail1: + return -1; + } + + static int qpa_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque) + { + int error; +- pa_sample_spec ss; + struct audsettings obt_as = *as; + PAVoiceIn *pa = (PAVoiceIn *) hw; + paaudio *g = pa->g = drv_opaque; + +- ss.format = audfmt_to_pa (as->fmt, as->endianness); +- ss.channels = as->nchannels; +- ss.rate = as->freq; +- +- obt_as.fmt = pa_to_audfmt (ss.format, &obt_as.endianness); +- +- pa->stream = qpa_simple_new ( +- g, +- "qemu", +- PA_STREAM_RECORD, +- g->conf.source, +- &ss, +- NULL, /* channel map */ +- NULL, /* buffering attributes */ +- &error +- ); ++ int64_t timer_tick_duration = ++ audio_MAX(audio_get_timer_ticks(), 1 * SCALE_MS); ++ int64_t 
frames_per_tick_x1000 = ++ ((timer_tick_duration * as->freq * 1000LL) / NANOSECONDS_PER_SECOND); ++ ++ int64_t fragsize = g->conf.fragsize; ++ if (fragsize == 0) { ++ fragsize = frames_per_tick_x1000 / 1000; ++ } ++ int64_t buflen = g->conf.buffer_size_in; ++ if (buflen == 0) { ++ buflen = frames_per_tick_x1000 / 400; ++ } ++ int64_t maxlength = g->conf.maxlength_in; ++ if (maxlength == 0) { ++ maxlength = fragsize * 2; ++ } ++ ++ ldebug("IN internal buffer: %.2f ms (%"PRId64" frames)\n", ++ buflen * (1000.0f / as->freq), ++ buflen); ++ ++ ldebug("IN fragsize: %.2f ms (%"PRId64" frames)\n", ++ fragsize * (1000.0f / as->freq), ++ fragsize); ++ ++ ldebug("IN maxlength: %.2f ms (%"PRId64" frames)\n", ++ maxlength * (1000.0f / as->freq), ++ maxlength); ++ ++ ldebug("IN adjust latency: %s\n", ++ g->conf.adjust_latency_in ? "yes" : "no"); ++ ++ pa->ss.format = audfmt_to_pa(as->fmt, as->endianness); ++ pa->ss.channels = as->nchannels; ++ pa->ss.rate = as->freq; ++ ++ pa->ba.fragsize = fragsize * pa_frame_size(&pa->ss); ++ pa->ba.maxlength = maxlength * pa_frame_size(&pa->ss); ++ pa->ba.minreq = -1; ++ pa->ba.prebuf = -1; ++ ++ obt_as.fmt = pa_to_audfmt(pa->ss.format, &obt_as.endianness); ++ ++ pa->stream = qpa_simple_new( ++ g, ++ "qemu", ++ PA_STREAM_RECORD, ++ g->conf.source, ++ &pa->ss, ++ NULL, /* channel map */ ++ &pa->ba, /* buffering attributes */ ++ &error ++ ); + if (!pa->stream) { + qpa_logerr (error, "pa_simple_new for capture failed\n"); + goto fail1; + } + +- audio_pcm_init_info (&hw->info, &obt_as); +- hw->samples = g->conf.samples; +- pa->pcm_buf = audio_calloc (AUDIO_FUNC, hw->samples, 1 << hw->info.shift); +- pa->wpos = hw->wpos; +- if (!pa->pcm_buf) { +- dolog ("Could not allocate buffer (%d bytes)\n", +- hw->samples << hw->info.shift); +- goto fail2; +- } +- +- if (audio_pt_init (&pa->pt, qpa_thread_in, hw, AUDIO_CAP, AUDIO_FUNC)) { +- goto fail3; +- } ++ audio_pcm_init_info(&hw->info, &obt_as); ++ hw->samples = buflen; + + return 0; + +- fail3: +- 
g_free (pa->pcm_buf); +- pa->pcm_buf = NULL; +- fail2: +- if (pa->stream) { +- pa_stream_unref (pa->stream); +- pa->stream = NULL; +- } +- fail1: ++ fail1: + return -1; + } + + static void qpa_fini_out (HWVoiceOut *hw) + { +- void *ret; + PAVoiceOut *pa = (PAVoiceOut *) hw; + +- audio_pt_lock (&pa->pt, AUDIO_FUNC); +- pa->done = 1; +- audio_pt_unlock_and_signal (&pa->pt, AUDIO_FUNC); +- audio_pt_join (&pa->pt, &ret, AUDIO_FUNC); +- + if (pa->stream) { + pa_stream_unref (pa->stream); + pa->stream = NULL; + } +- +- audio_pt_fini (&pa->pt, AUDIO_FUNC); +- g_free (pa->pcm_buf); +- pa->pcm_buf = NULL; + } + + static void qpa_fini_in (HWVoiceIn *hw) + { +- void *ret; + PAVoiceIn *pa = (PAVoiceIn *) hw; + +- audio_pt_lock (&pa->pt, AUDIO_FUNC); +- pa->done = 1; +- audio_pt_unlock_and_signal (&pa->pt, AUDIO_FUNC); +- audio_pt_join (&pa->pt, &ret, AUDIO_FUNC); +- + if (pa->stream) { + pa_stream_unref (pa->stream); + pa->stream = NULL; + } +- +- audio_pt_fini (&pa->pt, AUDIO_FUNC); +- g_free (pa->pcm_buf); +- pa->pcm_buf = NULL; + } + + static int qpa_ctl_out (HWVoiceOut *hw, int cmd, ...) +@@ -809,7 +647,8 @@ static int qpa_ctl_in (HWVoiceIn *hw, int cmd, ...) 
+ + /* common */ + static PAConf glob_conf = { +- .samples = 4096, ++ .adjust_latency_out = 0, ++ .adjust_latency_in = 1, + }; + + static void *qpa_audio_init (void) +@@ -897,10 +736,46 @@ static void qpa_audio_fini (void *opaque) + + struct audio_option qpa_options[] = { + { +- .name = "SAMPLES", ++ .name = "BUFFER_SIZE_OUT", ++ .tag = AUD_OPT_INT, ++ .valp = &glob_conf.buffer_size_out, ++ .descr = "internal buffer size in frames for playback device" ++ }, ++ { ++ .name = "BUFFER_SIZE_IN", ++ .tag = AUD_OPT_INT, ++ .valp = &glob_conf.buffer_size_in, ++ .descr = "internal buffer size in frames for recording device" ++ }, ++ { ++ .name = "TLENGTH", + .tag = AUD_OPT_INT, +- .valp = &glob_conf.samples, +- .descr = "buffer size in samples" ++ .valp = &glob_conf.tlength, ++ .descr = "playback buffer target length in frames" ++ }, ++ { ++ .name = "FRAGSIZE", ++ .tag = AUD_OPT_INT, ++ .valp = &glob_conf.fragsize, ++ .descr = "fragment length of recording device in frames" ++ }, ++ { ++ .name = "MAXLENGTH_IN", ++ .tag = AUD_OPT_INT, ++ .valp = &glob_conf.maxlength_in, ++ .descr = "maximum length of PA recording buffer in frames" ++ }, ++ { ++ .name = "ADJUST_LATENCY_OUT", ++ .tag = AUD_OPT_BOOL, ++ .valp = &glob_conf.adjust_latency_out, ++ .descr = "instruct PA to adjust latency for playback device" ++ }, ++ { ++ .name = "ADJUST_LATENCY_IN", ++ .tag = AUD_OPT_BOOL, ++ .valp = &glob_conf.adjust_latency_in, ++ .descr = "instruct PA to adjust latency for recording device" + }, + { + .name = "SERVER", +diff --git hw/audio/hda-codec.c hw/audio/hda-codec.c +index 5402cd196c..ab89158bfc 100644 +--- hw/audio/hda-codec.c ++++ hw/audio/hda-codec.c +@@ -18,6 +18,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/atomic.h" + #include "hw/hw.h" + #include "hw/pci/pci.h" + #include "intel-hda.h" +@@ -126,6 +127,11 @@ static void hda_codec_parse_fmt(uint32_t format, struct audsettings *as) + #define PARAM nomixemu + #include "hda-codec-common.h" + ++#define HDA_TIMER_TICKS (SCALE_MS) 
++#define MAX_CORR (SCALE_US * 100) ++#define B_SIZE sizeof(st->buf) ++#define B_MASK (sizeof(st->buf) - 1) ++ + /* -------------------------------------------------------------------------- */ + + static const char *fmt2name[] = { +@@ -154,8 +160,13 @@ struct HDAAudioStream { + SWVoiceIn *in; + SWVoiceOut *out; + } voice; +- uint8_t buf[HDA_BUFFER_SIZE]; +- uint32_t bpos; ++ uint8_t compat_buf[HDA_BUFFER_SIZE]; ++ uint32_t compat_bpos; ++ uint8_t buf[8192]; /* size must be power of two */ ++ int64_t rpos; ++ int64_t wpos; ++ QEMUTimer *buft; ++ int64_t buft_start; + }; + + #define TYPE_HDA_AUDIO "hda-audio" +@@ -176,53 +187,146 @@ struct HDAAudioState { + bool mixer; + }; + ++static inline int64_t hda_bytes_per_second(HDAAudioStream *st) ++{ ++ return 2 * st->as.nchannels * st->as.freq; ++} ++ ++static inline void hda_timer_sync_adjust(HDAAudioStream *st, int64_t target_pos) ++{ ++ int64_t corr = ++ NANOSECONDS_PER_SECOND * target_pos / hda_bytes_per_second(st); ++ if (corr > MAX_CORR) { ++ corr = MAX_CORR; ++ } else if (corr < -MAX_CORR) { ++ corr = -MAX_CORR; ++ } ++ atomic_fetch_add(&st->buft_start, corr); ++} ++ ++static void hda_audio_input_timer(void *opaque) ++{ ++ HDAAudioStream *st = opaque; ++ ++ int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); ++ ++ int64_t buft_start = atomic_fetch_add(&st->buft_start, 0); ++ int64_t wpos = atomic_fetch_add(&st->wpos, 0); ++ int64_t rpos = atomic_fetch_add(&st->rpos, 0); ++ ++ int64_t wanted_rpos = hda_bytes_per_second(st) * (now - buft_start) ++ / NANOSECONDS_PER_SECOND; ++ wanted_rpos &= -4; /* IMPORTANT! 
clip to frames */ ++ ++ if (wanted_rpos <= rpos) { ++ /* we already transmitted the data */ ++ goto out_timer; ++ } ++ ++ int64_t to_transfer = audio_MIN(wpos - rpos, wanted_rpos - rpos); ++ while (to_transfer) { ++ uint32_t start = (rpos & B_MASK); ++ uint32_t chunk = audio_MIN(B_SIZE - start, to_transfer); ++ int rc = hda_codec_xfer( ++ &st->state->hda, st->stream, false, st->buf + start, chunk); ++ if (!rc) { ++ break; ++ } ++ rpos += chunk; ++ to_transfer -= chunk; ++ atomic_fetch_add(&st->rpos, chunk); ++ } ++ ++out_timer: ++ ++ if (st->running) { ++ timer_mod_anticipate_ns(st->buft, now + HDA_TIMER_TICKS); ++ } ++} ++ + static void hda_audio_input_cb(void *opaque, int avail) + { + HDAAudioStream *st = opaque; +- int recv = 0; +- int len; +- bool rc; +- +- while (avail - recv >= sizeof(st->buf)) { +- if (st->bpos != sizeof(st->buf)) { +- len = AUD_read(st->voice.in, st->buf + st->bpos, +- sizeof(st->buf) - st->bpos); +- st->bpos += len; +- recv += len; +- if (st->bpos != sizeof(st->buf)) { +- break; +- } ++ ++ int64_t wpos = atomic_fetch_add(&st->wpos, 0); ++ int64_t rpos = atomic_fetch_add(&st->rpos, 0); ++ ++ int64_t to_transfer = audio_MIN(B_SIZE - (wpos - rpos), avail); ++ ++ hda_timer_sync_adjust(st, -((wpos - rpos) + to_transfer - (B_SIZE >> 1))); ++ ++ while (to_transfer) { ++ uint32_t start = (uint32_t) (wpos & B_MASK); ++ uint32_t chunk = (uint32_t) audio_MIN(B_SIZE - start, to_transfer); ++ uint32_t read = AUD_read(st->voice.in, st->buf + start, chunk); ++ wpos += read; ++ to_transfer -= read; ++ atomic_fetch_add(&st->wpos, read); ++ if (chunk != read) { ++ break; + } +- rc = hda_codec_xfer(&st->state->hda, st->stream, false, +- st->buf, sizeof(st->buf)); ++ } ++} ++ ++static void hda_audio_output_timer(void *opaque) ++{ ++ HDAAudioStream *st = opaque; ++ ++ int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); ++ ++ int64_t buft_start = atomic_fetch_add(&st->buft_start, 0); ++ int64_t wpos = atomic_fetch_add(&st->wpos, 0); ++ int64_t rpos = 
atomic_fetch_add(&st->rpos, 0); ++ ++ int64_t wanted_wpos = hda_bytes_per_second(st) * (now - buft_start) ++ / NANOSECONDS_PER_SECOND; ++ wanted_wpos &= -4; /* IMPORTANT! clip to frames */ ++ ++ if (wanted_wpos <= wpos) { ++ /* we already received the data */ ++ goto out_timer; ++ } ++ ++ int64_t to_transfer = audio_MIN(B_SIZE - (wpos - rpos), wanted_wpos - wpos); ++ while (to_transfer) { ++ uint32_t start = (wpos & B_MASK); ++ uint32_t chunk = audio_MIN(B_SIZE - start, to_transfer); ++ int rc = hda_codec_xfer( ++ &st->state->hda, st->stream, true, st->buf + start, chunk); + if (!rc) { + break; + } +- st->bpos = 0; ++ wpos += chunk; ++ to_transfer -= chunk; ++ atomic_fetch_add(&st->wpos, chunk); ++ } ++ ++out_timer: ++ ++ if (st->running) { ++ timer_mod_anticipate_ns(st->buft, now + HDA_TIMER_TICKS); + } + } + + static void hda_audio_output_cb(void *opaque, int avail) + { + HDAAudioStream *st = opaque; +- int sent = 0; +- int len; +- bool rc; +- +- while (avail - sent >= sizeof(st->buf)) { +- if (st->bpos == sizeof(st->buf)) { +- rc = hda_codec_xfer(&st->state->hda, st->stream, true, +- st->buf, sizeof(st->buf)); +- if (!rc) { +- break; +- } +- st->bpos = 0; +- } +- len = AUD_write(st->voice.out, st->buf + st->bpos, +- sizeof(st->buf) - st->bpos); +- st->bpos += len; +- sent += len; +- if (st->bpos != sizeof(st->buf)) { ++ ++ int64_t wpos = atomic_fetch_add(&st->wpos, 0); ++ int64_t rpos = atomic_fetch_add(&st->rpos, 0); ++ ++ int64_t to_transfer = audio_MIN(wpos - rpos, avail); ++ ++ hda_timer_sync_adjust(st, (wpos - rpos) - to_transfer - (B_SIZE >> 1)); ++ ++ while (to_transfer) { ++ uint32_t start = (uint32_t) (rpos & B_MASK); ++ uint32_t chunk = (uint32_t) audio_MIN(B_SIZE - start, to_transfer); ++ uint32_t written = AUD_write(st->voice.out, st->buf + start, chunk); ++ rpos += written; ++ to_transfer -= written; ++ atomic_fetch_add(&st->rpos, written); ++ if (chunk != written) { + break; + } + } +@@ -239,6 +343,15 @@ static void 
hda_audio_set_running(HDAAudioStream *st, bool running) + st->running = running; + dprint(st->state, 1, "%s: %s (stream %d)\n", st->node->name, + st->running ? "on" : "off", st->stream); ++ if (running) { ++ int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); ++ st->rpos = 0; ++ st->wpos = 0; ++ st->buft_start = now; ++ timer_mod_anticipate_ns(st->buft, now + HDA_TIMER_TICKS); ++ } else { ++ timer_del(st->buft); ++ } + if (st->output) { + AUD_set_active_out(st->voice.out, st->running); + } else { +@@ -286,10 +399,12 @@ static void hda_audio_setup(HDAAudioStream *st) + st->voice.out = AUD_open_out(&st->state->card, st->voice.out, + st->node->name, st, + hda_audio_output_cb, &st->as); ++ st->buft = timer_new_ns(QEMU_CLOCK_VIRTUAL, hda_audio_output_timer, st); + } else { + st->voice.in = AUD_open_in(&st->state->card, st->voice.in, + st->node->name, st, + hda_audio_input_cb, &st->as); ++ st->buft = timer_new_ns(QEMU_CLOCK_VIRTUAL, hda_audio_input_timer, st); + } + } + +@@ -505,7 +620,6 @@ static int hda_audio_init(HDACodecDevice *hda, const struct desc_codec *desc) + /* unmute output by default */ + st->gain_left = QEMU_HDA_AMP_STEPS; + st->gain_right = QEMU_HDA_AMP_STEPS; +- st->bpos = sizeof(st->buf); + st->output = true; + } else { + st->output = false; +@@ -532,6 +646,7 @@ static void hda_audio_exit(HDACodecDevice *hda) + if (st->node == NULL) { + continue; + } ++ timer_del(st->buft); + if (st->output) { + AUD_close_out(&a->card, st->voice.out); + } else { +@@ -592,8 +707,8 @@ static const VMStateDescription vmstate_hda_audio_stream = { + VMSTATE_UINT32(gain_right, HDAAudioStream), + VMSTATE_BOOL(mute_left, HDAAudioStream), + VMSTATE_BOOL(mute_right, HDAAudioStream), +- VMSTATE_UINT32(bpos, HDAAudioStream), +- VMSTATE_BUFFER(buf, HDAAudioStream), ++ VMSTATE_UINT32(compat_bpos, HDAAudioStream), ++ VMSTATE_BUFFER(compat_buf, HDAAudioStream), + VMSTATE_END_OF_LIST() + } + }; +diff --git hw/audio/intel-hda.c hw/audio/intel-hda.c +index 18a50a8f83..721eba792d 100644 
+--- hw/audio/intel-hda.c ++++ hw/audio/intel-hda.c +@@ -407,13 +407,6 @@ static bool intel_hda_xfer(HDACodecDevice *dev, uint32_t stnr, bool output, + if (st->bpl == NULL) { + return false; + } +- if (st->ctl & (1 << 26)) { +- /* +- * Wait with the next DMA xfer until the guest +- * has acked the buffer completion interrupt +- */ +- return false; +- } + + left = len; + s = st->bentries; diff --git a/cpu-pinning.patch b/cpu-pinning.patch new file mode 100644 index 000000000000..46efee5d88a5 --- /dev/null +++ b/cpu-pinning.patch @@ -0,0 +1,186 @@ +From e392e5516e6ae66db0f05775a22c0abf39f033f0 Mon Sep 17 00:00:00 2001 +From: Saverio Miroddi <saverio.pub2@gmail.com> +Date: Tue, 31 Oct 2017 20:59:05 +0100 +Subject: [PATCH] Current pinning patch + +Changes 2017/10/31: + +- Fix: the MAX_VCPUS was arbitrary; it's now set to CPU_SETSIZE +- Fix: the allowed vcpus were equated to the cores number, without accounting sockets and threads +- Change: removed all the debug information, and a now unneded warning +- Change: cleaned spacing +--- + cpus.c | 12 +++++++++++ + qemu-options.hx | 10 ++++++++++ + vl.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 84 insertions(+) + +diff --git a/cpus.c b/cpus.c +index 9bed61eefcc..7437e3a00c7 100644 +--- a/cpus.c ++++ b/cpus.c +@@ -55,6 +55,9 @@ + #ifdef CONFIG_LINUX + + #include <sys/prctl.h> ++#include <unistd.h> ++#include <stdint.h> ++#include <inttypes.h> + + #ifndef PR_MCE_KILL + #define PR_MCE_KILL 33 +@@ -1722,9 +1725,11 @@ static void qemu_hax_start_vcpu(CPUState *cpu) + } + } + ++extern int vcpu_affinity[]; + static void qemu_kvm_start_vcpu(CPUState *cpu) + { + char thread_name[VCPU_THREAD_NAME_SIZE]; ++ cpu_set_t cpuset; + + cpu->thread = g_malloc0(sizeof(QemuThread)); + cpu->halt_cond = g_malloc0(sizeof(QemuCond)); +@@ -1733,6 +1738,13 @@ static void qemu_kvm_start_vcpu(CPUState *cpu) + cpu->cpu_index); + qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn, + cpu, 
QEMU_THREAD_JOINABLE); ++ ++ if (vcpu_affinity[cpu->cpu_index] != -1) { ++ CPU_ZERO(&cpuset); ++ CPU_SET(vcpu_affinity[cpu->cpu_index], &cpuset); ++ pthread_setaffinity_np((cpu->thread)->thread, sizeof(cpu_set_t), &cpuset); ++ } ++ + while (!cpu->created) { + qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); + } +diff --git a/qemu-options.hx b/qemu-options.hx +index 9f6e2adfffb..1d38fc86c81 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -160,6 +160,16 @@ given, the total number of CPUs @var{n} can be omitted. @var{maxcpus} + specifies the maximum number of hotpluggable CPUs. + ETEXI + ++DEF("vcpu", HAS_ARG, QEMU_OPTION_vcpu, ++ "-vcpu [vcpunum=]n[,affinity=affinity]\n" ++ "-vcpu [vcpunum=]n[,affinity=affinity]\n", QEMU_ARCH_ALL) ++STEXI ++@item -vcpu [vcpunum=]@var{n}[,affinity=@var{affinity}] ++@itemx -vcpu [vcpunum=]@var{n}[,affinity=@var{affinity}] ++@findex -vcpu ++VCPU Affinity. If specified, specify for all the CPUs. ++ETEXI ++ + DEF("numa", HAS_ARG, QEMU_OPTION_numa, + "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n" + "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n" +diff --git a/vl.c b/vl.c +index d63269332fe..754a03c9a5f 100644 +--- a/vl.c ++++ b/vl.c +@@ -135,6 +135,7 @@ int main(int argc, char **argv) + #define MAX_VIRTIO_CONSOLES 1 + #define MAX_SCLP_CONSOLES 1 + ++#define MAX_VCPUS CPU_SETSIZE + static const char *data_dir[16]; + static int data_dir_idx; + const char *bios_name = NULL; +@@ -167,6 +168,8 @@ int smp_cpus = 1; + int max_cpus = 1; + int smp_cores = 1; + int smp_threads = 1; ++int vcpu_affinity[MAX_VCPUS]; ++int num_affinity = 0; + int acpi_enabled = 1; + int no_hpet = 0; + int fd_bootchk = 1; +@@ -1212,6 +1215,57 @@ static QemuOptsList qemu_smp_opts = { + }, + }; + ++static QemuOptsList qemu_vcpu_opts = { ++ .name = "vcpu-opts", ++ .implied_opt_name = "vcpunum", ++ .head = QTAILQ_HEAD_INITIALIZER(qemu_vcpu_opts.head), ++ .desc = { ++ { ++ .name = "vcpunum", ++ .type = QEMU_OPT_NUMBER, ++ }, { 
++ .name = "affinity", ++ .type = QEMU_OPT_NUMBER, ++ }, ++ { /*End of list */ } ++ }, ++}; ++ ++static int parse_vcpu(void *opaque, QemuOpts *opts, Error **errp) ++{ ++ if (opts) { ++ unsigned vcpu = qemu_opt_get_number(opts, "vcpunum", 0); ++ unsigned affinity = qemu_opt_get_number(opts,"affinity", 0); ++ ++ if (vcpu < smp_cpus * smp_cores * smp_threads) { ++ if (vcpu_affinity[vcpu] == -1) { ++ vcpu_affinity[vcpu] = affinity; ++ } ++ else { ++ error_report("Duplicate affinity statement for vcpu %d\n", vcpu); ++ return -1; ++ } ++ num_affinity += 1; ++ } ++ else { ++ error_report("VCPU %d is more than allowed %d VCPUs in the system\n", vcpu, smp_cores); ++ return -1; ++ } ++ } ++ return 0; ++} ++ ++static void parse_vcpu_opts(MachineClass *mc) ++{ ++ int i; ++ for (i = 0; i < MAX_VCPUS; i++) ++ vcpu_affinity[i] = -1; ++ ++ if (qemu_opts_foreach(qemu_find_opts("vcpu-opts"), parse_vcpu, NULL, NULL)) { ++ exit(1); ++ } ++} ++ + static void smp_parse(QemuOpts *opts) + { + if (opts) { +@@ -3067,6 +3121,7 @@ int main(int argc, char **argv, char **envp) + qemu_add_opts(&qemu_accel_opts); + qemu_add_opts(&qemu_mem_opts); + qemu_add_opts(&qemu_smp_opts); ++ qemu_add_opts(&qemu_vcpu_opts); + qemu_add_opts(&qemu_boot_opts); + qemu_add_opts(&qemu_sandbox_opts); + qemu_add_opts(&qemu_add_fd_opts); +@@ -3818,6 +3873,12 @@ int main(int argc, char **argv, char **envp) + exit(1); + } + break; ++ case QEMU_OPTION_vcpu: ++ if (!qemu_opts_parse_noisily(qemu_find_opts("vcpu-opts"), ++ optarg, true)) { ++ exit(1); ++ } ++ break; + case QEMU_OPTION_vnc: + vnc_parse(optarg, &error_fatal); + break; +@@ -4243,6 +4304,7 @@ int main(int argc, char **argv, char **envp) + exit(1); + } + ++ parse_vcpu_opts(machine_class); + /* + * Get the default machine options from the machine if it is not already + * specified either by the configuration file or by the command line. 
diff --git a/qemu-ga.service b/qemu-ga.service new file mode 100644 index 000000000000..abbb6ab9dfe2 --- /dev/null +++ b/qemu-ga.service @@ -0,0 +1,9 @@ +[Unit] +Description=QEMU Guest Agent +ConditionPathExists=/dev/virtio-ports/org.qemu.guest_agent.0 + +[Service] +ExecStart=/usr/bin/qemu-ga + +[Install] +WantedBy=multi-user.target diff --git a/qemu.install b/qemu.install new file mode 100644 index 000000000000..d90269ecfecd --- /dev/null +++ b/qemu.install @@ -0,0 +1,8 @@ +# Arg 1: the new package version +post_install() { + # trigger events on modules files when already loaded + for _f in /sys/devices/virtual/misc/vhost-net; do + [[ -e "$_f" ]] && udevadm trigger "$_f" + done + : +} diff --git a/v2_qemu_zen_smt_cache.patch b/v2_qemu_zen_smt_cache.patch new file mode 100644 index 000000000000..e5d0a6e7652c --- /dev/null +++ b/v2_qemu_zen_smt_cache.patch @@ -0,0 +1,172 @@ +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index ddc45abd70..ebf27ba7e9 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -113,7 +113,9 @@ + /* L1 instruction cache: */ + #define L1I_LINE_SIZE 64 + #define L1I_ASSOCIATIVITY 8 ++#define L1I_ASSOC_AMD_ZEN 4 + #define L1I_SETS 64 ++#define L1I_SETS_AMD_ZEN 256 + #define L1I_PARTITIONS 1 + /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 32KiB */ + #define L1I_DESCRIPTOR CPUID_2_L1I_32KB_8WAY_64B +@@ -125,7 +127,9 @@ + /* Level 2 unified cache: */ + #define L2_LINE_SIZE 64 + #define L2_ASSOCIATIVITY 16 ++#define L2_ASSOC_AMD_ZEN 8 + #define L2_SETS 4096 ++#define L2_SETS_AMD_ZEN 1024 + #define L2_PARTITIONS 1 + /* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 4MiB */ + /*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */ +@@ -142,6 +146,7 @@ + #define L3_N_LINE_SIZE 64 + #define L3_N_ASSOCIATIVITY 16 + #define L3_N_SETS 16384 ++#define L3_N_SETS_AMD_ZEN 4096 + #define L3_N_PARTITIONS 1 + #define L3_N_DESCRIPTOR CPUID_2_L3_16MB_16WAY_64B + #define L3_N_LINES_PER_TAG 1 +@@ -3072,6 +3077,91 @@ void 
cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + *edx = 0; + } + break; ++ case 0x8000001D: /* AMD TOPOEXT cache info for ZEN */ ++ if (cpu->cache_info_passthrough) { ++ host_cpuid(index, count, eax, ebx, ecx, edx); ++ break; ++ } else if ((env->cpuid_version & 0xFF00F00) == 0x800F00) { ++ *eax = 0; ++ switch (count) { ++ case 0: /* L1 dcache info */ ++ *eax |= CPUID_4_TYPE_DCACHE | \ ++ CPUID_4_LEVEL(1) | \ ++ CPUID_4_SELF_INIT_LEVEL | \ ++ ((cs->nr_threads - 1) << 14); ++ *ebx = (L1D_LINE_SIZE - 1) | \ ++ ((L1D_PARTITIONS - 1) << 12) | \ ++ ((L1D_ASSOCIATIVITY - 1) << 22); ++ *ecx = L1D_SETS - 1; ++ *edx = 0; ++ break; ++ case 1: /* L1 icache info */ ++ *eax |= CPUID_4_TYPE_ICACHE | \ ++ CPUID_4_LEVEL(1) | \ ++ CPUID_4_SELF_INIT_LEVEL | \ ++ ((cs->nr_threads - 1) << 14); ++ *ebx = (L1I_LINE_SIZE - 1) | \ ++ ((L1I_PARTITIONS - 1) << 12) | \ ++ ((L1I_ASSOC_AMD_ZEN - 1) << 22); ++ *ecx = L1I_SETS_AMD_ZEN - 1; ++ *edx = 0; ++ break; ++ case 2: /* L2 cache info */ ++ *eax |= CPUID_4_TYPE_UNIFIED | \ ++ CPUID_4_LEVEL(2) | \ ++ CPUID_4_SELF_INIT_LEVEL | \ ++ ((cs->nr_threads - 1) << 14); ++ *ebx = (L2_LINE_SIZE - 1) | \ ++ ((L2_PARTITIONS - 1) << 12) | \ ++ ((L2_ASSOC_AMD_ZEN - 1) << 22); ++ *ecx = L2_SETS_AMD_ZEN - 1; ++ *edx = CPUID_4_INCLUSIVE; ++ break; ++ case 3: /* L3 cache info */ ++ if (!cpu->enable_l3_cache) { ++ *eax = 0; ++ *ebx = 0; ++ *ecx = 0; ++ *edx = 0; ++ break; ++ } ++ *eax |= CPUID_4_TYPE_UNIFIED | \ ++ CPUID_4_LEVEL(3) | \ ++ CPUID_4_SELF_INIT_LEVEL | \ ++ ((cs->nr_cores * cs->nr_threads - 1) << 14); ++ *ebx = (L3_N_LINE_SIZE - 1) | \ ++ ((L3_N_PARTITIONS - 1) << 12) | \ ++ ((L3_N_ASSOCIATIVITY - 1) << 22); ++ *ecx = L3_N_SETS_AMD_ZEN - 1; ++ *edx = CPUID_4_NO_INVD_SHARING; ++ break; ++ default: /* end of info */ ++ *eax = 0; ++ *ebx = 0; ++ *ecx = 0; ++ *edx = 0; ++ break; ++ } ++ } else { ++ *eax = 0; ++ *ebx = 0; ++ *ecx = 0; ++ *edx = 0; ++ } ++ break; ++ case 0x8000001E: /* AMD TOPOEXT cpu topology info for ZEN */ ++ if 
((env->cpuid_version & 0xFF00F00) == 0x800F00) { ++ *eax = cpu->apic_id; ++ *ebx = (cs->nr_threads - 1) << 8 | cpu->core_id; ++ *ecx = cpu->socket_id; ++ *edx = 0; ++ } else { ++ *eax = 0; ++ *ebx = 0; ++ *ecx = 0; ++ *edx = 0; ++ } ++ break; + case 0xC0000000: + *eax = env->cpuid_xlevel2; + *ebx = 0; +@@ -3742,7 +3832,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + * NOTE: the following code has to follow qemu_init_vcpu(). Otherwise + * cs->nr_threads hasn't be populated yet and the checking is incorrect. + */ +- if (!IS_INTEL_CPU(env) && cs->nr_threads > 1 && !ht_warned) { ++ if (!IS_INTEL_CPU(env) && cs->nr_threads > 1 && !ht_warned && (env->cpuid_version & 0xFF00F00) != 0x800F00) { + error_report("AMD CPU doesn't support hyperthreading. Please configure" + " -smp options properly."); + ht_warned = true; +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 6db7783edc..d6b4e1ae74 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -869,9 +869,31 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + c = &cpuid_data.entries[cpuid_i++]; + +- c->function = i; +- c->flags = 0; +- cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); ++ switch (i) { ++ case 0x8000001d: ++ for (j = 0; ; j++) { ++ c->function = i; ++ c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; ++ c->index = j; ++ cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); ++ ++ if (c->eax == 0) { ++ break; ++ } ++ if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { ++ fprintf(stderr, "cpuid_data is full, no space for " ++ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); ++ abort(); ++ } ++ c = &cpuid_data.entries[cpuid_i++]; ++ } ++ break; ++ default: ++ c->function = i; ++ c->flags = 0; ++ cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); ++ break; ++ } + } + + /* Call Centaur's CPUID instructions they are supported. 
*/ diff --git a/v4_ivshmem.patch b/v4_ivshmem.patch new file mode 100644 index 000000000000..65065b6807a2 --- /dev/null +++ b/v4_ivshmem.patch @@ -0,0 +1,331 @@ +As of commit 660c97eef6f8 ("ivshmem: use kvm irqfd for msi notifications"), +QEMU crashes with: + + kvm_irqchip_commit_routes: Assertion `ret == 0' failed. + +if the ivshmem device is configured with more vectors than what the server +supports. This is caused by the ivshmem_vector_unmask() being called on +vectors that have not been initialized by ivshmem_add_kvm_msi_virq(). + +This commit fixes it by adding a simple check to the mask and unmask +callbacks. + +Note that the opposite mismatch, if the server supplies more vectors than +what the device is configured for, is already handled and leads to output +like: + + Too many eventfd received, device has 1 vectors + +To reproduce the assert, run: + + ivshmem-server -n 0 + +and QEMU with: + + -device ivshmem-doorbell,chardev=iv + -chardev socket,path=/tmp/ivshmem_socket,id=iv + +then load the Windows driver, at the time of writing available at: + +https://github.com/virtio-win/kvm-guest-drivers-windows/tree/master/ivshmem + +The issue is believed to have been masked by other guest drivers, notably +Linux ones, not enabling MSI-X on the device. 
+ +Fixes: 660c97eef6f8 ("ivshmem: use kvm irqfd for msi notifications") +Signed-off-by: Ladi Prosek <address@hidden> +Reviewed-by: Marc-André Lureau <address@hidden> +Reviewed-by: Markus Armbruster <address@hidden> +--- + hw/misc/ivshmem.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c +index a5a46827fe..6e46669744 100644 +--- a/hw/misc/ivshmem.c ++++ b/hw/misc/ivshmem.c +@@ -317,6 +317,10 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, + int ret; + + IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector); ++ if (!v->pdev) { ++ error_report("ivshmem: vector %d route does not exist", vector); ++ return -EINVAL; ++ } + + ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); + if (ret < 0) { +@@ -331,12 +335,16 @@ static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) + { + IVShmemState *s = IVSHMEM_COMMON(dev); + EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; ++ MSIVector *v = &s->msi_vectors[vector]; + int ret; + + IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector); ++ if (!v->pdev) { ++ error_report("ivshmem: vector %d route does not exist", vector); ++ return; ++ } + +- ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, +- s->msi_vectors[vector].virq); ++ ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq); + if (ret != 0) { + error_report("remove_irqfd_notifier_gsi failed"); + } +-- +2.13.6 +As of commit 660c97eef6f8 ("ivshmem: use kvm irqfd for msi notifications"), +QEMU crashes with: + +ivshmem: msix_set_vector_notifiers failed +msix_unset_vector_notifiers: Assertion `dev->msix_vector_use_notifier && +dev->msix_vector_release_notifier' failed. + +if MSI-X is repeatedly enabled and disabled on the ivshmem device, for example +by loading and unloading the Windows ivshmem driver. 
This is because +msix_unset_vector_notifiers() doesn't call any of the release notifier callbacks +since MSI-X is already disabled at that point (msix_enabled() returning false +is how this transition is detected in the first place). Thus +ivshmem_vector_mask() +doesn't run and when MSI-X is subsequently enabled again ivshmem_vector_unmask() +fails. + +This is fixed by keeping track of unmasked vectors and making sure that +ivshmem_vector_mask() always runs on MSI-X disable. + +Fixes: 660c97eef6f8 ("ivshmem: use kvm irqfd for msi notifications") +Signed-off-by: Ladi Prosek <address@hidden> +Reviewed-by: Markus Armbruster <address@hidden> +--- + hw/misc/ivshmem.c | 32 ++++++++++++++++++++++++++------ + 1 file changed, 26 insertions(+), 6 deletions(-) + +diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c +index 6e46669744..91364d8364 100644 +--- a/hw/misc/ivshmem.c ++++ b/hw/misc/ivshmem.c +@@ -77,6 +77,7 @@ typedef struct Peer { + typedef struct MSIVector { + PCIDevice *pdev; + int virq; ++ bool unmasked; + } MSIVector; + + typedef struct IVShmemState { +@@ -321,6 +322,7 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, + error_report("ivshmem: vector %d route does not exist", vector); + return -EINVAL; + } ++ assert(!v->unmasked); + + ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); + if (ret < 0) { +@@ -328,7 +330,13 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, + } + kvm_irqchip_commit_routes(kvm_state); + +- return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); ++ ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); ++ if (ret < 0) { ++ return ret; ++ } ++ v->unmasked = true; ++ ++ return 0; + } + + static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) +@@ -343,11 +351,14 @@ static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) + error_report("ivshmem: vector %d route does not exist", vector); + return; + } ++ assert(v->unmasked); + + ret = 
kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq); +- if (ret != 0) { ++ if (ret < 0) { + error_report("remove_irqfd_notifier_gsi failed"); ++ return; + } ++ v->unmasked = false; + } + + static void ivshmem_vector_poll(PCIDevice *dev, +@@ -817,11 +828,20 @@ static void ivshmem_disable_irqfd(IVShmemState *s) + PCIDevice *pdev = PCI_DEVICE(s); + int i; + +- for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { +- ivshmem_remove_kvm_msi_virq(s, i); +- } +- + msix_unset_vector_notifiers(pdev); ++ ++ for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { ++ /* ++ * MSI-X is already disabled here so msix_unset_vector_notifiers() ++ * didn't call our release notifier. Do it now to keep our masks and ++ * unmasks balanced. ++ */ ++ if (s->msi_vectors[i].unmasked) { ++ ivshmem_vector_mask(pdev, i); ++ } ++ ivshmem_remove_kvm_msi_virq(s, i); ++ } ++ + } + + static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, +-- +2.13.6 +Adds a rollback path to ivshmem_enable_irqfd() and fixes +ivshmem_disable_irqfd() to bail if irqfd has not been enabled. + +To reproduce, run: + + ivshmem-server -n 0 + +and QEMU with: + + -device ivshmem-doorbell,chardev=iv + -chardev socket,path=/tmp/ivshmem_socket,id=iv + +then load, unload, and load again the Windows driver, at the time of writing +available at: + +https://github.com/virtio-win/kvm-guest-drivers-windows/tree/master/ivshmem + +The issue is believed to have been masked by other guest drivers, notably +Linux ones, not enabling MSI-X on the device. 
+ +Signed-off-by: Ladi Prosek <address@hidden> +Reviewed-by: Markus Armbruster <address@hidden> +--- + hw/misc/ivshmem.c | 37 ++++++++++++++++++++++++------------- + 1 file changed, 24 insertions(+), 13 deletions(-) + +diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c +index 91364d8364..d1bb246d12 100644 +--- a/hw/misc/ivshmem.c ++++ b/hw/misc/ivshmem.c +@@ -786,6 +786,20 @@ static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp) + return 0; + } + ++static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector) ++{ ++ IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector); ++ ++ if (s->msi_vectors[vector].pdev == NULL) { ++ return; ++ } ++ ++ /* it was cleaned when masked in the frontend. */ ++ kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq); ++ ++ s->msi_vectors[vector].pdev = NULL; ++} ++ + static void ivshmem_enable_irqfd(IVShmemState *s) + { + PCIDevice *pdev = PCI_DEVICE(s); +@@ -797,7 +811,7 @@ static void ivshmem_enable_irqfd(IVShmemState *s) + ivshmem_add_kvm_msi_virq(s, i, &err); + if (err) { + error_report_err(err); +- /* TODO do we need to handle the error? */ ++ goto undo; + } + } + +@@ -806,21 +820,14 @@ static void ivshmem_enable_irqfd(IVShmemState *s) + ivshmem_vector_mask, + ivshmem_vector_poll)) { + error_report("ivshmem: msix_set_vector_notifiers failed"); ++ goto undo; + } +-} ++ return; + +-static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector) +-{ +- IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector); +- +- if (s->msi_vectors[vector].pdev == NULL) { +- return; ++undo: ++ while (--i >= 0) { ++ ivshmem_remove_kvm_msi_virq(s, i); + } +- +- /* it was cleaned when masked in the frontend. 
*/ +- kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq); +- +- s->msi_vectors[vector].pdev = NULL; + } + + static void ivshmem_disable_irqfd(IVShmemState *s) +@@ -828,6 +835,10 @@ static void ivshmem_disable_irqfd(IVShmemState *s) + PCIDevice *pdev = PCI_DEVICE(s); + int i; + ++ if (!pdev->msix_vector_use_notifier) { ++ return; ++ } ++ + msix_unset_vector_notifiers(pdev); + + for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { +-- +2.13.6 +The effects of ivshmem_enable_irqfd() was not undone on device reset. + +This manifested as: +ivshmem_add_kvm_msi_virq: Assertion `!s->msi_vectors[vector].pdev' failed. + +when irqfd was enabled before reset and then enabled again after reset, making +ivshmem_enable_irqfd() run for the second time. + +To reproduce, run: + + ivshmem-server + +and QEMU with: + + -device ivshmem-doorbell,chardev=iv + -chardev socket,path=/tmp/ivshmem_socket,id=iv + +then install the Windows driver, at the time of writing available at: + +https://github.com/virtio-win/kvm-guest-drivers-windows/tree/master/ivshmem + +and crash-reboot the guest by inducing a BSOD. 
+ +Signed-off-by: Ladi Prosek <address@hidden> +--- + hw/misc/ivshmem.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c +index d1bb246d12..9c7e74ef12 100644 +--- a/hw/misc/ivshmem.c ++++ b/hw/misc/ivshmem.c +@@ -758,10 +758,14 @@ static void ivshmem_msix_vector_use(IVShmemState *s) + } + } + ++static void ivshmem_disable_irqfd(IVShmemState *s); ++ + static void ivshmem_reset(DeviceState *d) + { + IVShmemState *s = IVSHMEM_COMMON(d); + ++ ivshmem_disable_irqfd(s); ++ + s->intrstatus = 0; + s->intrmask = 0; + if (ivshmem_has_feature(s, IVSHMEM_MSI)) { +-- +2.13.6 diff --git a/vfio-msi-1.patch b/vfio-msi-1.patch new file mode 100644 index 000000000000..8f0688eeebe4 --- /dev/null +++ b/vfio-msi-1.patch @@ -0,0 +1,133 @@ +This will later be used to include list initialization + +Signed-off-by: Alex Williamson <address@hidden> +--- + hw/vfio/pci-quirks.c | 48 +++++++++++++++++++++--------------------------- + 1 file changed, 21 insertions(+), 27 deletions(-) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index e5779a7ad35b..10af23217292 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -275,6 +275,15 @@ static const MemoryRegionOps vfio_ati_3c3_quirk = { + .endianness = DEVICE_LITTLE_ENDIAN, + }; + ++static VFIOQuirk *vfio_quirk_alloc(int nr_mem) ++{ ++ VFIOQuirk *quirk = g_malloc0(sizeof(*quirk)); ++ quirk->mem = g_new0(MemoryRegion, nr_mem); ++ quirk->nr_mem = nr_mem; ++ ++ return quirk; ++} ++ + static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) + { + VFIOQuirk *quirk; +@@ -288,9 +297,7 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) + return; + } + +- quirk = g_malloc0(sizeof(*quirk)); +- quirk->mem = g_new0(MemoryRegion, 1); +- quirk->nr_mem = 1; ++ quirk = vfio_quirk_alloc(1); + + memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev, + "vfio-ati-3c3-quirk", 1); +@@ -323,9 +330,7 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice 
*vdev, int nr) + return; + } + +- quirk = g_malloc0(sizeof(*quirk)); +- quirk->mem = g_new0(MemoryRegion, 2); +- quirk->nr_mem = 2; ++ quirk = vfio_quirk_alloc(2); + window = quirk->data = g_malloc0(sizeof(*window) + + sizeof(VFIOConfigWindowMatch)); + window->vdev = vdev; +@@ -371,10 +376,9 @@ static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr) + return; + } + +- quirk = g_malloc0(sizeof(*quirk)); ++ quirk = vfio_quirk_alloc(1); + mirror = quirk->data = g_malloc0(sizeof(*mirror)); +- mirror->mem = quirk->mem = g_new0(MemoryRegion, 1); +- quirk->nr_mem = 1; ++ mirror->mem = quirk->mem; + mirror->vdev = vdev; + mirror->offset = 0x4000; + mirror->bar = nr; +@@ -548,10 +552,8 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) + return; + } + +- quirk = g_malloc0(sizeof(*quirk)); ++ quirk = vfio_quirk_alloc(2); + quirk->data = data = g_malloc0(sizeof(*data)); +- quirk->mem = g_new0(MemoryRegion, 2); +- quirk->nr_mem = 2; + data->vdev = vdev; + + memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk, +@@ -667,9 +669,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) + return; + } + +- quirk = g_malloc0(sizeof(*quirk)); +- quirk->mem = g_new0(MemoryRegion, 4); +- quirk->nr_mem = 4; ++ quirk = vfio_quirk_alloc(4); + bar5 = quirk->data = g_malloc0(sizeof(*bar5) + + (sizeof(VFIOConfigWindowMatch) * 2)); + window = &bar5->window; +@@ -762,10 +762,9 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) + return; + } + +- quirk = g_malloc0(sizeof(*quirk)); ++ quirk = vfio_quirk_alloc(1); + mirror = quirk->data = g_malloc0(sizeof(*mirror)); +- mirror->mem = quirk->mem = g_new0(MemoryRegion, 1); +- quirk->nr_mem = 1; ++ mirror->mem = quirk->mem; + mirror->vdev = vdev; + mirror->offset = 0x88000; + mirror->bar = nr; +@@ -781,10 +780,9 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) + + /* The 0x1800 offset mirror only seems to get used by legacy VGA */ + if 
(vdev->vga) { +- quirk = g_malloc0(sizeof(*quirk)); ++ quirk = vfio_quirk_alloc(1); + mirror = quirk->data = g_malloc0(sizeof(*mirror)); +- mirror->mem = quirk->mem = g_new0(MemoryRegion, 1); +- quirk->nr_mem = 1; ++ mirror->mem = quirk->mem; + mirror->vdev = vdev; + mirror->offset = 0x1800; + mirror->bar = nr; +@@ -945,9 +943,7 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) + return; + } + +- quirk = g_malloc0(sizeof(*quirk)); +- quirk->mem = g_new0(MemoryRegion, 2); +- quirk->nr_mem = 2; ++ quirk = vfio_quirk_alloc(2); + quirk->data = rtl = g_malloc0(sizeof(*rtl)); + rtl->vdev = vdev; + +@@ -1507,9 +1503,7 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) + } + + /* Setup our quirk to munge GTT addresses to the VM allocated buffer */ +- quirk = g_malloc0(sizeof(*quirk)); +- quirk->mem = g_new0(MemoryRegion, 2); +- quirk->nr_mem = 2; ++ quirk = vfio_quirk_alloc(2); + igd = quirk->data = g_malloc0(sizeof(*igd)); + igd->vdev = vdev; + igd->index = ~0; diff --git a/vfio-msi-2.patch b/vfio-msi-2.patch new file mode 100644 index 000000000000..69af39e83bd2 --- /dev/null +++ b/vfio-msi-2.patch @@ -0,0 +1,82 @@ +We might wish to handle some quirks via ioeventfds, add a list of +ioeventfds to the quirk. 
+ +Signed-off-by: Alex Williamson <address@hidden> +--- + hw/vfio/pci-quirks.c | 17 +++++++++++++++++ + hw/vfio/pci.h | 11 +++++++++++ + 2 files changed, 28 insertions(+) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index 10af23217292..e4cf4ea2dd9c 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -12,6 +12,7 @@ + + #include "qemu/osdep.h" + #include "qemu/error-report.h" ++#include "qemu/main-loop.h" + #include "qemu/range.h" + #include "qapi/error.h" + #include "qapi/visitor.h" +@@ -278,12 +279,24 @@ static const MemoryRegionOps vfio_ati_3c3_quirk = { + static VFIOQuirk *vfio_quirk_alloc(int nr_mem) + { + VFIOQuirk *quirk = g_malloc0(sizeof(*quirk)); ++ QLIST_INIT(&quirk->ioeventfds); + quirk->mem = g_new0(MemoryRegion, nr_mem); + quirk->nr_mem = nr_mem; + + return quirk; + } + ++static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd) ++{ ++ QLIST_REMOVE(ioeventfd, next); ++ memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size, ++ ioeventfd->match_data, ioeventfd->data, ++ &ioeventfd->e); ++ qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), NULL, NULL, NULL); ++ event_notifier_cleanup(&ioeventfd->e); ++ g_free(ioeventfd); ++} ++ + static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) + { + VFIOQuirk *quirk; +@@ -1668,6 +1681,10 @@ void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr) + int i; + + QLIST_FOREACH(quirk, &bar->quirks, next) { ++ while (!QLIST_EMPTY(&quirk->ioeventfds)) { ++ vfio_ioeventfd_exit(QLIST_FIRST(&quirk->ioeventfds)); ++ } ++ + for (i = 0; i < quirk->nr_mem; i++) { + memory_region_del_subregion(bar->region.mem, &quirk->mem[i]); + } +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index f4aa13e021fa..146065c2f715 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -24,9 +24,20 @@ + + struct VFIOPCIDevice; + ++typedef struct VFIOIOEventFD { ++ QLIST_ENTRY(VFIOIOEventFD) next; ++ MemoryRegion *mr; ++ hwaddr addr; ++ unsigned size; ++ bool match_data; ++ uint64_t data; ++ 
EventNotifier e; ++} VFIOIOEventFD; ++ + typedef struct VFIOQuirk { + QLIST_ENTRY(VFIOQuirk) next; + void *data; ++ QLIST_HEAD(, VFIOIOEventFD) ioeventfds; + int nr_mem; + MemoryRegion *mem; + } VFIOQuirk; diff --git a/vfio-msi-3.patch b/vfio-msi-3.patch new file mode 100644 index 000000000000..6c63a134a870 --- /dev/null +++ b/vfio-msi-3.patch @@ -0,0 +1,170 @@ +Record data writes that come through the NVIDIA BAR0 quirk, if we get +enough in a row that we're only passing through, automatically enable +an ioeventfd for it. The primary target for this is the MSI-ACK +that NVIDIA uses to allow the MSI interrupt to re-trigger, which is a +4-byte write, data value 0x0 to offset 0x704 into the quirk, 0x88704 +into BAR0 MMIO space. For an interrupt latency sensitive micro- +benchmark, this takes us from 83% of performance versus disabling the +quirk entirely (which GeForce cannot do), to almost 90%. + +Signed-off-by: Alex Williamson <address@hidden> +--- + hw/vfio/pci-quirks.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++- + hw/vfio/pci.h | 2 + + 2 files changed, 89 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index e4cf4ea2dd9c..e739efe601b1 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -203,6 +203,7 @@ typedef struct VFIOConfigMirrorQuirk { + uint32_t offset; + uint8_t bar; + MemoryRegion *mem; ++ uint8_t data[]; + } VFIOConfigMirrorQuirk; + + static uint64_t vfio_generic_quirk_mirror_read(void *opaque, +@@ -297,6 +298,50 @@ static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd) + g_free(ioeventfd); + } + ++static void vfio_ioeventfd_handler(void *opaque) ++{ ++ VFIOIOEventFD *ioeventfd = opaque; ++ ++ if (event_notifier_test_and_clear(&ioeventfd->e)) { ++ vfio_region_write(ioeventfd->region, ioeventfd->region_addr, ++ ioeventfd->data, ioeventfd->size); ++ } ++} ++ ++static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev, ++ MemoryRegion *mr, hwaddr addr, ++ unsigned size, uint64_t 
data, ++ VFIORegion *region, ++ hwaddr region_addr) ++{ ++ VFIOIOEventFD *ioeventfd = g_malloc0(sizeof(*ioeventfd)); ++ ++ if (event_notifier_init(&ioeventfd->e, 0)) { ++ g_free(ioeventfd); ++ return NULL; ++ } ++ ++ ioeventfd->mr = mr; ++ ioeventfd->addr = addr; ++ ioeventfd->size = size; ++ ioeventfd->match_data = true; ++ ioeventfd->data = data; ++ ioeventfd->region = region; ++ ioeventfd->region_addr = region_addr; ++ ++ qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), ++ vfio_ioeventfd_handler, NULL, ioeventfd); ++ memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, ++ ioeventfd->size, ioeventfd->match_data, ++ ioeventfd->data, &ioeventfd->e); ++ ++ info_report("Enabled automatic ioeventfd acceleration for %s region %d, " ++ "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u", ++ vdev->vbasedev.name, region->nr, region_addr, data, size); ++ ++ return ioeventfd; ++} ++ + static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) + { + VFIOQuirk *quirk; +@@ -732,6 +777,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) + trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name); + } + ++typedef struct LastDataSet { ++ hwaddr addr; ++ uint64_t data; ++ unsigned size; ++ int count; ++} LastDataSet; ++ + /* + * Finally, BAR0 itself. We want to redirect any accesses to either + * 0x1800 or 0x88000 through the PCI config space access functions. 
+@@ -742,6 +794,7 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr, + VFIOConfigMirrorQuirk *mirror = opaque; + VFIOPCIDevice *vdev = mirror->vdev; + PCIDevice *pdev = &vdev->pdev; ++ LastDataSet *last = (LastDataSet *)&mirror->data; + + vfio_generic_quirk_mirror_write(opaque, addr, data, size); + +@@ -756,6 +809,38 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr, + addr + mirror->offset, data, size); + trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name); + } ++ ++ /* ++ * Automatically add an ioeventfd to handle any repeated write with the ++ * same data and size above the standard PCI config space header. This is ++ * primarily expected to accelerate the MSI-ACK behavior, such as noted ++ * above. Current hardware/drivers should trigger an ioeventfd at config ++ * offset 0x704 (region offset 0x88704), with data 0x0, size 4. ++ */ ++ if (addr > PCI_STD_HEADER_SIZEOF) { ++ if (addr != last->addr || data != last->data || size != last->size) { ++ last->addr = addr; ++ last->data = data; ++ last->size = size; ++ last->count = 1; ++ } else if (++last->count > 10) { ++ VFIOIOEventFD *ioeventfd; ++ ++ ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size, data, ++ &vdev->bars[mirror->bar].region, ++ mirror->offset + addr); ++ if (ioeventfd) { ++ VFIOQuirk *quirk; ++ ++ QLIST_FOREACH(quirk, &vdev->bars[mirror->bar].quirks, next) { ++ if (quirk->data == mirror) { ++ QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next); ++ break; ++ } ++ } ++ } ++ } ++ } + } + + static const MemoryRegionOps vfio_nvidia_mirror_quirk = { +@@ -776,7 +861,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) + } + + quirk = vfio_quirk_alloc(1); +- mirror = quirk->data = g_malloc0(sizeof(*mirror)); ++ mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet)); + mirror->mem = quirk->mem; + mirror->vdev = vdev; + mirror->offset = 0x88000; +@@ -794,7 +879,7 @@ static void 
vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) + /* The 0x1800 offset mirror only seems to get used by legacy VGA */ + if (vdev->vga) { + quirk = vfio_quirk_alloc(1); +- mirror = quirk->data = g_malloc0(sizeof(*mirror)); ++ mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet)); + mirror->mem = quirk->mem; + mirror->vdev = vdev; + mirror->offset = 0x1800; +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 146065c2f715..ec53b9935725 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -32,6 +32,8 @@ typedef struct VFIOIOEventFD { + bool match_data; + uint64_t data; + EventNotifier e; ++ VFIORegion *region; ++ hwaddr region_addr; + } VFIOIOEventFD; + + typedef struct VFIOQuirk { diff --git a/vfio-msi-4.patch b/vfio-msi-4.patch new file mode 100644 index 000000000000..98417a11613c --- /dev/null +++ b/vfio-msi-4.patch @@ -0,0 +1,42 @@ +Update with proposed ioeventfd API. + +Signed-off-by: Alex Williamson <address@hidden> +--- + linux-headers/linux/vfio.h | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index 4312e961ffd3..0921994daa6d 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -503,6 +503,30 @@ struct vfio_pci_hot_reset { + + #define VFIO_DEVICE_PCI_HOT_RESET _IO(VFIO_TYPE, VFIO_BASE + 13) + ++/** ++ * VFIO_DEVICE_IOEVENTFD - _IOW(VFIO_TYPE, VFIO_BASE + 14, ++ * struct vfio_device_ioeventfd) ++ * ++ * Perform a write to the device at the specified device fd offset, with ++ * the specified data and width when the provided eventfd is triggered. ++ * ++ * Return: 0 on success, -errno on failure. 
++ */ ++struct vfio_device_ioeventfd { ++ __u32 argsz; ++ __u32 flags; ++#define VFIO_DEVICE_IOEVENTFD_8 (1 << 0) /* 1-byte write */ ++#define VFIO_DEVICE_IOEVENTFD_16 (1 << 1) /* 2-byte write */ ++#define VFIO_DEVICE_IOEVENTFD_32 (1 << 2) /* 4-byte write */ ++#define VFIO_DEVICE_IOEVENTFD_64 (1 << 3) /* 8-byte write */ ++#define VFIO_DEVICE_IOEVENTFD_SIZE_MASK (0xf) ++ __u64 offset; /* device fd offset of write */ ++ __u64 data; /* data to be written */ ++ __s32 fd; /* -1 for de-assignment */ ++}; ++ ++#define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 14) ++ + /* -------- API for Type1 VFIO IOMMU -------- */ + + /** diff --git a/vfio-msi-5.patch b/vfio-msi-5.patch new file mode 100644 index 000000000000..9033b604a7ca --- /dev/null +++ b/vfio-msi-5.patch @@ -0,0 +1,104 @@ +With vfio ioeventfd support, we can program vfio-pci to perform a +specified BAR write when an eventfd is triggered. This allows the +KVM ioeventfd to be wired directly to vfio-pci, entirely avoiding +userspace handling for these events. On the same micro-benchmark +where the ioeventfd got us to almost 90% of performance versus +disabling the GeForce quirks, this gets us to within 95%. 
+ +Signed-off-by: Alex Williamson <address@hidden> +--- + hw/vfio/pci-quirks.c | 42 ++++++++++++++++++++++++++++++++++++------ + 1 file changed, 36 insertions(+), 6 deletions(-) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index e739efe601b1..35a4d5197e2d 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -16,6 +16,7 @@ + #include "qemu/range.h" + #include "qapi/error.h" + #include "qapi/visitor.h" ++#include <sys/ioctl.h> + #include "hw/nvram/fw_cfg.h" + #include "pci.h" + #include "trace.h" +@@ -287,13 +288,27 @@ static VFIOQuirk *vfio_quirk_alloc(int nr_mem) + return quirk; + } + +-static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd) ++static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd) + { ++ struct vfio_device_ioeventfd vfio_ioeventfd; ++ + QLIST_REMOVE(ioeventfd, next); ++ + memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size, + ioeventfd->match_data, ioeventfd->data, + &ioeventfd->e); ++ + qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), NULL, NULL, NULL); ++ ++ vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd); ++ vfio_ioeventfd.flags = ioeventfd->size; ++ vfio_ioeventfd.data = ioeventfd->data; ++ vfio_ioeventfd.offset = ioeventfd->region->fd_offset + ++ ioeventfd->region_addr; ++ vfio_ioeventfd.fd = -1; ++ ++ ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd); ++ + event_notifier_cleanup(&ioeventfd->e); + g_free(ioeventfd); + } +@@ -315,6 +330,8 @@ static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev, + hwaddr region_addr) + { + VFIOIOEventFD *ioeventfd = g_malloc0(sizeof(*ioeventfd)); ++ struct vfio_device_ioeventfd vfio_ioeventfd; ++ char vfio_enabled = '+'; + + if (event_notifier_init(&ioeventfd->e, 0)) { + g_free(ioeventfd); +@@ -329,15 +346,28 @@ static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev, + ioeventfd->region = region; + ioeventfd->region_addr = region_addr; + +- qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), 
+- vfio_ioeventfd_handler, NULL, ioeventfd); ++ vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd); ++ vfio_ioeventfd.flags = ioeventfd->size; ++ vfio_ioeventfd.data = ioeventfd->data; ++ vfio_ioeventfd.offset = ioeventfd->region->fd_offset + ++ ioeventfd->region_addr; ++ vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e); ++ ++ if (ioctl(vdev->vbasedev.fd, ++ VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd) != 0) { ++ qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), ++ vfio_ioeventfd_handler, NULL, ioeventfd); ++ vfio_enabled = '-'; ++ } ++ + memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, + ioeventfd->size, ioeventfd->match_data, + ioeventfd->data, &ioeventfd->e); + + info_report("Enabled automatic ioeventfd acceleration for %s region %d, " +- "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u", +- vdev->vbasedev.name, region->nr, region_addr, data, size); ++ "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u, vfio%c", ++ vdev->vbasedev.name, region->nr, region_addr, data, size, ++ vfio_enabled); + + return ioeventfd; + } +@@ -1767,7 +1797,7 @@ void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr) + + QLIST_FOREACH(quirk, &bar->quirks, next) { + while (!QLIST_EMPTY(&quirk->ioeventfds)) { +- vfio_ioeventfd_exit(QLIST_FIRST(&quirk->ioeventfds)); ++ vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds)); + } + + for (i = 0; i < quirk->nr_mem; i++) { |