summarylogtreecommitdiffstats
diff options
context:
space:
mode:
authorStefan Schmidt2018-03-01 13:40:31 +0100
committerStefan Schmidt2018-03-01 13:40:31 +0100
commit9065f70a5d47e4cf8f466b68104d5ddeb7f02409 (patch)
tree7227516d4faeca513d8ff787cd22ed72ba2cd064
downloadaur-9065f70a5d47e4cf8f466b68104d5ddeb7f02409.tar.gz
Initial version (tracks 68de8e9b3f26e68bc6d64f353e0954ddab2f7590)
-rw-r--r--.SRCINFO190
-rw-r--r--0001-wined3d-Implement-a-simple-heap-allocator-backed-by-.patch456
-rw-r--r--0002-wined3d-Allocate-global-write-only-persistent-buffer.patch81
-rw-r--r--0003-wined3d-Add-support-for-persistently-mapped-wined3d_.patch701
-rw-r--r--0004-wined3d-Implement-aligned-persistent-heaps-for-persi.patch240
-rw-r--r--0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch455
-rw-r--r--0006-wined3d-Switch-wined3d_buffer_heap-to-be-backed-by-a.patch383
-rw-r--r--0007-wined3d-Add-segregated-free-bins-to-complement-rbtre.patch305
-rw-r--r--0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch612
-rw-r--r--30-win32-aliases.conf20
-rw-r--r--PKGBUILD219
-rw-r--r--harmony-fix.diff63
12 files changed, 3725 insertions, 0 deletions
diff --git a/.SRCINFO b/.SRCINFO
new file mode 100644
index 000000000000..c8a5c3ab0f40
--- /dev/null
+++ b/.SRCINFO
@@ -0,0 +1,190 @@
+pkgbase = wine-staging-pba
+ pkgdesc = A compatibility layer for running Windows programs - Staging branch
+ pkgver = 2.21
+ pkgrel = 1
+ url = http://www.wine-staging.com
+ arch = x86_64
+ license = LGPL
+ makedepends = autoconf
+ makedepends = ncurses
+ makedepends = bison
+ makedepends = perl
+ makedepends = fontforge
+ makedepends = flex
+ makedepends = gcc>=4.5.0-2
+ makedepends = giflib
+ makedepends = lib32-giflib
+ makedepends = libpng
+ makedepends = lib32-libpng
+ makedepends = gnutls
+ makedepends = lib32-gnutls
+ makedepends = libxinerama
+ makedepends = lib32-libxinerama
+ makedepends = libxcomposite
+ makedepends = lib32-libxcomposite
+ makedepends = libxmu
+ makedepends = lib32-libxmu
+ makedepends = libxxf86vm
+ makedepends = lib32-libxxf86vm
+ makedepends = libldap
+ makedepends = lib32-libldap
+ makedepends = mpg123
+ makedepends = lib32-mpg123
+ makedepends = openal
+ makedepends = lib32-openal
+ makedepends = v4l-utils
+ makedepends = lib32-v4l-utils
+ makedepends = alsa-lib
+ makedepends = lib32-alsa-lib
+ makedepends = libxcomposite
+ makedepends = lib32-libxcomposite
+ makedepends = mesa
+ makedepends = lib32-mesa
+ makedepends = mesa-libgl
+ makedepends = lib32-mesa-libgl
+ makedepends = opencl-icd-loader
+ makedepends = lib32-opencl-icd-loader
+ makedepends = libxslt
+ makedepends = lib32-libxslt
+ makedepends = libpulse
+ makedepends = lib32-libpulse
+ makedepends = libva
+ makedepends = lib32-libva
+ makedepends = gtk3
+ makedepends = lib32-gtk3
+ makedepends = gst-plugins-base-libs
+ makedepends = lib32-gst-plugins-base-libs
+ makedepends = samba
+ makedepends = opencl-headers
+ makedepends = attr
+ makedepends = lib32-attr
+ makedepends = fontconfig
+ makedepends = lib32-fontconfig
+ makedepends = lcms2
+ makedepends = lib32-lcms2
+ makedepends = libxml2
+ makedepends = lib32-libxml2
+ makedepends = libxcursor
+ makedepends = lib32-libxcursor
+ makedepends = libxrandr
+ makedepends = lib32-libxrandr
+ makedepends = libxdamage
+ makedepends = lib32-libxdamage
+ makedepends = libxi
+ makedepends = lib32-libxi
+ makedepends = gettext
+ makedepends = lib32-gettext
+ makedepends = freetype2
+ makedepends = lib32-freetype2
+ makedepends = glu
+ makedepends = lib32-glu
+ makedepends = libsm
+ makedepends = lib32-libsm
+ makedepends = gcc-libs
+ makedepends = lib32-gcc-libs
+ makedepends = libpcap
+ makedepends = lib32-libpcap
+ makedepends = desktop-file-utils
+ optdepends = giflib
+ optdepends = lib32-giflib
+ optdepends = libpng
+ optdepends = lib32-libpng
+ optdepends = libldap
+ optdepends = lib32-libldap
+ optdepends = gnutls
+ optdepends = lib32-gnutls
+ optdepends = mpg123
+ optdepends = lib32-mpg123
+ optdepends = openal
+ optdepends = lib32-openal
+ optdepends = v4l-utils
+ optdepends = lib32-v4l-utils
+ optdepends = libpulse
+ optdepends = lib32-libpulse
+ optdepends = alsa-plugins
+ optdepends = lib32-alsa-plugins
+ optdepends = alsa-lib
+ optdepends = lib32-alsa-lib
+ optdepends = libjpeg-turbo
+ optdepends = lib32-libjpeg-turbo
+ optdepends = libxcomposite
+ optdepends = lib32-libxcomposite
+ optdepends = libxinerama
+ optdepends = lib32-libxinerama
+ optdepends = ncurses
+ optdepends = lib32-ncurses
+ optdepends = opencl-icd-loader
+ optdepends = lib32-opencl-icd-loader
+ optdepends = libxslt
+ optdepends = lib32-libxslt
+ optdepends = libva
+ optdepends = lib32-libva
+ optdepends = gtk3
+ optdepends = lib32-gtk3
+ optdepends = gst-plugins-base-libs
+ optdepends = lib32-gst-plugins-base-libs
+ optdepends = vulkan-icd-loader
+ optdepends = lib32-vulkan-icd-loader
+ optdepends = cups
+ optdepends = samba
+ optdepends = dosbox
+ provides = wine=2.21
+ provides = wine-wow64=2.21
+ conflicts = wine
+ conflicts = wine-wow64
+ conflicts = wine-staging
+ options = staticlibs
+ source = https://github.com/wine-compholio/wine-patched/archive/staging-2.21.tar.gz
+ source = harmony-fix.diff
+ source = 30-win32-aliases.conf
+ source = 0001-wined3d-Implement-a-simple-heap-allocator-backed-by-.patch
+ source = 0002-wined3d-Allocate-global-write-only-persistent-buffer.patch
+ source = 0003-wined3d-Add-support-for-persistently-mapped-wined3d_.patch
+ source = 0004-wined3d-Implement-aligned-persistent-heaps-for-persi.patch
+ source = 0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch
+ source = 0006-wined3d-Switch-wined3d_buffer_heap-to-be-backed-by-a.patch
+ source = 0007-wined3d-Add-segregated-free-bins-to-complement-rbtre.patch
+ source = 0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch
+ sha512sums = 4e3fe2eb81360bfa095194ab5b9647636cbeac0dc3955e6a3ee26062f650c66a4bd2353a1cd8063f9b7c65a6bcc1f892cc7c1d0f00c3c8525a124ec2109d1e86
+ sha512sums = b86edf07bfc560f403fdfd5a71f97930ee2a4c3f76c92cc1a0dbb2e107be9db3bed3a727a0430d8a049583c63dd11f5d4567fb7aa69b193997c6da241acc4f2e
+ sha512sums = 6e54ece7ec7022b3c9d94ad64bdf1017338da16c618966e8baf398e6f18f80f7b0576edf1d1da47ed77b96d577e4cbb2bb0156b0b11c183a0accf22654b0a2bb
+ sha512sums = b9141fbe8f5189eb46c83b178497f9ee6d1f2daec3009877557ba28e5f2ce6d818cfef5b0eba15c1e9e4c50dd5950486f8091793d704ec532f82155056725e71
+ sha512sums = 8e112e25392fb2bd035c4b8792e43ad86bf81b1c24ff429ff8943a2c02ee761fc25446791475e4115e6b03f50cdb4cf6a8f128cc770c3941b59ee1dfbe79137b
+ sha512sums = 7335797924f1c4403a438ccfe36f8a650ddf8271d33ca962e270cf34762170038017cd53cad35f1ad61128f2c496edb68791783259df33cb997a73959136bdc0
+ sha512sums = 52ebb56c6adfbef526d2db19618f9155084dacd7600d166f04ba5423c63a4294294589d675c391e577330f1b68755bb5d3b6a2cd3006902269cb73140973dba3
+ sha512sums = d326b8da8fb02462bac178a23e18f5468de62780717c24eadb453201b2b6b6439d2be7dda38e40f24fdc570dd5bc54102e7bf05868c53b17b27f6b9a06fccdb0
+ sha512sums = 04b41d4198138dbfe1399e7ed1e406fb265472d08a3e4de3c5c8584574b167613c598d7fa397c6944b809a96f699a4447694291296fa01a8e07b8ea96026ed2f
+ sha512sums = 9f90b7adc0ed87daac0f453caf2fff8b338061d96a9cd890f305704f9b22581232c6a207eb9eb1670c69b083caa780a6e44280df47c95b4e6e8e73f046f7c8a5
+ sha512sums = 8fd8d2e262327e78dad69186ebf091dbc034fab2675f0be91df75c88ae6f5e5ae6f456a2098c460861946390ce139e998f4b0f77e33671c8a7062a5e06b6e4ca
+
+pkgname = wine-staging-pba
+ depends = attr
+ depends = lib32-attr
+ depends = fontconfig
+ depends = lib32-fontconfig
+ depends = lcms2
+ depends = lib32-lcms2
+ depends = libxml2
+ depends = lib32-libxml2
+ depends = libxcursor
+ depends = lib32-libxcursor
+ depends = libxrandr
+ depends = lib32-libxrandr
+ depends = libxdamage
+ depends = lib32-libxdamage
+ depends = libxi
+ depends = lib32-libxi
+ depends = gettext
+ depends = lib32-gettext
+ depends = freetype2
+ depends = lib32-freetype2
+ depends = glu
+ depends = lib32-glu
+ depends = libsm
+ depends = lib32-libsm
+ depends = gcc-libs
+ depends = lib32-gcc-libs
+ depends = libpcap
+ depends = lib32-libpcap
+ depends = desktop-file-utils
+
diff --git a/0001-wined3d-Implement-a-simple-heap-allocator-backed-by-.patch b/0001-wined3d-Implement-a-simple-heap-allocator-backed-by-.patch
new file mode 100644
index 000000000000..565b172be923
--- /dev/null
+++ b/0001-wined3d-Implement-a-simple-heap-allocator-backed-by-.patch
@@ -0,0 +1,456 @@
+From 636d39db43f9cd176fe85869db5e07d3a39f80fb Mon Sep 17 00:00:00 2001
+From: Andrew Comminos <andrew@comminos.com>
+Date: Fri, 23 Feb 2018 17:39:13 -0800
+Subject: [PATCH 1/8] wined3d: Implement a simple heap allocator backed by a
+ persistent buffer.
+
+---
+ dlls/wined3d-csmt/Makefile.in | 1 +
+ dlls/wined3d/Makefile.in | 1 +
+ dlls/wined3d/buffer_heap.c | 321 +++++++++++++++++++++++++++++++++++++++++
+ dlls/wined3d/directx.c | 3 +
+ dlls/wined3d/wined3d_gl.h | 1 +
+ dlls/wined3d/wined3d_private.h | 41 +++++-
+ 6 files changed, 364 insertions(+), 4 deletions(-)
+ create mode 100644 dlls/wined3d/buffer_heap.c
+
+diff --git a/dlls/wined3d-csmt/Makefile.in b/dlls/wined3d-csmt/Makefile.in
+index bf064ed16f..cab1e6fdc1 100644
+--- a/dlls/wined3d-csmt/Makefile.in
++++ b/dlls/wined3d-csmt/Makefile.in
+@@ -8,6 +8,7 @@ C_SRCS = \
+ arb_program_shader.c \
+ ati_fragment_shader.c \
+ buffer.c \
++ buffer_heap.c \
+ context.c \
+ cs.c \
+ device.c \
+diff --git a/dlls/wined3d/Makefile.in b/dlls/wined3d/Makefile.in
+index 70f47c6a5f..e78745cc11 100644
+--- a/dlls/wined3d/Makefile.in
++++ b/dlls/wined3d/Makefile.in
+@@ -6,6 +6,7 @@ C_SRCS = \
+ arb_program_shader.c \
+ ati_fragment_shader.c \
+ buffer.c \
++ buffer_heap.c \
+ context.c \
+ cs.c \
+ device.c \
+diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
+new file mode 100644
+index 0000000000..900e2d24bb
+--- /dev/null
++++ b/dlls/wined3d/buffer_heap.c
+@@ -0,0 +1,321 @@
++/*
++ * Copyright 2018 Andrew Comminos
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
++ *
++ */
++
++#include "config.h"
++#include "wine/port.h"
++#include "wined3d_private.h"
++
++WINE_DEFAULT_DEBUG_CHANNEL(d3d);
++
++struct wined3d_buffer_heap_element /* Node of the heap's doubly linked list of free regions. */
++{
++ struct wined3d_map_range range; /* Offset and size of the free region inside the heap buffer. */
++
++ struct wined3d_buffer_heap_element *prev; /* Previous free region, or NULL for the list head. */
++ struct wined3d_buffer_heap_element *next; /* Next free region, or NULL for the last node. */
++};
++
++struct wined3d_buffer_heap_fenced_element /* Node of the singly linked list of ranges waiting on a fence. */
++{
++ struct wined3d_map_range range; /* Region returned to the free list once the fence has been tested as passed. */
++ struct wined3d_fence *fence; /* Fence guarding the region; destroyed together with the node. */
++
++ struct wined3d_buffer_heap_fenced_element *next; /* Links fenced nodes, so it must not be a free-list element pointer. */
++};
++
++static struct wined3d_buffer_heap_element* element_new(GLintptr offset, GLsizeiptr size) /* Allocates an unlinked free-list node covering the given offset and size. */
++{
++ struct wined3d_buffer_heap_element* elem;
++ elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_element));
++ if (!elem)
++ return NULL; /* Allocation failed; no node was created. */
++ elem->range.offset = offset;
++ elem->range.size = size;
++ elem->next = NULL; /* Linking into a list is left to the caller. */
++ elem->prev = NULL;
++ return elem;
++}
++
++static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_map_range range, struct wined3d_fence* fence) /* Allocates an unlinked fenced-list node pairing range with fence. */
++{
++ struct wined3d_buffer_heap_fenced_element* elem;
++ elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_fenced_element));
++ if (!elem)
++ return NULL; /* Allocation failed. */
++ elem->range = range;
++ elem->fence = fence; /* Only the pointer is stored; the fence itself is not copied. */
++ elem->next = NULL;
++ return elem;
++}
++
++static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) /* Coalesces elem with the free-list neighbours that touch it, freeing the absorbed nodes. */
++{
++ struct wined3d_buffer_heap_element *cur_prev = elem->prev;
++ struct wined3d_buffer_heap_element *cur_next = elem->next;
++ if (cur_prev && cur_prev->range.offset + cur_prev->range.size == elem->range.offset) /* The previous region ends exactly where elem starts. */
++ {
++ elem->range.offset = cur_prev->range.offset;
++ elem->range.size += cur_prev->range.size;
++
++ elem->prev = cur_prev->prev;
++ if (cur_prev->prev)
++ cur_prev->prev->next = elem;
++
++ if (cur_prev == heap->free_list_head)
++ heap->free_list_head = elem; /* elem takes over as head when the old head is absorbed. */
++
++ HeapFree(GetProcessHeap(), 0, cur_prev);
++ }
++ if (cur_next && cur_next->range.offset == elem->range.offset + elem->range.size) /* The next region starts exactly where elem (possibly grown above) ends. */
++ {
++ elem->range.size += cur_next->range.size;
++ elem->next = cur_next->next;
++ if (cur_next->next)
++ {
++ cur_next->next->prev = elem;
++ }
++ HeapFree(GetProcessHeap(), 0, cur_next);
++ }
++}
++
++/* Creates a heap backed by one persistently mapped GL buffer of size bytes; *buffer_heap is written only on success.
++ * write_only omits GL_MAP_READ_BIT from the mapping. Context activation is done by the caller. */
++HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, BOOL write_only, struct wined3d_buffer_heap **buffer_heap)
++{
++ const struct wined3d_gl_info *gl_info = context->gl_info;
++ const GLenum buffer_target = GL_ARRAY_BUFFER;
++ GLbitfield access_flags;
++ GLbitfield storage_flags;
++ struct wined3d_buffer_heap *object;
++ HRESULT hr;
++
++ if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object))))
++ return E_OUTOFMEMORY;
++
++ access_flags = GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_WRITE_BIT;
++ if (!write_only)
++ access_flags |= GL_MAP_READ_BIT;
++ storage_flags = access_flags; // XXX(acomminos): will we need dynamic storage?
++
++ // TODO(acomminos): where should we be checking for errors here?
++ // TODO(acomminos): assert from CS thread?
++ GL_EXTCALL(glGenBuffers(1, &object->buffer_object));
++ // XXX(acomminos): use glNamedBufferStorage?
++ context_bind_bo(context, buffer_target, object->buffer_object);
++ // TODO(acomminos): assert glBufferStorage supported?
++ GL_EXTCALL(glBufferStorage(buffer_target, size, NULL, storage_flags));
++ if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(buffer_target, 0, size, access_flags))))
++ {
++ // TODO(acomminos): include error message
++ ERR("Couldn't map persistent buffer.\n");
++ hr = E_FAIL;
++ goto fail;
++ }
++ context_bind_bo(context, buffer_target, 0);
++ if (!(object->free_list_head = element_new(0, size))) // The whole buffer starts out as a single free region.
++ {
++ hr = E_OUTOFMEMORY;
++ goto fail;
++ }
++ object->fenced_head = object->fenced_tail = NULL;
++ InitializeCriticalSection(&object->temp_lock);
++ *buffer_heap = object;
++ return WINED3D_OK;
++fail: // Release the GL buffer and the heap struct instead of leaking them; deleting the buffer also drops any mapping.
++ context_bind_bo(context, buffer_target, 0);
++ GL_EXTCALL(glDeleteBuffers(1, &object->buffer_object));
++ HeapFree(GetProcessHeap(), 0, object);
++ return hr;
++}
++
++/* Placeholder for tearing down a heap: currently a no-op that reports success. Context activation is done by the caller. */
++HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context)
++{
++ // TODO: not implemented; nothing owned by the heap is released here yet.
++ return WINED3D_OK;
++}
++
++HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range) { /* First fit: carves size bytes off the front of the first free region that is large enough. */
++ struct wined3d_buffer_heap_element *elem;
++
++ EnterCriticalSection(&heap->temp_lock);
++ elem = heap->free_list_head; // TODO(acomminos): free list binning?
++ // XXX(acomminos): Avoid fragmentation by rounding to nearest power of two.
++ while (elem != NULL)
++ {
++ TRACE("allocation at %p, size %lld\n", (BYTE *)heap->map_ptr + elem->range.offset, (long long)elem->range.size); // Casts keep the arguments matching %p/%lld whatever the width of GLsizeiptr.
++ // XXX(acomminos): first fit is highly likely to be dumb, needs more analysis.
++ if (elem->range.size >= size)
++ {
++ // Pull the range from the start of the free list element.
++ out_range->offset = elem->range.offset;
++ out_range->size = size; // XXX(acomminos): should we really give the exact size requested?
++
++ elem->range.offset += size;
++ elem->range.size -= size;
++
++ if (elem->range.size == 0) // The region was consumed entirely: unlink and free its node.
++ {
++ if (elem->prev)
++ {
++ elem->prev->next = elem->next;
++ }
++ if (elem->next)
++ {
++ elem->next->prev = elem->prev;
++ }
++ if (heap->free_list_head == elem)
++ {
++ heap->free_list_head = elem->next;
++ }
++ HeapFree(GetProcessHeap(), 0, elem);
++ }
++ LeaveCriticalSection(&heap->temp_lock);
++ return WINED3D_OK;
++ }
++ elem = elem->next;
++ }
++
++ LeaveCriticalSection(&heap->temp_lock);
++ return WINED3DERR_OUTOFVIDEOMEMORY; // FIXME(acomminos): probably wrong return code.
++}
++
++HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) /* Returns range to the offset-ordered free list, merging it with adjacent regions; E_OUTOFMEMORY if a list node cannot be allocated. */
++{
++ struct wined3d_buffer_heap_element *new_elem, *elem;
++ struct wined3d_buffer_heap_element *last_elem = NULL;
++
++ EnterCriticalSection(&heap->temp_lock);
++ elem = heap->free_list_head;
++ // Special case where the head doesn't exist.
++ if (!elem)
++ {
++ if (!(new_elem = element_new(range.offset, range.size)))
++ goto out_of_memory;
++ heap->free_list_head = new_elem;
++ goto success;
++ }
++ while (elem)
++ {
++ struct wined3d_map_range *erange = &elem->range;
++ if (range.offset + range.size == erange->offset)
++ {
++ // Left side merge
++ erange->offset = range.offset;
++ erange->size += range.size;
++ // Check if this causes a merge with elem->prev
++ element_merge_adjacent(heap, elem);
++ goto success;
++ }
++ else if (erange->offset + erange->size == range.offset)
++ {
++ // Right side merge
++ erange->size += range.size;
++ // Check if this causes a merge with elem->next
++ element_merge_adjacent(heap, elem);
++ goto success;
++ }
++ else if (range.offset < erange->offset)
++ {
++ // Append to left, non-merge case.
++ if (!(new_elem = element_new(range.offset, range.size)))
++ goto out_of_memory;
++ new_elem->prev = elem->prev;
++ new_elem->next = elem;
++ if (elem->prev)
++ elem->prev->next = new_elem;
++ if (heap->free_list_head == elem)
++ heap->free_list_head = new_elem;
++ elem->prev = new_elem;
++ goto success;
++ }
++ last_elem = elem;
++ elem = elem->next;
++ }
++ // Larger offset than all other elements in the list, append to the end.
++ if (!(new_elem = element_new(range.offset, range.size)))
++ goto out_of_memory;
++ new_elem->prev = last_elem;
++ last_elem->next = new_elem;
++
++success:
++ LeaveCriticalSection(&heap->temp_lock);
++ return WINED3D_OK;
++out_of_memory: // The list is left untouched; the range is not tracked as free.
++ LeaveCriticalSection(&heap->temp_lock);
++ return E_OUTOFMEMORY;
++}
++
++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_map_range range, struct wined3d_fence *fence) /* Queues range at the tail of the fenced list; wined3d_buffer_heap_cs_poll_fences() later frees it after testing fence. */
++{
++ struct wined3d_buffer_heap_fenced_element *elem;
++ elem = fenced_element_new(range, fence);
++ if (!elem)
++ return E_OUTOFMEMORY; // Nothing was queued; range and fence are left untouched.
++
++ // Append to end of fenced list, which works well if you assume that buffers
++ // are freed in some ascending draw call ordering.
++ if (!heap->fenced_head) // NOTE(review): temp_lock is not taken in this function - confirm the fenced list is only touched from one thread.
++ {
++ heap->fenced_head = elem;
++ heap->fenced_tail = elem; // Emptiness is judged by fenced_head alone, so any stale fenced_tail is overwritten here.
++ }
++ else
++ {
++ heap->fenced_tail->next = elem;
++ heap->fenced_tail = elem;
++ }
++
++ return WINED3D_OK;
++}
++
++HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, struct wined3d_device *device) /* Frees queued ranges from the front of the fenced list until a fence test reports anything other than OK / NOT_STARTED. */
++{
++ enum wined3d_fence_result res;
++ struct wined3d_buffer_heap_fenced_element *elem = heap->fenced_head;
++ if (!elem)
++ return WINED3D_OK; // Nothing is queued.
++
++ while (elem)
++ {
++ res = wined3d_fence_test(elem->fence, device, 0);
++ switch (res)
++ {
++ case WINED3D_FENCE_OK:
++ case WINED3D_FENCE_NOT_STARTED: // Handled exactly like WINED3D_FENCE_OK.
++ {
++ struct wined3d_buffer_heap_fenced_element *next = elem->next;
++
++ wined3d_fence_destroy(elem->fence);
++ wined3d_buffer_heap_free(heap, elem->range); // The return value is ignored.
++
++ heap->fenced_head = elem->next;
++ HeapFree(GetProcessHeap(), 0, elem);
++ // TODO(acomminos): bother to null out fenced_tail?
++
++ elem = next;
++ break;
++ }
++ default: // Any other result stops the scan; this entry and all later ones stay queued.
++ return WINED3D_OK;
++ }
++ }
++
++ return WINED3D_OK;
++}
+diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c
+index 012561090f..9cf8f8efe5 100644
+--- a/dlls/wined3d/directx.c
++++ b/dlls/wined3d/directx.c
+@@ -2711,6 +2711,9 @@ static void load_gl_funcs(struct wined3d_gl_info *gl_info)
+ /* GL_ARB_blend_func_extended */
+ USE_GL_FUNC(glBindFragDataLocationIndexed)
+ USE_GL_FUNC(glGetFragDataIndex)
++ /* GL_ARB_buffer_storage */
++ USE_GL_FUNC(glBufferStorage)
++ USE_GL_FUNC(glNamedBufferStorage)
+ /* GL_ARB_clear_buffer_object */
+ USE_GL_FUNC(glClearBufferData)
+ USE_GL_FUNC(glClearBufferSubData)
+diff --git a/dlls/wined3d/wined3d_gl.h b/dlls/wined3d/wined3d_gl.h
+index 7ac163459b..04957cb5b1 100644
+--- a/dlls/wined3d/wined3d_gl.h
++++ b/dlls/wined3d/wined3d_gl.h
+@@ -44,6 +44,7 @@ enum wined3d_gl_extension
+ /* ARB */
+ ARB_BASE_INSTANCE,
+ ARB_BLEND_FUNC_EXTENDED,
++ ARB_BUFFER_STORAGE,
+ ARB_CLEAR_BUFFER_OBJECT,
+ ARB_CLEAR_TEXTURE,
+ ARB_CLIP_CONTROL,
+diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
+index 9b16a361e4..4d0555a76c 100644
+--- a/dlls/wined3d/wined3d_private.h
++++ b/dlls/wined3d/wined3d_private.h
+@@ -3465,6 +3465,12 @@ void state_init(struct wined3d_state *state, struct wined3d_fb_state *fb,
+ DWORD flags) DECLSPEC_HIDDEN;
+ void state_unbind_resources(struct wined3d_state *state) DECLSPEC_HIDDEN;
+
++struct wined3d_map_range
++{
++ GLintptr offset; /* Start of the range, relative to the start of the buffer it belongs to. */
++ GLsizeiptr size; /* Length of the range. */
++};
++
+ enum wined3d_cs_queue_id
+ {
+ WINED3D_CS_QUEUE_DEFAULT = 0,
+@@ -3646,11 +3652,38 @@ enum wined3d_buffer_conversion_type
+ CONV_POSITIONT,
+ };
+
+-struct wined3d_map_range
++struct wined3d_buffer_heap_element;
++struct wined3d_buffer_heap_fenced_element;
++
++// A heap that manages allocations with a single GL buffer.
++struct wined3d_buffer_heap
+ {
+- UINT offset;
+- UINT size;
+-};
++ GLuint buffer_object; // GL name of the buffer that backs the whole heap.
++ void *map_ptr; // Address returned by glMapBufferRange for the entire buffer.
++ CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list.
++
++ // TODO: add buckets for free regions of a given size.
++ struct wined3d_buffer_heap_element *free_list_head; // First node of the free-region list, or NULL when nothing is free.
++
++ // store in FIFO order? that way, we can minimize our amount of time
++ // waiting on fences?
++ // XXX(acomminos): are fences guaranteed to be triggered in a serial
++ // ordering? if so, we can early-terminate our polling
++ struct wined3d_buffer_heap_fenced_element *fenced_head; // Oldest queued fenced range; NULL when the queue is empty.
++ struct wined3d_buffer_heap_fenced_element *fenced_tail; // Most recently queued fenced range; only meaningful while fenced_head is non-NULL.
++};
++
++HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, BOOL write_only, struct wined3d_buffer_heap **heap) DECLSPEC_HIDDEN;
++HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN;
++// Fetches a buffer from the heap of at least the given size.
++HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range* out_range) DECLSPEC_HIDDEN;
++// Immediately frees a heap-allocated buffer segment.
++HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN;
++// Enqueues a buffer segment to return to the heap once its fence has been signaled.
++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_map_range range, struct wined3d_fence *fence) DECLSPEC_HIDDEN;
++// Moves buffers whose fences have been signaled from the fenced list to the free list.
++// Must be executed on the CS thread.
++HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN;
+
+ struct wined3d_buffer
+ {
+--
+2.16.2
+
diff --git a/0002-wined3d-Allocate-global-write-only-persistent-buffer.patch b/0002-wined3d-Allocate-global-write-only-persistent-buffer.patch
new file mode 100644
index 000000000000..66e2e25d69f8
--- /dev/null
+++ b/0002-wined3d-Allocate-global-write-only-persistent-buffer.patch
@@ -0,0 +1,81 @@
+From 7f141de6d631a6e0c9cd778f6b3259d41a700bb4 Mon Sep 17 00:00:00 2001
+From: Andrew Comminos <andrew@comminos.com>
+Date: Fri, 23 Feb 2018 17:42:21 -0800
+Subject: [PATCH 2/8] wined3d: Allocate global write-only persistent buffer
+ heap at device initialization.
+
+---
+ dlls/wined3d/device.c | 28 ++++++++++++++++++++++++++++
+ dlls/wined3d/wined3d_private.h | 3 +++
+ 2 files changed, 31 insertions(+)
+
+diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
+index 58f4993abe..363dcb17f0 100644
+--- a/dlls/wined3d/device.c
++++ b/dlls/wined3d/device.c
+@@ -845,6 +845,29 @@ static void destroy_default_samplers(struct wined3d_device *device, struct wined
+ device->null_sampler = NULL;
+ }
+
++/* Creates the device's write-only persistent buffer heap. Context activation is done by the caller. */
++static void create_buffer_heap(struct wined3d_device *device, struct wined3d_context *context)
++{
++ // TODO(acomminos): check if ARB_buffer_storage is supported, first-
++ // possibly make wined3d_buffer_heap_create fail.
++ // TODO(acomminos): definitely don't take up all of vram. this is gonna get
++ // paged anyway, though.
++ const GLsizeiptr HBO_SIZE = device->adapter->vram_bytes / 4; // A quarter of the adapter's vram_bytes value.
++
++ HRESULT hr;
++ if (FAILED(hr = wined3d_buffer_heap_create(context, HBO_SIZE, TRUE, &device->wo_buffer_heap))) // TRUE requests a write-only heap.
++ {
++ ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr); // Failure is only logged; this function returns void.
++ }
++}
++
++/* Destroys the device's write-only buffer heap if one exists. Context activation is done by the caller. */
++static void destroy_buffer_heap(struct wined3d_device *device, struct wined3d_context *context)
++{
++ if (device->wo_buffer_heap)
++ wined3d_buffer_heap_destroy(device->wo_buffer_heap, context); // The pointer is not reset to NULL afterwards.
++}
++
+ static LONG fullscreen_style(LONG style)
+ {
+ /* Make sure the window is managed, otherwise we won't get keyboard input. */
+@@ -1013,6 +1036,8 @@ static void wined3d_device_delete_opengl_contexts_cs(void *object)
+ device->shader_backend->shader_free_private(device);
+ destroy_dummy_textures(device, context);
+ destroy_default_samplers(device, context);
++ destroy_buffer_heap(device, context);
++
+ context_release(context);
+
+ while (device->context_count)
+@@ -1060,6 +1085,9 @@ static void wined3d_device_create_primary_opengl_context_cs(void *object)
+ context = context_acquire(device, target, 0);
+ create_dummy_textures(device, context);
+ create_default_samplers(device, context);
++
++ create_buffer_heap(device, context);
++
+ context_release(context);
+ }
+
+diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
+index 4d0555a76c..96bda81eb9 100644
+--- a/dlls/wined3d/wined3d_private.h
++++ b/dlls/wined3d/wined3d_private.h
+@@ -2966,6 +2966,9 @@ struct wined3d_device
+ /* Context management */
+ struct wined3d_context **contexts;
+ UINT context_count;
++
++ /* Dynamic buffer heap */
++ struct wined3d_buffer_heap *wo_buffer_heap;
+ };
+
+ void device_clear_render_targets(struct wined3d_device *device, UINT rt_count, const struct wined3d_fb_state *fb,
+--
+2.16.2
+
diff --git a/0003-wined3d-Add-support-for-persistently-mapped-wined3d_.patch b/0003-wined3d-Add-support-for-persistently-mapped-wined3d_.patch
new file mode 100644
index 000000000000..5a3a499ab2a0
--- /dev/null
+++ b/0003-wined3d-Add-support-for-persistently-mapped-wined3d_.patch
@@ -0,0 +1,701 @@
+From 3e72163af5712be1a51957effa183edc7a9fb2a6 Mon Sep 17 00:00:00 2001
+From: Andrew Comminos <andrew@comminos.com>
+Date: Fri, 23 Feb 2018 17:41:43 -0800
+Subject: [PATCH 3/8] wined3d: Add support for persistently mapped
+ wined3d_buffer resources.
+
+---
+ dlls/wined3d/buffer.c | 211 ++++++++++++++++++++++++++++++++++++++++-
+ dlls/wined3d/buffer_heap.c | 8 +-
+ dlls/wined3d/cs.c | 62 +++++++++++-
+ dlls/wined3d/drawprim.c | 7 +-
+ dlls/wined3d/query.c | 2 +-
+ dlls/wined3d/resource.c | 20 +++-
+ dlls/wined3d/state.c | 6 +-
+ dlls/wined3d/texture.c | 13 +++
+ dlls/wined3d/utils.c | 1 +
+ dlls/wined3d/wined3d_private.h | 13 +++
+ 10 files changed, 326 insertions(+), 17 deletions(-)
+
+diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c
+index d61321e1a5..ccb090c907 100644
+--- a/dlls/wined3d/buffer.c
++++ b/dlls/wined3d/buffer.c
+@@ -28,12 +28,14 @@
+ #include "wined3d_private.h"
+
+ WINE_DEFAULT_DEBUG_CHANNEL(d3d);
++WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
+
+ #define WINED3D_BUFFER_HASDESC 0x01 /* A vertex description has been found. */
+ #define WINED3D_BUFFER_USE_BO 0x02 /* Use a buffer object for this buffer. */
+ #define WINED3D_BUFFER_PIN_SYSMEM 0x04 /* Keep a system memory copy for this buffer. */
+ #define WINED3D_BUFFER_DISCARD 0x08 /* A DISCARD lock has occurred since the last preload. */
+ #define WINED3D_BUFFER_APPLESYNC 0x10 /* Using sync as in GL_APPLE_flush_buffer_range. */
++#define WINED3D_BUFFER_PERSISTENT 0x20 /* Uses a persistent-mapped buffer via ARB_buffer_storage. */
+
+ #define VB_MAXDECLCHANGES 100 /* After that number of decl changes we stop converting */
+ #define VB_RESETDECLCHANGE 1000 /* Reset the decl changecount after that number of draws */
+@@ -269,6 +271,50 @@ fail:
+ return FALSE;
+ }
+
++/* Context activation is done by the caller. */
++static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer, struct wined3d_context *context)
++{
++ struct wined3d_device *device = buffer->resource.device;
++ struct wined3d_buffer_heap *heap;
++ struct wined3d_map_range map_range;
++ HRESULT hr;
++
++ if (buffer->resource.usage & WINED3DUSAGE_WRITEONLY)
++ {
++ heap = device->wo_buffer_heap;
++ }
++ else
++ {
++ FIXME("Using write-only heap for a persistent buffer without WINED3DUSAGE_WRITEONLY.\n");
++ heap = device->rw_buffer_heap;
++ }
++
++ buffer->buffer_heap = heap;
++ if (FAILED(hr = wined3d_buffer_heap_alloc(heap, buffer->resource.size, &map_range)))
++ {
++ goto fail;
++ }
++ buffer->cs_persistent_map = map_range;
++ buffer->mt_persistent_map = map_range;
++ return TRUE;
++
++fail:
++ // FIXME(acomminos): fall back to standalone BO here?
++ ERR("Failed to create persistent map for buffer %p, hr=%x\n", buffer, hr);
++ buffer->buffer_heap = NULL;
++ return FALSE;
++}
++
++static void buffer_free_persistent_map(struct wined3d_buffer *buffer)
++{
++ if (!buffer->buffer_heap)
++ return;
++
++ // TODO(acomminos): get the CS thread to free pending main thread buffers.
++ wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map);
++ buffer->buffer_heap = NULL;
++}
++
+ static BOOL buffer_process_converted_attribute(struct wined3d_buffer *buffer,
+ const enum wined3d_buffer_conversion_type conversion_type,
+ const struct wined3d_stream_info_element *attrib, DWORD *stride_this_run)
+@@ -630,6 +676,16 @@ static BOOL wined3d_buffer_prepare_location(struct wined3d_buffer *buffer,
+ return FALSE;
+ }
+ return buffer_create_buffer_object(buffer, context);
++ case WINED3D_LOCATION_PERSISTENT_MAP:
++ if (buffer->buffer_heap)
++ return TRUE;
++
++ if (!(buffer->flags & WINED3D_BUFFER_PERSISTENT))
++ {
++ WARN("Trying to map a persistent region for buffer %p without WINED3D_BUFFER_PERSISTENT.\n", buffer);
++ return FALSE;
++ }
++ return buffer_alloc_persistent_map(buffer, context);
+
+ default:
+ ERR("Invalid location %s.\n", wined3d_debug_location(location));
+@@ -688,16 +744,32 @@ BOOL wined3d_buffer_load_location(struct wined3d_buffer *buffer,
+ buffer_conversion_upload(buffer, context);
+ break;
+
++ case WINED3D_LOCATION_PERSISTENT_MAP:
++ // TODO(acomminos): are we guaranteed location_sysmem to be kept?
++ // no.
++ if (buffer->conversion_map)
++ FIXME("Attempting to use conversion map with persistent mapping.\n");
++ memcpy(buffer->buffer_heap->map_ptr +
++ buffer->cs_persistent_map.offset,
++ buffer->resource.heap_memory, buffer->resource.size);
++ break;
++
+ default:
+ ERR("Invalid location %s.\n", wined3d_debug_location(location));
+ return FALSE;
+ }
+
+ wined3d_buffer_validate_location(buffer, location);
+- if (buffer->resource.heap_memory && location == WINED3D_LOCATION_BUFFER
++ if (buffer->resource.heap_memory
++ && location & WINED3D_LOCATION_BUFFER
+ && !(buffer->resource.usage & WINED3DUSAGE_DYNAMIC))
+ wined3d_buffer_evict_sysmem(buffer);
+
++ // FIXME(acomminos)
++ if (buffer->resource.heap_memory
++ && location & WINED3D_LOCATION_PERSISTENT_MAP)
++ wined3d_buffer_evict_sysmem(buffer);
++
+ return TRUE;
+ }
+
+@@ -721,6 +793,13 @@ DWORD wined3d_buffer_get_memory(struct wined3d_buffer *buffer,
+ data->addr = NULL;
+ return WINED3D_LOCATION_BUFFER;
+ }
++ if (locations & WINED3D_LOCATION_PERSISTENT_MAP)
++ {
++ // FIXME(acomminos): should we expose a buffer object we don't wholly own here?
++ data->buffer_object = buffer->buffer_heap->buffer_object;
++ data->addr = buffer->cs_persistent_map.offset;
++ return WINED3D_LOCATION_PERSISTENT_MAP;
++ }
+ if (locations & WINED3D_LOCATION_SYSMEM)
+ {
+ data->buffer_object = 0;
+@@ -760,6 +839,8 @@ static void buffer_unload(struct wined3d_resource *resource)
+ buffer->flags &= ~WINED3D_BUFFER_HASDESC;
+ }
+
++ buffer_free_persistent_map(buffer);
++
+ resource_unload(resource);
+ }
+
+@@ -783,6 +864,8 @@ static void wined3d_buffer_destroy_object(void *object)
+ HeapFree(GetProcessHeap(), 0, buffer->conversion_map);
+ }
+
++ buffer_free_persistent_map(buffer);
++
+ HeapFree(GetProcessHeap(), 0, buffer->maps);
+ HeapFree(GetProcessHeap(), 0, buffer);
+ }
+@@ -899,6 +982,13 @@ void wined3d_buffer_load(struct wined3d_buffer *buffer, struct wined3d_context *
+
+ buffer_mark_used(buffer);
+
++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
++ {
++ if (!wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP))
++ ERR("Failed to preload persistent mapping.\n");
++ return;
++ }
++
+ /* TODO: Make converting independent from VBOs */
+ if (!(buffer->flags & WINED3D_BUFFER_USE_BO))
+ {
+@@ -1009,6 +1099,25 @@ static HRESULT wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UI
+
+ count = ++buffer->resource.map_count;
+
++ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)
++ {
++ const struct wined3d_gl_info *gl_info;
++ context = context_acquire(device, NULL, 0);
++
++ FIXME_(d3d_perf)("Fences not used for persistent buffer maps on CS thread, using glFinish.\n");
++
++ gl_info = context->gl_info;
++ gl_info->gl_ops.gl.p_glFinish();
++
++ base = buffer->buffer_heap->map_ptr
++ + buffer->cs_persistent_map.offset;
++ *data = base + offset;
++
++ context_release(context);
++
++ return WINED3D_OK;
++ }
++
+ if (buffer->buffer_object)
+ {
+ unsigned int dirty_offset = offset, dirty_size = size;
+@@ -1151,6 +1260,12 @@ static void wined3d_buffer_unmap(struct wined3d_buffer *buffer)
+ return;
+ }
+
++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
++ {
++ TRACE("Persistent buffer, ignore unmap.\n");
++ return;
++ }
++
+ if (buffer->map_ptr)
+ {
+ struct wined3d_device *device = buffer->resource.device;
+@@ -1273,6 +1388,64 @@ static void buffer_resource_preload(struct wined3d_resource *resource)
+
+ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx,
+ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
++{
++ struct wined3d_buffer *buffer = buffer_from_resource(resource);
++ UINT offset = box ? box->left : 0;
++
++ if (sub_resource_idx)
++ {
++ WARN("Invalid sub_resource_idx %u.\n", sub_resource_idx);
++ return E_INVALIDARG;
++ }
++
++ // Support immediate mapping of persistent buffers off the command thread,
++ // which requires no GL calls to interface with.
++ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)
++ {
++ map_desc->row_pitch = map_desc->slice_pitch = buffer->desc.byte_width;
++ if (flags & WINED3D_MAP_DISCARD)
++ {
++ HRESULT hr;
++ struct wined3d_map_range map_range;
++ if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &map_range)))
++ {
++ FIXME("Failed to allocate new buffer, falling back to sync path.\n");
++ return hr;
++ }
++ map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset;
++ resource->map_count++;
++
++ buffer->mt_persistent_map = map_range;
++
++ // Discard handler on CSMT thread is responsible for returning the
++ // currently used buffer to the free pool, along with the fence that
++ // must be called before the buffer can be reused.
++ wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, map_range);
++ return WINED3D_OK;
++ }
++ else if (flags & WINED3D_MAP_NOOVERWRITE)
++ {
++ // Allow immediate access for persistent buffers without a fence.
++ // Always use the latest buffer in this case in case the latest
++ // DISCARDed one hasn't reached the command stream yet.
++ struct wined3d_map_range map_range = buffer->mt_persistent_map;
++ map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset;
++ resource->map_count++;
++ return WINED3D_OK;
++ }
++ else
++ {
++ // TODO(acomminos): Should check mapped ranges to see if the region is writeable even though NOOVERWRITE is not specified.
++ WARN_(d3d_perf)("Mapping persistent buffer %p in sync with CS thread.\n", buffer);
++ // XXX(acomminos): kill this early return. they're the worst.
++ }
++ }
++
++ return E_NOTIMPL;
++}
++
++static HRESULT buffer_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx,
++ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
+ {
+ struct wined3d_buffer *buffer = buffer_from_resource(resource);
+ UINT offset, size;
+@@ -1316,6 +1489,18 @@ static HRESULT buffer_resource_sub_resource_map_info(struct wined3d_resource *re
+ }
+
+ static HRESULT buffer_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx)
++{
++ struct wined3d_buffer *buffer = buffer_from_resource(resource);
++ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)
++ {
++ // Nothing to be done to unmap a region of a persistent buffer.
++ resource->map_count--;
++ return WINED3D_OK;
++ }
++ return E_NOTIMPL;
++}
++
++static HRESULT buffer_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx)
+ {
+ if (sub_resource_idx)
+ {
+@@ -1334,8 +1519,10 @@ static const struct wined3d_resource_ops buffer_resource_ops =
+ buffer_resource_preload,
+ buffer_unload,
+ buffer_resource_sub_resource_map,
++ buffer_resource_sub_resource_map_cs,
+ buffer_resource_sub_resource_map_info,
+ buffer_resource_sub_resource_unmap,
++ buffer_resource_sub_resource_unmap_cs,
+ };
+
+ static GLenum buffer_type_hint_from_bind_flags(const struct wined3d_gl_info *gl_info,
+@@ -1411,12 +1598,32 @@ static HRESULT buffer_init(struct wined3d_buffer *buffer, struct wined3d_device
+ buffer->flags |= WINED3D_BUFFER_PIN_SYSMEM;
+ }
+
++ // FIXME(acomminos)
++ if (buffer->resource.usage & WINED3DUSAGE_DYNAMIC)
++ {
++ // FIXME(acomminos): why is this returning false?
++ if (FALSE && !gl_info->supported[ARB_BUFFER_STORAGE])
++ {
++ WARN_(d3d_perf)("Not creating a persistent mapping for a dynamic buffer because ARB_buffer_storage is unsupported.\n");
++ }
++ else
++ {
++ // If supported, use persistent mapped buffers instead of a
++ // standalone BO for dynamic buffers.
++ buffer->flags |= WINED3D_BUFFER_PERSISTENT;
++ }
++ }
++
+ /* Observations show that draw_primitive_immediate_mode() is faster on
+ * dynamic vertex buffers than converting + draw_primitive_arrays().
+ * (Half-Life 2 and others.) */
+ dynamic_buffer_ok = gl_info->supported[APPLE_FLUSH_BUFFER_RANGE] || gl_info->supported[ARB_MAP_BUFFER_RANGE];
+
+- if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT])
++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
++ {
++ TRACE("Not creating a BO because a persistent mapped buffer will be used.\n");
++ }
++ else if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT])
+ {
+ TRACE("Not creating a BO because GL_ARB_vertex_buffer is not supported.\n");
+ }
+diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
+index 900e2d24bb..f24fddffb4 100644
+--- a/dlls/wined3d/buffer_heap.c
++++ b/dlls/wined3d/buffer_heap.c
+@@ -114,14 +114,11 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
+ {
+ access_flags |= GL_MAP_READ_BIT;
+ }
+- storage_flags = access_flags; // XXX(acomminos): will we need dynamic storage?
++ storage_flags = access_flags;
+
+ // TODO(acomminos): where should we be checking for errors here?
+-
+- // TODO(acomminos): assert from CS thread?
+ GL_EXTCALL(glGenBuffers(1, &object->buffer_object));
+
+- // XXX(acomminos): use glNamedBufferStorage?
+ context_bind_bo(context, buffer_target, object->buffer_object);
+
+ // TODO(acomminos): assert glBufferStorage supported?
+@@ -129,7 +126,6 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
+
+ if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(buffer_target, 0, size, access_flags))))
+ {
+- // TODO(acomminos): include error message
+ ERR("Couldn't map persistent buffer.\n");
+ return -1; // FIXME(acomminos): proper error code, cleanup
+ }
+@@ -147,7 +143,7 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
+ /* Context activation is done by the caller. */
+ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context)
+ {
+- // TODO
++ FIXME("Unimplemented, leaking buffer");
+ return WINED3D_OK;
+ }
+
+diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
+index 7e72b30933..edcf521b72 100644
+--- a/dlls/wined3d/cs.c
++++ b/dlls/wined3d/cs.c
+@@ -73,6 +73,7 @@ enum wined3d_cs_op
+ WINED3D_CS_OP_COPY_UAV_COUNTER,
+ WINED3D_CS_OP_COPY_SUB_RESOURCE,
+ WINED3D_CS_OP_GENERATE_MIPS,
++ WINED3D_CS_OP_DISCARD_BUFFER,
+ WINED3D_CS_OP_STOP,
+ };
+
+@@ -444,6 +445,13 @@ struct wined3d_cs_generate_mips
+ struct wined3d_shader_resource_view *view;
+ };
+
++struct wined3d_cs_discard_buffer
++{
++ enum wined3d_cs_op opcode;
++ struct wined3d_buffer *buffer;
++ struct wined3d_map_range map_range;
++};
++
+ struct wined3d_cs_stop
+ {
+ enum wined3d_cs_op opcode;
+@@ -1986,7 +1994,7 @@ static void wined3d_cs_exec_map(struct wined3d_cs *cs, const void *data)
+ const struct wined3d_cs_map *op = data;
+ struct wined3d_resource *resource = op->resource;
+
+- *op->hr = resource->resource_ops->resource_sub_resource_map(resource,
++ *op->hr = resource->resource_ops->resource_sub_resource_map_cs(resource,
+ op->sub_resource_idx, op->map_desc, op->box, op->flags);
+ }
+
+@@ -2020,7 +2028,7 @@ static void wined3d_cs_exec_unmap(struct wined3d_cs *cs, const void *data)
+ const struct wined3d_cs_unmap *op = data;
+ struct wined3d_resource *resource = op->resource;
+
+- *op->hr = resource->resource_ops->resource_sub_resource_unmap(resource, op->sub_resource_idx);
++ *op->hr = resource->resource_ops->resource_sub_resource_unmap_cs(resource, op->sub_resource_idx);
+ }
+
+ HRESULT wined3d_cs_unmap(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx)
+@@ -2630,6 +2638,55 @@ void wined3d_cs_emit_generate_mips(struct wined3d_cs *cs, struct wined3d_shader_
+ cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT);
+ }
+
++static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *data)
++{
++ const struct wined3d_cs_discard_buffer *op = data;
++ struct wined3d_buffer *buffer = op->buffer;
++ HRESULT hr;
++ struct wined3d_fence *fence;
++
++ // Poll for discarded buffers whose fences have been triggered here to avoid
++ // excessive VRAM consumption.
++ wined3d_buffer_heap_cs_poll_fences(buffer->buffer_heap, cs->device);
++
++ // TODO(acomminos): should call into buffer.c here instead.
++ // XXX(acomminos): should we always create a new fence here?
++ if (!FAILED(hr = wined3d_fence_create(cs->device, &fence)))
++ {
++ // TODO(acomminos): make more informed fences based on prior info. for now,
++ // we do this because allocating and deleting fences repeatedly is brutal
++ // for performance. look into why.
++ wined3d_fence_issue(fence, cs->device);
++
++ wined3d_buffer_heap_free_fenced(buffer->buffer_heap, buffer->cs_persistent_map, fence);
++ }
++ else
++ {
++ ERR("Failed to create fence for discarded buffer %p, hr %x\n. Freeing anyway.", buffer, hr);
++ wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map);
++ }
++
++ buffer->cs_persistent_map = op->map_range;
++
++ device_invalidate_state(cs->device, STATE_STREAMSRC);
++
++ wined3d_resource_release(&op->buffer->resource);
++}
++
++void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range)
++{
++ struct wined3d_cs_discard_buffer *op;
++
++ op = cs->ops->require_space(cs, sizeof(*op), WINED3D_CS_QUEUE_DEFAULT);
++ op->opcode = WINED3D_CS_OP_DISCARD_BUFFER;
++ op->buffer = buffer;
++ op->map_range = map_range;
++
++ wined3d_resource_acquire(&buffer->resource);
++
++ cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT);
++}
++
+ static void wined3d_cs_emit_stop(struct wined3d_cs *cs)
+ {
+ struct wined3d_cs_stop *op;
+@@ -2690,6 +2747,7 @@ static void (* const wined3d_cs_op_handlers[])(struct wined3d_cs *cs, const void
+ /* WINED3D_CS_OP_COPY_UAV_COUNTER */ wined3d_cs_exec_copy_uav_counter,
+ /* WINED3D_CS_OP_COPY_SUB_RESOURCE */ wined3d_cs_exec_copy_sub_resource,
+ /* WINED3D_CS_OP_GENERATE_MIPS */ wined3d_cs_exec_generate_mips,
++ /* WINED3D_CS_OP_DISCARD_BUFFER */ wined3d_cs_exec_discard_buffer,
+ };
+
+ #if defined(STAGING_CSMT)
+diff --git a/dlls/wined3d/drawprim.c b/dlls/wined3d/drawprim.c
+index 404623c9ac..7b622c9b14 100644
+--- a/dlls/wined3d/drawprim.c
++++ b/dlls/wined3d/drawprim.c
+@@ -688,7 +688,12 @@ void draw_primitive(struct wined3d_device *device, const struct wined3d_state *s
+ if (parameters->indexed)
+ {
+ struct wined3d_buffer *index_buffer = state->index_buffer;
+- if (!index_buffer->buffer_object || !stream_info->all_vbo)
++ if (index_buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)
++ {
++ idx_data = index_buffer->cs_persistent_map.offset;
++ ib_fence = index_buffer->fence; // FIXME(acomminos): use this fence or not?
++ }
++ else if (!index_buffer->buffer_object || !stream_info->all_vbo)
+ {
+ idx_data = index_buffer->resource.heap_memory;
+ }
+diff --git a/dlls/wined3d/query.c b/dlls/wined3d/query.c
+index f394af87c7..cf665bfd11 100644
+--- a/dlls/wined3d/query.c
++++ b/dlls/wined3d/query.c
+@@ -88,7 +88,7 @@ static BOOL wined3d_fence_supported(const struct wined3d_gl_info *gl_info)
+ return gl_info->supported[ARB_SYNC] || gl_info->supported[NV_FENCE] || gl_info->supported[APPLE_FENCE];
+ }
+
+-static enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence,
++enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence,
+ const struct wined3d_device *device, DWORD flags)
+ {
+ const struct wined3d_gl_info *gl_info;
+diff --git a/dlls/wined3d/resource.c b/dlls/wined3d/resource.c
+index 78deb5078b..9b3a303b08 100644
+--- a/dlls/wined3d/resource.c
++++ b/dlls/wined3d/resource.c
+@@ -358,13 +358,18 @@ static DWORD wined3d_resource_sanitise_map_flags(const struct wined3d_resource *
+ HRESULT CDECL wined3d_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx,
+ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
+ {
++ HRESULT hr;
+ TRACE("resource %p, sub_resource_idx %u, map_desc %p, box %s, flags %#x.\n",
+ resource, sub_resource_idx, map_desc, debug_box(box), flags);
+
+ flags = wined3d_resource_sanitise_map_flags(resource, flags);
+- wined3d_resource_wait_idle(resource);
+-
+- return wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags);
++ if (FAILED(hr = resource->resource_ops->resource_sub_resource_map(resource, sub_resource_idx, map_desc, box, flags)))
++ {
++ TRACE_(d3d_perf)("Mapping resource %p on the command stream.\n", resource);
++ wined3d_resource_wait_idle(resource);
++ hr = wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags);
++ }
++ return hr;
+ }
+
+ HRESULT CDECL wined3d_resource_map_info(struct wined3d_resource *resource, unsigned int sub_resource_idx,
+@@ -377,9 +382,16 @@ HRESULT CDECL wined3d_resource_map_info(struct wined3d_resource *resource, unsig
+
+ HRESULT CDECL wined3d_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx)
+ {
++ HRESULT hr;
+ TRACE("resource %p, sub_resource_idx %u.\n", resource, sub_resource_idx);
+
+- return wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx);
++ if (FAILED(hr = resource->resource_ops->resource_sub_resource_unmap(resource, sub_resource_idx)))
++ {
++ TRACE_(d3d_perf)("Unmapping resource %p on the command stream.\n", resource);
++ hr = wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx);
++ }
++ return hr;
++
+ }
+
+ UINT CDECL wined3d_resource_update_info(struct wined3d_resource *resource, unsigned int sub_resource_idx,
+diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c
+index f5b9eca520..142a932d07 100644
+--- a/dlls/wined3d/state.c
++++ b/dlls/wined3d/state.c
+@@ -4910,7 +4910,11 @@ static void indexbuffer(struct wined3d_context *context, const struct wined3d_st
+ else
+ {
+ struct wined3d_buffer *ib = state->index_buffer;
+- GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object));
++ // FIXME(acomminos): disastrous.
++ if (ib->locations & WINED3D_LOCATION_PERSISTENT_MAP)
++ GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_heap->buffer_object));
++ else
++ GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object));
+ }
+ }
+
+diff --git a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c
+index 69565f355d..51c37762cd 100644
+--- a/dlls/wined3d/texture.c
++++ b/dlls/wined3d/texture.c
+@@ -2297,6 +2297,12 @@ static void wined3d_texture_unload(struct wined3d_resource *resource)
+
+ static HRESULT texture_resource_sub_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx,
+ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
++{
++ return E_NOTIMPL;
++}
++
++static HRESULT texture_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx,
++ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
+ {
+ const struct wined3d_format *format = resource->format;
+ struct wined3d_texture_sub_resource *sub_resource;
+@@ -2464,6 +2470,11 @@ static HRESULT texture_resource_sub_resource_map_info(struct wined3d_resource *r
+ }
+
+ static HRESULT texture_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx)
++{
++ return E_NOTIMPL;
++}
++
++static HRESULT texture_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx)
+ {
+ struct wined3d_texture_sub_resource *sub_resource;
+ struct wined3d_device *device = resource->device;
+@@ -2514,8 +2525,10 @@ static const struct wined3d_resource_ops texture_resource_ops =
+ texture_resource_preload,
+ wined3d_texture_unload,
+ texture_resource_sub_resource_map,
++ texture_resource_sub_resource_map_cs,
+ texture_resource_sub_resource_map_info,
+ texture_resource_sub_resource_unmap,
++ texture_resource_sub_resource_unmap_cs,
+ };
+
+ static HRESULT texture1d_init(struct wined3d_texture *texture, const struct wined3d_resource_desc *desc,
+diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c
+index ee519d2b32..25626749fa 100644
+--- a/dlls/wined3d/utils.c
++++ b/dlls/wined3d/utils.c
+@@ -6264,6 +6264,7 @@ const char *wined3d_debug_location(DWORD location)
+ LOCATION_TO_STR(WINED3D_LOCATION_DRAWABLE);
+ LOCATION_TO_STR(WINED3D_LOCATION_RB_MULTISAMPLE);
+ LOCATION_TO_STR(WINED3D_LOCATION_RB_RESOLVED);
++ LOCATION_TO_STR(WINED3D_LOCATION_PERSISTENT_MAP);
+ #undef LOCATION_TO_STR
+ if (location) FIXME("Unrecognized location flag(s) %#x.\n", location);
+
+diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
+index 96bda81eb9..d049d57206 100644
+--- a/dlls/wined3d/wined3d_private.h
++++ b/dlls/wined3d/wined3d_private.h
+@@ -1701,6 +1701,9 @@ void wined3d_fence_destroy(struct wined3d_fence *fence) DECLSPEC_HIDDEN;
+ void wined3d_fence_issue(struct wined3d_fence *fence, const struct wined3d_device *device) DECLSPEC_HIDDEN;
+ enum wined3d_fence_result wined3d_fence_wait(const struct wined3d_fence *fence,
+ const struct wined3d_device *device) DECLSPEC_HIDDEN;
++// XXX(acomminos): really expose this?
++enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence,
++ const struct wined3d_device *device, DWORD flags) DECLSPEC_HIDDEN;
+
+ /* Direct3D terminology with little modifications. We do not have an issued
+ * state because only the driver knows about it, but we have a created state
+@@ -3009,9 +3012,12 @@ struct wined3d_resource_ops
+ void (*resource_unload)(struct wined3d_resource *resource);
+ HRESULT (*resource_sub_resource_map)(struct wined3d_resource *resource, unsigned int sub_resource_idx,
+ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags);
++ HRESULT (*resource_sub_resource_map_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx,
++ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags);
+ HRESULT (*resource_map_info)(struct wined3d_resource *resource, unsigned int sub_resource_idx,
+ struct wined3d_map_info *info, DWORD flags);
+ HRESULT (*resource_sub_resource_unmap)(struct wined3d_resource *resource, unsigned int sub_resource_idx);
++ HRESULT (*resource_sub_resource_unmap_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx);
+ };
+
+ struct wined3d_resource
+@@ -3266,6 +3272,7 @@ void wined3d_texture_validate_location(struct wined3d_texture *texture,
+ #define WINED3D_LOCATION_DRAWABLE 0x00000040
+ #define WINED3D_LOCATION_RB_MULTISAMPLE 0x00000080
+ #define WINED3D_LOCATION_RB_RESOLVED 0x00000100
++#define WINED3D_LOCATION_PERSISTENT_MAP 0x00000200
+
+ const char *wined3d_debug_location(DWORD location) DECLSPEC_HIDDEN;
+
+@@ -3622,6 +3629,7 @@ void wined3d_cs_emit_copy_sub_resource(struct wined3d_cs *cs, struct wined3d_res
+ unsigned int dst_sub_resource_idx, const struct wined3d_box *dst_box, struct wined3d_resource *src_resource,
+ unsigned int src_sub_resource_idx, const struct wined3d_box *src_box) DECLSPEC_HIDDEN;
+ void wined3d_cs_emit_generate_mips(struct wined3d_cs *cs, struct wined3d_shader_resource_view *view) DECLSPEC_HIDDEN;
++void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range) DECLSPEC_HIDDEN;
+ void wined3d_cs_init_object(struct wined3d_cs *cs,
+ void (*callback)(void *object), void *object) DECLSPEC_HIDDEN;
+ HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx,
+@@ -3712,6 +3720,11 @@ struct wined3d_buffer
+ UINT stride; /* 0 if no conversion */
+ enum wined3d_buffer_conversion_type *conversion_map; /* NULL if no conversion */
+ UINT conversion_stride; /* 0 if no shifted conversion */
++
++ /* persistent mapped buffer */
++ struct wined3d_buffer_heap *buffer_heap;
++ struct wined3d_map_range cs_persistent_map;
++ struct wined3d_map_range mt_persistent_map; // TODO: make struct list?
+ };
+
+ static inline struct wined3d_buffer *buffer_from_resource(struct wined3d_resource *resource)
+--
+2.16.2
+
diff --git a/0004-wined3d-Implement-aligned-persistent-heaps-for-persi.patch b/0004-wined3d-Implement-aligned-persistent-heaps-for-persi.patch
new file mode 100644
index 000000000000..7b4a9e433488
--- /dev/null
+++ b/0004-wined3d-Implement-aligned-persistent-heaps-for-persi.patch
@@ -0,0 +1,240 @@
+From 65595c191d2a01b2486ba10618f743c930af362b Mon Sep 17 00:00:00 2001
+From: Andrew Comminos <andrew@comminos.com>
+Date: Sat, 24 Feb 2018 14:38:59 -0800
+Subject: [PATCH 4/8] wined3d: Implement aligned persistent heaps for
+ persistently mapped UBOs.
+
+---
+ dlls/wined3d/buffer.c | 16 ++++++++++++----
+ dlls/wined3d/buffer_heap.c | 12 +++++++++---
+ dlls/wined3d/cs.c | 15 ++++++++++++++-
+ dlls/wined3d/device.c | 15 ++++++++++++++-
+ dlls/wined3d/state.c | 11 ++++++++++-
+ dlls/wined3d/wined3d_private.h | 5 ++++-
+ 6 files changed, 63 insertions(+), 11 deletions(-)
+
+diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c
+index ccb090c907..7d7e506817 100644
+--- a/dlls/wined3d/buffer.c
++++ b/dlls/wined3d/buffer.c
+@@ -279,14 +279,16 @@ static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer, struct wi
+ struct wined3d_map_range map_range;
+ HRESULT hr;
+
+- if (buffer->resource.usage & WINED3DUSAGE_WRITEONLY)
++ if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER)
+ {
+- heap = device->wo_buffer_heap;
++ // Use a heap aligned to constant buffer offset requirements.
++ heap = device->cb_buffer_heap;
+ }
+ else
+ {
+- FIXME("Using write-only heap for a persistent buffer without WINED3DUSAGE_WRITEONLY.\n");
+- heap = device->rw_buffer_heap;
++ if (!(buffer->resource.usage & WINED3DUSAGE_WRITEONLY))
++ FIXME("Using a write-only persistent buffer for %p without WINED3DUSAGE_WRITEONLY.\n", buffer);
++ heap = device->wo_buffer_heap;
+ }
+
+ buffer->buffer_heap = heap;
+@@ -791,6 +793,7 @@ DWORD wined3d_buffer_get_memory(struct wined3d_buffer *buffer,
+ {
+ data->buffer_object = buffer->buffer_object;
+ data->addr = NULL;
++ data->length = buffer->resource.size;
+ return WINED3D_LOCATION_BUFFER;
+ }
+ if (locations & WINED3D_LOCATION_PERSISTENT_MAP)
+@@ -798,12 +801,17 @@ DWORD wined3d_buffer_get_memory(struct wined3d_buffer *buffer,
+ // FIXME(acomminos): should we expose a buffer object we don't wholly own here?
+ data->buffer_object = buffer->buffer_heap->buffer_object;
+ data->addr = buffer->cs_persistent_map.offset;
++ // Note that the size of the underlying buffer allocation may be larger
++ // than the buffer knows about. In this case, we've rounded it up to be
++ // aligned (e.g. for uniform buffer offsets).
++ data->length = buffer->cs_persistent_map.size;
+ return WINED3D_LOCATION_PERSISTENT_MAP;
+ }
+ if (locations & WINED3D_LOCATION_SYSMEM)
+ {
+ data->buffer_object = 0;
+ data->addr = buffer->resource.heap_memory;
++ data->length = buffer->resource.size;
+ return WINED3D_LOCATION_SYSMEM;
+ }
+
+diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
+index f24fddffb4..02b925b658 100644
+--- a/dlls/wined3d/buffer_heap.c
++++ b/dlls/wined3d/buffer_heap.c
+@@ -95,7 +95,7 @@ static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wine
+ }
+
+ /* Context activation is done by the caller. */
+-HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, BOOL write_only, struct wined3d_buffer_heap **buffer_heap)
++HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap)
+ {
+ const struct wined3d_gl_info *gl_info = context->gl_info;
+ const GLenum buffer_target = GL_ARRAY_BUFFER;
+@@ -133,6 +133,7 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
+
+ object->free_list_head = element_new(0, size);
+ object->fenced_head = object->fenced_tail = NULL;
++ object->alignment = alignment;
+ InitializeCriticalSection(&object->temp_lock);
+
+ *buffer_heap = object;
+@@ -147,12 +148,17 @@ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct win
+ return WINED3D_OK;
+ }
+
+-HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range) {
++HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range)
++{
+ EnterCriticalSection(&heap->temp_lock);
+
+ // TODO(acomminos): free list binning?
+ struct wined3d_buffer_heap_element *elem = heap->free_list_head;
+- // XXX(acomminos): Avoid fragmentation by rounding to nearest power of two.
++
++ // Round up the size to a multiple of the heap's alignment.
++ if (heap->alignment)
++ size += heap->alignment - (size % heap->alignment);
++
+ while (elem != NULL)
+ {
+ TRACE("allocation at %p, size %lld\n", heap->map_ptr + elem->range.offset, elem->range.size);
+diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
+index edcf521b72..d7bdc21a25 100644
+--- a/dlls/wined3d/cs.c
++++ b/dlls/wined3d/cs.c
+@@ -2668,7 +2668,20 @@ static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *da
+
+ buffer->cs_persistent_map = op->map_range;
+
+- device_invalidate_state(cs->device, STATE_STREAMSRC);
++ // TODO(acomminos): merge this logic with buffer.c functions for standalone BOs
++ if (buffer->bind_flags & WINED3D_BIND_VERTEX_BUFFER)
++ device_invalidate_state(cs->device, STATE_STREAMSRC);
++ if (buffer->bind_flags & WINED3D_BIND_INDEX_BUFFER)
++ device_invalidate_state(cs->device, STATE_INDEXBUFFER);
++ if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER)
++ {
++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_VERTEX));
++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_HULL));
++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_DOMAIN));
++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_GEOMETRY));
++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_PIXEL));
++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_COMPUTE));
++ }
+
+ wined3d_resource_release(&op->buffer->resource);
+ }
+diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
+index 363dcb17f0..e0871d1636 100644
+--- a/dlls/wined3d/device.c
++++ b/dlls/wined3d/device.c
+@@ -853,12 +853,22 @@ static void create_buffer_heap(struct wined3d_device *device, struct wined3d_con
+ // TODO(acomminos): definitely don't take up all of vram. this is gonna get
+ // paged anyway, though.
+ const GLsizeiptr HBO_SIZE = device->adapter->vram_bytes / 4;
++ const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
++
++ GLint ub_alignment;
++ gl_info->gl_ops.gl.p_glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &ub_alignment);
+
+ HRESULT hr;
+- if (FAILED(hr = wined3d_buffer_heap_create(context, HBO_SIZE, TRUE, &device->wo_buffer_heap)))
++ if (FAILED(hr = wined3d_buffer_heap_create(context, HBO_SIZE, 0, TRUE, &device->wo_buffer_heap)))
+ {
+ ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr);
+ }
++
++ // TODO(acomminos): can likely use a way smaller heap for CBs by querying limits
++ if (FAILED(hr = wined3d_buffer_heap_create(context, HBO_SIZE, ub_alignment, TRUE, &device->cb_buffer_heap)))
++ {
++ ERR("Failed to create persistent buffer heap for constant buffers, hr %#x.\n", hr);
++ }
+ }
+
+ /* Context activation is done by the caller. */
+@@ -866,6 +876,9 @@ static void destroy_buffer_heap(struct wined3d_device *device, struct wined3d_co
+ {
+ if (device->wo_buffer_heap)
+ wined3d_buffer_heap_destroy(device->wo_buffer_heap, context);
++
++ if (device->cb_buffer_heap)
++ wined3d_buffer_heap_destroy(device->cb_buffer_heap, context);
+ }
+
+ static LONG fullscreen_style(LONG style)
+diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c
+index 142a932d07..ce007d1a8e 100644
+--- a/dlls/wined3d/state.c
++++ b/dlls/wined3d/state.c
+@@ -4980,6 +4980,7 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state
+ enum wined3d_shader_type shader_type;
+ struct wined3d_buffer *buffer;
+ unsigned int i, base, count;
++ struct wined3d_bo_address bo_addr;
+
+ TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id);
+
+@@ -4992,7 +4993,15 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state
+ for (i = 0; i < count; ++i)
+ {
+ buffer = state->cb[shader_type][i];
+- GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, buffer ? buffer->buffer_object : 0));
++ if (buffer)
++ {
++ wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations);
++ GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length));
++ }
++ else
++ {
++ GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0));
++ }
+ }
+ checkGLcall("bind constant buffers");
+ }
+diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
+index d049d57206..cfa48a5f3e 100644
+--- a/dlls/wined3d/wined3d_private.h
++++ b/dlls/wined3d/wined3d_private.h
+@@ -1462,6 +1462,7 @@ struct wined3d_bo_address
+ {
+ GLuint buffer_object;
+ BYTE *addr;
++ GLsizeiptr length;
+ };
+
+ struct wined3d_const_bo_address
+@@ -2972,6 +2973,7 @@ struct wined3d_device
+
+ /* Dynamic buffer heap */
+ struct wined3d_buffer_heap *wo_buffer_heap;
++ struct wined3d_buffer_heap *cb_buffer_heap;
+ };
+
+ void device_clear_render_targets(struct wined3d_device *device, UINT rt_count, const struct wined3d_fb_state *fb,
+@@ -3671,6 +3673,7 @@ struct wined3d_buffer_heap
+ {
+ GLuint buffer_object;
+ void *map_ptr;
++ GLsizeiptr alignment;
+ CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list.
+
+ // TODO: add buckets for free regions of a given size.
+@@ -3684,7 +3687,7 @@ struct wined3d_buffer_heap
+ struct wined3d_buffer_heap_fenced_element *fenced_tail;
+ };
+
+-HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, BOOL write_only, struct wined3d_buffer_heap **heap) DECLSPEC_HIDDEN;
++HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **heap) DECLSPEC_HIDDEN;
+ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN;
+ // Fetches a buffer from the heap of at least the given size.
+ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range* out_range) DECLSPEC_HIDDEN;
+--
+2.16.2
+
diff --git a/0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch b/0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch
new file mode 100644
index 000000000000..cac70eac997d
--- /dev/null
+++ b/0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch
@@ -0,0 +1,455 @@
+From 7a630d56cb1bddfd03cae3cdc43aee949b04abe2 Mon Sep 17 00:00:00 2001
+From: Andrew Comminos <andrew@comminos.com>
+Date: Mon, 26 Feb 2018 21:35:40 -0800
+Subject: [PATCH 5/8] wined3d: Experimental buffer heap fence batching, upper
+ bound on heap size.
+
+---
+ dlls/wined3d/buffer.c | 11 +-
+ dlls/wined3d/buffer_heap.c | 232 ++++++++++++++++++++++++-----------------
+ dlls/wined3d/cs.c | 15 +--
+ dlls/wined3d/device.c | 5 +-
+ dlls/wined3d/wined3d_private.h | 12 ++-
+ 5 files changed, 154 insertions(+), 121 deletions(-)
+
+diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c
+index 7d7e506817..fbec613c92 100644
+--- a/dlls/wined3d/buffer.c
++++ b/dlls/wined3d/buffer.c
+@@ -992,9 +992,12 @@ void wined3d_buffer_load(struct wined3d_buffer *buffer, struct wined3d_context *
+
+ if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
+ {
+- if (!wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP))
+- ERR("Failed to preload persistent mapping.\n");
+- return;
++ if (wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP))
++ return;
++
++ ERR("Failed to preload persistent mapping for %p, falling back to BO.\n", buffer);
++ buffer->flags |= WINED3D_BUFFER_USE_BO;
++ buffer->flags &= ~WINED3D_BUFFER_PERSISTENT;
+ }
+
+ /* TODO: Make converting independent from VBOs */
+@@ -1417,7 +1420,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc
+ struct wined3d_map_range map_range;
+ if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &map_range)))
+ {
+- FIXME("Failed to allocate new buffer, falling back to sync path.\n");
++ FIXME_(d3d_perf)("Failed to allocate new buffer, falling back to sync path.\n");
+ return hr;
+ }
+ map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset;
+diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
+index 02b925b658..165a957edd 100644
+--- a/dlls/wined3d/buffer_heap.c
++++ b/dlls/wined3d/buffer_heap.c
+@@ -22,6 +22,7 @@
+ #include "wined3d_private.h"
+
+ WINE_DEFAULT_DEBUG_CHANNEL(d3d);
++WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
+
+ struct wined3d_buffer_heap_element
+ {
+@@ -33,7 +34,7 @@ struct wined3d_buffer_heap_element
+
+ struct wined3d_buffer_heap_fenced_element
+ {
+- struct wined3d_map_range range;
++ struct wined3d_buffer_heap_element *ranges;
+ struct wined3d_fence *fence;
+
+ struct wined3d_buffer_heap_element *next;
+@@ -52,19 +53,19 @@ static struct wined3d_buffer_heap_element* element_new(GLintptr offset, GLsizeip
+ return elem;
+ }
+
+-static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_map_range range, struct wined3d_fence* fence)
++static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_element *ranges, struct wined3d_fence* fence)
+ {
+ struct wined3d_buffer_heap_fenced_element* elem;
+ elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_fenced_element));
+ if (!elem)
+ return NULL;
+- elem->range = range;
++ elem->ranges = ranges;
+ elem->fence = fence;
+ elem->next = NULL;
+ return elem;
+ }
+
+-static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
++static void element_merge_adjacent(struct wined3d_buffer_heap_element **head, struct wined3d_buffer_heap_element *elem)
+ {
+ struct wined3d_buffer_heap_element *cur_prev = elem->prev;
+ struct wined3d_buffer_heap_element *cur_next = elem->next;
+@@ -77,8 +78,8 @@ static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wine
+ if (cur_prev->prev)
+ cur_prev->prev->next = elem;
+
+- if (cur_prev == heap->free_list_head)
+- heap->free_list_head = elem;
++ if (cur_prev == *head)
++ *head = elem;
+
+ HeapFree(GetProcessHeap(), 0, cur_prev);
+ }
+@@ -94,6 +95,69 @@ static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wine
+ }
+ }
+
++// Inserts a range into the list starting at `elem`.
++// Updates the head of the list, if necessary.
++static void element_insert_range(struct wined3d_buffer_heap_element **head, struct wined3d_map_range range)
++{
++ struct wined3d_buffer_heap_element *elem = *head;
++ struct wined3d_buffer_heap_element *new_elem;
++ struct wined3d_buffer_heap_element *last_elem = NULL;
++
++ // Special case where the head doesn't exist.
++ if (!elem)
++ {
++ new_elem = element_new(range.offset, range.size);
++ *head = new_elem;
++ return;
++ }
++
++ while (elem)
++ {
++ struct wined3d_map_range *erange = &elem->range;
++ if (range.offset + range.size == erange->offset)
++ {
++ // Left side merge
++ erange->offset = range.offset;
++ erange->size += range.size;
++ // Check if this causes a merge with elem->prev
++ element_merge_adjacent(head, elem);
++ return;
++ }
++ else if (erange->offset + erange->size == range.offset)
++ {
++ // Right side merge
++ erange->size += range.size;
++ // Check if this causes a merge with elem->prev
++ element_merge_adjacent(head, elem);
++ return;
++ }
++ else if (range.offset < erange->offset)
++ {
++ // Append to left, non-merge case.
++ new_elem = element_new(range.offset, range.size);
++ new_elem->prev = elem->prev;
++ new_elem->next = elem;
++ if (elem->prev)
++ {
++ elem->prev->next = new_elem;
++ }
++ if (*head == elem)
++ {
++ *head = new_elem;
++ }
++ elem->prev = new_elem;
++ return;
++ }
++ last_elem = elem;
++ elem = elem->next;
++ }
++
++ // Larger offset than all other elements in the list, append to the end.
++ new_elem = element_new(range.offset, range.size);
++ new_elem->prev = last_elem;
++ last_elem->next = new_elem;
++}
++
+ /* Context activation is done by the caller. */
+ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap)
+ {
+@@ -134,6 +198,9 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
+ object->free_list_head = element_new(0, size);
+ object->fenced_head = object->fenced_tail = NULL;
+ object->alignment = alignment;
++ object->pending_fenced_bytes = 0;
++ object->pending_fenced_head = NULL;
++ object->pending_fenced_threshold_bytes = size / 4; // FIXME(acomminos): make this externally declared
+ InitializeCriticalSection(&object->temp_lock);
+
+ *buffer_heap = object;
+@@ -155,6 +222,9 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
+ // TODO(acomminos): free list binning?
+ struct wined3d_buffer_heap_element *elem = heap->free_list_head;
+
++ // Round to the nearest power of two to reduce fragmentation.
++ size = 1ULL << (int)ceil(log2(size));
++
+ // Round up the size to a multiple of the heap's alignment.
+ if (heap->alignment)
+ size += heap->alignment - (size % heap->alignment);
+@@ -188,6 +258,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
+ }
+ HeapFree(GetProcessHeap(), 0, elem);
+ }
++
+ LeaveCriticalSection(&heap->temp_lock);
+ return WINED3D_OK;
+ }
+@@ -201,87 +272,53 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
+ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range)
+ {
+ EnterCriticalSection(&heap->temp_lock);
+- struct wined3d_buffer_heap_element *new_elem;
+- struct wined3d_buffer_heap_element *elem = heap->free_list_head;
+- struct wined3d_buffer_heap_element *last_elem = NULL;
+
+- // Special case where the head doesn't exist.
+- if (!elem)
+- {
+- new_elem = element_new(range.offset, range.size);
+- heap->free_list_head = new_elem;
+- goto success;
+- }
++ element_insert_range(&heap->free_list_head, range);
+
+- while (elem)
++ LeaveCriticalSection(&heap->temp_lock);
++ return WINED3D_OK;
++}
++
++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range)
++{
++ element_insert_range(&heap->pending_fenced_head, range);
++
++ heap->pending_fenced_bytes += range.size;
++ if (heap->pending_fenced_bytes >= heap->pending_fenced_threshold_bytes)
+ {
+- struct wined3d_map_range *erange = &elem->range;
+- if (range.offset + range.size == erange->offset)
++ // TODO(acomminos): break this out into a separate function
++ struct wined3d_buffer_heap_fenced_element *fenced_elem;
++ struct wined3d_fence *fence;
++ HRESULT hr;
++
++ if (FAILED(hr = wined3d_fence_create(device, &fence)))
+ {
+- // Left side merge
+- erange->offset = range.offset;
+- erange->size += range.size;
+- // Check if this causes a merge with elem->prev
+- element_merge_adjacent(heap, elem);
+- goto success;
++ ERR("Failed to create fence.\n");
++ return hr;
+ }
+- else if (erange->offset + erange->size == range.offset)
++
++ fenced_elem = fenced_element_new(heap->pending_fenced_head, fence);
++ if (!fenced_elem)
++ return E_OUTOFMEMORY;
++
++ TRACE_(d3d_perf)("Dispatching fenced buffer set.\n");
++ heap->pending_fenced_bytes = 0;
++ heap->pending_fenced_head = NULL;
++
++ // Append to end of fenced list, which works well if you assume that buffers
++ // are freed in some ascending draw call ordering.
++ if (!heap->fenced_head)
+ {
+- // Right side merge
+- erange->size += range.size;
+- // Check if this causes a merge with elem->prev
+- element_merge_adjacent(heap, elem);
+- goto success;
++ heap->fenced_head = fenced_elem;
++ heap->fenced_tail = fenced_elem;
+ }
+- else if (range.offset < erange->offset)
++ else
+ {
+- // Append to left, non-merge case.
+- new_elem = element_new(range.offset, range.size);
+- new_elem->prev = elem->prev;
+- new_elem->next = elem;
+- if (elem->prev)
+- {
+- elem->prev->next = new_elem;
+- }
+- if (heap->free_list_head == elem)
+- {
+- heap->free_list_head = new_elem;
+- }
+- elem->prev = new_elem;
+- goto success;
++ heap->fenced_tail->next = fenced_elem;
++ heap->fenced_tail = fenced_elem;
+ }
+- last_elem = elem;
+- elem = elem->next;
+- }
+-
+- // Larger offset than all other elements in the list, append to the end.
+- new_elem = element_new(range.offset, range.size);
+- new_elem->prev = last_elem;
+- last_elem->next = new_elem;
+-
+-success:
+- LeaveCriticalSection(&heap->temp_lock);
+- return WINED3D_OK;
+-}
+-
+-HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_map_range range, struct wined3d_fence *fence)
+-{
+- struct wined3d_buffer_heap_fenced_element *elem;
+- elem = fenced_element_new(range, fence);
+- if (!elem)
+- return E_OUTOFMEMORY;
+
+- // Append to end of fenced list, which works well if you assume that buffers
+- // are freed in some ascending draw call ordering.
+- if (!heap->fenced_head)
+- {
+- heap->fenced_head = elem;
+- heap->fenced_tail = elem;
+- }
+- else
+- {
+- heap->fenced_tail->next = elem;
+- heap->fenced_tail = elem;
++ wined3d_fence_issue(fence, device);
+ }
+
+ return WINED3D_OK;
+@@ -294,29 +331,32 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str
+ if (!elem)
+ return WINED3D_OK;
+
+- while (elem)
++ res = wined3d_fence_test(elem->fence, device, 0);
++ switch (res)
+ {
+- res = wined3d_fence_test(elem->fence, device, 0);
+- switch (res)
+- {
+- case WINED3D_FENCE_OK:
+- case WINED3D_FENCE_NOT_STARTED:
++ case WINED3D_FENCE_OK:
++ case WINED3D_FENCE_NOT_STARTED:
++ {
++ TRACE_(d3d_perf)("Freed fence group.\n");
++ struct wined3d_buffer_heap_element *range_elem = elem->ranges;
++ // FIXME(acomminos): this might take a while. incrementally do this?
++ while (range_elem)
+ {
+- struct wined3d_buffer_heap_fenced_element *next = elem->next;
+-
+- wined3d_fence_destroy(elem->fence);
+- wined3d_buffer_heap_free(heap, elem->range);
++ struct wined3d_buffer_heap_element *next = range_elem->next;
++ wined3d_buffer_heap_free(heap, range_elem->range);
++ HeapFree(GetProcessHeap(), 0, range_elem);
++ range_elem = next;
++ }
+
+- heap->fenced_head = elem->next;
+- HeapFree(GetProcessHeap(), 0, elem);
+- // TODO(acomminos): bother to null out fenced_tail?
++ wined3d_fence_destroy(elem->fence);
+
+- elem = next;
+- break;
+- }
+- default:
+- return WINED3D_OK;
+- }
++ heap->fenced_head = elem->next;
++ HeapFree(GetProcessHeap(), 0, elem);
++ // TODO(acomminos): bother to null out fenced_tail?
++ break;
++ }
++ default:
++ return WINED3D_OK;
+ }
+
+ return WINED3D_OK;
+diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
+index d7bdc21a25..bae5d9f4a1 100644
+--- a/dlls/wined3d/cs.c
++++ b/dlls/wined3d/cs.c
+@@ -2643,26 +2643,15 @@ static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *da
+ const struct wined3d_cs_discard_buffer *op = data;
+ struct wined3d_buffer *buffer = op->buffer;
+ HRESULT hr;
+- struct wined3d_fence *fence;
+
+ // Poll for discarded buffers whose fenced have been triggered here to avoid
+ // excessive VRAM consumption.
+ wined3d_buffer_heap_cs_poll_fences(buffer->buffer_heap, cs->device);
+
+ // TODO(acomminos): should call into buffer.c here instead.
+- // XXX(acomminos): should we always create a new fence here?
+- if (!FAILED(hr = wined3d_fence_create(cs->device, &fence)))
++ if (FAILED(hr = wined3d_buffer_heap_free_fenced(buffer->buffer_heap, cs->device, buffer->cs_persistent_map)))
+ {
+- // TODO(acomminos): make more informed fences based on prior info. for now,
+- // we do this because allocating and deleting fences repeatedly is brutal
+- // for performance. look into why.
+- wined3d_fence_issue(fence, cs->device);
+-
+- wined3d_buffer_heap_free_fenced(buffer->buffer_heap, buffer->cs_persistent_map, fence);
+- }
+- else
+- {
+- ERR("Failed to create fence for discarded buffer %p, hr %x\n. Freeing anyway.", buffer, hr);
++ ERR("Failed to do a fenced free on discarded buffer %p, hr %x\n. Freeing anyway.", buffer, hr);
+ wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map);
+ }
+
+diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
+index e0871d1636..bdab83b935 100644
+--- a/dlls/wined3d/device.c
++++ b/dlls/wined3d/device.c
+@@ -850,9 +850,8 @@ static void create_buffer_heap(struct wined3d_device *device, struct wined3d_con
+ {
+ // TODO(acomminos): check if ARB_buffer_storage is supported, first-
+ // possibly make wined3d_buffer_heap_create fail.
+- // TODO(acomminos): definitely don't take up all of vram. this is gonna get
+- // paged anyway, though.
+- const GLsizeiptr HBO_SIZE = device->adapter->vram_bytes / 4;
++ // TODO(acomminos): 512MB is sane for geometry, maybe not for PBO.
++ const GLsizeiptr HBO_SIZE = min(512000000, device->adapter->vram_bytes / 4);
+ const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
+
+ GLint ub_alignment;
+diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
+index cfa48a5f3e..62433a39b1 100644
+--- a/dlls/wined3d/wined3d_private.h
++++ b/dlls/wined3d/wined3d_private.h
+@@ -3679,10 +3679,12 @@ struct wined3d_buffer_heap
+ // TODO: add buckets for free regions of a given size.
+ struct wined3d_buffer_heap_element *free_list_head;
+
+- // store in FIFO order? that way, we can minimize our amount of time
+- // waiting on fences?
+- // XXX(acomminos): are fences guaranteed to be triggered in a serial
+- // ordering? if so, we can early-terminate our polling
++ // Elements that need to be fenced, but haven't reached the required size.
++ struct wined3d_buffer_heap_element *pending_fenced_head;
++ GLsizeiptr pending_fenced_bytes; // Number of free bytes in the active fenced region.
++ GLsizeiptr pending_fenced_threshold_bytes; // Number of bytes required before fencing.
++
++ // List of sets of buffers behind a common fence, in FIFO order.
+ struct wined3d_buffer_heap_fenced_element *fenced_head;
+ struct wined3d_buffer_heap_fenced_element *fenced_tail;
+ };
+@@ -3694,7 +3696,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
+ // Immediately frees a heap-allocated buffer segment.
+ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN;
+ // Enqueues a buffer segment to return to the heap once its fence has been signaled.
+-HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_map_range range, struct wined3d_fence *fence) DECLSPEC_HIDDEN;
++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) DECLSPEC_HIDDEN;
+ // Moves a buffers with a signaled fence from the fenced list to the free list.
+ // Must be executed on the CS thread.
+ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN;
+--
+2.16.2
+
diff --git a/0006-wined3d-Switch-wined3d_buffer_heap-to-be-backed-by-a.patch b/0006-wined3d-Switch-wined3d_buffer_heap-to-be-backed-by-a.patch
new file mode 100644
index 000000000000..89c9c8ec9eda
--- /dev/null
+++ b/0006-wined3d-Switch-wined3d_buffer_heap-to-be-backed-by-a.patch
@@ -0,0 +1,383 @@
+From 2acd4b6ca9cadb84eb38bf1fc4bd5b2ccab3c532 Mon Sep 17 00:00:00 2001
+From: Andrew Comminos <andrew@comminos.com>
+Date: Tue, 27 Feb 2018 16:11:10 -0800
+Subject: [PATCH 6/8] wined3d: Switch wined3d_buffer_heap to be backed by an
+ rb-tree.
+
+---
+ dlls/wined3d/buffer_heap.c | 230 ++++++++++++++++++-----------------------
+ dlls/wined3d/wined3d_private.h | 6 +-
+ 2 files changed, 105 insertions(+), 131 deletions(-)
+
+diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
+index 165a957edd..45d3a2c7d7 100644
+--- a/dlls/wined3d/buffer_heap.c
++++ b/dlls/wined3d/buffer_heap.c
+@@ -19,6 +19,7 @@
+
+ #include "config.h"
+ #include "wine/port.h"
++#include "wine/rbtree.h"
+ #include "wined3d_private.h"
+
+ WINE_DEFAULT_DEBUG_CHANNEL(d3d);
+@@ -26,21 +27,26 @@ WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
+
+ struct wined3d_buffer_heap_element
+ {
++ struct wine_rb_entry entry;
+ struct wined3d_map_range range;
++};
+
+- struct wined3d_buffer_heap_element *prev;
+- struct wined3d_buffer_heap_element *next;
++struct wined3d_buffer_heap_range
++{
++ struct wined3d_map_range range;
++
++ struct wined3d_buffer_heap_range *next;
+ };
+
+ struct wined3d_buffer_heap_fenced_element
+ {
+- struct wined3d_buffer_heap_element *ranges;
++ struct wined3d_buffer_heap_range *ranges;
+ struct wined3d_fence *fence;
+
+- struct wined3d_buffer_heap_element *next;
++ struct wined3d_buffer_heap_fenced_element *next;
+ };
+
+-static struct wined3d_buffer_heap_element* element_new(GLintptr offset, GLsizeiptr size)
++static struct wined3d_buffer_heap_element* element_new(GLsizei offset, GLsizei size)
+ {
+ struct wined3d_buffer_heap_element* elem;
+ elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_element));
+@@ -48,12 +54,10 @@ static struct wined3d_buffer_heap_element* element_new(GLintptr offset, GLsizeip
+ return NULL;
+ elem->range.offset = offset;
+ elem->range.size = size;
+- elem->next = NULL;
+- elem->prev = NULL;
+ return elem;
+ }
+
+-static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_element *ranges, struct wined3d_fence* fence)
++static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_range *ranges, struct wined3d_fence* fence)
+ {
+ struct wined3d_buffer_heap_fenced_element* elem;
+ elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_fenced_element));
+@@ -65,97 +69,16 @@ static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wine
+ return elem;
+ }
+
+-static void element_merge_adjacent(struct wined3d_buffer_heap_element **head, struct wined3d_buffer_heap_element *elem)
+-{
+- struct wined3d_buffer_heap_element *cur_prev = elem->prev;
+- struct wined3d_buffer_heap_element *cur_next = elem->next;
+- if (cur_prev && cur_prev->range.offset + cur_prev->range.size == elem->range.offset)
+- {
+- elem->range.offset = cur_prev->range.offset;
+- elem->range.size += cur_prev->range.size;
+-
+- elem->prev = cur_prev->prev;
+- if (cur_prev->prev)
+- cur_prev->prev->next = elem;
+-
+- if (cur_prev == *head)
+- *head = elem;
+-
+- HeapFree(GetProcessHeap(), 0, cur_prev);
+- }
+- if (cur_next && cur_next->range.offset == elem->range.offset + elem->range.size)
+- {
+- elem->range.size += cur_next->range.size;
+- elem->next = cur_next->next;
+- if (cur_next->next)
+- {
+- cur_next->next->prev = elem;
+- }
+- HeapFree(GetProcessHeap(), 0, cur_next);
+- }
+-}
+-
+-// Inserts a range into the list starting at `elem`.
+-// Updates the head of the list, if necessary.
+-static void element_insert_range(struct wined3d_buffer_heap_element **head, struct wined3d_map_range range)
++static int free_tree_compare(const void *key, const struct wine_rb_entry *entry)
+ {
+- struct wined3d_buffer_heap_element *elem = *head;
+- struct wined3d_buffer_heap_element *new_elem;
+- struct wined3d_buffer_heap_element *last_elem = NULL;
+-
+- // Special case where the head doesn't exist.
+- if (!elem)
+- {
+- new_elem = element_new(range.offset, range.size);
+- *head = new_elem;
+- return;
+- }
+-
+- while (elem)
+- {
+- struct wined3d_map_range *erange = &elem->range;
+- if (range.offset + range.size == erange->offset)
+- {
+- // Left side merge
+- erange->offset = range.offset;
+- erange->size += range.size;
+- // Check if this causes a merge with elem->prev
+- element_merge_adjacent(head, elem);
+- return;
+- }
+- else if (erange->offset + erange->size == range.offset)
+- {
+- // Right side merge
+- erange->size += range.size;
+- // Check if this causes a merge with elem->prev
+- element_merge_adjacent(head, elem);
+- return;
+- }
+- else if (range.offset < erange->offset)
+- {
+- // Append to left, non-merge case.
+- new_elem = element_new(range.offset, range.size);
+- new_elem->prev = elem->prev;
+- new_elem->next = elem;
+- if (elem->prev)
+- {
+- elem->prev->next = new_elem;
+- }
+- if (*head == elem)
+- {
+- *head = new_elem;
+- }
+- elem->prev = new_elem;
+- return;
+- }
+- last_elem = elem;
+- elem = elem->next;
+- }
+-
+- // Larger offset than all other elements in the list, append to the end.
+- new_elem = element_new(range.offset, range.size);
+- new_elem->prev = last_elem;
+- last_elem->next = new_elem;
++ const GLsizei offset = (const GLsizei) key;
++ struct wined3d_buffer_heap_element *elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
++
++ if (offset < elem->range.offset)
++ return -1;
++ if (offset > elem->range.offset)
++ return 1;
++ return 0;
+ }
+
+ /* Context activation is done by the caller. */
+@@ -165,6 +88,7 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
+ const GLenum buffer_target = GL_ARRAY_BUFFER;
+ GLbitfield access_flags;
+ GLbitfield storage_flags;
++ struct wined3d_buffer_heap_element *initial_elem;
+
+ struct wined3d_buffer_heap *object;
+
+@@ -195,7 +119,11 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
+ }
+ context_bind_bo(context, buffer_target, 0);
+
+- object->free_list_head = element_new(0, size);
++ wine_rb_init(&object->free_tree, free_tree_compare);
++
++ initial_elem = element_new(0, size);
++ wine_rb_put(&object->free_tree, initial_elem->range.offset, &initial_elem->entry);
++
+ object->fenced_head = object->fenced_tail = NULL;
+ object->alignment = alignment;
+ object->pending_fenced_bytes = 0;
+@@ -217,10 +145,10 @@ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct win
+
+ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range)
+ {
+- EnterCriticalSection(&heap->temp_lock);
++ struct wine_rb_entry *iter;
+
+ // TODO(acomminos): free list binning?
+- struct wined3d_buffer_heap_element *elem = heap->free_list_head;
++ EnterCriticalSection(&heap->temp_lock);
+
+ // Round to the nearest power of two to reduce fragmentation.
+ size = 1ULL << (int)ceil(log2(size));
+@@ -229,40 +157,35 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
+ if (heap->alignment)
+ size += heap->alignment - (size % heap->alignment);
+
+- while (elem != NULL)
++ iter = wine_rb_head(heap->free_tree.root);
++ while (iter)
+ {
+- TRACE("allocation at %p, size %lld\n", heap->map_ptr + elem->range.offset, elem->range.size);
+- // XXX(acomminos): first fit is highly likely to be dumb, needs more analysis.
++ struct wined3d_buffer_heap_element *elem = WINE_RB_ENTRY_VALUE(iter, struct wined3d_buffer_heap_element, entry);
+ if (elem->range.size >= size)
+ {
+- // Pull the range from the start of the free list element.
+- out_range->offset = elem->range.offset;
+- out_range->size = size; // XXX(acomminos): should we really give the exact size requested?
++ // FIXME(acomminos): should key based on end so that we can slice
++ // off the front without changing the key.
++ GLsizei remaining = elem->range.size - size;
+
+- elem->range.offset += size;
+- elem->range.size -= size;
++ out_range->offset = elem->range.offset;
++ out_range->size = size;
+
+- if (elem->range.size == 0)
++ wine_rb_remove(&heap->free_tree, iter);
++ if (remaining > 0)
++ {
++ elem->range.offset += size;
++ elem->range.size -= size;
++ wine_rb_put(&heap->free_tree, (const void*) elem->range.offset, &elem->entry);
++ }
++ else
+ {
+- if (elem->prev)
+- {
+- elem->prev->next = elem->next;
+- }
+- if (elem->next)
+- {
+- elem->next->prev = elem->prev;
+- }
+- if (heap->free_list_head == elem)
+- {
+- heap->free_list_head = elem->next;
+- }
+ HeapFree(GetProcessHeap(), 0, elem);
+ }
+-
++ TRACE("Allocated %lld bytes at %lld\n", out_range->size, out_range->offset);
+ LeaveCriticalSection(&heap->temp_lock);
+ return WINED3D_OK;
+ }
+- elem = elem->next;
++ iter = wine_rb_next(iter);
+ }
+
+ LeaveCriticalSection(&heap->temp_lock);
+@@ -271,17 +194,68 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
+
+ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range)
+ {
++ struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size);
++ struct wine_rb_entry *entry;
++ HRESULT hr;
++
++ if (!elem)
++ return E_OUTOFMEMORY;
++
+ EnterCriticalSection(&heap->temp_lock);
++ if (wine_rb_put(&heap->free_tree, (const void*) elem->range.offset, &elem->entry) == -1)
++ {
++ LeaveCriticalSection(&heap->temp_lock);
++ HeapFree(GetProcessHeap(), 0, elem);
++ return E_FAIL;
++ }
+
+- element_insert_range(&heap->free_list_head, range);
++ // Coalesce left.
++ entry = wine_rb_prev(&elem->entry);
++ if (entry)
++ {
++ TRACE("Coalesced left.\n");
++ struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(elem, struct wined3d_buffer_heap_element, entry);
++ if (left_elem->range.offset + left_elem->range.size == elem->range.offset)
++ {
++ // Replace the newly inserted element with an extended node to its
++ // left. This doesn't change the key properties of the left node.
++ left_elem->range.size += range.size;
++
++ wine_rb_remove(&heap->free_tree, &elem->entry);
++ HeapFree(GetProcessHeap(), 0, elem);
++
++ elem = left_elem;
++ }
++ }
++
++ // Coalesce right.
++ entry = wine_rb_next(&elem->entry);
++ if (entry)
++ {
++ TRACE("Coalesced right.\n");
++ struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(elem, struct wined3d_buffer_heap_element, entry);
++ if (elem->range.offset + elem->range.size == right_elem->range.offset)
++ {
++ // Remove the right element, this doesn't change the keying of our
++ // newly inserted element.
++ elem->range.size += right_elem->range.size;
++
++ wine_rb_remove(&heap->free_tree, &right_elem->entry);
++ HeapFree(GetProcessHeap(), 0, right_elem);
++ }
++ }
+
+ LeaveCriticalSection(&heap->temp_lock);
++
+ return WINED3D_OK;
+ }
+
+ HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range)
+ {
+- element_insert_range(&heap->pending_fenced_head, range);
++ struct wined3d_buffer_heap_range *elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_range));
++ elem->range = range;
++ elem->next = heap->pending_fenced_head;
++ heap->pending_fenced_head = elem;
+
+ heap->pending_fenced_bytes += range.size;
+ if (heap->pending_fenced_bytes >= heap->pending_fenced_threshold_bytes)
+@@ -338,11 +312,11 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str
+ case WINED3D_FENCE_NOT_STARTED:
+ {
+ TRACE_(d3d_perf)("Freed fence group.\n");
+- struct wined3d_buffer_heap_element *range_elem = elem->ranges;
++ struct wined3d_buffer_heap_range *range_elem = elem->ranges;
+ // FIXME(acomminos): this might take a while. incrementally do this?
+ while (range_elem)
+ {
+- struct wined3d_buffer_heap_element *next = range_elem->next;
++ struct wined3d_buffer_heap_range *next = range_elem->next;
+ wined3d_buffer_heap_free(heap, range_elem->range);
+ HeapFree(GetProcessHeap(), 0, range_elem);
+ range_elem = next;
+diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
+index 62433a39b1..3a45d9931e 100644
+--- a/dlls/wined3d/wined3d_private.h
++++ b/dlls/wined3d/wined3d_private.h
+@@ -3665,7 +3665,7 @@ enum wined3d_buffer_conversion_type
+ CONV_POSITIONT,
+ };
+
+-struct wined3d_buffer_heap_element;
++struct wined3d_buffer_heap_range;
+ struct wined3d_buffer_heap_fenced_element;
+
+ // A heap that manages allocations with a single GL buffer.
+@@ -3677,10 +3677,10 @@ struct wined3d_buffer_heap
+ CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list.
+
+ // TODO: add buckets for free regions of a given size.
+- struct wined3d_buffer_heap_element *free_list_head;
++ struct wine_rb_tree free_tree; // Free regions keyed on their base address.
+
+ // Elements that need to be fenced, but haven't reached the required size.
+- struct wined3d_buffer_heap_element *pending_fenced_head;
++ struct wined3d_buffer_heap_range *pending_fenced_head;
+ GLsizeiptr pending_fenced_bytes; // Number of free bytes in the active fenced region.
+ GLsizeiptr pending_fenced_threshold_bytes; // Number of bytes required before fencing.
+
+--
+2.16.2
+
diff --git a/0007-wined3d-Add-segregated-free-bins-to-complement-rbtre.patch b/0007-wined3d-Add-segregated-free-bins-to-complement-rbtre.patch
new file mode 100644
index 000000000000..fb80a0f89597
--- /dev/null
+++ b/0007-wined3d-Add-segregated-free-bins-to-complement-rbtre.patch
@@ -0,0 +1,305 @@
+From 89ca25afda23b8ed5f6dc5cc6a3fe010a4b63352 Mon Sep 17 00:00:00 2001
+From: Andrew Comminos <andrew@comminos.com>
+Date: Tue, 27 Feb 2018 18:10:36 -0800
+Subject: [PATCH 7/8] wined3d: Add segregated free bins to complement
+ rbtree-backed free list.
+
+---
+ dlls/wined3d/buffer_heap.c | 154 +++++++++++++++++++++++++++++++----------
+ dlls/wined3d/wined3d_private.h | 8 ++-
+ 2 files changed, 122 insertions(+), 40 deletions(-)
+
+diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
+index 45d3a2c7d7..f4af1b93b9 100644
+--- a/dlls/wined3d/buffer_heap.c
++++ b/dlls/wined3d/buffer_heap.c
+@@ -29,6 +29,10 @@ struct wined3d_buffer_heap_element
+ {
+ struct wine_rb_entry entry;
+ struct wined3d_map_range range;
++
++ // Binned free list positions
++ struct wined3d_buffer_heap_element *next;
++ struct wined3d_buffer_heap_element *prev;
+ };
+
+ struct wined3d_buffer_heap_range
+@@ -54,9 +58,76 @@ static struct wined3d_buffer_heap_element* element_new(GLsizei offset, GLsizei s
+ return NULL;
+ elem->range.offset = offset;
+ elem->range.size = size;
++ elem->prev = NULL;
++ elem->next = NULL;
+ return elem;
+ }
+
++static inline int bitwise_log2_floor(GLsizei size)
++{
++ // XXX(acomminos): I hope this gets unrolled.
++ for (int i = 8 * sizeof(GLsizei) - 1; i >= 0; i--)
++ {
++ if ((size >> i) & 1) {
++ return i;
++ }
++ }
++ return 0;
++}
++
++static inline int bitwise_log2_ceil(GLsizei size)
++{
++ // Add one to the floor of size if size isn't a power of two.
++ return bitwise_log2_floor(size) + !!(size & (size - 1));
++}
++
++static int element_bin(struct wined3d_buffer_heap_element *elem)
++{
++ return min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_floor(elem->range.size));
++}
++
++// Inserts and element into the free tree and its bin.
++// Does not coalesce.
++static void element_insert_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
++{
++ int bin = element_bin(elem);
++
++ elem->prev = NULL;
++ elem->next = heap->free_bins[bin];
++ if (heap->free_bins[bin])
++ heap->free_bins[bin]->prev = elem;
++ heap->free_bins[bin] = elem;
++
++ if (wine_rb_put(&heap->free_tree, &elem->range.offset, &elem->entry) == -1)
++ {
++ ERR("Failed to insert element into free tree.\n");
++ }
++
++ TRACE("Inserted allocation at %p of size %lld into bin %d\n", elem->range.offset, elem->range.size, bin);
++}
++
++// Removes an element from the free tree and its bin.
++static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
++{
++ int bin = element_bin(elem);
++
++ if (elem->prev)
++ elem->prev->next = elem->next;
++
++ if (elem->next)
++ elem->next->prev = elem->prev;
++
++ if (!elem->prev)
++ heap->free_bins[bin] = elem->next;
++
++ elem->prev = NULL;
++ elem->next = NULL;
++
++ wine_rb_remove(&heap->free_tree, &elem->entry);
++
++ TRACE("Freed allocation at %p of size %lld from bin %d\n", elem->range.offset, elem->range.size, bin);
++}
++
+ static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_range *ranges, struct wined3d_fence* fence)
+ {
+ struct wined3d_buffer_heap_fenced_element* elem;
+@@ -71,7 +142,7 @@ static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wine
+
+ static int free_tree_compare(const void *key, const struct wine_rb_entry *entry)
+ {
+- const GLsizei offset = (const GLsizei) key;
++ const GLsizei offset = *(const GLsizei*) key;
+ struct wined3d_buffer_heap_element *elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
+
+ if (offset < elem->range.offset)
+@@ -121,9 +192,6 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
+
+ wine_rb_init(&object->free_tree, free_tree_compare);
+
+- initial_elem = element_new(0, size);
+- wine_rb_put(&object->free_tree, initial_elem->range.offset, &initial_elem->entry);
+-
+ object->fenced_head = object->fenced_tail = NULL;
+ object->alignment = alignment;
+ object->pending_fenced_bytes = 0;
+@@ -131,6 +199,9 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
+ object->pending_fenced_threshold_bytes = size / 4; // FIXME(acomminos): make this externally declared
+ InitializeCriticalSection(&object->temp_lock);
+
++ initial_elem = element_new(0, size);
++ element_insert_free(object, initial_elem);
++
+ *buffer_heap = object;
+
+ return WINED3D_OK;
+@@ -145,56 +216,57 @@ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct win
+
+ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range)
+ {
+- struct wine_rb_entry *iter;
+-
+- // TODO(acomminos): free list binning?
++ int initial_bin;
+ EnterCriticalSection(&heap->temp_lock);
+
+ // Round to the nearest power of two to reduce fragmentation.
+- size = 1ULL << (int)ceil(log2(size));
++ size = 1ULL << bitwise_log2_ceil(size);
+
+ // Round up the size to a multiple of the heap's alignment.
+ if (heap->alignment)
+ size += heap->alignment - (size % heap->alignment);
+
+- iter = wine_rb_head(heap->free_tree.root);
+- while (iter)
++ // TODO(acomminos): use bitwise arithmetic instead
++ initial_bin = min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_ceil(size));
++
++ for (int i = initial_bin; i < WINED3D_BUFFER_HEAP_BINS; i++)
+ {
+- struct wined3d_buffer_heap_element *elem = WINE_RB_ENTRY_VALUE(iter, struct wined3d_buffer_heap_element, entry);
+- if (elem->range.size >= size)
++ struct wined3d_buffer_heap_element *elem = heap->free_bins[i];
++ if (elem)
+ {
+- // FIXME(acomminos): should key based on end so that we can slice
+- // off the front without changing the key.
+- GLsizei remaining = elem->range.size - size;
++ struct wined3d_map_range remaining_range;
++ remaining_range.offset = elem->range.offset + size;
++ remaining_range.size = elem->range.size - size;
+
+ out_range->offset = elem->range.offset;
+ out_range->size = size;
+
+- wine_rb_remove(&heap->free_tree, iter);
+- if (remaining > 0)
++ // Remove the element from its current free bin to move it to the correct list.
++ element_remove_free(heap, elem);
++
++ if (remaining_range.size > 0)
+ {
+- elem->range.offset += size;
+- elem->range.size -= size;
+- wine_rb_put(&heap->free_tree, (const void*) elem->range.offset, &elem->entry);
++ elem->range = remaining_range;
++ element_insert_free(heap, elem);
+ }
+ else
+ {
+ HeapFree(GetProcessHeap(), 0, elem);
+ }
+- TRACE("Allocated %lld bytes at %lld\n", out_range->size, out_range->offset);
++
+ LeaveCriticalSection(&heap->temp_lock);
+ return WINED3D_OK;
+ }
+- iter = wine_rb_next(iter);
+ }
+
+ LeaveCriticalSection(&heap->temp_lock);
+- return WINED3DERR_OUTOFVIDEOMEMORY; // FIXME(acomminos): probably wrong return code.
++ return WINED3DERR_OUTOFVIDEOMEMORY;
+ }
+
+ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range)
+ {
+ struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size);
++ struct wined3d_map_range coalesced_range = range;
+ struct wine_rb_entry *entry;
+ HRESULT hr;
+
+@@ -202,7 +274,12 @@ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3
+ return E_OUTOFMEMORY;
+
+ EnterCriticalSection(&heap->temp_lock);
+- if (wine_rb_put(&heap->free_tree, (const void*) elem->range.offset, &elem->entry) == -1)
++
++ // TODO(acomminos): implement lower_bound, upper_bound.
++ // we don't have to allocate a new elem here, this sentry
++ // is just so I can get this proof of concept out the door.
++
++ if (wine_rb_put(&heap->free_tree, &elem->range.offset, &elem->entry) == -1)
+ {
+ LeaveCriticalSection(&heap->temp_lock);
+ HeapFree(GetProcessHeap(), 0, elem);
+@@ -214,17 +291,14 @@ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3
+ if (entry)
+ {
+ TRACE("Coalesced left.\n");
+- struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(elem, struct wined3d_buffer_heap_element, entry);
+- if (left_elem->range.offset + left_elem->range.size == elem->range.offset)
++ struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
++ if (left_elem->range.offset + left_elem->range.size == coalesced_range.offset)
+ {
+- // Replace the newly inserted element with an extended node to its
+- // left. This doesn't change the key properties of the left node.
+- left_elem->range.size += range.size;
+-
+- wine_rb_remove(&heap->free_tree, &elem->entry);
+- HeapFree(GetProcessHeap(), 0, elem);
++ coalesced_range.offset = left_elem->range.offset;
++ coalesced_range.size = coalesced_range.size + left_elem->range.size;
+
+- elem = left_elem;
++ element_remove_free(heap, left_elem);
++ HeapFree(GetProcessHeap(), 0, left_elem);
+ }
+ }
+
+@@ -233,18 +307,22 @@ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3
+ if (entry)
+ {
+ TRACE("Coalesced right.\n");
+- struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(elem, struct wined3d_buffer_heap_element, entry);
++ struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
+ if (elem->range.offset + elem->range.size == right_elem->range.offset)
+ {
+- // Remove the right element, this doesn't change the keying of our
+- // newly inserted element.
+- elem->range.size += right_elem->range.size;
++ coalesced_range.size += right_elem->range.size;
+
+- wine_rb_remove(&heap->free_tree, &right_elem->entry);
++ element_remove_free(heap, right_elem);
+ HeapFree(GetProcessHeap(), 0, right_elem);
+ }
+ }
+
++ wine_rb_remove(&heap->free_tree, &elem->entry);
++
++ // Update with coalesced range.
++ elem->range = coalesced_range;
++ element_insert_free(heap, elem);
++
+ LeaveCriticalSection(&heap->temp_lock);
+
+ return WINED3D_OK;
+diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
+index 3a45d9931e..14cad92f0f 100644
+--- a/dlls/wined3d/wined3d_private.h
++++ b/dlls/wined3d/wined3d_private.h
+@@ -3665,8 +3665,12 @@ enum wined3d_buffer_conversion_type
+ CONV_POSITIONT,
+ };
+
+-struct wined3d_buffer_heap_range;
++struct wined3d_buffer_heap_element;
+ struct wined3d_buffer_heap_fenced_element;
++struct wined3d_buffer_heap_range;
++
++// Number of power-of-two buckets to populate.
++#define WINED3D_BUFFER_HEAP_BINS 32
+
+ // A heap that manages allocations with a single GL buffer.
+ struct wined3d_buffer_heap
+@@ -3676,7 +3680,7 @@ struct wined3d_buffer_heap
+ GLsizeiptr alignment;
+ CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list.
+
+- // TODO: add buckets for free regions of a given size.
++ struct wined3d_buffer_heap_element *free_bins[WINED3D_BUFFER_HEAP_BINS];
+ struct wine_rb_tree free_tree; // Free regions keyed on their base address.
+
+ // Elements that need to be fenced, but haven't reached the required size.
+--
+2.16.2
+
diff --git a/0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch b/0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch
new file mode 100644
index 000000000000..7dd0c7735c85
--- /dev/null
+++ b/0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch
@@ -0,0 +1,612 @@
+From 44fba11f530b1dff8a8e10fec15b0ca6465e3623 Mon Sep 17 00:00:00 2001
+From: Andrew Comminos <andrew@comminos.com>
+Date: Wed, 28 Feb 2018 22:46:31 -0800
+Subject: [PATCH 8/8] wined3d: Implement lazy-free using a deferred free list.
+
+---
+ dlls/wined3d/buffer_heap.c | 308 ++++++++++++++++++++++++++++-------------
+ dlls/wined3d/cs.c | 12 +-
+ dlls/wined3d/device.c | 16 ++-
+ dlls/wined3d/wined3d_private.h | 22 ++-
+ 4 files changed, 248 insertions(+), 110 deletions(-)
+
+diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
+index f4af1b93b9..3fe5541a6a 100644
+--- a/dlls/wined3d/buffer_heap.c
++++ b/dlls/wined3d/buffer_heap.c
+@@ -27,24 +27,20 @@ WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
+
+ struct wined3d_buffer_heap_element
+ {
+- struct wine_rb_entry entry;
+ struct wined3d_map_range range;
+
++ // rbtree data
++ struct wine_rb_entry entry;
++ BOOL in_tree;
++
+ // Binned free list positions
+ struct wined3d_buffer_heap_element *next;
+ struct wined3d_buffer_heap_element *prev;
+ };
+
+-struct wined3d_buffer_heap_range
+-{
+- struct wined3d_map_range range;
+-
+- struct wined3d_buffer_heap_range *next;
+-};
+-
+ struct wined3d_buffer_heap_fenced_element
+ {
+- struct wined3d_buffer_heap_range *ranges;
++ struct wined3d_buffer_heap_bin_set free_list;
+ struct wined3d_fence *fence;
+
+ struct wined3d_buffer_heap_fenced_element *next;
+@@ -58,6 +54,7 @@ static struct wined3d_buffer_heap_element* element_new(GLsizei offset, GLsizei s
+ return NULL;
+ elem->range.offset = offset;
+ elem->range.size = size;
++ elem->in_tree = FALSE;
+ elem->prev = NULL;
+ elem->next = NULL;
+ return elem;
+@@ -86,27 +83,41 @@ static int element_bin(struct wined3d_buffer_heap_element *elem)
+ return min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_floor(elem->range.size));
+ }
+
+-// Inserts and element into the free tree and its bin.
+-// Does not coalesce.
+-static void element_insert_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
++// Inserts an element into the appropriate free list bin.
++static void element_insert_free_bin(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
+ {
+ int bin = element_bin(elem);
+
+ elem->prev = NULL;
+- elem->next = heap->free_bins[bin];
+- if (heap->free_bins[bin])
+- heap->free_bins[bin]->prev = elem;
+- heap->free_bins[bin] = elem;
++ elem->next = heap->free_list.bins[bin].head;
++ if (heap->free_list.bins[bin].head)
++ heap->free_list.bins[bin].head->prev = elem;
++ heap->free_list.bins[bin].head = elem;
++
++ if (!heap->free_list.bins[bin].tail)
++ heap->free_list.bins[bin].tail = elem;
++
++ TRACE("Inserted allocation at %p of size %lld into bin %d\n", elem->range.offset, elem->range.size, bin);
++}
+
++// Inserts an element into the free tree. Does not perform coalescing.
++static void element_insert_free_tree(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
++{
++ if (elem->in_tree)
++ {
++ FIXME("Element %p already in free tree, ignoring.\n", elem);
++ return;
++ }
+ if (wine_rb_put(&heap->free_tree, &elem->range.offset, &elem->entry) == -1)
+ {
+ ERR("Failed to insert element into free tree.\n");
++ return;
+ }
+-
+- TRACE("Inserted allocation at %p of size %lld into bin %d\n", elem->range.offset, elem->range.size, bin);
++ TRACE("Inserted allocation at %p of size %lld into free tree\n", elem->range.offset, elem->range.size);
++ elem->in_tree = TRUE;
+ }
+
+-// Removes an element from the free tree and its bin.
++// Removes an element from the free tree, its bin, and the coalesce list.
+ static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
+ {
+ int bin = element_bin(elem);
+@@ -117,24 +128,31 @@ static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d
+ if (elem->next)
+ elem->next->prev = elem->prev;
+
+- if (!elem->prev)
+- heap->free_bins[bin] = elem->next;
++ if (elem == heap->free_list.bins[bin].head)
++ heap->free_list.bins[bin].head = elem->next;
++
++ if (elem == heap->free_list.bins[bin].tail)
++ heap->free_list.bins[bin].head = elem->prev;
+
+ elem->prev = NULL;
+ elem->next = NULL;
+
+- wine_rb_remove(&heap->free_tree, &elem->entry);
++ if (elem->in_tree)
++ {
++ wine_rb_remove(&heap->free_tree, &elem->entry);
++ elem->in_tree = FALSE;
++ }
+
+ TRACE("Freed allocation at %p of size %lld from bin %d\n", elem->range.offset, elem->range.size, bin);
+ }
+
+-static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_range *ranges, struct wined3d_fence* fence)
++static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_bin_set bins, struct wined3d_fence* fence)
+ {
+ struct wined3d_buffer_heap_fenced_element* elem;
+ elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_fenced_element));
+ if (!elem)
+ return NULL;
+- elem->ranges = ranges;
++ elem->free_list = bins;
+ elem->fence = fence;
+ elem->next = NULL;
+ return elem;
+@@ -163,6 +181,11 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
+
+ struct wined3d_buffer_heap *object;
+
++ if ((alignment & (alignment - 1)) != 0)
++ {
++ return E_FAIL;
++ }
++
+ if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object))))
+ {
+ return E_OUTOFMEMORY;
+@@ -194,13 +217,13 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
+
+ object->fenced_head = object->fenced_tail = NULL;
+ object->alignment = alignment;
+- object->pending_fenced_bytes = 0;
+- object->pending_fenced_head = NULL;
+- object->pending_fenced_threshold_bytes = size / 4; // FIXME(acomminos): make this externally declared
++ // FIXME(acomminos): make this externally declared
++ object->pending_fenced_threshold_bytes = 16 * 1024 * 1024;
+ InitializeCriticalSection(&object->temp_lock);
+
+ initial_elem = element_new(0, size);
+- element_insert_free(object, initial_elem);
++ // Don't bother adding the initial allocation to the coalescing tree.
++ element_insert_free_bin(object, initial_elem);
+
+ *buffer_heap = object;
+
+@@ -217,21 +240,23 @@ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct win
+ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range)
+ {
+ int initial_bin;
+- EnterCriticalSection(&heap->temp_lock);
+
+- // Round to the nearest power of two to reduce fragmentation.
+- size = 1ULL << bitwise_log2_ceil(size);
++ EnterCriticalSection(&heap->temp_lock);
+
+- // Round up the size to a multiple of the heap's alignment.
++ // Align size values where possible.
+ if (heap->alignment)
+ size += heap->alignment - (size % heap->alignment);
+
+- // TODO(acomminos): use bitwise arithmetic instead
++ // After alignment, reduce fragmentation by rounding to next power of two.
++ // If the alignment is a power of two (which it should be), this should be
++ // no problem.
++ size = 1 << bitwise_log2_ceil(size);
++
+ initial_bin = min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_ceil(size));
+
+ for (int i = initial_bin; i < WINED3D_BUFFER_HEAP_BINS; i++)
+ {
+- struct wined3d_buffer_heap_element *elem = heap->free_bins[i];
++ struct wined3d_buffer_heap_element *elem = heap->free_list.bins[i].head;
+ if (elem)
+ {
+ struct wined3d_map_range remaining_range;
+@@ -247,7 +272,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
+ if (remaining_range.size > 0)
+ {
+ elem->range = remaining_range;
+- element_insert_free(heap, elem);
++ element_insert_free_bin(heap, elem);
+ }
+ else
+ {
+@@ -260,68 +285,33 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
+ }
+
+ LeaveCriticalSection(&heap->temp_lock);
++
++ // Attempt to coalesce blocks until an allocation of the requested size is
++ // available.
++ GLsizei coalesced_size;
++ while (SUCCEEDED(wined3d_buffer_heap_deferred_coalesce(heap, &coalesced_size)))
++ {
++ FIXME_(d3d_perf)("Forcing coalesce, not enough free space in buffer heap.\n");
++ if (coalesced_size >= size)
++ {
++ return wined3d_buffer_heap_alloc(heap, size, out_range);
++ }
++ }
++
+ return WINED3DERR_OUTOFVIDEOMEMORY;
+ }
+
+ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range)
+ {
+ struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size);
+- struct wined3d_map_range coalesced_range = range;
+- struct wine_rb_entry *entry;
+- HRESULT hr;
+
+ if (!elem)
+ return E_OUTOFMEMORY;
+
+ EnterCriticalSection(&heap->temp_lock);
+
+- // TODO(acomminos): implement lower_bound, upper_bound.
+- // we don't have to allocate a new elem here, this sentry
+- // is just so I can get this proof of concept out the door.
+-
+- if (wine_rb_put(&heap->free_tree, &elem->range.offset, &elem->entry) == -1)
+- {
+- LeaveCriticalSection(&heap->temp_lock);
+- HeapFree(GetProcessHeap(), 0, elem);
+- return E_FAIL;
+- }
+-
+- // Coalesce left.
+- entry = wine_rb_prev(&elem->entry);
+- if (entry)
+- {
+- TRACE("Coalesced left.\n");
+- struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
+- if (left_elem->range.offset + left_elem->range.size == coalesced_range.offset)
+- {
+- coalesced_range.offset = left_elem->range.offset;
+- coalesced_range.size = coalesced_range.size + left_elem->range.size;
+-
+- element_remove_free(heap, left_elem);
+- HeapFree(GetProcessHeap(), 0, left_elem);
+- }
+- }
+-
+- // Coalesce right.
+- entry = wine_rb_next(&elem->entry);
+- if (entry)
+- {
+- TRACE("Coalesced right.\n");
+- struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
+- if (elem->range.offset + elem->range.size == right_elem->range.offset)
+- {
+- coalesced_range.size += right_elem->range.size;
+-
+- element_remove_free(heap, right_elem);
+- HeapFree(GetProcessHeap(), 0, right_elem);
+- }
+- }
+-
+- wine_rb_remove(&heap->free_tree, &elem->entry);
+-
+- // Update with coalesced range.
+- elem->range = coalesced_range;
+- element_insert_free(heap, elem);
++ // Only insert the element into a free bin, coalescing will occur later.
++ element_insert_free_bin(heap, elem);
+
+ LeaveCriticalSection(&heap->temp_lock);
+
+@@ -330,10 +320,21 @@ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3
+
+ HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range)
+ {
+- struct wined3d_buffer_heap_range *elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_range));
+- elem->range = range;
+- elem->next = heap->pending_fenced_head;
+- heap->pending_fenced_head = elem;
++ struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size);
++ int bin_index = element_bin(elem);
++ struct wined3d_buffer_heap_bin *bin = &heap->pending_fenced_bins.bins[bin_index];
++
++ if (bin->tail)
++ {
++ bin->tail->next = elem;
++ elem->prev = bin->tail;
++ bin->tail = elem;
++ }
++ else
++ {
++ bin->head = elem;
++ bin->tail = elem;
++ }
+
+ heap->pending_fenced_bytes += range.size;
+ if (heap->pending_fenced_bytes >= heap->pending_fenced_threshold_bytes)
+@@ -349,13 +350,13 @@ HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct
+ return hr;
+ }
+
+- fenced_elem = fenced_element_new(heap->pending_fenced_head, fence);
++ fenced_elem = fenced_element_new(heap->pending_fenced_bins, fence);
+ if (!fenced_elem)
+ return E_OUTOFMEMORY;
+
+ TRACE_(d3d_perf)("Dispatching fenced buffer set.\n");
+ heap->pending_fenced_bytes = 0;
+- heap->pending_fenced_head = NULL;
++ memset(&heap->pending_fenced_bins, 0, sizeof(heap->pending_fenced_bins));
+
+ // Append to end of fenced list, which works well if you assume that buffers
+ // are freed in some ascending draw call ordering.
+@@ -390,15 +391,28 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str
+ case WINED3D_FENCE_NOT_STARTED:
+ {
+ TRACE_(d3d_perf)("Freed fence group.\n");
+- struct wined3d_buffer_heap_range *range_elem = elem->ranges;
+- // FIXME(acomminos): this might take a while. incrementally do this?
+- while (range_elem)
++
++ EnterCriticalSection(&heap->temp_lock);
++ for (int i = 0; i < WINED3D_BUFFER_HEAP_BINS; i++)
+ {
+- struct wined3d_buffer_heap_range *next = range_elem->next;
+- wined3d_buffer_heap_free(heap, range_elem->range);
+- HeapFree(GetProcessHeap(), 0, range_elem);
+- range_elem = next;
++ struct wined3d_buffer_heap_bin *elem_bin = &elem->free_list.bins[i];
++ if (!elem_bin->tail)
++ continue;
++
++ struct wined3d_buffer_heap_bin *heap_bin = &heap->free_list.bins[i];
++ if (heap_bin->head)
++ {
++ elem_bin->tail->next = heap_bin->head;
++ heap_bin->head->prev = elem_bin->tail;
++ heap_bin->head = elem_bin->head;
++ }
++ else
++ {
++ heap_bin->head = elem_bin->head;
++ heap_bin->tail = elem_bin->tail;
++ }
+ }
++ LeaveCriticalSection(&heap->temp_lock);
+
+ wined3d_fence_destroy(elem->fence);
+
+@@ -413,3 +427,101 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str
+
+ return WINED3D_OK;
+ }
++
++HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, GLsizei *coalesced_size)
++{
++ struct wined3d_buffer_heap_element *elem = NULL;
++ struct wine_rb_entry *entry;
++ struct wined3d_map_range coalesced_range;
++
++ // XXX(acomminos): is it always the best idea to coalesce by smallest
++ // chunks? these are the most likely to be useless.
++ EnterCriticalSection(&heap->temp_lock);
++
++ // TODO(acomminos): on one hand, if there's a lot of elements in the list,
++ // it's highly fragmented. on the other, we can potentially waste a decent
++ // sum of time checking for uncoalesced bins.
++ for (int i = 0; !elem && i < WINED3D_BUFFER_HEAP_BINS; i++)
++ {
++ struct wined3d_buffer_heap_element *next = heap->free_list.bins[i].head;
++ while (next)
++ {
++ if (!next->in_tree)
++ {
++ // Find the first element not in-tree.
++ elem = next;
++ break;
++ }
++ next = next->next;
++ }
++ }
++
++ // TODO(acomminos): acquire a separate lock for the free tree here.
++ if (!elem)
++ {
++ LeaveCriticalSection(&heap->temp_lock);
++ return E_FAIL;
++ }
++ element_remove_free(heap, elem);
++
++ // Remove element from free list, we may change its size or offset.
++ coalesced_range = elem->range;
++
++ // TODO(acomminos): implement lower_bound, upper_bound.
++ // we don't have to allocate a new elem here, this sentry
++ // is just so I can get this proof of concept out the door.
++
++ if (wine_rb_put(&heap->free_tree, &elem->range.offset, &elem->entry) == -1)
++ {
++ LeaveCriticalSection(&heap->temp_lock);
++ return E_FAIL;
++ }
++
++ // Coalesce left.
++ entry = wine_rb_prev(&elem->entry);
++ if (entry)
++ {
++ TRACE("Coalesced left.\n");
++ struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
++ if (left_elem->range.offset + left_elem->range.size == coalesced_range.offset)
++ {
++ coalesced_range.offset = left_elem->range.offset;
++ coalesced_range.size = coalesced_range.size + left_elem->range.size;
++
++ element_remove_free(heap, left_elem);
++ HeapFree(GetProcessHeap(), 0, left_elem);
++ }
++ }
++
++ // Coalesce right.
++ entry = wine_rb_next(&elem->entry);
++ if (entry)
++ {
++ TRACE("Coalesced right.\n");
++ struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
++ if (elem->range.offset + elem->range.size == right_elem->range.offset)
++ {
++ coalesced_range.size += right_elem->range.size;
++
++ element_remove_free(heap, right_elem);
++ HeapFree(GetProcessHeap(), 0, right_elem);
++ }
++ }
++
++ wine_rb_remove(&heap->free_tree, &elem->entry);
++
++ if (coalesced_range.size > elem->range.size)
++ FIXME_(d3d_perf)("Coalesced out an extra %lld bytes\n", coalesced_range.size - elem->range.size);
++
++ // Update with coalesced range.
++ elem->range = coalesced_range;
++
++ if (coalesced_size)
++ *coalesced_size = coalesced_range.size;
++
++ element_insert_free_bin(heap, elem);
++ element_insert_free_tree(heap, elem);
++
++ LeaveCriticalSection(&heap->temp_lock);
++ return WINED3D_OK;
++}
+diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
+index bae5d9f4a1..8fd9b01a36 100644
+--- a/dlls/wined3d/cs.c
++++ b/dlls/wined3d/cs.c
+@@ -2644,10 +2644,6 @@ static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *da
+ struct wined3d_buffer *buffer = op->buffer;
+ HRESULT hr;
+
+- // Poll for discarded buffers whose fenced have been triggered here to avoid
+- // excessive VRAM consumption.
+- wined3d_buffer_heap_cs_poll_fences(buffer->buffer_heap, cs->device);
+-
+ // TODO(acomminos): should call into buffer.c here instead.
+ if (FAILED(hr = wined3d_buffer_heap_free_fenced(buffer->buffer_heap, cs->device, buffer->cs_persistent_map)))
+ {
+@@ -2975,6 +2971,14 @@ static void poll_queries(struct wined3d_cs *cs)
+ list_init(&query->poll_list_entry);
+ InterlockedIncrement(&query->counter_retrieved);
+ }
++
++ // Poll for discarded persistent buffers whose fences have been triggered
++ // here to avoid excessive VRAM consumption.
++ // XXX(acomminos): clean this up, integrate with prior section.
++ if (cs->device->wo_buffer_heap)
++ wined3d_buffer_heap_cs_poll_fences(cs->device->wo_buffer_heap, cs->device);
++ if (cs->device->cb_buffer_heap)
++ wined3d_buffer_heap_cs_poll_fences(cs->device->cb_buffer_heap, cs->device);
+ }
+
+ static void wined3d_cs_wait_event(struct wined3d_cs *cs)
+diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
+index bdab83b935..9f300ca572 100644
+--- a/dlls/wined3d/device.c
++++ b/dlls/wined3d/device.c
+@@ -848,26 +848,32 @@ static void destroy_default_samplers(struct wined3d_device *device, struct wined
+ /* Context activation is done by the caller. */
+ static void create_buffer_heap(struct wined3d_device *device, struct wined3d_context *context)
+ {
++ const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
+ // TODO(acomminos): check if ARB_buffer_storage is supported, first-
+ // possibly make wined3d_buffer_heap_create fail.
+- // TODO(acomminos): 512MB is sane for geometry, maybe not for PBO.
+- const GLsizeiptr HBO_SIZE = min(512000000, device->adapter->vram_bytes / 4);
+- const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
++ // TODO(acomminos): kill this magic number. perhaps base on vram.
++ GLsizeiptr geo_heap_size = 512 * 1024 * 1024;
++ GLsizeiptr cb_heap_size = 256 * 1024 * 1024;
+
+ GLint ub_alignment;
+ gl_info->gl_ops.gl.p_glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &ub_alignment);
+
++ // Align constant buffer heap size, in case GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT isn't a power of two (for some reason).
++ cb_heap_size -= cb_heap_size % ub_alignment;
++
+ HRESULT hr;
+- if (FAILED(hr = wined3d_buffer_heap_create(context, HBO_SIZE, 0, TRUE, &device->wo_buffer_heap)))
++ if (FAILED(hr = wined3d_buffer_heap_create(context, geo_heap_size, 0, TRUE, &device->wo_buffer_heap)))
+ {
+ ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr);
+ }
+
+ // TODO(acomminos): can likely use a way smaller heap for CBs by querying limits
+- if (FAILED(hr = wined3d_buffer_heap_create(context, HBO_SIZE, ub_alignment, TRUE, &device->cb_buffer_heap)))
++ if (FAILED(hr = wined3d_buffer_heap_create(context, cb_heap_size, ub_alignment, TRUE, &device->cb_buffer_heap)))
+ {
+ ERR("Failed to create persistent buffer heap for constant buffers, hr %#x.\n", hr);
+ }
++
++ FIXME("Initialized wine-pba (geo_heap_size: %lld, cb_heap_size: %lld, ub_align: %d)\n", geo_heap_size, cb_heap_size, ub_alignment);
+ }
+
+ /* Context activation is done by the caller. */
+diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
+index 14cad92f0f..3011609ee1 100644
+--- a/dlls/wined3d/wined3d_private.h
++++ b/dlls/wined3d/wined3d_private.h
+@@ -3667,11 +3667,21 @@ enum wined3d_buffer_conversion_type
+
+ struct wined3d_buffer_heap_element;
+ struct wined3d_buffer_heap_fenced_element;
+-struct wined3d_buffer_heap_range;
+
+ // Number of power-of-two buckets to populate.
+ #define WINED3D_BUFFER_HEAP_BINS 32
+
++struct wined3d_buffer_heap_bin
++{
++ struct wined3d_buffer_heap_element *head;
++ struct wined3d_buffer_heap_element *tail;
++};
++
++struct wined3d_buffer_heap_bin_set
++{
++ struct wined3d_buffer_heap_bin bins[WINED3D_BUFFER_HEAP_BINS];
++};
++
+ // A heap that manages allocations with a single GL buffer.
+ struct wined3d_buffer_heap
+ {
+@@ -3680,11 +3690,11 @@ struct wined3d_buffer_heap
+ GLsizeiptr alignment;
+ CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list.
+
+- struct wined3d_buffer_heap_element *free_bins[WINED3D_BUFFER_HEAP_BINS];
++ struct wined3d_buffer_heap_bin_set free_list;
+ struct wine_rb_tree free_tree; // Free regions keyed on their base address.
+
+ // Elements that need to be fenced, but haven't reached the required size.
+- struct wined3d_buffer_heap_range *pending_fenced_head;
++ struct wined3d_buffer_heap_bin_set pending_fenced_bins;
+ GLsizeiptr pending_fenced_bytes; // Number of free bytes in the active fenced region.
+ GLsizeiptr pending_fenced_threshold_bytes; // Number of bytes required before fencing.
+
+@@ -3696,6 +3706,7 @@ struct wined3d_buffer_heap
+ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **heap) DECLSPEC_HIDDEN;
+ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN;
+ // Fetches a buffer from the heap of at least the given size.
++// Attempts to coalesce blocks under memory pressure.
+ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range* out_range) DECLSPEC_HIDDEN;
+ // Immediately frees a heap-allocated buffer segment.
+ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN;
+@@ -3704,6 +3715,11 @@ HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct
+ // Moves a buffers with a signaled fence from the fenced list to the free list.
+ // Must be executed on the CS thread.
+ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN;
++// Performs deferred coalescing of fenced buffers. To be called when the CS
++// thread is idle, or under memory pressure.
++// Outputs the size of the new coalesced region in `coalesced_size`, or an error
++// if there are no remaining elements to be coalesced.
++HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, GLsizei *coalesced_size) DECLSPEC_HIDDEN;
+
+ struct wined3d_buffer
+ {
+--
+2.16.2
+
diff --git a/30-win32-aliases.conf b/30-win32-aliases.conf
new file mode 100644
index 000000000000..99ae1f7b83a5
--- /dev/null
+++ b/30-win32-aliases.conf
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<!DOCTYPE fontconfig SYSTEM "fonts.dtd">
+<fontconfig>
+ <alias binding="same">
+ <family>MS Shell Dlg</family>
+ <accept><family>Microsoft Sans Serif</family></accept>
+ <default><family>sans-serif</family></default>
+ </alias>
+ <alias binding="same">
+ <family>MS Shell Dlg 2</family>
+ <accept><family>Tahoma</family></accept>
+ <default><family>sans-serif</family></default>
+ </alias>
+
+ <alias binding="same">
+ <family>MS Sans Serif</family>
+ <prefer><family>Microsoft Sans Serif</family></prefer>
+ <default><family>sans-serif</family></default>
+ </alias>
+</fontconfig>
diff --git a/PKGBUILD b/PKGBUILD
new file mode 100644
index 000000000000..82b86bc8e7d2
--- /dev/null
+++ b/PKGBUILD
@@ -0,0 +1,219 @@
+# $Id$
+# Maintainer: Stefan Schmidt <thrimbor.github@gmail.com>
+# Contributor: Felix Yan <felixonmars@archlinux.org>
+# Contributor: Sven-Hendrik Haase <sh@lutzhaase.com>
+# Contributor: Jan "heftig" Steffens <jan.steffens@gmail.com>
+# Contributor: Eduardo Romero <eduardo@archlinux.org>
+# Contributor: Giovanni Scafora <giovanni@archlinux.org>
+
+pkgname=wine-staging-pba
+pkgver=2.21
+pkgrel=1
+
+_pkgbasever=${pkgver/rc/-rc}
+
+source=("https://github.com/wine-compholio/wine-patched/archive/staging-$_pkgbasever.tar.gz"
+ harmony-fix.diff
+ 30-win32-aliases.conf
+ "0001-wined3d-Implement-a-simple-heap-allocator-backed-by-.patch"
+ "0002-wined3d-Allocate-global-write-only-persistent-buffer.patch"
+ "0003-wined3d-Add-support-for-persistently-mapped-wined3d_.patch"
+ "0004-wined3d-Implement-aligned-persistent-heaps-for-persi.patch"
+ "0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch"
+ "0006-wined3d-Switch-wined3d_buffer_heap-to-be-backed-by-a.patch"
+ "0007-wined3d-Add-segregated-free-bins-to-complement-rbtre.patch"
+ "0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch")
+
+sha512sums=('4e3fe2eb81360bfa095194ab5b9647636cbeac0dc3955e6a3ee26062f650c66a4bd2353a1cd8063f9b7c65a6bcc1f892cc7c1d0f00c3c8525a124ec2109d1e86'
+ 'b86edf07bfc560f403fdfd5a71f97930ee2a4c3f76c92cc1a0dbb2e107be9db3bed3a727a0430d8a049583c63dd11f5d4567fb7aa69b193997c6da241acc4f2e'
+ '6e54ece7ec7022b3c9d94ad64bdf1017338da16c618966e8baf398e6f18f80f7b0576edf1d1da47ed77b96d577e4cbb2bb0156b0b11c183a0accf22654b0a2bb'
+ 'b9141fbe8f5189eb46c83b178497f9ee6d1f2daec3009877557ba28e5f2ce6d818cfef5b0eba15c1e9e4c50dd5950486f8091793d704ec532f82155056725e71'
+ '8e112e25392fb2bd035c4b8792e43ad86bf81b1c24ff429ff8943a2c02ee761fc25446791475e4115e6b03f50cdb4cf6a8f128cc770c3941b59ee1dfbe79137b'
+ '7335797924f1c4403a438ccfe36f8a650ddf8271d33ca962e270cf34762170038017cd53cad35f1ad61128f2c496edb68791783259df33cb997a73959136bdc0'
+ '52ebb56c6adfbef526d2db19618f9155084dacd7600d166f04ba5423c63a4294294589d675c391e577330f1b68755bb5d3b6a2cd3006902269cb73140973dba3'
+ 'd326b8da8fb02462bac178a23e18f5468de62780717c24eadb453201b2b6b6439d2be7dda38e40f24fdc570dd5bc54102e7bf05868c53b17b27f6b9a06fccdb0'
+ '04b41d4198138dbfe1399e7ed1e406fb265472d08a3e4de3c5c8584574b167613c598d7fa397c6944b809a96f699a4447694291296fa01a8e07b8ea96026ed2f'
+ '9f90b7adc0ed87daac0f453caf2fff8b338061d96a9cd890f305704f9b22581232c6a207eb9eb1670c69b083caa780a6e44280df47c95b4e6e8e73f046f7c8a5'
+ '8fd8d2e262327e78dad69186ebf091dbc034fab2675f0be91df75c88ae6f5e5ae6f456a2098c460861946390ce139e998f4b0f77e33671c8a7062a5e06b6e4ca')
+
+pkgdesc="A compatibility layer for running Windows programs - Staging branch"
+url="http://www.wine-staging.com"
+arch=(x86_64)
+options=(staticlibs)
+license=(LGPL)
+
+_depends=(
+ attr lib32-attr
+ fontconfig lib32-fontconfig
+ lcms2 lib32-lcms2
+ libxml2 lib32-libxml2
+ libxcursor lib32-libxcursor
+ libxrandr lib32-libxrandr
+ libxdamage lib32-libxdamage
+ libxi lib32-libxi
+ gettext lib32-gettext
+ freetype2 lib32-freetype2
+ glu lib32-glu
+ libsm lib32-libsm
+ gcc-libs lib32-gcc-libs
+ libpcap lib32-libpcap
+ desktop-file-utils
+)
+
+makedepends=(autoconf ncurses bison perl fontforge flex
+ 'gcc>=4.5.0-2'
+ giflib lib32-giflib
+ libpng lib32-libpng
+ gnutls lib32-gnutls
+ libxinerama lib32-libxinerama
+ libxcomposite lib32-libxcomposite
+ libxmu lib32-libxmu
+ libxxf86vm lib32-libxxf86vm
+ libldap lib32-libldap
+ mpg123 lib32-mpg123
+ openal lib32-openal
+ v4l-utils lib32-v4l-utils
+ alsa-lib lib32-alsa-lib
+ libxcomposite lib32-libxcomposite
+ mesa lib32-mesa
+ mesa-libgl lib32-mesa-libgl
+ opencl-icd-loader lib32-opencl-icd-loader
+ libxslt lib32-libxslt
+ libpulse lib32-libpulse
+ libva lib32-libva
+ gtk3 lib32-gtk3
+ gst-plugins-base-libs lib32-gst-plugins-base-libs
+ samba
+ opencl-headers
+)
+
+optdepends=(
+ giflib lib32-giflib
+ libpng lib32-libpng
+ libldap lib32-libldap
+ gnutls lib32-gnutls
+ mpg123 lib32-mpg123
+ openal lib32-openal
+ v4l-utils lib32-v4l-utils
+ libpulse lib32-libpulse
+ alsa-plugins lib32-alsa-plugins
+ alsa-lib lib32-alsa-lib
+ libjpeg-turbo lib32-libjpeg-turbo
+ libxcomposite lib32-libxcomposite
+ libxinerama lib32-libxinerama
+ ncurses lib32-ncurses
+ opencl-icd-loader lib32-opencl-icd-loader
+ libxslt lib32-libxslt
+ libva lib32-libva
+ gtk3 lib32-gtk3
+ gst-plugins-base-libs lib32-gst-plugins-base-libs
+ vulkan-icd-loader lib32-vulkan-icd-loader
+ cups
+ samba dosbox
+)
+
+if [[ $CARCH == i686 ]]; then
+ # Strip lib32 etc. on i686: matching entries are blanked, then dropped by the unquoted re-expansion
+ _depends=(${_depends[@]/*32-*/})
+ makedepends=(${makedepends[@]/*32-*/} ${_depends[@]})  # runtime deps are appended to the build deps
+ makedepends=(${makedepends[@]/*-multilib*/})
+ optdepends=(${optdepends[@]/*32-*/})
+ provides=("wine=$pkgver")
+ conflicts=('wine' 'wine-staging')
+else
+ makedepends=(${makedepends[@]} ${_depends[@]})  # runtime deps are appended to the build deps
+ provides=("wine=$pkgver" "wine-wow64=$pkgver")
+ conflicts=('wine' 'wine-wow64' 'wine-staging')
+fi
+
+prepare() {
+  # Relative paths below resolve against $srcdir. Gives the extracted tree a
+  # version-independent name (allows ccache to work), applies the patches from
+  # source=() and resets the out-of-tree build directories.
+
+  # Remove a tree left by an earlier run; mv would nest the new one inside it.
+  rm -rf "$pkgname"
+  mv "wine-patched-staging-$_pkgbasever" "$pkgname"
+
+  # https://bugs.winehq.org/show_bug.cgi?id=43530
+  export CFLAGS="${CFLAGS/-fno-plt/}"
+  export LDFLAGS="${LDFLAGS/,-z,now/}"
+
+  # Apply every *.diff / *.patch entry of source=(), in the order listed there.
+  local _src
+  for _src in "${source[@]}"; do
+    case $_src in *.diff|*.patch) patch -d "$pkgname" -Np1 < "$_src" ;; esac
+  done
+
+  sed 's|OpenCL/opencl.h|CL/opencl.h|g' -i "$pkgname"/configure*
+
+  # Get rid of old build dirs
+  rm -rf "$pkgname"-{32,64}-build
+  mkdir "$pkgname-32-build"
+}
+
+build() {
+  # Out-of-tree builds. On x86_64 the 64-bit tree is built first and the 32-bit
+  # configure run is pointed at it through --with-wine64.
+  if [[ $CARCH == x86_64 ]]; then
+    msg2 "Building Wine-64..."
+    # -p: do not fail when the directory survives from an earlier build() run.
+    mkdir -p "$srcdir/$pkgname-64-build"
+    cd "$srcdir/$pkgname-64-build"
+    "../$pkgname/configure" \
+      --prefix=/usr \
+      --libdir=/usr/lib \
+      --with-x \
+      --with-gstreamer \
+      --enable-win64 \
+      --with-xattr
+
+    make
+
+    _wine32opts=(
+      --libdir=/usr/lib32
+      --with-wine64="$srcdir/$pkgname-64-build"
+    )
+
+    export PKG_CONFIG_PATH="/usr/lib32/pkgconfig"
+  fi
+
+  msg2 "Building Wine-32..."
+  cd "$srcdir/$pkgname-32-build"
+  "../$pkgname/configure" \
+    --prefix=/usr \
+    --with-x \
+    --with-gstreamer \
+    --with-xattr \
+    "${_wine32opts[@]}"
+
+  make
+}
+
+package() {
+  # Installs the 32-bit tree, then (unless i686) the 64-bit one, plus font aliases.
+  depends=(${_depends[@]})
+  local _fonts="$pkgdir/etc/fonts" _conf=30-win32-aliases.conf
+
+  msg2 "Packaging Wine-32..."
+  cd "$srcdir/$pkgname-32-build"
+  if [[ $CARCH != i686 ]]; then
+    # 32-bit libraries are installed under lib32, the 64-bit ones under lib.
+    make prefix="$pkgdir/usr" libdir="$pkgdir/usr/lib32" \
+      dlldir="$pkgdir/usr/lib32/wine" install
+
+    msg2 "Packaging Wine-64..."
+    cd "$srcdir/$pkgname-64-build"
+    make prefix="$pkgdir/usr" libdir="$pkgdir/usr/lib" \
+      dlldir="$pkgdir/usr/lib/wine" install
+  else
+    make prefix="$pkgdir/usr" install
+  fi
+
+  # Font aliasing settings for Win32 applications
+  install -d "$_fonts"/conf.{avail,d}
+  install -m644 "$srcdir/$_conf" "$_fonts/conf.avail"
+  ln -s "../conf.avail/$_conf" "$_fonts/conf.d/$_conf"
+}
+
+# vim:set ts=8 sts=2 sw=2 et:
diff --git a/harmony-fix.diff b/harmony-fix.diff
new file mode 100644
index 000000000000..fe0c8c929d4a
--- /dev/null
+++ b/harmony-fix.diff
@@ -0,0 +1,63 @@
+diff -u -r wine/dlls/gdi32/freetype.c wine-ft281/dlls/gdi32/freetype.c
+--- wine/dlls/gdi32/freetype.c 2017-10-04 18:01:36.000000000 +0200
++++ wine-ft281/dlls/gdi32/freetype.c 2017-10-10 10:29:17.506632615 +0200
+@@ -996,18 +996,23 @@
+
+ static BOOL is_subpixel_rendering_enabled( void )
+ {
+-#ifdef FT_LCD_FILTER_H
+ static int enabled = -1;
+ if (enabled == -1)
+ {
+- enabled = (pFT_Library_SetLcdFilter &&
+- pFT_Library_SetLcdFilter( NULL, 0 ) != FT_Err_Unimplemented_Feature);
++ /* >= 2.8.1 provides LCD rendering without filters */
++ if (FT_Version.major > 2 ||
++ FT_Version.major == 2 && FT_Version.minor > 8 ||
++ FT_Version.major == 2 && FT_Version.minor == 8 && FT_Version.patch >= 1)
++ enabled = TRUE;
++#ifdef FT_LCD_FILTER_H
++ else if (pFT_Library_SetLcdFilter &&
++ pFT_Library_SetLcdFilter( NULL, 0 ) != FT_Err_Unimplemented_Feature)
++ enabled = TRUE;
++#endif
++ else enabled = FALSE;
+ TRACE("subpixel rendering is %senabled\n", enabled ? "" : "NOT ");
+ }
+ return enabled;
+-#else
+- return FALSE;
+-#endif
+ }
+
+
+@@ -7271,7 +7276,6 @@
+ case WINE_GGO_HBGR_BITMAP:
+ case WINE_GGO_VRGB_BITMAP:
+ case WINE_GGO_VBGR_BITMAP:
+-#ifdef FT_LCD_FILTER_H
+ {
+ switch (ft_face->glyph->format)
+ {
+@@ -7357,8 +7361,11 @@
+ if ( needsTransform )
+ pFT_Outline_Transform (&ft_face->glyph->outline, &transMatTategaki);
+
++#ifdef FT_LCD_FILTER_H
+ if ( pFT_Library_SetLcdFilter )
+ pFT_Library_SetLcdFilter( library, FT_LCD_FILTER_DEFAULT );
++#endif
++
+ pFT_Render_Glyph (ft_face->glyph, render_mode);
+
+ src = ft_face->glyph->bitmap.buffer;
+@@ -7439,9 +7446,6 @@
+
+ break;
+ }
+-#else
+- return GDI_ERROR;
+-#endif
+
+ case GGO_NATIVE:
+ {