summarylogtreecommitdiffstats
diff options
context:
space:
mode:
authorLars Norberg2018-03-29 00:00:43 +0200
committerLars Norberg2018-03-29 00:00:43 +0200
commita6782c800f37a78b2aa12a7d0a2365dda7117ca2 (patch)
treed9169e7562d271af2783f79b801bd1ba9a3f94d5
parent9ea985bb81d8d89f644f88ea8cb15435f3fe0faf (diff)
downloadaur-a6782c800f37a78b2aa12a7d0a2365dda7117ca2.tar.gz
restructured pba patches
-rw-r--r--.SRCINFO24
-rw-r--r--0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch806
-rw-r--r--0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch679
-rw-r--r--0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch121
-rw-r--r--0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch26
-rw-r--r--0005-wined3d-Disable-persistently-mapped-shader-resource-.patch28
-rw-r--r--0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch92
-rw-r--r--0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch351
-rw-r--r--0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch211
-rw-r--r--0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch96
-rw-r--r--PKGBUILD44
-rw-r--r--wine-staging-pba.patch1574
12 files changed, 1582 insertions, 2470 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 662b1585f519..f32f0644a891 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,6 +1,6 @@
pkgbase = wine-staging-pba-git
pkgdesc = Wine staging branch with PBA patches for increased D3D performance. Git versions. (Also includes Path of Exile DX11 patch!)
- pkgver = 3.4.r3604.e50f0488+wine.3.4.r178.ge1c7a1f7ce+pba.r29.87307b1
+ pkgver = 3.4.r3607.5876a3f7+wine.3.4.r192.gd7430abd40
pkgrel = 1
url = https://github.com/acomminos/wine-pba
install = wine.install
@@ -174,16 +174,7 @@ pkgbase = wine-staging-pba-git
options = staticlibs
source = wine-git::git://source.winehq.org/git/wine.git
source = wine-staging-git::git+https://github.com/wine-staging/wine-staging.git
- source = wine-pba::git+https://github.com/acomminos/wine-pba.git
- source = 0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch
- source = 0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch
- source = 0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch
- source = 0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch
- source = 0005-wined3d-Disable-persistently-mapped-shader-resource-.patch
- source = 0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch
- source = 0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch
- source = 0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch
- source = 0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch
+ source = wine-staging-pba.patch
source = steam.patch
source = poe-fix.patch
source = harmony-fix.diff
@@ -191,16 +182,7 @@ pkgbase = wine-staging-pba-git
source = wine-binfmt.conf
sha256sums = SKIP
sha256sums = SKIP
- sha256sums = SKIP
- sha256sums = f5f8c507f79c829b118125a3749f80ed31eb8ba8ad024d99554a1a6458c438eb
- sha256sums = 98372adbb16949edca4c90604cceac5db3d4bf37eccc13d59d3e5735f53f2501
- sha256sums = 112f8fc68d5421805fb1de32c0216c41412afae21153d803127c9d1c1103e35b
- sha256sums = 016ee498c9ff7af0d14c7b0e42f4bc5255f5dae6d391fd36c2060668fcade662
- sha256sums = ec11046f6335c2831e3b89c2b0c241b74974415a64523f35f0a606d27d1dbfbb
- sha256sums = d2a8febc2500d6a7bed418232efedf82f114e7d14ca1199789abe576dddae90b
- sha256sums = ff5ef40b945fdad16db99a1f736c20c53711cfe002d367ea4aa55d84bf6a1207
- sha256sums = dee52666fc680b74f5d5ba1a2a74de715c7b49376895ff057ccada9daaef5911
- sha256sums = 5c3776e5c94b51b368384c79aec9b26716fc6517935d782c121c856f21dfd223
+ sha256sums = cc229607e417841d3e900cc93c3ab2f79c0851705a07e7206729193ffa3dc9db
sha256sums = 972d6b114f7621c5f3bd34b1105dd390b318db18fbc76328001c984db488a9b0
sha256sums = 1c8be30224a67c0f279ae1324165708371aad8f290ebc6da69c686d0904e606c
sha256sums = 50ccb5bd2067e5d2739c5f7abcef11ef096aa246f5ceea11d2c3b508fc7f77a1
diff --git a/0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch b/0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch
deleted file mode 100644
index ab14b215569b..000000000000
--- a/0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch
+++ /dev/null
@@ -1,806 +0,0 @@
-From 1f69076549bf2351eb6d8d885b35a46b4dc69813 Mon Sep 17 00:00:00 2001
-From: Andrew Comminos <andrew@comminos.com>
-Date: Mon, 5 Mar 2018 15:38:35 -0800
-Subject: [PATCH 1/9] wined3d: Initial implementation of a persistent mapped
- buffer allocator.
-
----
- dlls/wined3d-csmt/Makefile.in | 1 +
- dlls/wined3d/Makefile.in | 1 +
- dlls/wined3d/buffer_heap.c | 508 +++++++++++++++++++++++++++++++++++++++++
- dlls/wined3d/cs.c | 9 +
- dlls/wined3d/device.c | 52 +++++
- dlls/wined3d/directx.c | 3 +
- dlls/wined3d/query.c | 2 +-
- dlls/wined3d/wined3d_gl.h | 1 +
- dlls/wined3d/wined3d_private.h | 68 +++++-
- 9 files changed, 641 insertions(+), 4 deletions(-)
- create mode 100644 dlls/wined3d/buffer_heap.c
-
-diff --git a/dlls/wined3d-csmt/Makefile.in b/dlls/wined3d-csmt/Makefile.in
-index 1d0458eb46..cb3a5484c6 100644
---- a/dlls/wined3d-csmt/Makefile.in
-+++ b/dlls/wined3d-csmt/Makefile.in
-@@ -8,6 +8,7 @@ C_SRCS = \
- arb_program_shader.c \
- ati_fragment_shader.c \
- buffer.c \
-+ buffer_heap.c \
- context.c \
- cs.c \
- device.c \
-diff --git a/dlls/wined3d/Makefile.in b/dlls/wined3d/Makefile.in
-index b850ba6872..52ef8666fb 100644
---- a/dlls/wined3d/Makefile.in
-+++ b/dlls/wined3d/Makefile.in
-@@ -6,6 +6,7 @@ C_SRCS = \
- arb_program_shader.c \
- ati_fragment_shader.c \
- buffer.c \
-+ buffer_heap.c \
- context.c \
- cs.c \
- device.c \
-diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
-new file mode 100644
-index 0000000000..b133bd6893
---- /dev/null
-+++ b/dlls/wined3d/buffer_heap.c
-@@ -0,0 +1,508 @@
-+/*
-+ * Copyright 2018 Andrew Comminos
-+ *
-+ * This library is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * This library is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with this library; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
-+ *
-+ */
-+
-+#include "config.h"
-+#include "wine/port.h"
-+#include "wine/rbtree.h"
-+#include "wined3d_private.h"
-+
-+WINE_DEFAULT_DEBUG_CHANNEL(d3d);
-+WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
-+
-+struct wined3d_buffer_heap_element
-+{
-+ struct wined3d_map_range range;
-+
-+ // rbtree data
-+ struct wine_rb_entry entry;
-+
-+ // Binned free list positions
-+ struct wined3d_buffer_heap_element *next;
-+ struct wined3d_buffer_heap_element *prev;
-+};
-+
-+struct wined3d_buffer_heap_fenced_element
-+{
-+ struct wined3d_buffer_heap_bin_set free_list;
-+ struct wined3d_fence *fence;
-+
-+ struct wined3d_buffer_heap_fenced_element *next;
-+};
-+
-+static struct wined3d_buffer_heap_element* element_new(GLsizei offset, GLsizei size)
-+{
-+ struct wined3d_buffer_heap_element* elem;
-+ elem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct wined3d_buffer_heap_element));
-+ if (!elem)
-+ return NULL;
-+ elem->range.offset = offset;
-+ elem->range.size = size;
-+ return elem;
-+}
-+
-+static inline int bitwise_log2_floor(GLsizei size)
-+{
-+ // XXX(acomminos): I hope this gets unrolled.
-+ for (int i = 8 * sizeof(GLsizei) - 1; i >= 0; i--)
-+ {
-+ if ((size >> i) & 1) {
-+ return i;
-+ }
-+ }
-+ return 0;
-+}
-+
-+static inline int bitwise_log2_ceil(GLsizei size)
-+{
-+ // Add one to the floor of size if size isn't a power of two.
-+ return bitwise_log2_floor(size) + !!(size & (size - 1));
-+}
-+
-+static int element_bin(struct wined3d_buffer_heap_element *elem)
-+{
-+ return min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_floor(elem->range.size));
-+}
-+
-+// Inserts an element into the appropriate free list bin.
-+static void element_insert_free_bin(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
-+{
-+ int bin = element_bin(elem);
-+
-+ elem->prev = NULL;
-+ elem->next = heap->free_list.bins[bin].head;
-+ if (heap->free_list.bins[bin].head)
-+ heap->free_list.bins[bin].head->prev = elem;
-+ heap->free_list.bins[bin].head = elem;
-+
-+ if (!heap->free_list.bins[bin].tail)
-+ heap->free_list.bins[bin].tail = elem;
-+
-+ TRACE("Inserted allocation at %p of size %lld into bin %d\n", elem->range.offset, elem->range.size, bin);
-+}
-+
-+// Removes an element from the free tree, its bin, and the coalesce list.
-+static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
-+{
-+ int bin = element_bin(elem);
-+
-+ if (elem->prev)
-+ elem->prev->next = elem->next;
-+
-+ if (elem->next)
-+ elem->next->prev = elem->prev;
-+
-+ if (elem == heap->free_list.bins[bin].head)
-+ heap->free_list.bins[bin].head = elem->next;
-+
-+ if (elem == heap->free_list.bins[bin].tail)
-+ heap->free_list.bins[bin].tail = elem->prev;
-+
-+ elem->prev = NULL;
-+ elem->next = NULL;
-+
-+ TRACE("Freed allocation at %p of size %lld from bin %d\n", elem->range.offset, elem->range.size, bin);
-+}
-+
-+static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_bin_set bins, struct wined3d_fence* fence)
-+{
-+ struct wined3d_buffer_heap_fenced_element* elem;
-+ elem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct wined3d_buffer_heap_fenced_element));
-+ if (!elem)
-+ return NULL;
-+ elem->free_list = bins;
-+ elem->fence = fence;
-+ elem->next = NULL;
-+ return elem;
-+}
-+
-+static int free_tree_compare(const void *key, const struct wine_rb_entry *entry)
-+{
-+ const GLsizei offset = *(const GLsizei*) key;
-+ struct wined3d_buffer_heap_element *elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
-+
-+ if (offset < elem->range.offset)
-+ return -1;
-+ if (offset > elem->range.offset)
-+ return 1;
-+ return 0;
-+}
-+
-+/* Context activation is done by the caller. */
-+HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap)
-+{
-+ const struct wined3d_gl_info *gl_info = context->gl_info;
-+ const GLenum buffer_target = GL_ARRAY_BUFFER;
-+ GLbitfield access_flags;
-+ GLbitfield storage_flags;
-+ struct wined3d_buffer_heap_element *initial_elem;
-+
-+ struct wined3d_buffer_heap *object;
-+
-+ if ((alignment & (alignment - 1)) != 0)
-+ {
-+ return E_FAIL;
-+ }
-+
-+ if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object))))
-+ {
-+ return E_OUTOFMEMORY;
-+ }
-+
-+ access_flags = GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_WRITE_BIT;
-+ if (!write_only)
-+ {
-+ access_flags |= GL_MAP_READ_BIT;
-+ }
-+ storage_flags = access_flags;
-+
-+ // TODO(acomminos): where should we be checking for errors here?
-+ GL_EXTCALL(glGenBuffers(1, &object->buffer_object));
-+
-+ context_bind_bo(context, buffer_target, object->buffer_object);
-+
-+ // TODO(acomminos): assert glBufferStorage supported?
-+ GL_EXTCALL(glBufferStorage(buffer_target, size, NULL, storage_flags));
-+
-+ if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(buffer_target, 0, size, access_flags))))
-+ {
-+ ERR("Couldn't map persistent buffer.\n");
-+ return -1; // FIXME(acomminos): proper error code, cleanup
-+ }
-+ context_bind_bo(context, buffer_target, 0);
-+
-+ object->fenced_head = object->fenced_tail = NULL;
-+ object->alignment = alignment;
-+ InitializeCriticalSection(&object->temp_lock);
-+
-+ initial_elem = element_new(0, size);
-+ // Don't bother adding the initial allocation to the coalescing tree.
-+ element_insert_free_bin(object, initial_elem);
-+
-+ *buffer_heap = object;
-+
-+ return WINED3D_OK;
-+}
-+
-+/* Context activation is done by the caller. */
-+HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context)
-+{
-+ FIXME("Unimplemented, leaking buffer");
-+ return WINED3D_OK;
-+}
-+
-+HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range)
-+{
-+ int initial_bin;
-+ int initial_size = size;
-+
-+ EnterCriticalSection(&heap->temp_lock);
-+
-+ // After alignment, reduce fragmentation by rounding to next power of two.
-+ // If the alignment is a power of two (which it should be), this should be
-+ // no problem.
-+ size = 1 << bitwise_log2_ceil(size);
-+
-+ // Align size values where possible.
-+ if (heap->alignment && (size % heap->alignment != 0))
-+ size += heap->alignment - (size % heap->alignment);
-+
-+ initial_bin = min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_ceil(size));
-+
-+ for (int i = initial_bin; i < WINED3D_BUFFER_HEAP_BINS; i++)
-+ {
-+ struct wined3d_buffer_heap_element *elem = heap->free_list.bins[i].head;
-+ if (elem)
-+ {
-+ struct wined3d_map_range remaining_range;
-+ remaining_range.offset = elem->range.offset + size;
-+ remaining_range.size = elem->range.size - size;
-+
-+ out_range->offset = elem->range.offset;
-+ out_range->size = size;
-+
-+ TRACE_(d3d_perf)("Allocated %d (requested %d) at %p from bin %d (initial %d)\n", size, initial_size, elem->range.offset, i, initial_bin);
-+
-+ // Remove the element from its current free bin to move it to the correct list.
-+ element_remove_free(heap, elem);
-+
-+ if (remaining_range.size > 0)
-+ {
-+ TRACE_(d3d_perf)("Imperfect fit allocated, fragmenting remainder of %lld at %p.\n", remaining_range.size, remaining_range.offset);
-+
-+ elem->range = remaining_range;
-+ element_insert_free_bin(heap, elem);
-+ }
-+ else
-+ {
-+ HeapFree(GetProcessHeap(), 0, elem);
-+ }
-+
-+ LeaveCriticalSection(&heap->temp_lock);
-+ return WINED3D_OK;
-+ }
-+ }
-+
-+ LeaveCriticalSection(&heap->temp_lock);
-+
-+ FIXME_(d3d_perf)("Forcing coalesce, not enough free space in buffer heap.\n");
-+ int num_coalesced;
-+ if (SUCCEEDED(wined3d_buffer_heap_deferred_coalesce(heap, &num_coalesced)))
-+ {
-+ if (num_coalesced > 0)
-+ return wined3d_buffer_heap_alloc(heap, size, out_range);
-+ }
-+
-+ FIXME_(d3d_perf)("Coalescing did not create new blocks, failing.\n");
-+
-+ return WINED3DERR_OUTOFVIDEOMEMORY;
-+}
-+
-+HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range)
-+{
-+ struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size);
-+
-+ if (!elem)
-+ return E_OUTOFMEMORY;
-+
-+ EnterCriticalSection(&heap->temp_lock);
-+
-+ // Only insert the element into a free bin, coalescing will occur later.
-+ element_insert_free_bin(heap, elem);
-+
-+ LeaveCriticalSection(&heap->temp_lock);
-+
-+ return WINED3D_OK;
-+}
-+
-+HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range)
-+{
-+ struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size);
-+ int bin_index = element_bin(elem);
-+ struct wined3d_buffer_heap_bin *bin = &heap->pending_fenced_bins.bins[bin_index];
-+
-+ if (bin->tail)
-+ {
-+ bin->tail->next = elem;
-+ elem->prev = bin->tail;
-+ bin->tail = elem;
-+ }
-+ else
-+ {
-+ bin->head = elem;
-+ bin->tail = elem;
-+ }
-+
-+ return WINED3D_OK;
-+}
-+
-+HRESULT wined3d_buffer_heap_cs_fence_issue(struct wined3d_buffer_heap *heap, struct wined3d_device *device)
-+{
-+ struct wined3d_buffer_heap_fenced_element *fenced_elem;
-+ struct wined3d_fence *fence;
-+ HRESULT hr;
-+
-+ if (heap->fenced_head)
-+ {
-+ // XXX(acomminos): double or triple buffer this?
-+ wined3d_buffer_heap_cs_fence_wait(heap, device);
-+ }
-+
-+ if (FAILED(hr = wined3d_fence_create(device, &fence)))
-+ {
-+ ERR("Failed to create fence.\n");
-+ return hr;
-+ }
-+
-+ fenced_elem = fenced_element_new(heap->pending_fenced_bins, fence);
-+ if (!fenced_elem)
-+ return E_OUTOFMEMORY;
-+
-+ TRACE_(d3d_perf)("Dispatching fenced buffer set.\n");
-+ memset(&heap->pending_fenced_bins, 0, sizeof(heap->pending_fenced_bins));
-+
-+ // Append to end of fenced list, which works well if you assume that buffers
-+ // are freed in some ascending draw call ordering.
-+ if (!heap->fenced_head)
-+ {
-+ heap->fenced_head = fenced_elem;
-+ heap->fenced_tail = fenced_elem;
-+ }
-+ else
-+ {
-+ heap->fenced_tail->next = fenced_elem;
-+ heap->fenced_tail = fenced_elem;
-+ }
-+
-+ wined3d_fence_issue(fence, device);
-+ return WINED3D_OK;
-+}
-+
-+HRESULT wined3d_buffer_heap_cs_fence_wait(struct wined3d_buffer_heap *heap, struct wined3d_device *device)
-+{
-+ enum wined3d_fence_result res;
-+ struct wined3d_buffer_heap_fenced_element *elem = heap->fenced_head;
-+ if (!elem)
-+ return WINED3D_OK;
-+
-+ res = wined3d_fence_wait(elem->fence, device);
-+ switch (res)
-+ {
-+ case WINED3D_FENCE_OK:
-+ case WINED3D_FENCE_NOT_STARTED:
-+ {
-+ TRACE_(d3d_perf)("Freed fence group.\n");
-+
-+ EnterCriticalSection(&heap->temp_lock);
-+ for (int i = 0; i < WINED3D_BUFFER_HEAP_BINS; i++)
-+ {
-+ struct wined3d_buffer_heap_bin *elem_bin = &elem->free_list.bins[i];
-+ if (!elem_bin->tail)
-+ continue;
-+
-+ struct wined3d_buffer_heap_bin *heap_bin = &heap->free_list.bins[i];
-+ if (heap_bin->head)
-+ {
-+ // Insert to front.
-+ elem_bin->tail->next = heap_bin->head;
-+ heap_bin->head->prev = elem_bin->tail;
-+
-+ elem_bin->head->prev = NULL;
-+ heap_bin->head = elem_bin->head;
-+ }
-+ else
-+ {
-+ elem_bin->head->prev = NULL;
-+ heap_bin->head = elem_bin->head;
-+ elem_bin->tail->next = NULL;
-+ heap_bin->tail = elem_bin->tail;
-+ }
-+ }
-+ LeaveCriticalSection(&heap->temp_lock);
-+
-+ wined3d_fence_destroy(elem->fence);
-+
-+ heap->fenced_head = elem->next;
-+ HeapFree(GetProcessHeap(), 0, elem);
-+ // TODO(acomminos): bother to null out fenced_tail?
-+ break;
-+ }
-+ default:
-+ return WINED3D_OK;
-+ }
-+
-+ return WINED3D_OK;
-+}
-+
-+HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, int *coalesced_count)
-+{
-+ struct wined3d_buffer_heap_element *elem = NULL;
-+ struct wined3d_buffer_heap_element *next = NULL;
-+ struct wine_rb_entry *entry;
-+ struct wined3d_map_range coalesced_range;
-+
-+ struct wine_rb_tree free_tree;
-+ int num_coalesced = 0;
-+
-+ wine_rb_init(&free_tree, free_tree_compare);
-+
-+ EnterCriticalSection(&heap->temp_lock);
-+
-+ // TODO(acomminos): on one hand, if there's a lot of elements in the list,
-+ // it's highly fragmented. on the other, we can potentially waste a decent
-+ // sum of time checking for uncoalesced bins.
-+ for (int i = 0; i < WINED3D_BUFFER_HEAP_BINS; i++)
-+ {
-+ elem = heap->free_list.bins[i].head;
-+ while (elem)
-+ {
-+ // Insert a sentry. FIXME(acomminos): can skip this with traversal.
-+ if (wine_rb_put(&free_tree, &elem->range.offset, &elem->entry) == -1)
-+ {
-+ ERR("Failed to insert key %x in tree.\n", elem->range.offset);
-+ elem = elem->next;
-+ continue;
-+ }
-+
-+ coalesced_range = elem->range;
-+
-+ // Coalesce right.
-+ entry = wine_rb_next(&elem->entry);
-+ if (entry)
-+ {
-+ TRACE("Coalesced right.\n");
-+ struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
-+ if (elem->range.offset + elem->range.size == right_elem->range.offset)
-+ {
-+ coalesced_range.size += right_elem->range.size;
-+
-+ wine_rb_remove(&free_tree, entry);
-+ element_remove_free(heap, right_elem);
-+ HeapFree(GetProcessHeap(), 0, right_elem);
-+
-+ num_coalesced++;
-+ }
-+ }
-+
-+ // Coalesce left.
-+ entry = wine_rb_prev(&elem->entry);
-+ if (entry)
-+ {
-+ TRACE("Coalesced left.\n");
-+ struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
-+ if (left_elem->range.offset + left_elem->range.size == coalesced_range.offset)
-+ {
-+ coalesced_range.offset = left_elem->range.offset;
-+ coalesced_range.size += left_elem->range.size;
-+
-+ wine_rb_remove(&free_tree, entry);
-+ element_remove_free(heap, left_elem);
-+ HeapFree(GetProcessHeap(), 0, left_elem);
-+
-+ num_coalesced++;
-+ }
-+ }
-+
-+ next = elem->next;
-+
-+ if (elem->range.size != coalesced_range.size)
-+ {
-+ FIXME_(d3d_perf)("Coalesced range from (%p, %ld) to (%p, %ld)\n", elem->range.offset, elem->range.size, coalesced_range.offset, coalesced_range.size);
-+
-+ wine_rb_remove(&free_tree, &elem->entry);
-+
-+ // Move to the correct free bin.
-+ element_remove_free(heap, elem);
-+ elem->range = coalesced_range;
-+ element_insert_free_bin(heap, elem);
-+
-+ wine_rb_put(&free_tree, &elem->range.offset, &elem->entry);
-+ }
-+
-+ elem = next;
-+ }
-+ }
-+
-+ LeaveCriticalSection(&heap->temp_lock);
-+
-+ FIXME_(d3d_perf)("Performed %d coalesces.\n", num_coalesced);
-+ if (coalesced_count)
-+ *coalesced_count = num_coalesced;
-+
-+ return WINED3D_OK;
-+}
-diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
-index 3a7c95ddd8..50a4d041cd 100644
---- a/dlls/wined3d/cs.c
-+++ b/dlls/wined3d/cs.c
-@@ -472,6 +472,15 @@ static void wined3d_cs_exec_present(struct wined3d_cs *cs, const void *data)
- }
-
- InterlockedDecrement(&cs->pending_presents);
-+
-+ // FIXME(acomminos): is this the right place to put double-buffered frame
-+ // timing based logic?
-+ // FIXME(acomminos): this conditional sucks, replace with fancier feature check
-+ if (cs->device->wo_buffer_heap && cs->device->cb_buffer_heap)
-+ {
-+ wined3d_buffer_heap_cs_fence_issue(cs->device->wo_buffer_heap, cs->device);
-+ wined3d_buffer_heap_cs_fence_issue(cs->device->cb_buffer_heap, cs->device);
-+ }
- }
-
- void wined3d_cs_emit_present(struct wined3d_cs *cs, struct wined3d_swapchain *swapchain,
-diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
-index e2b27e0cf4..785841a062 100644
---- a/dlls/wined3d/device.c
-+++ b/dlls/wined3d/device.c
-@@ -833,6 +833,53 @@ static void destroy_default_samplers(struct wined3d_device *device, struct wined
- device->null_sampler = NULL;
- }
-
-+/* Context activation is done by the caller. */
-+static void create_buffer_heap(struct wined3d_device *device, struct wined3d_context *context)
-+{
-+ const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
-+ // TODO(acomminos): kill this magic number. perhaps base on vram.
-+ GLsizeiptr geo_heap_size = 512 * 1024 * 1024;
-+ // We choose a constant buffer size of 128MB, the same as NVIDIA claims to
-+ // use in their Direct3D driver for discarded constant buffers.
-+ GLsizeiptr cb_heap_size = 128 * 1024 * 1024;
-+ GLint ub_alignment;
-+ HRESULT hr;
-+
-+ if (gl_info->supported[ARB_BUFFER_STORAGE])
-+ {
-+ gl_info->gl_ops.gl.p_glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &ub_alignment);
-+
-+ // Align constant buffer heap size, in case GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT isn't a power of two (for some reason).
-+ cb_heap_size -= cb_heap_size % ub_alignment;
-+
-+ if (FAILED(hr = wined3d_buffer_heap_create(context, geo_heap_size, 0, TRUE, &device->wo_buffer_heap)))
-+ {
-+ ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr);
-+ }
-+
-+ if (FAILED(hr = wined3d_buffer_heap_create(context, cb_heap_size, ub_alignment, TRUE, &device->cb_buffer_heap)))
-+ {
-+ ERR("Failed to create persistent buffer heap for constant buffers, hr %#x.\n", hr);
-+ }
-+
-+ FIXME("Initialized PBA (geo_heap_size: %ld, cb_heap_size: %ld, ub_align: %d)\n", geo_heap_size, cb_heap_size, ub_alignment);
-+ }
-+ else
-+ {
-+ FIXME("Not using PBA, ARB_buffer_storage unsupported.\n");
-+ }
-+}
-+
-+/* Context activation is done by the caller. */
-+static void destroy_buffer_heap(struct wined3d_device *device, struct wined3d_context *context)
-+{
-+ if (device->wo_buffer_heap)
-+ wined3d_buffer_heap_destroy(device->wo_buffer_heap, context);
-+
-+ if (device->cb_buffer_heap)
-+ wined3d_buffer_heap_destroy(device->cb_buffer_heap, context);
-+}
-+
- static LONG fullscreen_style(LONG style)
- {
- /* Make sure the window is managed, otherwise we won't get keyboard input. */
-@@ -997,6 +1044,8 @@ static void wined3d_device_delete_opengl_contexts_cs(void *object)
- device->shader_backend->shader_free_private(device);
- destroy_dummy_textures(device, context);
- destroy_default_samplers(device, context);
-+ destroy_buffer_heap(device, context);
-+
- context_release(context);
-
- while (device->context_count)
-@@ -1045,6 +1094,9 @@ static void wined3d_device_create_primary_opengl_context_cs(void *object)
- context = context_acquire(device, target, 0);
- create_dummy_textures(device, context);
- create_default_samplers(device, context);
-+
-+ create_buffer_heap(device, context);
-+
- context_release(context);
- }
-
-diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c
-index 8720fc7ad6..46c6a59536 100644
---- a/dlls/wined3d/directx.c
-+++ b/dlls/wined3d/directx.c
-@@ -111,6 +111,7 @@ static const struct wined3d_extension_map gl_extension_map[] =
- /* ARB */
- {"GL_ARB_base_instance", ARB_BASE_INSTANCE },
- {"GL_ARB_blend_func_extended", ARB_BLEND_FUNC_EXTENDED },
-+ {"GL_ARB_buffer_storage", ARB_BUFFER_STORAGE },
- {"GL_ARB_clear_buffer_object", ARB_CLEAR_BUFFER_OBJECT },
- {"GL_ARB_clear_texture", ARB_CLEAR_TEXTURE },
- {"GL_ARB_clip_control", ARB_CLIP_CONTROL },
-@@ -2714,6 +2715,8 @@ static void load_gl_funcs(struct wined3d_gl_info *gl_info)
- /* GL_ARB_blend_func_extended */
- USE_GL_FUNC(glBindFragDataLocationIndexed)
- USE_GL_FUNC(glGetFragDataIndex)
-+ /* GL_ARB_buffer_storage */
-+ USE_GL_FUNC(glBufferStorage)
- /* GL_ARB_clear_buffer_object */
- USE_GL_FUNC(glClearBufferData)
- USE_GL_FUNC(glClearBufferSubData)
-diff --git a/dlls/wined3d/query.c b/dlls/wined3d/query.c
-index 5ea79b6e4a..f3ca1630e5 100644
---- a/dlls/wined3d/query.c
-+++ b/dlls/wined3d/query.c
-@@ -88,7 +88,7 @@ static BOOL wined3d_fence_supported(const struct wined3d_gl_info *gl_info)
- return gl_info->supported[ARB_SYNC] || gl_info->supported[NV_FENCE] || gl_info->supported[APPLE_FENCE];
- }
-
--static enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence,
-+enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence,
- const struct wined3d_device *device, DWORD flags)
- {
- const struct wined3d_gl_info *gl_info;
-diff --git a/dlls/wined3d/wined3d_gl.h b/dlls/wined3d/wined3d_gl.h
-index 87283c850e..7626864ef2 100644
---- a/dlls/wined3d/wined3d_gl.h
-+++ b/dlls/wined3d/wined3d_gl.h
-@@ -44,6 +44,7 @@ enum wined3d_gl_extension
- /* ARB */
- ARB_BASE_INSTANCE,
- ARB_BLEND_FUNC_EXTENDED,
-+ ARB_BUFFER_STORAGE,
- ARB_CLEAR_BUFFER_OBJECT,
- ARB_CLEAR_TEXTURE,
- ARB_CLIP_CONTROL,
-diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
-index 8aa61d811f..3d535f4e17 100644
---- a/dlls/wined3d/wined3d_private.h
-+++ b/dlls/wined3d/wined3d_private.h
-@@ -1712,6 +1712,9 @@ void wined3d_fence_destroy(struct wined3d_fence *fence) DECLSPEC_HIDDEN;
- void wined3d_fence_issue(struct wined3d_fence *fence, const struct wined3d_device *device) DECLSPEC_HIDDEN;
- enum wined3d_fence_result wined3d_fence_wait(const struct wined3d_fence *fence,
- const struct wined3d_device *device) DECLSPEC_HIDDEN;
-+// XXX(acomminos): really expose this?
-+enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence,
-+ const struct wined3d_device *device, DWORD flags) DECLSPEC_HIDDEN;
-
- /* Direct3D terminology with little modifications. We do not have an issued
- * state because only the driver knows about it, but we have a created state
-@@ -2993,6 +2996,10 @@ struct wined3d_device
- /* Context management */
- struct wined3d_context **contexts;
- UINT context_count;
-+
-+ /* Dynamic buffer heap */
-+ struct wined3d_buffer_heap *wo_buffer_heap;
-+ struct wined3d_buffer_heap *cb_buffer_heap;
- };
-
- void device_clear_render_targets(struct wined3d_device *device, UINT rt_count, const struct wined3d_fb_state *fb,
-@@ -3513,6 +3520,12 @@ void state_init(struct wined3d_state *state, struct wined3d_fb_state *fb,
- DWORD flags) DECLSPEC_HIDDEN;
- void state_unbind_resources(struct wined3d_state *state) DECLSPEC_HIDDEN;
-
-+struct wined3d_map_range
-+{
-+ GLintptr offset;
-+ GLsizeiptr size;
-+};
-+
- enum wined3d_cs_queue_id
- {
- WINED3D_CS_QUEUE_DEFAULT = 0,
-@@ -3692,12 +3705,61 @@ enum wined3d_buffer_conversion_type
- CONV_POSITIONT,
- };
-
--struct wined3d_map_range
-+struct wined3d_buffer_heap_element;
-+struct wined3d_buffer_heap_fenced_element;
-+
-+// Number of power-of-two buckets to populate.
-+#define WINED3D_BUFFER_HEAP_BINS 32
-+
-+struct wined3d_buffer_heap_bin
- {
-- UINT offset;
-- UINT size;
-+ struct wined3d_buffer_heap_element *head;
-+ struct wined3d_buffer_heap_element *tail;
-+};
-+
-+struct wined3d_buffer_heap_bin_set
-+{
-+ struct wined3d_buffer_heap_bin bins[WINED3D_BUFFER_HEAP_BINS];
- };
-
-+// A heap that manages allocations with a single GL buffer.
-+struct wined3d_buffer_heap
-+{
-+ GLuint buffer_object;
-+ void *map_ptr;
-+ GLsizeiptr alignment;
-+ CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list.
-+
-+ struct wined3d_buffer_heap_bin_set free_list;
-+
-+ // Elements that need to be fenced, but haven't reached the required size.
-+ struct wined3d_buffer_heap_bin_set pending_fenced_bins;
-+
-+ // List of sets of buffers behind a common fence, in FIFO order.
-+ struct wined3d_buffer_heap_fenced_element *fenced_head;
-+ struct wined3d_buffer_heap_fenced_element *fenced_tail;
-+};
-+
-+HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **heap) DECLSPEC_HIDDEN;
-+HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN;
-+// Fetches a buffer from the heap of at least the given size.
-+// Attempts to coalesce blocks under memory pressure.
-+HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range* out_range) DECLSPEC_HIDDEN;
-+// Immediately frees a heap-allocated buffer segment.
-+HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN;
-+// Enqueues a buffer segment to return to the heap once its fence has been signaled.
-+HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) DECLSPEC_HIDDEN;
-+// Issues a fence for the current set of pending fenced buffers.
-+// Double-buffered: if the last fence issued has not yet been triggered, waits
-+// on it.
-+HRESULT wined3d_buffer_heap_cs_fence_issue(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN;
-+// Waits on the next issued fence in FIFO order. Frees the fenced buffers after
-+// the fence has been triggered.
-+HRESULT wined3d_buffer_heap_cs_fence_wait(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN;
-+// Performs deferred coalescing of buffers. To be called under memory pressure.
-+// Outputs the number of coalesced regions in `num_coalesced`.
-+HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, int *num_coalesced) DECLSPEC_HIDDEN;
-+
- struct wined3d_buffer
- {
- struct wined3d_resource resource;
---
-2.16.2
-
diff --git a/0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch b/0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch
deleted file mode 100644
index d4b2299641b0..000000000000
--- a/0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch
+++ /dev/null
@@ -1,679 +0,0 @@
-From af82b8e867af940f7ec68998a797aa5d7dfc540a Mon Sep 17 00:00:00 2001
-From: Andrew Comminos <andrew@comminos.com>
-Date: Mon, 5 Mar 2018 15:39:11 -0800
-Subject: [PATCH 2/9] wined3d: Add support for backing dynamic wined3d_buffer
- objects by a persistent map.
-
----
- dlls/wined3d/buffer.c | 220 ++++++++++++++++++++++++++++++++++++++++-
- dlls/wined3d/context.c | 6 +-
- dlls/wined3d/cs.c | 60 ++++++++++-
- dlls/wined3d/resource.c | 18 +++-
- dlls/wined3d/state.c | 17 +++-
- dlls/wined3d/texture.c | 13 +++
- dlls/wined3d/utils.c | 1 +
- dlls/wined3d/wined3d_private.h | 11 +++
- 8 files changed, 336 insertions(+), 10 deletions(-)
-
-diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c
-index cae7ef8788..e7a0f59a67 100644
---- a/dlls/wined3d/buffer.c
-+++ b/dlls/wined3d/buffer.c
-@@ -28,12 +28,14 @@
- #include "wined3d_private.h"
-
- WINE_DEFAULT_DEBUG_CHANNEL(d3d);
-+WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
-
- #define WINED3D_BUFFER_HASDESC 0x01 /* A vertex description has been found. */
- #define WINED3D_BUFFER_USE_BO 0x02 /* Use a buffer object for this buffer. */
- #define WINED3D_BUFFER_PIN_SYSMEM 0x04 /* Keep a system memory copy for this buffer. */
- #define WINED3D_BUFFER_DISCARD 0x08 /* A DISCARD lock has occurred since the last preload. */
- #define WINED3D_BUFFER_APPLESYNC 0x10 /* Using sync as in GL_APPLE_flush_buffer_range. */
-+#define WINED3D_BUFFER_PERSISTENT 0x20 /* Uses a persistent-mapped buffer via ARB_buffer_storage. */
-
- #define VB_MAXDECLCHANGES 100 /* After that number of decl changes we stop converting */
- #define VB_RESETDECLCHANGE 1000 /* Reset the decl changecount after that number of draws */
-@@ -269,6 +271,52 @@ fail:
- return FALSE;
- }
-
-+/* Context activation is done by the caller. */
-+static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer, struct wined3d_context *context)
-+{
-+ struct wined3d_device *device = buffer->resource.device;
-+ struct wined3d_buffer_heap *heap;
-+ struct wined3d_map_range map_range;
-+ HRESULT hr;
-+
-+ if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER)
-+ {
-+ // Use a heap aligned to constant buffer offset requirements.
-+ heap = device->cb_buffer_heap;
-+ }
-+ else
-+ {
-+ if (!(buffer->resource.usage & WINED3DUSAGE_WRITEONLY))
-+ FIXME("Using a write-only persistent buffer for %p without WINED3DUSAGE_WRITEONLY.\n", buffer);
-+ heap = device->wo_buffer_heap;
-+ }
-+
-+ buffer->buffer_heap = heap;
-+ if (FAILED(hr = wined3d_buffer_heap_alloc(heap, buffer->resource.size, &map_range)))
-+ {
-+ goto fail;
-+ }
-+ buffer->cs_persistent_map = map_range;
-+ buffer->mt_persistent_map = map_range;
-+ return TRUE;
-+
-+fail:
-+ // FIXME(acomminos): fall back to standalone BO here?
-+ ERR("Failed to create persistent map for buffer %p, hr=%x\n", buffer, hr);
-+ buffer->buffer_heap = NULL;
-+ return FALSE;
-+}
-+
-+static void buffer_free_persistent_map(struct wined3d_buffer *buffer)
-+{
-+ if (!buffer->buffer_heap)
-+ return;
-+
-+ // TODO(acomminos): get the CS thread to free pending main thread buffers.
-+ wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map);
-+ buffer->buffer_heap = NULL;
-+}
-+
- static BOOL buffer_process_converted_attribute(struct wined3d_buffer *buffer,
- const enum wined3d_buffer_conversion_type conversion_type,
- const struct wined3d_stream_info_element *attrib, DWORD *stride_this_run)
-@@ -631,6 +679,16 @@ static BOOL wined3d_buffer_prepare_location(struct wined3d_buffer *buffer,
- return FALSE;
- }
- return buffer_create_buffer_object(buffer, context);
-+ case WINED3D_LOCATION_PERSISTENT_MAP:
-+ if (buffer->buffer_heap)
-+ return TRUE;
-+
-+ if (!(buffer->flags & WINED3D_BUFFER_PERSISTENT))
-+ {
-+ WARN("Trying to map a persistent region for buffer %p without WINED3D_BUFFER_PERSISTENT.\n", buffer);
-+ return FALSE;
-+ }
-+ return buffer_alloc_persistent_map(buffer, context);
-
- default:
- ERR("Invalid location %s.\n", wined3d_debug_location(location));
-@@ -689,16 +747,32 @@ BOOL wined3d_buffer_load_location(struct wined3d_buffer *buffer,
- buffer_conversion_upload(buffer, context);
- break;
-
-+ case WINED3D_LOCATION_PERSISTENT_MAP:
-+ // TODO(acomminos): are we guaranteed location_sysmem to be kept?
-+ // no.
-+ if (buffer->conversion_map)
-+ FIXME("Attempting to use conversion map with persistent mapping.\n");
-+ memcpy(buffer->buffer_heap->map_ptr +
-+ buffer->cs_persistent_map.offset,
-+ buffer->resource.heap_memory, buffer->resource.size);
-+ break;
-+
- default:
- ERR("Invalid location %s.\n", wined3d_debug_location(location));
- return FALSE;
- }
-
- wined3d_buffer_validate_location(buffer, location);
-- if (buffer->resource.heap_memory && location == WINED3D_LOCATION_BUFFER
-+ if (buffer->resource.heap_memory
-+ && location & WINED3D_LOCATION_BUFFER
- && !(buffer->resource.usage & WINED3DUSAGE_DYNAMIC))
- wined3d_buffer_evict_sysmem(buffer);
-
-+ // FIXME(acomminos)
-+ if (buffer->resource.heap_memory
-+ && location & WINED3D_LOCATION_PERSISTENT_MAP)
-+ wined3d_buffer_evict_sysmem(buffer);
-+
- return TRUE;
- }
-
-@@ -720,12 +794,25 @@ DWORD wined3d_buffer_get_memory(struct wined3d_buffer *buffer,
- {
- data->buffer_object = buffer->buffer_object;
- data->addr = NULL;
-+ data->length = buffer->resource.size;
- return WINED3D_LOCATION_BUFFER;
- }
-+ if (locations & WINED3D_LOCATION_PERSISTENT_MAP)
-+ {
-+ // FIXME(acomminos): should we expose a buffer object we don't wholly own here?
-+ data->buffer_object = buffer->buffer_heap->buffer_object;
-+ data->addr = buffer->cs_persistent_map.offset;
-+ // Note that the size of the underlying buffer allocation may be larger
-+ // than the buffer knows about. In this case, we've rounded it up to be
-+ // aligned (e.g. for uniform buffer offsets).
-+ data->length = buffer->cs_persistent_map.size;
-+ return WINED3D_LOCATION_PERSISTENT_MAP;
-+ }
- if (locations & WINED3D_LOCATION_SYSMEM)
- {
- data->buffer_object = 0;
- data->addr = buffer->resource.heap_memory;
-+ data->length = buffer->resource.size;
- return WINED3D_LOCATION_SYSMEM;
- }
-
-@@ -761,6 +848,8 @@ static void buffer_unload(struct wined3d_resource *resource)
- buffer->flags &= ~WINED3D_BUFFER_HASDESC;
- }
-
-+ buffer_free_persistent_map(buffer);
-+
- resource_unload(resource);
- }
-
-@@ -784,6 +873,8 @@ static void wined3d_buffer_destroy_object(void *object)
- heap_free(buffer->conversion_map);
- }
-
-+ buffer_free_persistent_map(buffer);
-+
- heap_free(buffer->maps);
- heap_free(buffer);
- }
-@@ -900,6 +991,16 @@ void wined3d_buffer_load(struct wined3d_buffer *buffer, struct wined3d_context *
-
- buffer_mark_used(buffer);
-
-+ if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
-+ {
-+ if (wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP))
-+ return;
-+
-+ ERR("Failed to preload persistent mapping for %p, falling back to BO.\n", buffer);
-+ buffer->flags |= WINED3D_BUFFER_USE_BO;
-+ buffer->flags &= ~WINED3D_BUFFER_PERSISTENT;
-+ }
-+
- /* TODO: Make converting independent from VBOs */
- if (!(buffer->flags & WINED3D_BUFFER_USE_BO))
- {
-@@ -1010,6 +1111,25 @@ static HRESULT wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UI
-
- count = ++buffer->resource.map_count;
-
-+ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)
-+ {
-+ const struct wined3d_gl_info *gl_info;
-+ context = context_acquire(device, NULL, 0);
-+
-+ FIXME_(d3d_perf)("Fences not used for persistent buffer maps on CS thread, using glFinish.\n");
-+
-+ gl_info = context->gl_info;
-+ gl_info->gl_ops.gl.p_glFinish();
-+
-+ base = buffer->buffer_heap->map_ptr
-+ + buffer->cs_persistent_map.offset;
-+ *data = base + offset;
-+
-+ context_release(context);
-+
-+ return WINED3D_OK;
-+ }
-+
- if (buffer->buffer_object)
- {
- unsigned int dirty_offset = offset, dirty_size = size;
-@@ -1152,6 +1272,12 @@ static void wined3d_buffer_unmap(struct wined3d_buffer *buffer)
- return;
- }
-
-+ if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
-+ {
-+ TRACE("Persistent buffer, ignore unmap.\n");
-+ return;
-+ }
-+
- if (buffer->map_ptr)
- {
- struct wined3d_device *device = buffer->resource.device;
-@@ -1256,6 +1382,64 @@ static void buffer_resource_preload(struct wined3d_resource *resource)
-
- static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx,
- struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
-+{
-+ struct wined3d_buffer *buffer = buffer_from_resource(resource);
-+ UINT offset = box ? box->left : 0;
-+
-+ if (sub_resource_idx)
-+ {
-+ WARN("Invalid sub_resource_idx %u.\n", sub_resource_idx);
-+ return E_INVALIDARG;
-+ }
-+
-+ // Support immediate mapping of persistent buffers off the command thread,
-+ // which require no GL calls to interface with.
-+ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)
-+ {
-+ map_desc->row_pitch = map_desc->slice_pitch = buffer->desc.byte_width;
-+ if (flags & WINED3D_MAP_DISCARD)
-+ {
-+ HRESULT hr;
-+ struct wined3d_map_range map_range;
-+ if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &map_range)))
-+ {
-+ FIXME_(d3d_perf)("Failed to allocate new buffer, falling back to sync path.\n");
-+ return hr;
-+ }
-+ map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset;
-+ resource->map_count++;
-+
-+ buffer->mt_persistent_map = map_range;
-+
-+ // Discard handler on CSMT thread is responsible for returning the
-+ // currently used buffer to the free pool, along with the fence that
-+ // must be called before the buffer can be reused.
-+ wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, map_range);
-+ return WINED3D_OK;
-+ }
-+ else if (flags & WINED3D_MAP_NOOVERWRITE)
-+ {
-+ // Allow immediate access for persistent buffers without a fence.
-+ // Always use the latest buffer in this case in case the latest
-+ // DISCARDed one hasn't reached the command stream yet.
-+ struct wined3d_map_range map_range = buffer->mt_persistent_map;
-+ map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset;
-+ resource->map_count++;
-+ return WINED3D_OK;
-+ }
-+ else
-+ {
-+ // TODO(acomminos): Should check mapped ranges to see if the region is writeable even though NOOVERWRITE is specified.
-+ WARN_(d3d_perf)("Mapping persistent buffer %p in sync with CS thread.\n", buffer);
-+ // XXX(acomminos): kill this early return. they're the worst.
-+ }
-+ }
-+
-+ return E_NOTIMPL;
-+}
-+
-+static HRESULT buffer_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx,
-+ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
- {
- struct wined3d_buffer *buffer = buffer_from_resource(resource);
- UINT offset, size;
-@@ -1299,6 +1483,18 @@ static HRESULT buffer_resource_sub_resource_map_info(struct wined3d_resource *re
- }
-
- static HRESULT buffer_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx)
-+{
-+ struct wined3d_buffer *buffer = buffer_from_resource(resource);
-+ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)
-+ {
-+ // Nothing to be done to unmap a region of a persistent buffer.
-+ resource->map_count--;
-+ return WINED3D_OK;
-+ }
-+ return E_NOTIMPL;
-+}
-+
-+static HRESULT buffer_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx)
- {
- if (sub_resource_idx)
- {
-@@ -1319,6 +1515,8 @@ static const struct wined3d_resource_ops buffer_resource_ops =
- buffer_resource_sub_resource_map,
- buffer_resource_sub_resource_map_info,
- buffer_resource_sub_resource_unmap,
-+ buffer_resource_sub_resource_map_cs,
-+ buffer_resource_sub_resource_unmap_cs,
- };
-
- static GLenum buffer_type_hint_from_bind_flags(const struct wined3d_gl_info *gl_info,
-@@ -1394,12 +1592,30 @@ static HRESULT buffer_init(struct wined3d_buffer *buffer, struct wined3d_device
- buffer->flags |= WINED3D_BUFFER_PIN_SYSMEM;
- }
-
-+ if (buffer->resource.usage & WINED3DUSAGE_DYNAMIC)
-+ {
-+ if (!gl_info->supported[ARB_BUFFER_STORAGE])
-+ {
-+ WARN_(d3d_perf)("Not creating a persistent mapping for a dynamic buffer because ARB_buffer_storage is unsupported.\n");
-+ }
-+ else
-+ {
-+ // If supported, use persistent mapped buffers instead of a
-+ // standalone BO for dynamic buffers.
-+ buffer->flags |= WINED3D_BUFFER_PERSISTENT;
-+ }
-+ }
-+
- /* Observations show that draw_primitive_immediate_mode() is faster on
- * dynamic vertex buffers than converting + draw_primitive_arrays().
- * (Half-Life 2 and others.) */
- dynamic_buffer_ok = gl_info->supported[APPLE_FLUSH_BUFFER_RANGE] || gl_info->supported[ARB_MAP_BUFFER_RANGE];
-
-- if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT])
-+ if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
-+ {
-+ TRACE("Not creating a BO because a persistent mapped buffer will be used.\n");
-+ }
-+ else if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT])
- {
- TRACE("Not creating a BO because GL_ARB_vertex_buffer is not supported.\n");
- }
-diff --git a/dlls/wined3d/context.c b/dlls/wined3d/context.c
-index 0e2e68b4b0..eae2c3a79d 100644
---- a/dlls/wined3d/context.c
-+++ b/dlls/wined3d/context.c
-@@ -5005,7 +5005,11 @@ void draw_primitive(struct wined3d_device *device, const struct wined3d_state *s
- if (parameters->indexed)
- {
- struct wined3d_buffer *index_buffer = state->index_buffer;
-- if (!index_buffer->buffer_object || !stream_info->all_vbo)
-+ if (index_buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)
-+ {
-+ idx_data = index_buffer->cs_persistent_map.offset;
-+ }
-+ else if (!index_buffer->buffer_object || !stream_info->all_vbo)
- {
- idx_data = index_buffer->resource.heap_memory;
- }
-diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
-index 50a4d041cd..e61b8dedbb 100644
---- a/dlls/wined3d/cs.c
-+++ b/dlls/wined3d/cs.c
-@@ -73,6 +73,7 @@ enum wined3d_cs_op
- WINED3D_CS_OP_CLEAR_UNORDERED_ACCESS_VIEW,
- WINED3D_CS_OP_COPY_UAV_COUNTER,
- WINED3D_CS_OP_GENERATE_MIPMAPS,
-+ WINED3D_CS_OP_DISCARD_BUFFER,
- WINED3D_CS_OP_STOP,
- };
-
-@@ -439,6 +440,13 @@ struct wined3d_cs_generate_mipmaps
- struct wined3d_shader_resource_view *view;
- };
-
-+struct wined3d_cs_discard_buffer
-+{
-+ enum wined3d_cs_op opcode;
-+ struct wined3d_buffer *buffer;
-+ struct wined3d_map_range map_range;
-+};
-+
- struct wined3d_cs_stop
- {
- enum wined3d_cs_op opcode;
-@@ -2002,7 +2010,7 @@ static void wined3d_cs_exec_map(struct wined3d_cs *cs, const void *data)
- const struct wined3d_cs_map *op = data;
- struct wined3d_resource *resource = op->resource;
-
-- *op->hr = resource->resource_ops->resource_sub_resource_map(resource,
-+ *op->hr = resource->resource_ops->resource_sub_resource_map_cs(resource,
- op->sub_resource_idx, op->map_desc, op->box, op->flags);
- }
-
-@@ -2036,7 +2044,7 @@ static void wined3d_cs_exec_unmap(struct wined3d_cs *cs, const void *data)
- const struct wined3d_cs_unmap *op = data;
- struct wined3d_resource *resource = op->resource;
-
-- *op->hr = resource->resource_ops->resource_sub_resource_unmap(resource, op->sub_resource_idx);
-+ *op->hr = resource->resource_ops->resource_sub_resource_unmap_cs(resource, op->sub_resource_idx);
- }
-
- HRESULT wined3d_cs_unmap(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx)
-@@ -2455,6 +2463,53 @@ void wined3d_cs_emit_generate_mipmaps(struct wined3d_cs *cs, struct wined3d_shad
- cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT);
- }
-
-+static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *data)
-+{
-+ const struct wined3d_cs_discard_buffer *op = data;
-+ struct wined3d_buffer *buffer = op->buffer;
-+ HRESULT hr;
-+
-+ // TODO(acomminos): should call into buffer.c here instead.
-+ if (FAILED(hr = wined3d_buffer_heap_free_fenced(buffer->buffer_heap, cs->device, buffer->cs_persistent_map)))
-+ {
-+ ERR("Failed to do a fenced free on discarded buffer %p, hr %x\n. Freeing anyway.", buffer, hr);
-+ wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map);
-+ }
-+
-+ buffer->cs_persistent_map = op->map_range;
-+
-+ // TODO(acomminos): merge this logic with buffer.c functions for standalone BOs
-+ if (buffer->bind_flags & WINED3D_BIND_VERTEX_BUFFER)
-+ device_invalidate_state(cs->device, STATE_STREAMSRC);
-+ if (buffer->bind_flags & WINED3D_BIND_INDEX_BUFFER)
-+ device_invalidate_state(cs->device, STATE_INDEXBUFFER);
-+ if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER)
-+ {
-+ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_VERTEX));
-+ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_HULL));
-+ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_DOMAIN));
-+ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_GEOMETRY));
-+ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_PIXEL));
-+ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_COMPUTE));
-+ }
-+
-+ wined3d_resource_release(&op->buffer->resource);
-+}
-+
-+void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range)
-+{
-+ struct wined3d_cs_discard_buffer *op;
-+
-+ op = cs->ops->require_space(cs, sizeof(*op), WINED3D_CS_QUEUE_DEFAULT);
-+ op->opcode = WINED3D_CS_OP_DISCARD_BUFFER;
-+ op->buffer = buffer;
-+ op->map_range = map_range;
-+
-+ wined3d_resource_acquire(&buffer->resource);
-+
-+ cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT);
-+}
-+
- static void wined3d_cs_emit_stop(struct wined3d_cs *cs)
- {
- struct wined3d_cs_stop *op;
-@@ -2515,6 +2570,7 @@ static void (* const wined3d_cs_op_handlers[])(struct wined3d_cs *cs, const void
- /* WINED3D_CS_OP_CLEAR_UNORDERED_ACCESS_VIEW */ wined3d_cs_exec_clear_unordered_access_view,
- /* WINED3D_CS_OP_COPY_UAV_COUNTER */ wined3d_cs_exec_copy_uav_counter,
- /* WINED3D_CS_OP_GENERATE_MIPMAPS */ wined3d_cs_exec_generate_mipmaps,
-+ /* WINED3D_CS_OP_DISCARD_BUFFER */ wined3d_cs_exec_discard_buffer,
- };
-
- #if defined(STAGING_CSMT)
-diff --git a/dlls/wined3d/resource.c b/dlls/wined3d/resource.c
-index 8b7f17bb6b..02d469bc20 100644
---- a/dlls/wined3d/resource.c
-+++ b/dlls/wined3d/resource.c
-@@ -344,6 +344,7 @@ static DWORD wined3d_resource_sanitise_map_flags(const struct wined3d_resource *
- HRESULT CDECL wined3d_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx,
- struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
- {
-+ HRESULT hr;
- TRACE("resource %p, sub_resource_idx %u, map_desc %p, box %s, flags %#x.\n",
- resource, sub_resource_idx, map_desc, debug_box(box), flags);
-
-@@ -366,9 +367,14 @@ HRESULT CDECL wined3d_resource_map(struct wined3d_resource *resource, unsigned i
- }
-
- flags = wined3d_resource_sanitise_map_flags(resource, flags);
-- wined3d_resource_wait_idle(resource);
-+ if (FAILED(hr = resource->resource_ops->resource_sub_resource_map(resource, sub_resource_idx, map_desc, box, flags)))
-+ {
-+ TRACE_(d3d_perf)("Mapping resource %p on the command stream.\n", resource);
-+ wined3d_resource_wait_idle(resource);
-+ hr = wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags);
-+ }
-
-- return wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags);
-+ return hr;
- }
-
- HRESULT CDECL wined3d_resource_map_info(struct wined3d_resource *resource, unsigned int sub_resource_idx,
-@@ -381,9 +387,15 @@ HRESULT CDECL wined3d_resource_map_info(struct wined3d_resource *resource, unsig
-
- HRESULT CDECL wined3d_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx)
- {
-+ HRESULT hr;
- TRACE("resource %p, sub_resource_idx %u.\n", resource, sub_resource_idx);
-
-- return wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx);
-+ if (FAILED(hr = resource->resource_ops->resource_sub_resource_unmap(resource, sub_resource_idx)))
-+ {
-+ TRACE_(d3d_perf)("Unmapping resource %p on the command stream.\n", resource);
-+ hr = wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx);
-+ }
-+ return hr;
- }
-
- UINT CDECL wined3d_resource_update_info(struct wined3d_resource *resource, unsigned int sub_resource_idx,
-diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c
-index 2f506c36d1..6f7805b8bd 100644
---- a/dlls/wined3d/state.c
-+++ b/dlls/wined3d/state.c
-@@ -4934,7 +4934,11 @@ static void indexbuffer(struct wined3d_context *context, const struct wined3d_st
- else
- {
- struct wined3d_buffer *ib = state->index_buffer;
-- GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object));
-+ // FIXME(acomminos): disasterous.
-+ if (ib->locations & WINED3D_LOCATION_PERSISTENT_MAP)
-+ GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_heap->buffer_object));
-+ else
-+ GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object));
- }
- }
-
-@@ -5000,6 +5004,7 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state
- enum wined3d_shader_type shader_type;
- struct wined3d_buffer *buffer;
- unsigned int i, base, count;
-+ struct wined3d_bo_address bo_addr;
-
- TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id);
-
-@@ -5012,7 +5017,15 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state
- for (i = 0; i < count; ++i)
- {
- buffer = state->cb[shader_type][i];
-- GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, buffer ? buffer->buffer_object : 0));
-+ if (buffer)
-+ {
-+ wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations);
-+ GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length));
-+ }
-+ else
-+ {
-+ GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0));
-+ }
- }
- checkGLcall("bind constant buffers");
- }
-diff --git a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c
-index e6af0c7508..7260f902cf 100644
---- a/dlls/wined3d/texture.c
-+++ b/dlls/wined3d/texture.c
-@@ -2301,6 +2301,12 @@ static void wined3d_texture_unload(struct wined3d_resource *resource)
-
- static HRESULT texture_resource_sub_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx,
- struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
-+{
-+ return E_NOTIMPL;
-+}
-+
-+static HRESULT texture_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx,
-+ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
- {
- const struct wined3d_format *format = resource->format;
- struct wined3d_texture_sub_resource *sub_resource;
-@@ -2461,6 +2467,11 @@ static HRESULT texture_resource_sub_resource_map_info(struct wined3d_resource *r
- }
-
- static HRESULT texture_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx)
-+{
-+ return E_NOTIMPL;
-+}
-+
-+static HRESULT texture_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx)
- {
- struct wined3d_texture_sub_resource *sub_resource;
- struct wined3d_device *device = resource->device;
-@@ -2513,6 +2524,8 @@ static const struct wined3d_resource_ops texture_resource_ops =
- texture_resource_sub_resource_map,
- texture_resource_sub_resource_map_info,
- texture_resource_sub_resource_unmap,
-+ texture_resource_sub_resource_map_cs,
-+ texture_resource_sub_resource_unmap_cs,
- };
-
- static HRESULT texture1d_init(struct wined3d_texture *texture, const struct wined3d_resource_desc *desc,
-diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c
-index b8b7880501..62758ae056 100644
---- a/dlls/wined3d/utils.c
-+++ b/dlls/wined3d/utils.c
-@@ -6404,6 +6404,7 @@ const char *wined3d_debug_location(DWORD location)
- LOCATION_TO_STR(WINED3D_LOCATION_DRAWABLE);
- LOCATION_TO_STR(WINED3D_LOCATION_RB_MULTISAMPLE);
- LOCATION_TO_STR(WINED3D_LOCATION_RB_RESOLVED);
-+ LOCATION_TO_STR(WINED3D_LOCATION_PERSISTENT_MAP);
- #undef LOCATION_TO_STR
- if (location)
- FIXME("Unrecognized location flag(s) %#x.\n", location);
-diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
-index 3d535f4e17..b3fd0136ff 100644
---- a/dlls/wined3d/wined3d_private.h
-+++ b/dlls/wined3d/wined3d_private.h
-@@ -1470,6 +1470,7 @@ struct wined3d_bo_address
- {
- GLuint buffer_object;
- BYTE *addr;
-+ GLsizeiptr length;
- };
-
- struct wined3d_const_bo_address
-@@ -3047,6 +3048,9 @@ struct wined3d_resource_ops
- HRESULT (*resource_map_info)(struct wined3d_resource *resource, unsigned int sub_resource_idx,
- struct wined3d_map_info *info, DWORD flags);
- HRESULT (*resource_sub_resource_unmap)(struct wined3d_resource *resource, unsigned int sub_resource_idx);
-+ HRESULT (*resource_sub_resource_map_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx,
-+ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags);
-+ HRESULT (*resource_sub_resource_unmap_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx);
- };
-
- struct wined3d_resource
-@@ -3325,6 +3329,7 @@ void wined3d_texture_validate_location(struct wined3d_texture *texture,
- #define WINED3D_LOCATION_DRAWABLE 0x00000040
- #define WINED3D_LOCATION_RB_MULTISAMPLE 0x00000080
- #define WINED3D_LOCATION_RB_RESOLVED 0x00000100
-+#define WINED3D_LOCATION_PERSISTENT_MAP 0x00000200
-
- const char *wined3d_debug_location(DWORD location) DECLSPEC_HIDDEN;
-
-@@ -3672,6 +3677,7 @@ void wined3d_cs_emit_unload_resource(struct wined3d_cs *cs, struct wined3d_resou
- void wined3d_cs_emit_update_sub_resource(struct wined3d_cs *cs, struct wined3d_resource *resource,
- unsigned int sub_resource_idx, const struct wined3d_box *box, const void *data, unsigned int row_pitch,
- unsigned int slice_pitch) DECLSPEC_HIDDEN;
-+void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range) DECLSPEC_HIDDEN;
- void wined3d_cs_init_object(struct wined3d_cs *cs,
- void (*callback)(void *object), void *object) DECLSPEC_HIDDEN;
- HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx,
-@@ -3784,6 +3790,11 @@ struct wined3d_buffer
- UINT stride; /* 0 if no conversion */
- enum wined3d_buffer_conversion_type *conversion_map; /* NULL if no conversion */
- UINT conversion_stride; /* 0 if no shifted conversion */
-+
-+ /* persistent mapped buffer */
-+ struct wined3d_buffer_heap *buffer_heap;
-+ struct wined3d_map_range cs_persistent_map;
-+ struct wined3d_map_range mt_persistent_map; // TODO: make struct list?
- };
-
- static inline struct wined3d_buffer *buffer_from_resource(struct wined3d_resource *resource)
---
-2.16.2
-
diff --git a/0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch b/0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch
deleted file mode 100644
index 67c2dd9d0ebe..000000000000
--- a/0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch
+++ /dev/null
@@ -1,121 +0,0 @@
-From d8f54b1fedbbe64ebc5c08ff107408f454794f71 Mon Sep 17 00:00:00 2001
-From: Andrew Comminos <andrew@comminos.com>
-Date: Mon, 5 Mar 2018 20:28:34 -0800
-Subject: [PATCH 3/9] wined3d: Use ARB_multi_bind to speed up UBO updates.
-
-More frequent UBO remaps as a result of the persistent buffer allocator
-causes glBindBufferRange to be a bottleneck. Using ARB_multi_bind
-massively reduces state change overhead.
----
- dlls/wined3d/directx.c | 4 ++++
- dlls/wined3d/state.c | 46 +++++++++++++++++++++++++++++++++++++++-------
- dlls/wined3d/wined3d_gl.h | 1 +
- 3 files changed, 44 insertions(+), 7 deletions(-)
-
-diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c
-index 46c6a59536..8789a501ec 100644
---- a/dlls/wined3d/directx.c
-+++ b/dlls/wined3d/directx.c
-@@ -149,6 +149,7 @@ static const struct wined3d_extension_map gl_extension_map[] =
- {"GL_ARB_internalformat_query2", ARB_INTERNALFORMAT_QUERY2 },
- {"GL_ARB_map_buffer_alignment", ARB_MAP_BUFFER_ALIGNMENT },
- {"GL_ARB_map_buffer_range", ARB_MAP_BUFFER_RANGE },
-+ {"GL_ARB_multi_bind", ARB_MULTI_BIND },
- {"GL_ARB_multisample", ARB_MULTISAMPLE },
- {"GL_ARB_multitexture", ARB_MULTITEXTURE },
- {"GL_ARB_occlusion_query", ARB_OCCLUSION_QUERY },
-@@ -2796,6 +2797,8 @@ static void load_gl_funcs(struct wined3d_gl_info *gl_info)
- /* GL_ARB_map_buffer_range */
- USE_GL_FUNC(glFlushMappedBufferRange)
- USE_GL_FUNC(glMapBufferRange)
-+ /* GL_ARB_multi_bind */
-+ USE_GL_FUNC(glBindBuffersRange)
- /* GL_ARB_multisample */
- USE_GL_FUNC(glSampleCoverageARB)
- /* GL_ARB_multitexture */
-@@ -3973,6 +3976,7 @@ static BOOL wined3d_adapter_init_gl_caps(struct wined3d_adapter *adapter,
- {ARB_TEXTURE_VIEW, MAKEDWORD_VERSION(4, 3)},
-
- {ARB_CLEAR_TEXTURE, MAKEDWORD_VERSION(4, 4)},
-+ {ARB_MULTI_BIND, MAKEDWORD_VERSION(4, 4)},
-
- {ARB_CLIP_CONTROL, MAKEDWORD_VERSION(4, 5)},
- {ARB_CULL_DISTANCE, MAKEDWORD_VERSION(4, 5)},
-diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c
-index 6f7805b8bd..4d0718514f 100644
---- a/dlls/wined3d/state.c
-+++ b/dlls/wined3d/state.c
-@@ -5014,19 +5014,51 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state
- shader_type = WINED3D_SHADER_TYPE_COMPUTE;
-
- wined3d_gl_limits_get_uniform_block_range(&gl_info->limits, shader_type, &base, &count);
-- for (i = 0; i < count; ++i)
-+
-+ if (gl_info->supported[ARB_MULTI_BIND])
- {
-- buffer = state->cb[shader_type][i];
-- if (buffer)
-+ GLuint buffer_objects[count];
-+ GLsizeiptr buffer_offsets[count];
-+ GLsizeiptr buffer_sizes[count];
-+
-+ for (i = 0; i < count; ++i)
- {
-- wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations);
-- GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length));
-+ buffer = state->cb[shader_type][i];
-+ if (buffer)
-+ {
-+ wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations);
-+ buffer_objects[i] = bo_addr.buffer_object;
-+ buffer_offsets[i] = bo_addr.addr;
-+ buffer_sizes[i] = bo_addr.length;
-+ }
-+ else
-+ {
-+ buffer_objects[i] = buffer_offsets[i] = 0;
-+ // The ARB_multi_bind spec states that an error may be thrown if
-+ // `size` is less than or equal to zero, Thus, we specify a size for
-+ // unused buffers anyway.
-+ buffer_sizes[i] = 1;
-+ }
- }
-- else
-+ GL_EXTCALL(glBindBuffersRange(GL_UNIFORM_BUFFER, base, count, buffer_objects, buffer_offsets, buffer_sizes));
-+ }
-+ else
-+ {
-+ for (i = 0; i < count; ++i)
- {
-- GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0));
-+ buffer = state->cb[shader_type][i];
-+ if (buffer)
-+ {
-+ wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations);
-+ GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length));
-+ }
-+ else
-+ {
-+ GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0));
-+ }
- }
- }
-+
- checkGLcall("bind constant buffers");
- }
-
-diff --git a/dlls/wined3d/wined3d_gl.h b/dlls/wined3d/wined3d_gl.h
-index 7626864ef2..25c3301c94 100644
---- a/dlls/wined3d/wined3d_gl.h
-+++ b/dlls/wined3d/wined3d_gl.h
-@@ -82,6 +82,7 @@ enum wined3d_gl_extension
- ARB_INTERNALFORMAT_QUERY2,
- ARB_MAP_BUFFER_ALIGNMENT,
- ARB_MAP_BUFFER_RANGE,
-+ ARB_MULTI_BIND,
- ARB_MULTISAMPLE,
- ARB_MULTITEXTURE,
- ARB_OCCLUSION_QUERY,
---
-2.16.2
-
diff --git a/0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch b/0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch
deleted file mode 100644
index 954f929294c8..000000000000
--- a/0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 08fc0e139509a6755489af3936ede49936896122 Mon Sep 17 00:00:00 2001
-From: Andrew Comminos <andrew@comminos.com>
-Date: Tue, 6 Mar 2018 02:07:31 -0800
-Subject: [PATCH 4/9] wined3d: Use GL_CLIENT_STORAGE_BIT for persistent
- mappings.
-
----
- dlls/wined3d/buffer_heap.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
-index b133bd6893..75f84b0088 100644
---- a/dlls/wined3d/buffer_heap.c
-+++ b/dlls/wined3d/buffer_heap.c
-@@ -169,7 +169,7 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
- {
- access_flags |= GL_MAP_READ_BIT;
- }
-- storage_flags = access_flags;
-+ storage_flags = GL_CLIENT_STORAGE_BIT | access_flags;
-
- // TODO(acomminos): where should we be checking for errors here?
- GL_EXTCALL(glGenBuffers(1, &object->buffer_object));
---
-2.16.2
-
diff --git a/0005-wined3d-Disable-persistently-mapped-shader-resource-.patch b/0005-wined3d-Disable-persistently-mapped-shader-resource-.patch
deleted file mode 100644
index 10895a9ea107..000000000000
--- a/0005-wined3d-Disable-persistently-mapped-shader-resource-.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From 5ded1310d3de32fdfc467b20ab3937ab2b97d5b1 Mon Sep 17 00:00:00 2001
-From: Andrew Comminos <andrew@comminos.com>
-Date: Thu, 8 Mar 2018 22:00:33 -0800
-Subject: [PATCH 5/9] wined3d: Disable persistently mapped shader resource
- buffers.
-
----
- dlls/wined3d/buffer.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c
-index e7a0f59a67..a2ac411b5e 100644
---- a/dlls/wined3d/buffer.c
-+++ b/dlls/wined3d/buffer.c
-@@ -1598,6 +1598,10 @@ static HRESULT buffer_init(struct wined3d_buffer *buffer, struct wined3d_device
- {
- WARN_(d3d_perf)("Not creating a persistent mapping for a dynamic buffer because ARB_buffer_storage is unsupported.\n");
- }
-+ else if (bind_flags & WINED3D_BIND_SHADER_RESOURCE)
-+ {
-+ FIXME_(d3d_perf)("Not using a persistent mapping for shader resource buffer %p (unimplemented)\n", buffer);
-+ }
- else
- {
- // If supported, use persistent mapped buffers instead of a
---
-2.16.2
-
diff --git a/0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch b/0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch
deleted file mode 100644
index 2777b6a5324e..000000000000
--- a/0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch
+++ /dev/null
@@ -1,92 +0,0 @@
-From 71b7b3340d147bf7a9b5567c080b32ccd3a39dc6 Mon Sep 17 00:00:00 2001
-From: Andrew Comminos <andrew@comminos.com>
-Date: Thu, 8 Mar 2018 22:42:03 -0800
-Subject: [PATCH 6/9] wined3d: Perform initial allocation of persistent buffers
- asynchronously.
-
----
- dlls/wined3d/buffer.c | 30 ++++++++++++++++++++----------
- 1 file changed, 20 insertions(+), 10 deletions(-)
-
-diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c
-index a2ac411b5e..c492fcc8c6 100644
---- a/dlls/wined3d/buffer.c
-+++ b/dlls/wined3d/buffer.c
-@@ -272,7 +272,7 @@ fail:
- }
-
- /* Context activation is done by the caller. */
--static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer, struct wined3d_context *context)
-+static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer)
- {
- struct wined3d_device *device = buffer->resource.device;
- struct wined3d_buffer_heap *heap;
-@@ -688,7 +688,7 @@ static BOOL wined3d_buffer_prepare_location(struct wined3d_buffer *buffer,
- WARN("Trying to map a persistent region for buffer %p without WINED3D_BUFFER_PERSISTENT.\n", buffer);
- return FALSE;
- }
-- return buffer_alloc_persistent_map(buffer, context);
-+ return buffer_alloc_persistent_map(buffer);
-
- default:
- ERR("Invalid location %s.\n", wined3d_debug_location(location));
-@@ -1116,7 +1116,7 @@ static HRESULT wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UI
- const struct wined3d_gl_info *gl_info;
- context = context_acquire(device, NULL, 0);
-
-- FIXME_(d3d_perf)("Fences not used for persistent buffer maps on CS thread, using glFinish.\n");
-+ FIXME_(d3d_perf)("Fences not used for persistent buffer maps on CS thread, using glFinish (flags: %x)\n", flags);
-
- gl_info = context->gl_info;
- gl_info->gl_ops.gl.p_glFinish();
-@@ -1394,8 +1394,20 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc
-
- // Support immediate mapping of persistent buffers off the command thread,
- // which require no GL calls to interface with.
-- if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)
-+ if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
- {
-+ // Attempt to load a persistent map without syncing, if possible.
-+ if (!(buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP))
-+ {
-+ wined3d_resource_wait_idle(resource);
-+ if (!buffer_alloc_persistent_map(buffer))
-+ {
-+ ERR_(d3d_perf)("Failed to allocate persistent buffer, falling back to sync path.");
-+ return E_FAIL;
-+ }
-+ wined3d_buffer_validate_location(buffer, WINED3D_LOCATION_PERSISTENT_MAP);
-+ }
-+
- map_desc->row_pitch = map_desc->slice_pitch = buffer->desc.byte_width;
- if (flags & WINED3D_MAP_DISCARD)
- {
-@@ -1415,6 +1427,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc
- // currently used buffer to the free pool, along with the fence that
- // must be called before the buffer can be reused.
- wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, map_range);
-+
- return WINED3D_OK;
- }
- else if (flags & WINED3D_MAP_NOOVERWRITE)
-@@ -1425,14 +1438,11 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc
- struct wined3d_map_range map_range = buffer->mt_persistent_map;
- map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset;
- resource->map_count++;
-+
- return WINED3D_OK;
- }
-- else
-- {
-- // TODO(acomminos): Should check mapped ranges to see if the region is writeable even though NOOVERWRITE is specified.
-- WARN_(d3d_perf)("Mapping persistent buffer %p in sync with CS thread.\n", buffer);
-- // XXX(acomminos): kill this early return. they're the worst.
-- }
-+
-+ WARN_(d3d_perf)("Mapping persistent buffer %p in sync with CS thread.\n", buffer);
- }
-
- return E_NOTIMPL;
---
-2.16.2
-
diff --git a/0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch b/0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch
deleted file mode 100644
index d0872df82707..000000000000
--- a/0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch
+++ /dev/null
@@ -1,351 +0,0 @@
-From fc7907d5264c1606477f9287c949c3c8794859ec Mon Sep 17 00:00:00 2001
-From: Andrew Comminos <andrew@comminos.com>
-Date: Thu, 8 Mar 2018 23:01:50 -0800
-Subject: [PATCH 7/9] wined3d: Avoid freeing persistent buffer heap elements
- during use.
-
-Using HeapFree is expensive, especially when we don't have our buffers
-for long.
----
- dlls/wined3d/buffer.c | 29 +++++++++++----------
- dlls/wined3d/buffer_heap.c | 57 ++++++++++++++++++------------------------
- dlls/wined3d/context.c | 4 +--
- dlls/wined3d/cs.c | 6 ++---
- dlls/wined3d/wined3d_private.h | 25 ++++++++++++------
- 5 files changed, 61 insertions(+), 60 deletions(-)
-
-diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c
-index c492fcc8c6..74b3ba8abd 100644
---- a/dlls/wined3d/buffer.c
-+++ b/dlls/wined3d/buffer.c
-@@ -276,7 +276,7 @@ static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer)
- {
- struct wined3d_device *device = buffer->resource.device;
- struct wined3d_buffer_heap *heap;
-- struct wined3d_map_range map_range;
-+ struct wined3d_buffer_heap_element *elem;
- HRESULT hr;
-
- if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER)
-@@ -292,12 +292,12 @@ static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer)
- }
-
- buffer->buffer_heap = heap;
-- if (FAILED(hr = wined3d_buffer_heap_alloc(heap, buffer->resource.size, &map_range)))
-+ if (FAILED(hr = wined3d_buffer_heap_alloc(heap, buffer->resource.size, &elem)))
- {
- goto fail;
- }
-- buffer->cs_persistent_map = map_range;
-- buffer->mt_persistent_map = map_range;
-+ buffer->cs_persistent_map = elem;
-+ buffer->mt_persistent_map = elem;
- return TRUE;
-
- fail:
-@@ -753,7 +753,7 @@ BOOL wined3d_buffer_load_location(struct wined3d_buffer *buffer,
- if (buffer->conversion_map)
- FIXME("Attempting to use conversion map with persistent mapping.\n");
- memcpy(buffer->buffer_heap->map_ptr +
-- buffer->cs_persistent_map.offset,
-+ buffer->cs_persistent_map->range.offset,
- buffer->resource.heap_memory, buffer->resource.size);
- break;
-
-@@ -801,11 +801,11 @@ DWORD wined3d_buffer_get_memory(struct wined3d_buffer *buffer,
- {
- // FIXME(acomminos): should we expose a buffer object we don't wholly own here?
- data->buffer_object = buffer->buffer_heap->buffer_object;
-- data->addr = buffer->cs_persistent_map.offset;
-+ data->addr = buffer->cs_persistent_map->range.offset;
- // Note that the size of the underlying buffer allocation may be larger
- // than the buffer knows about. In this case, we've rounded it up to be
- // aligned (e.g. for uniform buffer offsets).
-- data->length = buffer->cs_persistent_map.size;
-+ data->length = buffer->cs_persistent_map->range.size;
- return WINED3D_LOCATION_PERSISTENT_MAP;
- }
- if (locations & WINED3D_LOCATION_SYSMEM)
-@@ -1122,7 +1122,7 @@ static HRESULT wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UI
- gl_info->gl_ops.gl.p_glFinish();
-
- base = buffer->buffer_heap->map_ptr
-- + buffer->cs_persistent_map.offset;
-+ + buffer->cs_persistent_map->range.offset;
- *data = base + offset;
-
- context_release(context);
-@@ -1412,22 +1412,21 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc
- if (flags & WINED3D_MAP_DISCARD)
- {
- HRESULT hr;
-- struct wined3d_map_range map_range;
-- if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &map_range)))
-+ struct wined3d_buffer_heap_element *mt_elem;
-+ if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &mt_elem)))
- {
- FIXME_(d3d_perf)("Failed to allocate new buffer, falling back to sync path.\n");
- return hr;
- }
-- map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset;
-+ map_desc->data = buffer->buffer_heap->map_ptr + mt_elem->range.offset + offset;
- resource->map_count++;
-
-- buffer->mt_persistent_map = map_range;
-+ buffer->mt_persistent_map = mt_elem;
-
- // Discard handler on CSMT thread is responsible for returning the
- // currently used buffer to the free pool, along with the fence that
- // must be called before the buffer can be reused.
-- wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, map_range);
--
-+ wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, mt_elem);
- return WINED3D_OK;
- }
- else if (flags & WINED3D_MAP_NOOVERWRITE)
-@@ -1435,7 +1434,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc
- // Allow immediate access for persistent buffers without a fence.
- // Always use the latest buffer in this case in case the latest
- // DISCARDed one hasn't reached the command stream yet.
-- struct wined3d_map_range map_range = buffer->mt_persistent_map;
-+ struct wined3d_map_range map_range = buffer->mt_persistent_map->range;
- map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset;
- resource->map_count++;
-
-diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
-index 75f84b0088..80670c515f 100644
---- a/dlls/wined3d/buffer_heap.c
-+++ b/dlls/wined3d/buffer_heap.c
-@@ -25,18 +25,6 @@
- WINE_DEFAULT_DEBUG_CHANNEL(d3d);
- WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
-
--struct wined3d_buffer_heap_element
--{
-- struct wined3d_map_range range;
--
-- // rbtree data
-- struct wine_rb_entry entry;
--
-- // Binned free list positions
-- struct wined3d_buffer_heap_element *next;
-- struct wined3d_buffer_heap_element *prev;
--};
--
- struct wined3d_buffer_heap_fenced_element
- {
- struct wined3d_buffer_heap_bin_set free_list;
-@@ -82,6 +70,11 @@ static int element_bin(struct wined3d_buffer_heap_element *elem)
- // Inserts an element into the appropriate free list bin.
- static void element_insert_free_bin(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
- {
-+ if (elem->prev || elem->next)
-+ {
-+ ERR("Element %p in already in a free list (for some reason).\n", elem);
-+ }
-+
- int bin = element_bin(elem);
-
- elem->prev = NULL;
-@@ -206,7 +199,7 @@ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct win
- return WINED3D_OK;
- }
-
--HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range)
-+HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_buffer_heap_element **out_elem)
- {
- int initial_bin;
- int initial_size = size;
-@@ -233,24 +226,24 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
- remaining_range.offset = elem->range.offset + size;
- remaining_range.size = elem->range.size - size;
-
-- out_range->offset = elem->range.offset;
-- out_range->size = size;
-+ // Take the element from the free list, transferring ownership to
-+ // the caller.
-+ element_remove_free(heap, elem);
-+ // Resize the element so that we can free the remainder.
-+ elem->range.size = size;
-
-- TRACE_(d3d_perf)("Allocated %d (requested %d) at %p from bin %d (initial %d)\n", size, initial_size, elem->range.offset, i, initial_bin);
-+ *out_elem = elem;
-
-- // Remove the element from its current free bin to move it to the correct list.
-- element_remove_free(heap, elem);
-+ TRACE_(d3d_perf)("Allocated %d (requested %d) at %p from bin %d (initial %d)\n", size, initial_size, elem->range.offset, i, initial_bin);
-
- if (remaining_range.size > 0)
- {
-+ struct wined3d_buffer_heap_element *remaining_elem;
-+
- TRACE_(d3d_perf)("Imperfect fit allocated, fragmenting remainder of %lld at %p.\n", remaining_range.size, remaining_range.offset);
-
-- elem->range = remaining_range;
-- element_insert_free_bin(heap, elem);
-- }
-- else
-- {
-- HeapFree(GetProcessHeap(), 0, elem);
-+ remaining_elem = element_new(remaining_range.offset, remaining_range.size);
-+ element_insert_free_bin(heap, remaining_elem);
- }
-
- LeaveCriticalSection(&heap->temp_lock);
-@@ -265,7 +258,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
- if (SUCCEEDED(wined3d_buffer_heap_deferred_coalesce(heap, &num_coalesced)))
- {
- if (num_coalesced > 0)
-- return wined3d_buffer_heap_alloc(heap, size, out_range);
-+ return wined3d_buffer_heap_alloc(heap, size, out_elem);
- }
-
- FIXME_(d3d_perf)("Coalescing did not create new blocks, failing.\n");
-@@ -273,16 +266,15 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
- return WINED3DERR_OUTOFVIDEOMEMORY;
- }
-
--HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range)
-+HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
- {
-- struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size);
--
-- if (!elem)
-- return E_OUTOFMEMORY;
--
- EnterCriticalSection(&heap->temp_lock);
-
- // Only insert the element into a free bin, coalescing will occur later.
-+ //
-+ // Note that the reason that we pass around wined3d_buffer_heap_element
-+ // instead of a range is to avoid frequent HeapAlloc/HeapFree operations
-+ // when we're reusing buffers.
- element_insert_free_bin(heap, elem);
-
- LeaveCriticalSection(&heap->temp_lock);
-@@ -290,9 +282,8 @@ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3
- return WINED3D_OK;
- }
-
--HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range)
-+HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_buffer_heap_element *elem)
- {
-- struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size);
- int bin_index = element_bin(elem);
- struct wined3d_buffer_heap_bin *bin = &heap->pending_fenced_bins.bins[bin_index];
-
-diff --git a/dlls/wined3d/context.c b/dlls/wined3d/context.c
-index eae2c3a79d..01aa53597f 100644
---- a/dlls/wined3d/context.c
-+++ b/dlls/wined3d/context.c
-@@ -5005,9 +5005,9 @@ void draw_primitive(struct wined3d_device *device, const struct wined3d_state *s
- if (parameters->indexed)
- {
- struct wined3d_buffer *index_buffer = state->index_buffer;
-- if (index_buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)
-+ if (index_buffer->cs_persistent_map)
- {
-- idx_data = index_buffer->cs_persistent_map.offset;
-+ idx_data = index_buffer->cs_persistent_map->range.offset;
- }
- else if (!index_buffer->buffer_object || !stream_info->all_vbo)
- {
-diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
-index e61b8dedbb..d1f665d505 100644
---- a/dlls/wined3d/cs.c
-+++ b/dlls/wined3d/cs.c
-@@ -444,7 +444,7 @@ struct wined3d_cs_discard_buffer
- {
- enum wined3d_cs_op opcode;
- struct wined3d_buffer *buffer;
-- struct wined3d_map_range map_range;
-+ struct wined3d_buffer_heap_element *map_range;
- };
-
- struct wined3d_cs_stop
-@@ -2496,14 +2496,14 @@ static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *da
- wined3d_resource_release(&op->buffer->resource);
- }
-
--void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range)
-+void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_buffer_heap_element *elem)
- {
- struct wined3d_cs_discard_buffer *op;
-
- op = cs->ops->require_space(cs, sizeof(*op), WINED3D_CS_QUEUE_DEFAULT);
- op->opcode = WINED3D_CS_OP_DISCARD_BUFFER;
- op->buffer = buffer;
-- op->map_range = map_range;
-+ op->map_range = elem;
-
- wined3d_resource_acquire(&buffer->resource);
-
-diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
-index b3fd0136ff..0114444943 100644
---- a/dlls/wined3d/wined3d_private.h
-+++ b/dlls/wined3d/wined3d_private.h
-@@ -3531,6 +3531,18 @@ struct wined3d_map_range
- GLsizeiptr size;
- };
-
-+struct wined3d_buffer_heap_element
-+{
-+ struct wined3d_map_range range;
-+
-+ // rbtree data
-+ struct wine_rb_entry entry;
-+
-+ // Binned free list positions
-+ struct wined3d_buffer_heap_element *next;
-+ struct wined3d_buffer_heap_element *prev;
-+};
-+
- enum wined3d_cs_queue_id
- {
- WINED3D_CS_QUEUE_DEFAULT = 0,
-@@ -3677,7 +3689,7 @@ void wined3d_cs_emit_unload_resource(struct wined3d_cs *cs, struct wined3d_resou
- void wined3d_cs_emit_update_sub_resource(struct wined3d_cs *cs, struct wined3d_resource *resource,
- unsigned int sub_resource_idx, const struct wined3d_box *box, const void *data, unsigned int row_pitch,
- unsigned int slice_pitch) DECLSPEC_HIDDEN;
--void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range) DECLSPEC_HIDDEN;
-+void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_buffer_heap_element *map_range) DECLSPEC_HIDDEN;
- void wined3d_cs_init_object(struct wined3d_cs *cs,
- void (*callback)(void *object), void *object) DECLSPEC_HIDDEN;
- HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx,
-@@ -3711,7 +3723,6 @@ enum wined3d_buffer_conversion_type
- CONV_POSITIONT,
- };
-
--struct wined3d_buffer_heap_element;
- struct wined3d_buffer_heap_fenced_element;
-
- // Number of power-of-two buckets to populate.
-@@ -3750,11 +3761,11 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
- HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN;
- // Fetches a buffer from the heap of at least the given size.
- // Attempts to coalesce blocks under memory pressure.
--HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range* out_range) DECLSPEC_HIDDEN;
-+HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_buffer_heap_element** out_elem) DECLSPEC_HIDDEN;
- // Immediately frees a heap-allocated buffer segment.
--HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN;
-+HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) DECLSPEC_HIDDEN;
- // Enqueues a buffer segment to return to the heap once its fence has been signaled.
--HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) DECLSPEC_HIDDEN;
-+HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_buffer_heap_element *elem) DECLSPEC_HIDDEN;
- // Issues a fence for the current set of pending fenced buffers.
- // Double-buffered: if the last fence issued has not yet been triggered, waits
- // on it.
-@@ -3793,8 +3804,8 @@ struct wined3d_buffer
-
- /* persistent mapped buffer */
- struct wined3d_buffer_heap *buffer_heap;
-- struct wined3d_map_range cs_persistent_map;
-- struct wined3d_map_range mt_persistent_map; // TODO: make struct list?
-+ struct wined3d_buffer_heap_element *cs_persistent_map;
-+ struct wined3d_buffer_heap_element *mt_persistent_map;
- };
-
- static inline struct wined3d_buffer *buffer_from_resource(struct wined3d_resource *resource)
---
-2.16.2
-
diff --git a/0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch b/0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch
deleted file mode 100644
index 4487d4b8c1d7..000000000000
--- a/0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch
+++ /dev/null
@@ -1,211 +0,0 @@
-From 8af55b60fa87bb0fb21afd17e3467253b53e61a1 Mon Sep 17 00:00:00 2001
-From: Andrew Comminos <andrew@comminos.com>
-Date: Thu, 15 Mar 2018 21:07:21 -0700
-Subject: [PATCH 8/9] wined3d: Add DISABLE_PBA envvar, some PBA cleanup.
-
----
- dlls/wined3d/buffer.c | 4 ++--
- dlls/wined3d/buffer_heap.c | 34 ++++++++++++++++++++++++++--------
- dlls/wined3d/device.c | 38 ++++++++++++++++++++++++++------------
- dlls/wined3d/query.c | 2 +-
- dlls/wined3d/wined3d_private.h | 6 ++----
- 5 files changed, 57 insertions(+), 27 deletions(-)
-
-diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c
-index 74b3ba8abd..651d9a4360 100644
---- a/dlls/wined3d/buffer.c
-+++ b/dlls/wined3d/buffer.c
-@@ -1603,9 +1603,9 @@ static HRESULT buffer_init(struct wined3d_buffer *buffer, struct wined3d_device
-
- if (buffer->resource.usage & WINED3DUSAGE_DYNAMIC)
- {
-- if (!gl_info->supported[ARB_BUFFER_STORAGE])
-+ if (!device->use_pba)
- {
-- WARN_(d3d_perf)("Not creating a persistent mapping for a dynamic buffer because ARB_buffer_storage is unsupported.\n");
-+ WARN_(d3d_perf)("Not creating a persistent mapping for dynamic buffer %p because the PBA is disabled.\n", buffer);
- }
- else if (bind_flags & WINED3D_BIND_SHADER_RESOURCE)
- {
-diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
-index 80670c515f..899aad9612 100644
---- a/dlls/wined3d/buffer_heap.c
-+++ b/dlls/wined3d/buffer_heap.c
-@@ -25,6 +25,9 @@
- WINE_DEFAULT_DEBUG_CHANNEL(d3d);
- WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
-
-+// Arbitrary binding to use when binding the persistent buffer.
-+#define BIND_TARGET GL_ARRAY_BUFFER
-+
- struct wined3d_buffer_heap_fenced_element
- {
- struct wined3d_buffer_heap_bin_set free_list;
-@@ -140,7 +143,6 @@ static int free_tree_compare(const void *key, const struct wine_rb_entry *entry)
- HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap)
- {
- const struct wined3d_gl_info *gl_info = context->gl_info;
-- const GLenum buffer_target = GL_ARRAY_BUFFER;
- GLbitfield access_flags;
- GLbitfield storage_flags;
- struct wined3d_buffer_heap_element *initial_elem;
-@@ -162,22 +164,23 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
- {
- access_flags |= GL_MAP_READ_BIT;
- }
-+
- storage_flags = GL_CLIENT_STORAGE_BIT | access_flags;
-
-- // TODO(acomminos): where should we be checking for errors here?
- GL_EXTCALL(glGenBuffers(1, &object->buffer_object));
-+ checkGLcall("glGenBuffers");
-
-- context_bind_bo(context, buffer_target, object->buffer_object);
-+ context_bind_bo(context, BIND_TARGET, object->buffer_object);
-
-- // TODO(acomminos): assert glBufferStorage supported?
-- GL_EXTCALL(glBufferStorage(buffer_target, size, NULL, storage_flags));
-+ GL_EXTCALL(glBufferStorage(BIND_TARGET, size, NULL, storage_flags));
-+ checkGLcall("glBufferStorage");
-
-- if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(buffer_target, 0, size, access_flags))))
-+ if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(BIND_TARGET, 0, size, access_flags))))
- {
- ERR("Couldn't map persistent buffer.\n");
- return -1; // FIXME(acomminos): proper error code, cleanup
- }
-- context_bind_bo(context, buffer_target, 0);
-+ context_bind_bo(context, BIND_TARGET, 0);
-
- object->fenced_head = object->fenced_tail = NULL;
- object->alignment = alignment;
-@@ -195,7 +198,22 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
- /* Context activation is done by the caller. */
- HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context)
- {
-- FIXME("Unimplemented, leaking buffer");
-+ const struct wined3d_gl_info *gl_info = context->gl_info;
-+
-+ context_bind_bo(context, BIND_TARGET, heap->buffer_object);
-+ GL_EXTCALL(glUnmapBuffer(BIND_TARGET));
-+ checkGLcall("glUnmapBuffer");
-+ context_bind_bo(context, BIND_TARGET, 0);
-+
-+ GL_EXTCALL(glDeleteBuffers(1, &heap->buffer_object));
-+ checkGLcall("glDeleteBuffers");
-+
-+ DeleteCriticalSection(&heap->temp_lock);
-+
-+ // TODO(acomminos): cleanup free lists, fenced list, etc.
-+
-+ HeapFree(GetProcessHeap(), 0, heap);
-+
- return WINED3D_OK;
- }
-
-diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
-index 785841a062..f4c9dc7bd6 100644
---- a/dlls/wined3d/device.c
-+++ b/dlls/wined3d/device.c
-@@ -837,16 +837,27 @@ static void destroy_default_samplers(struct wined3d_device *device, struct wined
- static void create_buffer_heap(struct wined3d_device *device, struct wined3d_context *context)
- {
- const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
-- // TODO(acomminos): kill this magic number. perhaps base on vram.
-- GLsizeiptr geo_heap_size = 512 * 1024 * 1024;
-- // We choose a constant buffer size of 128MB, the same as NVIDIA claims to
-- // use in their Direct3D driver for discarded constant buffers.
-- GLsizeiptr cb_heap_size = 128 * 1024 * 1024;
-- GLint ub_alignment;
-- HRESULT hr;
-+ BOOL use_pba = FALSE;
-+ char *env_pba_disable;
-
-- if (gl_info->supported[ARB_BUFFER_STORAGE])
-+ if (!gl_info->supported[ARB_BUFFER_STORAGE])
-+ {
-+ FIXME("Not using PBA, ARB_buffer_storage unsupported.\n");
-+ }
-+ else if ((env_pba_disable = getenv("PBA_DISABLE")) && *env_pba_disable != '0')
- {
-+ FIXME("Not using PBA, envvar 'PBA_DISABLE' set.\n");
-+ }
-+ else
-+ {
-+ // TODO(acomminos): kill this magic number. perhaps base on vram.
-+ GLsizeiptr geo_heap_size = 512 * 1024 * 1024;
-+ // We choose a constant buffer size of 128MB, the same as NVIDIA claims to
-+ // use in their Direct3D driver for discarded constant buffers.
-+ GLsizeiptr cb_heap_size = 128 * 1024 * 1024;
-+ GLint ub_alignment;
-+ HRESULT hr;
-+
- gl_info->gl_ops.gl.p_glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &ub_alignment);
-
- // Align constant buffer heap size, in case GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT isn't a power of two (for some reason).
-@@ -855,19 +866,22 @@ static void create_buffer_heap(struct wined3d_device *device, struct wined3d_con
- if (FAILED(hr = wined3d_buffer_heap_create(context, geo_heap_size, 0, TRUE, &device->wo_buffer_heap)))
- {
- ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr);
-+ goto fail;
- }
-
- if (FAILED(hr = wined3d_buffer_heap_create(context, cb_heap_size, ub_alignment, TRUE, &device->cb_buffer_heap)))
- {
- ERR("Failed to create persistent buffer heap for constant buffers, hr %#x.\n", hr);
-+ goto fail;
- }
-
- FIXME("Initialized PBA (geo_heap_size: %ld, cb_heap_size: %ld, ub_align: %d)\n", geo_heap_size, cb_heap_size, ub_alignment);
-+
-+ use_pba = TRUE;
- }
-- else
-- {
-- FIXME("Not using PBA, ARB_buffer_storage unsupported.\n");
-- }
-+
-+fail:
-+ device->use_pba = use_pba;
- }
-
- /* Context activation is done by the caller. */
-diff --git a/dlls/wined3d/query.c b/dlls/wined3d/query.c
-index f3ca1630e5..5ea79b6e4a 100644
---- a/dlls/wined3d/query.c
-+++ b/dlls/wined3d/query.c
-@@ -88,7 +88,7 @@ static BOOL wined3d_fence_supported(const struct wined3d_gl_info *gl_info)
- return gl_info->supported[ARB_SYNC] || gl_info->supported[NV_FENCE] || gl_info->supported[APPLE_FENCE];
- }
-
--enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence,
-+static enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence,
- const struct wined3d_device *device, DWORD flags)
- {
- const struct wined3d_gl_info *gl_info;
-diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
-index 0114444943..63f004d57e 100644
---- a/dlls/wined3d/wined3d_private.h
-+++ b/dlls/wined3d/wined3d_private.h
-@@ -1713,9 +1713,6 @@
- void wined3d_fence_issue(struct wined3d_fence *fence, const struct wined3d_device *device) DECLSPEC_HIDDEN;
- enum wined3d_fence_result wined3d_fence_wait(const struct wined3d_fence *fence,
- const struct wined3d_device *device) DECLSPEC_HIDDEN;
--// XXX(acomminos): really expose this?
--enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence,
-- const struct wined3d_device *device, DWORD flags) DECLSPEC_HIDDEN;
-
- /* Direct3D terminology with little modifications. We do not have an issued
- * state because only the driver knows about it, but we have a created state
-@@ -2943,6 +2940,7 @@
- BYTE inScene : 1; /* A flag to check for proper BeginScene / EndScene call pairs */
- BYTE softwareVertexProcessing : 1; /* process vertex shaders using software or hardware */
- BYTE filter_messages : 1;
-+ BYTE use_pba : 1; /* A flag to use the persistent buffer allocator for dynamic buffers. */
- BYTE padding : 3;
-
- unsigned char surface_alignment; /* Line Alignment of surfaces */
-
---
-2.16.2
-
diff --git a/0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch b/0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch
deleted file mode 100644
index 7d8bbb854e32..000000000000
--- a/0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch
+++ /dev/null
@@ -1,96 +0,0 @@
-From a2326162cf4fb5601c0f296bfd1294a493912bce Mon Sep 17 00:00:00 2001
-From: Andrew Comminos <andrew@comminos.com>
-Date: Thu, 15 Mar 2018 21:22:06 -0700
-Subject: [PATCH 9/9] wined3d: Add quirk to use GL_CLIENT_STORAGE_BIT for mesa.
-
----
- dlls/wined3d/buffer_heap.c | 15 ++++++++++++++-
- dlls/wined3d/directx.c | 19 +++++++++++++++++++
- dlls/wined3d/wined3d_private.h | 1 +
- 3 files changed, 34 insertions(+), 1 deletion(-)
-
-diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
-index 899aad9612..9e8f2d799d 100644
---- a/dlls/wined3d/buffer_heap.c
-+++ b/dlls/wined3d/buffer_heap.c
-@@ -165,7 +165,20 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
- access_flags |= GL_MAP_READ_BIT;
- }
-
-- storage_flags = GL_CLIENT_STORAGE_BIT | access_flags;
-+ storage_flags = access_flags;
-+ // FIXME(acomminos): So, about GL_CLIENT_STORAGE_BIT:
-+ // - On NVIDIA, DMA CACHED memory is used when this flag is set. SYSTEM HEAP
-+ // memory is used without it, which (in my testing) is much faster.
-+ // - On Mesa, GTT is used when this flag is set. This is what we want- we
-+ // upload to VRAM occur otherwise, which is unusably slow (on radeon).
-+ //
-+ // Thus, we're only going to set this on mesa for now.
-+ // Hints are awful anyway.
-+ if (gl_info->quirks & WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT)
-+ {
-+ FIXME_(d3d_perf)("PBA: using GL_CLIENT_STORAGE_BIT quirk");
-+ storage_flags |= GL_CLIENT_STORAGE_BIT;
-+ }
-
- GL_EXTCALL(glGenBuffers(1, &object->buffer_object));
- checkGLcall("glGenBuffers");
-diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c
-index 8789a501ec..f455ed54e1 100644
---- a/dlls/wined3d/directx.c
-+++ b/dlls/wined3d/directx.c
-@@ -947,6 +947,13 @@ static BOOL match_broken_viewport_subpixel_bits(const struct wined3d_gl_info *gl
- return !wined3d_caps_gl_ctx_test_viewport_subpixel_bits(ctx);
- }
-
-+static BOOL match_mesa(const struct wined3d_gl_info *gl_info, struct wined3d_caps_gl_ctx *ctx,
-+ const char *gl_renderer, enum wined3d_gl_vendor gl_vendor,
-+ enum wined3d_pci_vendor card_vendor, enum wined3d_pci_device device)
-+{
-+ return gl_vendor == GL_VENDOR_MESA;
-+}
-+
- static void quirk_apple_glsl_constants(struct wined3d_gl_info *gl_info)
- {
- /* MacOS needs uniforms for relative addressing offsets. This can accumulate to quite a few uniforms.
-@@ -1084,6 +1091,13 @@ static void quirk_broken_viewport_subpixel_bits(struct wined3d_gl_info *gl_info)
- }
- }
-
-+static void quirk_use_client_storage_bit(struct wined3d_gl_info *gl_info)
-+{
-+ // Using ARB_buffer_storage on Mesa requires the GL_CLIENT_STORAGE_BIT to be
-+ // set to use GTT for immutable buffers on radeon (see PIPE_USAGE_STREAM).
-+ gl_info->quirks |= WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT;
-+}
-+
- struct driver_quirk
- {
- BOOL (*match)(const struct wined3d_gl_info *gl_info, struct wined3d_caps_gl_ctx *ctx,
-@@ -1180,6 +1194,11 @@ static const struct driver_quirk quirk_table[] =
- quirk_broken_viewport_subpixel_bits,
- "Nvidia viewport subpixel bits bug"
- },
-+ {
-+ match_mesa,
-+ quirk_use_client_storage_bit,
-+ "Use GL_CLIENT_STORAGE_BIT for persistent buffers on mesa",
-+ },
- };
-
- /* Certain applications (Steam) complain if we report an outdated driver version. In general,
-diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
-index 63f004d57e..96715261b1 100644
---- a/dlls/wined3d/wined3d_private.h
-+++ b/dlls/wined3d/wined3d_private.h
-@@ -75,6 +75,7 @@
- #define WINED3D_QUIRK_INFO_LOG_SPAM 0x00000080
- #define WINED3D_QUIRK_LIMITED_TEX_FILTERING 0x00000100
- #define WINED3D_QUIRK_BROKEN_ARB_FOG 0x00000200
-+#define WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT 0x00000400
-
- enum wined3d_ffp_idx
- {
---
-2.16.2
-
diff --git a/PKGBUILD b/PKGBUILD
index fcbeb41471ae..e62a7852ed66 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -2,11 +2,10 @@
# Maintainer: Lars Norberg < arch-packages at cogwerkz dot org >
pkgname=wine-staging-pba-git
-pkgver=3.4.r3604.e50f0488+wine.3.4.r178.ge1c7a1f7ce+pba.r29.87307b1
+pkgver=3.4.r3607.5876a3f7+wine.3.4.r192.gd7430abd40
pkgrel=1
_winesrcdir='wine-git'
_stgsrcdir='wine-staging-git'
-_pbasrcdir='wine-pba'
pkgdesc='Wine staging branch with PBA patches for increased D3D performance. Git versions. (Also includes Path of Exile DX11 patch!)'
url='https://github.com/acomminos/wine-pba'
arch=('x86_64')
@@ -93,16 +92,7 @@ optdepends=(
)
source=("$_winesrcdir"::'git://source.winehq.org/git/wine.git'
"$_stgsrcdir"::'git+https://github.com/wine-staging/wine-staging.git'
- "$_pbasrcdir"::'git+https://github.com/acomminos/wine-pba.git'
- '0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch'
- '0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch'
- '0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch'
- '0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch'
- '0005-wined3d-Disable-persistently-mapped-shader-resource-.patch'
- '0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch'
- '0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch'
- '0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch'
- '0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch'
+ 'wine-staging-pba.patch'
'steam.patch'
'poe-fix.patch'
'harmony-fix.diff'
@@ -110,16 +100,7 @@ source=("$_winesrcdir"::'git://source.winehq.org/git/wine.git'
'wine-binfmt.conf')
sha256sums=('SKIP'
'SKIP'
- 'SKIP'
- 'f5f8c507f79c829b118125a3749f80ed31eb8ba8ad024d99554a1a6458c438eb'
- '98372adbb16949edca4c90604cceac5db3d4bf37eccc13d59d3e5735f53f2501'
- '112f8fc68d5421805fb1de32c0216c41412afae21153d803127c9d1c1103e35b'
- '016ee498c9ff7af0d14c7b0e42f4bc5255f5dae6d391fd36c2060668fcade662'
- 'ec11046f6335c2831e3b89c2b0c241b74974415a64523f35f0a606d27d1dbfbb'
- 'd2a8febc2500d6a7bed418232efedf82f114e7d14ca1199789abe576dddae90b'
- 'ff5ef40b945fdad16db99a1f736c20c53711cfe002d367ea4aa55d84bf6a1207'
- 'dee52666fc680b74f5d5ba1a2a74de715c7b49376895ff057ccada9daaef5911'
- '5c3776e5c94b51b368384c79aec9b26716fc6517935d782c121c856f21dfd223'
+ 'cc229607e417841d3e900cc93c3ab2f79c0851705a07e7206729193ffa3dc9db'
'972d6b114f7621c5f3bd34b1105dd390b318db18fbc76328001c984db488a9b0'
'1c8be30224a67c0f279ae1324165708371aad8f290ebc6da69c686d0904e606c'
'50ccb5bd2067e5d2739c5f7abcef11ef096aa246f5ceea11d2c3b508fc7f77a1'
@@ -141,13 +122,9 @@ pkgver() {
cd "${srcdir}/${_winesrcdir}"
local _wineVer="$(git describe --long --tags | sed 's/\([^-]*-g\)/r\1/;s/-/./g;s/^v//;s/\.rc/rc/')"
- # retrieve current wine-pba version
- cd "${srcdir}/${_pbasrcdir}"
- local _pbaVer=$( printf 'pba.r%s.%s' "$(git rev-list --count HEAD)" "$(git rev-parse --short HEAD)" )
-
# version string might be a bit over the top,
# but I want the build versions of all the 3 source repositories in it.
- printf '%s+%s+%s' "$_stagingVer" "$_wineVer" "$_pbaVer"
+ printf '%s+%s' "$_stagingVer" "$_wineVer"
}
prepare() {
@@ -196,18 +173,7 @@ prepare() {
"${srcdir}"/"${_stgsrcdir}"/patches/patchinstall.sh DESTDIR="${srcdir}/${_winesrcdir}" --all
# apply wine-pba patches
- #for _f in $(ls "${srcdir}"/"${_pbasrcdir}"/'patches'); do
- # patch -d "${srcdir}"/"${_winesrcdir}" -Np1 < "${srcdir}"/"${_pbasrcdir}"/'patches'/"${_f}"
- #done
- patch -Np1 < ../'0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch'
- patch -Np1 < ../'0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch'
- patch -Np1 < ../'0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch'
- patch -Np1 < ../'0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch'
- patch -Np1 < ../'0005-wined3d-Disable-persistently-mapped-shader-resource-.patch'
- patch -Np1 < ../'0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch'
- patch -Np1 < ../'0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch'
- patch -Np1 < ../'0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch'
- patch -Np1 < ../'0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch'
+ patch -Np1 < ../'wine-staging-pba.patch'
# fix path of opencl headers
sed 's|OpenCL/opencl.h|CL/opencl.h|g' -i configure*
diff --git a/wine-staging-pba.patch b/wine-staging-pba.patch
new file mode 100644
index 000000000000..050a7c3c4071
--- /dev/null
+++ b/wine-staging-pba.patch
@@ -0,0 +1,1574 @@
+diff -r -u --new-file a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c
+--- a/dlls/wined3d/buffer.c 2018-03-28 23:57:50.342915285 +0200
++++ b/dlls/wined3d/buffer.c 2018-03-28 23:27:46.013998252 +0200
+@@ -28,12 +28,14 @@
+ #include "wined3d_private.h"
+
+ WINE_DEFAULT_DEBUG_CHANNEL(d3d);
++WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
+
+ #define WINED3D_BUFFER_HASDESC 0x01 /* A vertex description has been found. */
+ #define WINED3D_BUFFER_USE_BO 0x02 /* Use a buffer object for this buffer. */
+ #define WINED3D_BUFFER_PIN_SYSMEM 0x04 /* Keep a system memory copy for this buffer. */
+ #define WINED3D_BUFFER_DISCARD 0x08 /* A DISCARD lock has occurred since the last preload. */
+ #define WINED3D_BUFFER_APPLESYNC 0x10 /* Using sync as in GL_APPLE_flush_buffer_range. */
++#define WINED3D_BUFFER_PERSISTENT 0x20 /* Uses a persistent-mapped buffer via ARB_buffer_storage. */
+
+ #define VB_MAXDECLCHANGES 100 /* After that number of decl changes we stop converting */
+ #define VB_RESETDECLCHANGE 1000 /* Reset the decl changecount after that number of draws */
+@@ -269,6 +271,53 @@
+ return FALSE;
+ }
+
++/* Picks the persistent buffer heap for this buffer and allocates the heap
++ * element that backs it. Returns TRUE on success; on failure the buffer is
++ * left without a heap and FALSE is returned.
++ *
++ * Context activation is done by the caller. */
++static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer)
++{
++    struct wined3d_device *device = buffer->resource.device;
++    struct wined3d_buffer_heap_element *allocation;
++    struct wined3d_buffer_heap *target_heap;
++    HRESULT hr;
++
++    if (!(buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER))
++    {
++        if (!(buffer->resource.usage & WINED3DUSAGE_WRITEONLY))
++            FIXME("Using a write-only persistent buffer for %p without WINED3DUSAGE_WRITEONLY.\n", buffer);
++        target_heap = device->wo_buffer_heap;
++    }
++    else
++    {
++        // Constant buffers use the heap aligned to constant buffer offset requirements.
++        target_heap = device->cb_buffer_heap;
++    }
++
++    buffer->buffer_heap = target_heap;
++    hr = wined3d_buffer_heap_alloc(target_heap, buffer->resource.size, &allocation);
++    if (SUCCEEDED(hr))
++    {
++        // Both persistent-map pointers start out referring to the same element.
++        buffer->cs_persistent_map = allocation;
++        buffer->mt_persistent_map = allocation;
++        return TRUE;
++    }
++
++    // FIXME(acomminos): fall back to standalone BO here?
++    ERR("Failed to create persistent map for buffer %p, hr=%x\n", buffer, hr);
++    buffer->buffer_heap = NULL;
++    return FALSE;
++}
++
++/* Hands the buffer's cs_persistent_map element back to its heap and detaches
++ * the buffer from that heap. Does nothing if the buffer has no heap. */
++static void buffer_free_persistent_map(struct wined3d_buffer *buffer)
++{
++    struct wined3d_buffer_heap *owner = buffer->buffer_heap;
++
++    if (owner)
++    {
++        // TODO(acomminos): get the CS thread to free pending main thread buffers.
++        wined3d_buffer_heap_free(owner, buffer->cs_persistent_map);
++        buffer->buffer_heap = NULL;
++    }
++}
++
++
+ static BOOL buffer_process_converted_attribute(struct wined3d_buffer *buffer,
+ const enum wined3d_buffer_conversion_type conversion_type,
+ const struct wined3d_stream_info_element *attrib, DWORD *stride_this_run)
+@@ -632,6 +681,17 @@
+ }
+ return buffer_create_buffer_object(buffer, context);
+
++ case WINED3D_LOCATION_PERSISTENT_MAP:
++ if (buffer->buffer_heap)
++ return TRUE;
++
++ if (!(buffer->flags & WINED3D_BUFFER_PERSISTENT))
++ {
++ WARN("Trying to map a persistent region for buffer %p without WINED3D_BUFFER_PERSISTENT.\n", buffer);
++ return FALSE;
++ }
++ return buffer_alloc_persistent_map(buffer);
++
+ default:
+ ERR("Invalid location %s.\n", wined3d_debug_location(location));
+ return FALSE;
+@@ -689,16 +749,32 @@
+ buffer_conversion_upload(buffer, context);
+ break;
+
++ case WINED3D_LOCATION_PERSISTENT_MAP:
++ // TODO(acomminos): are we guaranteed location_sysmem to be kept?
++ // no.
++ if (buffer->conversion_map)
++ FIXME("Attempting to use conversion map with persistent mapping.\n");
++ memcpy(buffer->buffer_heap->map_ptr +
++ buffer->cs_persistent_map->range.offset,
++ buffer->resource.heap_memory, buffer->resource.size);
++ break;
++
+ default:
+ ERR("Invalid location %s.\n", wined3d_debug_location(location));
+ return FALSE;
+ }
+
+ wined3d_buffer_validate_location(buffer, location);
+- if (buffer->resource.heap_memory && location == WINED3D_LOCATION_BUFFER
++ if (buffer->resource.heap_memory
++ && location & WINED3D_LOCATION_BUFFER
+ && !(buffer->resource.usage & WINED3DUSAGE_DYNAMIC))
+ wined3d_buffer_evict_sysmem(buffer);
+
++ // FIXME(acomminos)
++ if (buffer->resource.heap_memory
++ && location & WINED3D_LOCATION_PERSISTENT_MAP)
++ wined3d_buffer_evict_sysmem(buffer);
++
+ return TRUE;
+ }
+
+@@ -720,12 +796,26 @@
+ {
+ data->buffer_object = buffer->buffer_object;
+ data->addr = NULL;
++ data->length = buffer->resource.size;
+ return WINED3D_LOCATION_BUFFER;
+ }
++ if (locations & WINED3D_LOCATION_PERSISTENT_MAP)
++ {
++ // FIXME(acomminos): should we expose a buffer object we don't wholly own here?
++ data->buffer_object = buffer->buffer_heap->buffer_object;
++ data->addr = buffer->cs_persistent_map->range.offset;
++
++ // Note that the size of the underlying buffer allocation may be larger
++ // than the buffer knows about. In this case, we've rounded it up to be
++ // aligned (e.g. for uniform buffer offsets).
++ data->length = buffer->cs_persistent_map->range.size;
++ return WINED3D_LOCATION_PERSISTENT_MAP;
++ }
+ if (locations & WINED3D_LOCATION_SYSMEM)
+ {
+ data->buffer_object = 0;
+ data->addr = buffer->resource.heap_memory;
++ data->length = buffer->resource.size;
+ return WINED3D_LOCATION_SYSMEM;
+ }
+
+@@ -761,6 +851,8 @@
+ buffer->flags &= ~WINED3D_BUFFER_HASDESC;
+ }
+
++ buffer_free_persistent_map(buffer);
++
+ resource_unload(resource);
+ }
+
+@@ -784,6 +876,8 @@
+ heap_free(buffer->conversion_map);
+ }
+
++ buffer_free_persistent_map(buffer);
++
+ heap_free(buffer->maps);
+ heap_free(buffer);
+ }
+@@ -900,6 +994,16 @@
+
+ buffer_mark_used(buffer);
+
++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
++ {
++ if (wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP))
++ return;
++
++ ERR("Failed to preload persistent mapping for %p, falling back to BO.\n", buffer);
++ buffer->flags |= WINED3D_BUFFER_USE_BO;
++ buffer->flags &= ~WINED3D_BUFFER_PERSISTENT;
++ }
++
+ /* TODO: Make converting independent from VBOs */
+ if (!(buffer->flags & WINED3D_BUFFER_USE_BO))
+ {
+@@ -1010,6 +1114,25 @@
+
+ count = ++buffer->resource.map_count;
+
++ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)
++ {
++ const struct wined3d_gl_info *gl_info;
++ context = context_acquire(device, NULL, 0);
++
++ FIXME_(d3d_perf)("Fences not used for persistent buffer maps on CS thread, using glFinish (flags: %x)\n", flags);
++
++ gl_info = context->gl_info;
++ gl_info->gl_ops.gl.p_glFinish();
++
++ base = buffer->buffer_heap->map_ptr
++ + buffer->cs_persistent_map->range.offset;
++ *data = base + offset;
++
++ context_release(context);
++
++ return WINED3D_OK;
++ }
++
+ if (buffer->buffer_object)
+ {
+ unsigned int dirty_offset = offset, dirty_size = size;
+@@ -1152,6 +1275,12 @@
+ return;
+ }
+
++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
++ {
++ TRACE("Persistent buffer, ignore unmap.\n");
++ return;
++ }
++
+ if (buffer->map_ptr)
+ {
+ struct wined3d_device *device = buffer->resource.device;
+@@ -1256,6 +1385,73 @@
+ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
+ {
+ struct wined3d_buffer *buffer = buffer_from_resource(resource);
++ UINT offset = box ? box->left : 0;
++
++ if (sub_resource_idx)
++ {
++ WARN("Invalid sub_resource_idx %u.\n", sub_resource_idx);
++ return E_INVALIDARG;
++ }
++
++ // Support immediate mapping of persistent buffers off the command thread,
++ // which require no GL calls to interface with.
++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
++ {
++ // Attempt to load a persistent map without syncing, if possible.
++ if (!(buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP))
++ {
++ wined3d_resource_wait_idle(resource);
++ if (!buffer_alloc_persistent_map(buffer))
++ {
++ ERR_(d3d_perf)("Failed to allocate persistent buffer, falling back to sync path.");
++ return E_FAIL;
++ }
++ wined3d_buffer_validate_location(buffer, WINED3D_LOCATION_PERSISTENT_MAP);
++ }
++
++ map_desc->row_pitch = map_desc->slice_pitch = buffer->desc.byte_width;
++ if (flags & WINED3D_MAP_DISCARD)
++ {
++ HRESULT hr;
++ struct wined3d_buffer_heap_element *mt_elem;
++ if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &mt_elem)))
++ {
++ FIXME_(d3d_perf)("Failed to allocate new buffer, falling back to sync path.\n");
++ return hr;
++ }
++ map_desc->data = buffer->buffer_heap->map_ptr + mt_elem->range.offset + offset;
++ resource->map_count++;
++
++ buffer->mt_persistent_map = mt_elem;
++
++ // Discard handler on CSMT thread is responsible for returning the
++ // currently used buffer to the free pool, along with the fence that
++ // must be called before the buffer can be reused.
++ wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, mt_elem);
++
++ return WINED3D_OK;
++ }
++ else if (flags & WINED3D_MAP_NOOVERWRITE)
++ {
++ // Allow immediate access for persistent buffers without a fence.
++ // Always use the latest buffer in this case in case the latest
++ // DISCARDed one hasn't reached the command stream yet.
++ struct wined3d_map_range map_range = buffer->mt_persistent_map->range;
++ map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset;
++ resource->map_count++;
++ return WINED3D_OK;
++ }
++
++ WARN_(d3d_perf)("Mapping persistent buffer %p in sync with CS thread.\n", buffer);
++ }
++
++ return E_NOTIMPL;
++}
++
++static HRESULT buffer_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx,
++ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
++{
++ struct wined3d_buffer *buffer = buffer_from_resource(resource);
+ UINT offset, size;
+
+ if (sub_resource_idx)
+@@ -1298,6 +1494,18 @@
+
+ static HRESULT buffer_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx)
+ {
++ struct wined3d_buffer *buffer = buffer_from_resource(resource);
++ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)
++ {
++ // Nothing to be done to unmap a region of a persistent buffer.
++ resource->map_count--;
++ return WINED3D_OK;
++ }
++ return E_NOTIMPL;
++}
++
++static HRESULT buffer_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx)
++{
+ if (sub_resource_idx)
+ {
+ WARN("Invalid sub_resource_idx %u.\n", sub_resource_idx);
+@@ -1317,6 +1525,8 @@
+ buffer_resource_sub_resource_map,
+ buffer_resource_sub_resource_map_info,
+ buffer_resource_sub_resource_unmap,
++ buffer_resource_sub_resource_map_cs,
++ buffer_resource_sub_resource_unmap_cs,
+ };
+
+ static GLenum buffer_type_hint_from_bind_flags(const struct wined3d_gl_info *gl_info,
+@@ -1392,12 +1602,34 @@
+ buffer->flags |= WINED3D_BUFFER_PIN_SYSMEM;
+ }
+
++ if (buffer->resource.usage & WINED3DUSAGE_DYNAMIC)
++ {
++ if (!device->use_pba)
++ {
++ WARN_(d3d_perf)("Not creating a persistent mapping for dynamic buffer %p because the PBA is disabled.\n", buffer);
++ }
++ else if (bind_flags & WINED3D_BIND_SHADER_RESOURCE)
++ {
++ FIXME_(d3d_perf)("Not using a persistent mapping for shader resource buffer %p (unimplemented)\n", buffer);
++ }
++ else
++ {
++ // If supported, use persistent mapped buffers instead of a
++ // standalone BO for dynamic buffers.
++ buffer->flags |= WINED3D_BUFFER_PERSISTENT;
++ }
++ }
++
+ /* Observations show that draw_primitive_immediate_mode() is faster on
+ * dynamic vertex buffers than converting + draw_primitive_arrays().
+ * (Half-Life 2 and others.) */
+ dynamic_buffer_ok = gl_info->supported[APPLE_FLUSH_BUFFER_RANGE] || gl_info->supported[ARB_MAP_BUFFER_RANGE];
+
+- if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT])
++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
++ {
++ TRACE("Not creating a BO because a persistent mapped buffer will be used.\n");
++ }
++ else if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT])
+ {
+ TRACE("Not creating a BO because GL_ARB_vertex_buffer is not supported.\n");
+ }
+diff -r -u --new-file a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
+--- a/dlls/wined3d/buffer_heap.c 1970-01-01 01:00:00.000000000 +0100
++++ b/dlls/wined3d/buffer_heap.c 2018-03-28 23:35:39.595966143 +0200
+@@ -0,0 +1,530 @@
++/*
++ * Copyright 2018 Andrew Comminos
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
++ *
++ */
++
++#include "config.h"
++#include "wine/port.h"
++#include "wine/rbtree.h"
++#include "wined3d_private.h"
++
++WINE_DEFAULT_DEBUG_CHANNEL(d3d);
++WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
++
++// Arbitrary binding to use when binding the persistent buffer.
++#define BIND_TARGET GL_ARRAY_BUFFER
++
++
++// A batch of heap elements whose reuse has to wait for a fence. Batches are
++// chained into a singly linked list that runs from the heap's fenced_head to
++// its fenced_tail.
++struct wined3d_buffer_heap_fenced_element
++{
++ // Per-bin element lists, copied from the heap's pending_fenced_bins when
++ // the batch is created.
++ struct wined3d_buffer_heap_bin_set free_list;
++ // Fence that is waited on before free_list is merged into the heap's
++ // free list.
++ struct wined3d_fence *fence;
++
++ // Next batch in the fenced list, or NULL for the last one.
++ struct wined3d_buffer_heap_fenced_element *next;
++};
++
++// Allocates a zero-initialised heap element describing the range that starts
++// at offset and spans size bytes. Returns NULL if the allocation fails.
++static struct wined3d_buffer_heap_element* element_new(GLsizei offset, GLsizei size)
++{
++    struct wined3d_buffer_heap_element *created;
++
++    created = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*created));
++    if (created)
++    {
++        created->range.offset = offset;
++        created->range.size = size;
++    }
++    return created;
++}
++
++// Returns the index of the highest set bit of size, i.e. floor(log2(size)) for
++// positive values. Returns 0 when no bit is set.
++static inline int bitwise_log2_floor(GLsizei size)
++{
++    int bit = 8 * sizeof(GLsizei);
++
++    // Scan from the most significant bit downwards.
++    while (bit-- > 0)
++    {
++        if ((size >> bit) & 1)
++            return bit;
++    }
++    return 0;
++}
++
++// Returns ceil(log2(size)): the floor, plus one when size is not a power of
++// two.
++static inline int bitwise_log2_ceil(GLsizei size)
++{
++    int exponent = bitwise_log2_floor(size);
++
++    // A bit set below the highest one means size is not a power of two.
++    if (size & (size - 1))
++        exponent++;
++    return exponent;
++}
++
++// Returns the bin index for an element: floor(log2) of its size, clamped to
++// the last bin.
++static int element_bin(struct wined3d_buffer_heap_element *elem)
++{
++    int last_bin = WINED3D_BUFFER_HEAP_BINS - 1;
++    int size_log2 = bitwise_log2_floor(elem->range.size);
++
++    return last_bin < size_log2 ? last_bin : size_log2;
++}
++
++// Pushes an element onto the front of the heap's free list bin that matches
++// its size. The element is expected to be unlinked (prev and next NULL); a
++// linked element is reported but still inserted.
++//
++// Takes no lock itself. The alloc and free paths in this file call it with
++// heap->temp_lock held.
++static void element_insert_free_bin(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
++{
++    struct wined3d_buffer_heap_bin *target;
++    int bin;
++
++    if (elem->prev || elem->next)
++    {
++        ERR("Element %p is already in a free list (for some reason).\n", elem);
++    }
++
++    bin = element_bin(elem);
++    target = &heap->free_list.bins[bin];
++
++    elem->prev = NULL;
++    elem->next = target->head;
++    if (target->head)
++        target->head->prev = elem;
++    target->head = elem;
++
++    if (!target->tail)
++        target->tail = elem;
++
++    // The range fields are integers, not pointers; cast them so that the
++    // arguments match %lld regardless of the platform's pointer width.
++    TRACE("Inserted allocation at %lld of size %lld into bin %d\n",
++            (long long)elem->range.offset, (long long)elem->range.size, bin);
++}
++
++// Unlinks an element from the free list bin that matches its size and clears
++// its prev/next links. The element itself is not released.
++//
++// Takes no lock itself; the alloc path in this file calls it with
++// heap->temp_lock held.
++static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
++{
++    int bin = element_bin(elem);
++    struct wined3d_buffer_heap_bin *source = &heap->free_list.bins[bin];
++
++    if (elem->prev)
++        elem->prev->next = elem->next;
++
++    if (elem->next)
++        elem->next->prev = elem->prev;
++
++    if (elem == source->head)
++        source->head = elem->next;
++
++    if (elem == source->tail)
++        source->tail = elem->prev;
++
++    elem->prev = NULL;
++    elem->next = NULL;
++
++    // The range fields are integers, not pointers; cast them so that the
++    // arguments match %lld regardless of the platform's pointer width.
++    TRACE("Removed allocation at %lld of size %lld from bin %d\n",
++            (long long)elem->range.offset, (long long)elem->range.size, bin);
++}
++
++// Allocates a fenced batch holding a copy of the given bin set and the fence
++// guarding it. Returns NULL if the allocation fails.
++static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_bin_set bins, struct wined3d_fence* fence)
++{
++    struct wined3d_buffer_heap_fenced_element *batch;
++
++    if (!(batch = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*batch))))
++        return NULL;
++
++    batch->free_list = bins;
++    batch->fence = fence;
++    batch->next = NULL;
++    return batch;
++}
++
++// Red-black tree comparison callback: orders a GLsizei offset key against the
++// range offset of the element stored in the entry. Returns -1, 0 or 1.
++static int free_tree_compare(const void *key, const struct wine_rb_entry *entry)
++{
++    struct wined3d_buffer_heap_element *node = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
++    const GLsizei wanted = *(const GLsizei *)key;
++
++    return (wanted > node->range.offset) - (wanted < node->range.offset);
++}
++
++/* Creates a buffer heap backed by a single buffer object of the given size
++ * that is mapped persistently and coherently for its whole lifetime.
++ *
++ * alignment must be zero or a power of two; allocations are rounded up to it.
++ * When write_only is FALSE the mapping is readable as well as writable.
++ * On success the new heap is stored in *buffer_heap and WINED3D_OK is
++ * returned. On failure nothing is leaked and *buffer_heap is not touched.
++ *
++ * Context activation is done by the caller. */
++HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap)
++{
++    // Presumably referenced by name from the GL_EXTCALL / checkGLcall macros.
++    const struct wined3d_gl_info *gl_info = context->gl_info;
++    GLbitfield access_flags;
++    GLbitfield storage_flags;
++    struct wined3d_buffer_heap_element *initial_elem;
++    struct wined3d_buffer_heap *object;
++
++    if ((alignment & (alignment - 1)) != 0)
++    {
++        return E_FAIL;
++    }
++
++    if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object))))
++    {
++        return E_OUTOFMEMORY;
++    }
++
++    // Allocate the element describing the whole heap before creating any GL
++    // object, so that running out of memory needs no GL cleanup.
++    if (!(initial_elem = element_new(0, size)))
++    {
++        HeapFree(GetProcessHeap(), 0, object);
++        return E_OUTOFMEMORY;
++    }
++
++    access_flags = GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_WRITE_BIT;
++    if (!write_only)
++    {
++        access_flags |= GL_MAP_READ_BIT;
++    }
++    storage_flags = access_flags;
++    // FIXME(acomminos): So, about GL_CLIENT_STORAGE_BIT:
++    // - On NVIDIA, DMA CACHED memory is used when this flag is set. SYSTEM HEAP
++    //   memory is used without it, which (in my testing) is much faster.
++    // - On Mesa, GTT is used when this flag is set. This is what we want;
++    //   otherwise uploads to VRAM occur, which is unusably slow (on radeon).
++    //
++    // Thus, we're only going to set this on mesa for now.
++    // Hints are awful anyway.
++    if (gl_info->quirks & WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT)
++    {
++        FIXME_(d3d_perf)("PBA: using GL_CLIENT_STORAGE_BIT quirk\n");
++        storage_flags |= GL_CLIENT_STORAGE_BIT;
++    }
++
++    GL_EXTCALL(glGenBuffers(1, &object->buffer_object));
++    checkGLcall("glGenBuffers");
++
++    context_bind_bo(context, BIND_TARGET, object->buffer_object);
++
++    GL_EXTCALL(glBufferStorage(BIND_TARGET, size, NULL, storage_flags));
++    checkGLcall("glBufferStorage");
++
++    if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(BIND_TARGET, 0, size, access_flags))))
++    {
++        ERR("Couldn't map persistent buffer.\n");
++
++        // Undo everything created so far instead of leaking it.
++        context_bind_bo(context, BIND_TARGET, 0);
++        GL_EXTCALL(glDeleteBuffers(1, &object->buffer_object));
++        checkGLcall("glDeleteBuffers");
++        HeapFree(GetProcessHeap(), 0, initial_elem);
++        HeapFree(GetProcessHeap(), 0, object);
++        return E_FAIL;
++    }
++    context_bind_bo(context, BIND_TARGET, 0);
++
++    object->fenced_head = object->fenced_tail = NULL;
++    object->alignment = alignment;
++    InitializeCriticalSection(&object->temp_lock);
++
++    // Don't bother adding the initial allocation to the coalescing tree.
++    element_insert_free_bin(object, initial_elem);
++
++    *buffer_heap = object;
++
++    return WINED3D_OK;
++}
++
++/* Unmaps and deletes the heap's buffer object, deletes its lock and frees the
++ * heap structure itself. Always returns WINED3D_OK.
++ *
++ * The elements linked into the heap's lists are not freed here (see the TODO
++ * below).
++ *
++ * Context activation is done by the caller. */
++HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context)
++{
++ // Not used directly below; presumably referenced by name from the
++ // GL_EXTCALL / checkGLcall macros.
++ const struct wined3d_gl_info *gl_info = context->gl_info;
++
++ // The buffer has to be bound to BIND_TARGET for glUnmapBuffer to act on it.
++ context_bind_bo(context, BIND_TARGET, heap->buffer_object);
++ GL_EXTCALL(glUnmapBuffer(BIND_TARGET));
++ checkGLcall("glUnmapBuffer");
++ context_bind_bo(context, BIND_TARGET, 0);
++
++ GL_EXTCALL(glDeleteBuffers(1, &heap->buffer_object));
++ checkGLcall("glDeleteBuffers");
++
++ DeleteCriticalSection(&heap->temp_lock);
++
++ // TODO(acomminos): cleanup free lists, fenced list, etc.
++
++ HeapFree(GetProcessHeap(), 0, heap);
++
++ return WINED3D_OK;
++}
++
++// Allocates a range of at least size bytes from the heap.
++//
++// The request is rounded up to the next power of two and then to the heap's
++// alignment. The first sufficiently large free element found is unlinked,
++// trimmed to the rounded size and returned through *out_elem; any unused tail
++// goes back to the free list as a new element. If no free element fits, one
++// deferred coalesce is forced and the allocation retried.
++//
++// Returns WINED3D_OK on success, or WINED3DERR_OUTOFVIDEOMEMORY if the request
++// cannot be satisfied.
++HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_buffer_heap_element **out_elem)
++{
++    // Largest power of two that still fits in a positive GLsizei.
++    const GLsizeiptr max_size = (GLsizeiptr)1 << (8 * sizeof(GLsizei) - 2);
++    const GLsizeiptr requested_size = size;
++    struct wined3d_buffer_heap_element *remaining_elem;
++    struct wined3d_buffer_heap_element *elem;
++    struct wined3d_map_range remaining_range;
++    int num_coalesced = 0;
++    int initial_bin;
++    int i;
++
++    // bitwise_log2_ceil() and element_new() operate on GLsizei values, and
++    // shifting 1 past the top bit is undefined, so refuse anything whose
++    // rounded size would not fit.
++    if (size < 0 || size > max_size)
++    {
++        WARN_(d3d_perf)("Refusing allocation of unsupported size %lld.\n", (long long)size);
++        return WINED3DERR_OUTOFVIDEOMEMORY;
++    }
++
++    EnterCriticalSection(&heap->temp_lock);
++
++    // After alignment, reduce fragmentation by rounding to next power of two.
++    // If the alignment is a power of two (which it should be), this should be
++    // no problem.
++    size = (GLsizeiptr)1 << bitwise_log2_ceil(size);
++
++    // Align size values where possible.
++    if (heap->alignment && (size % heap->alignment != 0))
++        size += heap->alignment - (size % heap->alignment);
++
++    initial_bin = min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_ceil(size));
++
++    for (i = initial_bin; i < WINED3D_BUFFER_HEAP_BINS; i++)
++    {
++        // Bins are indexed by floor(log2(size)) clamped to the last bin, so an
++        // element in the last bin can be smaller than the request when
++        // initial_bin itself was clamped. Skip elements that do not fit.
++        for (elem = heap->free_list.bins[i].head; elem; elem = elem->next)
++        {
++            if (elem->range.size >= size)
++                break;
++        }
++        if (!elem)
++            continue;
++
++        remaining_range.offset = elem->range.offset + size;
++        remaining_range.size = elem->range.size - size;
++
++        // Create the element for the unused tail before modifying anything.
++        // If that fails, the whole element is handed out untrimmed rather than
++        // losing track of the tail.
++        remaining_elem = NULL;
++        if (remaining_range.size > 0)
++        {
++            TRACE_(d3d_perf)("Imperfect fit allocated, fragmenting remainder of %lld at %lld.\n",
++                    (long long)remaining_range.size, (long long)remaining_range.offset);
++
++            if (!(remaining_elem = element_new(remaining_range.offset, remaining_range.size)))
++                ERR("Failed to allocate remainder element, handing out the whole range.\n");
++        }
++
++        // Take the element from the free list, transferring ownership to
++        // the caller.
++        element_remove_free(heap, elem);
++
++        if (remaining_elem)
++        {
++            // Resize the element so that we can free the remainder.
++            elem->range.size = size;
++            element_insert_free_bin(heap, remaining_elem);
++        }
++        *out_elem = elem;
++
++        TRACE_(d3d_perf)("Allocated %lld (requested %lld) at %lld from bin %d (initial %d)\n",
++                (long long)elem->range.size, (long long)requested_size,
++                (long long)elem->range.offset, i, initial_bin);
++
++        LeaveCriticalSection(&heap->temp_lock);
++        return WINED3D_OK;
++    }
++
++    LeaveCriticalSection(&heap->temp_lock);
++
++    FIXME_(d3d_perf)("Forcing coalesce, not enough free space in buffer heap.\n");
++    if (SUCCEEDED(wined3d_buffer_heap_deferred_coalesce(heap, &num_coalesced)) && num_coalesced > 0)
++    {
++        // size is already rounded; rounding it again on the retry is a no-op.
++        return wined3d_buffer_heap_alloc(heap, size, out_elem);
++    }
++
++    FIXME_(d3d_perf)("Coalescing did not create new blocks, failing.\n");
++
++    return WINED3DERR_OUTOFVIDEOMEMORY;
++}
++
++// Returns an element to the heap's free list immediately, without waiting on
++// any fence. Always returns WINED3D_OK.
++HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
++{
++    CRITICAL_SECTION *lock = &heap->temp_lock;
++
++    EnterCriticalSection(lock);
++    // The element is only put back into a free bin here; coalescing is
++    // deferred. Elements rather than bare ranges are passed around so that
++    // reusing a buffer does not cost a HeapAlloc/HeapFree pair every time.
++    element_insert_free_bin(heap, elem);
++    LeaveCriticalSection(lock);
++
++    return WINED3D_OK;
++}
++
++// Queues an element on the heap's pending fenced bins by appending it to the
++// tail of the bin matching its size. The device parameter is unused and no
++// lock is taken. Always returns WINED3D_OK.
++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_buffer_heap_element *elem)
++{
++    struct wined3d_buffer_heap_bin *pending = &heap->pending_fenced_bins.bins[element_bin(elem)];
++    struct wined3d_buffer_heap_element *last = pending->tail;
++
++    if (!last)
++    {
++        // Empty bin: the element becomes the head as well as the tail.
++        pending->head = elem;
++    }
++    else
++    {
++        last->next = elem;
++        elem->prev = last;
++    }
++    pending->tail = elem;
++
++    return WINED3D_OK;
++}
++
++// Seals the elements queued in the heap's pending fenced bins into a new batch,
++// appends the batch to the fenced list and issues a fence for it. If a batch
++// is already outstanding, it is waited on first.
++//
++// Returns WINED3D_OK on success, the wined3d_fence_create() error if the fence
++// cannot be created, or E_OUTOFMEMORY if the batch cannot be allocated. In
++// both failure cases the pending bins are left untouched.
++HRESULT wined3d_buffer_heap_cs_fence_issue(struct wined3d_buffer_heap *heap, struct wined3d_device *device)
++{
++    struct wined3d_buffer_heap_fenced_element *fenced_elem;
++    struct wined3d_fence *fence;
++    HRESULT hr;
++
++    if (heap->fenced_head)
++    {
++        // XXX(acomminos): double or triple buffer this?
++        wined3d_buffer_heap_cs_fence_wait(heap, device);
++    }
++
++    if (FAILED(hr = wined3d_fence_create(device, &fence)))
++    {
++        ERR("Failed to create fence.\n");
++        return hr;
++    }
++
++    fenced_elem = fenced_element_new(heap->pending_fenced_bins, fence);
++    if (!fenced_elem)
++    {
++        // The fence has no owner yet, so release it here instead of leaking it.
++        wined3d_fence_destroy(fence);
++        return E_OUTOFMEMORY;
++    }
++
++    TRACE_(d3d_perf)("Dispatching fenced buffer set.\n");
++    // The batch now holds a copy of the lists; start collecting a fresh set.
++    memset(&heap->pending_fenced_bins, 0, sizeof(heap->pending_fenced_bins));
++
++    // Append to end of fenced list, which works well if you assume that buffers
++    // are freed in some ascending draw call ordering.
++    if (!heap->fenced_head)
++    {
++        heap->fenced_head = fenced_elem;
++        heap->fenced_tail = fenced_elem;
++    }
++    else
++    {
++        heap->fenced_tail->next = fenced_elem;
++        heap->fenced_tail = fenced_elem;
++    }
++
++    wined3d_fence_issue(fence, device);
++    return WINED3D_OK;
++}
++
++HRESULT wined3d_buffer_heap_cs_fence_wait(struct wined3d_buffer_heap *heap, struct wined3d_device *device)
++{
++ enum wined3d_fence_result res;
++ struct wined3d_buffer_heap_fenced_element *elem = heap->fenced_head;
++ if (!elem)
++ return WINED3D_OK;
++
++ res = wined3d_fence_wait(elem->fence, device);
++ switch (res)
++ {
++ case WINED3D_FENCE_OK:
++ case WINED3D_FENCE_NOT_STARTED:
++ {
++ TRACE_(d3d_perf)("Freed fence group.\n");
++
++ EnterCriticalSection(&heap->temp_lock);
++ for (int i = 0; i < WINED3D_BUFFER_HEAP_BINS; i++)
++ {
++ struct wined3d_buffer_heap_bin *elem_bin = &elem->free_list.bins[i];
++ if (!elem_bin->tail)
++ continue;
++
++ struct wined3d_buffer_heap_bin *heap_bin = &heap->free_list.bins[i];
++ if (heap_bin->head)
++ {
++ // Splice the whole group bin in front of the heap's free bin.
++ elem_bin->tail->next = heap_bin->head;
++ heap_bin->head->prev = elem_bin->tail;
++
++ elem_bin->head->prev = NULL;
++ heap_bin->head = elem_bin->head;
++ }
++ else
++ {
++ elem_bin->head->prev = NULL;
++ heap_bin->head = elem_bin->head;
++ elem_bin->tail->next = NULL;
++ heap_bin->tail = elem_bin->tail;
++ }
++ }
++ LeaveCriticalSection(&heap->temp_lock);
++
++ wined3d_fence_destroy(elem->fence);
++ heap->fenced_head = elem->next;
++ if (!heap->fenced_head)
++ heap->fenced_tail = NULL; // List drained: don't leave the tail dangling.
++ HeapFree(GetProcessHeap(), 0, elem);
++ break;
++ }
++ default:
++ return WINED3D_OK;
++ }
++
++ return WINED3D_OK;
++}
++
++HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, int *coalesced_count)
++{
++ struct wined3d_buffer_heap_element *elem = NULL;
++ struct wined3d_buffer_heap_element *next = NULL;
++ struct wine_rb_entry *entry;
++ struct wined3d_map_range coalesced_range;
++
++ struct wine_rb_tree free_tree;
++ int num_coalesced = 0;
++
++ wine_rb_init(&free_tree, free_tree_compare);
++
++ EnterCriticalSection(&heap->temp_lock);
++
++ // TODO(acomminos): on one hand, if there's a lot of elements in the list,
++ // it's highly fragmented. on the other, we can potentially waste a decent
++ // sum of time checking for uncoalesced bins.
++ for (int i = 0; i < WINED3D_BUFFER_HEAP_BINS; i++)
++ {
++ elem = heap->free_list.bins[i].head;
++ while (elem)
++ {
++ // Insert a sentry. FIXME(acomminos): can skip this with traversal.
++ if (wine_rb_put(&free_tree, &elem->range.offset, &elem->entry) == -1)
++ {
++ ERR("Failed to insert key %lx in tree.\n", (unsigned long)elem->range.offset);
++ elem = elem->next;
++ continue;
++ }
++
++ coalesced_range = elem->range;
++
++ // Coalesce right: absorb the tree successor if it starts where elem ends.
++ entry = wine_rb_next(&elem->entry);
++ if (entry)
++ {
++ struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
++ if (elem->range.offset + elem->range.size == right_elem->range.offset)
++ {
++ TRACE("Coalesced right.\n");
++ coalesced_range.size += right_elem->range.size;
++
++ wine_rb_remove(&free_tree, entry);
++ element_remove_free(heap, right_elem);
++ HeapFree(GetProcessHeap(), 0, right_elem);
++
++ num_coalesced++;
++ }
++ }
++
++ // Coalesce left: absorb the tree predecessor if it ends where elem starts.
++ entry = wine_rb_prev(&elem->entry);
++ if (entry)
++ {
++ struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry);
++ if (left_elem->range.offset + left_elem->range.size == coalesced_range.offset)
++ {
++ TRACE("Coalesced left.\n");
++ coalesced_range.offset = left_elem->range.offset;
++ coalesced_range.size += left_elem->range.size;
++
++ wine_rb_remove(&free_tree, entry);
++ element_remove_free(heap, left_elem);
++ HeapFree(GetProcessHeap(), 0, left_elem);
++
++ num_coalesced++;
++ }
++ }
++
++ next = elem->next;
++
++ if (elem->range.size != coalesced_range.size)
++ {
++ FIXME_(d3d_perf)("Coalesced range from (%#lx, %ld) to (%#lx, %ld)\n", (unsigned long)elem->range.offset, (long)elem->range.size, (unsigned long)coalesced_range.offset, (long)coalesced_range.size);
++
++ wine_rb_remove(&free_tree, &elem->entry);
++
++ // The range grew: move to the matching free bin and re-key the tree entry.
++ element_remove_free(heap, elem);
++ elem->range = coalesced_range;
++ element_insert_free_bin(heap, elem);
++
++ wine_rb_put(&free_tree, &elem->range.offset, &elem->entry);
++ }
++
++ elem = next;
++ }
++ }
++
++ LeaveCriticalSection(&heap->temp_lock);
++
++ FIXME_(d3d_perf)("Performed %d coalesces.\n", num_coalesced);
++ if (coalesced_count)
++ *coalesced_count = num_coalesced;
++
++ return WINED3D_OK;
++}
+\ No newline at end of file
+diff -r -u --new-file a/dlls/wined3d/context.c b/dlls/wined3d/context.c
+--- a/dlls/wined3d/context.c 2018-03-28 23:57:59.979900010 +0200
++++ b/dlls/wined3d/context.c 2018-03-28 23:22:42.726661696 +0200
+@@ -4956,7 +4956,11 @@
+ if (parameters->indexed)
+ {
+ struct wined3d_buffer *index_buffer = state->index_buffer;
+- if (!index_buffer->buffer_object || !stream_info->all_vbo)
++ if (index_buffer->cs_persistent_map)
++ {
++ idx_data = index_buffer->cs_persistent_map->range.offset;
++ }
++ else if (!index_buffer->buffer_object || !stream_info->all_vbo)
+ {
+ idx_data = index_buffer->resource.heap_memory;
+ }
+diff -r -u --new-file a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
+--- a/dlls/wined3d/cs.c 2018-03-28 23:57:59.780900325 +0200
++++ b/dlls/wined3d/cs.c 2018-03-28 23:23:49.308515873 +0200
+@@ -73,6 +73,7 @@
+ WINED3D_CS_OP_CLEAR_UNORDERED_ACCESS_VIEW,
+ WINED3D_CS_OP_COPY_UAV_COUNTER,
+ WINED3D_CS_OP_GENERATE_MIPMAPS,
++ WINED3D_CS_OP_DISCARD_BUFFER,
+ WINED3D_CS_OP_STOP,
+ };
+
+@@ -437,6 +438,13 @@
+ struct wined3d_shader_resource_view *view;
+ };
+
++struct wined3d_cs_discard_buffer
++{
++ enum wined3d_cs_op opcode;
++ struct wined3d_buffer *buffer;
++ struct wined3d_buffer_heap_element *map_range;
++};
++
+ struct wined3d_cs_stop
+ {
+ enum wined3d_cs_op opcode;
+@@ -465,6 +473,15 @@
+ }
+
+ InterlockedDecrement(&cs->pending_presents);
++
++ // FIXME(acomminos): is this the right place to put double-buffered frame
++ // timing based logic?
++ // FIXME(acomminos): this conditional sucks, replace with fancier feature check
++ if (cs->device->wo_buffer_heap && cs->device->cb_buffer_heap)
++ {
++ wined3d_buffer_heap_cs_fence_issue(cs->device->wo_buffer_heap, cs->device);
++ wined3d_buffer_heap_cs_fence_issue(cs->device->cb_buffer_heap, cs->device);
++ }
+ }
+
+ void wined3d_cs_emit_present(struct wined3d_cs *cs, struct wined3d_swapchain *swapchain,
+@@ -1984,7 +2001,7 @@
+ const struct wined3d_cs_map *op = data;
+ struct wined3d_resource *resource = op->resource;
+
+- *op->hr = resource->resource_ops->resource_sub_resource_map(resource,
++ *op->hr = resource->resource_ops->resource_sub_resource_map_cs(resource,
+ op->sub_resource_idx, op->map_desc, op->box, op->flags);
+ }
+
+@@ -2018,7 +2035,7 @@
+ const struct wined3d_cs_unmap *op = data;
+ struct wined3d_resource *resource = op->resource;
+
+- *op->hr = resource->resource_ops->resource_sub_resource_unmap(resource, op->sub_resource_idx);
++ *op->hr = resource->resource_ops->resource_sub_resource_unmap_cs(resource, op->sub_resource_idx);
+ }
+
+ HRESULT wined3d_cs_unmap(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx)
+@@ -2417,6 +2434,53 @@
+ cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT);
+ }
+
++static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *data)
++{
++ const struct wined3d_cs_discard_buffer *op = data;
++ struct wined3d_buffer *buffer = op->buffer;
++ HRESULT hr;
++
++ // TODO(acomminos): should call into buffer.c here instead.
++ if (FAILED(hr = wined3d_buffer_heap_free_fenced(buffer->buffer_heap, cs->device, buffer->cs_persistent_map)))
++ {
++ ERR("Failed to do a fenced free on discarded buffer %p, hr %x. Freeing anyway.\n", buffer, hr);
++ wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map);
++ }
++
++ buffer->cs_persistent_map = op->map_range; // Switch the CS side over to the replacement range.
++
++ // TODO(acomminos): merge this logic with buffer.c functions for standalone BOs
++ if (buffer->bind_flags & WINED3D_BIND_VERTEX_BUFFER)
++ device_invalidate_state(cs->device, STATE_STREAMSRC);
++ if (buffer->bind_flags & WINED3D_BIND_INDEX_BUFFER)
++ device_invalidate_state(cs->device, STATE_INDEXBUFFER);
++ if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER)
++ {
++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_VERTEX));
++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_HULL));
++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_DOMAIN));
++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_GEOMETRY));
++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_PIXEL));
++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_COMPUTE));
++ }
++
++ wined3d_resource_release(&op->buffer->resource); // Pairs with the acquire done when the op was emitted.
++}
++
++void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_buffer_heap_element *elem)
++{
++ struct wined3d_cs_discard_buffer *discard = cs->ops->require_space(cs, sizeof(*discard), WINED3D_CS_QUEUE_DEFAULT);
++
++ // Keep the buffer referenced while the op is queued; the exec handler releases it.
++ wined3d_resource_acquire(&buffer->resource);
++
++ discard->opcode = WINED3D_CS_OP_DISCARD_BUFFER;
++ discard->buffer = buffer;
++ discard->map_range = elem;
++
++ cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT);
++}
++
+ static void wined3d_cs_emit_stop(struct wined3d_cs *cs)
+ {
+ struct wined3d_cs_stop *op;
+@@ -2477,6 +2541,7 @@
+ /* WINED3D_CS_OP_CLEAR_UNORDERED_ACCESS_VIEW */ wined3d_cs_exec_clear_unordered_access_view,
+ /* WINED3D_CS_OP_COPY_UAV_COUNTER */ wined3d_cs_exec_copy_uav_counter,
+ /* WINED3D_CS_OP_GENERATE_MIPMAPS */ wined3d_cs_exec_generate_mipmaps,
++ /* WINED3D_CS_OP_DISCARD_BUFFER */ wined3d_cs_exec_discard_buffer,
+ };
+
+ static BOOL wined3d_cs_st_check_space(struct wined3d_cs *cs, size_t size, enum wined3d_cs_queue_id queue_id)
+diff -r -u --new-file a/dlls/wined3d/device.c b/dlls/wined3d/device.c
+--- a/dlls/wined3d/device.c 2018-03-28 23:58:00.031899927 +0200
++++ b/dlls/wined3d/device.c 2018-03-28 23:33:09.909291883 +0200
+@@ -840,6 +840,66 @@
+ device->null_sampler = NULL;
+ }
+
++/* Sets up the persistent buffer heaps and device->use_pba. Context activation is done by the caller. */
++static void create_buffer_heap(struct wined3d_device *device, struct wined3d_context *context)
++{
++ const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
++ BOOL use_pba = FALSE;
++ char *env_pba_disable;
++
++ if (!gl_info->supported[ARB_BUFFER_STORAGE])
++ {
++ FIXME("Not using PBA, ARB_buffer_storage unsupported.\n");
++ }
++ else if ((env_pba_disable = getenv("PBA_DISABLE")) && *env_pba_disable != '0')
++ {
++ FIXME("Not using PBA, envvar 'PBA_DISABLE' set.\n");
++ }
++ else
++ {
++ // TODO(acomminos): kill this magic number. perhaps base on vram.
++ GLsizeiptr geo_heap_size = 512 * 1024 * 1024;
++ // We choose a constant buffer size of 128MB, the same as NVIDIA claims to
++ // use in their Direct3D driver for discarded constant buffers.
++ GLsizeiptr cb_heap_size = 128 * 1024 * 1024;
++ GLint ub_alignment = 0;
++ HRESULT hr;
++
++ gl_info->gl_ops.gl.p_glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &ub_alignment);
++ // Round the heap size down to a multiple of the alignment; a zero/failed query must not reach the modulo.
++ if (ub_alignment > 0)
++ cb_heap_size -= cb_heap_size % ub_alignment;
++
++ if (FAILED(hr = wined3d_buffer_heap_create(context, geo_heap_size, 0, TRUE, &device->wo_buffer_heap)))
++ {
++ ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr);
++ goto fail;
++ }
++
++ if (FAILED(hr = wined3d_buffer_heap_create(context, cb_heap_size, ub_alignment, TRUE, &device->cb_buffer_heap)))
++ {
++ ERR("Failed to create persistent buffer heap for constant buffers, hr %#x.\n", hr);
++ goto fail;
++ }
++
++ FIXME("Initialized PBA (geo_heap_size: %ld, cb_heap_size: %ld, ub_align: %d)\n", geo_heap_size, cb_heap_size, ub_alignment);
++
++ use_pba = TRUE;
++ }
++fail:
++ device->use_pba = use_pba;
++}
++
++/* Destroys both persistent buffer heaps. Context activation is done by the caller. */
++static void destroy_buffer_heap(struct wined3d_device *device, struct wined3d_context *context)
++{
++ if (device->wo_buffer_heap)
++ wined3d_buffer_heap_destroy(device->wo_buffer_heap, context);
++ if (device->cb_buffer_heap)
++ wined3d_buffer_heap_destroy(device->cb_buffer_heap, context);
++ device->wo_buffer_heap = device->cb_buffer_heap = NULL; // The CS gates fencing on these being non-NULL.
++}
++
+ static LONG fullscreen_style(LONG style)
+ {
+ /* Make sure the window is managed, otherwise we won't get keyboard input. */
+@@ -1004,6 +1064,7 @@
+ device->shader_backend->shader_free_private(device);
+ destroy_dummy_textures(device, context);
+ destroy_default_samplers(device, context);
++ destroy_buffer_heap(device, context);
+ context_release(context);
+
+ while (device->context_count)
+@@ -1052,6 +1113,7 @@
+ context = context_acquire(device, target, 0);
+ create_dummy_textures(device, context);
+ create_default_samplers(device, context);
++ create_buffer_heap(device, context);
+ context_release(context);
+ }
+
+diff -r -u --new-file a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c
+--- a/dlls/wined3d/directx.c 2018-03-28 23:58:00.062899878 +0200
++++ b/dlls/wined3d/directx.c 2018-03-28 23:37:33.887751410 +0200
+@@ -111,6 +111,7 @@
+ /* ARB */
+ {"GL_ARB_base_instance", ARB_BASE_INSTANCE },
+ {"GL_ARB_blend_func_extended", ARB_BLEND_FUNC_EXTENDED },
++ {"GL_ARB_buffer_storage", ARB_BUFFER_STORAGE },
+ {"GL_ARB_clear_buffer_object", ARB_CLEAR_BUFFER_OBJECT },
+ {"GL_ARB_clear_texture", ARB_CLEAR_TEXTURE },
+ {"GL_ARB_clip_control", ARB_CLIP_CONTROL },
+@@ -148,6 +149,7 @@
+ {"GL_ARB_internalformat_query2", ARB_INTERNALFORMAT_QUERY2 },
+ {"GL_ARB_map_buffer_alignment", ARB_MAP_BUFFER_ALIGNMENT },
+ {"GL_ARB_map_buffer_range", ARB_MAP_BUFFER_RANGE },
++ {"GL_ARB_multi_bind", ARB_MULTI_BIND },
+ {"GL_ARB_multisample", ARB_MULTISAMPLE },
+ {"GL_ARB_multitexture", ARB_MULTITEXTURE },
+ {"GL_ARB_occlusion_query", ARB_OCCLUSION_QUERY },
+@@ -944,6 +946,13 @@
+ return !wined3d_caps_gl_ctx_test_viewport_subpixel_bits(ctx);
+ }
+
++static BOOL match_mesa(const struct wined3d_gl_info *gl_info, struct wined3d_caps_gl_ctx *ctx,
++ const char *gl_renderer, enum wined3d_gl_vendor gl_vendor,
++ enum wined3d_pci_vendor card_vendor, enum wined3d_pci_device device)
++{
++ return gl_vendor == GL_VENDOR_MESA;
++}
++
+ static void quirk_apple_glsl_constants(struct wined3d_gl_info *gl_info)
+ {
+ /* MacOS needs uniforms for relative addressing offsets. This can accumulate to quite a few uniforms.
+@@ -1081,6 +1090,13 @@
+ }
+ }
+
++static void quirk_use_client_storage_bit(struct wined3d_gl_info *gl_info)
++{
++ // Using ARB_buffer_storage on Mesa requires the GL_CLIENT_STORAGE_BIT to be
++ // set to use GTT for immutable buffers on radeon (see PIPE_USAGE_STREAM).
++ gl_info->quirks |= WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT;
++}
++
+ struct driver_quirk
+ {
+ BOOL (*match)(const struct wined3d_gl_info *gl_info, struct wined3d_caps_gl_ctx *ctx,
+@@ -1177,6 +1193,11 @@
+ quirk_broken_viewport_subpixel_bits,
+ "Nvidia viewport subpixel bits bug"
+ },
++ {
++ match_mesa,
++ quirk_use_client_storage_bit,
++ "Use GL_CLIENT_STORAGE_BIT for persistent buffers on mesa",
++ },
+ };
+
+ /* Certain applications (Steam) complain if we report an outdated driver version. In general,
+@@ -2713,6 +2734,8 @@
+ /* GL_ARB_blend_func_extended */
+ USE_GL_FUNC(glBindFragDataLocationIndexed)
+ USE_GL_FUNC(glGetFragDataIndex)
++ /* GL_ARB_buffer_storage */
++ USE_GL_FUNC(glBufferStorage)
+ /* GL_ARB_clear_buffer_object */
+ USE_GL_FUNC(glClearBufferData)
+ USE_GL_FUNC(glClearBufferSubData)
+@@ -2792,6 +2815,8 @@
+ /* GL_ARB_map_buffer_range */
+ USE_GL_FUNC(glFlushMappedBufferRange)
+ USE_GL_FUNC(glMapBufferRange)
++ /* GL_ARB_multi_bind */
++ USE_GL_FUNC(glBindBuffersRange)
+ /* GL_ARB_multisample */
+ USE_GL_FUNC(glSampleCoverageARB)
+ /* GL_ARB_multitexture */
+@@ -3951,6 +3976,7 @@
+ {ARB_TEXTURE_VIEW, MAKEDWORD_VERSION(4, 3)},
+
+ {ARB_CLEAR_TEXTURE, MAKEDWORD_VERSION(4, 4)},
++ {ARB_MULTI_BIND, MAKEDWORD_VERSION(4, 4)},
+
+ {ARB_CLIP_CONTROL, MAKEDWORD_VERSION(4, 5)},
+ {ARB_CULL_DISTANCE, MAKEDWORD_VERSION(4, 5)},
+diff -r -u --new-file a/dlls/wined3d/Makefile.in b/dlls/wined3d/Makefile.in
+--- a/dlls/wined3d/Makefile.in 2018-03-28 23:57:51.270913815 +0200
++++ b/dlls/wined3d/Makefile.in 2018-03-28 21:23:02.867794542 +0200
+@@ -6,6 +6,7 @@
+ arb_program_shader.c \
+ ati_fragment_shader.c \
+ buffer.c \
++ buffer_heap.c \
+ context.c \
+ cs.c \
+ device.c \
+diff -r -u --new-file a/dlls/wined3d/resource.c b/dlls/wined3d/resource.c
+--- a/dlls/wined3d/resource.c 2018-03-28 23:57:59.708900439 +0200
++++ b/dlls/wined3d/resource.c 2018-03-28 22:54:16.100016102 +0200
+@@ -340,6 +340,7 @@
+ HRESULT CDECL wined3d_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx,
+ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
+ {
++ HRESULT hr;
+ TRACE("resource %p, sub_resource_idx %u, map_desc %p, box %s, flags %#x.\n",
+ resource, sub_resource_idx, map_desc, debug_box(box), flags);
+
+@@ -362,9 +363,14 @@
+ }
+
+ flags = wined3d_resource_sanitise_map_flags(resource, flags);
+- wined3d_resource_wait_idle(resource);
+-
+- return wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags);
++ if (FAILED(hr = resource->resource_ops->resource_sub_resource_map(resource, sub_resource_idx, map_desc, box, flags)))
++ {
++ TRACE_(d3d_perf)("Mapping resource %p on the command stream.\n", resource);
++ wined3d_resource_wait_idle(resource);
++ hr = wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags);
++ }
++
++ return hr;
+ }
+
+ HRESULT CDECL wined3d_resource_map_info(struct wined3d_resource *resource, unsigned int sub_resource_idx,
+@@ -379,7 +385,13 @@
+ {
++ HRESULT hr;
+ TRACE("resource %p, sub_resource_idx %u.\n", resource, sub_resource_idx);
+
+- return wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx);
++ // Try the direct unmap first; fall back to the command stream if it fails.
++ if (FAILED(hr = resource->resource_ops->resource_sub_resource_unmap(resource, sub_resource_idx)))
++ {
++ TRACE_(d3d_perf)("Unmapping resource %p on the command stream.\n", resource);
++ hr = wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx);
++ }
++ return hr;
+ }
+
+ UINT CDECL wined3d_resource_update_info(struct wined3d_resource *resource, unsigned int sub_resource_idx,
+diff -r -u --new-file a/dlls/wined3d/state.c b/dlls/wined3d/state.c
+--- a/dlls/wined3d/state.c 2018-03-28 23:57:59.808900281 +0200
++++ b/dlls/wined3d/state.c 2018-03-28 23:06:47.167455200 +0200
+@@ -4797,7 +4797,11 @@
+ else
+ {
+ struct wined3d_buffer *ib = state->index_buffer;
+- GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object));
++ // FIXME(acomminos): disastrous. Persistently mapped index buffers bind the shared heap's buffer object instead.
++ if (ib->locations & WINED3D_LOCATION_PERSISTENT_MAP)
++ GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_heap->buffer_object));
++ else
++ GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object));
+ }
+ }
+
+@@ -4863,6 +4867,7 @@
+ enum wined3d_shader_type shader_type;
+ struct wined3d_buffer *buffer;
+ unsigned int i, base, count;
++ struct wined3d_bo_address bo_addr;
+
+ TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id);
+
+@@ -4872,10 +4877,49 @@
+ shader_type = WINED3D_SHADER_TYPE_COMPUTE;
+
+ wined3d_gl_limits_get_uniform_block_range(&gl_info->limits, shader_type, &base, &count);
+- for (i = 0; i < count; ++i)
++
++ if (count && gl_info->supported[ARB_MULTI_BIND]) // count == 0 would make the VLAs below zero-length.
++ {
++ GLuint buffer_objects[count];
++ GLintptr buffer_offsets[count];
++ GLsizeiptr buffer_sizes[count];
++
++ for (i = 0; i < count; ++i)
++ {
++ buffer = state->cb[shader_type][i];
++ if (buffer)
++ {
++ wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations);
++ buffer_objects[i] = bo_addr.buffer_object;
++ buffer_offsets[i] = (GLintptr)bo_addr.addr; // addr is a BYTE *; used here as an offset into the buffer object.
++ buffer_sizes[i] = bo_addr.length;
++ }
++ else
++ {
++ buffer_objects[i] = buffer_offsets[i] = 0;
++ // The ARB_multi_bind spec states that an error may be thrown if
++ // `size` is less than or equal to zero, Thus, we specify a size for
++ // unused buffers anyway.
++ buffer_sizes[i] = 1;
++ }
++ }
++ GL_EXTCALL(glBindBuffersRange(GL_UNIFORM_BUFFER, base, count, buffer_objects, buffer_offsets, buffer_sizes));
++ }
++ else
+ {
+- buffer = state->cb[shader_type][i];
+- GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, buffer ? buffer->buffer_object : 0));
++ for (i = 0; i < count; ++i)
++ {
++ buffer = state->cb[shader_type][i];
++ if (buffer)
++ {
++ wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations);
++ GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, (GLintptr)bo_addr.addr, bo_addr.length));
++ }
++ else
++ {
++ GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0));
++ }
++ }
+ }
+ checkGLcall("bind constant buffers");
+ }
+diff -r -u --new-file a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c
+--- a/dlls/wined3d/texture.c 2018-03-28 23:57:59.729900406 +0200
++++ b/dlls/wined3d/texture.c 2018-03-28 22:58:49.906449411 +0200
+@@ -2096,6 +2096,12 @@
+ static HRESULT texture_resource_sub_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx,
+ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
+ {
++ return E_NOTIMPL;
++}
++
++static HRESULT texture_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx,
++ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags)
++{
+ const struct wined3d_format *format = resource->format;
+ struct wined3d_texture_sub_resource *sub_resource;
+ struct wined3d_device *device = resource->device;
+@@ -2256,6 +2262,11 @@
+
+ static HRESULT texture_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx)
+ {
++ return E_NOTIMPL;
++}
++
++static HRESULT texture_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx)
++{
+ struct wined3d_texture_sub_resource *sub_resource;
+ struct wined3d_device *device = resource->device;
+ struct wined3d_context *context = NULL;
+@@ -2307,6 +2318,8 @@
+ texture_resource_sub_resource_map,
+ texture_resource_sub_resource_map_info,
+ texture_resource_sub_resource_unmap,
++ texture_resource_sub_resource_map_cs,
++ texture_resource_sub_resource_unmap_cs,
+ };
+
+ /* Context activation is done by the caller. */
+diff -r -u --new-file a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c
+--- a/dlls/wined3d/utils.c 2018-03-28 23:57:59.981900006 +0200
++++ b/dlls/wined3d/utils.c 2018-03-28 22:59:19.628387862 +0200
+@@ -6368,6 +6368,7 @@
+ LOCATION_TO_STR(WINED3D_LOCATION_DRAWABLE);
+ LOCATION_TO_STR(WINED3D_LOCATION_RB_MULTISAMPLE);
+ LOCATION_TO_STR(WINED3D_LOCATION_RB_RESOLVED);
++ LOCATION_TO_STR(WINED3D_LOCATION_PERSISTENT_MAP);
+ #undef LOCATION_TO_STR
+ if (location)
+ FIXME("Unrecognized location flag(s) %#x.\n", location);
+diff -r -u --new-file a/dlls/wined3d/wined3d_gl.h b/dlls/wined3d/wined3d_gl.h
+--- a/dlls/wined3d/wined3d_gl.h 2018-03-28 23:57:59.555900682 +0200
++++ b/dlls/wined3d/wined3d_gl.h 2018-03-28 23:07:20.628395936 +0200
+@@ -44,6 +44,7 @@
+ /* ARB */
+ ARB_BASE_INSTANCE,
+ ARB_BLEND_FUNC_EXTENDED,
++ ARB_BUFFER_STORAGE,
+ ARB_CLEAR_BUFFER_OBJECT,
+ ARB_CLEAR_TEXTURE,
+ ARB_CLIP_CONTROL,
+@@ -81,6 +82,7 @@
+ ARB_INTERNALFORMAT_QUERY2,
+ ARB_MAP_BUFFER_ALIGNMENT,
+ ARB_MAP_BUFFER_RANGE,
++ ARB_MULTI_BIND,
+ ARB_MULTISAMPLE,
+ ARB_MULTITEXTURE,
+ ARB_OCCLUSION_QUERY,
+diff -r -u --new-file a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
+--- a/dlls/wined3d/wined3d_private.h 2018-03-28 23:58:00.048899900 +0200
++++ b/dlls/wined3d/wined3d_private.h 2018-03-28 23:37:57.861715709 +0200
+@@ -75,6 +75,7 @@
+ #define WINED3D_QUIRK_INFO_LOG_SPAM 0x00000080
+ #define WINED3D_QUIRK_LIMITED_TEX_FILTERING 0x00000100
+ #define WINED3D_QUIRK_BROKEN_ARB_FOG 0x00000200
++#define WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT 0x00000400
+
+ enum wined3d_ffp_idx
+ {
+@@ -1470,6 +1471,7 @@
+ {
+ GLuint buffer_object;
+ BYTE *addr;
++ GLsizeiptr length;
+ };
+
+ struct wined3d_const_bo_address
+@@ -2929,7 +2931,8 @@
+ BYTE inScene : 1; /* A flag to check for proper BeginScene / EndScene call pairs */
+ BYTE softwareVertexProcessing : 1; /* process vertex shaders using software or hardware */
+ BYTE filter_messages : 1;
+- BYTE padding : 3;
++ BYTE use_pba : 1; /* A flag to use the persistent buffer allocator for dynamic buffers. */
++ BYTE padding : 2;
+
+ unsigned char surface_alignment; /* Line Alignment of surfaces */
+
+@@ -2980,6 +2983,10 @@
+ /* Context management */
+ struct wined3d_context **contexts;
+ UINT context_count;
++
++ /* Dynamic buffer heap */
++ struct wined3d_buffer_heap *wo_buffer_heap;
++ struct wined3d_buffer_heap *cb_buffer_heap;
+ };
+
+ void device_clear_render_targets(struct wined3d_device *device, UINT rt_count, const struct wined3d_fb_state *fb,
+@@ -3021,6 +3028,9 @@
+ HRESULT (*resource_map_info)(struct wined3d_resource *resource, unsigned int sub_resource_idx,
+ struct wined3d_map_info *info, DWORD flags);
+ HRESULT (*resource_sub_resource_unmap)(struct wined3d_resource *resource, unsigned int sub_resource_idx);
++ HRESULT (*resource_sub_resource_map_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx,
++ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags);
++ HRESULT (*resource_sub_resource_unmap_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx);
+ };
+
+ struct wined3d_resource
+@@ -3324,6 +3334,7 @@
+ #define WINED3D_LOCATION_DRAWABLE 0x00000040
+ #define WINED3D_LOCATION_RB_MULTISAMPLE 0x00000080
+ #define WINED3D_LOCATION_RB_RESOLVED 0x00000100
++#define WINED3D_LOCATION_PERSISTENT_MAP 0x00000200
+
+ const char *wined3d_debug_location(DWORD location) DECLSPEC_HIDDEN;
+
+@@ -3480,6 +3491,25 @@
+ DWORD flags) DECLSPEC_HIDDEN;
+ void state_unbind_resources(struct wined3d_state *state) DECLSPEC_HIDDEN;
+
++struct wined3d_map_range
++{
++ GLintptr offset;
++ GLsizeiptr size;
++};
++
++struct wined3d_buffer_heap_element
++{
++ struct wined3d_map_range range;
++
++ // rbtree data
++ struct wine_rb_entry entry;
++
++ // Binned free list positions
++ struct wined3d_buffer_heap_element *next;
++ struct wined3d_buffer_heap_element *prev;
++};
++
++
+ enum wined3d_cs_queue_id
+ {
+ WINED3D_CS_QUEUE_DEFAULT = 0,
+@@ -3624,6 +3654,7 @@
+ void wined3d_cs_emit_update_sub_resource(struct wined3d_cs *cs, struct wined3d_resource *resource,
+ unsigned int sub_resource_idx, const struct wined3d_box *box, const void *data, unsigned int row_pitch,
+ unsigned int slice_pitch) DECLSPEC_HIDDEN;
++void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_buffer_heap_element *map_range) DECLSPEC_HIDDEN;
+ void wined3d_cs_init_object(struct wined3d_cs *cs,
+ void (*callback)(void *object), void *object) DECLSPEC_HIDDEN;
+ HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx,
+@@ -3657,12 +3688,61 @@
+ CONV_POSITIONT,
+ };
+
+-struct wined3d_map_range
+-{
+- UINT offset;
+- UINT size;
++struct wined3d_buffer_heap_fenced_element;
++
++// Number of power-of-two buckets to populate.
++#define WINED3D_BUFFER_HEAP_BINS 32
++
++struct wined3d_buffer_heap_bin
++ {
++ struct wined3d_buffer_heap_element *head;
++ struct wined3d_buffer_heap_element *tail;
+ };
+
++struct wined3d_buffer_heap_bin_set
++{
++ struct wined3d_buffer_heap_bin bins[WINED3D_BUFFER_HEAP_BINS];
++ };
++
++// A heap that manages allocations with a single GL buffer.
++struct wined3d_buffer_heap
++{
++ GLuint buffer_object;
++ void *map_ptr;
++ GLsizeiptr alignment;
++ CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list.
++
++ struct wined3d_buffer_heap_bin_set free_list;
++
++ // Elements that need to be fenced, but haven't reached the required size.
++ struct wined3d_buffer_heap_bin_set pending_fenced_bins;
++
++ // List of sets of buffers behind a common fence, in FIFO order.
++ struct wined3d_buffer_heap_fenced_element *fenced_head;
++ struct wined3d_buffer_heap_fenced_element *fenced_tail;
++};
++
++HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **heap) DECLSPEC_HIDDEN;
++HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN;
++// Fetches a buffer from the heap of at least the given size.
++// Attempts to coalesce blocks under memory pressure.
++HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_buffer_heap_element** out_elem) DECLSPEC_HIDDEN;
++// Immediately frees a heap-allocated buffer segment.
++HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) DECLSPEC_HIDDEN;
++// Enqueues a buffer segment to return to the heap once its fence has been signaled.
++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_buffer_heap_element *elem) DECLSPEC_HIDDEN;
++// Issues a fence for the current set of pending fenced buffers.
++// Double-buffered: if the last fence issued has not yet been triggered, waits
++// on it.
++HRESULT wined3d_buffer_heap_cs_fence_issue(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN;
++// Waits on the next issued fence in FIFO order. Frees the fenced buffers after
++// the fence has been triggered.
++HRESULT wined3d_buffer_heap_cs_fence_wait(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN;
++// Performs deferred coalescing of buffers. To be called under memory pressure.
++// Outputs the number of coalesced regions in `num_coalesced`.
++HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, int *num_coalesced) DECLSPEC_HIDDEN;
++
++
+ struct wined3d_buffer
+ {
+ struct wined3d_resource resource;
+@@ -3687,6 +3767,11 @@
+ UINT stride; /* 0 if no conversion */
+ enum wined3d_buffer_conversion_type *conversion_map; /* NULL if no conversion */
+ UINT conversion_stride; /* 0 if no shifted conversion */
++
++ /* persistent mapped buffer */
++ struct wined3d_buffer_heap *buffer_heap;
++ struct wined3d_buffer_heap_element *cs_persistent_map;
++ struct wined3d_buffer_heap_element *mt_persistent_map;
+ };
+
+ static inline struct wined3d_buffer *buffer_from_resource(struct wined3d_resource *resource)