diff options
author | Lars Norberg | 2018-03-29 00:00:43 +0200 |
---|---|---|
committer | Lars Norberg | 2018-03-29 00:00:43 +0200 |
commit | a6782c800f37a78b2aa12a7d0a2365dda7117ca2 (patch) | |
tree | d9169e7562d271af2783f79b801bd1ba9a3f94d5 | |
parent | 9ea985bb81d8d89f644f88ea8cb15435f3fe0faf (diff) | |
download | aur-a6782c800f37a78b2aa12a7d0a2365dda7117ca2.tar.gz |
restructured pba patches
-rw-r--r-- | .SRCINFO | 24 | ||||
-rw-r--r-- | 0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch | 806 | ||||
-rw-r--r-- | 0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch | 679 | ||||
-rw-r--r-- | 0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch | 121 | ||||
-rw-r--r-- | 0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch | 26 | ||||
-rw-r--r-- | 0005-wined3d-Disable-persistently-mapped-shader-resource-.patch | 28 | ||||
-rw-r--r-- | 0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch | 92 | ||||
-rw-r--r-- | 0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch | 351 | ||||
-rw-r--r-- | 0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch | 211 | ||||
-rw-r--r-- | 0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch | 96 | ||||
-rw-r--r-- | PKGBUILD | 44 | ||||
-rw-r--r-- | wine-staging-pba.patch | 1574 |
12 files changed, 1582 insertions, 2470 deletions
@@ -1,6 +1,6 @@ pkgbase = wine-staging-pba-git pkgdesc = Wine staging branch with PBA patches for increased D3D performance. Git versions. (Also includes Path of Exile DX11 patch!) - pkgver = 3.4.r3604.e50f0488+wine.3.4.r178.ge1c7a1f7ce+pba.r29.87307b1 + pkgver = 3.4.r3607.5876a3f7+wine.3.4.r192.gd7430abd40 pkgrel = 1 url = https://github.com/acomminos/wine-pba install = wine.install @@ -174,16 +174,7 @@ pkgbase = wine-staging-pba-git options = staticlibs source = wine-git::git://source.winehq.org/git/wine.git source = wine-staging-git::git+https://github.com/wine-staging/wine-staging.git - source = wine-pba::git+https://github.com/acomminos/wine-pba.git - source = 0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch - source = 0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch - source = 0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch - source = 0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch - source = 0005-wined3d-Disable-persistently-mapped-shader-resource-.patch - source = 0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch - source = 0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch - source = 0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch - source = 0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch + source = wine-staging-pba.patch source = steam.patch source = poe-fix.patch source = harmony-fix.diff @@ -191,16 +182,7 @@ pkgbase = wine-staging-pba-git source = wine-binfmt.conf sha256sums = SKIP sha256sums = SKIP - sha256sums = SKIP - sha256sums = f5f8c507f79c829b118125a3749f80ed31eb8ba8ad024d99554a1a6458c438eb - sha256sums = 98372adbb16949edca4c90604cceac5db3d4bf37eccc13d59d3e5735f53f2501 - sha256sums = 112f8fc68d5421805fb1de32c0216c41412afae21153d803127c9d1c1103e35b - sha256sums = 016ee498c9ff7af0d14c7b0e42f4bc5255f5dae6d391fd36c2060668fcade662 - sha256sums = ec11046f6335c2831e3b89c2b0c241b74974415a64523f35f0a606d27d1dbfbb - sha256sums = 
d2a8febc2500d6a7bed418232efedf82f114e7d14ca1199789abe576dddae90b - sha256sums = ff5ef40b945fdad16db99a1f736c20c53711cfe002d367ea4aa55d84bf6a1207 - sha256sums = dee52666fc680b74f5d5ba1a2a74de715c7b49376895ff057ccada9daaef5911 - sha256sums = 5c3776e5c94b51b368384c79aec9b26716fc6517935d782c121c856f21dfd223 + sha256sums = cc229607e417841d3e900cc93c3ab2f79c0851705a07e7206729193ffa3dc9db sha256sums = 972d6b114f7621c5f3bd34b1105dd390b318db18fbc76328001c984db488a9b0 sha256sums = 1c8be30224a67c0f279ae1324165708371aad8f290ebc6da69c686d0904e606c sha256sums = 50ccb5bd2067e5d2739c5f7abcef11ef096aa246f5ceea11d2c3b508fc7f77a1 diff --git a/0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch b/0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch deleted file mode 100644 index ab14b215569b..000000000000 --- a/0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch +++ /dev/null @@ -1,806 +0,0 @@ -From 1f69076549bf2351eb6d8d885b35a46b4dc69813 Mon Sep 17 00:00:00 2001 -From: Andrew Comminos <andrew@comminos.com> -Date: Mon, 5 Mar 2018 15:38:35 -0800 -Subject: [PATCH 1/9] wined3d: Initial implementation of a persistent mapped - buffer allocator. 
- ---- - dlls/wined3d-csmt/Makefile.in | 1 + - dlls/wined3d/Makefile.in | 1 + - dlls/wined3d/buffer_heap.c | 508 +++++++++++++++++++++++++++++++++++++++++ - dlls/wined3d/cs.c | 9 + - dlls/wined3d/device.c | 52 +++++ - dlls/wined3d/directx.c | 3 + - dlls/wined3d/query.c | 2 +- - dlls/wined3d/wined3d_gl.h | 1 + - dlls/wined3d/wined3d_private.h | 68 +++++- - 9 files changed, 641 insertions(+), 4 deletions(-) - create mode 100644 dlls/wined3d/buffer_heap.c - -diff --git a/dlls/wined3d-csmt/Makefile.in b/dlls/wined3d-csmt/Makefile.in -index 1d0458eb46..cb3a5484c6 100644 ---- a/dlls/wined3d-csmt/Makefile.in -+++ b/dlls/wined3d-csmt/Makefile.in -@@ -8,6 +8,7 @@ C_SRCS = \ - arb_program_shader.c \ - ati_fragment_shader.c \ - buffer.c \ -+ buffer_heap.c \ - context.c \ - cs.c \ - device.c \ -diff --git a/dlls/wined3d/Makefile.in b/dlls/wined3d/Makefile.in -index b850ba6872..52ef8666fb 100644 ---- a/dlls/wined3d/Makefile.in -+++ b/dlls/wined3d/Makefile.in -@@ -6,6 +6,7 @@ C_SRCS = \ - arb_program_shader.c \ - ati_fragment_shader.c \ - buffer.c \ -+ buffer_heap.c \ - context.c \ - cs.c \ - device.c \ -diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c -new file mode 100644 -index 0000000000..b133bd6893 ---- /dev/null -+++ b/dlls/wined3d/buffer_heap.c -@@ -0,0 +1,508 @@ -+/* -+ * Copyright 2018 Andrew Comminos -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ * -+ */ -+ -+#include "config.h" -+#include "wine/port.h" -+#include "wine/rbtree.h" -+#include "wined3d_private.h" -+ -+WINE_DEFAULT_DEBUG_CHANNEL(d3d); -+WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); -+ -+struct wined3d_buffer_heap_element -+{ -+ struct wined3d_map_range range; -+ -+ // rbtree data -+ struct wine_rb_entry entry; -+ -+ // Binned free list positions -+ struct wined3d_buffer_heap_element *next; -+ struct wined3d_buffer_heap_element *prev; -+}; -+ -+struct wined3d_buffer_heap_fenced_element -+{ -+ struct wined3d_buffer_heap_bin_set free_list; -+ struct wined3d_fence *fence; -+ -+ struct wined3d_buffer_heap_fenced_element *next; -+}; -+ -+static struct wined3d_buffer_heap_element* element_new(GLsizei offset, GLsizei size) -+{ -+ struct wined3d_buffer_heap_element* elem; -+ elem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct wined3d_buffer_heap_element)); -+ if (!elem) -+ return NULL; -+ elem->range.offset = offset; -+ elem->range.size = size; -+ return elem; -+} -+ -+static inline int bitwise_log2_floor(GLsizei size) -+{ -+ // XXX(acomminos): I hope this gets unrolled. -+ for (int i = 8 * sizeof(GLsizei) - 1; i >= 0; i--) -+ { -+ if ((size >> i) & 1) { -+ return i; -+ } -+ } -+ return 0; -+} -+ -+static inline int bitwise_log2_ceil(GLsizei size) -+{ -+ // Add one to the floor of size if size isn't a power of two. -+ return bitwise_log2_floor(size) + !!(size & (size - 1)); -+} -+ -+static int element_bin(struct wined3d_buffer_heap_element *elem) -+{ -+ return min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_floor(elem->range.size)); -+} -+ -+// Inserts an element into the appropriate free list bin. 
-+static void element_insert_free_bin(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) -+{ -+ int bin = element_bin(elem); -+ -+ elem->prev = NULL; -+ elem->next = heap->free_list.bins[bin].head; -+ if (heap->free_list.bins[bin].head) -+ heap->free_list.bins[bin].head->prev = elem; -+ heap->free_list.bins[bin].head = elem; -+ -+ if (!heap->free_list.bins[bin].tail) -+ heap->free_list.bins[bin].tail = elem; -+ -+ TRACE("Inserted allocation at %p of size %lld into bin %d\n", elem->range.offset, elem->range.size, bin); -+} -+ -+// Removes an element from the free tree, its bin, and the coalesce list. -+static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) -+{ -+ int bin = element_bin(elem); -+ -+ if (elem->prev) -+ elem->prev->next = elem->next; -+ -+ if (elem->next) -+ elem->next->prev = elem->prev; -+ -+ if (elem == heap->free_list.bins[bin].head) -+ heap->free_list.bins[bin].head = elem->next; -+ -+ if (elem == heap->free_list.bins[bin].tail) -+ heap->free_list.bins[bin].tail = elem->prev; -+ -+ elem->prev = NULL; -+ elem->next = NULL; -+ -+ TRACE("Freed allocation at %p of size %lld from bin %d\n", elem->range.offset, elem->range.size, bin); -+} -+ -+static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_bin_set bins, struct wined3d_fence* fence) -+{ -+ struct wined3d_buffer_heap_fenced_element* elem; -+ elem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct wined3d_buffer_heap_fenced_element)); -+ if (!elem) -+ return NULL; -+ elem->free_list = bins; -+ elem->fence = fence; -+ elem->next = NULL; -+ return elem; -+} -+ -+static int free_tree_compare(const void *key, const struct wine_rb_entry *entry) -+{ -+ const GLsizei offset = *(const GLsizei*) key; -+ struct wined3d_buffer_heap_element *elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); -+ -+ if (offset < elem->range.offset) -+ return -1; -+ if (offset 
> elem->range.offset) -+ return 1; -+ return 0; -+} -+ -+/* Context activation is done by the caller. */ -+HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap) -+{ -+ const struct wined3d_gl_info *gl_info = context->gl_info; -+ const GLenum buffer_target = GL_ARRAY_BUFFER; -+ GLbitfield access_flags; -+ GLbitfield storage_flags; -+ struct wined3d_buffer_heap_element *initial_elem; -+ -+ struct wined3d_buffer_heap *object; -+ -+ if ((alignment & (alignment - 1)) != 0) -+ { -+ return E_FAIL; -+ } -+ -+ if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object)))) -+ { -+ return E_OUTOFMEMORY; -+ } -+ -+ access_flags = GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_WRITE_BIT; -+ if (!write_only) -+ { -+ access_flags |= GL_MAP_READ_BIT; -+ } -+ storage_flags = access_flags; -+ -+ // TODO(acomminos): where should we be checking for errors here? -+ GL_EXTCALL(glGenBuffers(1, &object->buffer_object)); -+ -+ context_bind_bo(context, buffer_target, object->buffer_object); -+ -+ // TODO(acomminos): assert glBufferStorage supported? -+ GL_EXTCALL(glBufferStorage(buffer_target, size, NULL, storage_flags)); -+ -+ if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(buffer_target, 0, size, access_flags)))) -+ { -+ ERR("Couldn't map persistent buffer.\n"); -+ return -1; // FIXME(acomminos): proper error code, cleanup -+ } -+ context_bind_bo(context, buffer_target, 0); -+ -+ object->fenced_head = object->fenced_tail = NULL; -+ object->alignment = alignment; -+ InitializeCriticalSection(&object->temp_lock); -+ -+ initial_elem = element_new(0, size); -+ // Don't bother adding the initial allocation to the coalescing tree. -+ element_insert_free_bin(object, initial_elem); -+ -+ *buffer_heap = object; -+ -+ return WINED3D_OK; -+} -+ -+/* Context activation is done by the caller. 
*/ -+HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) -+{ -+ FIXME("Unimplemented, leaking buffer"); -+ return WINED3D_OK; -+} -+ -+HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range) -+{ -+ int initial_bin; -+ int initial_size = size; -+ -+ EnterCriticalSection(&heap->temp_lock); -+ -+ // After alignment, reduce fragmentation by rounding to next power of two. -+ // If the alignment is a power of two (which it should be), this should be -+ // no problem. -+ size = 1 << bitwise_log2_ceil(size); -+ -+ // Align size values where possible. -+ if (heap->alignment && (size % heap->alignment != 0)) -+ size += heap->alignment - (size % heap->alignment); -+ -+ initial_bin = min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_ceil(size)); -+ -+ for (int i = initial_bin; i < WINED3D_BUFFER_HEAP_BINS; i++) -+ { -+ struct wined3d_buffer_heap_element *elem = heap->free_list.bins[i].head; -+ if (elem) -+ { -+ struct wined3d_map_range remaining_range; -+ remaining_range.offset = elem->range.offset + size; -+ remaining_range.size = elem->range.size - size; -+ -+ out_range->offset = elem->range.offset; -+ out_range->size = size; -+ -+ TRACE_(d3d_perf)("Allocated %d (requested %d) at %p from bin %d (initial %d)\n", size, initial_size, elem->range.offset, i, initial_bin); -+ -+ // Remove the element from its current free bin to move it to the correct list. 
-+ element_remove_free(heap, elem); -+ -+ if (remaining_range.size > 0) -+ { -+ TRACE_(d3d_perf)("Imperfect fit allocated, fragmenting remainder of %lld at %p.\n", remaining_range.size, remaining_range.offset); -+ -+ elem->range = remaining_range; -+ element_insert_free_bin(heap, elem); -+ } -+ else -+ { -+ HeapFree(GetProcessHeap(), 0, elem); -+ } -+ -+ LeaveCriticalSection(&heap->temp_lock); -+ return WINED3D_OK; -+ } -+ } -+ -+ LeaveCriticalSection(&heap->temp_lock); -+ -+ FIXME_(d3d_perf)("Forcing coalesce, not enough free space in buffer heap.\n"); -+ int num_coalesced; -+ if (SUCCEEDED(wined3d_buffer_heap_deferred_coalesce(heap, &num_coalesced))) -+ { -+ if (num_coalesced > 0) -+ return wined3d_buffer_heap_alloc(heap, size, out_range); -+ } -+ -+ FIXME_(d3d_perf)("Coalescing did not create new blocks, failing.\n"); -+ -+ return WINED3DERR_OUTOFVIDEOMEMORY; -+} -+ -+HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) -+{ -+ struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size); -+ -+ if (!elem) -+ return E_OUTOFMEMORY; -+ -+ EnterCriticalSection(&heap->temp_lock); -+ -+ // Only insert the element into a free bin, coalescing will occur later. 
-+ element_insert_free_bin(heap, elem); -+ -+ LeaveCriticalSection(&heap->temp_lock); -+ -+ return WINED3D_OK; -+} -+ -+HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) -+{ -+ struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size); -+ int bin_index = element_bin(elem); -+ struct wined3d_buffer_heap_bin *bin = &heap->pending_fenced_bins.bins[bin_index]; -+ -+ if (bin->tail) -+ { -+ bin->tail->next = elem; -+ elem->prev = bin->tail; -+ bin->tail = elem; -+ } -+ else -+ { -+ bin->head = elem; -+ bin->tail = elem; -+ } -+ -+ return WINED3D_OK; -+} -+ -+HRESULT wined3d_buffer_heap_cs_fence_issue(struct wined3d_buffer_heap *heap, struct wined3d_device *device) -+{ -+ struct wined3d_buffer_heap_fenced_element *fenced_elem; -+ struct wined3d_fence *fence; -+ HRESULT hr; -+ -+ if (heap->fenced_head) -+ { -+ // XXX(acomminos): double or triple buffer this? -+ wined3d_buffer_heap_cs_fence_wait(heap, device); -+ } -+ -+ if (FAILED(hr = wined3d_fence_create(device, &fence))) -+ { -+ ERR("Failed to create fence.\n"); -+ return hr; -+ } -+ -+ fenced_elem = fenced_element_new(heap->pending_fenced_bins, fence); -+ if (!fenced_elem) -+ return E_OUTOFMEMORY; -+ -+ TRACE_(d3d_perf)("Dispatching fenced buffer set.\n"); -+ memset(&heap->pending_fenced_bins, 0, sizeof(heap->pending_fenced_bins)); -+ -+ // Append to end of fenced list, which works well if you assume that buffers -+ // are freed in some ascending draw call ordering. 
-+ if (!heap->fenced_head) -+ { -+ heap->fenced_head = fenced_elem; -+ heap->fenced_tail = fenced_elem; -+ } -+ else -+ { -+ heap->fenced_tail->next = fenced_elem; -+ heap->fenced_tail = fenced_elem; -+ } -+ -+ wined3d_fence_issue(fence, device); -+ return WINED3D_OK; -+} -+ -+HRESULT wined3d_buffer_heap_cs_fence_wait(struct wined3d_buffer_heap *heap, struct wined3d_device *device) -+{ -+ enum wined3d_fence_result res; -+ struct wined3d_buffer_heap_fenced_element *elem = heap->fenced_head; -+ if (!elem) -+ return WINED3D_OK; -+ -+ res = wined3d_fence_wait(elem->fence, device); -+ switch (res) -+ { -+ case WINED3D_FENCE_OK: -+ case WINED3D_FENCE_NOT_STARTED: -+ { -+ TRACE_(d3d_perf)("Freed fence group.\n"); -+ -+ EnterCriticalSection(&heap->temp_lock); -+ for (int i = 0; i < WINED3D_BUFFER_HEAP_BINS; i++) -+ { -+ struct wined3d_buffer_heap_bin *elem_bin = &elem->free_list.bins[i]; -+ if (!elem_bin->tail) -+ continue; -+ -+ struct wined3d_buffer_heap_bin *heap_bin = &heap->free_list.bins[i]; -+ if (heap_bin->head) -+ { -+ // Insert to front. -+ elem_bin->tail->next = heap_bin->head; -+ heap_bin->head->prev = elem_bin->tail; -+ -+ elem_bin->head->prev = NULL; -+ heap_bin->head = elem_bin->head; -+ } -+ else -+ { -+ elem_bin->head->prev = NULL; -+ heap_bin->head = elem_bin->head; -+ elem_bin->tail->next = NULL; -+ heap_bin->tail = elem_bin->tail; -+ } -+ } -+ LeaveCriticalSection(&heap->temp_lock); -+ -+ wined3d_fence_destroy(elem->fence); -+ -+ heap->fenced_head = elem->next; -+ HeapFree(GetProcessHeap(), 0, elem); -+ // TODO(acomminos): bother to null out fenced_tail? 
-+ break; -+ } -+ default: -+ return WINED3D_OK; -+ } -+ -+ return WINED3D_OK; -+} -+ -+HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, int *coalesced_count) -+{ -+ struct wined3d_buffer_heap_element *elem = NULL; -+ struct wined3d_buffer_heap_element *next = NULL; -+ struct wine_rb_entry *entry; -+ struct wined3d_map_range coalesced_range; -+ -+ struct wine_rb_tree free_tree; -+ int num_coalesced = 0; -+ -+ wine_rb_init(&free_tree, free_tree_compare); -+ -+ EnterCriticalSection(&heap->temp_lock); -+ -+ // TODO(acomminos): on one hand, if there's a lot of elements in the list, -+ // it's highly fragmented. on the other, we can potentially waste a decent -+ // sum of time checking for uncoalesced bins. -+ for (int i = 0; i < WINED3D_BUFFER_HEAP_BINS; i++) -+ { -+ elem = heap->free_list.bins[i].head; -+ while (elem) -+ { -+ // Insert a sentry. FIXME(acomminos): can skip this with traversal. -+ if (wine_rb_put(&free_tree, &elem->range.offset, &elem->entry) == -1) -+ { -+ ERR("Failed to insert key %x in tree.\n", elem->range.offset); -+ elem = elem->next; -+ continue; -+ } -+ -+ coalesced_range = elem->range; -+ -+ // Coalesce right. -+ entry = wine_rb_next(&elem->entry); -+ if (entry) -+ { -+ TRACE("Coalesced right.\n"); -+ struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); -+ if (elem->range.offset + elem->range.size == right_elem->range.offset) -+ { -+ coalesced_range.size += right_elem->range.size; -+ -+ wine_rb_remove(&free_tree, entry); -+ element_remove_free(heap, right_elem); -+ HeapFree(GetProcessHeap(), 0, right_elem); -+ -+ num_coalesced++; -+ } -+ } -+ -+ // Coalesce left. 
-+ entry = wine_rb_prev(&elem->entry); -+ if (entry) -+ { -+ TRACE("Coalesced left.\n"); -+ struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); -+ if (left_elem->range.offset + left_elem->range.size == coalesced_range.offset) -+ { -+ coalesced_range.offset = left_elem->range.offset; -+ coalesced_range.size += left_elem->range.size; -+ -+ wine_rb_remove(&free_tree, entry); -+ element_remove_free(heap, left_elem); -+ HeapFree(GetProcessHeap(), 0, left_elem); -+ -+ num_coalesced++; -+ } -+ } -+ -+ next = elem->next; -+ -+ if (elem->range.size != coalesced_range.size) -+ { -+ FIXME_(d3d_perf)("Coalesced range from (%p, %ld) to (%p, %ld)\n", elem->range.offset, elem->range.size, coalesced_range.offset, coalesced_range.size); -+ -+ wine_rb_remove(&free_tree, &elem->entry); -+ -+ // Move to the correct free bin. -+ element_remove_free(heap, elem); -+ elem->range = coalesced_range; -+ element_insert_free_bin(heap, elem); -+ -+ wine_rb_put(&free_tree, &elem->range.offset, &elem->entry); -+ } -+ -+ elem = next; -+ } -+ } -+ -+ LeaveCriticalSection(&heap->temp_lock); -+ -+ FIXME_(d3d_perf)("Performed %d coalesces.\n", num_coalesced); -+ if (coalesced_count) -+ *coalesced_count = num_coalesced; -+ -+ return WINED3D_OK; -+} -diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c -index 3a7c95ddd8..50a4d041cd 100644 ---- a/dlls/wined3d/cs.c -+++ b/dlls/wined3d/cs.c -@@ -472,6 +472,15 @@ static void wined3d_cs_exec_present(struct wined3d_cs *cs, const void *data) - } - - InterlockedDecrement(&cs->pending_presents); -+ -+ // FIXME(acomminos): is this the right place to put double-buffered frame -+ // timing based logic? 
-+ // FIXME(acomminos): this conditional sucks, replace with fancier feature check -+ if (cs->device->wo_buffer_heap && cs->device->cb_buffer_heap) -+ { -+ wined3d_buffer_heap_cs_fence_issue(cs->device->wo_buffer_heap, cs->device); -+ wined3d_buffer_heap_cs_fence_issue(cs->device->cb_buffer_heap, cs->device); -+ } - } - - void wined3d_cs_emit_present(struct wined3d_cs *cs, struct wined3d_swapchain *swapchain, -diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c -index e2b27e0cf4..785841a062 100644 ---- a/dlls/wined3d/device.c -+++ b/dlls/wined3d/device.c -@@ -833,6 +833,53 @@ static void destroy_default_samplers(struct wined3d_device *device, struct wined - device->null_sampler = NULL; - } - -+/* Context activation is done by the caller. */ -+static void create_buffer_heap(struct wined3d_device *device, struct wined3d_context *context) -+{ -+ const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; -+ // TODO(acomminos): kill this magic number. perhaps base on vram. -+ GLsizeiptr geo_heap_size = 512 * 1024 * 1024; -+ // We choose a constant buffer size of 128MB, the same as NVIDIA claims to -+ // use in their Direct3D driver for discarded constant buffers. -+ GLsizeiptr cb_heap_size = 128 * 1024 * 1024; -+ GLint ub_alignment; -+ HRESULT hr; -+ -+ if (gl_info->supported[ARB_BUFFER_STORAGE]) -+ { -+ gl_info->gl_ops.gl.p_glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &ub_alignment); -+ -+ // Align constant buffer heap size, in case GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT isn't a power of two (for some reason). 
-+ cb_heap_size -= cb_heap_size % ub_alignment; -+ -+ if (FAILED(hr = wined3d_buffer_heap_create(context, geo_heap_size, 0, TRUE, &device->wo_buffer_heap))) -+ { -+ ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr); -+ } -+ -+ if (FAILED(hr = wined3d_buffer_heap_create(context, cb_heap_size, ub_alignment, TRUE, &device->cb_buffer_heap))) -+ { -+ ERR("Failed to create persistent buffer heap for constant buffers, hr %#x.\n", hr); -+ } -+ -+ FIXME("Initialized PBA (geo_heap_size: %ld, cb_heap_size: %ld, ub_align: %d)\n", geo_heap_size, cb_heap_size, ub_alignment); -+ } -+ else -+ { -+ FIXME("Not using PBA, ARB_buffer_storage unsupported.\n"); -+ } -+} -+ -+/* Context activation is done by the caller. */ -+static void destroy_buffer_heap(struct wined3d_device *device, struct wined3d_context *context) -+{ -+ if (device->wo_buffer_heap) -+ wined3d_buffer_heap_destroy(device->wo_buffer_heap, context); -+ -+ if (device->cb_buffer_heap) -+ wined3d_buffer_heap_destroy(device->cb_buffer_heap, context); -+} -+ - static LONG fullscreen_style(LONG style) - { - /* Make sure the window is managed, otherwise we won't get keyboard input. 
*/ -@@ -997,6 +1044,8 @@ static void wined3d_device_delete_opengl_contexts_cs(void *object) - device->shader_backend->shader_free_private(device); - destroy_dummy_textures(device, context); - destroy_default_samplers(device, context); -+ destroy_buffer_heap(device, context); -+ - context_release(context); - - while (device->context_count) -@@ -1045,6 +1094,9 @@ static void wined3d_device_create_primary_opengl_context_cs(void *object) - context = context_acquire(device, target, 0); - create_dummy_textures(device, context); - create_default_samplers(device, context); -+ -+ create_buffer_heap(device, context); -+ - context_release(context); - } - -diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c -index 8720fc7ad6..46c6a59536 100644 ---- a/dlls/wined3d/directx.c -+++ b/dlls/wined3d/directx.c -@@ -111,6 +111,7 @@ static const struct wined3d_extension_map gl_extension_map[] = - /* ARB */ - {"GL_ARB_base_instance", ARB_BASE_INSTANCE }, - {"GL_ARB_blend_func_extended", ARB_BLEND_FUNC_EXTENDED }, -+ {"GL_ARB_buffer_storage", ARB_BUFFER_STORAGE }, - {"GL_ARB_clear_buffer_object", ARB_CLEAR_BUFFER_OBJECT }, - {"GL_ARB_clear_texture", ARB_CLEAR_TEXTURE }, - {"GL_ARB_clip_control", ARB_CLIP_CONTROL }, -@@ -2714,6 +2715,8 @@ static void load_gl_funcs(struct wined3d_gl_info *gl_info) - /* GL_ARB_blend_func_extended */ - USE_GL_FUNC(glBindFragDataLocationIndexed) - USE_GL_FUNC(glGetFragDataIndex) -+ /* GL_ARB_buffer_storage */ -+ USE_GL_FUNC(glBufferStorage) - /* GL_ARB_clear_buffer_object */ - USE_GL_FUNC(glClearBufferData) - USE_GL_FUNC(glClearBufferSubData) -diff --git a/dlls/wined3d/query.c b/dlls/wined3d/query.c -index 5ea79b6e4a..f3ca1630e5 100644 ---- a/dlls/wined3d/query.c -+++ b/dlls/wined3d/query.c -@@ -88,7 +88,7 @@ static BOOL wined3d_fence_supported(const struct wined3d_gl_info *gl_info) - return gl_info->supported[ARB_SYNC] || gl_info->supported[NV_FENCE] || gl_info->supported[APPLE_FENCE]; - } - --static enum wined3d_fence_result wined3d_fence_test(const 
struct wined3d_fence *fence, -+enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, - const struct wined3d_device *device, DWORD flags) - { - const struct wined3d_gl_info *gl_info; -diff --git a/dlls/wined3d/wined3d_gl.h b/dlls/wined3d/wined3d_gl.h -index 87283c850e..7626864ef2 100644 ---- a/dlls/wined3d/wined3d_gl.h -+++ b/dlls/wined3d/wined3d_gl.h -@@ -44,6 +44,7 @@ enum wined3d_gl_extension - /* ARB */ - ARB_BASE_INSTANCE, - ARB_BLEND_FUNC_EXTENDED, -+ ARB_BUFFER_STORAGE, - ARB_CLEAR_BUFFER_OBJECT, - ARB_CLEAR_TEXTURE, - ARB_CLIP_CONTROL, -diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h -index 8aa61d811f..3d535f4e17 100644 ---- a/dlls/wined3d/wined3d_private.h -+++ b/dlls/wined3d/wined3d_private.h -@@ -1712,6 +1712,9 @@ void wined3d_fence_destroy(struct wined3d_fence *fence) DECLSPEC_HIDDEN; - void wined3d_fence_issue(struct wined3d_fence *fence, const struct wined3d_device *device) DECLSPEC_HIDDEN; - enum wined3d_fence_result wined3d_fence_wait(const struct wined3d_fence *fence, - const struct wined3d_device *device) DECLSPEC_HIDDEN; -+// XXX(acomminos): really expose this? -+enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, -+ const struct wined3d_device *device, DWORD flags) DECLSPEC_HIDDEN; - - /* Direct3D terminology with little modifications. 
We do not have an issued - * state because only the driver knows about it, but we have a created state -@@ -2993,6 +2996,10 @@ struct wined3d_device - /* Context management */ - struct wined3d_context **contexts; - UINT context_count; -+ -+ /* Dynamic buffer heap */ -+ struct wined3d_buffer_heap *wo_buffer_heap; -+ struct wined3d_buffer_heap *cb_buffer_heap; - }; - - void device_clear_render_targets(struct wined3d_device *device, UINT rt_count, const struct wined3d_fb_state *fb, -@@ -3513,6 +3520,12 @@ void state_init(struct wined3d_state *state, struct wined3d_fb_state *fb, - DWORD flags) DECLSPEC_HIDDEN; - void state_unbind_resources(struct wined3d_state *state) DECLSPEC_HIDDEN; - -+struct wined3d_map_range -+{ -+ GLintptr offset; -+ GLsizeiptr size; -+}; -+ - enum wined3d_cs_queue_id - { - WINED3D_CS_QUEUE_DEFAULT = 0, -@@ -3692,12 +3705,61 @@ enum wined3d_buffer_conversion_type - CONV_POSITIONT, - }; - --struct wined3d_map_range -+struct wined3d_buffer_heap_element; -+struct wined3d_buffer_heap_fenced_element; -+ -+// Number of power-of-two buckets to populate. -+#define WINED3D_BUFFER_HEAP_BINS 32 -+ -+struct wined3d_buffer_heap_bin - { -- UINT offset; -- UINT size; -+ struct wined3d_buffer_heap_element *head; -+ struct wined3d_buffer_heap_element *tail; -+}; -+ -+struct wined3d_buffer_heap_bin_set -+{ -+ struct wined3d_buffer_heap_bin bins[WINED3D_BUFFER_HEAP_BINS]; - }; - -+// A heap that manages allocations with a single GL buffer. -+struct wined3d_buffer_heap -+{ -+ GLuint buffer_object; -+ void *map_ptr; -+ GLsizeiptr alignment; -+ CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list. -+ -+ struct wined3d_buffer_heap_bin_set free_list; -+ -+ // Elements that need to be fenced, but haven't reached the required size. -+ struct wined3d_buffer_heap_bin_set pending_fenced_bins; -+ -+ // List of sets of buffers behind a common fence, in FIFO order. 
-+ struct wined3d_buffer_heap_fenced_element *fenced_head; -+ struct wined3d_buffer_heap_fenced_element *fenced_tail; -+}; -+ -+HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **heap) DECLSPEC_HIDDEN; -+HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN; -+// Fetches a buffer from the heap of at least the given size. -+// Attempts to coalesce blocks under memory pressure. -+HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range* out_range) DECLSPEC_HIDDEN; -+// Immediately frees a heap-allocated buffer segment. -+HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN; -+// Enqueues a buffer segment to return to the heap once its fence has been signaled. -+HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) DECLSPEC_HIDDEN; -+// Issues a fence for the current set of pending fenced buffers. -+// Double-buffered: if the last fence issued has not yet been triggered, waits -+// on it. -+HRESULT wined3d_buffer_heap_cs_fence_issue(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN; -+// Waits on the next issued fence in FIFO order. Frees the fenced buffers after -+// the fence has been triggered. -+HRESULT wined3d_buffer_heap_cs_fence_wait(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN; -+// Performs deferred coalescing of buffers. To be called under memory pressure. -+// Outputs the number of coalesced regions in `num_coalesced`. 
-+HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, int *num_coalesced) DECLSPEC_HIDDEN; -+ - struct wined3d_buffer - { - struct wined3d_resource resource; --- -2.16.2 - diff --git a/0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch b/0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch deleted file mode 100644 index d4b2299641b0..000000000000 --- a/0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch +++ /dev/null @@ -1,679 +0,0 @@ -From af82b8e867af940f7ec68998a797aa5d7dfc540a Mon Sep 17 00:00:00 2001 -From: Andrew Comminos <andrew@comminos.com> -Date: Mon, 5 Mar 2018 15:39:11 -0800 -Subject: [PATCH 2/9] wined3d: Add support for backing dynamic wined3d_buffer - objects by a persistent map. - ---- - dlls/wined3d/buffer.c | 220 ++++++++++++++++++++++++++++++++++++++++- - dlls/wined3d/context.c | 6 +- - dlls/wined3d/cs.c | 60 ++++++++++- - dlls/wined3d/resource.c | 18 +++- - dlls/wined3d/state.c | 17 +++- - dlls/wined3d/texture.c | 13 +++ - dlls/wined3d/utils.c | 1 + - dlls/wined3d/wined3d_private.h | 11 +++ - 8 files changed, 336 insertions(+), 10 deletions(-) - -diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c -index cae7ef8788..e7a0f59a67 100644 ---- a/dlls/wined3d/buffer.c -+++ b/dlls/wined3d/buffer.c -@@ -28,12 +28,14 @@ - #include "wined3d_private.h" - - WINE_DEFAULT_DEBUG_CHANNEL(d3d); -+WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); - - #define WINED3D_BUFFER_HASDESC 0x01 /* A vertex description has been found. */ - #define WINED3D_BUFFER_USE_BO 0x02 /* Use a buffer object for this buffer. */ - #define WINED3D_BUFFER_PIN_SYSMEM 0x04 /* Keep a system memory copy for this buffer. */ - #define WINED3D_BUFFER_DISCARD 0x08 /* A DISCARD lock has occurred since the last preload. */ - #define WINED3D_BUFFER_APPLESYNC 0x10 /* Using sync as in GL_APPLE_flush_buffer_range. */ -+#define WINED3D_BUFFER_PERSISTENT 0x20 /* Uses a persistent-mapped buffer via ARB_buffer_storage. 
*/ - - #define VB_MAXDECLCHANGES 100 /* After that number of decl changes we stop converting */ - #define VB_RESETDECLCHANGE 1000 /* Reset the decl changecount after that number of draws */ -@@ -269,6 +271,52 @@ fail: - return FALSE; - } - -+/* Context activation is done by the caller. */ -+static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer, struct wined3d_context *context) -+{ -+ struct wined3d_device *device = buffer->resource.device; -+ struct wined3d_buffer_heap *heap; -+ struct wined3d_map_range map_range; -+ HRESULT hr; -+ -+ if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER) -+ { -+ // Use a heap aligned to constant buffer offset requirements. -+ heap = device->cb_buffer_heap; -+ } -+ else -+ { -+ if (!(buffer->resource.usage & WINED3DUSAGE_WRITEONLY)) -+ FIXME("Using a write-only persistent buffer for %p without WINED3DUSAGE_WRITEONLY.\n", buffer); -+ heap = device->wo_buffer_heap; -+ } -+ -+ buffer->buffer_heap = heap; -+ if (FAILED(hr = wined3d_buffer_heap_alloc(heap, buffer->resource.size, &map_range))) -+ { -+ goto fail; -+ } -+ buffer->cs_persistent_map = map_range; -+ buffer->mt_persistent_map = map_range; -+ return TRUE; -+ -+fail: -+ // FIXME(acomminos): fall back to standalone BO here? -+ ERR("Failed to create persistent map for buffer %p, hr=%x\n", buffer, hr); -+ buffer->buffer_heap = NULL; -+ return FALSE; -+} -+ -+static void buffer_free_persistent_map(struct wined3d_buffer *buffer) -+{ -+ if (!buffer->buffer_heap) -+ return; -+ -+ // TODO(acomminos): get the CS thread to free pending main thread buffers. 
-+ wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map); -+ buffer->buffer_heap = NULL; -+} -+ - static BOOL buffer_process_converted_attribute(struct wined3d_buffer *buffer, - const enum wined3d_buffer_conversion_type conversion_type, - const struct wined3d_stream_info_element *attrib, DWORD *stride_this_run) -@@ -631,6 +679,16 @@ static BOOL wined3d_buffer_prepare_location(struct wined3d_buffer *buffer, - return FALSE; - } - return buffer_create_buffer_object(buffer, context); -+ case WINED3D_LOCATION_PERSISTENT_MAP: -+ if (buffer->buffer_heap) -+ return TRUE; -+ -+ if (!(buffer->flags & WINED3D_BUFFER_PERSISTENT)) -+ { -+ WARN("Trying to map a persistent region for buffer %p without WINED3D_BUFFER_PERSISTENT.\n", buffer); -+ return FALSE; -+ } -+ return buffer_alloc_persistent_map(buffer, context); - - default: - ERR("Invalid location %s.\n", wined3d_debug_location(location)); -@@ -689,16 +747,32 @@ BOOL wined3d_buffer_load_location(struct wined3d_buffer *buffer, - buffer_conversion_upload(buffer, context); - break; - -+ case WINED3D_LOCATION_PERSISTENT_MAP: -+ // TODO(acomminos): are we guaranteed location_sysmem to be kept? -+ // no. 
-+ if (buffer->conversion_map) -+ FIXME("Attempting to use conversion map with persistent mapping.\n"); -+ memcpy(buffer->buffer_heap->map_ptr + -+ buffer->cs_persistent_map.offset, -+ buffer->resource.heap_memory, buffer->resource.size); -+ break; -+ - default: - ERR("Invalid location %s.\n", wined3d_debug_location(location)); - return FALSE; - } - - wined3d_buffer_validate_location(buffer, location); -- if (buffer->resource.heap_memory && location == WINED3D_LOCATION_BUFFER -+ if (buffer->resource.heap_memory -+ && location & WINED3D_LOCATION_BUFFER - && !(buffer->resource.usage & WINED3DUSAGE_DYNAMIC)) - wined3d_buffer_evict_sysmem(buffer); - -+ // FIXME(acomminos) -+ if (buffer->resource.heap_memory -+ && location & WINED3D_LOCATION_PERSISTENT_MAP) -+ wined3d_buffer_evict_sysmem(buffer); -+ - return TRUE; - } - -@@ -720,12 +794,25 @@ DWORD wined3d_buffer_get_memory(struct wined3d_buffer *buffer, - { - data->buffer_object = buffer->buffer_object; - data->addr = NULL; -+ data->length = buffer->resource.size; - return WINED3D_LOCATION_BUFFER; - } -+ if (locations & WINED3D_LOCATION_PERSISTENT_MAP) -+ { -+ // FIXME(acomminos): should we expose a buffer object we don't wholly own here? -+ data->buffer_object = buffer->buffer_heap->buffer_object; -+ data->addr = buffer->cs_persistent_map.offset; -+ // Note that the size of the underlying buffer allocation may be larger -+ // than the buffer knows about. In this case, we've rounded it up to be -+ // aligned (e.g. for uniform buffer offsets). 
-+ data->length = buffer->cs_persistent_map.size; -+ return WINED3D_LOCATION_PERSISTENT_MAP; -+ } - if (locations & WINED3D_LOCATION_SYSMEM) - { - data->buffer_object = 0; - data->addr = buffer->resource.heap_memory; -+ data->length = buffer->resource.size; - return WINED3D_LOCATION_SYSMEM; - } - -@@ -761,6 +848,8 @@ static void buffer_unload(struct wined3d_resource *resource) - buffer->flags &= ~WINED3D_BUFFER_HASDESC; - } - -+ buffer_free_persistent_map(buffer); -+ - resource_unload(resource); - } - -@@ -784,6 +873,8 @@ static void wined3d_buffer_destroy_object(void *object) - heap_free(buffer->conversion_map); - } - -+ buffer_free_persistent_map(buffer); -+ - heap_free(buffer->maps); - heap_free(buffer); - } -@@ -900,6 +991,16 @@ void wined3d_buffer_load(struct wined3d_buffer *buffer, struct wined3d_context * - - buffer_mark_used(buffer); - -+ if (buffer->flags & WINED3D_BUFFER_PERSISTENT) -+ { -+ if (wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP)) -+ return; -+ -+ ERR("Failed to preload persistent mapping for %p, falling back to BO.\n", buffer); -+ buffer->flags |= WINED3D_BUFFER_USE_BO; -+ buffer->flags &= ~WINED3D_BUFFER_PERSISTENT; -+ } -+ - /* TODO: Make converting independent from VBOs */ - if (!(buffer->flags & WINED3D_BUFFER_USE_BO)) - { -@@ -1010,6 +1111,25 @@ static HRESULT wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UI - - count = ++buffer->resource.map_count; - -+ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) -+ { -+ const struct wined3d_gl_info *gl_info; -+ context = context_acquire(device, NULL, 0); -+ -+ FIXME_(d3d_perf)("Fences not used for persistent buffer maps on CS thread, using glFinish.\n"); -+ -+ gl_info = context->gl_info; -+ gl_info->gl_ops.gl.p_glFinish(); -+ -+ base = buffer->buffer_heap->map_ptr -+ + buffer->cs_persistent_map.offset; -+ *data = base + offset; -+ -+ context_release(context); -+ -+ return WINED3D_OK; -+ } -+ - if (buffer->buffer_object) - { - unsigned int 
dirty_offset = offset, dirty_size = size; -@@ -1152,6 +1272,12 @@ static void wined3d_buffer_unmap(struct wined3d_buffer *buffer) - return; - } - -+ if (buffer->flags & WINED3D_BUFFER_PERSISTENT) -+ { -+ TRACE("Persistent buffer, ignore unmap.\n"); -+ return; -+ } -+ - if (buffer->map_ptr) - { - struct wined3d_device *device = buffer->resource.device; -@@ -1256,6 +1382,64 @@ static void buffer_resource_preload(struct wined3d_resource *resource) - - static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx, - struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) -+{ -+ struct wined3d_buffer *buffer = buffer_from_resource(resource); -+ UINT offset = box ? box->left : 0; -+ -+ if (sub_resource_idx) -+ { -+ WARN("Invalid sub_resource_idx %u.\n", sub_resource_idx); -+ return E_INVALIDARG; -+ } -+ -+ // Support immediate mapping of persistent buffers off the command thread, -+ // which require no GL calls to interface with. -+ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) -+ { -+ map_desc->row_pitch = map_desc->slice_pitch = buffer->desc.byte_width; -+ if (flags & WINED3D_MAP_DISCARD) -+ { -+ HRESULT hr; -+ struct wined3d_map_range map_range; -+ if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &map_range))) -+ { -+ FIXME_(d3d_perf)("Failed to allocate new buffer, falling back to sync path.\n"); -+ return hr; -+ } -+ map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; -+ resource->map_count++; -+ -+ buffer->mt_persistent_map = map_range; -+ -+ // Discard handler on CSMT thread is responsible for returning the -+ // currently used buffer to the free pool, along with the fence that -+ // must be called before the buffer can be reused. 
-+ wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, map_range); -+ return WINED3D_OK; -+ } -+ else if (flags & WINED3D_MAP_NOOVERWRITE) -+ { -+ // Allow immediate access for persistent buffers without a fence. -+ // Always use the latest buffer in this case in case the latest -+ // DISCARDed one hasn't reached the command stream yet. -+ struct wined3d_map_range map_range = buffer->mt_persistent_map; -+ map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; -+ resource->map_count++; -+ return WINED3D_OK; -+ } -+ else -+ { -+ // TODO(acomminos): Should check mapped ranges to see if the region is writeable even though NOOVERWRITE is specified. -+ WARN_(d3d_perf)("Mapping persistent buffer %p in sync with CS thread.\n", buffer); -+ // XXX(acomminos): kill this early return. they're the worst. -+ } -+ } -+ -+ return E_NOTIMPL; -+} -+ -+static HRESULT buffer_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx, -+ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) - { - struct wined3d_buffer *buffer = buffer_from_resource(resource); - UINT offset, size; -@@ -1299,6 +1483,18 @@ static HRESULT buffer_resource_sub_resource_map_info(struct wined3d_resource *re - } - - static HRESULT buffer_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx) -+{ -+ struct wined3d_buffer *buffer = buffer_from_resource(resource); -+ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) -+ { -+ // Nothing to be done to unmap a region of a persistent buffer. 
-+ resource->map_count--; -+ return WINED3D_OK; -+ } -+ return E_NOTIMPL; -+} -+ -+static HRESULT buffer_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx) - { - if (sub_resource_idx) - { -@@ -1319,6 +1515,8 @@ static const struct wined3d_resource_ops buffer_resource_ops = - buffer_resource_sub_resource_map, - buffer_resource_sub_resource_map_info, - buffer_resource_sub_resource_unmap, -+ buffer_resource_sub_resource_map_cs, -+ buffer_resource_sub_resource_unmap_cs, - }; - - static GLenum buffer_type_hint_from_bind_flags(const struct wined3d_gl_info *gl_info, -@@ -1394,12 +1592,30 @@ static HRESULT buffer_init(struct wined3d_buffer *buffer, struct wined3d_device - buffer->flags |= WINED3D_BUFFER_PIN_SYSMEM; - } - -+ if (buffer->resource.usage & WINED3DUSAGE_DYNAMIC) -+ { -+ if (!gl_info->supported[ARB_BUFFER_STORAGE]) -+ { -+ WARN_(d3d_perf)("Not creating a persistent mapping for a dynamic buffer because ARB_buffer_storage is unsupported.\n"); -+ } -+ else -+ { -+ // If supported, use persistent mapped buffers instead of a -+ // standalone BO for dynamic buffers. -+ buffer->flags |= WINED3D_BUFFER_PERSISTENT; -+ } -+ } -+ - /* Observations show that draw_primitive_immediate_mode() is faster on - * dynamic vertex buffers than converting + draw_primitive_arrays(). - * (Half-Life 2 and others.) 
*/ - dynamic_buffer_ok = gl_info->supported[APPLE_FLUSH_BUFFER_RANGE] || gl_info->supported[ARB_MAP_BUFFER_RANGE]; - -- if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT]) -+ if (buffer->flags & WINED3D_BUFFER_PERSISTENT) -+ { -+ TRACE("Not creating a BO because a persistent mapped buffer will be used.\n"); -+ } -+ else if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT]) - { - TRACE("Not creating a BO because GL_ARB_vertex_buffer is not supported.\n"); - } -diff --git a/dlls/wined3d/context.c b/dlls/wined3d/context.c -index 0e2e68b4b0..eae2c3a79d 100644 ---- a/dlls/wined3d/context.c -+++ b/dlls/wined3d/context.c -@@ -5005,7 +5005,11 @@ void draw_primitive(struct wined3d_device *device, const struct wined3d_state *s - if (parameters->indexed) - { - struct wined3d_buffer *index_buffer = state->index_buffer; -- if (!index_buffer->buffer_object || !stream_info->all_vbo) -+ if (index_buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) -+ { -+ idx_data = index_buffer->cs_persistent_map.offset; -+ } -+ else if (!index_buffer->buffer_object || !stream_info->all_vbo) - { - idx_data = index_buffer->resource.heap_memory; - } -diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c -index 50a4d041cd..e61b8dedbb 100644 ---- a/dlls/wined3d/cs.c -+++ b/dlls/wined3d/cs.c -@@ -73,6 +73,7 @@ enum wined3d_cs_op - WINED3D_CS_OP_CLEAR_UNORDERED_ACCESS_VIEW, - WINED3D_CS_OP_COPY_UAV_COUNTER, - WINED3D_CS_OP_GENERATE_MIPMAPS, -+ WINED3D_CS_OP_DISCARD_BUFFER, - WINED3D_CS_OP_STOP, - }; - -@@ -439,6 +440,13 @@ struct wined3d_cs_generate_mipmaps - struct wined3d_shader_resource_view *view; - }; - -+struct wined3d_cs_discard_buffer -+{ -+ enum wined3d_cs_op opcode; -+ struct wined3d_buffer *buffer; -+ struct wined3d_map_range map_range; -+}; -+ - struct wined3d_cs_stop - { - enum wined3d_cs_op opcode; -@@ -2002,7 +2010,7 @@ static void wined3d_cs_exec_map(struct wined3d_cs *cs, const void *data) - const struct wined3d_cs_map *op = data; - struct wined3d_resource *resource = op->resource; - -- 
*op->hr = resource->resource_ops->resource_sub_resource_map(resource, -+ *op->hr = resource->resource_ops->resource_sub_resource_map_cs(resource, - op->sub_resource_idx, op->map_desc, op->box, op->flags); - } - -@@ -2036,7 +2044,7 @@ static void wined3d_cs_exec_unmap(struct wined3d_cs *cs, const void *data) - const struct wined3d_cs_unmap *op = data; - struct wined3d_resource *resource = op->resource; - -- *op->hr = resource->resource_ops->resource_sub_resource_unmap(resource, op->sub_resource_idx); -+ *op->hr = resource->resource_ops->resource_sub_resource_unmap_cs(resource, op->sub_resource_idx); - } - - HRESULT wined3d_cs_unmap(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx) -@@ -2455,6 +2463,53 @@ void wined3d_cs_emit_generate_mipmaps(struct wined3d_cs *cs, struct wined3d_shad - cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT); - } - -+static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *data) -+{ -+ const struct wined3d_cs_discard_buffer *op = data; -+ struct wined3d_buffer *buffer = op->buffer; -+ HRESULT hr; -+ -+ // TODO(acomminos): should call into buffer.c here instead. -+ if (FAILED(hr = wined3d_buffer_heap_free_fenced(buffer->buffer_heap, cs->device, buffer->cs_persistent_map))) -+ { -+ ERR("Failed to do a fenced free on discarded buffer %p, hr %x\n. 
Freeing anyway.", buffer, hr); -+ wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map); -+ } -+ -+ buffer->cs_persistent_map = op->map_range; -+ -+ // TODO(acomminos): merge this logic with buffer.c functions for standalone BOs -+ if (buffer->bind_flags & WINED3D_BIND_VERTEX_BUFFER) -+ device_invalidate_state(cs->device, STATE_STREAMSRC); -+ if (buffer->bind_flags & WINED3D_BIND_INDEX_BUFFER) -+ device_invalidate_state(cs->device, STATE_INDEXBUFFER); -+ if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER) -+ { -+ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_VERTEX)); -+ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_HULL)); -+ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_DOMAIN)); -+ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_GEOMETRY)); -+ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_PIXEL)); -+ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_COMPUTE)); -+ } -+ -+ wined3d_resource_release(&op->buffer->resource); -+} -+ -+void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range) -+{ -+ struct wined3d_cs_discard_buffer *op; -+ -+ op = cs->ops->require_space(cs, sizeof(*op), WINED3D_CS_QUEUE_DEFAULT); -+ op->opcode = WINED3D_CS_OP_DISCARD_BUFFER; -+ op->buffer = buffer; -+ op->map_range = map_range; -+ -+ wined3d_resource_acquire(&buffer->resource); -+ -+ cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT); -+} -+ - static void wined3d_cs_emit_stop(struct wined3d_cs *cs) - { - struct wined3d_cs_stop *op; -@@ -2515,6 +2570,7 @@ static void (* const wined3d_cs_op_handlers[])(struct wined3d_cs *cs, const void - /* WINED3D_CS_OP_CLEAR_UNORDERED_ACCESS_VIEW */ wined3d_cs_exec_clear_unordered_access_view, - /* WINED3D_CS_OP_COPY_UAV_COUNTER */ wined3d_cs_exec_copy_uav_counter, - /* 
WINED3D_CS_OP_GENERATE_MIPMAPS */ wined3d_cs_exec_generate_mipmaps, -+ /* WINED3D_CS_OP_DISCARD_BUFFER */ wined3d_cs_exec_discard_buffer, - }; - - #if defined(STAGING_CSMT) -diff --git a/dlls/wined3d/resource.c b/dlls/wined3d/resource.c -index 8b7f17bb6b..02d469bc20 100644 ---- a/dlls/wined3d/resource.c -+++ b/dlls/wined3d/resource.c -@@ -344,6 +344,7 @@ static DWORD wined3d_resource_sanitise_map_flags(const struct wined3d_resource * - HRESULT CDECL wined3d_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx, - struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) - { -+ HRESULT hr; - TRACE("resource %p, sub_resource_idx %u, map_desc %p, box %s, flags %#x.\n", - resource, sub_resource_idx, map_desc, debug_box(box), flags); - -@@ -366,9 +367,14 @@ HRESULT CDECL wined3d_resource_map(struct wined3d_resource *resource, unsigned i - } - - flags = wined3d_resource_sanitise_map_flags(resource, flags); -- wined3d_resource_wait_idle(resource); -+ if (FAILED(hr = resource->resource_ops->resource_sub_resource_map(resource, sub_resource_idx, map_desc, box, flags))) -+ { -+ TRACE_(d3d_perf)("Mapping resource %p on the command stream.\n", resource); -+ wined3d_resource_wait_idle(resource); -+ hr = wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags); -+ } - -- return wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags); -+ return hr; - } - - HRESULT CDECL wined3d_resource_map_info(struct wined3d_resource *resource, unsigned int sub_resource_idx, -@@ -381,9 +387,15 @@ HRESULT CDECL wined3d_resource_map_info(struct wined3d_resource *resource, unsig - - HRESULT CDECL wined3d_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx) - { -+ HRESULT hr; - TRACE("resource %p, sub_resource_idx %u.\n", resource, sub_resource_idx); - -- return wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx); -+ if (FAILED(hr = 
resource->resource_ops->resource_sub_resource_unmap(resource, sub_resource_idx))) -+ { -+ TRACE_(d3d_perf)("Unmapping resource %p on the command stream.\n", resource); -+ hr = wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx); -+ } -+ return hr; - } - - UINT CDECL wined3d_resource_update_info(struct wined3d_resource *resource, unsigned int sub_resource_idx, -diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c -index 2f506c36d1..6f7805b8bd 100644 ---- a/dlls/wined3d/state.c -+++ b/dlls/wined3d/state.c -@@ -4934,7 +4934,11 @@ static void indexbuffer(struct wined3d_context *context, const struct wined3d_st - else - { - struct wined3d_buffer *ib = state->index_buffer; -- GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object)); -+ // FIXME(acomminos): disasterous. -+ if (ib->locations & WINED3D_LOCATION_PERSISTENT_MAP) -+ GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_heap->buffer_object)); -+ else -+ GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object)); - } - } - -@@ -5000,6 +5004,7 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state - enum wined3d_shader_type shader_type; - struct wined3d_buffer *buffer; - unsigned int i, base, count; -+ struct wined3d_bo_address bo_addr; - - TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); - -@@ -5012,7 +5017,15 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state - for (i = 0; i < count; ++i) - { - buffer = state->cb[shader_type][i]; -- GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, buffer ? 
buffer->buffer_object : 0)); -+ if (buffer) -+ { -+ wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); -+ GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length)); -+ } -+ else -+ { -+ GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0)); -+ } - } - checkGLcall("bind constant buffers"); - } -diff --git a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c -index e6af0c7508..7260f902cf 100644 ---- a/dlls/wined3d/texture.c -+++ b/dlls/wined3d/texture.c -@@ -2301,6 +2301,12 @@ static void wined3d_texture_unload(struct wined3d_resource *resource) - - static HRESULT texture_resource_sub_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx, - struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) -+{ -+ return E_NOTIMPL; -+} -+ -+static HRESULT texture_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx, -+ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) - { - const struct wined3d_format *format = resource->format; - struct wined3d_texture_sub_resource *sub_resource; -@@ -2461,6 +2467,11 @@ static HRESULT texture_resource_sub_resource_map_info(struct wined3d_resource *r - } - - static HRESULT texture_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx) -+{ -+ return E_NOTIMPL; -+} -+ -+static HRESULT texture_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx) - { - struct wined3d_texture_sub_resource *sub_resource; - struct wined3d_device *device = resource->device; -@@ -2513,6 +2524,8 @@ static const struct wined3d_resource_ops texture_resource_ops = - texture_resource_sub_resource_map, - texture_resource_sub_resource_map_info, - texture_resource_sub_resource_unmap, -+ texture_resource_sub_resource_map_cs, -+ texture_resource_sub_resource_unmap_cs, - }; - - static HRESULT texture1d_init(struct 
wined3d_texture *texture, const struct wined3d_resource_desc *desc, -diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c -index b8b7880501..62758ae056 100644 ---- a/dlls/wined3d/utils.c -+++ b/dlls/wined3d/utils.c -@@ -6404,6 +6404,7 @@ const char *wined3d_debug_location(DWORD location) - LOCATION_TO_STR(WINED3D_LOCATION_DRAWABLE); - LOCATION_TO_STR(WINED3D_LOCATION_RB_MULTISAMPLE); - LOCATION_TO_STR(WINED3D_LOCATION_RB_RESOLVED); -+ LOCATION_TO_STR(WINED3D_LOCATION_PERSISTENT_MAP); - #undef LOCATION_TO_STR - if (location) - FIXME("Unrecognized location flag(s) %#x.\n", location); -diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h -index 3d535f4e17..b3fd0136ff 100644 ---- a/dlls/wined3d/wined3d_private.h -+++ b/dlls/wined3d/wined3d_private.h -@@ -1470,6 +1470,7 @@ struct wined3d_bo_address - { - GLuint buffer_object; - BYTE *addr; -+ GLsizeiptr length; - }; - - struct wined3d_const_bo_address -@@ -3047,6 +3048,9 @@ struct wined3d_resource_ops - HRESULT (*resource_map_info)(struct wined3d_resource *resource, unsigned int sub_resource_idx, - struct wined3d_map_info *info, DWORD flags); - HRESULT (*resource_sub_resource_unmap)(struct wined3d_resource *resource, unsigned int sub_resource_idx); -+ HRESULT (*resource_sub_resource_map_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx, -+ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags); -+ HRESULT (*resource_sub_resource_unmap_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx); - }; - - struct wined3d_resource -@@ -3325,6 +3329,7 @@ void wined3d_texture_validate_location(struct wined3d_texture *texture, - #define WINED3D_LOCATION_DRAWABLE 0x00000040 - #define WINED3D_LOCATION_RB_MULTISAMPLE 0x00000080 - #define WINED3D_LOCATION_RB_RESOLVED 0x00000100 -+#define WINED3D_LOCATION_PERSISTENT_MAP 0x00000200 - - const char *wined3d_debug_location(DWORD location) DECLSPEC_HIDDEN; - -@@ -3672,6 +3677,7 @@ void 
wined3d_cs_emit_unload_resource(struct wined3d_cs *cs, struct wined3d_resou - void wined3d_cs_emit_update_sub_resource(struct wined3d_cs *cs, struct wined3d_resource *resource, - unsigned int sub_resource_idx, const struct wined3d_box *box, const void *data, unsigned int row_pitch, - unsigned int slice_pitch) DECLSPEC_HIDDEN; -+void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range) DECLSPEC_HIDDEN; - void wined3d_cs_init_object(struct wined3d_cs *cs, - void (*callback)(void *object), void *object) DECLSPEC_HIDDEN; - HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx, -@@ -3784,6 +3790,11 @@ struct wined3d_buffer - UINT stride; /* 0 if no conversion */ - enum wined3d_buffer_conversion_type *conversion_map; /* NULL if no conversion */ - UINT conversion_stride; /* 0 if no shifted conversion */ -+ -+ /* persistent mapped buffer */ -+ struct wined3d_buffer_heap *buffer_heap; -+ struct wined3d_map_range cs_persistent_map; -+ struct wined3d_map_range mt_persistent_map; // TODO: make struct list? - }; - - static inline struct wined3d_buffer *buffer_from_resource(struct wined3d_resource *resource) --- -2.16.2 - diff --git a/0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch b/0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch deleted file mode 100644 index 67c2dd9d0ebe..000000000000 --- a/0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch +++ /dev/null @@ -1,121 +0,0 @@ -From d8f54b1fedbbe64ebc5c08ff107408f454794f71 Mon Sep 17 00:00:00 2001 -From: Andrew Comminos <andrew@comminos.com> -Date: Mon, 5 Mar 2018 20:28:34 -0800 -Subject: [PATCH 3/9] wined3d: Use ARB_multi_bind to speed up UBO updates. - -More frequent UBO remaps as a result of the persistent buffer allocator -causes glBindBufferRange to be a bottleneck. Using ARB_multi_bind -massively reduces state change overhead. 
---- - dlls/wined3d/directx.c | 4 ++++ - dlls/wined3d/state.c | 46 +++++++++++++++++++++++++++++++++++++++------- - dlls/wined3d/wined3d_gl.h | 1 + - 3 files changed, 44 insertions(+), 7 deletions(-) - -diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c -index 46c6a59536..8789a501ec 100644 ---- a/dlls/wined3d/directx.c -+++ b/dlls/wined3d/directx.c -@@ -149,6 +149,7 @@ static const struct wined3d_extension_map gl_extension_map[] = - {"GL_ARB_internalformat_query2", ARB_INTERNALFORMAT_QUERY2 }, - {"GL_ARB_map_buffer_alignment", ARB_MAP_BUFFER_ALIGNMENT }, - {"GL_ARB_map_buffer_range", ARB_MAP_BUFFER_RANGE }, -+ {"GL_ARB_multi_bind", ARB_MULTI_BIND }, - {"GL_ARB_multisample", ARB_MULTISAMPLE }, - {"GL_ARB_multitexture", ARB_MULTITEXTURE }, - {"GL_ARB_occlusion_query", ARB_OCCLUSION_QUERY }, -@@ -2796,6 +2797,8 @@ static void load_gl_funcs(struct wined3d_gl_info *gl_info) - /* GL_ARB_map_buffer_range */ - USE_GL_FUNC(glFlushMappedBufferRange) - USE_GL_FUNC(glMapBufferRange) -+ /* GL_ARB_multi_bind */ -+ USE_GL_FUNC(glBindBuffersRange) - /* GL_ARB_multisample */ - USE_GL_FUNC(glSampleCoverageARB) - /* GL_ARB_multitexture */ -@@ -3973,6 +3976,7 @@ static BOOL wined3d_adapter_init_gl_caps(struct wined3d_adapter *adapter, - {ARB_TEXTURE_VIEW, MAKEDWORD_VERSION(4, 3)}, - - {ARB_CLEAR_TEXTURE, MAKEDWORD_VERSION(4, 4)}, -+ {ARB_MULTI_BIND, MAKEDWORD_VERSION(4, 4)}, - - {ARB_CLIP_CONTROL, MAKEDWORD_VERSION(4, 5)}, - {ARB_CULL_DISTANCE, MAKEDWORD_VERSION(4, 5)}, -diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c -index 6f7805b8bd..4d0718514f 100644 ---- a/dlls/wined3d/state.c -+++ b/dlls/wined3d/state.c -@@ -5014,19 +5014,51 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state - shader_type = WINED3D_SHADER_TYPE_COMPUTE; - - wined3d_gl_limits_get_uniform_block_range(&gl_info->limits, shader_type, &base, &count); -- for (i = 0; i < count; ++i) -+ -+ if (gl_info->supported[ARB_MULTI_BIND]) - { -- buffer = state->cb[shader_type][i]; 
-- if (buffer) -+ GLuint buffer_objects[count]; -+ GLsizeiptr buffer_offsets[count]; -+ GLsizeiptr buffer_sizes[count]; -+ -+ for (i = 0; i < count; ++i) - { -- wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); -- GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length)); -+ buffer = state->cb[shader_type][i]; -+ if (buffer) -+ { -+ wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); -+ buffer_objects[i] = bo_addr.buffer_object; -+ buffer_offsets[i] = bo_addr.addr; -+ buffer_sizes[i] = bo_addr.length; -+ } -+ else -+ { -+ buffer_objects[i] = buffer_offsets[i] = 0; -+ // The ARB_multi_bind spec states that an error may be thrown if -+ // `size` is less than or equal to zero, Thus, we specify a size for -+ // unused buffers anyway. -+ buffer_sizes[i] = 1; -+ } - } -- else -+ GL_EXTCALL(glBindBuffersRange(GL_UNIFORM_BUFFER, base, count, buffer_objects, buffer_offsets, buffer_sizes)); -+ } -+ else -+ { -+ for (i = 0; i < count; ++i) - { -- GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0)); -+ buffer = state->cb[shader_type][i]; -+ if (buffer) -+ { -+ wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); -+ GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length)); -+ } -+ else -+ { -+ GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0)); -+ } - } - } -+ - checkGLcall("bind constant buffers"); - } - -diff --git a/dlls/wined3d/wined3d_gl.h b/dlls/wined3d/wined3d_gl.h -index 7626864ef2..25c3301c94 100644 ---- a/dlls/wined3d/wined3d_gl.h -+++ b/dlls/wined3d/wined3d_gl.h -@@ -82,6 +82,7 @@ enum wined3d_gl_extension - ARB_INTERNALFORMAT_QUERY2, - ARB_MAP_BUFFER_ALIGNMENT, - ARB_MAP_BUFFER_RANGE, -+ ARB_MULTI_BIND, - ARB_MULTISAMPLE, - ARB_MULTITEXTURE, - ARB_OCCLUSION_QUERY, --- -2.16.2 - diff --git a/0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch 
b/0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch deleted file mode 100644 index 954f929294c8..000000000000 --- a/0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 08fc0e139509a6755489af3936ede49936896122 Mon Sep 17 00:00:00 2001 -From: Andrew Comminos <andrew@comminos.com> -Date: Tue, 6 Mar 2018 02:07:31 -0800 -Subject: [PATCH 4/9] wined3d: Use GL_CLIENT_STORAGE_BIT for persistent - mappings. - ---- - dlls/wined3d/buffer_heap.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c -index b133bd6893..75f84b0088 100644 ---- a/dlls/wined3d/buffer_heap.c -+++ b/dlls/wined3d/buffer_heap.c -@@ -169,7 +169,7 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s - { - access_flags |= GL_MAP_READ_BIT; - } -- storage_flags = access_flags; -+ storage_flags = GL_CLIENT_STORAGE_BIT | access_flags; - - // TODO(acomminos): where should we be checking for errors here? - GL_EXTCALL(glGenBuffers(1, &object->buffer_object)); --- -2.16.2 - diff --git a/0005-wined3d-Disable-persistently-mapped-shader-resource-.patch b/0005-wined3d-Disable-persistently-mapped-shader-resource-.patch deleted file mode 100644 index 10895a9ea107..000000000000 --- a/0005-wined3d-Disable-persistently-mapped-shader-resource-.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 5ded1310d3de32fdfc467b20ab3937ab2b97d5b1 Mon Sep 17 00:00:00 2001 -From: Andrew Comminos <andrew@comminos.com> -Date: Thu, 8 Mar 2018 22:00:33 -0800 -Subject: [PATCH 5/9] wined3d: Disable persistently mapped shader resource - buffers. 
- ---- - dlls/wined3d/buffer.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c -index e7a0f59a67..a2ac411b5e 100644 ---- a/dlls/wined3d/buffer.c -+++ b/dlls/wined3d/buffer.c -@@ -1598,6 +1598,10 @@ static HRESULT buffer_init(struct wined3d_buffer *buffer, struct wined3d_device - { - WARN_(d3d_perf)("Not creating a persistent mapping for a dynamic buffer because ARB_buffer_storage is unsupported.\n"); - } -+ else if (bind_flags & WINED3D_BIND_SHADER_RESOURCE) -+ { -+ FIXME_(d3d_perf)("Not using a persistent mapping for shader resource buffer %p (unimplemented)\n", buffer); -+ } - else - { - // If supported, use persistent mapped buffers instead of a --- -2.16.2 - diff --git a/0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch b/0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch deleted file mode 100644 index 2777b6a5324e..000000000000 --- a/0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 71b7b3340d147bf7a9b5567c080b32ccd3a39dc6 Mon Sep 17 00:00:00 2001 -From: Andrew Comminos <andrew@comminos.com> -Date: Thu, 8 Mar 2018 22:42:03 -0800 -Subject: [PATCH 6/9] wined3d: Perform initial allocation of persistent buffers - asynchronously. - ---- - dlls/wined3d/buffer.c | 30 ++++++++++++++++++++---------- - 1 file changed, 20 insertions(+), 10 deletions(-) - -diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c -index a2ac411b5e..c492fcc8c6 100644 ---- a/dlls/wined3d/buffer.c -+++ b/dlls/wined3d/buffer.c -@@ -272,7 +272,7 @@ fail: - } - - /* Context activation is done by the caller. 
*/ --static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer, struct wined3d_context *context) -+static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer) - { - struct wined3d_device *device = buffer->resource.device; - struct wined3d_buffer_heap *heap; -@@ -688,7 +688,7 @@ static BOOL wined3d_buffer_prepare_location(struct wined3d_buffer *buffer, - WARN("Trying to map a persistent region for buffer %p without WINED3D_BUFFER_PERSISTENT.\n", buffer); - return FALSE; - } -- return buffer_alloc_persistent_map(buffer, context); -+ return buffer_alloc_persistent_map(buffer); - - default: - ERR("Invalid location %s.\n", wined3d_debug_location(location)); -@@ -1116,7 +1116,7 @@ static HRESULT wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UI - const struct wined3d_gl_info *gl_info; - context = context_acquire(device, NULL, 0); - -- FIXME_(d3d_perf)("Fences not used for persistent buffer maps on CS thread, using glFinish.\n"); -+ FIXME_(d3d_perf)("Fences not used for persistent buffer maps on CS thread, using glFinish (flags: %x)\n", flags); - - gl_info = context->gl_info; - gl_info->gl_ops.gl.p_glFinish(); -@@ -1394,8 +1394,20 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc - - // Support immediate mapping of persistent buffers off the command thread, - // which require no GL calls to interface with. -- if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) -+ if (buffer->flags & WINED3D_BUFFER_PERSISTENT) - { -+ // Attempt to load a persistent map without syncing, if possible. 
-+ if (!(buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)) -+ { -+ wined3d_resource_wait_idle(resource); -+ if (!buffer_alloc_persistent_map(buffer)) -+ { -+ ERR_(d3d_perf)("Failed to allocate persistent buffer, falling back to sync path."); -+ return E_FAIL; -+ } -+ wined3d_buffer_validate_location(buffer, WINED3D_LOCATION_PERSISTENT_MAP); -+ } -+ - map_desc->row_pitch = map_desc->slice_pitch = buffer->desc.byte_width; - if (flags & WINED3D_MAP_DISCARD) - { -@@ -1415,6 +1427,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc - // currently used buffer to the free pool, along with the fence that - // must be called before the buffer can be reused. - wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, map_range); -+ - return WINED3D_OK; - } - else if (flags & WINED3D_MAP_NOOVERWRITE) -@@ -1425,14 +1438,11 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc - struct wined3d_map_range map_range = buffer->mt_persistent_map; - map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; - resource->map_count++; -+ - return WINED3D_OK; - } -- else -- { -- // TODO(acomminos): Should check mapped ranges to see if the region is writeable even though NOOVERWRITE is specified. -- WARN_(d3d_perf)("Mapping persistent buffer %p in sync with CS thread.\n", buffer); -- // XXX(acomminos): kill this early return. they're the worst. 
-- } -+ -+ WARN_(d3d_perf)("Mapping persistent buffer %p in sync with CS thread.\n", buffer); - } - - return E_NOTIMPL; --- -2.16.2 - diff --git a/0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch b/0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch deleted file mode 100644 index d0872df82707..000000000000 --- a/0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch +++ /dev/null @@ -1,351 +0,0 @@ -From fc7907d5264c1606477f9287c949c3c8794859ec Mon Sep 17 00:00:00 2001 -From: Andrew Comminos <andrew@comminos.com> -Date: Thu, 8 Mar 2018 23:01:50 -0800 -Subject: [PATCH 7/9] wined3d: Avoid freeing persistent buffer heap elements - during use. - -Using HeapFree is expensive, especially when we don't have our buffers -for long. ---- - dlls/wined3d/buffer.c | 29 +++++++++++---------- - dlls/wined3d/buffer_heap.c | 57 ++++++++++++++++++------------------------ - dlls/wined3d/context.c | 4 +-- - dlls/wined3d/cs.c | 6 ++--- - dlls/wined3d/wined3d_private.h | 25 ++++++++++++------ - 5 files changed, 61 insertions(+), 60 deletions(-) - -diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c -index c492fcc8c6..74b3ba8abd 100644 ---- a/dlls/wined3d/buffer.c -+++ b/dlls/wined3d/buffer.c -@@ -276,7 +276,7 @@ static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer) - { - struct wined3d_device *device = buffer->resource.device; - struct wined3d_buffer_heap *heap; -- struct wined3d_map_range map_range; -+ struct wined3d_buffer_heap_element *elem; - HRESULT hr; - - if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER) -@@ -292,12 +292,12 @@ static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer) - } - - buffer->buffer_heap = heap; -- if (FAILED(hr = wined3d_buffer_heap_alloc(heap, buffer->resource.size, &map_range))) -+ if (FAILED(hr = wined3d_buffer_heap_alloc(heap, buffer->resource.size, &elem))) - { - goto fail; - } -- buffer->cs_persistent_map = map_range; -- buffer->mt_persistent_map = map_range; -+ 
buffer->cs_persistent_map = elem; -+ buffer->mt_persistent_map = elem; - return TRUE; - - fail: -@@ -753,7 +753,7 @@ BOOL wined3d_buffer_load_location(struct wined3d_buffer *buffer, - if (buffer->conversion_map) - FIXME("Attempting to use conversion map with persistent mapping.\n"); - memcpy(buffer->buffer_heap->map_ptr + -- buffer->cs_persistent_map.offset, -+ buffer->cs_persistent_map->range.offset, - buffer->resource.heap_memory, buffer->resource.size); - break; - -@@ -801,11 +801,11 @@ DWORD wined3d_buffer_get_memory(struct wined3d_buffer *buffer, - { - // FIXME(acomminos): should we expose a buffer object we don't wholly own here? - data->buffer_object = buffer->buffer_heap->buffer_object; -- data->addr = buffer->cs_persistent_map.offset; -+ data->addr = buffer->cs_persistent_map->range.offset; - // Note that the size of the underlying buffer allocation may be larger - // than the buffer knows about. In this case, we've rounded it up to be - // aligned (e.g. for uniform buffer offsets). 
-- data->length = buffer->cs_persistent_map.size; -+ data->length = buffer->cs_persistent_map->range.size; - return WINED3D_LOCATION_PERSISTENT_MAP; - } - if (locations & WINED3D_LOCATION_SYSMEM) -@@ -1122,7 +1122,7 @@ static HRESULT wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UI - gl_info->gl_ops.gl.p_glFinish(); - - base = buffer->buffer_heap->map_ptr -- + buffer->cs_persistent_map.offset; -+ + buffer->cs_persistent_map->range.offset; - *data = base + offset; - - context_release(context); -@@ -1412,22 +1412,21 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc - if (flags & WINED3D_MAP_DISCARD) - { - HRESULT hr; -- struct wined3d_map_range map_range; -- if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &map_range))) -+ struct wined3d_buffer_heap_element *mt_elem; -+ if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &mt_elem))) - { - FIXME_(d3d_perf)("Failed to allocate new buffer, falling back to sync path.\n"); - return hr; - } -- map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; -+ map_desc->data = buffer->buffer_heap->map_ptr + mt_elem->range.offset + offset; - resource->map_count++; - -- buffer->mt_persistent_map = map_range; -+ buffer->mt_persistent_map = mt_elem; - - // Discard handler on CSMT thread is responsible for returning the - // currently used buffer to the free pool, along with the fence that - // must be called before the buffer can be reused. -- wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, map_range); -- -+ wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, mt_elem); - return WINED3D_OK; - } - else if (flags & WINED3D_MAP_NOOVERWRITE) -@@ -1435,7 +1434,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc - // Allow immediate access for persistent buffers without a fence. 
- // Always use the latest buffer in this case in case the latest - // DISCARDed one hasn't reached the command stream yet. -- struct wined3d_map_range map_range = buffer->mt_persistent_map; -+ struct wined3d_map_range map_range = buffer->mt_persistent_map->range; - map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; - resource->map_count++; - -diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c -index 75f84b0088..80670c515f 100644 ---- a/dlls/wined3d/buffer_heap.c -+++ b/dlls/wined3d/buffer_heap.c -@@ -25,18 +25,6 @@ - WINE_DEFAULT_DEBUG_CHANNEL(d3d); - WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); - --struct wined3d_buffer_heap_element --{ -- struct wined3d_map_range range; -- -- // rbtree data -- struct wine_rb_entry entry; -- -- // Binned free list positions -- struct wined3d_buffer_heap_element *next; -- struct wined3d_buffer_heap_element *prev; --}; -- - struct wined3d_buffer_heap_fenced_element - { - struct wined3d_buffer_heap_bin_set free_list; -@@ -82,6 +70,11 @@ static int element_bin(struct wined3d_buffer_heap_element *elem) - // Inserts an element into the appropriate free list bin. 
- static void element_insert_free_bin(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) - { -+ if (elem->prev || elem->next) -+ { -+ ERR("Element %p in already in a free list (for some reason).\n", elem); -+ } -+ - int bin = element_bin(elem); - - elem->prev = NULL; -@@ -206,7 +199,7 @@ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct win - return WINED3D_OK; - } - --HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range) -+HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_buffer_heap_element **out_elem) - { - int initial_bin; - int initial_size = size; -@@ -233,24 +226,24 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s - remaining_range.offset = elem->range.offset + size; - remaining_range.size = elem->range.size - size; - -- out_range->offset = elem->range.offset; -- out_range->size = size; -+ // Take the element from the free list, transferring ownership to -+ // the caller. -+ element_remove_free(heap, elem); -+ // Resize the element so that we can free the remainder. -+ elem->range.size = size; - -- TRACE_(d3d_perf)("Allocated %d (requested %d) at %p from bin %d (initial %d)\n", size, initial_size, elem->range.offset, i, initial_bin); -+ *out_elem = elem; - -- // Remove the element from its current free bin to move it to the correct list. 
-- element_remove_free(heap, elem); -+ TRACE_(d3d_perf)("Allocated %d (requested %d) at %p from bin %d (initial %d)\n", size, initial_size, elem->range.offset, i, initial_bin); - - if (remaining_range.size > 0) - { -+ struct wined3d_buffer_heap_element *remaining_elem; -+ - TRACE_(d3d_perf)("Imperfect fit allocated, fragmenting remainder of %lld at %p.\n", remaining_range.size, remaining_range.offset); - -- elem->range = remaining_range; -- element_insert_free_bin(heap, elem); -- } -- else -- { -- HeapFree(GetProcessHeap(), 0, elem); -+ remaining_elem = element_new(remaining_range.offset, remaining_range.size); -+ element_insert_free_bin(heap, remaining_elem); - } - - LeaveCriticalSection(&heap->temp_lock); -@@ -265,7 +258,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s - if (SUCCEEDED(wined3d_buffer_heap_deferred_coalesce(heap, &num_coalesced))) - { - if (num_coalesced > 0) -- return wined3d_buffer_heap_alloc(heap, size, out_range); -+ return wined3d_buffer_heap_alloc(heap, size, out_elem); - } - - FIXME_(d3d_perf)("Coalescing did not create new blocks, failing.\n"); -@@ -273,16 +266,15 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s - return WINED3DERR_OUTOFVIDEOMEMORY; - } - --HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) -+HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) - { -- struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size); -- -- if (!elem) -- return E_OUTOFMEMORY; -- - EnterCriticalSection(&heap->temp_lock); - - // Only insert the element into a free bin, coalescing will occur later. -+ // -+ // Note that the reason that we pass around wined3d_buffer_heap_element -+ // instead of a range is to avoid frequent HeapAlloc/HeapFree operations -+ // when we're reusing buffers. 
- element_insert_free_bin(heap, elem); - - LeaveCriticalSection(&heap->temp_lock); -@@ -290,9 +282,8 @@ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3 - return WINED3D_OK; - } - --HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) -+HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_buffer_heap_element *elem) - { -- struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size); - int bin_index = element_bin(elem); - struct wined3d_buffer_heap_bin *bin = &heap->pending_fenced_bins.bins[bin_index]; - -diff --git a/dlls/wined3d/context.c b/dlls/wined3d/context.c -index eae2c3a79d..01aa53597f 100644 ---- a/dlls/wined3d/context.c -+++ b/dlls/wined3d/context.c -@@ -5005,9 +5005,9 @@ void draw_primitive(struct wined3d_device *device, const struct wined3d_state *s - if (parameters->indexed) - { - struct wined3d_buffer *index_buffer = state->index_buffer; -- if (index_buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) -+ if (index_buffer->cs_persistent_map) - { -- idx_data = index_buffer->cs_persistent_map.offset; -+ idx_data = index_buffer->cs_persistent_map->range.offset; - } - else if (!index_buffer->buffer_object || !stream_info->all_vbo) - { -diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c -index e61b8dedbb..d1f665d505 100644 ---- a/dlls/wined3d/cs.c -+++ b/dlls/wined3d/cs.c -@@ -444,7 +444,7 @@ struct wined3d_cs_discard_buffer - { - enum wined3d_cs_op opcode; - struct wined3d_buffer *buffer; -- struct wined3d_map_range map_range; -+ struct wined3d_buffer_heap_element *map_range; - }; - - struct wined3d_cs_stop -@@ -2496,14 +2496,14 @@ static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *da - wined3d_resource_release(&op->buffer->resource); - } - --void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer 
*buffer, struct wined3d_map_range map_range) -+void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_buffer_heap_element *elem) - { - struct wined3d_cs_discard_buffer *op; - - op = cs->ops->require_space(cs, sizeof(*op), WINED3D_CS_QUEUE_DEFAULT); - op->opcode = WINED3D_CS_OP_DISCARD_BUFFER; - op->buffer = buffer; -- op->map_range = map_range; -+ op->map_range = elem; - - wined3d_resource_acquire(&buffer->resource); - -diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h -index b3fd0136ff..0114444943 100644 ---- a/dlls/wined3d/wined3d_private.h -+++ b/dlls/wined3d/wined3d_private.h -@@ -3531,6 +3531,18 @@ struct wined3d_map_range - GLsizeiptr size; - }; - -+struct wined3d_buffer_heap_element -+{ -+ struct wined3d_map_range range; -+ -+ // rbtree data -+ struct wine_rb_entry entry; -+ -+ // Binned free list positions -+ struct wined3d_buffer_heap_element *next; -+ struct wined3d_buffer_heap_element *prev; -+}; -+ - enum wined3d_cs_queue_id - { - WINED3D_CS_QUEUE_DEFAULT = 0, -@@ -3677,7 +3689,7 @@ void wined3d_cs_emit_unload_resource(struct wined3d_cs *cs, struct wined3d_resou - void wined3d_cs_emit_update_sub_resource(struct wined3d_cs *cs, struct wined3d_resource *resource, - unsigned int sub_resource_idx, const struct wined3d_box *box, const void *data, unsigned int row_pitch, - unsigned int slice_pitch) DECLSPEC_HIDDEN; --void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range) DECLSPEC_HIDDEN; -+void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_buffer_heap_element *map_range) DECLSPEC_HIDDEN; - void wined3d_cs_init_object(struct wined3d_cs *cs, - void (*callback)(void *object), void *object) DECLSPEC_HIDDEN; - HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx, -@@ -3711,7 +3723,6 @@ enum 
wined3d_buffer_conversion_type - CONV_POSITIONT, - }; - --struct wined3d_buffer_heap_element; - struct wined3d_buffer_heap_fenced_element; - - // Number of power-of-two buckets to populate. -@@ -3750,11 +3761,11 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s - HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN; - // Fetches a buffer from the heap of at least the given size. - // Attempts to coalesce blocks under memory pressure. --HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range* out_range) DECLSPEC_HIDDEN; -+HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_buffer_heap_element** out_elem) DECLSPEC_HIDDEN; - // Immediately frees a heap-allocated buffer segment. --HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN; -+HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) DECLSPEC_HIDDEN; - // Enqueues a buffer segment to return to the heap once its fence has been signaled. --HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) DECLSPEC_HIDDEN; -+HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_buffer_heap_element *elem) DECLSPEC_HIDDEN; - // Issues a fence for the current set of pending fenced buffers. - // Double-buffered: if the last fence issued has not yet been triggered, waits - // on it. -@@ -3793,8 +3804,8 @@ struct wined3d_buffer - - /* persistent mapped buffer */ - struct wined3d_buffer_heap *buffer_heap; -- struct wined3d_map_range cs_persistent_map; -- struct wined3d_map_range mt_persistent_map; // TODO: make struct list? 
-+ struct wined3d_buffer_heap_element *cs_persistent_map; -+ struct wined3d_buffer_heap_element *mt_persistent_map; - }; - - static inline struct wined3d_buffer *buffer_from_resource(struct wined3d_resource *resource) --- -2.16.2 - diff --git a/0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch b/0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch deleted file mode 100644 index 4487d4b8c1d7..000000000000 --- a/0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch +++ /dev/null @@ -1,211 +0,0 @@ -From 8af55b60fa87bb0fb21afd17e3467253b53e61a1 Mon Sep 17 00:00:00 2001 -From: Andrew Comminos <andrew@comminos.com> -Date: Thu, 15 Mar 2018 21:07:21 -0700 -Subject: [PATCH 8/9] wined3d: Add DISABLE_PBA envvar, some PBA cleanup. - ---- - dlls/wined3d/buffer.c | 4 ++-- - dlls/wined3d/buffer_heap.c | 34 ++++++++++++++++++++++++++-------- - dlls/wined3d/device.c | 38 ++++++++++++++++++++++++++------------ - dlls/wined3d/query.c | 2 +- - dlls/wined3d/wined3d_private.h | 6 ++---- - 5 files changed, 57 insertions(+), 27 deletions(-) - -diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c -index 74b3ba8abd..651d9a4360 100644 ---- a/dlls/wined3d/buffer.c -+++ b/dlls/wined3d/buffer.c -@@ -1603,9 +1603,9 @@ static HRESULT buffer_init(struct wined3d_buffer *buffer, struct wined3d_device - - if (buffer->resource.usage & WINED3DUSAGE_DYNAMIC) - { -- if (!gl_info->supported[ARB_BUFFER_STORAGE]) -+ if (!device->use_pba) - { -- WARN_(d3d_perf)("Not creating a persistent mapping for a dynamic buffer because ARB_buffer_storage is unsupported.\n"); -+ WARN_(d3d_perf)("Not creating a persistent mapping for dynamic buffer %p because the PBA is disabled.\n", buffer); - } - else if (bind_flags & WINED3D_BIND_SHADER_RESOURCE) - { -diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c -index 80670c515f..899aad9612 100644 ---- a/dlls/wined3d/buffer_heap.c -+++ b/dlls/wined3d/buffer_heap.c -@@ -25,6 +25,9 @@ - WINE_DEFAULT_DEBUG_CHANNEL(d3d); - 
WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); - -+// Arbitrary binding to use when binding the persistent buffer. -+#define BIND_TARGET GL_ARRAY_BUFFER -+ - struct wined3d_buffer_heap_fenced_element - { - struct wined3d_buffer_heap_bin_set free_list; -@@ -140,7 +143,6 @@ static int free_tree_compare(const void *key, const struct wine_rb_entry *entry) - HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap) - { - const struct wined3d_gl_info *gl_info = context->gl_info; -- const GLenum buffer_target = GL_ARRAY_BUFFER; - GLbitfield access_flags; - GLbitfield storage_flags; - struct wined3d_buffer_heap_element *initial_elem; -@@ -162,22 +164,23 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s - { - access_flags |= GL_MAP_READ_BIT; - } -+ - storage_flags = GL_CLIENT_STORAGE_BIT | access_flags; - -- // TODO(acomminos): where should we be checking for errors here? - GL_EXTCALL(glGenBuffers(1, &object->buffer_object)); -+ checkGLcall("glGenBuffers"); - -- context_bind_bo(context, buffer_target, object->buffer_object); -+ context_bind_bo(context, BIND_TARGET, object->buffer_object); - -- // TODO(acomminos): assert glBufferStorage supported? 
-- GL_EXTCALL(glBufferStorage(buffer_target, size, NULL, storage_flags)); -+ GL_EXTCALL(glBufferStorage(BIND_TARGET, size, NULL, storage_flags)); -+ checkGLcall("glBufferStorage"); - -- if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(buffer_target, 0, size, access_flags)))) -+ if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(BIND_TARGET, 0, size, access_flags)))) - { - ERR("Couldn't map persistent buffer.\n"); - return -1; // FIXME(acomminos): proper error code, cleanup - } -- context_bind_bo(context, buffer_target, 0); -+ context_bind_bo(context, BIND_TARGET, 0); - - object->fenced_head = object->fenced_tail = NULL; - object->alignment = alignment; -@@ -195,7 +198,22 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s - /* Context activation is done by the caller. */ - HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) - { -- FIXME("Unimplemented, leaking buffer"); -+ const struct wined3d_gl_info *gl_info = context->gl_info; -+ -+ context_bind_bo(context, BIND_TARGET, heap->buffer_object); -+ GL_EXTCALL(glUnmapBuffer(BIND_TARGET)); -+ checkGLcall("glUnmapBuffer"); -+ context_bind_bo(context, BIND_TARGET, 0); -+ -+ GL_EXTCALL(glDeleteBuffers(1, &heap->buffer_object)); -+ checkGLcall("glDeleteBuffers"); -+ -+ DeleteCriticalSection(&heap->temp_lock); -+ -+ // TODO(acomminos): cleanup free lists, fenced list, etc. -+ -+ HeapFree(GetProcessHeap(), 0, heap); -+ - return WINED3D_OK; - } - -diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c -index 785841a062..f4c9dc7bd6 100644 ---- a/dlls/wined3d/device.c -+++ b/dlls/wined3d/device.c -@@ -837,16 +837,27 @@ static void destroy_default_samplers(struct wined3d_device *device, struct wined - static void create_buffer_heap(struct wined3d_device *device, struct wined3d_context *context) - { - const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; -- // TODO(acomminos): kill this magic number. perhaps base on vram. 
-- GLsizeiptr geo_heap_size = 512 * 1024 * 1024; -- // We choose a constant buffer size of 128MB, the same as NVIDIA claims to -- // use in their Direct3D driver for discarded constant buffers. -- GLsizeiptr cb_heap_size = 128 * 1024 * 1024; -- GLint ub_alignment; -- HRESULT hr; -+ BOOL use_pba = FALSE; -+ char *env_pba_disable; - -- if (gl_info->supported[ARB_BUFFER_STORAGE]) -+ if (!gl_info->supported[ARB_BUFFER_STORAGE]) -+ { -+ FIXME("Not using PBA, ARB_buffer_storage unsupported.\n"); -+ } -+ else if ((env_pba_disable = getenv("PBA_DISABLE")) && *env_pba_disable != '0') - { -+ FIXME("Not using PBA, envvar 'PBA_DISABLE' set.\n"); -+ } -+ else -+ { -+ // TODO(acomminos): kill this magic number. perhaps base on vram. -+ GLsizeiptr geo_heap_size = 512 * 1024 * 1024; -+ // We choose a constant buffer size of 128MB, the same as NVIDIA claims to -+ // use in their Direct3D driver for discarded constant buffers. -+ GLsizeiptr cb_heap_size = 128 * 1024 * 1024; -+ GLint ub_alignment; -+ HRESULT hr; -+ - gl_info->gl_ops.gl.p_glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &ub_alignment); - - // Align constant buffer heap size, in case GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT isn't a power of two (for some reason). 
-@@ -855,19 +866,22 @@ static void create_buffer_heap(struct wined3d_device *device, struct wined3d_con - if (FAILED(hr = wined3d_buffer_heap_create(context, geo_heap_size, 0, TRUE, &device->wo_buffer_heap))) - { - ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr); -+ goto fail; - } - - if (FAILED(hr = wined3d_buffer_heap_create(context, cb_heap_size, ub_alignment, TRUE, &device->cb_buffer_heap))) - { - ERR("Failed to create persistent buffer heap for constant buffers, hr %#x.\n", hr); -+ goto fail; - } - - FIXME("Initialized PBA (geo_heap_size: %ld, cb_heap_size: %ld, ub_align: %d)\n", geo_heap_size, cb_heap_size, ub_alignment); -+ -+ use_pba = TRUE; - } -- else -- { -- FIXME("Not using PBA, ARB_buffer_storage unsupported.\n"); -- } -+ -+fail: -+ device->use_pba = use_pba; - } - - /* Context activation is done by the caller. */ -diff --git a/dlls/wined3d/query.c b/dlls/wined3d/query.c -index f3ca1630e5..5ea79b6e4a 100644 ---- a/dlls/wined3d/query.c -+++ b/dlls/wined3d/query.c -@@ -88,7 +88,7 @@ static BOOL wined3d_fence_supported(const struct wined3d_gl_info *gl_info) - return gl_info->supported[ARB_SYNC] || gl_info->supported[NV_FENCE] || gl_info->supported[APPLE_FENCE]; - } - --enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, -+static enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, - const struct wined3d_device *device, DWORD flags) - { - const struct wined3d_gl_info *gl_info; -diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h -index 0114444943..63f004d57e 100644 ---- a/dlls/wined3d/wined3d_private.h -+++ b/dlls/wined3d/wined3d_private.h -@@ -1713,9 +1713,6 @@ - void wined3d_fence_issue(struct wined3d_fence *fence, const struct wined3d_device *device) DECLSPEC_HIDDEN; - enum wined3d_fence_result wined3d_fence_wait(const struct wined3d_fence *fence, - const struct wined3d_device *device) DECLSPEC_HIDDEN; --// XXX(acomminos): really expose this? 
--enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, -- const struct wined3d_device *device, DWORD flags) DECLSPEC_HIDDEN; - - /* Direct3D terminology with little modifications. We do not have an issued - * state because only the driver knows about it, but we have a created state -@@ -2943,6 +2940,7 @@ - BYTE inScene : 1; /* A flag to check for proper BeginScene / EndScene call pairs */ - BYTE softwareVertexProcessing : 1; /* process vertex shaders using software or hardware */ - BYTE filter_messages : 1; -+ BYTE use_pba : 1; /* A flag to use the persistent buffer allocator for dynamic buffers. */ - BYTE padding : 3; - - unsigned char surface_alignment; /* Line Alignment of surfaces */ - --- -2.16.2 - diff --git a/0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch b/0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch deleted file mode 100644 index 7d8bbb854e32..000000000000 --- a/0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch +++ /dev/null @@ -1,96 +0,0 @@ -From a2326162cf4fb5601c0f296bfd1294a493912bce Mon Sep 17 00:00:00 2001 -From: Andrew Comminos <andrew@comminos.com> -Date: Thu, 15 Mar 2018 21:22:06 -0700 -Subject: [PATCH 9/9] wined3d: Add quirk to use GL_CLIENT_STORAGE_BIT for mesa. 
- ---- - dlls/wined3d/buffer_heap.c | 15 ++++++++++++++- - dlls/wined3d/directx.c | 19 +++++++++++++++++++ - dlls/wined3d/wined3d_private.h | 1 + - 3 files changed, 34 insertions(+), 1 deletion(-) - -diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c -index 899aad9612..9e8f2d799d 100644 ---- a/dlls/wined3d/buffer_heap.c -+++ b/dlls/wined3d/buffer_heap.c -@@ -165,7 +165,20 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s - access_flags |= GL_MAP_READ_BIT; - } - -- storage_flags = GL_CLIENT_STORAGE_BIT | access_flags; -+ storage_flags = access_flags; -+ // FIXME(acomminos): So, about GL_CLIENT_STORAGE_BIT: -+ // - On NVIDIA, DMA CACHED memory is used when this flag is set. SYSTEM HEAP -+ // memory is used without it, which (in my testing) is much faster. -+ // - On Mesa, GTT is used when this flag is set. This is what we want- we -+ // upload to VRAM occur otherwise, which is unusably slow (on radeon). -+ // -+ // Thus, we're only going to set this on mesa for now. -+ // Hints are awful anyway. 
-+ if (gl_info->quirks & WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT) -+ { -+ FIXME_(d3d_perf)("PBA: using GL_CLIENT_STORAGE_BIT quirk"); -+ storage_flags |= GL_CLIENT_STORAGE_BIT; -+ } - - GL_EXTCALL(glGenBuffers(1, &object->buffer_object)); - checkGLcall("glGenBuffers"); -diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c -index 8789a501ec..f455ed54e1 100644 ---- a/dlls/wined3d/directx.c -+++ b/dlls/wined3d/directx.c -@@ -947,6 +947,13 @@ static BOOL match_broken_viewport_subpixel_bits(const struct wined3d_gl_info *gl - return !wined3d_caps_gl_ctx_test_viewport_subpixel_bits(ctx); - } - -+static BOOL match_mesa(const struct wined3d_gl_info *gl_info, struct wined3d_caps_gl_ctx *ctx, -+ const char *gl_renderer, enum wined3d_gl_vendor gl_vendor, -+ enum wined3d_pci_vendor card_vendor, enum wined3d_pci_device device) -+{ -+ return gl_vendor == GL_VENDOR_MESA; -+} -+ - static void quirk_apple_glsl_constants(struct wined3d_gl_info *gl_info) - { - /* MacOS needs uniforms for relative addressing offsets. This can accumulate to quite a few uniforms. -@@ -1084,6 +1091,13 @@ static void quirk_broken_viewport_subpixel_bits(struct wined3d_gl_info *gl_info) - } - } - -+static void quirk_use_client_storage_bit(struct wined3d_gl_info *gl_info) -+{ -+ // Using ARB_buffer_storage on Mesa requires the GL_CLIENT_STORAGE_BIT to be -+ // set to use GTT for immutable buffers on radeon (see PIPE_USAGE_STREAM). -+ gl_info->quirks |= WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT; -+} -+ - struct driver_quirk - { - BOOL (*match)(const struct wined3d_gl_info *gl_info, struct wined3d_caps_gl_ctx *ctx, -@@ -1180,6 +1194,11 @@ static const struct driver_quirk quirk_table[] = - quirk_broken_viewport_subpixel_bits, - "Nvidia viewport subpixel bits bug" - }, -+ { -+ match_mesa, -+ quirk_use_client_storage_bit, -+ "Use GL_CLIENT_STORAGE_BIT for persistent buffers on mesa", -+ }, - }; - - /* Certain applications (Steam) complain if we report an outdated driver version. 
In general, -diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h -index 63f004d57e..96715261b1 100644 ---- a/dlls/wined3d/wined3d_private.h -+++ b/dlls/wined3d/wined3d_private.h -@@ -75,6 +75,7 @@ - #define WINED3D_QUIRK_INFO_LOG_SPAM 0x00000080 - #define WINED3D_QUIRK_LIMITED_TEX_FILTERING 0x00000100 - #define WINED3D_QUIRK_BROKEN_ARB_FOG 0x00000200 -+#define WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT 0x00000400 - - enum wined3d_ffp_idx - { --- -2.16.2 - @@ -2,11 +2,10 @@ # Maintainer: Lars Norberg < arch-packages at cogwerkz dot org > pkgname=wine-staging-pba-git -pkgver=3.4.r3604.e50f0488+wine.3.4.r178.ge1c7a1f7ce+pba.r29.87307b1 +pkgver=3.4.r3607.5876a3f7+wine.3.4.r192.gd7430abd40 pkgrel=1 _winesrcdir='wine-git' _stgsrcdir='wine-staging-git' -_pbasrcdir='wine-pba' pkgdesc='Wine staging branch with PBA patches for increased D3D performance. Git versions. (Also includes Path of Exile DX11 patch!)' url='https://github.com/acomminos/wine-pba' arch=('x86_64') @@ -93,16 +92,7 @@ optdepends=( ) source=("$_winesrcdir"::'git://source.winehq.org/git/wine.git' "$_stgsrcdir"::'git+https://github.com/wine-staging/wine-staging.git' - "$_pbasrcdir"::'git+https://github.com/acomminos/wine-pba.git' - '0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch' - '0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch' - '0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch' - '0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch' - '0005-wined3d-Disable-persistently-mapped-shader-resource-.patch' - '0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch' - '0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch' - '0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch' - '0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch' + 'wine-staging-pba.patch' 'steam.patch' 'poe-fix.patch' 'harmony-fix.diff' @@ -110,16 +100,7 @@ source=("$_winesrcdir"::'git://source.winehq.org/git/wine.git' 
'wine-binfmt.conf') sha256sums=('SKIP' 'SKIP' - 'SKIP' - 'f5f8c507f79c829b118125a3749f80ed31eb8ba8ad024d99554a1a6458c438eb' - '98372adbb16949edca4c90604cceac5db3d4bf37eccc13d59d3e5735f53f2501' - '112f8fc68d5421805fb1de32c0216c41412afae21153d803127c9d1c1103e35b' - '016ee498c9ff7af0d14c7b0e42f4bc5255f5dae6d391fd36c2060668fcade662' - 'ec11046f6335c2831e3b89c2b0c241b74974415a64523f35f0a606d27d1dbfbb' - 'd2a8febc2500d6a7bed418232efedf82f114e7d14ca1199789abe576dddae90b' - 'ff5ef40b945fdad16db99a1f736c20c53711cfe002d367ea4aa55d84bf6a1207' - 'dee52666fc680b74f5d5ba1a2a74de715c7b49376895ff057ccada9daaef5911' - '5c3776e5c94b51b368384c79aec9b26716fc6517935d782c121c856f21dfd223' + 'cc229607e417841d3e900cc93c3ab2f79c0851705a07e7206729193ffa3dc9db' '972d6b114f7621c5f3bd34b1105dd390b318db18fbc76328001c984db488a9b0' '1c8be30224a67c0f279ae1324165708371aad8f290ebc6da69c686d0904e606c' '50ccb5bd2067e5d2739c5f7abcef11ef096aa246f5ceea11d2c3b508fc7f77a1' @@ -141,13 +122,9 @@ pkgver() { cd "${srcdir}/${_winesrcdir}" local _wineVer="$(git describe --long --tags | sed 's/\([^-]*-g\)/r\1/;s/-/./g;s/^v//;s/\.rc/rc/')" - # retrieve current wine-pba version - cd "${srcdir}/${_pbasrcdir}" - local _pbaVer=$( printf 'pba.r%s.%s' "$(git rev-list --count HEAD)" "$(git rev-parse --short HEAD)" ) - # version string might be a bit over the top, # but I want the build versions of all the 3 source repositories in it. 
- printf '%s+%s+%s' "$_stagingVer" "$_wineVer" "$_pbaVer" + printf '%s+%s' "$_stagingVer" "$_wineVer" } prepare() { @@ -196,18 +173,7 @@ prepare() { "${srcdir}"/"${_stgsrcdir}"/patches/patchinstall.sh DESTDIR="${srcdir}/${_winesrcdir}" --all # apply wine-pba patches - #for _f in $(ls "${srcdir}"/"${_pbasrcdir}"/'patches'); do - # patch -d "${srcdir}"/"${_winesrcdir}" -Np1 < "${srcdir}"/"${_pbasrcdir}"/'patches'/"${_f}" - #done - patch -Np1 < ../'0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch' - patch -Np1 < ../'0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch' - patch -Np1 < ../'0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch' - patch -Np1 < ../'0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch' - patch -Np1 < ../'0005-wined3d-Disable-persistently-mapped-shader-resource-.patch' - patch -Np1 < ../'0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch' - patch -Np1 < ../'0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch' - patch -Np1 < ../'0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch' - patch -Np1 < ../'0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch' + patch -Np1 < ../'wine-staging-pba.patch' # fix path of opencl headers sed 's|OpenCL/opencl.h|CL/opencl.h|g' -i configure* diff --git a/wine-staging-pba.patch b/wine-staging-pba.patch new file mode 100644 index 000000000000..050a7c3c4071 --- /dev/null +++ b/wine-staging-pba.patch @@ -0,0 +1,1574 @@ +diff -r -u --new-file a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c +--- a/dlls/wined3d/buffer.c 2018-03-28 23:57:50.342915285 +0200 ++++ b/dlls/wined3d/buffer.c 2018-03-28 23:27:46.013998252 +0200 +@@ -28,12 +28,14 @@ + #include "wined3d_private.h" + + WINE_DEFAULT_DEBUG_CHANNEL(d3d); ++WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); + + #define WINED3D_BUFFER_HASDESC 0x01 /* A vertex description has been found. */ + #define WINED3D_BUFFER_USE_BO 0x02 /* Use a buffer object for this buffer. 
*/ + #define WINED3D_BUFFER_PIN_SYSMEM 0x04 /* Keep a system memory copy for this buffer. */ + #define WINED3D_BUFFER_DISCARD 0x08 /* A DISCARD lock has occurred since the last preload. */ + #define WINED3D_BUFFER_APPLESYNC 0x10 /* Using sync as in GL_APPLE_flush_buffer_range. */ ++#define WINED3D_BUFFER_PERSISTENT 0x20 /* Uses a persistent-mapped buffer via ARB_buffer_storage. */ + + #define VB_MAXDECLCHANGES 100 /* After that number of decl changes we stop converting */ + #define VB_RESETDECLCHANGE 1000 /* Reset the decl changecount after that number of draws */ +@@ -269,6 +271,53 @@ + return FALSE; + } + ++/* Context activation is done by the caller. */ ++static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer) ++{ ++ struct wined3d_device *device = buffer->resource.device; ++ struct wined3d_buffer_heap *heap; ++ struct wined3d_buffer_heap_element *elem; ++ HRESULT hr; ++ ++ if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER) ++ { ++ // Use a heap aligned to constant buffer offset requirements. ++ heap = device->cb_buffer_heap; ++ } ++ else ++ { ++ if (!(buffer->resource.usage & WINED3DUSAGE_WRITEONLY)) ++ FIXME("Using a write-only persistent buffer for %p without WINED3DUSAGE_WRITEONLY.\n", buffer); ++ heap = device->wo_buffer_heap; ++ } ++ ++ buffer->buffer_heap = heap; ++ if (FAILED(hr = wined3d_buffer_heap_alloc(heap, buffer->resource.size, &elem))) ++ { ++ goto fail; ++ } ++ buffer->cs_persistent_map = elem; ++ buffer->mt_persistent_map = elem; ++ return TRUE; ++ ++fail: ++ // FIXME(acomminos): fall back to standalone BO here? ++ ERR("Failed to create persistent map for buffer %p, hr=%x\n", buffer, hr); ++ buffer->buffer_heap = NULL; ++ return FALSE; ++} ++ ++static void buffer_free_persistent_map(struct wined3d_buffer *buffer) ++{ ++ if (!buffer->buffer_heap) ++ return; ++ ++ // TODO(acomminos): get the CS thread to free pending main thread buffers. 
++ wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map); ++ buffer->buffer_heap = NULL; ++} ++ ++ + static BOOL buffer_process_converted_attribute(struct wined3d_buffer *buffer, + const enum wined3d_buffer_conversion_type conversion_type, + const struct wined3d_stream_info_element *attrib, DWORD *stride_this_run) +@@ -632,6 +681,17 @@ + } + return buffer_create_buffer_object(buffer, context); + ++ case WINED3D_LOCATION_PERSISTENT_MAP: ++ if (buffer->buffer_heap) ++ return TRUE; ++ ++ if (!(buffer->flags & WINED3D_BUFFER_PERSISTENT)) ++ { ++ WARN("Trying to map a persistent region for buffer %p without WINED3D_BUFFER_PERSISTENT.\n", buffer); ++ return FALSE; ++ } ++ return buffer_alloc_persistent_map(buffer); ++ + default: + ERR("Invalid location %s.\n", wined3d_debug_location(location)); + return FALSE; +@@ -689,16 +749,32 @@ + buffer_conversion_upload(buffer, context); + break; + ++ case WINED3D_LOCATION_PERSISTENT_MAP: ++ // TODO(acomminos): are we guaranteed location_sysmem to be kept? ++ // no. 
++ if (buffer->conversion_map) ++ FIXME("Attempting to use conversion map with persistent mapping.\n"); ++ memcpy(buffer->buffer_heap->map_ptr + ++ buffer->cs_persistent_map->range.offset, ++ buffer->resource.heap_memory, buffer->resource.size); ++ break; ++ + default: + ERR("Invalid location %s.\n", wined3d_debug_location(location)); + return FALSE; + } + + wined3d_buffer_validate_location(buffer, location); +- if (buffer->resource.heap_memory && location == WINED3D_LOCATION_BUFFER ++ if (buffer->resource.heap_memory ++ && location & WINED3D_LOCATION_BUFFER + && !(buffer->resource.usage & WINED3DUSAGE_DYNAMIC)) + wined3d_buffer_evict_sysmem(buffer); + ++ // FIXME(acomminos) ++ if (buffer->resource.heap_memory ++ && location & WINED3D_LOCATION_PERSISTENT_MAP) ++ wined3d_buffer_evict_sysmem(buffer); ++ + return TRUE; + } + +@@ -720,12 +796,26 @@ + { + data->buffer_object = buffer->buffer_object; + data->addr = NULL; ++ data->length = buffer->resource.size; + return WINED3D_LOCATION_BUFFER; + } ++ if (locations & WINED3D_LOCATION_PERSISTENT_MAP) ++ { ++ // FIXME(acomminos): should we expose a buffer object we don't wholly own here? ++ data->buffer_object = buffer->buffer_heap->buffer_object; ++ data->addr = buffer->cs_persistent_map->range.offset; ++ ++ // Note that the size of the underlying buffer allocation may be larger ++ // than the buffer knows about. In this case, we've rounded it up to be ++ // aligned (e.g. for uniform buffer offsets). 
++ data->length = buffer->cs_persistent_map->range.size; ++ return WINED3D_LOCATION_PERSISTENT_MAP; ++ } + if (locations & WINED3D_LOCATION_SYSMEM) + { + data->buffer_object = 0; + data->addr = buffer->resource.heap_memory; ++ data->length = buffer->resource.size; + return WINED3D_LOCATION_SYSMEM; + } + +@@ -761,6 +851,8 @@ + buffer->flags &= ~WINED3D_BUFFER_HASDESC; + } + ++ buffer_free_persistent_map(buffer); ++ + resource_unload(resource); + } + +@@ -784,6 +876,8 @@ + heap_free(buffer->conversion_map); + } + ++ buffer_free_persistent_map(buffer); ++ + heap_free(buffer->maps); + heap_free(buffer); + } +@@ -900,6 +994,16 @@ + + buffer_mark_used(buffer); + ++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT) ++ { ++ if (wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP)) ++ return; ++ ++ ERR("Failed to preload persistent mapping for %p, falling back to BO.\n", buffer); ++ buffer->flags |= WINED3D_BUFFER_USE_BO; ++ buffer->flags &= ~WINED3D_BUFFER_PERSISTENT; ++ } ++ + /* TODO: Make converting independent from VBOs */ + if (!(buffer->flags & WINED3D_BUFFER_USE_BO)) + { +@@ -1010,6 +1114,25 @@ + + count = ++buffer->resource.map_count; + ++ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) ++ { ++ const struct wined3d_gl_info *gl_info; ++ context = context_acquire(device, NULL, 0); ++ ++ FIXME_(d3d_perf)("Fences not used for persistent buffer maps on CS thread, using glFinish (flags: %x)\n", flags); ++ ++ gl_info = context->gl_info; ++ gl_info->gl_ops.gl.p_glFinish(); ++ ++ base = buffer->buffer_heap->map_ptr ++ + buffer->cs_persistent_map->range.offset; ++ *data = base + offset; ++ ++ context_release(context); ++ ++ return WINED3D_OK; ++ } ++ + if (buffer->buffer_object) + { + unsigned int dirty_offset = offset, dirty_size = size; +@@ -1152,6 +1275,12 @@ + return; + } + ++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT) ++ { ++ TRACE("Persistent buffer, ignore unmap.\n"); ++ return; ++ } ++ + if (buffer->map_ptr) + { + struct 
wined3d_device *device = buffer->resource.device; +@@ -1256,6 +1385,73 @@ + struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) + { + struct wined3d_buffer *buffer = buffer_from_resource(resource); ++ UINT offset = box ? box->left : 0; ++ ++ if (sub_resource_idx) ++ { ++ WARN("Invalid sub_resource_idx %u.\n", sub_resource_idx); ++ return E_INVALIDARG; ++ } ++ ++ // Support immediate mapping of persistent buffers off the command thread, ++ // which require no GL calls to interface with. ++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT) ++ { ++ // Attempt to load a persistent map without syncing, if possible. ++ if (!(buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)) ++ { ++ wined3d_resource_wait_idle(resource); ++ if (!buffer_alloc_persistent_map(buffer)) ++ { ++ ERR_(d3d_perf)("Failed to allocate persistent buffer, falling back to sync path."); ++ return E_FAIL; ++ } ++ wined3d_buffer_validate_location(buffer, WINED3D_LOCATION_PERSISTENT_MAP); ++ } ++ ++ map_desc->row_pitch = map_desc->slice_pitch = buffer->desc.byte_width; ++ if (flags & WINED3D_MAP_DISCARD) ++ { ++ HRESULT hr; ++ struct wined3d_buffer_heap_element *mt_elem; ++ if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &mt_elem))) ++ { ++ FIXME_(d3d_perf)("Failed to allocate new buffer, falling back to sync path.\n"); ++ return hr; ++ } ++ map_desc->data = buffer->buffer_heap->map_ptr + mt_elem->range.offset + offset; ++ resource->map_count++; ++ ++ buffer->mt_persistent_map = mt_elem; ++ ++ // Discard handler on CSMT thread is responsible for returning the ++ // currently used buffer to the free pool, along with the fence that ++ // must be called before the buffer can be reused. ++ wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, mt_elem); ++ ++ return WINED3D_OK; ++ } ++ else if (flags & WINED3D_MAP_NOOVERWRITE) ++ { ++ // Allow immediate access for persistent buffers without a fence. 
++ // Always use the latest buffer in this case in case the latest ++ // DISCARDed one hasn't reached the command stream yet. ++ struct wined3d_map_range map_range = buffer->mt_persistent_map->range; ++ map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; ++ resource->map_count++; ++ return WINED3D_OK; ++ } ++ ++ WARN_(d3d_perf)("Mapping persistent buffer %p in sync with CS thread.\n", buffer); ++ } ++ ++ return E_NOTIMPL; ++} ++ ++static HRESULT buffer_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx, ++ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) ++{ ++ struct wined3d_buffer *buffer = buffer_from_resource(resource); + UINT offset, size; + + if (sub_resource_idx) +@@ -1298,6 +1494,18 @@ + + static HRESULT buffer_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx) + { ++ struct wined3d_buffer *buffer = buffer_from_resource(resource); ++ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) ++ { ++ // Nothing to be done to unmap a region of a persistent buffer. 
++ resource->map_count--; ++ return WINED3D_OK; ++ } ++ return E_NOTIMPL; ++} ++ ++static HRESULT buffer_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx) ++{ + if (sub_resource_idx) + { + WARN("Invalid sub_resource_idx %u.\n", sub_resource_idx); +@@ -1317,6 +1525,8 @@ + buffer_resource_sub_resource_map, + buffer_resource_sub_resource_map_info, + buffer_resource_sub_resource_unmap, ++ buffer_resource_sub_resource_map_cs, ++ buffer_resource_sub_resource_unmap_cs, + }; + + static GLenum buffer_type_hint_from_bind_flags(const struct wined3d_gl_info *gl_info, +@@ -1392,12 +1602,34 @@ + buffer->flags |= WINED3D_BUFFER_PIN_SYSMEM; + } + ++ if (buffer->resource.usage & WINED3DUSAGE_DYNAMIC) ++ { ++ if (!device->use_pba) ++ { ++ WARN_(d3d_perf)("Not creating a persistent mapping for dynamic buffer %p because the PBA is disabled.\n", buffer); ++ } ++ else if (bind_flags & WINED3D_BIND_SHADER_RESOURCE) ++ { ++ FIXME_(d3d_perf)("Not using a persistent mapping for shader resource buffer %p (unimplemented)\n", buffer); ++ } ++ else ++ { ++ // If supported, use persistent mapped buffers instead of a ++ // standalone BO for dynamic buffers. ++ buffer->flags |= WINED3D_BUFFER_PERSISTENT; ++ } ++ } ++ + /* Observations show that draw_primitive_immediate_mode() is faster on + * dynamic vertex buffers than converting + draw_primitive_arrays(). + * (Half-Life 2 and others.) 
*/ + dynamic_buffer_ok = gl_info->supported[APPLE_FLUSH_BUFFER_RANGE] || gl_info->supported[ARB_MAP_BUFFER_RANGE]; + +- if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT]) ++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT) ++ { ++ TRACE("Not creating a BO because a persistent mapped buffer will be used.\n"); ++ } ++ else if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT]) + { + TRACE("Not creating a BO because GL_ARB_vertex_buffer is not supported.\n"); + } +diff -r -u --new-file a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c +--- a/dlls/wined3d/buffer_heap.c 1970-01-01 01:00:00.000000000 +0100 ++++ b/dlls/wined3d/buffer_heap.c 2018-03-28 23:35:39.595966143 +0200 +@@ -0,0 +1,530 @@ ++/* ++ * Copyright 2018 Andrew Comminos ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ * ++ */ ++ ++#include "config.h" ++#include "wine/port.h" ++#include "wine/rbtree.h" ++#include "wined3d_private.h" ++ ++WINE_DEFAULT_DEBUG_CHANNEL(d3d); ++WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); ++ ++// Arbitrary binding to use when binding the persistent buffer. 
++#define BIND_TARGET GL_ARRAY_BUFFER ++ ++ ++struct wined3d_buffer_heap_fenced_element ++{ ++ struct wined3d_buffer_heap_bin_set free_list; ++ struct wined3d_fence *fence; ++ ++ struct wined3d_buffer_heap_fenced_element *next; ++}; ++ ++static struct wined3d_buffer_heap_element* element_new(GLsizei offset, GLsizei size) ++{ ++ struct wined3d_buffer_heap_element* elem; ++ elem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct wined3d_buffer_heap_element)); ++ if (!elem) ++ return NULL; ++ elem->range.offset = offset; ++ elem->range.size = size; ++ return elem; ++} ++ ++static inline int bitwise_log2_floor(GLsizei size) ++{ ++ // XXX(acomminos): I hope this gets unrolled. ++ for (int i = 8 * sizeof(GLsizei) - 1; i >= 0; i--) ++ { ++ if ((size >> i) & 1) { ++ return i; ++ } ++ } ++ return 0; ++} ++ ++static inline int bitwise_log2_ceil(GLsizei size) ++{ ++ // Add one to the floor of size if size isn't a power of two. ++ return bitwise_log2_floor(size) + !!(size & (size - 1)); ++} ++ ++static int element_bin(struct wined3d_buffer_heap_element *elem) ++{ ++ return min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_floor(elem->range.size)); ++} ++ ++// Inserts an element into the appropriate free list bin. ++static void element_insert_free_bin(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) ++{ ++ if (elem->prev || elem->next) ++ { ++ ERR("Element %p in already in a free list (for some reason).\n", elem); ++ } ++ ++ int bin = element_bin(elem); ++ ++ elem->prev = NULL; ++ elem->next = heap->free_list.bins[bin].head; ++ if (heap->free_list.bins[bin].head) ++ heap->free_list.bins[bin].head->prev = elem; ++ heap->free_list.bins[bin].head = elem; ++ ++ if (!heap->free_list.bins[bin].tail) ++ heap->free_list.bins[bin].tail = elem; ++ ++ TRACE("Inserted allocation at %p of size %lld into bin %d\n", elem->range.offset, elem->range.size, bin); ++} ++ ++// Removes an element from the free tree, its bin, and the coalesce list. 
++static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) ++{ ++ int bin = element_bin(elem); ++ ++ if (elem->prev) ++ elem->prev->next = elem->next; ++ ++ if (elem->next) ++ elem->next->prev = elem->prev; ++ ++ if (elem == heap->free_list.bins[bin].head) ++ heap->free_list.bins[bin].head = elem->next; ++ ++ if (elem == heap->free_list.bins[bin].tail) ++ heap->free_list.bins[bin].tail = elem->prev; ++ ++ elem->prev = NULL; ++ elem->next = NULL; ++ ++ TRACE("Freed allocation at %p of size %lld from bin %d\n", elem->range.offset, elem->range.size, bin); ++} ++ ++static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_bin_set bins, struct wined3d_fence* fence) ++{ ++ struct wined3d_buffer_heap_fenced_element* elem; ++ elem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct wined3d_buffer_heap_fenced_element)); ++ if (!elem) ++ return NULL; ++ elem->free_list = bins; ++ elem->fence = fence; ++ elem->next = NULL; ++ return elem; ++} ++ ++static int free_tree_compare(const void *key, const struct wine_rb_entry *entry) ++{ ++ const GLsizei offset = *(const GLsizei*) key; ++ struct wined3d_buffer_heap_element *elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); ++ ++ if (offset < elem->range.offset) ++ return -1; ++ if (offset > elem->range.offset) ++ return 1; ++ return 0; ++} ++ ++/* Context activation is done by the caller. 
*/ ++HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap) ++{ ++ const struct wined3d_gl_info *gl_info = context->gl_info; ++ GLbitfield access_flags; ++ GLbitfield storage_flags; ++ struct wined3d_buffer_heap_element *initial_elem; ++ ++ struct wined3d_buffer_heap *object; ++ ++ if ((alignment & (alignment - 1)) != 0) ++ { ++ return E_FAIL; ++ } ++ ++ if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object)))) ++ { ++ return E_OUTOFMEMORY; ++ } ++ ++ access_flags = GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_WRITE_BIT; ++ if (!write_only) ++ { ++ access_flags |= GL_MAP_READ_BIT; ++ } ++ storage_flags = access_flags; ++ // FIXME(acomminos): So, about GL_CLIENT_STORAGE_BIT: ++ // - On NVIDIA, DMA CACHED memory is used when this flag is set. SYSTEM HEAP ++ // memory is used without it, which (in my testing) is much faster. ++ // - On Mesa, GTT is used when this flag is set. This is what we want- we ++ // upload to VRAM occur otherwise, which is unusably slow (on radeon). ++ // ++ // Thus, we're only going to set this on mesa for now. ++ // Hints are awful anyway. 
++ if (gl_info->quirks & WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT) ++ { ++ FIXME_(d3d_perf)("PBA: using GL_CLIENT_STORAGE_BIT quirk"); ++ storage_flags |= GL_CLIENT_STORAGE_BIT; ++ } ++ ++ GL_EXTCALL(glGenBuffers(1, &object->buffer_object)); ++ checkGLcall("glGenBuffers"); ++ ++ context_bind_bo(context, BIND_TARGET, object->buffer_object); ++ ++ GL_EXTCALL(glBufferStorage(BIND_TARGET, size, NULL, storage_flags)); ++ checkGLcall("glBufferStorage"); ++ ++ if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(BIND_TARGET, 0, size, access_flags)))) ++ { ++ ERR("Couldn't map persistent buffer.\n"); ++ return -1; // FIXME(acomminos): proper error code, cleanup ++ } ++ context_bind_bo(context, BIND_TARGET, 0); ++ ++ object->fenced_head = object->fenced_tail = NULL; ++ object->alignment = alignment; ++ InitializeCriticalSection(&object->temp_lock); ++ ++ initial_elem = element_new(0, size); ++ // Don't bother adding the initial allocation to the coalescing tree. ++ element_insert_free_bin(object, initial_elem); ++ ++ *buffer_heap = object; ++ ++ return WINED3D_OK; ++} ++ ++/* Context activation is done by the caller. */ ++HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) ++{ ++ const struct wined3d_gl_info *gl_info = context->gl_info; ++ ++ context_bind_bo(context, BIND_TARGET, heap->buffer_object); ++ GL_EXTCALL(glUnmapBuffer(BIND_TARGET)); ++ checkGLcall("glUnmapBuffer"); ++ context_bind_bo(context, BIND_TARGET, 0); ++ ++ GL_EXTCALL(glDeleteBuffers(1, &heap->buffer_object)); ++ checkGLcall("glDeleteBuffers"); ++ ++ DeleteCriticalSection(&heap->temp_lock); ++ ++ // TODO(acomminos): cleanup free lists, fenced list, etc. 
++ ++ HeapFree(GetProcessHeap(), 0, heap); ++ ++ return WINED3D_OK; ++} ++ ++HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_buffer_heap_element **out_elem) ++{ ++ int initial_bin; ++ int initial_size = size; ++ ++ EnterCriticalSection(&heap->temp_lock); ++ ++ // After alignment, reduce fragmentation by rounding to next power of two. ++ // If the alignment is a power of two (which it should be), this should be ++ // no problem. ++ size = 1 << bitwise_log2_ceil(size); ++ ++ // Align size values where possible. ++ if (heap->alignment && (size % heap->alignment != 0)) ++ size += heap->alignment - (size % heap->alignment); ++ ++ initial_bin = min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_ceil(size)); ++ ++ for (int i = initial_bin; i < WINED3D_BUFFER_HEAP_BINS; i++) ++ { ++ struct wined3d_buffer_heap_element *elem = heap->free_list.bins[i].head; ++ if (elem) ++ { ++ struct wined3d_map_range remaining_range; ++ remaining_range.offset = elem->range.offset + size; ++ remaining_range.size = elem->range.size - size; ++ ++ // Take the element from the free list, transferring ownership to ++ // the caller. ++ element_remove_free(heap, elem); ++ ++ // Resize the element so that we can free the remainder. 
++ elem->range.size = size; ++ *out_elem = elem; ++ ++ TRACE_(d3d_perf)("Allocated %d (requested %d) at %p from bin %d (initial %d)\n", size, initial_size, elem->range.offset, i, initial_bin); ++ ++ if (remaining_range.size > 0) ++ { ++ struct wined3d_buffer_heap_element *remaining_elem; ++ ++ TRACE_(d3d_perf)("Imperfect fit allocated, fragmenting remainder of %lld at %p.\n", remaining_range.size, remaining_range.offset); ++ ++ remaining_elem = element_new(remaining_range.offset, remaining_range.size); ++ element_insert_free_bin(heap, remaining_elem); ++ } ++ ++ LeaveCriticalSection(&heap->temp_lock); ++ return WINED3D_OK; ++ } ++ } ++ ++ LeaveCriticalSection(&heap->temp_lock); ++ ++ FIXME_(d3d_perf)("Forcing coalesce, not enough free space in buffer heap.\n"); ++ int num_coalesced; ++ if (SUCCEEDED(wined3d_buffer_heap_deferred_coalesce(heap, &num_coalesced))) ++ { ++ if (num_coalesced > 0) ++ return wined3d_buffer_heap_alloc(heap, size, out_elem); ++ } ++ ++ FIXME_(d3d_perf)("Coalescing did not create new blocks, failing.\n"); ++ ++ return WINED3DERR_OUTOFVIDEOMEMORY; ++} ++ ++HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) ++{ ++ EnterCriticalSection(&heap->temp_lock); ++ ++ // Only insert the element into a free bin, coalescing will occur later. ++ // ++ // Note that the reason that we pass around wined3d_buffer_heap_element ++ // instead of a range is to avoid frequent HeapAlloc/HeapFree operations ++ // when we're reusing buffers. 
++ element_insert_free_bin(heap, elem); ++ ++ LeaveCriticalSection(&heap->temp_lock); ++ ++ return WINED3D_OK; ++} ++ ++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_buffer_heap_element *elem) ++{ ++ int bin_index = element_bin(elem); ++ struct wined3d_buffer_heap_bin *bin = &heap->pending_fenced_bins.bins[bin_index]; ++ ++ if (bin->tail) ++ { ++ bin->tail->next = elem; ++ elem->prev = bin->tail; ++ bin->tail = elem; ++ } ++ else ++ { ++ bin->head = elem; ++ bin->tail = elem; ++ } ++ ++ return WINED3D_OK; ++} ++ ++HRESULT wined3d_buffer_heap_cs_fence_issue(struct wined3d_buffer_heap *heap, struct wined3d_device *device) ++{ ++ struct wined3d_buffer_heap_fenced_element *fenced_elem; ++ struct wined3d_fence *fence; ++ HRESULT hr; ++ ++ if (heap->fenced_head) ++ { ++ // XXX(acomminos): double or triple buffer this? ++ wined3d_buffer_heap_cs_fence_wait(heap, device); ++ } ++ ++ if (FAILED(hr = wined3d_fence_create(device, &fence))) ++ { ++ ERR("Failed to create fence.\n"); ++ return hr; ++ } ++ ++ fenced_elem = fenced_element_new(heap->pending_fenced_bins, fence); ++ if (!fenced_elem) ++ return E_OUTOFMEMORY; ++ ++ TRACE_(d3d_perf)("Dispatching fenced buffer set.\n"); ++ memset(&heap->pending_fenced_bins, 0, sizeof(heap->pending_fenced_bins)); ++ ++ // Append to end of fenced list, which works well if you assume that buffers ++ // are freed in some ascending draw call ordering. 
++ if (!heap->fenced_head) ++ { ++ heap->fenced_head = fenced_elem; ++ heap->fenced_tail = fenced_elem; ++ } ++ else ++ { ++ heap->fenced_tail->next = fenced_elem; ++ heap->fenced_tail = fenced_elem; ++ } ++ ++ wined3d_fence_issue(fence, device); ++ return WINED3D_OK; ++} ++ ++HRESULT wined3d_buffer_heap_cs_fence_wait(struct wined3d_buffer_heap *heap, struct wined3d_device *device) ++{ ++ enum wined3d_fence_result res; ++ struct wined3d_buffer_heap_fenced_element *elem = heap->fenced_head; ++ if (!elem) ++ return WINED3D_OK; ++ ++ res = wined3d_fence_wait(elem->fence, device); ++ switch (res) ++ { ++ case WINED3D_FENCE_OK: ++ case WINED3D_FENCE_NOT_STARTED: ++ { ++ TRACE_(d3d_perf)("Freed fence group.\n"); ++ ++ EnterCriticalSection(&heap->temp_lock); ++ for (int i = 0; i < WINED3D_BUFFER_HEAP_BINS; i++) ++ { ++ struct wined3d_buffer_heap_bin *elem_bin = &elem->free_list.bins[i]; ++ if (!elem_bin->tail) ++ continue; ++ ++ struct wined3d_buffer_heap_bin *heap_bin = &heap->free_list.bins[i]; ++ if (heap_bin->head) ++ { ++ // Insert to front. ++ elem_bin->tail->next = heap_bin->head; ++ heap_bin->head->prev = elem_bin->tail; ++ ++ elem_bin->head->prev = NULL; ++ heap_bin->head = elem_bin->head; ++ } ++ else ++ { ++ elem_bin->head->prev = NULL; ++ heap_bin->head = elem_bin->head; ++ elem_bin->tail->next = NULL; ++ heap_bin->tail = elem_bin->tail; ++ } ++ } ++ LeaveCriticalSection(&heap->temp_lock); ++ ++ wined3d_fence_destroy(elem->fence); ++ ++ heap->fenced_head = elem->next; ++ HeapFree(GetProcessHeap(), 0, elem); ++ // TODO(acomminos): bother to null out fenced_tail? 
++ break; ++ } ++ default: ++ return WINED3D_OK; ++ } ++ ++ return WINED3D_OK; ++} ++ ++HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, int *coalesced_count) ++{ ++ struct wined3d_buffer_heap_element *elem = NULL; ++ struct wined3d_buffer_heap_element *next = NULL; ++ struct wine_rb_entry *entry; ++ struct wined3d_map_range coalesced_range; ++ ++ struct wine_rb_tree free_tree; ++ int num_coalesced = 0; ++ ++ wine_rb_init(&free_tree, free_tree_compare); ++ ++ EnterCriticalSection(&heap->temp_lock); ++ ++ // TODO(acomminos): on one hand, if there's a lot of elements in the list, ++ // it's highly fragmented. on the other, we can potentially waste a decent ++ // sum of time checking for uncoalesced bins. ++ for (int i = 0; i < WINED3D_BUFFER_HEAP_BINS; i++) ++ { ++ elem = heap->free_list.bins[i].head; ++ while (elem) ++ { ++ // Insert a sentry. FIXME(acomminos): can skip this with traversal. ++ if (wine_rb_put(&free_tree, &elem->range.offset, &elem->entry) == -1) ++ { ++ ERR("Failed to insert key %x in tree.\n", elem->range.offset); ++ elem = elem->next; ++ continue; ++ } ++ ++ coalesced_range = elem->range; ++ ++ // Coalesce right. ++ entry = wine_rb_next(&elem->entry); ++ if (entry) ++ { ++ TRACE("Coalesced right.\n"); ++ struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); ++ if (elem->range.offset + elem->range.size == right_elem->range.offset) ++ { ++ coalesced_range.size += right_elem->range.size; ++ ++ wine_rb_remove(&free_tree, entry); ++ element_remove_free(heap, right_elem); ++ HeapFree(GetProcessHeap(), 0, right_elem); ++ ++ num_coalesced++; ++ } ++ } ++ ++ // Coalesce left. 
++ entry = wine_rb_prev(&elem->entry); ++ if (entry) ++ { ++ TRACE("Coalesced left.\n"); ++ struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); ++ if (left_elem->range.offset + left_elem->range.size == coalesced_range.offset) ++ { ++ coalesced_range.offset = left_elem->range.offset; ++ coalesced_range.size += left_elem->range.size; ++ ++ wine_rb_remove(&free_tree, entry); ++ element_remove_free(heap, left_elem); ++ HeapFree(GetProcessHeap(), 0, left_elem); ++ ++ num_coalesced++; ++ } ++ } ++ ++ next = elem->next; ++ ++ if (elem->range.size != coalesced_range.size) ++ { ++ FIXME_(d3d_perf)("Coalesced range from (%p, %ld) to (%p, %ld)\n", elem->range.offset, elem->range.size, coalesced_range.offset, coalesced_range.size); ++ ++ wine_rb_remove(&free_tree, &elem->entry); ++ ++ // Move to the correct free bin. ++ element_remove_free(heap, elem); ++ elem->range = coalesced_range; ++ element_insert_free_bin(heap, elem); ++ ++ wine_rb_put(&free_tree, &elem->range.offset, &elem->entry); ++ } ++ ++ elem = next; ++ } ++ } ++ ++ LeaveCriticalSection(&heap->temp_lock); ++ ++ FIXME_(d3d_perf)("Performed %d coalesces.\n", num_coalesced); ++ if (coalesced_count) ++ *coalesced_count = num_coalesced; ++ ++ return WINED3D_OK; ++} +\ No newline at end of file +diff -r -u --new-file a/dlls/wined3d/context.c b/dlls/wined3d/context.c +--- a/dlls/wined3d/context.c 2018-03-28 23:57:59.979900010 +0200 ++++ b/dlls/wined3d/context.c 2018-03-28 23:22:42.726661696 +0200 +@@ -4956,7 +4956,11 @@ + if (parameters->indexed) + { + struct wined3d_buffer *index_buffer = state->index_buffer; +- if (!index_buffer->buffer_object || !stream_info->all_vbo) ++ if (index_buffer->cs_persistent_map) ++ { ++ idx_data = index_buffer->cs_persistent_map->range.offset; ++ } ++ else if (!index_buffer->buffer_object || !stream_info->all_vbo) + { + idx_data = index_buffer->resource.heap_memory; + } +diff -r -u --new-file a/dlls/wined3d/cs.c 
b/dlls/wined3d/cs.c +--- a/dlls/wined3d/cs.c 2018-03-28 23:57:59.780900325 +0200 ++++ b/dlls/wined3d/cs.c 2018-03-28 23:23:49.308515873 +0200 +@@ -73,6 +73,7 @@ + WINED3D_CS_OP_CLEAR_UNORDERED_ACCESS_VIEW, + WINED3D_CS_OP_COPY_UAV_COUNTER, + WINED3D_CS_OP_GENERATE_MIPMAPS, ++ WINED3D_CS_OP_DISCARD_BUFFER, + WINED3D_CS_OP_STOP, + }; + +@@ -437,6 +438,13 @@ + struct wined3d_shader_resource_view *view; + }; + ++struct wined3d_cs_discard_buffer ++{ ++ enum wined3d_cs_op opcode; ++ struct wined3d_buffer *buffer; ++ struct wined3d_buffer_heap_element *map_range; ++}; ++ + struct wined3d_cs_stop + { + enum wined3d_cs_op opcode; +@@ -465,6 +473,15 @@ + } + + InterlockedDecrement(&cs->pending_presents); ++ ++ // FIXME(acomminos): is this the right place to put double-buffered frame ++ // timing based logic? ++ // FIXME(acomminos): this conditional sucks, replace with fancier feature check ++ if (cs->device->wo_buffer_heap && cs->device->cb_buffer_heap) ++ { ++ wined3d_buffer_heap_cs_fence_issue(cs->device->wo_buffer_heap, cs->device); ++ wined3d_buffer_heap_cs_fence_issue(cs->device->cb_buffer_heap, cs->device); ++ } + } + + void wined3d_cs_emit_present(struct wined3d_cs *cs, struct wined3d_swapchain *swapchain, +@@ -1984,7 +2001,7 @@ + const struct wined3d_cs_map *op = data; + struct wined3d_resource *resource = op->resource; + +- *op->hr = resource->resource_ops->resource_sub_resource_map(resource, ++ *op->hr = resource->resource_ops->resource_sub_resource_map_cs(resource, + op->sub_resource_idx, op->map_desc, op->box, op->flags); + } + +@@ -2018,7 +2035,7 @@ + const struct wined3d_cs_unmap *op = data; + struct wined3d_resource *resource = op->resource; + +- *op->hr = resource->resource_ops->resource_sub_resource_unmap(resource, op->sub_resource_idx); ++ *op->hr = resource->resource_ops->resource_sub_resource_unmap_cs(resource, op->sub_resource_idx); + } + + HRESULT wined3d_cs_unmap(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx) 
+@@ -2417,6 +2434,53 @@ + cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT); + } + ++static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *data) ++{ ++ const struct wined3d_cs_discard_buffer *op = data; ++ struct wined3d_buffer *buffer = op->buffer; ++ HRESULT hr; ++ ++ // TODO(acomminos): should call into buffer.c here instead. ++ if (FAILED(hr = wined3d_buffer_heap_free_fenced(buffer->buffer_heap, cs->device, buffer->cs_persistent_map))) ++ { ++ ERR("Failed to do a fenced free on discarded buffer %p, hr %x\n. Freeing anyway.", buffer, hr); ++ wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map); ++ } ++ ++ buffer->cs_persistent_map = op->map_range; ++ ++ // TODO(acomminos): merge this logic with buffer.c functions for standalone BOs ++ if (buffer->bind_flags & WINED3D_BIND_VERTEX_BUFFER) ++ device_invalidate_state(cs->device, STATE_STREAMSRC); ++ if (buffer->bind_flags & WINED3D_BIND_INDEX_BUFFER) ++ device_invalidate_state(cs->device, STATE_INDEXBUFFER); ++ if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER) ++ { ++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_VERTEX)); ++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_HULL)); ++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_DOMAIN)); ++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_GEOMETRY)); ++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_PIXEL)); ++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_COMPUTE)); ++ } ++ ++ wined3d_resource_release(&op->buffer->resource); ++} ++ ++void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_buffer_heap_element *elem) ++{ ++ struct wined3d_cs_discard_buffer *op; ++ ++ op = cs->ops->require_space(cs, sizeof(*op), WINED3D_CS_QUEUE_DEFAULT); ++ op->opcode = WINED3D_CS_OP_DISCARD_BUFFER; ++ 
op->buffer = buffer; ++ op->map_range = elem; ++ ++ wined3d_resource_acquire(&buffer->resource); ++ ++ cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT); ++} ++ + static void wined3d_cs_emit_stop(struct wined3d_cs *cs) + { + struct wined3d_cs_stop *op; +@@ -2477,6 +2541,7 @@ + /* WINED3D_CS_OP_CLEAR_UNORDERED_ACCESS_VIEW */ wined3d_cs_exec_clear_unordered_access_view, + /* WINED3D_CS_OP_COPY_UAV_COUNTER */ wined3d_cs_exec_copy_uav_counter, + /* WINED3D_CS_OP_GENERATE_MIPMAPS */ wined3d_cs_exec_generate_mipmaps, ++ /* WINED3D_CS_OP_DISCARD_BUFFER */ wined3d_cs_exec_discard_buffer, + }; + + static BOOL wined3d_cs_st_check_space(struct wined3d_cs *cs, size_t size, enum wined3d_cs_queue_id queue_id) +diff -r -u --new-file a/dlls/wined3d/device.c b/dlls/wined3d/device.c +--- a/dlls/wined3d/device.c 2018-03-28 23:58:00.031899927 +0200 ++++ b/dlls/wined3d/device.c 2018-03-28 23:33:09.909291883 +0200 +@@ -840,6 +840,66 @@ + device->null_sampler = NULL; + } + ++/* Context activation is done by the caller. */ ++static void create_buffer_heap(struct wined3d_device *device, struct wined3d_context *context) ++{ ++ const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; ++ BOOL use_pba = FALSE; ++ char *env_pba_disable; ++ ++ if (!gl_info->supported[ARB_BUFFER_STORAGE]) ++ { ++ FIXME("Not using PBA, ARB_buffer_storage unsupported.\n"); ++ } ++ else if ((env_pba_disable = getenv("PBA_DISABLE")) && *env_pba_disable != '0') ++ { ++ FIXME("Not using PBA, envvar 'PBA_DISABLE' set.\n"); ++ } ++ else ++ { ++ // TODO(acomminos): kill this magic number. perhaps base on vram. ++ GLsizeiptr geo_heap_size = 512 * 1024 * 1024; ++ // We choose a constant buffer size of 128MB, the same as NVIDIA claims to ++ // use in their Direct3D driver for discarded constant buffers. 
++ GLsizeiptr cb_heap_size = 128 * 1024 * 1024; ++ GLint ub_alignment; ++ HRESULT hr; ++ ++ gl_info->gl_ops.gl.p_glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &ub_alignment); ++ ++ // Align constant buffer heap size, in case GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT isn't a power of two (for some reason). ++ cb_heap_size -= cb_heap_size % ub_alignment; ++ ++ if (FAILED(hr = wined3d_buffer_heap_create(context, geo_heap_size, 0, TRUE, &device->wo_buffer_heap))) ++ { ++ ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr); ++ goto fail; ++ } ++ ++ if (FAILED(hr = wined3d_buffer_heap_create(context, cb_heap_size, ub_alignment, TRUE, &device->cb_buffer_heap))) ++ { ++ ERR("Failed to create persistent buffer heap for constant buffers, hr %#x.\n", hr); ++ goto fail; ++ } ++ ++ FIXME("Initialized PBA (geo_heap_size: %ld, cb_heap_size: %ld, ub_align: %d)\n", geo_heap_size, cb_heap_size, ub_alignment); ++ ++ use_pba = TRUE; ++ } ++fail: ++ device->use_pba = use_pba; ++} ++ ++/* Context activation is done by the caller. */ ++static void destroy_buffer_heap(struct wined3d_device *device, struct wined3d_context *context) ++{ ++ if (device->wo_buffer_heap) ++ wined3d_buffer_heap_destroy(device->wo_buffer_heap, context); ++ ++ if (device->cb_buffer_heap) ++ wined3d_buffer_heap_destroy(device->cb_buffer_heap, context); ++} ++ + static LONG fullscreen_style(LONG style) + { + /* Make sure the window is managed, otherwise we won't get keyboard input. 
*/ +@@ -1004,6 +1064,7 @@ + device->shader_backend->shader_free_private(device); + destroy_dummy_textures(device, context); + destroy_default_samplers(device, context); ++ destroy_buffer_heap(device, context); + context_release(context); + + while (device->context_count) +@@ -1052,6 +1113,7 @@ + context = context_acquire(device, target, 0); + create_dummy_textures(device, context); + create_default_samplers(device, context); ++ create_buffer_heap(device, context); + context_release(context); + } + +diff -r -u --new-file a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c +--- a/dlls/wined3d/directx.c 2018-03-28 23:58:00.062899878 +0200 ++++ b/dlls/wined3d/directx.c 2018-03-28 23:37:33.887751410 +0200 +@@ -111,6 +111,7 @@ + /* ARB */ + {"GL_ARB_base_instance", ARB_BASE_INSTANCE }, + {"GL_ARB_blend_func_extended", ARB_BLEND_FUNC_EXTENDED }, ++ {"GL_ARB_buffer_storage", ARB_BUFFER_STORAGE }, + {"GL_ARB_clear_buffer_object", ARB_CLEAR_BUFFER_OBJECT }, + {"GL_ARB_clear_texture", ARB_CLEAR_TEXTURE }, + {"GL_ARB_clip_control", ARB_CLIP_CONTROL }, +@@ -148,6 +149,7 @@ + {"GL_ARB_internalformat_query2", ARB_INTERNALFORMAT_QUERY2 }, + {"GL_ARB_map_buffer_alignment", ARB_MAP_BUFFER_ALIGNMENT }, + {"GL_ARB_map_buffer_range", ARB_MAP_BUFFER_RANGE }, ++ {"GL_ARB_multi_bind", ARB_MULTI_BIND }, + {"GL_ARB_multisample", ARB_MULTISAMPLE }, + {"GL_ARB_multitexture", ARB_MULTITEXTURE }, + {"GL_ARB_occlusion_query", ARB_OCCLUSION_QUERY }, +@@ -944,6 +946,13 @@ + return !wined3d_caps_gl_ctx_test_viewport_subpixel_bits(ctx); + } + ++static BOOL match_mesa(const struct wined3d_gl_info *gl_info, struct wined3d_caps_gl_ctx *ctx, ++ const char *gl_renderer, enum wined3d_gl_vendor gl_vendor, ++ enum wined3d_pci_vendor card_vendor, enum wined3d_pci_device device) ++{ ++ return gl_vendor == GL_VENDOR_MESA; ++} ++ + static void quirk_apple_glsl_constants(struct wined3d_gl_info *gl_info) + { + /* MacOS needs uniforms for relative addressing offsets. This can accumulate to quite a few uniforms. 
+@@ -1081,6 +1090,13 @@ + } + } + ++static void quirk_use_client_storage_bit(struct wined3d_gl_info *gl_info) ++{ ++ // Using ARB_buffer_storage on Mesa requires the GL_CLIENT_STORAGE_BIT to be ++ // set to use GTT for immutable buffers on radeon (see PIPE_USAGE_STREAM). ++ gl_info->quirks |= WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT; ++} ++ + struct driver_quirk + { + BOOL (*match)(const struct wined3d_gl_info *gl_info, struct wined3d_caps_gl_ctx *ctx, +@@ -1177,6 +1193,11 @@ + quirk_broken_viewport_subpixel_bits, + "Nvidia viewport subpixel bits bug" + }, ++ { ++ match_mesa, ++ quirk_use_client_storage_bit, ++ "Use GL_CLIENT_STORAGE_BIT for persistent buffers on mesa", ++ }, + }; + + /* Certain applications (Steam) complain if we report an outdated driver version. In general, +@@ -2713,6 +2734,8 @@ + /* GL_ARB_blend_func_extended */ + USE_GL_FUNC(glBindFragDataLocationIndexed) + USE_GL_FUNC(glGetFragDataIndex) ++ /* GL_ARB_buffer_storage */ ++ USE_GL_FUNC(glBufferStorage) + /* GL_ARB_clear_buffer_object */ + USE_GL_FUNC(glClearBufferData) + USE_GL_FUNC(glClearBufferSubData) +@@ -2792,6 +2815,8 @@ + /* GL_ARB_map_buffer_range */ + USE_GL_FUNC(glFlushMappedBufferRange) + USE_GL_FUNC(glMapBufferRange) ++ /* GL_ARB_multi_bind */ ++ USE_GL_FUNC(glBindBuffersRange) + /* GL_ARB_multisample */ + USE_GL_FUNC(glSampleCoverageARB) + /* GL_ARB_multitexture */ +@@ -3951,6 +3976,7 @@ + {ARB_TEXTURE_VIEW, MAKEDWORD_VERSION(4, 3)}, + + {ARB_CLEAR_TEXTURE, MAKEDWORD_VERSION(4, 4)}, ++ {ARB_MULTI_BIND, MAKEDWORD_VERSION(4, 4)}, + + {ARB_CLIP_CONTROL, MAKEDWORD_VERSION(4, 5)}, + {ARB_CULL_DISTANCE, MAKEDWORD_VERSION(4, 5)}, +diff -r -u --new-file a/dlls/wined3d/Makefile.in b/dlls/wined3d/Makefile.in +--- a/dlls/wined3d/Makefile.in 2018-03-28 23:57:51.270913815 +0200 ++++ b/dlls/wined3d/Makefile.in 2018-03-28 21:23:02.867794542 +0200 +@@ -6,6 +6,7 @@ + arb_program_shader.c \ + ati_fragment_shader.c \ + buffer.c \ ++ buffer_heap.c \ + context.c \ + cs.c \ + device.c \ +diff -r -u 
--new-file a/dlls/wined3d/resource.c b/dlls/wined3d/resource.c +--- a/dlls/wined3d/resource.c 2018-03-28 23:57:59.708900439 +0200 ++++ b/dlls/wined3d/resource.c 2018-03-28 22:54:16.100016102 +0200 +@@ -340,6 +340,7 @@ + HRESULT CDECL wined3d_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx, + struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) + { ++ HRESULT hr; + TRACE("resource %p, sub_resource_idx %u, map_desc %p, box %s, flags %#x.\n", + resource, sub_resource_idx, map_desc, debug_box(box), flags); + +@@ -362,9 +363,14 @@ + } + + flags = wined3d_resource_sanitise_map_flags(resource, flags); +- wined3d_resource_wait_idle(resource); +- +- return wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags); ++ if (FAILED(hr = resource->resource_ops->resource_sub_resource_map(resource, sub_resource_idx, map_desc, box, flags))) ++ { ++ TRACE_(d3d_perf)("Mapping resource %p on the command stream.\n", resource); ++ wined3d_resource_wait_idle(resource); ++ hr = wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags); ++ } ++ ++ return hr; + } + + HRESULT CDECL wined3d_resource_map_info(struct wined3d_resource *resource, unsigned int sub_resource_idx, +@@ -379,7 +385,12 @@ + { + TRACE("resource %p, sub_resource_idx %u.\n", resource, sub_resource_idx); + +- return wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx); ++ if (FAILED(hr = resource->resource_ops->resource_sub_resource_unmap(resource, sub_resource_idx))) ++ { ++ TRACE_(d3d_perf)("Unmapping resource %p on the command stream.\n", resource); ++ hr = wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx); ++ } ++ return hr; + } + + UINT CDECL wined3d_resource_update_info(struct wined3d_resource *resource, unsigned int sub_resource_idx, +diff -r -u --new-file a/dlls/wined3d/state.c b/dlls/wined3d/state.c +--- a/dlls/wined3d/state.c 2018-03-28 23:57:59.808900281 +0200 ++++ 
b/dlls/wined3d/state.c 2018-03-28 23:06:47.167455200 +0200 +@@ -4797,7 +4797,11 @@ + else + { + struct wined3d_buffer *ib = state->index_buffer; +- GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object)); ++ // FIXME(acomminos): disastrous. ++ if (ib->locations & WINED3D_LOCATION_PERSISTENT_MAP) ++ GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_heap->buffer_object)); ++ else ++ GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object)); + } + } + +@@ -4863,6 +4867,7 @@ + enum wined3d_shader_type shader_type; + struct wined3d_buffer *buffer; + unsigned int i, base, count; ++ struct wined3d_bo_address bo_addr; + + TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); + +@@ -4872,10 +4877,49 @@ + shader_type = WINED3D_SHADER_TYPE_COMPUTE; + + wined3d_gl_limits_get_uniform_block_range(&gl_info->limits, shader_type, &base, &count); +- for (i = 0; i < count; ++i) ++ ++ if (gl_info->supported[ARB_MULTI_BIND]) ++ { ++ GLuint buffer_objects[count]; ++ GLsizeiptr buffer_offsets[count]; ++ GLsizeiptr buffer_sizes[count]; ++ ++ for (i = 0; i < count; ++i) ++ { ++ buffer = state->cb[shader_type][i]; ++ if (buffer) ++ { ++ wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); ++ buffer_objects[i] = bo_addr.buffer_object; ++ buffer_offsets[i] = bo_addr.addr; ++ buffer_sizes[i] = bo_addr.length; ++ } ++ else ++ { ++ buffer_objects[i] = buffer_offsets[i] = 0; ++ // The ARB_multi_bind spec states that an error may be thrown if ++ // `size` is less than or equal to zero. Thus, we specify a size for ++ // unused buffers anyway. ++ buffer_sizes[i] = 1; ++ } ++ } ++ GL_EXTCALL(glBindBuffersRange(GL_UNIFORM_BUFFER, base, count, buffer_objects, buffer_offsets, buffer_sizes)); ++ } ++ else + { +- buffer = state->cb[shader_type][i]; +- GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, buffer ?
buffer->buffer_object : 0)); ++ for (i = 0; i < count; ++i) ++ { ++ buffer = state->cb[shader_type][i]; ++ if (buffer) ++ { ++ wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); ++ GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length)); ++ } ++ else ++ { ++ GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0)); ++ } ++ } + } + checkGLcall("bind constant buffers"); + } +diff -r -u --new-file a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c +--- a/dlls/wined3d/texture.c 2018-03-28 23:57:59.729900406 +0200 ++++ b/dlls/wined3d/texture.c 2018-03-28 22:58:49.906449411 +0200 +@@ -2096,6 +2096,12 @@ + static HRESULT texture_resource_sub_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx, + struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) + { ++ return E_NOTIMPL; ++} ++ ++static HRESULT texture_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx, ++ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) ++{ + const struct wined3d_format *format = resource->format; + struct wined3d_texture_sub_resource *sub_resource; + struct wined3d_device *device = resource->device; +@@ -2256,6 +2262,11 @@ + + static HRESULT texture_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx) + { ++ return E_NOTIMPL; ++} ++ ++static HRESULT texture_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx) ++{ + struct wined3d_texture_sub_resource *sub_resource; + struct wined3d_device *device = resource->device; + struct wined3d_context *context = NULL; +@@ -2307,6 +2318,8 @@ + texture_resource_sub_resource_map, + texture_resource_sub_resource_map_info, + texture_resource_sub_resource_unmap, ++ texture_resource_sub_resource_map_cs, ++ texture_resource_sub_resource_unmap_cs, + }; + + /* Context activation is done by the 
caller. */ +diff -r -u --new-file a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c +--- a/dlls/wined3d/utils.c 2018-03-28 23:57:59.981900006 +0200 ++++ b/dlls/wined3d/utils.c 2018-03-28 22:59:19.628387862 +0200 +@@ -6368,6 +6368,7 @@ + LOCATION_TO_STR(WINED3D_LOCATION_DRAWABLE); + LOCATION_TO_STR(WINED3D_LOCATION_RB_MULTISAMPLE); + LOCATION_TO_STR(WINED3D_LOCATION_RB_RESOLVED); ++ LOCATION_TO_STR(WINED3D_LOCATION_PERSISTENT_MAP); + #undef LOCATION_TO_STR + if (location) + FIXME("Unrecognized location flag(s) %#x.\n", location); +diff -r -u --new-file a/dlls/wined3d/wined3d_gl.h b/dlls/wined3d/wined3d_gl.h +--- a/dlls/wined3d/wined3d_gl.h 2018-03-28 23:57:59.555900682 +0200 ++++ b/dlls/wined3d/wined3d_gl.h 2018-03-28 23:07:20.628395936 +0200 +@@ -44,6 +44,7 @@ + /* ARB */ + ARB_BASE_INSTANCE, + ARB_BLEND_FUNC_EXTENDED, ++ ARB_BUFFER_STORAGE, + ARB_CLEAR_BUFFER_OBJECT, + ARB_CLEAR_TEXTURE, + ARB_CLIP_CONTROL, +@@ -81,6 +82,7 @@ + ARB_INTERNALFORMAT_QUERY2, + ARB_MAP_BUFFER_ALIGNMENT, + ARB_MAP_BUFFER_RANGE, ++ ARB_MULTI_BIND, + ARB_MULTISAMPLE, + ARB_MULTITEXTURE, + ARB_OCCLUSION_QUERY, +diff -r -u --new-file a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h +--- a/dlls/wined3d/wined3d_private.h 2018-03-28 23:58:00.048899900 +0200 ++++ b/dlls/wined3d/wined3d_private.h 2018-03-28 23:37:57.861715709 +0200 +@@ -75,6 +75,7 @@ + #define WINED3D_QUIRK_INFO_LOG_SPAM 0x00000080 + #define WINED3D_QUIRK_LIMITED_TEX_FILTERING 0x00000100 + #define WINED3D_QUIRK_BROKEN_ARB_FOG 0x00000200 ++#define WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT 0x00000400 + + enum wined3d_ffp_idx + { +@@ -1470,6 +1471,7 @@ + { + GLuint buffer_object; + BYTE *addr; ++ GLsizeiptr length; + }; + + struct wined3d_const_bo_address +@@ -2929,7 +2931,8 @@ + BYTE inScene : 1; /* A flag to check for proper BeginScene / EndScene call pairs */ + BYTE softwareVertexProcessing : 1; /* process vertex shaders using software or hardware */ + BYTE filter_messages : 1; +- BYTE padding : 3; ++ BYTE use_pba : 1; 
/* A flag to use the persistent buffer allocator for dynamic buffers. */ ++ BYTE padding : 2; + + unsigned char surface_alignment; /* Line Alignment of surfaces */ + +@@ -2980,6 +2983,10 @@ + /* Context management */ + struct wined3d_context **contexts; + UINT context_count; ++ ++ /* Dynamic buffer heap */ ++ struct wined3d_buffer_heap *wo_buffer_heap; ++ struct wined3d_buffer_heap *cb_buffer_heap; + }; + + void device_clear_render_targets(struct wined3d_device *device, UINT rt_count, const struct wined3d_fb_state *fb, +@@ -3021,6 +3028,9 @@ + HRESULT (*resource_map_info)(struct wined3d_resource *resource, unsigned int sub_resource_idx, + struct wined3d_map_info *info, DWORD flags); + HRESULT (*resource_sub_resource_unmap)(struct wined3d_resource *resource, unsigned int sub_resource_idx); ++ HRESULT (*resource_sub_resource_map_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx, ++ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags); ++ HRESULT (*resource_sub_resource_unmap_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx); + }; + + struct wined3d_resource +@@ -3324,6 +3334,7 @@ + #define WINED3D_LOCATION_DRAWABLE 0x00000040 + #define WINED3D_LOCATION_RB_MULTISAMPLE 0x00000080 + #define WINED3D_LOCATION_RB_RESOLVED 0x00000100 ++#define WINED3D_LOCATION_PERSISTENT_MAP 0x00000200 + + const char *wined3d_debug_location(DWORD location) DECLSPEC_HIDDEN; + +@@ -3480,6 +3491,25 @@ + DWORD flags) DECLSPEC_HIDDEN; + void state_unbind_resources(struct wined3d_state *state) DECLSPEC_HIDDEN; + ++struct wined3d_map_range ++{ ++ GLintptr offset; ++ GLsizeiptr size; ++}; ++ ++struct wined3d_buffer_heap_element ++{ ++ struct wined3d_map_range range; ++ ++ // rbtree data ++ struct wine_rb_entry entry; ++ ++ // Binned free list positions ++ struct wined3d_buffer_heap_element *next; ++ struct wined3d_buffer_heap_element *prev; ++}; ++ ++ + enum wined3d_cs_queue_id + { + WINED3D_CS_QUEUE_DEFAULT = 0, +@@ -3624,6 +3654,7 
@@ + void wined3d_cs_emit_update_sub_resource(struct wined3d_cs *cs, struct wined3d_resource *resource, + unsigned int sub_resource_idx, const struct wined3d_box *box, const void *data, unsigned int row_pitch, + unsigned int slice_pitch) DECLSPEC_HIDDEN; ++void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_buffer_heap_element *map_range) DECLSPEC_HIDDEN; + void wined3d_cs_init_object(struct wined3d_cs *cs, + void (*callback)(void *object), void *object) DECLSPEC_HIDDEN; + HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx, +@@ -3657,12 +3688,61 @@ + CONV_POSITIONT, + }; + +-struct wined3d_map_range +-{ +- UINT offset; +- UINT size; ++struct wined3d_buffer_heap_fenced_element; ++ ++// Number of power-of-two buckets to populate. ++#define WINED3D_BUFFER_HEAP_BINS 32 ++ ++struct wined3d_buffer_heap_bin ++ { ++ struct wined3d_buffer_heap_element *head; ++ struct wined3d_buffer_heap_element *tail; + }; + ++struct wined3d_buffer_heap_bin_set ++{ ++ struct wined3d_buffer_heap_bin bins[WINED3D_BUFFER_HEAP_BINS]; ++ }; ++ ++// A heap that manages allocations with a single GL buffer. ++struct wined3d_buffer_heap ++{ ++ GLuint buffer_object; ++ void *map_ptr; ++ GLsizeiptr alignment; ++ CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list. ++ ++ struct wined3d_buffer_heap_bin_set free_list; ++ ++ // Elements that need to be fenced, but haven't reached the required size. ++ struct wined3d_buffer_heap_bin_set pending_fenced_bins; ++ ++ // List of sets of buffers behind a common fence, in FIFO order. 
++ struct wined3d_buffer_heap_fenced_element *fenced_head; ++ struct wined3d_buffer_heap_fenced_element *fenced_tail; ++}; ++ ++HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **heap) DECLSPEC_HIDDEN; ++HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN; ++// Fetches a buffer from the heap of at least the given size. ++// Attempts to coalesce blocks under memory pressure. ++HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_buffer_heap_element** out_elem) DECLSPEC_HIDDEN; ++// Immediately frees a heap-allocated buffer segment. ++HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) DECLSPEC_HIDDEN; ++// Enqueues a buffer segment to return to the heap once its fence has been signaled. ++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_buffer_heap_element *elem) DECLSPEC_HIDDEN; ++// Issues a fence for the current set of pending fenced buffers. ++// Double-buffered: if the last fence issued has not yet been triggered, waits ++// on it. ++HRESULT wined3d_buffer_heap_cs_fence_issue(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN; ++// Waits on the next issued fence in FIFO order. Frees the fenced buffers after ++// the fence has been triggered. ++HRESULT wined3d_buffer_heap_cs_fence_wait(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN; ++// Performs deferred coalescing of buffers. To be called under memory pressure. ++// Outputs the number of coalesced regions in `num_coalesced`. 
++HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, int *num_coalesced) DECLSPEC_HIDDEN; ++ ++ + struct wined3d_buffer + { + struct wined3d_resource resource; +@@ -3687,6 +3767,11 @@ + UINT stride; /* 0 if no conversion */ + enum wined3d_buffer_conversion_type *conversion_map; /* NULL if no conversion */ + UINT conversion_stride; /* 0 if no shifted conversion */ ++ ++ /* persistent mapped buffer */ ++ struct wined3d_buffer_heap *buffer_heap; ++ struct wined3d_buffer_heap_element *cs_persistent_map; ++ struct wined3d_buffer_heap_element *mt_persistent_map; + }; + + static inline struct wined3d_buffer *buffer_from_resource(struct wined3d_resource *resource) |