Diffstat (limited to '0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch')
-rw-r--r--  0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch  455
1 files changed, 455 insertions, 0 deletions
diff --git a/0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch b/0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch
new file mode 100644
index 000000000000..cac70eac997d
--- /dev/null
+++ b/0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch
@@ -0,0 +1,455 @@
+From 7a630d56cb1bddfd03cae3cdc43aee949b04abe2 Mon Sep 17 00:00:00 2001
+From: Andrew Comminos <andrew@comminos.com>
+Date: Mon, 26 Feb 2018 21:35:40 -0800
+Subject: [PATCH 5/8] wined3d: Experimental buffer heap fence batching, upper
+ bound on heap size.
+
+---
+ dlls/wined3d/buffer.c | 11 +-
+ dlls/wined3d/buffer_heap.c | 232 ++++++++++++++++++++++++-----------------
+ dlls/wined3d/cs.c | 15 +--
+ dlls/wined3d/device.c | 5 +-
+ dlls/wined3d/wined3d_private.h | 12 ++-
+ 5 files changed, 154 insertions(+), 121 deletions(-)
+
+diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c
+index 7d7e506817..fbec613c92 100644
+--- a/dlls/wined3d/buffer.c
++++ b/dlls/wined3d/buffer.c
+@@ -992,9 +992,12 @@ void wined3d_buffer_load(struct wined3d_buffer *buffer, struct wined3d_context *
+
+ if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
+ {
+- if (!wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP))
+- ERR("Failed to preload persistent mapping.\n");
+- return;
++ if (wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP))
++ return;
++
++ ERR("Failed to preload persistent mapping for %p, falling back to BO.\n", buffer);
++ buffer->flags |= WINED3D_BUFFER_USE_BO;
++ buffer->flags &= ~WINED3D_BUFFER_PERSISTENT;
+ }
+
+ /* TODO: Make converting independent from VBOs */
+@@ -1417,7 +1420,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc
+ struct wined3d_map_range map_range;
+ if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &map_range)))
+ {
+- FIXME("Failed to allocate new buffer, falling back to sync path.\n");
++ FIXME_(d3d_perf)("Failed to allocate new buffer, falling back to sync path.\n");
+ return hr;
+ }
+ map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset;
+diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
+index 02b925b658..165a957edd 100644
+--- a/dlls/wined3d/buffer_heap.c
++++ b/dlls/wined3d/buffer_heap.c
+@@ -22,6 +22,7 @@
+ #include "wined3d_private.h"
+
+ WINE_DEFAULT_DEBUG_CHANNEL(d3d);
++WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
+
+ struct wined3d_buffer_heap_element
+ {
+@@ -33,7 +34,7 @@ struct wined3d_buffer_heap_element
+
+ struct wined3d_buffer_heap_fenced_element
+ {
+- struct wined3d_map_range range;
++ struct wined3d_buffer_heap_element *ranges;
+ struct wined3d_fence *fence;
+
+ struct wined3d_buffer_heap_element *next;
+@@ -52,19 +53,19 @@ static struct wined3d_buffer_heap_element* element_new(GLintptr offset, GLsizeip
+ return elem;
+ }
+
+-static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_map_range range, struct wined3d_fence* fence)
++static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_element *ranges, struct wined3d_fence* fence)
+ {
+ struct wined3d_buffer_heap_fenced_element* elem;
+ elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_fenced_element));
+ if (!elem)
+ return NULL;
+- elem->range = range;
++ elem->ranges = ranges;
+ elem->fence = fence;
+ elem->next = NULL;
+ return elem;
+ }
+
+-static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
++static void element_merge_adjacent(struct wined3d_buffer_heap_element **head, struct wined3d_buffer_heap_element *elem)
+ {
+ struct wined3d_buffer_heap_element *cur_prev = elem->prev;
+ struct wined3d_buffer_heap_element *cur_next = elem->next;
+@@ -77,8 +78,8 @@ static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wine
+ if (cur_prev->prev)
+ cur_prev->prev->next = elem;
+
+- if (cur_prev == heap->free_list_head)
+- heap->free_list_head = elem;
++ if (cur_prev == *head)
++ *head = elem;
+
+ HeapFree(GetProcessHeap(), 0, cur_prev);
+ }
+@@ -94,6 +95,69 @@ static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wine
+ }
+ }
+
++// Inserts a range, in offset order, into the list starting at *head.
++// Updates the head of the list, if necessary.
++static void element_insert_range(struct wined3d_buffer_heap_element **head, struct wined3d_map_range range)
++{
++ struct wined3d_buffer_heap_element *elem = *head;
++ struct wined3d_buffer_heap_element *new_elem;
++ struct wined3d_buffer_heap_element *last_elem = NULL;
++
++ // Special case where the head doesn't exist.
++ if (!elem)
++ {
++ new_elem = element_new(range.offset, range.size);
++ *head = new_elem;
++ return;
++ }
++
++ while (elem)
++ {
++ struct wined3d_map_range *erange = &elem->range;
++ if (range.offset + range.size == erange->offset)
++ {
++ // Left side merge
++ erange->offset = range.offset;
++ erange->size += range.size;
++ // Check if this causes a merge with elem->prev
++ element_merge_adjacent(head, elem);
++ return;
++ }
++ else if (erange->offset + erange->size == range.offset)
++ {
++ // Right side merge
++ erange->size += range.size;
++ // Check if this causes a merge with elem->prev
++ element_merge_adjacent(head, elem);
++ return;
++ }
++ else if (range.offset < erange->offset)
++ {
++ // Append to left, non-merge case.
++ new_elem = element_new(range.offset, range.size);
++ new_elem->prev = elem->prev;
++ new_elem->next = elem;
++ if (elem->prev)
++ {
++ elem->prev->next = new_elem;
++ }
++ if (*head == elem)
++ {
++ *head = new_elem;
++ }
++ elem->prev = new_elem;
++ return;
++ }
++ last_elem = elem;
++ elem = elem->next;
++ }
++
++ // Larger offset than all other elements in the list, append to the end.
++ new_elem = element_new(range.offset, range.size);
++ new_elem->prev = last_elem;
++ last_elem->next = new_elem;
++}
++
+ /* Context activation is done by the caller. */
+ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap)
+ {
+@@ -134,6 +198,9 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
+ object->free_list_head = element_new(0, size);
+ object->fenced_head = object->fenced_tail = NULL;
+ object->alignment = alignment;
++ object->pending_fenced_bytes = 0;
++ object->pending_fenced_head = NULL;
++ object->pending_fenced_threshold_bytes = size / 4; // FIXME(acomminos): make this externally declared
+ InitializeCriticalSection(&object->temp_lock);
+
+ *buffer_heap = object;
+@@ -155,6 +222,9 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
+ // TODO(acomminos): free list binning?
+ struct wined3d_buffer_heap_element *elem = heap->free_list_head;
+
++ // Round up to the next power of two to reduce fragmentation.
++ size = 1ULL << (int)ceil(log2(size));
++
+ // Round up the size to a multiple of the heap's alignment.
+ if (heap->alignment)
+ size += heap->alignment - (size % heap->alignment);
+@@ -188,6 +258,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
+ }
+ HeapFree(GetProcessHeap(), 0, elem);
+ }
++
+ LeaveCriticalSection(&heap->temp_lock);
+ return WINED3D_OK;
+ }
+@@ -201,87 +272,53 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
+ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range)
+ {
+ EnterCriticalSection(&heap->temp_lock);
+- struct wined3d_buffer_heap_element *new_elem;
+- struct wined3d_buffer_heap_element *elem = heap->free_list_head;
+- struct wined3d_buffer_heap_element *last_elem = NULL;
+
+- // Special case where the head doesn't exist.
+- if (!elem)
+- {
+- new_elem = element_new(range.offset, range.size);
+- heap->free_list_head = new_elem;
+- goto success;
+- }
++ element_insert_range(&heap->free_list_head, range);
+
+- while (elem)
++ LeaveCriticalSection(&heap->temp_lock);
++ return WINED3D_OK;
++}
++
++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range)
++{
++ element_insert_range(&heap->pending_fenced_head, range);
++
++ heap->pending_fenced_bytes += range.size;
++ if (heap->pending_fenced_bytes >= heap->pending_fenced_threshold_bytes)
+ {
+- struct wined3d_map_range *erange = &elem->range;
+- if (range.offset + range.size == erange->offset)
++ // TODO(acomminos): break this out into a separate function
++ struct wined3d_buffer_heap_fenced_element *fenced_elem;
++ struct wined3d_fence *fence;
++ HRESULT hr;
++
++ if (FAILED(hr = wined3d_fence_create(device, &fence)))
+ {
+- // Left side merge
+- erange->offset = range.offset;
+- erange->size += range.size;
+- // Check if this causes a merge with elem->prev
+- element_merge_adjacent(heap, elem);
+- goto success;
++ ERR("Failed to create fence.\n");
++ return hr;
+ }
+- else if (erange->offset + erange->size == range.offset)
++
++ fenced_elem = fenced_element_new(heap->pending_fenced_head, fence);
++ if (!fenced_elem)
++ return E_OUTOFMEMORY;
++
++ TRACE_(d3d_perf)("Dispatching fenced buffer set.\n");
++ heap->pending_fenced_bytes = 0;
++ heap->pending_fenced_head = NULL;
++
++ // Append to end of fenced list, which works well if you assume that buffers
++ // are freed in some ascending draw call ordering.
++ if (!heap->fenced_head)
+ {
+- // Right side merge
+- erange->size += range.size;
+- // Check if this causes a merge with elem->prev
+- element_merge_adjacent(heap, elem);
+- goto success;
++ heap->fenced_head = fenced_elem;
++ heap->fenced_tail = fenced_elem;
+ }
+- else if (range.offset < erange->offset)
++ else
+ {
+- // Append to left, non-merge case.
+- new_elem = element_new(range.offset, range.size);
+- new_elem->prev = elem->prev;
+- new_elem->next = elem;
+- if (elem->prev)
+- {
+- elem->prev->next = new_elem;
+- }
+- if (heap->free_list_head == elem)
+- {
+- heap->free_list_head = new_elem;
+- }
+- elem->prev = new_elem;
+- goto success;
++ heap->fenced_tail->next = fenced_elem;
++ heap->fenced_tail = fenced_elem;
+ }
+- last_elem = elem;
+- elem = elem->next;
+- }
+-
+- // Larger offset than all other elements in the list, append to the end.
+- new_elem = element_new(range.offset, range.size);
+- new_elem->prev = last_elem;
+- last_elem->next = new_elem;
+-
+-success:
+- LeaveCriticalSection(&heap->temp_lock);
+- return WINED3D_OK;
+-}
+-
+-HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_map_range range, struct wined3d_fence *fence)
+-{
+- struct wined3d_buffer_heap_fenced_element *elem;
+- elem = fenced_element_new(range, fence);
+- if (!elem)
+- return E_OUTOFMEMORY;
+
+- // Append to end of fenced list, which works well if you assume that buffers
+- // are freed in some ascending draw call ordering.
+- if (!heap->fenced_head)
+- {
+- heap->fenced_head = elem;
+- heap->fenced_tail = elem;
+- }
+- else
+- {
+- heap->fenced_tail->next = elem;
+- heap->fenced_tail = elem;
++ wined3d_fence_issue(fence, device);
+ }
+
+ return WINED3D_OK;
+@@ -294,29 +331,32 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str
+ if (!elem)
+ return WINED3D_OK;
+
+- while (elem)
++ res = wined3d_fence_test(elem->fence, device, 0);
++ switch (res)
+ {
+- res = wined3d_fence_test(elem->fence, device, 0);
+- switch (res)
+- {
+- case WINED3D_FENCE_OK:
+- case WINED3D_FENCE_NOT_STARTED:
++ case WINED3D_FENCE_OK:
++ case WINED3D_FENCE_NOT_STARTED:
++ {
++ TRACE_(d3d_perf)("Freed fence group.\n");
++ struct wined3d_buffer_heap_element *range_elem = elem->ranges;
++ // FIXME(acomminos): this might take a while. incrementally do this?
++ while (range_elem)
+ {
+- struct wined3d_buffer_heap_fenced_element *next = elem->next;
+-
+- wined3d_fence_destroy(elem->fence);
+- wined3d_buffer_heap_free(heap, elem->range);
++ struct wined3d_buffer_heap_element *next = range_elem->next;
++ wined3d_buffer_heap_free(heap, range_elem->range);
++ HeapFree(GetProcessHeap(), 0, range_elem);
++ range_elem = next;
++ }
+
+- heap->fenced_head = elem->next;
+- HeapFree(GetProcessHeap(), 0, elem);
+- // TODO(acomminos): bother to null out fenced_tail?
++ wined3d_fence_destroy(elem->fence);
+
+- elem = next;
+- break;
+- }
+- default:
+- return WINED3D_OK;
+- }
++ heap->fenced_head = elem->next;
++ HeapFree(GetProcessHeap(), 0, elem);
++ // TODO(acomminos): bother to null out fenced_tail?
++ break;
++ }
++ default:
++ return WINED3D_OK;
+ }
+
+ return WINED3D_OK;
+diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
+index d7bdc21a25..bae5d9f4a1 100644
+--- a/dlls/wined3d/cs.c
++++ b/dlls/wined3d/cs.c
+@@ -2643,26 +2643,15 @@ static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *da
+ const struct wined3d_cs_discard_buffer *op = data;
+ struct wined3d_buffer *buffer = op->buffer;
+ HRESULT hr;
+- struct wined3d_fence *fence;
+
+ // Poll for discarded buffers whose fenced have been triggered here to avoid
+ // excessive VRAM consumption.
+ wined3d_buffer_heap_cs_poll_fences(buffer->buffer_heap, cs->device);
+
+ // TODO(acomminos): should call into buffer.c here instead.
+- // XXX(acomminos): should we always create a new fence here?
+- if (!FAILED(hr = wined3d_fence_create(cs->device, &fence)))
++ if (FAILED(hr = wined3d_buffer_heap_free_fenced(buffer->buffer_heap, cs->device, buffer->cs_persistent_map)))
+ {
+- // TODO(acomminos): make more informed fences based on prior info. for now,
+- // we do this because allocating and deleting fences repeatedly is brutal
+- // for performance. look into why.
+- wined3d_fence_issue(fence, cs->device);
+-
+- wined3d_buffer_heap_free_fenced(buffer->buffer_heap, buffer->cs_persistent_map, fence);
+- }
+- else
+- {
+- ERR("Failed to create fence for discarded buffer %p, hr %x\n. Freeing anyway.", buffer, hr);
++ ERR("Failed to do a fenced free on discarded buffer %p, hr %x\n. Freeing anyway.", buffer, hr);
+ wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map);
+ }
+
+diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
+index e0871d1636..bdab83b935 100644
+--- a/dlls/wined3d/device.c
++++ b/dlls/wined3d/device.c
+@@ -850,9 +850,8 @@ static void create_buffer_heap(struct wined3d_device *device, struct wined3d_con
+ {
+ // TODO(acomminos): check if ARB_buffer_storage is supported, first-
+ // possibly make wined3d_buffer_heap_create fail.
+- // TODO(acomminos): definitely don't take up all of vram. this is gonna get
+- // paged anyway, though.
+- const GLsizeiptr HBO_SIZE = device->adapter->vram_bytes / 4;
++ // TODO(acomminos): 512MB is sane for geometry, maybe not for PBO.
++ const GLsizeiptr HBO_SIZE = min(512000000, device->adapter->vram_bytes / 4);
+ const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
+
+ GLint ub_alignment;
+diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
+index cfa48a5f3e..62433a39b1 100644
+--- a/dlls/wined3d/wined3d_private.h
++++ b/dlls/wined3d/wined3d_private.h
+@@ -3679,10 +3679,12 @@ struct wined3d_buffer_heap
+ // TODO: add buckets for free regions of a given size.
+ struct wined3d_buffer_heap_element *free_list_head;
+
+- // store in FIFO order? that way, we can minimize our amount of time
+- // waiting on fences?
+- // XXX(acomminos): are fences guaranteed to be triggered in a serial
+- // ordering? if so, we can early-terminate our polling
++ // Elements that need to be fenced, but haven't reached the required size.
++ struct wined3d_buffer_heap_element *pending_fenced_head;
++ GLsizeiptr pending_fenced_bytes; // Number of bytes freed but not yet covered by a fence.
++ GLsizeiptr pending_fenced_threshold_bytes; // Number of bytes required before fencing.
++
++ // List of sets of buffers behind a common fence, in FIFO order.
+ struct wined3d_buffer_heap_fenced_element *fenced_head;
+ struct wined3d_buffer_heap_fenced_element *fenced_tail;
+ };
+@@ -3694,7 +3696,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
+ // Immediately frees a heap-allocated buffer segment.
+ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN;
+ // Enqueues a buffer segment to return to the heap once its fence has been signaled.
+-HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_map_range range, struct wined3d_fence *fence) DECLSPEC_HIDDEN;
++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) DECLSPEC_HIDDEN;
+ // Moves a buffers with a signaled fence from the fenced list to the free list.
+ // Must be executed on the CS thread.
+ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN;
+--
+2.16.2
+
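
For readers skimming the series: the net effect of this patch is to stop issuing one GL fence per discarded buffer and instead accumulate freed ranges until roughly a quarter of the heap is pending, then cover the whole batch with a single fence. The standalone C sketch below mirrors that bookkeeping only; it is not Wine code, the fence is simulated with a flag, and names such as free_fenced, poll_fences, pending_bytes and flush_threshold are illustrative.

/* Minimal standalone sketch of the fence-batching idea from the patch above.
 * Not Wine code: the fence is a plain flag, and all names are illustrative. */
#include <stdio.h>
#include <stdlib.h>

struct range { size_t offset, size; struct range *next; };

struct batch {                 /* one fence guarding a whole set of freed ranges */
    struct range *ranges;
    int fence_signaled;        /* stands in for testing a real GPU fence */
    struct batch *next;
};

struct heap {
    struct range *pending;     /* freed ranges not yet behind a fence */
    size_t pending_bytes;
    size_t flush_threshold;    /* e.g. heap size / 4 in the patch */
    struct batch *fenced_head, *fenced_tail;
    struct range *free_list;   /* ranges safe to reuse */
};

/* Queue a freed range; only issue a fence once enough bytes accumulate. */
static void free_fenced(struct heap *h, size_t offset, size_t size)
{
    struct range *r = malloc(sizeof(*r));
    r->offset = offset; r->size = size; r->next = h->pending;
    h->pending = r;
    h->pending_bytes += size;

    if (h->pending_bytes < h->flush_threshold)
        return;                            /* keep batching */

    struct batch *b = malloc(sizeof(*b));
    b->ranges = h->pending;
    b->fence_signaled = 0;                 /* a real fence would be issued here */
    b->next = NULL;
    h->pending = NULL;
    h->pending_bytes = 0;

    if (h->fenced_tail) h->fenced_tail->next = b; else h->fenced_head = b;
    h->fenced_tail = b;
}

/* Return every range of a signaled batch to the free list. */
static void poll_fences(struct heap *h)
{
    struct batch *b = h->fenced_head;
    if (!b || !b->fence_signaled)
        return;
    for (struct range *r = b->ranges; r;)
    {
        struct range *next = r->next;
        r->next = h->free_list;            /* the patch additionally merges adjacent ranges */
        h->free_list = r;
        r = next;
    }
    h->fenced_head = b->next;
    if (!h->fenced_head) h->fenced_tail = NULL;
    free(b);
}

int main(void)
{
    struct heap h = { .flush_threshold = 256 };
    free_fenced(&h, 0, 128);               /* stays pending */
    free_fenced(&h, 128, 128);              /* crosses the threshold: one batch, one fence */
    h.fenced_head->fence_signaled = 1;      /* pretend the GPU caught up */
    poll_fences(&h);
    for (struct range *r = h.free_list; r; r = r->next)
        printf("reusable: offset=%zu size=%zu\n", r->offset, r->size);
    return 0;
}

In the actual patch the threshold is heap->pending_fenced_threshold_bytes (size / 4), the fence is a wined3d_fence issued on the CS thread in wined3d_buffer_heap_free_fenced, and ranges returned to the free list by wined3d_buffer_heap_cs_poll_fences are merged with adjacent free ranges via element_insert_range.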