diff options
Diffstat (limited to '0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch')
-rw-r--r-- | 0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch | 94 |
1 files changed, 60 insertions, 34 deletions
diff --git a/0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch b/0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch index 7dd0c7735c85..1597c85366f6 100644 --- a/0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch +++ b/0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch @@ -1,17 +1,17 @@ -From 44fba11f530b1dff8a8e10fec15b0ca6465e3623 Mon Sep 17 00:00:00 2001 +From 831d8bc7117ddb24507bd60f4c9c0df37b4b9699 Mon Sep 17 00:00:00 2001 From: Andrew Comminos <andrew@comminos.com> Date: Wed, 28 Feb 2018 22:46:31 -0800 Subject: [PATCH 8/8] wined3d: Implement lazy-free using a deferred free list. --- - dlls/wined3d/buffer_heap.c | 308 ++++++++++++++++++++++++++++------------- + dlls/wined3d/buffer_heap.c | 325 ++++++++++++++++++++++++++++------------- dlls/wined3d/cs.c | 12 +- - dlls/wined3d/device.c | 16 ++- + dlls/wined3d/device.c | 16 +- dlls/wined3d/wined3d_private.h | 22 ++- - 4 files changed, 248 insertions(+), 110 deletions(-) + 4 files changed, 261 insertions(+), 114 deletions(-) diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c -index f4af1b93b9..3fe5541a6a 100644 +index f4af1b93b9..4d90dcf861 100644 --- a/dlls/wined3d/buffer_heap.c +++ b/dlls/wined3d/buffer_heap.c @@ -27,24 +27,20 @@ WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); @@ -44,15 +44,23 @@ index f4af1b93b9..3fe5541a6a 100644 struct wined3d_fence *fence; struct wined3d_buffer_heap_fenced_element *next; -@@ -58,6 +54,7 @@ static struct wined3d_buffer_heap_element* element_new(GLsizei offset, GLsizei s +@@ -53,13 +49,12 @@ struct wined3d_buffer_heap_fenced_element + static struct wined3d_buffer_heap_element* element_new(GLsizei offset, GLsizei size) + { + struct wined3d_buffer_heap_element* elem; +- elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_element)); ++ elem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct wined3d_buffer_heap_element)); + if (!elem) return NULL; elem->range.offset = offset; elem->range.size = size; +- elem->prev = NULL; +- elem->next = NULL; + elem->in_tree = FALSE; - elem->prev = NULL; - elem->next = NULL; return elem; -@@ -86,27 +83,41 @@ static int element_bin(struct wined3d_buffer_heap_element *elem) + } + +@@ -86,27 +81,41 @@ static int element_bin(struct wined3d_buffer_heap_element *elem) return min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_floor(elem->range.size)); } @@ -73,13 +81,13 @@ index f4af1b93b9..3fe5541a6a 100644 + if (heap->free_list.bins[bin].head) + heap->free_list.bins[bin].head->prev = elem; + heap->free_list.bins[bin].head = elem; -+ + + if (!heap->free_list.bins[bin].tail) + heap->free_list.bins[bin].tail = elem; + + TRACE("Inserted allocation at %p of size %lld into bin %d\n", elem->range.offset, elem->range.size, bin); +} - ++ +// Inserts an elemnet into the free tree. Does not perform coalescing. +static void element_insert_free_tree(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) +{ @@ -104,7 +112,7 @@ index f4af1b93b9..3fe5541a6a 100644 static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) { int bin = element_bin(elem); -@@ -117,24 +128,31 @@ static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d +@@ -117,24 +126,31 @@ static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d if (elem->next) elem->next->prev = elem->prev; @@ -114,7 +122,7 @@ index f4af1b93b9..3fe5541a6a 100644 + heap->free_list.bins[bin].head = elem->next; + + if (elem == heap->free_list.bins[bin].tail) -+ heap->free_list.bins[bin].head = elem->prev; ++ heap->free_list.bins[bin].tail = elem->prev; elem->prev = NULL; elem->next = NULL; @@ -133,7 +141,8 @@ index f4af1b93b9..3fe5541a6a 100644 +static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_bin_set bins, struct wined3d_fence* fence) { struct wined3d_buffer_heap_fenced_element* elem; - elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_fenced_element)); +- elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_fenced_element)); ++ elem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct wined3d_buffer_heap_fenced_element)); if (!elem) return NULL; - elem->ranges = ranges; @@ -141,7 +150,7 @@ index f4af1b93b9..3fe5541a6a 100644 elem->fence = fence; elem->next = NULL; return elem; -@@ -163,6 +181,11 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s +@@ -163,6 +179,11 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s struct wined3d_buffer_heap *object; @@ -153,7 +162,7 @@ index f4af1b93b9..3fe5541a6a 100644 if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object)))) { return E_OUTOFMEMORY; -@@ -194,13 +217,13 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s +@@ -194,13 +215,13 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s object->fenced_head = object->fenced_tail = NULL; object->alignment = alignment; @@ -171,27 +180,28 @@ index f4af1b93b9..3fe5541a6a 100644 *buffer_heap = object; -@@ -217,21 +240,23 @@ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct win +@@ -217,21 +238,24 @@ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct win HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range) { int initial_bin; -- EnterCriticalSection(&heap->temp_lock); ++ int initial_size = size; ++ + EnterCriticalSection(&heap->temp_lock); - // Round to the nearest power of two to reduce fragmentation. - size = 1ULL << bitwise_log2_ceil(size); -+ EnterCriticalSection(&heap->temp_lock); ++ // After alignment, reduce fragmentation by rounding to next power of two. ++ // If the alignment is a power of two (which it should be), this should be ++ // no problem. ++ size = 1 << bitwise_log2_ceil(size); - // Round up the size to a multiple of the heap's alignment. +- if (heap->alignment) + // Align size values where possible. - if (heap->alignment) ++ if (heap->alignment && (size % heap->alignment != 0)) size += heap->alignment - (size % heap->alignment); - // TODO(acomminos): use bitwise arithmetic instead -+ // After alignment, reduce fragmentation by rounding to next power of two. -+ // If the alignment is a power of two (which it should be), this should be -+ // no problem. -+ size = 1 << bitwise_log2_ceil(size); -+ initial_bin = min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_ceil(size)); for (int i = initial_bin; i < WINED3D_BUFFER_HEAP_BINS; i++) @@ -201,16 +211,26 @@ index f4af1b93b9..3fe5541a6a 100644 if (elem) { struct wined3d_map_range remaining_range; -@@ -247,7 +272,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s +@@ -241,13 +265,17 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s + out_range->offset = elem->range.offset; + out_range->size = size; + ++ TRACE_(d3d_perf)("Allocated %d (requested %d) at %p from bin %d (initial %d)\n", size, initial_size, elem->range.offset, i, initial_bin); ++ + // Remove the element from its current free bin to move it to the correct list. + element_remove_free(heap, elem); + if (remaining_range.size > 0) { ++ TRACE_(d3d_perf)("Imperfect fit allocated, fragmenting remainder of %lld at %p.\n", remaining_range.size, remaining_range.offset); ++ elem->range = remaining_range; - element_insert_free(heap, elem); + element_insert_free_bin(heap, elem); } else { -@@ -260,68 +285,33 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s +@@ -260,68 +288,33 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s } LeaveCriticalSection(&heap->temp_lock); @@ -294,7 +314,7 @@ index f4af1b93b9..3fe5541a6a 100644 LeaveCriticalSection(&heap->temp_lock); -@@ -330,10 +320,21 @@ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3 +@@ -330,10 +323,21 @@ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3 HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) { @@ -320,7 +340,7 @@ index f4af1b93b9..3fe5541a6a 100644 heap->pending_fenced_bytes += range.size; if (heap->pending_fenced_bytes >= heap->pending_fenced_threshold_bytes) -@@ -349,13 +350,13 @@ HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct +@@ -349,13 +353,13 @@ HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct return hr; } @@ -336,7 +356,7 @@ index f4af1b93b9..3fe5541a6a 100644 // Append to end of fenced list, which works well if you assume that buffers // are freed in some ascending draw call ordering. -@@ -390,15 +391,28 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str +@@ -390,15 +394,33 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str case WINED3D_FENCE_NOT_STARTED: { TRACE_(d3d_perf)("Freed fence group.\n"); @@ -358,13 +378,18 @@ index f4af1b93b9..3fe5541a6a 100644 + struct wined3d_buffer_heap_bin *heap_bin = &heap->free_list.bins[i]; + if (heap_bin->head) + { ++ // Insert to front. + elem_bin->tail->next = heap_bin->head; + heap_bin->head->prev = elem_bin->tail; ++ ++ elem_bin->head->prev = NULL; + heap_bin->head = elem_bin->head; + } + else + { ++ elem_bin->head->prev = NULL; + heap_bin->head = elem_bin->head; ++ elem_bin->tail->next = NULL; + heap_bin->tail = elem_bin->tail; + } } @@ -372,7 +397,7 @@ index f4af1b93b9..3fe5541a6a 100644 wined3d_fence_destroy(elem->fence); -@@ -413,3 +427,101 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str +@@ -413,3 +435,102 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str return WINED3D_OK; } @@ -390,12 +415,12 @@ index f4af1b93b9..3fe5541a6a 100644 + // TODO(acomminos): on one hand, if there's a lot of elements in the list, + // it's highly fragmented. on the other, we can potentially waste a decent + // sum of time checking for uncoalesced bins. -+ for (int i = 0; !elem && i < WINED3D_BUFFER_HEAP_BINS; i++) ++ for (int i = 0; i < WINED3D_BUFFER_HEAP_BINS && elem == NULL; i++) + { + struct wined3d_buffer_heap_element *next = heap->free_list.bins[i].head; + while (next) + { -+ if (!next->in_tree) ++ if (next->in_tree == FALSE) + { + // Find the first element not in-tree. + elem = next; @@ -408,6 +433,7 @@ index f4af1b93b9..3fe5541a6a 100644 + // TODO(acomminos): acquire a separate lock for the free tree here. + if (!elem) + { ++ ERR("Failed to find element to coalesce.\n"); + LeaveCriticalSection(&heap->temp_lock); + return E_FAIL; + } @@ -435,7 +461,7 @@ index f4af1b93b9..3fe5541a6a 100644 + if (left_elem->range.offset + left_elem->range.size == coalesced_range.offset) + { + coalesced_range.offset = left_elem->range.offset; -+ coalesced_range.size = coalesced_range.size + left_elem->range.size; ++ coalesced_range.size += left_elem->range.size; + + element_remove_free(heap, left_elem); + HeapFree(GetProcessHeap(), 0, left_elem); |