summarylogtreecommitdiffstats
path: root/0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch
blob: cac70eac997d176f902f3bada20d941aa0b4d73c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
From 7a630d56cb1bddfd03cae3cdc43aee949b04abe2 Mon Sep 17 00:00:00 2001
From: Andrew Comminos <andrew@comminos.com>
Date: Mon, 26 Feb 2018 21:35:40 -0800
Subject: [PATCH 5/8] wined3d: Experimental buffer heap fence batching, upper
 bound on heap size.

---
 dlls/wined3d/buffer.c          |  11 +-
 dlls/wined3d/buffer_heap.c     | 232 ++++++++++++++++++++++++-----------------
 dlls/wined3d/cs.c              |  15 +--
 dlls/wined3d/device.c          |   5 +-
 dlls/wined3d/wined3d_private.h |  12 ++-
 5 files changed, 154 insertions(+), 121 deletions(-)

diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c
index 7d7e506817..fbec613c92 100644
--- a/dlls/wined3d/buffer.c
+++ b/dlls/wined3d/buffer.c
@@ -992,9 +992,12 @@ void wined3d_buffer_load(struct wined3d_buffer *buffer, struct wined3d_context *
 
     if (buffer->flags & WINED3D_BUFFER_PERSISTENT)
     {
-        if (!wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP))
-            ERR("Failed to preload persistent mapping.\n");
-        return;
+        if (wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP))
+            return;
+
+        ERR("Failed to preload persistent mapping for %p, falling back to BO.\n", buffer);
+        buffer->flags |= WINED3D_BUFFER_USE_BO;
+        buffer->flags &= ~WINED3D_BUFFER_PERSISTENT;
     }
 
     /* TODO: Make converting independent from VBOs */
@@ -1417,7 +1420,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc
             struct wined3d_map_range map_range;
             if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &map_range)))
             {
-                FIXME("Failed to allocate new buffer, falling back to sync path.\n");
+                FIXME_(d3d_perf)("Failed to allocate new buffer, falling back to sync path.\n");
                 return hr;
             }
             map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset;
diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c
index 02b925b658..165a957edd 100644
--- a/dlls/wined3d/buffer_heap.c
+++ b/dlls/wined3d/buffer_heap.c
@@ -22,6 +22,7 @@
 #include "wined3d_private.h"
 
 WINE_DEFAULT_DEBUG_CHANNEL(d3d);
+WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
 
 struct wined3d_buffer_heap_element
 {
@@ -33,7 +34,7 @@ struct wined3d_buffer_heap_element
 
 struct wined3d_buffer_heap_fenced_element
 {
-    struct wined3d_map_range range;
+    struct wined3d_buffer_heap_element *ranges;
     struct wined3d_fence *fence;
 
     struct wined3d_buffer_heap_element *next;
@@ -52,19 +53,19 @@ static struct wined3d_buffer_heap_element* element_new(GLintptr offset, GLsizeip
     return elem;
 }
 
-static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_map_range range, struct wined3d_fence* fence)
+static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_element *ranges, struct wined3d_fence* fence)
 {
     struct wined3d_buffer_heap_fenced_element* elem;
     elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_fenced_element));
     if (!elem)
         return NULL;
-    elem->range = range;
+    elem->ranges = ranges;
     elem->fence = fence;
     elem->next = NULL;
     return elem;
 }
 
-static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem)
+static void element_merge_adjacent(struct wined3d_buffer_heap_element **head, struct wined3d_buffer_heap_element *elem)
 {
     struct wined3d_buffer_heap_element *cur_prev = elem->prev;
     struct wined3d_buffer_heap_element *cur_next = elem->next;
@@ -77,8 +78,8 @@ static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wine
         if (cur_prev->prev)
             cur_prev->prev->next = elem;
 
-        if (cur_prev == heap->free_list_head)
-            heap->free_list_head = elem;
+        if (cur_prev == *head)
+            *head = elem;
 
         HeapFree(GetProcessHeap(), 0, cur_prev);
     }
@@ -94,6 +95,69 @@ static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wine
     }
 }
 
+// Inserts `range` into the doubly linked element list whose first node is `*head`, merging it into a touching element when possible.
+// Rewrites `*head` whenever the first node of the list changes. Takes no lock itself.
+static void element_insert_range(struct wined3d_buffer_heap_element **head, struct wined3d_map_range range)
+{
+    struct wined3d_buffer_heap_element *elem = *head;
+    struct wined3d_buffer_heap_element *new_elem;
+    struct wined3d_buffer_heap_element *last_elem = NULL; // Last node visited; used for the append-at-end case.
+
+    // Empty list: the new element becomes the head.
+    if (!elem)
+    {
+        new_elem = element_new(range.offset, range.size);
+        *head = new_elem;
+        return;
+    }
+
+    while (elem)
+    {
+        struct wined3d_map_range *erange = &elem->range;
+        if (range.offset + range.size == erange->offset)
+        {
+            // Left side merge: `range` ends exactly where this element starts, so grow the element downwards.
+            erange->offset = range.offset;
+            erange->size += range.size;
+            // The element grew to the left, so it may now touch elem->prev.
+            element_merge_adjacent(head, elem);
+            return;
+        }
+        else if (erange->offset + erange->size == range.offset)
+        {
+            // Right side merge: `range` starts exactly where this element ends, so grow the element upwards.
+            erange->size += range.size;
+            // The element grew to the right, so the neighbor it may now touch is elem->next (element_merge_adjacent reads both prev and next).
+            element_merge_adjacent(head, elem);
+            return;
+        }
+        else if (range.offset < erange->offset)
+        {
+            // No touching element: link a new node immediately before `elem`, the first element with a larger offset.
+            new_elem = element_new(range.offset, range.size); // NOTE(review): result is dereferenced unchecked below - confirm element_new cannot return NULL.
+            new_elem->prev = elem->prev;
+            new_elem->next = elem;
+            if (elem->prev)
+            {
+                elem->prev->next = new_elem;
+            }
+            if (*head == elem)
+            {
+                *head = new_elem;
+            }
+            elem->prev = new_elem;
+            return;
+        }
+        last_elem = elem;
+        elem = elem->next;
+    }
+
+    // Larger offset than all other elements in the list, append after the last node (non-NULL here, since the empty list returned early).
+    new_elem = element_new(range.offset, range.size); // NOTE(review): same unchecked element_new result as above.
+    new_elem->prev = last_elem;
+    last_elem->next = new_elem;
+}
+
 /* Context activation is done by the caller. */
 HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap)
 {
@@ -134,6 +198,9 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s
     object->free_list_head = element_new(0, size);
     object->fenced_head = object->fenced_tail = NULL;
     object->alignment = alignment;
+    object->pending_fenced_bytes = 0;
+    object->pending_fenced_head = NULL;
+    object->pending_fenced_threshold_bytes = size / 4; // FIXME(acomminos): make this externally declared
     InitializeCriticalSection(&object->temp_lock);
 
     *buffer_heap = object;
@@ -155,6 +222,9 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
     // TODO(acomminos): free list binning?
     struct wined3d_buffer_heap_element *elem = heap->free_list_head;
 
+    // Round up to the next power of two to reduce fragmentation.
+    size = 1ULL << (int)ceil(log2(size));
+
     // Round up the size to a multiple of the heap's alignment.
     if (heap->alignment)
         size += heap->alignment - (size % heap->alignment);
@@ -188,6 +258,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
                 }
                 HeapFree(GetProcessHeap(), 0, elem);
             }
+
             LeaveCriticalSection(&heap->temp_lock);
             return WINED3D_OK;
         }
@@ -201,87 +272,53 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
 HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range)
 {
     EnterCriticalSection(&heap->temp_lock);
-    struct wined3d_buffer_heap_element *new_elem;
-    struct wined3d_buffer_heap_element *elem = heap->free_list_head;
-    struct wined3d_buffer_heap_element *last_elem = NULL;
 
-    // Special case where the head doesn't exist.
-    if (!elem)
-    {
-        new_elem = element_new(range.offset, range.size);
-        heap->free_list_head = new_elem;
-        goto success;
-    }
+    element_insert_range(&heap->free_list_head, range);
 
-    while (elem)
+    LeaveCriticalSection(&heap->temp_lock);
+    return WINED3D_OK;
+}
+
+HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range)
+{
+    element_insert_range(&heap->pending_fenced_head, range);
+
+    heap->pending_fenced_bytes += range.size;
+    if (heap->pending_fenced_bytes >= heap->pending_fenced_threshold_bytes)
     {
-        struct wined3d_map_range *erange = &elem->range;
-        if (range.offset + range.size == erange->offset)
+        // TODO(acomminos): break this out into a separate function
+        struct wined3d_buffer_heap_fenced_element *fenced_elem;
+        struct wined3d_fence *fence;
+        HRESULT hr;
+
+        if (FAILED(hr = wined3d_fence_create(device, &fence)))
         {
-            // Left side merge
-            erange->offset = range.offset;
-            erange->size += range.size;
-            // Check if this causes a merge with elem->prev
-            element_merge_adjacent(heap, elem);
-            goto success;
+            ERR("Failed to create fence.\n");
+            return hr;
         }
-        else if (erange->offset + erange->size == range.offset)
+
+        fenced_elem = fenced_element_new(heap->pending_fenced_head, fence);
+        if (!fenced_elem)
+            return E_OUTOFMEMORY;
+
+        TRACE_(d3d_perf)("Dispatching fenced buffer set.\n");
+        heap->pending_fenced_bytes = 0;
+        heap->pending_fenced_head = NULL;
+
+        // Append to end of fenced list, which works well if you assume that buffers
+        // are freed in some ascending draw call ordering.
+        if (!heap->fenced_head)
         {
-            // Right side merge
-            erange->size += range.size;
-            // Check if this causes a merge with elem->prev
-            element_merge_adjacent(heap, elem);
-            goto success;
+            heap->fenced_head = fenced_elem;
+            heap->fenced_tail = fenced_elem;
         }
-        else if (range.offset < erange->offset)
+        else
         {
-            // Append to left, non-merge case.
-            new_elem = element_new(range.offset, range.size);
-            new_elem->prev = elem->prev;
-            new_elem->next = elem;
-            if (elem->prev)
-            {
-                elem->prev->next = new_elem;
-            }
-            if (heap->free_list_head == elem)
-            {
-                heap->free_list_head = new_elem;
-            }
-            elem->prev = new_elem;
-            goto success;
+            heap->fenced_tail->next = fenced_elem;
+            heap->fenced_tail = fenced_elem;
         }
-        last_elem = elem;
-        elem = elem->next;
-    }
-
-    // Larger offset than all other elements in the list, append to the end.
-    new_elem = element_new(range.offset, range.size);
-    new_elem->prev = last_elem;
-    last_elem->next = new_elem;
-
-success:
-    LeaveCriticalSection(&heap->temp_lock);
-    return WINED3D_OK;
-}
-
-HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_map_range range, struct wined3d_fence *fence)
-{
-    struct wined3d_buffer_heap_fenced_element *elem;
-    elem = fenced_element_new(range, fence);
-    if (!elem)
-        return E_OUTOFMEMORY;
 
-    // Append to end of fenced list, which works well if you assume that buffers
-    // are freed in some ascending draw call ordering.
-    if (!heap->fenced_head)
-    {
-        heap->fenced_head = elem;
-        heap->fenced_tail = elem;
-    }
-    else
-    {
-        heap->fenced_tail->next = elem;
-        heap->fenced_tail = elem;
+        wined3d_fence_issue(fence, device);
     }
 
     return WINED3D_OK;
@@ -294,29 +331,32 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str
     if (!elem)
         return WINED3D_OK;
 
-    while (elem)
+    res = wined3d_fence_test(elem->fence, device, 0);
+    switch (res)
     {
-        res = wined3d_fence_test(elem->fence, device, 0);
-        switch (res)
-        {
-            case WINED3D_FENCE_OK:
-            case WINED3D_FENCE_NOT_STARTED:
+        case WINED3D_FENCE_OK:
+        case WINED3D_FENCE_NOT_STARTED:
+            {
+                TRACE_(d3d_perf)("Freed fence group.\n");
+                struct wined3d_buffer_heap_element *range_elem = elem->ranges;
+                // FIXME(acomminos): this might take a while. incrementally do this?
+                while (range_elem)
                 {
-                    struct wined3d_buffer_heap_fenced_element *next = elem->next;
-
-                    wined3d_fence_destroy(elem->fence);
-                    wined3d_buffer_heap_free(heap, elem->range);
+                    struct wined3d_buffer_heap_element *next = range_elem->next;
+                    wined3d_buffer_heap_free(heap, range_elem->range);
+                    HeapFree(GetProcessHeap(), 0, range_elem);
+                    range_elem = next;
+                }
 
-                    heap->fenced_head = elem->next;
-                    HeapFree(GetProcessHeap(), 0, elem);
-                    // TODO(acomminos): bother to null out fenced_tail?
+                wined3d_fence_destroy(elem->fence);
 
-                    elem = next;
-                    break;
-                }
-            default:
-                return WINED3D_OK;
-        }
+                heap->fenced_head = elem->next;
+                HeapFree(GetProcessHeap(), 0, elem);
+                // TODO(acomminos): bother to null out fenced_tail?
+                break;
+            }
+        default:
+            return WINED3D_OK;
     }
 
     return WINED3D_OK;
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
index d7bdc21a25..bae5d9f4a1 100644
--- a/dlls/wined3d/cs.c
+++ b/dlls/wined3d/cs.c
@@ -2643,26 +2643,15 @@ static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *da
     const struct wined3d_cs_discard_buffer *op = data;
     struct wined3d_buffer *buffer = op->buffer;
     HRESULT hr;
-    struct wined3d_fence *fence;
 
     // Poll for discarded buffers whose fenced have been triggered here to avoid
     // excessive VRAM consumption.
     wined3d_buffer_heap_cs_poll_fences(buffer->buffer_heap, cs->device);
 
     // TODO(acomminos): should call into buffer.c here instead.
-    // XXX(acomminos): should we always create a new fence here?
-    if (!FAILED(hr = wined3d_fence_create(cs->device, &fence)))
+    if (FAILED(hr = wined3d_buffer_heap_free_fenced(buffer->buffer_heap, cs->device, buffer->cs_persistent_map)))
     {
-        // TODO(acomminos): make more informed fences based on prior info. for now,
-        // we do this because allocating and deleting fences repeatedly is brutal
-        // for performance. look into why.
-        wined3d_fence_issue(fence, cs->device);
-
-        wined3d_buffer_heap_free_fenced(buffer->buffer_heap, buffer->cs_persistent_map, fence);
-    }
-    else
-    {
-        ERR("Failed to create fence for discarded buffer %p, hr %x\n. Freeing anyway.", buffer, hr);
+        ERR("Failed to do a fenced free on discarded buffer %p, hr %x\n. Freeing anyway.", buffer, hr);
         wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map);
     }
 
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index e0871d1636..bdab83b935 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -850,9 +850,8 @@ static void create_buffer_heap(struct wined3d_device *device, struct wined3d_con
 {
     // TODO(acomminos): check if ARB_buffer_storage is supported, first-
     //                  possibly make wined3d_buffer_heap_create fail.
-    // TODO(acomminos): definitely don't take up all of vram. this is gonna get
-    //                  paged anyway, though.
-    const GLsizeiptr HBO_SIZE = device->adapter->vram_bytes / 4;
+    // TODO(acomminos): 512MB is sane for geometry, maybe not for PBO.
+    const GLsizeiptr HBO_SIZE = min(512000000, device->adapter->vram_bytes / 4);
     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
 
     GLint ub_alignment;
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index cfa48a5f3e..62433a39b1 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -3679,10 +3679,12 @@ struct wined3d_buffer_heap
     // TODO: add buckets for free regions of a given size.
     struct wined3d_buffer_heap_element *free_list_head;
 
-    // store in FIFO order? that way, we can minimize our amount of time
-    // waiting on fences?
-    // XXX(acomminos): are fences guaranteed to be triggered in a serial
-    //                 ordering? if so, we can early-terminate our polling
+    // Elements that need to be fenced, but haven't reached the required size.
+    struct wined3d_buffer_heap_element *pending_fenced_head;
+    GLsizeiptr pending_fenced_bytes; // Number of free bytes in the active fenced region.
+    GLsizeiptr pending_fenced_threshold_bytes; // Number of bytes required before fencing.
+
+    // List of sets of buffers behind a common fence, in FIFO order.
     struct wined3d_buffer_heap_fenced_element *fenced_head;
     struct wined3d_buffer_heap_fenced_element *fenced_tail;
 };
@@ -3694,7 +3696,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s
 // Immediately frees a heap-allocated buffer segment.
 HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN;
 // Enqueues a buffer segment to return to the heap once its fence has been signaled.
-HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_map_range range, struct wined3d_fence *fence) DECLSPEC_HIDDEN;
+HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) DECLSPEC_HIDDEN;
 // Moves a buffers with a signaled fence from the fenced list to the free list.
 // Must be executed on the CS thread.
 HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN;
-- 
2.16.2