diff options
author | Stefan Schmidt | 2018-03-01 13:40:31 +0100 |
---|---|---|
committer | Stefan Schmidt | 2018-03-01 13:40:31 +0100 |
commit | 9065f70a5d47e4cf8f466b68104d5ddeb7f02409 (patch) | |
tree | 7227516d4faeca513d8ff787cd22ed72ba2cd064 | |
download | aur-9065f70a5d47e4cf8f466b68104d5ddeb7f02409.tar.gz |
Initial version (tracks 68de8e9b3f26e68bc6d64f353e0954ddab2f7590)
-rw-r--r-- | .SRCINFO | 190 | ||||
-rw-r--r-- | 0001-wined3d-Implement-a-simple-heap-allocator-backed-by-.patch | 456 | ||||
-rw-r--r-- | 0002-wined3d-Allocate-global-write-only-persistent-buffer.patch | 81 | ||||
-rw-r--r-- | 0003-wined3d-Add-support-for-persistently-mapped-wined3d_.patch | 701 | ||||
-rw-r--r-- | 0004-wined3d-Implement-aligned-persistent-heaps-for-persi.patch | 240 | ||||
-rw-r--r-- | 0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch | 455 | ||||
-rw-r--r-- | 0006-wined3d-Switch-wined3d_buffer_heap-to-be-backed-by-a.patch | 383 | ||||
-rw-r--r-- | 0007-wined3d-Add-segregated-free-bins-to-complement-rbtre.patch | 305 | ||||
-rw-r--r-- | 0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch | 612 | ||||
-rw-r--r-- | 30-win32-aliases.conf | 20 | ||||
-rw-r--r-- | PKGBUILD | 219 | ||||
-rw-r--r-- | harmony-fix.diff | 63 |
12 files changed, 3725 insertions, 0 deletions
diff --git a/.SRCINFO b/.SRCINFO new file mode 100644 index 000000000000..c8a5c3ab0f40 --- /dev/null +++ b/.SRCINFO @@ -0,0 +1,190 @@ +pkgbase = wine-staging-pba + pkgdesc = A compatibility layer for running Windows programs - Staging branch + pkgver = 2.21 + pkgrel = 1 + url = http://www.wine-staging.com + arch = x86_64 + license = LGPL + makedepends = autoconf + makedepends = ncurses + makedepends = bison + makedepends = perl + makedepends = fontforge + makedepends = flex + makedepends = gcc>=4.5.0-2 + makedepends = giflib + makedepends = lib32-giflib + makedepends = libpng + makedepends = lib32-libpng + makedepends = gnutls + makedepends = lib32-gnutls + makedepends = libxinerama + makedepends = lib32-libxinerama + makedepends = libxcomposite + makedepends = lib32-libxcomposite + makedepends = libxmu + makedepends = lib32-libxmu + makedepends = libxxf86vm + makedepends = lib32-libxxf86vm + makedepends = libldap + makedepends = lib32-libldap + makedepends = mpg123 + makedepends = lib32-mpg123 + makedepends = openal + makedepends = lib32-openal + makedepends = v4l-utils + makedepends = lib32-v4l-utils + makedepends = alsa-lib + makedepends = lib32-alsa-lib + makedepends = libxcomposite + makedepends = lib32-libxcomposite + makedepends = mesa + makedepends = lib32-mesa + makedepends = mesa-libgl + makedepends = lib32-mesa-libgl + makedepends = opencl-icd-loader + makedepends = lib32-opencl-icd-loader + makedepends = libxslt + makedepends = lib32-libxslt + makedepends = libpulse + makedepends = lib32-libpulse + makedepends = libva + makedepends = lib32-libva + makedepends = gtk3 + makedepends = lib32-gtk3 + makedepends = gst-plugins-base-libs + makedepends = lib32-gst-plugins-base-libs + makedepends = samba + makedepends = opencl-headers + makedepends = attr + makedepends = lib32-attr + makedepends = fontconfig + makedepends = lib32-fontconfig + makedepends = lcms2 + makedepends = lib32-lcms2 + makedepends = libxml2 + makedepends = lib32-libxml2 + makedepends = libxcursor + makedepends = lib32-libxcursor + makedepends = libxrandr + makedepends = lib32-libxrandr + makedepends = libxdamage + makedepends = lib32-libxdamage + makedepends = libxi + makedepends = lib32-libxi + makedepends = gettext + makedepends = lib32-gettext + makedepends = freetype2 + makedepends = lib32-freetype2 + makedepends = glu + makedepends = lib32-glu + makedepends = libsm + makedepends = lib32-libsm + makedepends = gcc-libs + makedepends = lib32-gcc-libs + makedepends = libpcap + makedepends = lib32-libpcap + makedepends = desktop-file-utils + optdepends = giflib + optdepends = lib32-giflib + optdepends = libpng + optdepends = lib32-libpng + optdepends = libldap + optdepends = lib32-libldap + optdepends = gnutls + optdepends = lib32-gnutls + optdepends = mpg123 + optdepends = lib32-mpg123 + optdepends = openal + optdepends = lib32-openal + optdepends = v4l-utils + optdepends = lib32-v4l-utils + optdepends = libpulse + optdepends = lib32-libpulse + optdepends = alsa-plugins + optdepends = lib32-alsa-plugins + optdepends = alsa-lib + optdepends = lib32-alsa-lib + optdepends = libjpeg-turbo + optdepends = lib32-libjpeg-turbo + optdepends = libxcomposite + optdepends = lib32-libxcomposite + optdepends = libxinerama + optdepends = lib32-libxinerama + optdepends = ncurses + optdepends = lib32-ncurses + optdepends = opencl-icd-loader + optdepends = lib32-opencl-icd-loader + optdepends = libxslt + optdepends = lib32-libxslt + optdepends = libva + optdepends = lib32-libva + optdepends = gtk3 + optdepends = lib32-gtk3 + optdepends = gst-plugins-base-libs + optdepends = lib32-gst-plugins-base-libs + optdepends = vulkan-icd-loader + optdepends = lib32-vulkan-icd-loader + optdepends = cups + optdepends = samba + optdepends = dosbox + provides = wine=2.21 + provides = wine-wow64=2.21 + conflicts = wine + conflicts = wine-wow64 + conflicts = wine-staging + options = staticlibs + source = https://github.com/wine-compholio/wine-patched/archive/staging-2.21.tar.gz + source = harmony-fix.diff + source = 30-win32-aliases.conf + source = 0001-wined3d-Implement-a-simple-heap-allocator-backed-by-.patch + source = 0002-wined3d-Allocate-global-write-only-persistent-buffer.patch + source = 0003-wined3d-Add-support-for-persistently-mapped-wined3d_.patch + source = 0004-wined3d-Implement-aligned-persistent-heaps-for-persi.patch + source = 0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch + source = 0006-wined3d-Switch-wined3d_buffer_heap-to-be-backed-by-a.patch + source = 0007-wined3d-Add-segregated-free-bins-to-complement-rbtre.patch + source = 0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch + sha512sums = 4e3fe2eb81360bfa095194ab5b9647636cbeac0dc3955e6a3ee26062f650c66a4bd2353a1cd8063f9b7c65a6bcc1f892cc7c1d0f00c3c8525a124ec2109d1e86 + sha512sums = b86edf07bfc560f403fdfd5a71f97930ee2a4c3f76c92cc1a0dbb2e107be9db3bed3a727a0430d8a049583c63dd11f5d4567fb7aa69b193997c6da241acc4f2e + sha512sums = 6e54ece7ec7022b3c9d94ad64bdf1017338da16c618966e8baf398e6f18f80f7b0576edf1d1da47ed77b96d577e4cbb2bb0156b0b11c183a0accf22654b0a2bb + sha512sums = b9141fbe8f5189eb46c83b178497f9ee6d1f2daec3009877557ba28e5f2ce6d818cfef5b0eba15c1e9e4c50dd5950486f8091793d704ec532f82155056725e71 + sha512sums = 8e112e25392fb2bd035c4b8792e43ad86bf81b1c24ff429ff8943a2c02ee761fc25446791475e4115e6b03f50cdb4cf6a8f128cc770c3941b59ee1dfbe79137b + sha512sums = 7335797924f1c4403a438ccfe36f8a650ddf8271d33ca962e270cf34762170038017cd53cad35f1ad61128f2c496edb68791783259df33cb997a73959136bdc0 + sha512sums = 52ebb56c6adfbef526d2db19618f9155084dacd7600d166f04ba5423c63a4294294589d675c391e577330f1b68755bb5d3b6a2cd3006902269cb73140973dba3 + sha512sums = d326b8da8fb02462bac178a23e18f5468de62780717c24eadb453201b2b6b6439d2be7dda38e40f24fdc570dd5bc54102e7bf05868c53b17b27f6b9a06fccdb0 + sha512sums = 04b41d4198138dbfe1399e7ed1e406fb265472d08a3e4de3c5c8584574b167613c598d7fa397c6944b809a96f699a4447694291296fa01a8e07b8ea96026ed2f + sha512sums = 9f90b7adc0ed87daac0f453caf2fff8b338061d96a9cd890f305704f9b22581232c6a207eb9eb1670c69b083caa780a6e44280df47c95b4e6e8e73f046f7c8a5 + sha512sums = 8fd8d2e262327e78dad69186ebf091dbc034fab2675f0be91df75c88ae6f5e5ae6f456a2098c460861946390ce139e998f4b0f77e33671c8a7062a5e06b6e4ca + +pkgname = wine-staging-pba + depends = attr + depends = lib32-attr + depends = fontconfig + depends = lib32-fontconfig + depends = lcms2 + depends = lib32-lcms2 + depends = libxml2 + depends = lib32-libxml2 + depends = libxcursor + depends = lib32-libxcursor + depends = libxrandr + depends = lib32-libxrandr + depends = libxdamage + depends = lib32-libxdamage + depends = libxi + depends = lib32-libxi + depends = gettext + depends = lib32-gettext + depends = freetype2 + depends = lib32-freetype2 + depends = glu + depends = lib32-glu + depends = libsm + depends = lib32-libsm + depends = gcc-libs + depends = lib32-gcc-libs + depends = libpcap + depends = lib32-libpcap + depends = desktop-file-utils + diff --git a/0001-wined3d-Implement-a-simple-heap-allocator-backed-by-.patch b/0001-wined3d-Implement-a-simple-heap-allocator-backed-by-.patch new file mode 100644 index 000000000000..565b172be923 --- /dev/null +++ b/0001-wined3d-Implement-a-simple-heap-allocator-backed-by-.patch @@ -0,0 +1,456 @@ +From 636d39db43f9cd176fe85869db5e07d3a39f80fb Mon Sep 17 00:00:00 2001 +From: Andrew Comminos <andrew@comminos.com> +Date: Fri, 23 Feb 2018 17:39:13 -0800 +Subject: [PATCH 1/8] wined3d: Implement a simple heap allocator backed by a + persistent buffer. + +--- + dlls/wined3d-csmt/Makefile.in | 1 + + dlls/wined3d/Makefile.in | 1 + + dlls/wined3d/buffer_heap.c | 321 +++++++++++++++++++++++++++++++++++++++++ + dlls/wined3d/directx.c | 3 + + dlls/wined3d/wined3d_gl.h | 1 + + dlls/wined3d/wined3d_private.h | 41 +++++- + 6 files changed, 364 insertions(+), 4 deletions(-) + create mode 100644 dlls/wined3d/buffer_heap.c + +diff --git a/dlls/wined3d-csmt/Makefile.in b/dlls/wined3d-csmt/Makefile.in +index bf064ed16f..cab1e6fdc1 100644 +--- a/dlls/wined3d-csmt/Makefile.in ++++ b/dlls/wined3d-csmt/Makefile.in +@@ -8,6 +8,7 @@ C_SRCS = \ + arb_program_shader.c \ + ati_fragment_shader.c \ + buffer.c \ ++ buffer_heap.c \ + context.c \ + cs.c \ + device.c \ +diff --git a/dlls/wined3d/Makefile.in b/dlls/wined3d/Makefile.in +index 70f47c6a5f..e78745cc11 100644 +--- a/dlls/wined3d/Makefile.in ++++ b/dlls/wined3d/Makefile.in +@@ -6,6 +6,7 @@ C_SRCS = \ + arb_program_shader.c \ + ati_fragment_shader.c \ + buffer.c \ ++ buffer_heap.c \ + context.c \ + cs.c \ + device.c \ +diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c +new file mode 100644 +index 0000000000..900e2d24bb +--- /dev/null ++++ b/dlls/wined3d/buffer_heap.c +@@ -0,0 +1,321 @@ ++/* ++ * Copyright 2018 Andrew Comminos ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ * ++ */ ++ ++#include "config.h" ++#include "wine/port.h" ++#include "wined3d_private.h" ++ ++WINE_DEFAULT_DEBUG_CHANNEL(d3d); ++ ++struct wined3d_buffer_heap_element ++{ ++ struct wined3d_map_range range; ++ ++ struct wined3d_buffer_heap_element *prev; ++ struct wined3d_buffer_heap_element *next; ++}; ++ ++struct wined3d_buffer_heap_fenced_element ++{ ++ struct wined3d_map_range range; ++ struct wined3d_fence *fence; ++ ++ struct wined3d_buffer_heap_element *next; ++}; ++ ++static struct wined3d_buffer_heap_element* element_new(GLintptr offset, GLsizeiptr size) ++{ ++ struct wined3d_buffer_heap_element* elem; ++ elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_element)); ++ if (!elem) ++ return NULL; ++ elem->range.offset = offset; ++ elem->range.size = size; ++ elem->next = NULL; ++ elem->prev = NULL; ++ return elem; ++} ++ ++static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_map_range range, struct wined3d_fence* fence) ++{ ++ struct wined3d_buffer_heap_fenced_element* elem; ++ elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_fenced_element)); ++ if (!elem) ++ return NULL; ++ elem->range = range; ++ elem->fence = fence; ++ elem->next = NULL; ++ return elem; ++} ++ ++static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) ++{ ++ struct wined3d_buffer_heap_element *cur_prev = elem->prev; ++ struct wined3d_buffer_heap_element *cur_next = elem->next; ++ if (cur_prev && cur_prev->range.offset + cur_prev->range.size == elem->range.offset) ++ { ++ elem->range.offset = cur_prev->range.offset; ++ elem->range.size += cur_prev->range.size; ++ ++ elem->prev = cur_prev->prev; ++ if (cur_prev->prev) ++ cur_prev->prev->next = elem; ++ ++ if (cur_prev == heap->free_list_head) ++ heap->free_list_head = elem; ++ ++ HeapFree(GetProcessHeap(), 0, cur_prev); ++ } ++ if (cur_next && cur_next->range.offset == elem->range.offset + elem->range.size) ++ { ++ elem->range.size += cur_next->range.size; ++ elem->next = cur_next->next; ++ if (cur_next->next) ++ { ++ cur_next->next->prev = elem; ++ } ++ HeapFree(GetProcessHeap(), 0, cur_next); ++ } ++} ++ ++/* Context activation is done by the caller. */ ++HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, BOOL write_only, struct wined3d_buffer_heap **buffer_heap) ++{ ++ const struct wined3d_gl_info *gl_info = context->gl_info; ++ const GLenum buffer_target = GL_ARRAY_BUFFER; ++ GLbitfield access_flags; ++ GLbitfield storage_flags; ++ ++ struct wined3d_buffer_heap *object; ++ ++ if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object)))) ++ { ++ return E_OUTOFMEMORY; ++ } ++ ++ access_flags = GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_WRITE_BIT; ++ if (!write_only) ++ { ++ access_flags |= GL_MAP_READ_BIT; ++ } ++ storage_flags = access_flags; // XXX(acomminos): will we need dynamic storage? ++ ++ // TODO(acomminos): where should we be checking for errors here? ++ ++ // TODO(acomminos): assert from CS thread? ++ GL_EXTCALL(glGenBuffers(1, &object->buffer_object)); ++ ++ // XXX(acomminos): use glNamedBufferStorage? ++ context_bind_bo(context, buffer_target, object->buffer_object); ++ ++ // TODO(acomminos): assert glBufferStorage supported? ++ GL_EXTCALL(glBufferStorage(buffer_target, size, NULL, storage_flags)); ++ ++ if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(buffer_target, 0, size, access_flags)))) ++ { ++ // TODO(acomminos): include error message ++ ERR("Couldn't map persistent buffer.\n"); ++ return -1; // FIXME(acomminos): proper error code, cleanup ++ } ++ context_bind_bo(context, buffer_target, 0); ++ ++ object->free_list_head = element_new(0, size); ++ object->fenced_head = object->fenced_tail = NULL; ++ InitializeCriticalSection(&object->temp_lock); ++ ++ *buffer_heap = object; ++ ++ return WINED3D_OK; ++} ++ ++/* Context activation is done by the caller. */ ++HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) ++{ ++ // TODO ++ return WINED3D_OK; ++} ++ ++HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range) { ++ EnterCriticalSection(&heap->temp_lock); ++ ++ // TODO(acomminos): free list binning? ++ struct wined3d_buffer_heap_element *elem = heap->free_list_head; ++ // XXX(acomminos): Avoid fragmentation by rounding to nearest power of two. ++ while (elem != NULL) ++ { ++ TRACE("allocation at %p, size %lld\n", heap->map_ptr + elem->range.offset, elem->range.size); ++ // XXX(acomminos): first fit is highly likely to be dumb, needs more analysis. ++ if (elem->range.size >= size) ++ { ++ // Pull the range from the start of the free list element. ++ out_range->offset = elem->range.offset; ++ out_range->size = size; // XXX(acomminos): should we really give the exact size requested? ++ ++ elem->range.offset += size; ++ elem->range.size -= size; ++ ++ if (elem->range.size == 0) ++ { ++ if (elem->prev) ++ { ++ elem->prev->next = elem->next; ++ } ++ if (elem->next) ++ { ++ elem->next->prev = elem->prev; ++ } ++ if (heap->free_list_head == elem) ++ { ++ heap->free_list_head = elem->next; ++ } ++ HeapFree(GetProcessHeap(), 0, elem); ++ } ++ LeaveCriticalSection(&heap->temp_lock); ++ return WINED3D_OK; ++ } ++ elem = elem->next; ++ } ++ ++ LeaveCriticalSection(&heap->temp_lock); ++ return WINED3DERR_OUTOFVIDEOMEMORY; // FIXME(acomminos): probably wrong return code. ++} ++ ++HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) ++{ ++ EnterCriticalSection(&heap->temp_lock); ++ struct wined3d_buffer_heap_element *new_elem; ++ struct wined3d_buffer_heap_element *elem = heap->free_list_head; ++ struct wined3d_buffer_heap_element *last_elem = NULL; ++ ++ // Special case where the head doesn't exist. ++ if (!elem) ++ { ++ new_elem = element_new(range.offset, range.size); ++ heap->free_list_head = new_elem; ++ goto success; ++ } ++ ++ while (elem) ++ { ++ struct wined3d_map_range *erange = &elem->range; ++ if (range.offset + range.size == erange->offset) ++ { ++ // Left side merge ++ erange->offset = range.offset; ++ erange->size += range.size; ++ // Check if this causes a merge with elem->prev ++ element_merge_adjacent(heap, elem); ++ goto success; ++ } ++ else if (erange->offset + erange->size == range.offset) ++ { ++ // Right side merge ++ erange->size += range.size; ++ // Check if this causes a merge with elem->prev ++ element_merge_adjacent(heap, elem); ++ goto success; ++ } ++ else if (range.offset < erange->offset) ++ { ++ // Append to left, non-merge case. ++ new_elem = element_new(range.offset, range.size); ++ new_elem->prev = elem->prev; ++ new_elem->next = elem; ++ if (elem->prev) ++ { ++ elem->prev->next = new_elem; ++ } ++ if (heap->free_list_head == elem) ++ { ++ heap->free_list_head = new_elem; ++ } ++ elem->prev = new_elem; ++ goto success; ++ } ++ last_elem = elem; ++ elem = elem->next; ++ } ++ ++ // Larger offset than all other elements in the list, append to the end. ++ new_elem = element_new(range.offset, range.size); ++ new_elem->prev = last_elem; ++ last_elem->next = new_elem; ++ ++success: ++ LeaveCriticalSection(&heap->temp_lock); ++ return WINED3D_OK; ++} ++ ++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_map_range range, struct wined3d_fence *fence) ++{ ++ struct wined3d_buffer_heap_fenced_element *elem; ++ elem = fenced_element_new(range, fence); ++ if (!elem) ++ return E_OUTOFMEMORY; ++ ++ // Append to end of fenced list, which works well if you assume that buffers ++ // are freed in some ascending draw call ordering. ++ if (!heap->fenced_head) ++ { ++ heap->fenced_head = elem; ++ heap->fenced_tail = elem; ++ } ++ else ++ { ++ heap->fenced_tail->next = elem; ++ heap->fenced_tail = elem; ++ } ++ ++ return WINED3D_OK; ++} ++ ++HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, struct wined3d_device *device) ++{ ++ enum wined3d_fence_result res; ++ struct wined3d_buffer_heap_fenced_element *elem = heap->fenced_head; ++ if (!elem) ++ return WINED3D_OK; ++ ++ while (elem) ++ { ++ res = wined3d_fence_test(elem->fence, device, 0); ++ switch (res) ++ { ++ case WINED3D_FENCE_OK: ++ case WINED3D_FENCE_NOT_STARTED: ++ { ++ struct wined3d_buffer_heap_fenced_element *next = elem->next; ++ ++ wined3d_fence_destroy(elem->fence); ++ wined3d_buffer_heap_free(heap, elem->range); ++ ++ heap->fenced_head = elem->next; ++ HeapFree(GetProcessHeap(), 0, elem); ++ // TODO(acomminos): bother to null out fenced_tail? ++ ++ elem = next; ++ break; ++ } ++ default: ++ return WINED3D_OK; ++ } ++ } ++ ++ return WINED3D_OK; ++} +diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c +index 012561090f..9cf8f8efe5 100644 +--- a/dlls/wined3d/directx.c ++++ b/dlls/wined3d/directx.c +@@ -2711,6 +2711,9 @@ static void load_gl_funcs(struct wined3d_gl_info *gl_info) + /* GL_ARB_blend_func_extended */ + USE_GL_FUNC(glBindFragDataLocationIndexed) + USE_GL_FUNC(glGetFragDataIndex) ++ /* GL_ARB_buffer_storage */ ++ USE_GL_FUNC(glBufferStorage) ++ USE_GL_FUNC(glNamedBufferStorage) + /* GL_ARB_clear_buffer_object */ + USE_GL_FUNC(glClearBufferData) + USE_GL_FUNC(glClearBufferSubData) +diff --git a/dlls/wined3d/wined3d_gl.h b/dlls/wined3d/wined3d_gl.h +index 7ac163459b..04957cb5b1 100644 +--- a/dlls/wined3d/wined3d_gl.h ++++ b/dlls/wined3d/wined3d_gl.h +@@ -44,6 +44,7 @@ enum wined3d_gl_extension + /* ARB */ + ARB_BASE_INSTANCE, + ARB_BLEND_FUNC_EXTENDED, ++ ARB_BUFFER_STORAGE, + ARB_CLEAR_BUFFER_OBJECT, + ARB_CLEAR_TEXTURE, + ARB_CLIP_CONTROL, +diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h +index 9b16a361e4..4d0555a76c 100644 +--- a/dlls/wined3d/wined3d_private.h ++++ b/dlls/wined3d/wined3d_private.h +@@ -3465,6 +3465,12 @@ void state_init(struct wined3d_state *state, struct wined3d_fb_state *fb, + DWORD flags) DECLSPEC_HIDDEN; + void state_unbind_resources(struct wined3d_state *state) DECLSPEC_HIDDEN; + ++struct wined3d_map_range ++{ ++ GLintptr offset; ++ GLsizeiptr size; ++}; ++ + enum wined3d_cs_queue_id + { + WINED3D_CS_QUEUE_DEFAULT = 0, +@@ -3646,11 +3652,38 @@ enum wined3d_buffer_conversion_type + CONV_POSITIONT, + }; + +-struct wined3d_map_range ++struct wined3d_buffer_heap_element; ++struct wined3d_buffer_heap_fenced_element; ++ ++// A heap that manages allocations with a single GL buffer. ++struct wined3d_buffer_heap + { +- UINT offset; +- UINT size; +-}; ++ GLuint buffer_object; ++ void *map_ptr; ++ CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list. ++ ++ // TODO: add buckets for free regions of a given size. ++ struct wined3d_buffer_heap_element *free_list_head; ++ ++ // store in FIFO order? that way, we can minimize our amount of time ++ // waiting on fences? ++ // XXX(acomminos): are fences guaranteed to be triggered in a serial ++ // ordering? if so, we can early-terminate our polling ++ struct wined3d_buffer_heap_fenced_element *fenced_head; ++ struct wined3d_buffer_heap_fenced_element *fenced_tail; ++}; ++ ++HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, BOOL write_only, struct wined3d_buffer_heap **heap) DECLSPEC_HIDDEN; ++HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN; ++// Fetches a buffer from the heap of at least the given size. ++HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range* out_range) DECLSPEC_HIDDEN; ++// Immediately frees a heap-allocated buffer segment. ++HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN; ++// Enqueues a buffer segment to return to the heap once its fence has been signaled. ++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_map_range range, struct wined3d_fence *fence) DECLSPEC_HIDDEN; ++// Moves a buffers with a signaled fence from the fenced list to the free list. ++// Must be executed on the CS thread. ++HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN; + + struct wined3d_buffer + { +-- +2.16.2 + diff --git a/0002-wined3d-Allocate-global-write-only-persistent-buffer.patch b/0002-wined3d-Allocate-global-write-only-persistent-buffer.patch new file mode 100644 index 000000000000..66e2e25d69f8 --- /dev/null +++ b/0002-wined3d-Allocate-global-write-only-persistent-buffer.patch @@ -0,0 +1,81 @@ +From 7f141de6d631a6e0c9cd778f6b3259d41a700bb4 Mon Sep 17 00:00:00 2001 +From: Andrew Comminos <andrew@comminos.com> +Date: Fri, 23 Feb 2018 17:42:21 -0800 +Subject: [PATCH 2/8] wined3d: Allocate global write-only persistent buffer + heap at device initialization. + +--- + dlls/wined3d/device.c | 28 ++++++++++++++++++++++++++++ + dlls/wined3d/wined3d_private.h | 3 +++ + 2 files changed, 31 insertions(+) + +diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c +index 58f4993abe..363dcb17f0 100644 +--- a/dlls/wined3d/device.c ++++ b/dlls/wined3d/device.c +@@ -845,6 +845,29 @@ static void destroy_default_samplers(struct wined3d_device *device, struct wined + device->null_sampler = NULL; + } + ++/* Context activation is done by the caller. */ ++static void create_buffer_heap(struct wined3d_device *device, struct wined3d_context *context) ++{ ++ // TODO(acomminos): check if ARB_buffer_storage is supported, first- ++ // possibly make wined3d_buffer_heap_create fail. ++ // TODO(acomminos): definitely don't take up all of vram. this is gonna get ++ // paged anyway, though. ++ const GLsizeiptr HBO_SIZE = device->adapter->vram_bytes / 4; ++ ++ HRESULT hr; ++ if (FAILED(hr = wined3d_buffer_heap_create(context, HBO_SIZE, TRUE, &device->wo_buffer_heap))) ++ { ++ ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr); ++ } ++} ++ ++/* Context activation is done by the caller. */ ++static void destroy_buffer_heap(struct wined3d_device *device, struct wined3d_context *context) ++{ ++ if (device->wo_buffer_heap) ++ wined3d_buffer_heap_destroy(device->wo_buffer_heap, context); ++} ++ + static LONG fullscreen_style(LONG style) + { + /* Make sure the window is managed, otherwise we won't get keyboard input. */ +@@ -1013,6 +1036,8 @@ static void wined3d_device_delete_opengl_contexts_cs(void *object) + device->shader_backend->shader_free_private(device); + destroy_dummy_textures(device, context); + destroy_default_samplers(device, context); ++ destroy_buffer_heap(device, context); ++ + context_release(context); + + while (device->context_count) +@@ -1060,6 +1085,9 @@ static void wined3d_device_create_primary_opengl_context_cs(void *object) + context = context_acquire(device, target, 0); + create_dummy_textures(device, context); + create_default_samplers(device, context); ++ ++ create_buffer_heap(device, context); ++ + context_release(context); + } + +diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h +index 4d0555a76c..96bda81eb9 100644 +--- a/dlls/wined3d/wined3d_private.h ++++ b/dlls/wined3d/wined3d_private.h +@@ -2966,6 +2966,9 @@ struct wined3d_device + /* Context management */ + struct wined3d_context **contexts; + UINT context_count; ++ ++ /* Dynamic buffer heap */ ++ struct wined3d_buffer_heap *wo_buffer_heap; + }; + + void device_clear_render_targets(struct wined3d_device *device, UINT rt_count, const struct wined3d_fb_state *fb, +-- +2.16.2 + diff --git a/0003-wined3d-Add-support-for-persistently-mapped-wined3d_.patch b/0003-wined3d-Add-support-for-persistently-mapped-wined3d_.patch new file mode 100644 index 000000000000..5a3a499ab2a0 --- /dev/null +++ b/0003-wined3d-Add-support-for-persistently-mapped-wined3d_.patch @@ -0,0 +1,701 @@ +From 3e72163af5712be1a51957effa183edc7a9fb2a6 Mon Sep 17 00:00:00 2001 +From: Andrew Comminos <andrew@comminos.com> +Date: Fri, 23 Feb 2018 17:41:43 -0800 +Subject: [PATCH 3/8] wined3d: Add support for persistently mapped + wined3d_buffer resources. + +--- + dlls/wined3d/buffer.c | 211 ++++++++++++++++++++++++++++++++++++++++- + dlls/wined3d/buffer_heap.c | 8 +- + dlls/wined3d/cs.c | 62 +++++++++++- + dlls/wined3d/drawprim.c | 7 +- + dlls/wined3d/query.c | 2 +- + dlls/wined3d/resource.c | 20 +++- + dlls/wined3d/state.c | 6 +- + dlls/wined3d/texture.c | 13 +++ + dlls/wined3d/utils.c | 1 + + dlls/wined3d/wined3d_private.h | 13 +++ + 10 files changed, 326 insertions(+), 17 deletions(-) + +diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c +index d61321e1a5..ccb090c907 100644 +--- a/dlls/wined3d/buffer.c ++++ b/dlls/wined3d/buffer.c +@@ -28,12 +28,14 @@ + #include "wined3d_private.h" + + WINE_DEFAULT_DEBUG_CHANNEL(d3d); ++WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); + + #define WINED3D_BUFFER_HASDESC 0x01 /* A vertex description has been found. */ + #define WINED3D_BUFFER_USE_BO 0x02 /* Use a buffer object for this buffer. */ + #define WINED3D_BUFFER_PIN_SYSMEM 0x04 /* Keep a system memory copy for this buffer. */ + #define WINED3D_BUFFER_DISCARD 0x08 /* A DISCARD lock has occurred since the last preload. */ + #define WINED3D_BUFFER_APPLESYNC 0x10 /* Using sync as in GL_APPLE_flush_buffer_range. */ ++#define WINED3D_BUFFER_PERSISTENT 0x20 /* Uses a persistent-mapped buffer via ARB_buffer_storage. */ + + #define VB_MAXDECLCHANGES 100 /* After that number of decl changes we stop converting */ + #define VB_RESETDECLCHANGE 1000 /* Reset the decl changecount after that number of draws */ +@@ -269,6 +271,50 @@ fail: + return FALSE; + } + ++/* Context activation is done by the caller. */ ++static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer, struct wined3d_context *context) ++{ ++ struct wined3d_device *device = buffer->resource.device; ++ struct wined3d_buffer_heap *heap; ++ struct wined3d_map_range map_range; ++ HRESULT hr; ++ ++ if (buffer->resource.usage & WINED3DUSAGE_WRITEONLY) ++ { ++ heap = device->wo_buffer_heap; ++ } ++ else ++ { ++ FIXME("Using write-only heap for a persistent buffer without WINED3DUSAGE_WRITEONLY.\n"); ++ heap = device->rw_buffer_heap; ++ } ++ ++ buffer->buffer_heap = heap; ++ if (FAILED(hr = wined3d_buffer_heap_alloc(heap, buffer->resource.size, &map_range))) ++ { ++ goto fail; ++ } ++ buffer->cs_persistent_map = map_range; ++ buffer->mt_persistent_map = map_range; ++ return TRUE; ++ ++fail: ++ // FIXME(acomminos): fall back to standalone BO here? ++ ERR("Failed to create persistent map for buffer %p, hr=%x\n", buffer, hr); ++ buffer->buffer_heap = NULL; ++ return FALSE; ++} ++ ++static void buffer_free_persistent_map(struct wined3d_buffer *buffer) ++{ ++ if (!buffer->buffer_heap) ++ return; ++ ++ // TODO(acomminos): get the CS thread to free pending main thread buffers. ++ wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map); ++ buffer->buffer_heap = NULL; ++} ++ + static BOOL buffer_process_converted_attribute(struct wined3d_buffer *buffer, + const enum wined3d_buffer_conversion_type conversion_type, + const struct wined3d_stream_info_element *attrib, DWORD *stride_this_run) +@@ -630,6 +676,16 @@ static BOOL wined3d_buffer_prepare_location(struct wined3d_buffer *buffer, + return FALSE; + } + return buffer_create_buffer_object(buffer, context); ++ case WINED3D_LOCATION_PERSISTENT_MAP: ++ if (buffer->buffer_heap) ++ return TRUE; ++ ++ if (!(buffer->flags & WINED3D_BUFFER_PERSISTENT)) ++ { ++ WARN("Trying to map a persistent region for buffer %p without WINED3D_BUFFER_PERSISTENT.\n", buffer); ++ return FALSE; ++ } ++ return buffer_alloc_persistent_map(buffer, context); + + default: + ERR("Invalid location %s.\n", wined3d_debug_location(location)); +@@ -688,16 +744,32 @@ BOOL wined3d_buffer_load_location(struct wined3d_buffer *buffer, + buffer_conversion_upload(buffer, context); + break; + ++ case WINED3D_LOCATION_PERSISTENT_MAP: ++ // TODO(acomminos): are we guaranteed location_sysmem to be kept? ++ // no. ++ if (buffer->conversion_map) ++ FIXME("Attempting to use conversion map with persistent mapping.\n"); ++ memcpy(buffer->buffer_heap->map_ptr + ++ buffer->cs_persistent_map.offset, ++ buffer->resource.heap_memory, buffer->resource.size); ++ break; ++ + default: + ERR("Invalid location %s.\n", wined3d_debug_location(location)); + return FALSE; + } + + wined3d_buffer_validate_location(buffer, location); +- if (buffer->resource.heap_memory && location == WINED3D_LOCATION_BUFFER ++ if (buffer->resource.heap_memory ++ && location & WINED3D_LOCATION_BUFFER + && !(buffer->resource.usage & WINED3DUSAGE_DYNAMIC)) + wined3d_buffer_evict_sysmem(buffer); + ++ // FIXME(acomminos) ++ if (buffer->resource.heap_memory ++ && location & WINED3D_LOCATION_PERSISTENT_MAP) ++ wined3d_buffer_evict_sysmem(buffer); ++ + return TRUE; + } + +@@ -721,6 +793,13 @@ DWORD wined3d_buffer_get_memory(struct wined3d_buffer *buffer, + data->addr = NULL; + return WINED3D_LOCATION_BUFFER; + } ++ if (locations & WINED3D_LOCATION_PERSISTENT_MAP) ++ { ++ // FIXME(acomminos): should we expose a buffer object we don't wholly own here? ++ data->buffer_object = buffer->buffer_heap->buffer_object; ++ data->addr = buffer->cs_persistent_map.offset; ++ return WINED3D_LOCATION_PERSISTENT_MAP; ++ } + if (locations & WINED3D_LOCATION_SYSMEM) + { + data->buffer_object = 0; +@@ -760,6 +839,8 @@ static void buffer_unload(struct wined3d_resource *resource) + buffer->flags &= ~WINED3D_BUFFER_HASDESC; + } + ++ buffer_free_persistent_map(buffer); ++ + resource_unload(resource); + } + +@@ -783,6 +864,8 @@ static void wined3d_buffer_destroy_object(void *object) + HeapFree(GetProcessHeap(), 0, buffer->conversion_map); + } + ++ buffer_free_persistent_map(buffer); ++ + HeapFree(GetProcessHeap(), 0, buffer->maps); + HeapFree(GetProcessHeap(), 0, buffer); + } +@@ -899,6 +982,13 @@ void wined3d_buffer_load(struct wined3d_buffer *buffer, struct wined3d_context * + + buffer_mark_used(buffer); + ++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT) ++ { ++ if (!wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP)) ++ ERR("Failed to preload persistent mapping.\n"); ++ return; ++ } ++ + /* TODO: Make converting independent from VBOs */ + if (!(buffer->flags & WINED3D_BUFFER_USE_BO)) + { +@@ -1009,6 +1099,25 @@ static HRESULT wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UI + + count = ++buffer->resource.map_count; + ++ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) ++ { ++ const struct wined3d_gl_info *gl_info; ++ context = context_acquire(device, NULL, 0); ++ ++ FIXME_(d3d_perf)("Fences not used for persistent buffer maps on CS thread, using glFinish.\n"); ++ ++ gl_info = context->gl_info; ++ gl_info->gl_ops.gl.p_glFinish(); ++ ++ base = buffer->buffer_heap->map_ptr ++ + buffer->cs_persistent_map.offset; ++ *data = base + offset; ++ ++ context_release(context); ++ ++ return WINED3D_OK; ++ } ++ + if (buffer->buffer_object) + { + unsigned int dirty_offset = offset, dirty_size = size; +@@ -1151,6 +1260,12 @@ static void wined3d_buffer_unmap(struct wined3d_buffer *buffer) + return; + } + ++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT) ++ { ++ TRACE("Persistent buffer, ignore unmap.\n"); ++ return; ++ } ++ + if (buffer->map_ptr) + { + struct wined3d_device *device = buffer->resource.device; +@@ -1273,6 +1388,64 @@ static void buffer_resource_preload(struct wined3d_resource *resource) + + static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx, + struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) ++{ ++ struct wined3d_buffer *buffer = buffer_from_resource(resource); ++ UINT offset = box ? box->left : 0; ++ ++ if (sub_resource_idx) ++ { ++ WARN("Invalid sub_resource_idx %u.\n", sub_resource_idx); ++ return E_INVALIDARG; ++ } ++ ++ // Support immediate mapping of persistent buffers off the command thread, ++ // which require no GL calls to interface with. ++ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) ++ { ++ map_desc->row_pitch = map_desc->slice_pitch = buffer->desc.byte_width; ++ if (flags & WINED3D_MAP_DISCARD) ++ { ++ HRESULT hr; ++ struct wined3d_map_range map_range; ++ if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &map_range))) ++ { ++ FIXME("Failed to allocate new buffer, falling back to sync path.\n"); ++ return hr; ++ } ++ map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; ++ resource->map_count++; ++ ++ buffer->mt_persistent_map = map_range; ++ ++ // Discard handler on CSMT thread is responsible for returning the ++ // currently used buffer to the free pool, along with the fence that ++ // must be called before the buffer can be reused. ++ wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, map_range); ++ return WINED3D_OK; ++ } ++ else if (flags & WINED3D_MAP_NOOVERWRITE) ++ { ++ // Allow immediate access for persistent buffers without a fence. ++ // Always use the latest buffer in this case in case the latest ++ // DISCARDed one hasn't reached the command stream yet. ++ struct wined3d_map_range map_range = buffer->mt_persistent_map; ++ map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; ++ resource->map_count++; ++ return WINED3D_OK; ++ } ++ else ++ { ++ // TODO(acomminos): Should check mapped ranges to see if the region is writeable even though NOOVERWRITE is specified. ++ WARN_(d3d_perf)("Mapping persistent buffer %p in sync with CS thread.\n", buffer); ++ // XXX(acomminos): kill this early return. they're the worst. ++ } ++ } ++ ++ return E_NOTIMPL; ++} ++ ++static HRESULT buffer_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx, ++ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) + { + struct wined3d_buffer *buffer = buffer_from_resource(resource); + UINT offset, size; +@@ -1316,6 +1489,18 @@ static HRESULT buffer_resource_sub_resource_map_info(struct wined3d_resource *re + } + + static HRESULT buffer_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx) ++{ ++ struct wined3d_buffer *buffer = buffer_from_resource(resource); ++ if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) ++ { ++ // Nothing to be done to unmap a region of a persistent buffer. ++ resource->map_count--; ++ return WINED3D_OK; ++ } ++ return E_NOTIMPL; ++} ++ ++static HRESULT buffer_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx) + { + if (sub_resource_idx) + { +@@ -1334,8 +1519,10 @@ static const struct wined3d_resource_ops buffer_resource_ops = + buffer_resource_preload, + buffer_unload, + buffer_resource_sub_resource_map, ++ buffer_resource_sub_resource_map_cs, + buffer_resource_sub_resource_map_info, + buffer_resource_sub_resource_unmap, ++ buffer_resource_sub_resource_unmap_cs, + }; + + static GLenum buffer_type_hint_from_bind_flags(const struct wined3d_gl_info *gl_info, +@@ -1411,12 +1598,32 @@ static HRESULT buffer_init(struct wined3d_buffer *buffer, struct wined3d_device + buffer->flags |= WINED3D_BUFFER_PIN_SYSMEM; + } + ++ // FIXME(acomminos) ++ if (buffer->resource.usage & WINED3DUSAGE_DYNAMIC) ++ { ++ // FIXME(acomminos): why is this returning false? ++ if (FALSE && !gl_info->supported[ARB_BUFFER_STORAGE]) ++ { ++ WARN_(d3d_perf)("Not creating a persistent mapping for a dynamic buffer because ARB_buffer_storage is unsupported.\n"); ++ } ++ else ++ { ++ // If supported, use persistent mapped buffers instead of a ++ // standalone BO for dynamic buffers. ++ buffer->flags |= WINED3D_BUFFER_PERSISTENT; ++ } ++ } ++ + /* Observations show that draw_primitive_immediate_mode() is faster on + * dynamic vertex buffers than converting + draw_primitive_arrays(). + * (Half-Life 2 and others.) */ + dynamic_buffer_ok = gl_info->supported[APPLE_FLUSH_BUFFER_RANGE] || gl_info->supported[ARB_MAP_BUFFER_RANGE]; + +- if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT]) ++ if (buffer->flags & WINED3D_BUFFER_PERSISTENT) ++ { ++ TRACE("Not creating a BO because a persistent mapped buffer will be used.\n"); ++ } ++ else if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT]) + { + TRACE("Not creating a BO because GL_ARB_vertex_buffer is not supported.\n"); + } +diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c +index 900e2d24bb..f24fddffb4 100644 +--- a/dlls/wined3d/buffer_heap.c ++++ b/dlls/wined3d/buffer_heap.c +@@ -114,14 +114,11 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s + { + access_flags |= GL_MAP_READ_BIT; + } +- storage_flags = access_flags; // XXX(acomminos): will we need dynamic storage? ++ storage_flags = access_flags; + + // TODO(acomminos): where should we be checking for errors here? +- +- // TODO(acomminos): assert from CS thread? + GL_EXTCALL(glGenBuffers(1, &object->buffer_object)); + +- // XXX(acomminos): use glNamedBufferStorage? + context_bind_bo(context, buffer_target, object->buffer_object); + + // TODO(acomminos): assert glBufferStorage supported? +@@ -129,7 +126,6 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s + + if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(buffer_target, 0, size, access_flags)))) + { +- // TODO(acomminos): include error message + ERR("Couldn't map persistent buffer.\n"); + return -1; // FIXME(acomminos): proper error code, cleanup + } +@@ -147,7 +143,7 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s + /* Context activation is done by the caller. */ + HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) + { +- // TODO ++ FIXME("Unimplemented, leaking buffer"); + return WINED3D_OK; + } + +diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c +index 7e72b30933..edcf521b72 100644 +--- a/dlls/wined3d/cs.c ++++ b/dlls/wined3d/cs.c +@@ -73,6 +73,7 @@ enum wined3d_cs_op + WINED3D_CS_OP_COPY_UAV_COUNTER, + WINED3D_CS_OP_COPY_SUB_RESOURCE, + WINED3D_CS_OP_GENERATE_MIPS, ++ WINED3D_CS_OP_DISCARD_BUFFER, + WINED3D_CS_OP_STOP, + }; + +@@ -444,6 +445,13 @@ struct wined3d_cs_generate_mips + struct wined3d_shader_resource_view *view; + }; + ++struct wined3d_cs_discard_buffer ++{ ++ enum wined3d_cs_op opcode; ++ struct wined3d_buffer *buffer; ++ struct wined3d_map_range map_range; ++}; ++ + struct wined3d_cs_stop + { + enum wined3d_cs_op opcode; +@@ -1986,7 +1994,7 @@ static void wined3d_cs_exec_map(struct wined3d_cs *cs, const void *data) + const struct wined3d_cs_map *op = data; + struct wined3d_resource *resource = op->resource; + +- *op->hr = resource->resource_ops->resource_sub_resource_map(resource, ++ *op->hr = resource->resource_ops->resource_sub_resource_map_cs(resource, + op->sub_resource_idx, op->map_desc, op->box, op->flags); + } + +@@ -2020,7 +2028,7 @@ static void wined3d_cs_exec_unmap(struct wined3d_cs *cs, const void *data) + const struct wined3d_cs_unmap *op = data; + struct wined3d_resource *resource = op->resource; + +- *op->hr = resource->resource_ops->resource_sub_resource_unmap(resource, op->sub_resource_idx); ++ *op->hr = resource->resource_ops->resource_sub_resource_unmap_cs(resource, op->sub_resource_idx); + } + + HRESULT wined3d_cs_unmap(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx) +@@ -2630,6 +2638,55 @@ void wined3d_cs_emit_generate_mips(struct wined3d_cs *cs, struct wined3d_shader_ + cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT); + } + ++static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *data) ++{ ++ const struct wined3d_cs_discard_buffer *op = data; ++ struct wined3d_buffer *buffer = op->buffer; ++ HRESULT hr; ++ struct wined3d_fence *fence; ++ ++ // Poll for discarded buffers whose fenced have been triggered here to avoid ++ // excessive VRAM consumption. ++ wined3d_buffer_heap_cs_poll_fences(buffer->buffer_heap, cs->device); ++ ++ // TODO(acomminos): should call into buffer.c here instead. ++ // XXX(acomminos): should we always create a new fence here? ++ if (!FAILED(hr = wined3d_fence_create(cs->device, &fence))) ++ { ++ // TODO(acomminos): make more informed fences based on prior info. for now, ++ // we do this because allocating and deleting fences repeatedly is brutal ++ // for performance. look into why. ++ wined3d_fence_issue(fence, cs->device); ++ ++ wined3d_buffer_heap_free_fenced(buffer->buffer_heap, buffer->cs_persistent_map, fence); ++ } ++ else ++ { ++ ERR("Failed to create fence for discarded buffer %p, hr %x\n. Freeing anyway.", buffer, hr); ++ wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map); ++ } ++ ++ buffer->cs_persistent_map = op->map_range; ++ ++ device_invalidate_state(cs->device, STATE_STREAMSRC); ++ ++ wined3d_resource_release(&op->buffer->resource); ++} ++ ++void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range) ++{ ++ struct wined3d_cs_discard_buffer *op; ++ ++ op = cs->ops->require_space(cs, sizeof(*op), WINED3D_CS_QUEUE_DEFAULT); ++ op->opcode = WINED3D_CS_OP_DISCARD_BUFFER; ++ op->buffer = buffer; ++ op->map_range = map_range; ++ ++ wined3d_resource_acquire(&buffer->resource); ++ ++ cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT); ++} ++ + static void wined3d_cs_emit_stop(struct wined3d_cs *cs) + { + struct wined3d_cs_stop *op; +@@ -2690,6 +2747,7 @@ static void (* const wined3d_cs_op_handlers[])(struct wined3d_cs *cs, const void + /* WINED3D_CS_OP_COPY_UAV_COUNTER */ wined3d_cs_exec_copy_uav_counter, + /* WINED3D_CS_OP_COPY_SUB_RESOURCE */ wined3d_cs_exec_copy_sub_resource, + /* WINED3D_CS_OP_GENERATE_MIPS */ wined3d_cs_exec_generate_mips, ++ /* WINED3D_CS_OP_DISCARD_BUFFER */ wined3d_cs_exec_discard_buffer, + }; + + #if defined(STAGING_CSMT) +diff --git a/dlls/wined3d/drawprim.c b/dlls/wined3d/drawprim.c +index 404623c9ac..7b622c9b14 100644 +--- a/dlls/wined3d/drawprim.c ++++ b/dlls/wined3d/drawprim.c +@@ -688,7 +688,12 @@ void draw_primitive(struct wined3d_device *device, const struct wined3d_state *s + if (parameters->indexed) + { + struct wined3d_buffer *index_buffer = state->index_buffer; +- if (!index_buffer->buffer_object || !stream_info->all_vbo) ++ if (index_buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) ++ { ++ idx_data = index_buffer->cs_persistent_map.offset; ++ ib_fence = index_buffer->fence; // FIXME(acomminos): use this fence or not? ++ } ++ else if (!index_buffer->buffer_object || !stream_info->all_vbo) + { + idx_data = index_buffer->resource.heap_memory; + } +diff --git a/dlls/wined3d/query.c b/dlls/wined3d/query.c +index f394af87c7..cf665bfd11 100644 +--- a/dlls/wined3d/query.c ++++ b/dlls/wined3d/query.c +@@ -88,7 +88,7 @@ static BOOL wined3d_fence_supported(const struct wined3d_gl_info *gl_info) + return gl_info->supported[ARB_SYNC] || gl_info->supported[NV_FENCE] || gl_info->supported[APPLE_FENCE]; + } + +-static enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, ++enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, + const struct wined3d_device *device, DWORD flags) + { + const struct wined3d_gl_info *gl_info; +diff --git a/dlls/wined3d/resource.c b/dlls/wined3d/resource.c +index 78deb5078b..9b3a303b08 100644 +--- a/dlls/wined3d/resource.c ++++ b/dlls/wined3d/resource.c +@@ -358,13 +358,18 @@ static DWORD wined3d_resource_sanitise_map_flags(const struct wined3d_resource * + HRESULT CDECL wined3d_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx, + struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) + { ++ HRESULT hr; + TRACE("resource %p, sub_resource_idx %u, map_desc %p, box %s, flags %#x.\n", + resource, sub_resource_idx, map_desc, debug_box(box), flags); + + flags = wined3d_resource_sanitise_map_flags(resource, flags); +- wined3d_resource_wait_idle(resource); +- +- return wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags); ++ if (FAILED(hr = resource->resource_ops->resource_sub_resource_map(resource, sub_resource_idx, map_desc, box, flags))) ++ { ++ TRACE_(d3d_perf)("Mapping resource %p on the command stream.\n", resource); ++ wined3d_resource_wait_idle(resource); ++ hr = wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags); ++ } ++ return hr; + } + + HRESULT CDECL wined3d_resource_map_info(struct wined3d_resource *resource, unsigned int sub_resource_idx, +@@ -377,9 +382,16 @@ HRESULT CDECL wined3d_resource_map_info(struct wined3d_resource *resource, unsig + + HRESULT CDECL wined3d_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx) + { ++ HRESULT hr; + TRACE("resource %p, sub_resource_idx %u.\n", resource, sub_resource_idx); + +- return wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx); ++ if (FAILED(hr = resource->resource_ops->resource_sub_resource_unmap(resource, sub_resource_idx))) ++ { ++ TRACE_(d3d_perf)("Unmapping resource %p on the command stream.\n", resource); ++ hr = wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx); ++ } ++ return hr; ++ + } + + UINT CDECL wined3d_resource_update_info(struct wined3d_resource *resource, unsigned int sub_resource_idx, +diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c +index f5b9eca520..142a932d07 100644 +--- a/dlls/wined3d/state.c ++++ b/dlls/wined3d/state.c +@@ -4910,7 +4910,11 @@ static void indexbuffer(struct wined3d_context *context, const struct wined3d_st + else + { + struct wined3d_buffer *ib = state->index_buffer; +- GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object)); ++ // FIXME(acomminos): disasterous. ++ if (ib->locations & WINED3D_LOCATION_PERSISTENT_MAP) ++ GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_heap->buffer_object)); ++ else ++ GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object)); + } + } + +diff --git a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c +index 69565f355d..51c37762cd 100644 +--- a/dlls/wined3d/texture.c ++++ b/dlls/wined3d/texture.c +@@ -2297,6 +2297,12 @@ static void wined3d_texture_unload(struct wined3d_resource *resource) + + static HRESULT texture_resource_sub_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx, + struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) ++{ ++ return E_NOTIMPL; ++} ++ ++static HRESULT texture_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx, ++ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) + { + const struct wined3d_format *format = resource->format; + struct wined3d_texture_sub_resource *sub_resource; +@@ -2464,6 +2470,11 @@ static HRESULT texture_resource_sub_resource_map_info(struct wined3d_resource *r + } + + static HRESULT texture_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx) ++{ ++ return E_NOTIMPL; ++} ++ ++static HRESULT texture_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx) + { + struct wined3d_texture_sub_resource *sub_resource; + struct wined3d_device *device = resource->device; +@@ -2514,8 +2525,10 @@ static const struct wined3d_resource_ops texture_resource_ops = + texture_resource_preload, + wined3d_texture_unload, + texture_resource_sub_resource_map, ++ texture_resource_sub_resource_map_cs, + texture_resource_sub_resource_map_info, + texture_resource_sub_resource_unmap, ++ texture_resource_sub_resource_unmap_cs, + }; + + static HRESULT texture1d_init(struct wined3d_texture *texture, const struct wined3d_resource_desc *desc, +diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c +index ee519d2b32..25626749fa 100644 +--- a/dlls/wined3d/utils.c ++++ b/dlls/wined3d/utils.c +@@ -6264,6 +6264,7 @@ const char *wined3d_debug_location(DWORD location) + LOCATION_TO_STR(WINED3D_LOCATION_DRAWABLE); + LOCATION_TO_STR(WINED3D_LOCATION_RB_MULTISAMPLE); + LOCATION_TO_STR(WINED3D_LOCATION_RB_RESOLVED); ++ LOCATION_TO_STR(WINED3D_LOCATION_PERSISTENT_MAP); + #undef LOCATION_TO_STR + if (location) FIXME("Unrecognized location flag(s) %#x.\n", location); + +diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h +index 96bda81eb9..d049d57206 100644 +--- a/dlls/wined3d/wined3d_private.h ++++ b/dlls/wined3d/wined3d_private.h +@@ -1701,6 +1701,9 @@ void wined3d_fence_destroy(struct wined3d_fence *fence) DECLSPEC_HIDDEN; + void wined3d_fence_issue(struct wined3d_fence *fence, const struct wined3d_device *device) DECLSPEC_HIDDEN; + enum wined3d_fence_result wined3d_fence_wait(const struct wined3d_fence *fence, + const struct wined3d_device *device) DECLSPEC_HIDDEN; ++// XXX(acomminos): really expose this? ++enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, ++ const struct wined3d_device *device, DWORD flags) DECLSPEC_HIDDEN; + + /* Direct3D terminology with little modifications. We do not have an issued + * state because only the driver knows about it, but we have a created state +@@ -3009,9 +3012,12 @@ struct wined3d_resource_ops + void (*resource_unload)(struct wined3d_resource *resource); + HRESULT (*resource_sub_resource_map)(struct wined3d_resource *resource, unsigned int sub_resource_idx, + struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags); ++ HRESULT (*resource_sub_resource_map_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx, ++ struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags); + HRESULT (*resource_map_info)(struct wined3d_resource *resource, unsigned int sub_resource_idx, + struct wined3d_map_info *info, DWORD flags); + HRESULT (*resource_sub_resource_unmap)(struct wined3d_resource *resource, unsigned int sub_resource_idx); ++ HRESULT (*resource_sub_resource_unmap_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx); + }; + + struct wined3d_resource +@@ -3266,6 +3272,7 @@ void wined3d_texture_validate_location(struct wined3d_texture *texture, + #define WINED3D_LOCATION_DRAWABLE 0x00000040 + #define WINED3D_LOCATION_RB_MULTISAMPLE 0x00000080 + #define WINED3D_LOCATION_RB_RESOLVED 0x00000100 ++#define WINED3D_LOCATION_PERSISTENT_MAP 0x00000200 + + const char *wined3d_debug_location(DWORD location) DECLSPEC_HIDDEN; + +@@ -3622,6 +3629,7 @@ void wined3d_cs_emit_copy_sub_resource(struct wined3d_cs *cs, struct wined3d_res + unsigned int dst_sub_resource_idx, const struct wined3d_box *dst_box, struct wined3d_resource *src_resource, + unsigned int src_sub_resource_idx, const struct wined3d_box *src_box) DECLSPEC_HIDDEN; + void wined3d_cs_emit_generate_mips(struct wined3d_cs *cs, struct wined3d_shader_resource_view *view) DECLSPEC_HIDDEN; ++void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range) DECLSPEC_HIDDEN; + void wined3d_cs_init_object(struct wined3d_cs *cs, + void (*callback)(void *object), void *object) DECLSPEC_HIDDEN; + HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx, +@@ -3712,6 +3720,11 @@ struct wined3d_buffer + UINT stride; /* 0 if no conversion */ + enum wined3d_buffer_conversion_type *conversion_map; /* NULL if no conversion */ + UINT conversion_stride; /* 0 if no shifted conversion */ ++ ++ /* persistent mapped buffer */ ++ struct wined3d_buffer_heap *buffer_heap; ++ struct wined3d_map_range cs_persistent_map; ++ struct wined3d_map_range mt_persistent_map; // TODO: make struct list? + }; + + static inline struct wined3d_buffer *buffer_from_resource(struct wined3d_resource *resource) +-- +2.16.2 + diff --git a/0004-wined3d-Implement-aligned-persistent-heaps-for-persi.patch b/0004-wined3d-Implement-aligned-persistent-heaps-for-persi.patch new file mode 100644 index 000000000000..7b4a9e433488 --- /dev/null +++ b/0004-wined3d-Implement-aligned-persistent-heaps-for-persi.patch @@ -0,0 +1,240 @@ +From 65595c191d2a01b2486ba10618f743c930af362b Mon Sep 17 00:00:00 2001 +From: Andrew Comminos <andrew@comminos.com> +Date: Sat, 24 Feb 2018 14:38:59 -0800 +Subject: [PATCH 4/8] wined3d: Implement aligned persistent heaps for + persistently mapped UBOs. + +--- + dlls/wined3d/buffer.c | 16 ++++++++++++---- + dlls/wined3d/buffer_heap.c | 12 +++++++++--- + dlls/wined3d/cs.c | 15 ++++++++++++++- + dlls/wined3d/device.c | 15 ++++++++++++++- + dlls/wined3d/state.c | 11 ++++++++++- + dlls/wined3d/wined3d_private.h | 5 ++++- + 6 files changed, 63 insertions(+), 11 deletions(-) + +diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c +index ccb090c907..7d7e506817 100644 +--- a/dlls/wined3d/buffer.c ++++ b/dlls/wined3d/buffer.c +@@ -279,14 +279,16 @@ static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer, struct wi + struct wined3d_map_range map_range; + HRESULT hr; + +- if (buffer->resource.usage & WINED3DUSAGE_WRITEONLY) ++ if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER) + { +- heap = device->wo_buffer_heap; ++ // Use a heap aligned to constant buffer offset requirements. ++ heap = device->cb_buffer_heap; + } + else + { +- FIXME("Using write-only heap for a persistent buffer without WINED3DUSAGE_WRITEONLY.\n"); +- heap = device->rw_buffer_heap; ++ if (!(buffer->resource.usage & WINED3DUSAGE_WRITEONLY)) ++ FIXME("Using a write-only persistent buffer for %p without WINED3DUSAGE_WRITEONLY.\n", buffer); ++ heap = device->wo_buffer_heap; + } + + buffer->buffer_heap = heap; +@@ -791,6 +793,7 @@ DWORD wined3d_buffer_get_memory(struct wined3d_buffer *buffer, + { + data->buffer_object = buffer->buffer_object; + data->addr = NULL; ++ data->length = buffer->resource.size; + return WINED3D_LOCATION_BUFFER; + } + if (locations & WINED3D_LOCATION_PERSISTENT_MAP) +@@ -798,12 +801,17 @@ DWORD wined3d_buffer_get_memory(struct wined3d_buffer *buffer, + // FIXME(acomminos): should we expose a buffer object we don't wholly own here? + data->buffer_object = buffer->buffer_heap->buffer_object; + data->addr = buffer->cs_persistent_map.offset; ++ // Note that the size of the underlying buffer allocation may be larger ++ // than the buffer knows about. In this case, we've rounded it up to be ++ // aligned (e.g. for uniform buffer offsets). ++ data->length = buffer->cs_persistent_map.size; + return WINED3D_LOCATION_PERSISTENT_MAP; + } + if (locations & WINED3D_LOCATION_SYSMEM) + { + data->buffer_object = 0; + data->addr = buffer->resource.heap_memory; ++ data->length = buffer->resource.size; + return WINED3D_LOCATION_SYSMEM; + } + +diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c +index f24fddffb4..02b925b658 100644 +--- a/dlls/wined3d/buffer_heap.c ++++ b/dlls/wined3d/buffer_heap.c +@@ -95,7 +95,7 @@ static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wine + } + + /* Context activation is done by the caller. */ +-HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, BOOL write_only, struct wined3d_buffer_heap **buffer_heap) ++HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap) + { + const struct wined3d_gl_info *gl_info = context->gl_info; + const GLenum buffer_target = GL_ARRAY_BUFFER; +@@ -133,6 +133,7 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s + + object->free_list_head = element_new(0, size); + object->fenced_head = object->fenced_tail = NULL; ++ object->alignment = alignment; + InitializeCriticalSection(&object->temp_lock); + + *buffer_heap = object; +@@ -147,12 +148,17 @@ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct win + return WINED3D_OK; + } + +-HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range) { ++HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range) ++{ + EnterCriticalSection(&heap->temp_lock); + + // TODO(acomminos): free list binning? + struct wined3d_buffer_heap_element *elem = heap->free_list_head; +- // XXX(acomminos): Avoid fragmentation by rounding to nearest power of two. ++ ++ // Round up the size to a multiple of the heap's alignment. ++ if (heap->alignment) ++ size += heap->alignment - (size % heap->alignment); ++ + while (elem != NULL) + { + TRACE("allocation at %p, size %lld\n", heap->map_ptr + elem->range.offset, elem->range.size); +diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c +index edcf521b72..d7bdc21a25 100644 +--- a/dlls/wined3d/cs.c ++++ b/dlls/wined3d/cs.c +@@ -2668,7 +2668,20 @@ static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *da + + buffer->cs_persistent_map = op->map_range; + +- device_invalidate_state(cs->device, STATE_STREAMSRC); ++ // TODO(acomminos): merge this logic with buffer.c functions for standalone BOs ++ if (buffer->bind_flags & WINED3D_BIND_VERTEX_BUFFER) ++ device_invalidate_state(cs->device, STATE_STREAMSRC); ++ if (buffer->bind_flags & WINED3D_BIND_INDEX_BUFFER) ++ device_invalidate_state(cs->device, STATE_INDEXBUFFER); ++ if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER) ++ { ++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_VERTEX)); ++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_HULL)); ++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_DOMAIN)); ++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_GEOMETRY)); ++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_PIXEL)); ++ device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_COMPUTE)); ++ } + + wined3d_resource_release(&op->buffer->resource); + } +diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c +index 363dcb17f0..e0871d1636 100644 +--- a/dlls/wined3d/device.c ++++ b/dlls/wined3d/device.c +@@ -853,12 +853,22 @@ static void create_buffer_heap(struct wined3d_device *device, struct wined3d_con + // TODO(acomminos): definitely don't take up all of vram. this is gonna get + // paged anyway, though. + const GLsizeiptr HBO_SIZE = device->adapter->vram_bytes / 4; ++ const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; ++ ++ GLint ub_alignment; ++ gl_info->gl_ops.gl.p_glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &ub_alignment); + + HRESULT hr; +- if (FAILED(hr = wined3d_buffer_heap_create(context, HBO_SIZE, TRUE, &device->wo_buffer_heap))) ++ if (FAILED(hr = wined3d_buffer_heap_create(context, HBO_SIZE, 0, TRUE, &device->wo_buffer_heap))) + { + ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr); + } ++ ++ // TODO(acomminos): can likely use a way smaller heap for CBs by querying limits ++ if (FAILED(hr = wined3d_buffer_heap_create(context, HBO_SIZE, ub_alignment, TRUE, &device->cb_buffer_heap))) ++ { ++ ERR("Failed to create persistent buffer heap for constant buffers, hr %#x.\n", hr); ++ } + } + + /* Context activation is done by the caller. */ +@@ -866,6 +876,9 @@ static void destroy_buffer_heap(struct wined3d_device *device, struct wined3d_co + { + if (device->wo_buffer_heap) + wined3d_buffer_heap_destroy(device->wo_buffer_heap, context); ++ ++ if (device->cb_buffer_heap) ++ wined3d_buffer_heap_destroy(device->cb_buffer_heap, context); + } + + static LONG fullscreen_style(LONG style) +diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c +index 142a932d07..ce007d1a8e 100644 +--- a/dlls/wined3d/state.c ++++ b/dlls/wined3d/state.c +@@ -4980,6 +4980,7 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state + enum wined3d_shader_type shader_type; + struct wined3d_buffer *buffer; + unsigned int i, base, count; ++ struct wined3d_bo_address bo_addr; + + TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); + +@@ -4992,7 +4993,15 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state + for (i = 0; i < count; ++i) + { + buffer = state->cb[shader_type][i]; +- GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, buffer ? buffer->buffer_object : 0)); ++ if (buffer) ++ { ++ wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); ++ GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length)); ++ } ++ else ++ { ++ GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0)); ++ } + } + checkGLcall("bind constant buffers"); + } +diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h +index d049d57206..cfa48a5f3e 100644 +--- a/dlls/wined3d/wined3d_private.h ++++ b/dlls/wined3d/wined3d_private.h +@@ -1462,6 +1462,7 @@ struct wined3d_bo_address + { + GLuint buffer_object; + BYTE *addr; ++ GLsizeiptr length; + }; + + struct wined3d_const_bo_address +@@ -2972,6 +2973,7 @@ struct wined3d_device + + /* Dynamic buffer heap */ + struct wined3d_buffer_heap *wo_buffer_heap; ++ struct wined3d_buffer_heap *cb_buffer_heap; + }; + + void device_clear_render_targets(struct wined3d_device *device, UINT rt_count, const struct wined3d_fb_state *fb, +@@ -3671,6 +3673,7 @@ struct wined3d_buffer_heap + { + GLuint buffer_object; + void *map_ptr; ++ GLsizeiptr alignment; + CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list. + + // TODO: add buckets for free regions of a given size. +@@ -3684,7 +3687,7 @@ struct wined3d_buffer_heap + struct wined3d_buffer_heap_fenced_element *fenced_tail; + }; + +-HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, BOOL write_only, struct wined3d_buffer_heap **heap) DECLSPEC_HIDDEN; ++HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **heap) DECLSPEC_HIDDEN; + HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN; + // Fetches a buffer from the heap of at least the given size. + HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range* out_range) DECLSPEC_HIDDEN; +-- +2.16.2 + diff --git a/0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch b/0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch new file mode 100644 index 000000000000..cac70eac997d --- /dev/null +++ b/0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch @@ -0,0 +1,455 @@ +From 7a630d56cb1bddfd03cae3cdc43aee949b04abe2 Mon Sep 17 00:00:00 2001 +From: Andrew Comminos <andrew@comminos.com> +Date: Mon, 26 Feb 2018 21:35:40 -0800 +Subject: [PATCH 5/8] wined3d: Experimental buffer heap fence batching, upper + bound on heap size. + +--- + dlls/wined3d/buffer.c | 11 +- + dlls/wined3d/buffer_heap.c | 232 ++++++++++++++++++++++++----------------- + dlls/wined3d/cs.c | 15 +-- + dlls/wined3d/device.c | 5 +- + dlls/wined3d/wined3d_private.h | 12 ++- + 5 files changed, 154 insertions(+), 121 deletions(-) + +diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c +index 7d7e506817..fbec613c92 100644 +--- a/dlls/wined3d/buffer.c ++++ b/dlls/wined3d/buffer.c +@@ -992,9 +992,12 @@ void wined3d_buffer_load(struct wined3d_buffer *buffer, struct wined3d_context * + + if (buffer->flags & WINED3D_BUFFER_PERSISTENT) + { +- if (!wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP)) +- ERR("Failed to preload persistent mapping.\n"); +- return; ++ if (wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP)) ++ return; ++ ++ ERR("Failed to preload persistent mapping for %p, falling back to BO.\n", buffer); ++ buffer->flags |= WINED3D_BUFFER_USE_BO; ++ buffer->flags &= ~WINED3D_BUFFER_PERSISTENT; + } + + /* TODO: Make converting independent from VBOs */ +@@ -1417,7 +1420,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc + struct wined3d_map_range map_range; + if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &map_range))) + { +- FIXME("Failed to allocate new buffer, falling back to sync path.\n"); ++ FIXME_(d3d_perf)("Failed to allocate new buffer, falling back to sync path.\n"); + return hr; + } + map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; +diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c +index 02b925b658..165a957edd 100644 +--- a/dlls/wined3d/buffer_heap.c ++++ b/dlls/wined3d/buffer_heap.c +@@ -22,6 +22,7 @@ + #include "wined3d_private.h" + + WINE_DEFAULT_DEBUG_CHANNEL(d3d); ++WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); + + struct wined3d_buffer_heap_element + { +@@ -33,7 +34,7 @@ struct wined3d_buffer_heap_element + + struct wined3d_buffer_heap_fenced_element + { +- struct wined3d_map_range range; ++ struct wined3d_buffer_heap_element *ranges; + struct wined3d_fence *fence; + + struct wined3d_buffer_heap_element *next; +@@ -52,19 +53,19 @@ static struct wined3d_buffer_heap_element* element_new(GLintptr offset, GLsizeip + return elem; + } + +-static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_map_range range, struct wined3d_fence* fence) ++static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_element *ranges, struct wined3d_fence* fence) + { + struct wined3d_buffer_heap_fenced_element* elem; + elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_fenced_element)); + if (!elem) + return NULL; +- elem->range = range; ++ elem->ranges = ranges; + elem->fence = fence; + elem->next = NULL; + return elem; + } + +-static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) ++static void element_merge_adjacent(struct wined3d_buffer_heap_element **head, struct wined3d_buffer_heap_element *elem) + { + struct wined3d_buffer_heap_element *cur_prev = elem->prev; + struct wined3d_buffer_heap_element *cur_next = elem->next; +@@ -77,8 +78,8 @@ static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wine + if (cur_prev->prev) + cur_prev->prev->next = elem; + +- if (cur_prev == heap->free_list_head) +- heap->free_list_head = elem; ++ if (cur_prev == *head) ++ *head = elem; + + HeapFree(GetProcessHeap(), 0, cur_prev); + } +@@ -94,6 +95,69 @@ static void element_merge_adjacent(struct wined3d_buffer_heap *heap, struct wine + } + } + ++// Inserts a range into the list starting at `elem`. ++// Updates the head of the list, if necessary. ++static void element_insert_range(struct wined3d_buffer_heap_element **head, struct wined3d_map_range range) ++{ ++ struct wined3d_buffer_heap_element *elem = *head; ++ struct wined3d_buffer_heap_element *new_elem; ++ struct wined3d_buffer_heap_element *last_elem = NULL; ++ ++ // Special case where the head doesn't exist. ++ if (!elem) ++ { ++ new_elem = element_new(range.offset, range.size); ++ *head = new_elem; ++ return; ++ } ++ ++ while (elem) ++ { ++ struct wined3d_map_range *erange = &elem->range; ++ if (range.offset + range.size == erange->offset) ++ { ++ // Left side merge ++ erange->offset = range.offset; ++ erange->size += range.size; ++ // Check if this causes a merge with elem->prev ++ element_merge_adjacent(head, elem); ++ return; ++ } ++ else if (erange->offset + erange->size == range.offset) ++ { ++ // Right side merge ++ erange->size += range.size; ++ // Check if this causes a merge with elem->prev ++ element_merge_adjacent(head, elem); ++ return; ++ } ++ else if (range.offset < erange->offset) ++ { ++ // Append to left, non-merge case. ++ new_elem = element_new(range.offset, range.size); ++ new_elem->prev = elem->prev; ++ new_elem->next = elem; ++ if (elem->prev) ++ { ++ elem->prev->next = new_elem; ++ } ++ if (*head == elem) ++ { ++ *head = new_elem; ++ } ++ elem->prev = new_elem; ++ return; ++ } ++ last_elem = elem; ++ elem = elem->next; ++ } ++ ++ // Larger offset than all other elements in the list, append to the end. ++ new_elem = element_new(range.offset, range.size); ++ new_elem->prev = last_elem; ++ last_elem->next = new_elem; ++} ++ + /* Context activation is done by the caller. */ + HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap) + { +@@ -134,6 +198,9 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s + object->free_list_head = element_new(0, size); + object->fenced_head = object->fenced_tail = NULL; + object->alignment = alignment; ++ object->pending_fenced_bytes = 0; ++ object->pending_fenced_head = NULL; ++ object->pending_fenced_threshold_bytes = size / 4; // FIXME(acomminos): make this externally declared + InitializeCriticalSection(&object->temp_lock); + + *buffer_heap = object; +@@ -155,6 +222,9 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s + // TODO(acomminos): free list binning? + struct wined3d_buffer_heap_element *elem = heap->free_list_head; + ++ // Round to the nearest power of two to reduce fragmentation. ++ size = 1ULL << (int)ceil(log2(size)); ++ + // Round up the size to a multiple of the heap's alignment. + if (heap->alignment) + size += heap->alignment - (size % heap->alignment); +@@ -188,6 +258,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s + } + HeapFree(GetProcessHeap(), 0, elem); + } ++ + LeaveCriticalSection(&heap->temp_lock); + return WINED3D_OK; + } +@@ -201,87 +272,53 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s + HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) + { + EnterCriticalSection(&heap->temp_lock); +- struct wined3d_buffer_heap_element *new_elem; +- struct wined3d_buffer_heap_element *elem = heap->free_list_head; +- struct wined3d_buffer_heap_element *last_elem = NULL; + +- // Special case where the head doesn't exist. +- if (!elem) +- { +- new_elem = element_new(range.offset, range.size); +- heap->free_list_head = new_elem; +- goto success; +- } ++ element_insert_range(&heap->free_list_head, range); + +- while (elem) ++ LeaveCriticalSection(&heap->temp_lock); ++ return WINED3D_OK; ++} ++ ++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) ++{ ++ element_insert_range(&heap->pending_fenced_head, range); ++ ++ heap->pending_fenced_bytes += range.size; ++ if (heap->pending_fenced_bytes >= heap->pending_fenced_threshold_bytes) + { +- struct wined3d_map_range *erange = &elem->range; +- if (range.offset + range.size == erange->offset) ++ // TODO(acomminos): break this out into a separate function ++ struct wined3d_buffer_heap_fenced_element *fenced_elem; ++ struct wined3d_fence *fence; ++ HRESULT hr; ++ ++ if (FAILED(hr = wined3d_fence_create(device, &fence))) + { +- // Left side merge +- erange->offset = range.offset; +- erange->size += range.size; +- // Check if this causes a merge with elem->prev +- element_merge_adjacent(heap, elem); +- goto success; ++ ERR("Failed to create fence.\n"); ++ return hr; + } +- else if (erange->offset + erange->size == range.offset) ++ ++ fenced_elem = fenced_element_new(heap->pending_fenced_head, fence); ++ if (!fenced_elem) ++ return E_OUTOFMEMORY; ++ ++ TRACE_(d3d_perf)("Dispatching fenced buffer set.\n"); ++ heap->pending_fenced_bytes = 0; ++ heap->pending_fenced_head = NULL; ++ ++ // Append to end of fenced list, which works well if you assume that buffers ++ // are freed in some ascending draw call ordering. ++ if (!heap->fenced_head) + { +- // Right side merge +- erange->size += range.size; +- // Check if this causes a merge with elem->prev +- element_merge_adjacent(heap, elem); +- goto success; ++ heap->fenced_head = fenced_elem; ++ heap->fenced_tail = fenced_elem; + } +- else if (range.offset < erange->offset) ++ else + { +- // Append to left, non-merge case. +- new_elem = element_new(range.offset, range.size); +- new_elem->prev = elem->prev; +- new_elem->next = elem; +- if (elem->prev) +- { +- elem->prev->next = new_elem; +- } +- if (heap->free_list_head == elem) +- { +- heap->free_list_head = new_elem; +- } +- elem->prev = new_elem; +- goto success; ++ heap->fenced_tail->next = fenced_elem; ++ heap->fenced_tail = fenced_elem; + } +- last_elem = elem; +- elem = elem->next; +- } +- +- // Larger offset than all other elements in the list, append to the end. +- new_elem = element_new(range.offset, range.size); +- new_elem->prev = last_elem; +- last_elem->next = new_elem; +- +-success: +- LeaveCriticalSection(&heap->temp_lock); +- return WINED3D_OK; +-} +- +-HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_map_range range, struct wined3d_fence *fence) +-{ +- struct wined3d_buffer_heap_fenced_element *elem; +- elem = fenced_element_new(range, fence); +- if (!elem) +- return E_OUTOFMEMORY; + +- // Append to end of fenced list, which works well if you assume that buffers +- // are freed in some ascending draw call ordering. +- if (!heap->fenced_head) +- { +- heap->fenced_head = elem; +- heap->fenced_tail = elem; +- } +- else +- { +- heap->fenced_tail->next = elem; +- heap->fenced_tail = elem; ++ wined3d_fence_issue(fence, device); + } + + return WINED3D_OK; +@@ -294,29 +331,32 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str + if (!elem) + return WINED3D_OK; + +- while (elem) ++ res = wined3d_fence_test(elem->fence, device, 0); ++ switch (res) + { +- res = wined3d_fence_test(elem->fence, device, 0); +- switch (res) +- { +- case WINED3D_FENCE_OK: +- case WINED3D_FENCE_NOT_STARTED: ++ case WINED3D_FENCE_OK: ++ case WINED3D_FENCE_NOT_STARTED: ++ { ++ TRACE_(d3d_perf)("Freed fence group.\n"); ++ struct wined3d_buffer_heap_element *range_elem = elem->ranges; ++ // FIXME(acomminos): this might take a while. incrementally do this? ++ while (range_elem) + { +- struct wined3d_buffer_heap_fenced_element *next = elem->next; +- +- wined3d_fence_destroy(elem->fence); +- wined3d_buffer_heap_free(heap, elem->range); ++ struct wined3d_buffer_heap_element *next = range_elem->next; ++ wined3d_buffer_heap_free(heap, range_elem->range); ++ HeapFree(GetProcessHeap(), 0, range_elem); ++ range_elem = next; ++ } + +- heap->fenced_head = elem->next; +- HeapFree(GetProcessHeap(), 0, elem); +- // TODO(acomminos): bother to null out fenced_tail? ++ wined3d_fence_destroy(elem->fence); + +- elem = next; +- break; +- } +- default: +- return WINED3D_OK; +- } ++ heap->fenced_head = elem->next; ++ HeapFree(GetProcessHeap(), 0, elem); ++ // TODO(acomminos): bother to null out fenced_tail? ++ break; ++ } ++ default: ++ return WINED3D_OK; + } + + return WINED3D_OK; +diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c +index d7bdc21a25..bae5d9f4a1 100644 +--- a/dlls/wined3d/cs.c ++++ b/dlls/wined3d/cs.c +@@ -2643,26 +2643,15 @@ static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *da + const struct wined3d_cs_discard_buffer *op = data; + struct wined3d_buffer *buffer = op->buffer; + HRESULT hr; +- struct wined3d_fence *fence; + + // Poll for discarded buffers whose fenced have been triggered here to avoid + // excessive VRAM consumption. + wined3d_buffer_heap_cs_poll_fences(buffer->buffer_heap, cs->device); + + // TODO(acomminos): should call into buffer.c here instead. +- // XXX(acomminos): should we always create a new fence here? +- if (!FAILED(hr = wined3d_fence_create(cs->device, &fence))) ++ if (FAILED(hr = wined3d_buffer_heap_free_fenced(buffer->buffer_heap, cs->device, buffer->cs_persistent_map))) + { +- // TODO(acomminos): make more informed fences based on prior info. for now, +- // we do this because allocating and deleting fences repeatedly is brutal +- // for performance. look into why. +- wined3d_fence_issue(fence, cs->device); +- +- wined3d_buffer_heap_free_fenced(buffer->buffer_heap, buffer->cs_persistent_map, fence); +- } +- else +- { +- ERR("Failed to create fence for discarded buffer %p, hr %x\n. Freeing anyway.", buffer, hr); ++ ERR("Failed to do a fenced free on discarded buffer %p, hr %x\n. Freeing anyway.", buffer, hr); + wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map); + } + +diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c +index e0871d1636..bdab83b935 100644 +--- a/dlls/wined3d/device.c ++++ b/dlls/wined3d/device.c +@@ -850,9 +850,8 @@ static void create_buffer_heap(struct wined3d_device *device, struct wined3d_con + { + // TODO(acomminos): check if ARB_buffer_storage is supported, first- + // possibly make wined3d_buffer_heap_create fail. +- // TODO(acomminos): definitely don't take up all of vram. this is gonna get +- // paged anyway, though. +- const GLsizeiptr HBO_SIZE = device->adapter->vram_bytes / 4; ++ // TODO(acomminos): 512MB is sane for geometry, maybe not for PBO. ++ const GLsizeiptr HBO_SIZE = min(512000000, device->adapter->vram_bytes / 4); + const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; + + GLint ub_alignment; +diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h +index cfa48a5f3e..62433a39b1 100644 +--- a/dlls/wined3d/wined3d_private.h ++++ b/dlls/wined3d/wined3d_private.h +@@ -3679,10 +3679,12 @@ struct wined3d_buffer_heap + // TODO: add buckets for free regions of a given size. + struct wined3d_buffer_heap_element *free_list_head; + +- // store in FIFO order? that way, we can minimize our amount of time +- // waiting on fences? +- // XXX(acomminos): are fences guaranteed to be triggered in a serial +- // ordering? if so, we can early-terminate our polling ++ // Elements that need to be fenced, but haven't reached the required size. ++ struct wined3d_buffer_heap_element *pending_fenced_head; ++ GLsizeiptr pending_fenced_bytes; // Number of free bytes in the active fenced region. ++ GLsizeiptr pending_fenced_threshold_bytes; // Number of bytes required before fencing. ++ ++ // List of sets of buffers behind a common fence, in FIFO order. + struct wined3d_buffer_heap_fenced_element *fenced_head; + struct wined3d_buffer_heap_fenced_element *fenced_tail; + }; +@@ -3694,7 +3696,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s + // Immediately frees a heap-allocated buffer segment. + HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN; + // Enqueues a buffer segment to return to the heap once its fence has been signaled. +-HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_map_range range, struct wined3d_fence *fence) DECLSPEC_HIDDEN; ++HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) DECLSPEC_HIDDEN; + // Moves a buffers with a signaled fence from the fenced list to the free list. + // Must be executed on the CS thread. + HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN; +-- +2.16.2 + diff --git a/0006-wined3d-Switch-wined3d_buffer_heap-to-be-backed-by-a.patch b/0006-wined3d-Switch-wined3d_buffer_heap-to-be-backed-by-a.patch new file mode 100644 index 000000000000..89c9c8ec9eda --- /dev/null +++ b/0006-wined3d-Switch-wined3d_buffer_heap-to-be-backed-by-a.patch @@ -0,0 +1,383 @@ +From 2acd4b6ca9cadb84eb38bf1fc4bd5b2ccab3c532 Mon Sep 17 00:00:00 2001 +From: Andrew Comminos <andrew@comminos.com> +Date: Tue, 27 Feb 2018 16:11:10 -0800 +Subject: [PATCH 6/8] wined3d: Switch wined3d_buffer_heap to be backed by an + rb-tree. + +--- + dlls/wined3d/buffer_heap.c | 230 ++++++++++++++++++----------------------- + dlls/wined3d/wined3d_private.h | 6 +- + 2 files changed, 105 insertions(+), 131 deletions(-) + +diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c +index 165a957edd..45d3a2c7d7 100644 +--- a/dlls/wined3d/buffer_heap.c ++++ b/dlls/wined3d/buffer_heap.c +@@ -19,6 +19,7 @@ + + #include "config.h" + #include "wine/port.h" ++#include "wine/rbtree.h" + #include "wined3d_private.h" + + WINE_DEFAULT_DEBUG_CHANNEL(d3d); +@@ -26,21 +27,26 @@ WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); + + struct wined3d_buffer_heap_element + { ++ struct wine_rb_entry entry; + struct wined3d_map_range range; ++}; + +- struct wined3d_buffer_heap_element *prev; +- struct wined3d_buffer_heap_element *next; ++struct wined3d_buffer_heap_range ++{ ++ struct wined3d_map_range range; ++ ++ struct wined3d_buffer_heap_range *next; + }; + + struct wined3d_buffer_heap_fenced_element + { +- struct wined3d_buffer_heap_element *ranges; ++ struct wined3d_buffer_heap_range *ranges; + struct wined3d_fence *fence; + +- struct wined3d_buffer_heap_element *next; ++ struct wined3d_buffer_heap_fenced_element *next; + }; + +-static struct wined3d_buffer_heap_element* element_new(GLintptr offset, GLsizeiptr size) ++static struct wined3d_buffer_heap_element* element_new(GLsizei offset, GLsizei size) + { + struct wined3d_buffer_heap_element* elem; + elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_element)); +@@ -48,12 +54,10 @@ static struct wined3d_buffer_heap_element* element_new(GLintptr offset, GLsizeip + return NULL; + elem->range.offset = offset; + elem->range.size = size; +- elem->next = NULL; +- elem->prev = NULL; + return elem; + } + +-static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_element *ranges, struct wined3d_fence* fence) ++static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_range *ranges, struct wined3d_fence* fence) + { + struct wined3d_buffer_heap_fenced_element* elem; + elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_fenced_element)); +@@ -65,97 +69,16 @@ static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wine + return elem; + } + +-static void element_merge_adjacent(struct wined3d_buffer_heap_element **head, struct wined3d_buffer_heap_element *elem) +-{ +- struct wined3d_buffer_heap_element *cur_prev = elem->prev; +- struct wined3d_buffer_heap_element *cur_next = elem->next; +- if (cur_prev && cur_prev->range.offset + cur_prev->range.size == elem->range.offset) +- { +- elem->range.offset = cur_prev->range.offset; +- elem->range.size += cur_prev->range.size; +- +- elem->prev = cur_prev->prev; +- if (cur_prev->prev) +- cur_prev->prev->next = elem; +- +- if (cur_prev == *head) +- *head = elem; +- +- HeapFree(GetProcessHeap(), 0, cur_prev); +- } +- if (cur_next && cur_next->range.offset == elem->range.offset + elem->range.size) +- { +- elem->range.size += cur_next->range.size; +- elem->next = cur_next->next; +- if (cur_next->next) +- { +- cur_next->next->prev = elem; +- } +- HeapFree(GetProcessHeap(), 0, cur_next); +- } +-} +- +-// Inserts a range into the list starting at `elem`. +-// Updates the head of the list, if necessary. +-static void element_insert_range(struct wined3d_buffer_heap_element **head, struct wined3d_map_range range) ++static int free_tree_compare(const void *key, const struct wine_rb_entry *entry) + { +- struct wined3d_buffer_heap_element *elem = *head; +- struct wined3d_buffer_heap_element *new_elem; +- struct wined3d_buffer_heap_element *last_elem = NULL; +- +- // Special case where the head doesn't exist. +- if (!elem) +- { +- new_elem = element_new(range.offset, range.size); +- *head = new_elem; +- return; +- } +- +- while (elem) +- { +- struct wined3d_map_range *erange = &elem->range; +- if (range.offset + range.size == erange->offset) +- { +- // Left side merge +- erange->offset = range.offset; +- erange->size += range.size; +- // Check if this causes a merge with elem->prev +- element_merge_adjacent(head, elem); +- return; +- } +- else if (erange->offset + erange->size == range.offset) +- { +- // Right side merge +- erange->size += range.size; +- // Check if this causes a merge with elem->prev +- element_merge_adjacent(head, elem); +- return; +- } +- else if (range.offset < erange->offset) +- { +- // Append to left, non-merge case. +- new_elem = element_new(range.offset, range.size); +- new_elem->prev = elem->prev; +- new_elem->next = elem; +- if (elem->prev) +- { +- elem->prev->next = new_elem; +- } +- if (*head == elem) +- { +- *head = new_elem; +- } +- elem->prev = new_elem; +- return; +- } +- last_elem = elem; +- elem = elem->next; +- } +- +- // Larger offset than all other elements in the list, append to the end. +- new_elem = element_new(range.offset, range.size); +- new_elem->prev = last_elem; +- last_elem->next = new_elem; ++ const GLsizei offset = (const GLsizei) key; ++ struct wined3d_buffer_heap_element *elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); ++ ++ if (offset < elem->range.offset) ++ return -1; ++ if (offset > elem->range.offset) ++ return 1; ++ return 0; + } + + /* Context activation is done by the caller. */ +@@ -165,6 +88,7 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s + const GLenum buffer_target = GL_ARRAY_BUFFER; + GLbitfield access_flags; + GLbitfield storage_flags; ++ struct wined3d_buffer_heap_element *initial_elem; + + struct wined3d_buffer_heap *object; + +@@ -195,7 +119,11 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s + } + context_bind_bo(context, buffer_target, 0); + +- object->free_list_head = element_new(0, size); ++ wine_rb_init(&object->free_tree, free_tree_compare); ++ ++ initial_elem = element_new(0, size); ++ wine_rb_put(&object->free_tree, initial_elem->range.offset, &initial_elem->entry); ++ + object->fenced_head = object->fenced_tail = NULL; + object->alignment = alignment; + object->pending_fenced_bytes = 0; +@@ -217,10 +145,10 @@ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct win + + HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range) + { +- EnterCriticalSection(&heap->temp_lock); ++ struct wine_rb_entry *iter; + + // TODO(acomminos): free list binning? +- struct wined3d_buffer_heap_element *elem = heap->free_list_head; ++ EnterCriticalSection(&heap->temp_lock); + + // Round to the nearest power of two to reduce fragmentation. + size = 1ULL << (int)ceil(log2(size)); +@@ -229,40 +157,35 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s + if (heap->alignment) + size += heap->alignment - (size % heap->alignment); + +- while (elem != NULL) ++ iter = wine_rb_head(heap->free_tree.root); ++ while (iter) + { +- TRACE("allocation at %p, size %lld\n", heap->map_ptr + elem->range.offset, elem->range.size); +- // XXX(acomminos): first fit is highly likely to be dumb, needs more analysis. ++ struct wined3d_buffer_heap_element *elem = WINE_RB_ENTRY_VALUE(iter, struct wined3d_buffer_heap_element, entry); + if (elem->range.size >= size) + { +- // Pull the range from the start of the free list element. +- out_range->offset = elem->range.offset; +- out_range->size = size; // XXX(acomminos): should we really give the exact size requested? ++ // FIXME(acomminos): should key based on end so that we can slice ++ // off the front without changing the key. ++ GLsizei remaining = elem->range.size - size; + +- elem->range.offset += size; +- elem->range.size -= size; ++ out_range->offset = elem->range.offset; ++ out_range->size = size; + +- if (elem->range.size == 0) ++ wine_rb_remove(&heap->free_tree, iter); ++ if (remaining > 0) ++ { ++ elem->range.offset += size; ++ elem->range.size -= size; ++ wine_rb_put(&heap->free_tree, (const void*) elem->range.offset, &elem->entry); ++ } ++ else + { +- if (elem->prev) +- { +- elem->prev->next = elem->next; +- } +- if (elem->next) +- { +- elem->next->prev = elem->prev; +- } +- if (heap->free_list_head == elem) +- { +- heap->free_list_head = elem->next; +- } + HeapFree(GetProcessHeap(), 0, elem); + } +- ++ TRACE("Allocated %lld bytes at %lld\n", out_range->size, out_range->offset); + LeaveCriticalSection(&heap->temp_lock); + return WINED3D_OK; + } +- elem = elem->next; ++ iter = wine_rb_next(iter); + } + + LeaveCriticalSection(&heap->temp_lock); +@@ -271,17 +194,68 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s + + HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) + { ++ struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size); ++ struct wine_rb_entry *entry; ++ HRESULT hr; ++ ++ if (!elem) ++ return E_OUTOFMEMORY; ++ + EnterCriticalSection(&heap->temp_lock); ++ if (wine_rb_put(&heap->free_tree, (const void*) elem->range.offset, &elem->entry) == -1) ++ { ++ LeaveCriticalSection(&heap->temp_lock); ++ HeapFree(GetProcessHeap(), 0, elem); ++ return E_FAIL; ++ } + +- element_insert_range(&heap->free_list_head, range); ++ // Coalesce left. ++ entry = wine_rb_prev(&elem->entry); ++ if (entry) ++ { ++ TRACE("Coalesced left.\n"); ++ struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(elem, struct wined3d_buffer_heap_element, entry); ++ if (left_elem->range.offset + left_elem->range.size == elem->range.offset) ++ { ++ // Replace the newly inserted element with an extended node to its ++ // left. This doesn't change the key properties of the left node. ++ left_elem->range.size += range.size; ++ ++ wine_rb_remove(&heap->free_tree, &elem->entry); ++ HeapFree(GetProcessHeap(), 0, elem); ++ ++ elem = left_elem; ++ } ++ } ++ ++ // Coalesce right. ++ entry = wine_rb_next(&elem->entry); ++ if (entry) ++ { ++ TRACE("Coalesced right.\n"); ++ struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(elem, struct wined3d_buffer_heap_element, entry); ++ if (elem->range.offset + elem->range.size == right_elem->range.offset) ++ { ++ // Remove the right element, this doesn't change the keying of our ++ // newly inserted element. ++ elem->range.size += right_elem->range.size; ++ ++ wine_rb_remove(&heap->free_tree, &right_elem->entry); ++ HeapFree(GetProcessHeap(), 0, right_elem); ++ } ++ } + + LeaveCriticalSection(&heap->temp_lock); ++ + return WINED3D_OK; + } + + HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) + { +- element_insert_range(&heap->pending_fenced_head, range); ++ struct wined3d_buffer_heap_range *elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_range)); ++ elem->range = range; ++ elem->next = heap->pending_fenced_head; ++ heap->pending_fenced_head = elem; + + heap->pending_fenced_bytes += range.size; + if (heap->pending_fenced_bytes >= heap->pending_fenced_threshold_bytes) +@@ -338,11 +312,11 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str + case WINED3D_FENCE_NOT_STARTED: + { + TRACE_(d3d_perf)("Freed fence group.\n"); +- struct wined3d_buffer_heap_element *range_elem = elem->ranges; ++ struct wined3d_buffer_heap_range *range_elem = elem->ranges; + // FIXME(acomminos): this might take a while. incrementally do this? + while (range_elem) + { +- struct wined3d_buffer_heap_element *next = range_elem->next; ++ struct wined3d_buffer_heap_range *next = range_elem->next; + wined3d_buffer_heap_free(heap, range_elem->range); + HeapFree(GetProcessHeap(), 0, range_elem); + range_elem = next; +diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h +index 62433a39b1..3a45d9931e 100644 +--- a/dlls/wined3d/wined3d_private.h ++++ b/dlls/wined3d/wined3d_private.h +@@ -3665,7 +3665,7 @@ enum wined3d_buffer_conversion_type + CONV_POSITIONT, + }; + +-struct wined3d_buffer_heap_element; ++struct wined3d_buffer_heap_range; + struct wined3d_buffer_heap_fenced_element; + + // A heap that manages allocations with a single GL buffer. +@@ -3677,10 +3677,10 @@ struct wined3d_buffer_heap + CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list. + + // TODO: add buckets for free regions of a given size. +- struct wined3d_buffer_heap_element *free_list_head; ++ struct wine_rb_tree free_tree; // Free regions keyed on their base address. + + // Elements that need to be fenced, but haven't reached the required size. +- struct wined3d_buffer_heap_element *pending_fenced_head; ++ struct wined3d_buffer_heap_range *pending_fenced_head; + GLsizeiptr pending_fenced_bytes; // Number of free bytes in the active fenced region. + GLsizeiptr pending_fenced_threshold_bytes; // Number of bytes required before fencing. + +-- +2.16.2 + diff --git a/0007-wined3d-Add-segregated-free-bins-to-complement-rbtre.patch b/0007-wined3d-Add-segregated-free-bins-to-complement-rbtre.patch new file mode 100644 index 000000000000..fb80a0f89597 --- /dev/null +++ b/0007-wined3d-Add-segregated-free-bins-to-complement-rbtre.patch @@ -0,0 +1,305 @@ +From 89ca25afda23b8ed5f6dc5cc6a3fe010a4b63352 Mon Sep 17 00:00:00 2001 +From: Andrew Comminos <andrew@comminos.com> +Date: Tue, 27 Feb 2018 18:10:36 -0800 +Subject: [PATCH 7/8] wined3d: Add segregated free bins to complement + rbtree-backed free list. + +--- + dlls/wined3d/buffer_heap.c | 154 +++++++++++++++++++++++++++++++---------- + dlls/wined3d/wined3d_private.h | 8 ++- + 2 files changed, 122 insertions(+), 40 deletions(-) + +diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c +index 45d3a2c7d7..f4af1b93b9 100644 +--- a/dlls/wined3d/buffer_heap.c ++++ b/dlls/wined3d/buffer_heap.c +@@ -29,6 +29,10 @@ struct wined3d_buffer_heap_element + { + struct wine_rb_entry entry; + struct wined3d_map_range range; ++ ++ // Binned free list positions ++ struct wined3d_buffer_heap_element *next; ++ struct wined3d_buffer_heap_element *prev; + }; + + struct wined3d_buffer_heap_range +@@ -54,9 +58,76 @@ static struct wined3d_buffer_heap_element* element_new(GLsizei offset, GLsizei s + return NULL; + elem->range.offset = offset; + elem->range.size = size; ++ elem->prev = NULL; ++ elem->next = NULL; + return elem; + } + ++static inline int bitwise_log2_floor(GLsizei size) ++{ ++ // XXX(acomminos): I hope this gets unrolled. ++ for (int i = 8 * sizeof(GLsizei) - 1; i >= 0; i--) ++ { ++ if ((size >> i) & 1) { ++ return i; ++ } ++ } ++ return 0; ++} ++ ++static inline int bitwise_log2_ceil(GLsizei size) ++{ ++ // Add one to the floor of size if size isn't a power of two. ++ return bitwise_log2_floor(size) + !!(size & (size - 1)); ++} ++ ++static int element_bin(struct wined3d_buffer_heap_element *elem) ++{ ++ return min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_floor(elem->range.size)); ++} ++ ++// Inserts and element into the free tree and its bin. ++// Does not coalesce. ++static void element_insert_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) ++{ ++ int bin = element_bin(elem); ++ ++ elem->prev = NULL; ++ elem->next = heap->free_bins[bin]; ++ if (heap->free_bins[bin]) ++ heap->free_bins[bin]->prev = elem; ++ heap->free_bins[bin] = elem; ++ ++ if (wine_rb_put(&heap->free_tree, &elem->range.offset, &elem->entry) == -1) ++ { ++ ERR("Failed to insert element into free tree.\n"); ++ } ++ ++ TRACE("Inserted allocation at %p of size %lld into bin %d\n", elem->range.offset, elem->range.size, bin); ++} ++ ++// Removes an element from the free tree and its bin. ++static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) ++{ ++ int bin = element_bin(elem); ++ ++ if (elem->prev) ++ elem->prev->next = elem->next; ++ ++ if (elem->next) ++ elem->next->prev = elem->prev; ++ ++ if (!elem->prev) ++ heap->free_bins[bin] = elem->next; ++ ++ elem->prev = NULL; ++ elem->next = NULL; ++ ++ wine_rb_remove(&heap->free_tree, &elem->entry); ++ ++ TRACE("Freed allocation at %p of size %lld from bin %d\n", elem->range.offset, elem->range.size, bin); ++} ++ + static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_range *ranges, struct wined3d_fence* fence) + { + struct wined3d_buffer_heap_fenced_element* elem; +@@ -71,7 +142,7 @@ static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wine + + static int free_tree_compare(const void *key, const struct wine_rb_entry *entry) + { +- const GLsizei offset = (const GLsizei) key; ++ const GLsizei offset = *(const GLsizei*) key; + struct wined3d_buffer_heap_element *elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); + + if (offset < elem->range.offset) +@@ -121,9 +192,6 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s + + wine_rb_init(&object->free_tree, free_tree_compare); + +- initial_elem = element_new(0, size); +- wine_rb_put(&object->free_tree, initial_elem->range.offset, &initial_elem->entry); +- + object->fenced_head = object->fenced_tail = NULL; + object->alignment = alignment; + object->pending_fenced_bytes = 0; +@@ -131,6 +199,9 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s + object->pending_fenced_threshold_bytes = size / 4; // FIXME(acomminos): make this externally declared + InitializeCriticalSection(&object->temp_lock); + ++ initial_elem = element_new(0, size); ++ element_insert_free(object, initial_elem); ++ + *buffer_heap = object; + + return WINED3D_OK; +@@ -145,56 +216,57 @@ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct win + + HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range) + { +- struct wine_rb_entry *iter; +- +- // TODO(acomminos): free list binning? ++ int initial_bin; + EnterCriticalSection(&heap->temp_lock); + + // Round to the nearest power of two to reduce fragmentation. +- size = 1ULL << (int)ceil(log2(size)); ++ size = 1ULL << bitwise_log2_ceil(size); + + // Round up the size to a multiple of the heap's alignment. + if (heap->alignment) + size += heap->alignment - (size % heap->alignment); + +- iter = wine_rb_head(heap->free_tree.root); +- while (iter) ++ // TODO(acomminos): use bitwise arithmetic instead ++ initial_bin = min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_ceil(size)); ++ ++ for (int i = initial_bin; i < WINED3D_BUFFER_HEAP_BINS; i++) + { +- struct wined3d_buffer_heap_element *elem = WINE_RB_ENTRY_VALUE(iter, struct wined3d_buffer_heap_element, entry); +- if (elem->range.size >= size) ++ struct wined3d_buffer_heap_element *elem = heap->free_bins[i]; ++ if (elem) + { +- // FIXME(acomminos): should key based on end so that we can slice +- // off the front without changing the key. +- GLsizei remaining = elem->range.size - size; ++ struct wined3d_map_range remaining_range; ++ remaining_range.offset = elem->range.offset + size; ++ remaining_range.size = elem->range.size - size; + + out_range->offset = elem->range.offset; + out_range->size = size; + +- wine_rb_remove(&heap->free_tree, iter); +- if (remaining > 0) ++ // Remove the element from its current free bin to move it to the correct list. ++ element_remove_free(heap, elem); ++ ++ if (remaining_range.size > 0) + { +- elem->range.offset += size; +- elem->range.size -= size; +- wine_rb_put(&heap->free_tree, (const void*) elem->range.offset, &elem->entry); ++ elem->range = remaining_range; ++ element_insert_free(heap, elem); + } + else + { + HeapFree(GetProcessHeap(), 0, elem); + } +- TRACE("Allocated %lld bytes at %lld\n", out_range->size, out_range->offset); ++ + LeaveCriticalSection(&heap->temp_lock); + return WINED3D_OK; + } +- iter = wine_rb_next(iter); + } + + LeaveCriticalSection(&heap->temp_lock); +- return WINED3DERR_OUTOFVIDEOMEMORY; // FIXME(acomminos): probably wrong return code. ++ return WINED3DERR_OUTOFVIDEOMEMORY; + } + + HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) + { + struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size); ++ struct wined3d_map_range coalesced_range = range; + struct wine_rb_entry *entry; + HRESULT hr; + +@@ -202,7 +274,12 @@ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3 + return E_OUTOFMEMORY; + + EnterCriticalSection(&heap->temp_lock); +- if (wine_rb_put(&heap->free_tree, (const void*) elem->range.offset, &elem->entry) == -1) ++ ++ // TODO(acomminos): implement lower_bound, upper_bound. ++ // we don't have to allocate a new elem here, this sentry ++ // is just so I can get this proof of concept out the door. ++ ++ if (wine_rb_put(&heap->free_tree, &elem->range.offset, &elem->entry) == -1) + { + LeaveCriticalSection(&heap->temp_lock); + HeapFree(GetProcessHeap(), 0, elem); +@@ -214,17 +291,14 @@ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3 + if (entry) + { + TRACE("Coalesced left.\n"); +- struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(elem, struct wined3d_buffer_heap_element, entry); +- if (left_elem->range.offset + left_elem->range.size == elem->range.offset) ++ struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); ++ if (left_elem->range.offset + left_elem->range.size == coalesced_range.offset) + { +- // Replace the newly inserted element with an extended node to its +- // left. This doesn't change the key properties of the left node. +- left_elem->range.size += range.size; +- +- wine_rb_remove(&heap->free_tree, &elem->entry); +- HeapFree(GetProcessHeap(), 0, elem); ++ coalesced_range.offset = left_elem->range.offset; ++ coalesced_range.size = coalesced_range.size + left_elem->range.size; + +- elem = left_elem; ++ element_remove_free(heap, left_elem); ++ HeapFree(GetProcessHeap(), 0, left_elem); + } + } + +@@ -233,18 +307,22 @@ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3 + if (entry) + { + TRACE("Coalesced right.\n"); +- struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(elem, struct wined3d_buffer_heap_element, entry); ++ struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); + if (elem->range.offset + elem->range.size == right_elem->range.offset) + { +- // Remove the right element, this doesn't change the keying of our +- // newly inserted element. +- elem->range.size += right_elem->range.size; ++ coalesced_range.size += right_elem->range.size; + +- wine_rb_remove(&heap->free_tree, &right_elem->entry); ++ element_remove_free(heap, right_elem); + HeapFree(GetProcessHeap(), 0, right_elem); + } + } + ++ wine_rb_remove(&heap->free_tree, &elem->entry); ++ ++ // Update with coalesced range. ++ elem->range = coalesced_range; ++ element_insert_free(heap, elem); ++ + LeaveCriticalSection(&heap->temp_lock); + + return WINED3D_OK; +diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h +index 3a45d9931e..14cad92f0f 100644 +--- a/dlls/wined3d/wined3d_private.h ++++ b/dlls/wined3d/wined3d_private.h +@@ -3665,8 +3665,12 @@ enum wined3d_buffer_conversion_type + CONV_POSITIONT, + }; + +-struct wined3d_buffer_heap_range; ++struct wined3d_buffer_heap_element; + struct wined3d_buffer_heap_fenced_element; ++struct wined3d_buffer_heap_range; ++ ++// Number of power-of-two buckets to populate. ++#define WINED3D_BUFFER_HEAP_BINS 32 + + // A heap that manages allocations with a single GL buffer. + struct wined3d_buffer_heap +@@ -3676,7 +3680,7 @@ struct wined3d_buffer_heap + GLsizeiptr alignment; + CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list. + +- // TODO: add buckets for free regions of a given size. ++ struct wined3d_buffer_heap_element *free_bins[WINED3D_BUFFER_HEAP_BINS]; + struct wine_rb_tree free_tree; // Free regions keyed on their base address. + + // Elements that need to be fenced, but haven't reached the required size. +-- +2.16.2 + diff --git a/0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch b/0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch new file mode 100644 index 000000000000..7dd0c7735c85 --- /dev/null +++ b/0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch @@ -0,0 +1,612 @@ +From 44fba11f530b1dff8a8e10fec15b0ca6465e3623 Mon Sep 17 00:00:00 2001 +From: Andrew Comminos <andrew@comminos.com> +Date: Wed, 28 Feb 2018 22:46:31 -0800 +Subject: [PATCH 8/8] wined3d: Implement lazy-free using a deferred free list. + +--- + dlls/wined3d/buffer_heap.c | 308 ++++++++++++++++++++++++++++------------- + dlls/wined3d/cs.c | 12 +- + dlls/wined3d/device.c | 16 ++- + dlls/wined3d/wined3d_private.h | 22 ++- + 4 files changed, 248 insertions(+), 110 deletions(-) + +diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c +index f4af1b93b9..3fe5541a6a 100644 +--- a/dlls/wined3d/buffer_heap.c ++++ b/dlls/wined3d/buffer_heap.c +@@ -27,24 +27,20 @@ WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); + + struct wined3d_buffer_heap_element + { +- struct wine_rb_entry entry; + struct wined3d_map_range range; + ++ // rbtree data ++ struct wine_rb_entry entry; ++ BOOL in_tree; ++ + // Binned free list positions + struct wined3d_buffer_heap_element *next; + struct wined3d_buffer_heap_element *prev; + }; + +-struct wined3d_buffer_heap_range +-{ +- struct wined3d_map_range range; +- +- struct wined3d_buffer_heap_range *next; +-}; +- + struct wined3d_buffer_heap_fenced_element + { +- struct wined3d_buffer_heap_range *ranges; ++ struct wined3d_buffer_heap_bin_set free_list; + struct wined3d_fence *fence; + + struct wined3d_buffer_heap_fenced_element *next; +@@ -58,6 +54,7 @@ static struct wined3d_buffer_heap_element* element_new(GLsizei offset, GLsizei s + return NULL; + elem->range.offset = offset; + elem->range.size = size; ++ elem->in_tree = FALSE; + elem->prev = NULL; + elem->next = NULL; + return elem; +@@ -86,27 +83,41 @@ static int element_bin(struct wined3d_buffer_heap_element *elem) + return min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_floor(elem->range.size)); + } + +-// Inserts and element into the free tree and its bin. +-// Does not coalesce. +-static void element_insert_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) ++// Inserts an element into the appropriate free list bin. ++static void element_insert_free_bin(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) + { + int bin = element_bin(elem); + + elem->prev = NULL; +- elem->next = heap->free_bins[bin]; +- if (heap->free_bins[bin]) +- heap->free_bins[bin]->prev = elem; +- heap->free_bins[bin] = elem; ++ elem->next = heap->free_list.bins[bin].head; ++ if (heap->free_list.bins[bin].head) ++ heap->free_list.bins[bin].head->prev = elem; ++ heap->free_list.bins[bin].head = elem; ++ ++ if (!heap->free_list.bins[bin].tail) ++ heap->free_list.bins[bin].tail = elem; ++ ++ TRACE("Inserted allocation at %p of size %lld into bin %d\n", elem->range.offset, elem->range.size, bin); ++} + ++// Inserts an elemnet into the free tree. Does not perform coalescing. ++static void element_insert_free_tree(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) ++{ ++ if (elem->in_tree) ++ { ++ FIXME("Element %p already in free tree, ignoring.\n", elem); ++ return; ++ } + if (wine_rb_put(&heap->free_tree, &elem->range.offset, &elem->entry) == -1) + { + ERR("Failed to insert element into free tree.\n"); ++ return; + } +- +- TRACE("Inserted allocation at %p of size %lld into bin %d\n", elem->range.offset, elem->range.size, bin); ++ TRACE("Inserted allocation at %p of size %lld into free tree\n", elem->range.offset, elem->range.size); ++ elem->in_tree = TRUE; + } + +-// Removes an element from the free tree and its bin. ++// Removes an element from the free tree, its bin, and the coalesce list. + static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) + { + int bin = element_bin(elem); +@@ -117,24 +128,31 @@ static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d + if (elem->next) + elem->next->prev = elem->prev; + +- if (!elem->prev) +- heap->free_bins[bin] = elem->next; ++ if (elem == heap->free_list.bins[bin].head) ++ heap->free_list.bins[bin].head = elem->next; ++ ++ if (elem == heap->free_list.bins[bin].tail) ++ heap->free_list.bins[bin].head = elem->prev; + + elem->prev = NULL; + elem->next = NULL; + +- wine_rb_remove(&heap->free_tree, &elem->entry); ++ if (elem->in_tree) ++ { ++ wine_rb_remove(&heap->free_tree, &elem->entry); ++ elem->in_tree = FALSE; ++ } + + TRACE("Freed allocation at %p of size %lld from bin %d\n", elem->range.offset, elem->range.size, bin); + } + +-static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_range *ranges, struct wined3d_fence* fence) ++static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_bin_set bins, struct wined3d_fence* fence) + { + struct wined3d_buffer_heap_fenced_element* elem; + elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_fenced_element)); + if (!elem) + return NULL; +- elem->ranges = ranges; ++ elem->free_list = bins; + elem->fence = fence; + elem->next = NULL; + return elem; +@@ -163,6 +181,11 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s + + struct wined3d_buffer_heap *object; + ++ if ((alignment & (alignment - 1)) != 0) ++ { ++ return E_FAIL; ++ } ++ + if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object)))) + { + return E_OUTOFMEMORY; +@@ -194,13 +217,13 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s + + object->fenced_head = object->fenced_tail = NULL; + object->alignment = alignment; +- object->pending_fenced_bytes = 0; +- object->pending_fenced_head = NULL; +- object->pending_fenced_threshold_bytes = size / 4; // FIXME(acomminos): make this externally declared ++ // FIXME(acomminos): make this externally declared ++ object->pending_fenced_threshold_bytes = 16 * 1024 * 1024; + InitializeCriticalSection(&object->temp_lock); + + initial_elem = element_new(0, size); +- element_insert_free(object, initial_elem); ++ // Don't bother adding the initial allocation to the coalescing tree. ++ element_insert_free_bin(object, initial_elem); + + *buffer_heap = object; + +@@ -217,21 +240,23 @@ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct win + HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range) + { + int initial_bin; +- EnterCriticalSection(&heap->temp_lock); + +- // Round to the nearest power of two to reduce fragmentation. +- size = 1ULL << bitwise_log2_ceil(size); ++ EnterCriticalSection(&heap->temp_lock); + +- // Round up the size to a multiple of the heap's alignment. ++ // Align size values where possible. + if (heap->alignment) + size += heap->alignment - (size % heap->alignment); + +- // TODO(acomminos): use bitwise arithmetic instead ++ // After alignment, reduce fragmentation by rounding to next power of two. ++ // If the alignment is a power of two (which it should be), this should be ++ // no problem. ++ size = 1 << bitwise_log2_ceil(size); ++ + initial_bin = min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_ceil(size)); + + for (int i = initial_bin; i < WINED3D_BUFFER_HEAP_BINS; i++) + { +- struct wined3d_buffer_heap_element *elem = heap->free_bins[i]; ++ struct wined3d_buffer_heap_element *elem = heap->free_list.bins[i].head; + if (elem) + { + struct wined3d_map_range remaining_range; +@@ -247,7 +272,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s + if (remaining_range.size > 0) + { + elem->range = remaining_range; +- element_insert_free(heap, elem); ++ element_insert_free_bin(heap, elem); + } + else + { +@@ -260,68 +285,33 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s + } + + LeaveCriticalSection(&heap->temp_lock); ++ ++ // Attempt to coalesce blocks until an allocation of the requested size is ++ // available. ++ GLsizei coalesced_size; ++ while (SUCCEEDED(wined3d_buffer_heap_deferred_coalesce(heap, &coalesced_size))) ++ { ++ FIXME_(d3d_perf)("Forcing coalesce, not enough free space in buffer heap.\n"); ++ if (coalesced_size >= size) ++ { ++ return wined3d_buffer_heap_alloc(heap, size, out_range); ++ } ++ } ++ + return WINED3DERR_OUTOFVIDEOMEMORY; + } + + HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) + { + struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size); +- struct wined3d_map_range coalesced_range = range; +- struct wine_rb_entry *entry; +- HRESULT hr; + + if (!elem) + return E_OUTOFMEMORY; + + EnterCriticalSection(&heap->temp_lock); + +- // TODO(acomminos): implement lower_bound, upper_bound. +- // we don't have to allocate a new elem here, this sentry +- // is just so I can get this proof of concept out the door. +- +- if (wine_rb_put(&heap->free_tree, &elem->range.offset, &elem->entry) == -1) +- { +- LeaveCriticalSection(&heap->temp_lock); +- HeapFree(GetProcessHeap(), 0, elem); +- return E_FAIL; +- } +- +- // Coalesce left. +- entry = wine_rb_prev(&elem->entry); +- if (entry) +- { +- TRACE("Coalesced left.\n"); +- struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); +- if (left_elem->range.offset + left_elem->range.size == coalesced_range.offset) +- { +- coalesced_range.offset = left_elem->range.offset; +- coalesced_range.size = coalesced_range.size + left_elem->range.size; +- +- element_remove_free(heap, left_elem); +- HeapFree(GetProcessHeap(), 0, left_elem); +- } +- } +- +- // Coalesce right. +- entry = wine_rb_next(&elem->entry); +- if (entry) +- { +- TRACE("Coalesced right.\n"); +- struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); +- if (elem->range.offset + elem->range.size == right_elem->range.offset) +- { +- coalesced_range.size += right_elem->range.size; +- +- element_remove_free(heap, right_elem); +- HeapFree(GetProcessHeap(), 0, right_elem); +- } +- } +- +- wine_rb_remove(&heap->free_tree, &elem->entry); +- +- // Update with coalesced range. +- elem->range = coalesced_range; +- element_insert_free(heap, elem); ++ // Only insert the element into a free bin, coalescing will occur later. ++ element_insert_free_bin(heap, elem); + + LeaveCriticalSection(&heap->temp_lock); + +@@ -330,10 +320,21 @@ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3 + + HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) + { +- struct wined3d_buffer_heap_range *elem = HeapAlloc(GetProcessHeap(), 0, sizeof(struct wined3d_buffer_heap_range)); +- elem->range = range; +- elem->next = heap->pending_fenced_head; +- heap->pending_fenced_head = elem; ++ struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size); ++ int bin_index = element_bin(elem); ++ struct wined3d_buffer_heap_bin *bin = &heap->pending_fenced_bins.bins[bin_index]; ++ ++ if (bin->tail) ++ { ++ bin->tail->next = elem; ++ elem->prev = bin->tail; ++ bin->tail = elem; ++ } ++ else ++ { ++ bin->head = elem; ++ bin->tail = elem; ++ } + + heap->pending_fenced_bytes += range.size; + if (heap->pending_fenced_bytes >= heap->pending_fenced_threshold_bytes) +@@ -349,13 +350,13 @@ HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct + return hr; + } + +- fenced_elem = fenced_element_new(heap->pending_fenced_head, fence); ++ fenced_elem = fenced_element_new(heap->pending_fenced_bins, fence); + if (!fenced_elem) + return E_OUTOFMEMORY; + + TRACE_(d3d_perf)("Dispatching fenced buffer set.\n"); + heap->pending_fenced_bytes = 0; +- heap->pending_fenced_head = NULL; ++ memset(&heap->pending_fenced_bins, 0, sizeof(heap->pending_fenced_bins)); + + // Append to end of fenced list, which works well if you assume that buffers + // are freed in some ascending draw call ordering. +@@ -390,15 +391,28 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str + case WINED3D_FENCE_NOT_STARTED: + { + TRACE_(d3d_perf)("Freed fence group.\n"); +- struct wined3d_buffer_heap_range *range_elem = elem->ranges; +- // FIXME(acomminos): this might take a while. incrementally do this? +- while (range_elem) ++ ++ EnterCriticalSection(&heap->temp_lock); ++ for (int i = 0; i < WINED3D_BUFFER_HEAP_BINS; i++) + { +- struct wined3d_buffer_heap_range *next = range_elem->next; +- wined3d_buffer_heap_free(heap, range_elem->range); +- HeapFree(GetProcessHeap(), 0, range_elem); +- range_elem = next; ++ struct wined3d_buffer_heap_bin *elem_bin = &elem->free_list.bins[i]; ++ if (!elem_bin->tail) ++ continue; ++ ++ struct wined3d_buffer_heap_bin *heap_bin = &heap->free_list.bins[i]; ++ if (heap_bin->head) ++ { ++ elem_bin->tail->next = heap_bin->head; ++ heap_bin->head->prev = elem_bin->tail; ++ heap_bin->head = elem_bin->head; ++ } ++ else ++ { ++ heap_bin->head = elem_bin->head; ++ heap_bin->tail = elem_bin->tail; ++ } + } ++ LeaveCriticalSection(&heap->temp_lock); + + wined3d_fence_destroy(elem->fence); + +@@ -413,3 +427,101 @@ HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, str + + return WINED3D_OK; + } ++ ++HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, GLsizei *coalesced_size) ++{ ++ struct wined3d_buffer_heap_element *elem = NULL; ++ struct wine_rb_entry *entry; ++ struct wined3d_map_range coalesced_range; ++ ++ // XXX(acomminos): is it always the best idea to coalesce by smallest ++ // chunks? these are the most likely to be useless. ++ EnterCriticalSection(&heap->temp_lock); ++ ++ // TODO(acomminos): on one hand, if there's a lot of elements in the list, ++ // it's highly fragmented. on the other, we can potentially waste a decent ++ // sum of time checking for uncoalesced bins. ++ for (int i = 0; !elem && i < WINED3D_BUFFER_HEAP_BINS; i++) ++ { ++ struct wined3d_buffer_heap_element *next = heap->free_list.bins[i].head; ++ while (next) ++ { ++ if (!next->in_tree) ++ { ++ // Find the first element not in-tree. ++ elem = next; ++ break; ++ } ++ next = next->next; ++ } ++ } ++ ++ // TODO(acomminos): acquire a separate lock for the free tree here. ++ if (!elem) ++ { ++ LeaveCriticalSection(&heap->temp_lock); ++ return E_FAIL; ++ } ++ element_remove_free(heap, elem); ++ ++ // Remove element from free list, we may change its size or offset. ++ coalesced_range = elem->range; ++ ++ // TODO(acomminos): implement lower_bound, upper_bound. ++ // we don't have to allocate a new elem here, this sentry ++ // is just so I can get this proof of concept out the door. ++ ++ if (wine_rb_put(&heap->free_tree, &elem->range.offset, &elem->entry) == -1) ++ { ++ LeaveCriticalSection(&heap->temp_lock); ++ return E_FAIL; ++ } ++ ++ // Coalesce left. ++ entry = wine_rb_prev(&elem->entry); ++ if (entry) ++ { ++ TRACE("Coalesced left.\n"); ++ struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); ++ if (left_elem->range.offset + left_elem->range.size == coalesced_range.offset) ++ { ++ coalesced_range.offset = left_elem->range.offset; ++ coalesced_range.size = coalesced_range.size + left_elem->range.size; ++ ++ element_remove_free(heap, left_elem); ++ HeapFree(GetProcessHeap(), 0, left_elem); ++ } ++ } ++ ++ // Coalesce right. ++ entry = wine_rb_next(&elem->entry); ++ if (entry) ++ { ++ TRACE("Coalesced right.\n"); ++ struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); ++ if (elem->range.offset + elem->range.size == right_elem->range.offset) ++ { ++ coalesced_range.size += right_elem->range.size; ++ ++ element_remove_free(heap, right_elem); ++ HeapFree(GetProcessHeap(), 0, right_elem); ++ } ++ } ++ ++ wine_rb_remove(&heap->free_tree, &elem->entry); ++ ++ if (coalesced_range.size > elem->range.size) ++ FIXME_(d3d_perf)("Coalesced out an extra %lld bytes\n", coalesced_range.size - elem->range.size); ++ ++ // Update with coalesced range. ++ elem->range = coalesced_range; ++ ++ if (coalesced_size) ++ *coalesced_size = coalesced_range.size; ++ ++ element_insert_free_bin(heap, elem); ++ element_insert_free_tree(heap, elem); ++ ++ LeaveCriticalSection(&heap->temp_lock); ++ return WINED3D_OK; ++} +diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c +index bae5d9f4a1..8fd9b01a36 100644 +--- a/dlls/wined3d/cs.c ++++ b/dlls/wined3d/cs.c +@@ -2644,10 +2644,6 @@ static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *da + struct wined3d_buffer *buffer = op->buffer; + HRESULT hr; + +- // Poll for discarded buffers whose fenced have been triggered here to avoid +- // excessive VRAM consumption. +- wined3d_buffer_heap_cs_poll_fences(buffer->buffer_heap, cs->device); +- + // TODO(acomminos): should call into buffer.c here instead. + if (FAILED(hr = wined3d_buffer_heap_free_fenced(buffer->buffer_heap, cs->device, buffer->cs_persistent_map))) + { +@@ -2975,6 +2971,14 @@ static void poll_queries(struct wined3d_cs *cs) + list_init(&query->poll_list_entry); + InterlockedIncrement(&query->counter_retrieved); + } ++ ++ // Poll for discarded persistent buffers whose fences have been triggered ++ // here to avoid excessive VRAM consumption. ++ // XXX(acomminos): clean this up, integrate with prior section. ++ if (cs->device->wo_buffer_heap) ++ wined3d_buffer_heap_cs_poll_fences(cs->device->wo_buffer_heap, cs->device); ++ if (cs->device->cb_buffer_heap) ++ wined3d_buffer_heap_cs_poll_fences(cs->device->cb_buffer_heap, cs->device); + } + + static void wined3d_cs_wait_event(struct wined3d_cs *cs) +diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c +index bdab83b935..9f300ca572 100644 +--- a/dlls/wined3d/device.c ++++ b/dlls/wined3d/device.c +@@ -848,26 +848,32 @@ static void destroy_default_samplers(struct wined3d_device *device, struct wined + /* Context activation is done by the caller. */ + static void create_buffer_heap(struct wined3d_device *device, struct wined3d_context *context) + { ++ const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; + // TODO(acomminos): check if ARB_buffer_storage is supported, first- + // possibly make wined3d_buffer_heap_create fail. +- // TODO(acomminos): 512MB is sane for geometry, maybe not for PBO. +- const GLsizeiptr HBO_SIZE = min(512000000, device->adapter->vram_bytes / 4); +- const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; ++ // TODO(acomminos): kill this magic number. perhaps base on vram. ++ GLsizeiptr geo_heap_size = 512 * 1024 * 1024; ++ GLsizeiptr cb_heap_size = 256 * 1024 * 1024; + + GLint ub_alignment; + gl_info->gl_ops.gl.p_glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &ub_alignment); + ++ // Align constant buffer heap size, in case GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT isn't a power of two (for some reason). ++ cb_heap_size -= cb_heap_size % ub_alignment; ++ + HRESULT hr; +- if (FAILED(hr = wined3d_buffer_heap_create(context, HBO_SIZE, 0, TRUE, &device->wo_buffer_heap))) ++ if (FAILED(hr = wined3d_buffer_heap_create(context, geo_heap_size, 0, TRUE, &device->wo_buffer_heap))) + { + ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr); + } + + // TODO(acomminos): can likely use a way smaller heap for CBs by querying limits +- if (FAILED(hr = wined3d_buffer_heap_create(context, HBO_SIZE, ub_alignment, TRUE, &device->cb_buffer_heap))) ++ if (FAILED(hr = wined3d_buffer_heap_create(context, cb_heap_size, ub_alignment, TRUE, &device->cb_buffer_heap))) + { + ERR("Failed to create persistent buffer heap for constant buffers, hr %#x.\n", hr); + } ++ ++ FIXME("Initialized wine-pba (geo_heap_size: %lld, cb_heap_size: %lld, ub_align: %d)\n", geo_heap_size, cb_heap_size, ub_alignment); + } + + /* Context activation is done by the caller. */ +diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h +index 14cad92f0f..3011609ee1 100644 +--- a/dlls/wined3d/wined3d_private.h ++++ b/dlls/wined3d/wined3d_private.h +@@ -3667,11 +3667,21 @@ enum wined3d_buffer_conversion_type + + struct wined3d_buffer_heap_element; + struct wined3d_buffer_heap_fenced_element; +-struct wined3d_buffer_heap_range; + + // Number of power-of-two buckets to populate. + #define WINED3D_BUFFER_HEAP_BINS 32 + ++struct wined3d_buffer_heap_bin ++{ ++ struct wined3d_buffer_heap_element *head; ++ struct wined3d_buffer_heap_element *tail; ++}; ++ ++struct wined3d_buffer_heap_bin_set ++{ ++ struct wined3d_buffer_heap_bin bins[WINED3D_BUFFER_HEAP_BINS]; ++}; ++ + // A heap that manages allocations with a single GL buffer. + struct wined3d_buffer_heap + { +@@ -3680,11 +3690,11 @@ struct wined3d_buffer_heap + GLsizeiptr alignment; + CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list. + +- struct wined3d_buffer_heap_element *free_bins[WINED3D_BUFFER_HEAP_BINS]; ++ struct wined3d_buffer_heap_bin_set free_list; + struct wine_rb_tree free_tree; // Free regions keyed on their base address. + + // Elements that need to be fenced, but haven't reached the required size. +- struct wined3d_buffer_heap_range *pending_fenced_head; ++ struct wined3d_buffer_heap_bin_set pending_fenced_bins; + GLsizeiptr pending_fenced_bytes; // Number of free bytes in the active fenced region. + GLsizeiptr pending_fenced_threshold_bytes; // Number of bytes required before fencing. + +@@ -3696,6 +3706,7 @@ struct wined3d_buffer_heap + HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **heap) DECLSPEC_HIDDEN; + HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN; + // Fetches a buffer from the heap of at least the given size. ++// Attempts to coalesce blocks under memory pressure. + HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range* out_range) DECLSPEC_HIDDEN; + // Immediately frees a heap-allocated buffer segment. + HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN; +@@ -3704,6 +3715,11 @@ HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct + // Moves a buffers with a signaled fence from the fenced list to the free list. + // Must be executed on the CS thread. + HRESULT wined3d_buffer_heap_cs_poll_fences(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN; ++// Performs deferred coalescing of fenced buffers. To be called when the CS ++// thread is idle, or under memory pressure. ++// Outputs the size of the new coalesced region in `coalesced_size`, or an error ++// if there are no remaining elements to be coalesced. ++HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, GLsizei *coalesced_size) DECLSPEC_HIDDEN; + + struct wined3d_buffer + { +-- +2.16.2 + diff --git a/30-win32-aliases.conf b/30-win32-aliases.conf new file mode 100644 index 000000000000..99ae1f7b83a5 --- /dev/null +++ b/30-win32-aliases.conf @@ -0,0 +1,20 @@ +<?xml version="1.0"?> +<!DOCTYPE fontconfig SYSTEM "fonts.dtd"> +<fontconfig> + <alias binding="same"> + <family>MS Shell Dlg</family> + <accept><family>Microsoft Sans Serif</family></accept> + <default><family>sans-serif</family></default> + </alias> + <alias binding="same"> + <family>MS Shell Dlg 2</family> + <accept><family>Tahoma</family></accept> + <default><family>sans-serif</family></default> + </alias> + + <alias binding="same"> + <family>MS Sans Serif</family> + <prefer><family>Microsoft Sans Serif</family></prefer> + <default><family>sans-serif</family></default> + </alias> +</fontconfig> diff --git a/PKGBUILD b/PKGBUILD new file mode 100644 index 000000000000..82b86bc8e7d2 --- /dev/null +++ b/PKGBUILD @@ -0,0 +1,219 @@ +# $Id$ +# Maintainer: Stefan Schmidt <thrimbor.github@gmail.com> +# Contributor: Felix Yan <felixonmars@archlinux.org> +# Contributor: Sven-Hendrik Haase <sh@lutzhaase.com> +# Contributor: Jan "heftig" Steffens <jan.steffens@gmail.com> +# Contributor: Eduardo Romero <eduardo@archlinux.org> +# Contributor: Giovanni Scafora <giovanni@archlinux.org> + +pkgname=wine-staging-pba +pkgver=2.21 +pkgrel=1 + +_pkgbasever=${pkgver/rc/-rc} + +source=("https://github.com/wine-compholio/wine-patched/archive/staging-$_pkgbasever.tar.gz" + harmony-fix.diff + 30-win32-aliases.conf + "0001-wined3d-Implement-a-simple-heap-allocator-backed-by-.patch" + "0002-wined3d-Allocate-global-write-only-persistent-buffer.patch" + "0003-wined3d-Add-support-for-persistently-mapped-wined3d_.patch" + "0004-wined3d-Implement-aligned-persistent-heaps-for-persi.patch" + "0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch" + "0006-wined3d-Switch-wined3d_buffer_heap-to-be-backed-by-a.patch" + "0007-wined3d-Add-segregated-free-bins-to-complement-rbtre.patch" + "0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch") + +sha512sums=('4e3fe2eb81360bfa095194ab5b9647636cbeac0dc3955e6a3ee26062f650c66a4bd2353a1cd8063f9b7c65a6bcc1f892cc7c1d0f00c3c8525a124ec2109d1e86' + 'b86edf07bfc560f403fdfd5a71f97930ee2a4c3f76c92cc1a0dbb2e107be9db3bed3a727a0430d8a049583c63dd11f5d4567fb7aa69b193997c6da241acc4f2e' + '6e54ece7ec7022b3c9d94ad64bdf1017338da16c618966e8baf398e6f18f80f7b0576edf1d1da47ed77b96d577e4cbb2bb0156b0b11c183a0accf22654b0a2bb' + 'b9141fbe8f5189eb46c83b178497f9ee6d1f2daec3009877557ba28e5f2ce6d818cfef5b0eba15c1e9e4c50dd5950486f8091793d704ec532f82155056725e71' + '8e112e25392fb2bd035c4b8792e43ad86bf81b1c24ff429ff8943a2c02ee761fc25446791475e4115e6b03f50cdb4cf6a8f128cc770c3941b59ee1dfbe79137b' + '7335797924f1c4403a438ccfe36f8a650ddf8271d33ca962e270cf34762170038017cd53cad35f1ad61128f2c496edb68791783259df33cb997a73959136bdc0' + '52ebb56c6adfbef526d2db19618f9155084dacd7600d166f04ba5423c63a4294294589d675c391e577330f1b68755bb5d3b6a2cd3006902269cb73140973dba3' + 'd326b8da8fb02462bac178a23e18f5468de62780717c24eadb453201b2b6b6439d2be7dda38e40f24fdc570dd5bc54102e7bf05868c53b17b27f6b9a06fccdb0' + '04b41d4198138dbfe1399e7ed1e406fb265472d08a3e4de3c5c8584574b167613c598d7fa397c6944b809a96f699a4447694291296fa01a8e07b8ea96026ed2f' + '9f90b7adc0ed87daac0f453caf2fff8b338061d96a9cd890f305704f9b22581232c6a207eb9eb1670c69b083caa780a6e44280df47c95b4e6e8e73f046f7c8a5' + '8fd8d2e262327e78dad69186ebf091dbc034fab2675f0be91df75c88ae6f5e5ae6f456a2098c460861946390ce139e998f4b0f77e33671c8a7062a5e06b6e4ca') + +pkgdesc="A compatibility layer for running Windows programs - Staging branch" +url="http://www.wine-staging.com" +arch=(x86_64) +options=(staticlibs) +license=(LGPL) + +_depends=( + attr lib32-attr + fontconfig lib32-fontconfig + lcms2 lib32-lcms2 + libxml2 lib32-libxml2 + libxcursor lib32-libxcursor + libxrandr lib32-libxrandr + libxdamage lib32-libxdamage + libxi lib32-libxi + gettext lib32-gettext + freetype2 lib32-freetype2 + glu lib32-glu + libsm lib32-libsm + gcc-libs lib32-gcc-libs + libpcap lib32-libpcap + desktop-file-utils +) + +makedepends=(autoconf ncurses bison perl fontforge flex + 'gcc>=4.5.0-2' + giflib lib32-giflib + libpng lib32-libpng + gnutls lib32-gnutls + libxinerama lib32-libxinerama + libxcomposite lib32-libxcomposite + libxmu lib32-libxmu + libxxf86vm lib32-libxxf86vm + libldap lib32-libldap + mpg123 lib32-mpg123 + openal lib32-openal + v4l-utils lib32-v4l-utils + alsa-lib lib32-alsa-lib + libxcomposite lib32-libxcomposite + mesa lib32-mesa + mesa-libgl lib32-mesa-libgl + opencl-icd-loader lib32-opencl-icd-loader + libxslt lib32-libxslt + libpulse lib32-libpulse + libva lib32-libva + gtk3 lib32-gtk3 + gst-plugins-base-libs lib32-gst-plugins-base-libs + samba + opencl-headers +) + +optdepends=( + giflib lib32-giflib + libpng lib32-libpng + libldap lib32-libldap + gnutls lib32-gnutls + mpg123 lib32-mpg123 + openal lib32-openal + v4l-utils lib32-v4l-utils + libpulse lib32-libpulse + alsa-plugins lib32-alsa-plugins + alsa-lib lib32-alsa-lib + libjpeg-turbo lib32-libjpeg-turbo + libxcomposite lib32-libxcomposite + libxinerama lib32-libxinerama + ncurses lib32-ncurses + opencl-icd-loader lib32-opencl-icd-loader + libxslt lib32-libxslt + libva lib32-libva + gtk3 lib32-gtk3 + gst-plugins-base-libs lib32-gst-plugins-base-libs + vulkan-icd-loader lib32-vulkan-icd-loader + cups + samba dosbox +) + +if [[ $CARCH == i686 ]]; then + # Strip lib32 etc. on i686 + _depends=(${_depends[@]/*32-*/}) + makedepends=(${makedepends[@]/*32-*/} ${_depends[@]}) + makedepends=(${makedepends[@]/*-multilib*/}) + optdepends=(${optdepends[@]/*32-*/}) + provides=("wine=$pkgver") + conflicts=('wine' 'wine-staging') +else + makedepends=(${makedepends[@]} ${_depends[@]}) + provides=("wine=$pkgver" "wine-wow64=$pkgver") + conflicts=('wine' 'wine-wow64' 'wine-staging') +fi + +prepare() { + # Allow ccache to work + mv wine-patched-staging-$_pkgbasever $pkgname + + # https://bugs.winehq.org/show_bug.cgi?id=43530 + export CFLAGS="${CFLAGS/-fno-plt/}" + export LDFLAGS="${LDFLAGS/,-z,now/}" + + patch -d $pkgname -Np1 < harmony-fix.diff + + patch -d $pkgname -Np1 < 0001-wined3d-Implement-a-simple-heap-allocator-backed-by-.patch + patch -d $pkgname -Np1 < 0002-wined3d-Allocate-global-write-only-persistent-buffer.patch + patch -d $pkgname -Np1 < 0003-wined3d-Add-support-for-persistently-mapped-wined3d_.patch + patch -d $pkgname -Np1 < 0004-wined3d-Implement-aligned-persistent-heaps-for-persi.patch + patch -d $pkgname -Np1 < 0005-wined3d-Experimental-buffer-heap-fence-batching-uppe.patch + patch -d $pkgname -Np1 < 0006-wined3d-Switch-wined3d_buffer_heap-to-be-backed-by-a.patch + patch -d $pkgname -Np1 < 0007-wined3d-Add-segregated-free-bins-to-complement-rbtre.patch + patch -d $pkgname -Np1 < 0008-wined3d-Implement-lazy-free-using-a-deferred-free-li.patch + + sed 's|OpenCL/opencl.h|CL/opencl.h|g' -i $pkgname/configure* + + # Get rid of old build dirs + rm -rf $pkgname-{32,64}-build + mkdir $pkgname-32-build +} + +build() { + cd "$srcdir" + + if [[ $CARCH == x86_64 ]]; then + msg2 "Building Wine-64..." + + mkdir $pkgname-64-build + cd "$srcdir/$pkgname-64-build" + ../$pkgname/configure \ + --prefix=/usr \ + --libdir=/usr/lib \ + --with-x \ + --with-gstreamer \ + --enable-win64 \ + --with-xattr + + make + + _wine32opts=( + --libdir=/usr/lib32 + --with-wine64="$srcdir/$pkgname-64-build" + ) + + export PKG_CONFIG_PATH="/usr/lib32/pkgconfig" + fi + + msg2 "Building Wine-32..." + cd "$srcdir/$pkgname-32-build" + ../$pkgname/configure \ + --prefix=/usr \ + --with-x \ + --with-gstreamer \ + --with-xattr \ + "${_wine32opts[@]}" + + make +} + +package() { + depends=(${_depends[@]}) + + msg2 "Packaging Wine-32..." + cd "$srcdir/$pkgname-32-build" + + if [[ $CARCH == i686 ]]; then + make prefix="$pkgdir/usr" install + else + make prefix="$pkgdir/usr" \ + libdir="$pkgdir/usr/lib32" \ + dlldir="$pkgdir/usr/lib32/wine" install + + msg2 "Packaging Wine-64..." + cd "$srcdir/$pkgname-64-build" + make prefix="$pkgdir/usr" \ + libdir="$pkgdir/usr/lib" \ + dlldir="$pkgdir/usr/lib/wine" install + fi + + # Font aliasing settings for Win32 applications + install -d "$pkgdir"/etc/fonts/conf.{avail,d} + install -m644 "$srcdir/30-win32-aliases.conf" "$pkgdir/etc/fonts/conf.avail" + ln -s ../conf.avail/30-win32-aliases.conf "$pkgdir/etc/fonts/conf.d/30-win32-aliases.conf" +} + +# vim:set ts=8 sts=2 sw=2 et: diff --git a/harmony-fix.diff b/harmony-fix.diff new file mode 100644 index 000000000000..fe0c8c929d4a --- /dev/null +++ b/harmony-fix.diff @@ -0,0 +1,63 @@ +diff -u -r wine/dlls/gdi32/freetype.c wine-ft281/dlls/gdi32/freetype.c +--- wine/dlls/gdi32/freetype.c 2017-10-04 18:01:36.000000000 +0200 ++++ wine-ft281/dlls/gdi32/freetype.c 2017-10-10 10:29:17.506632615 +0200 +@@ -996,18 +996,23 @@ + + static BOOL is_subpixel_rendering_enabled( void ) + { +-#ifdef FT_LCD_FILTER_H + static int enabled = -1; + if (enabled == -1) + { +- enabled = (pFT_Library_SetLcdFilter && +- pFT_Library_SetLcdFilter( NULL, 0 ) != FT_Err_Unimplemented_Feature); ++ /* >= 2.8.1 provides LCD rendering without filters */ ++ if (FT_Version.major > 2 || ++ FT_Version.major == 2 && FT_Version.minor > 8 || ++ FT_Version.major == 2 && FT_Version.minor == 8 && FT_Version.patch >= 1) ++ enabled = TRUE; ++#ifdef FT_LCD_FILTER_H ++ else if (pFT_Library_SetLcdFilter && ++ pFT_Library_SetLcdFilter( NULL, 0 ) != FT_Err_Unimplemented_Feature) ++ enabled = TRUE; ++#endif ++ else enabled = FALSE; + TRACE("subpixel rendering is %senabled\n", enabled ? "" : "NOT "); + } + return enabled; +-#else +- return FALSE; +-#endif + } + + +@@ -7271,7 +7276,6 @@ + case WINE_GGO_HBGR_BITMAP: + case WINE_GGO_VRGB_BITMAP: + case WINE_GGO_VBGR_BITMAP: +-#ifdef FT_LCD_FILTER_H + { + switch (ft_face->glyph->format) + { +@@ -7357,8 +7361,11 @@ + if ( needsTransform ) + pFT_Outline_Transform (&ft_face->glyph->outline, &transMatTategaki); + ++#ifdef FT_LCD_FILTER_H + if ( pFT_Library_SetLcdFilter ) + pFT_Library_SetLcdFilter( library, FT_LCD_FILTER_DEFAULT ); ++#endif ++ + pFT_Render_Glyph (ft_face->glyph, render_mode); + + src = ft_face->glyph->bitmap.buffer; +@@ -7439,9 +7446,6 @@ + + break; + } +-#else +- return GDI_ERROR; +-#endif + + case GGO_NATIVE: + { |