From d64aafc0ede047d1eff93ee7aa77411c0d27aa21 Mon Sep 17 00:00:00 2001 From: Andrew Comminos Date: Mon, 5 Mar 2018 20:28:34 -0800 Subject: [PATCH 3/5] wined3d: Use ARB_multi_bind to speed up UBO updates. More frequent UBO remaps as a result of the persistent buffer allocator causes glBindBufferRange to be a bottleneck. Using ARB_multi_bind massively reduces state change overhead. --- dlls/wined3d/directx.c | 4 ++++ dlls/wined3d/state.c | 46 +++++++++++++++++++++++++++++++++++++++------- dlls/wined3d/wined3d_gl.h | 1 + 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c index 46c6a59536..8789a501ec 100644 --- a/dlls/wined3d/directx.c +++ b/dlls/wined3d/directx.c @@ -149,6 +149,7 @@ static const struct wined3d_extension_map gl_extension_map[] = {"GL_ARB_internalformat_query2", ARB_INTERNALFORMAT_QUERY2 }, {"GL_ARB_map_buffer_alignment", ARB_MAP_BUFFER_ALIGNMENT }, {"GL_ARB_map_buffer_range", ARB_MAP_BUFFER_RANGE }, + {"GL_ARB_multi_bind", ARB_MULTI_BIND }, {"GL_ARB_multisample", ARB_MULTISAMPLE }, {"GL_ARB_multitexture", ARB_MULTITEXTURE }, {"GL_ARB_occlusion_query", ARB_OCCLUSION_QUERY }, @@ -2796,6 +2797,8 @@ static void load_gl_funcs(struct wined3d_gl_info *gl_info) /* GL_ARB_map_buffer_range */ USE_GL_FUNC(glFlushMappedBufferRange) USE_GL_FUNC(glMapBufferRange) + /* GL_ARB_multi_bind */ + USE_GL_FUNC(glBindBuffersRange) /* GL_ARB_multisample */ USE_GL_FUNC(glSampleCoverageARB) /* GL_ARB_multitexture */ @@ -3973,6 +3976,7 @@ static BOOL wined3d_adapter_init_gl_caps(struct wined3d_adapter *adapter, {ARB_TEXTURE_VIEW, MAKEDWORD_VERSION(4, 3)}, {ARB_CLEAR_TEXTURE, MAKEDWORD_VERSION(4, 4)}, + {ARB_MULTI_BIND, MAKEDWORD_VERSION(4, 4)}, {ARB_CLIP_CONTROL, MAKEDWORD_VERSION(4, 5)}, {ARB_CULL_DISTANCE, MAKEDWORD_VERSION(4, 5)}, diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c index 6f7805b8bd..4d0718514f 100644 --- a/dlls/wined3d/state.c +++ b/dlls/wined3d/state.c @@ -5014,19 +5014,51 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state shader_type = WINED3D_SHADER_TYPE_COMPUTE; wined3d_gl_limits_get_uniform_block_range(&gl_info->limits, shader_type, &base, &count); - for (i = 0; i < count; ++i) + + if (gl_info->supported[ARB_MULTI_BIND]) { - buffer = state->cb[shader_type][i]; - if (buffer) + GLuint buffer_objects[count]; + GLsizeiptr buffer_offsets[count]; + GLsizeiptr buffer_sizes[count]; + + for (i = 0; i < count; ++i) { - wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); - GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length)); + buffer = state->cb[shader_type][i]; + if (buffer) + { + wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); + buffer_objects[i] = bo_addr.buffer_object; + buffer_offsets[i] = bo_addr.addr; + buffer_sizes[i] = bo_addr.length; + } + else + { + buffer_objects[i] = buffer_offsets[i] = 0; + // The ARB_multi_bind spec states that an error may be thrown if + // `size` is less than or equal to zero, Thus, we specify a size for + // unused buffers anyway. + buffer_sizes[i] = 1; + } } - else + GL_EXTCALL(glBindBuffersRange(GL_UNIFORM_BUFFER, base, count, buffer_objects, buffer_offsets, buffer_sizes)); + } + else + { + for (i = 0; i < count; ++i) { - GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0)); + buffer = state->cb[shader_type][i]; + if (buffer) + { + wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); + GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length)); + } + else + { + GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0)); + } } } + checkGLcall("bind constant buffers"); } diff --git a/dlls/wined3d/wined3d_gl.h b/dlls/wined3d/wined3d_gl.h index 7626864ef2..25c3301c94 100644 --- a/dlls/wined3d/wined3d_gl.h +++ b/dlls/wined3d/wined3d_gl.h @@ -82,6 +82,7 @@ enum wined3d_gl_extension ARB_INTERNALFORMAT_QUERY2, ARB_MAP_BUFFER_ALIGNMENT, ARB_MAP_BUFFER_RANGE, + ARB_MULTI_BIND, ARB_MULTISAMPLE, ARB_MULTITEXTURE, ARB_OCCLUSION_QUERY, -- 2.16.2