diff options
author | Milan Stastny | 2021-08-15 00:12:32 +0200 |
---|---|---|
committer | Milan Stastny | 2021-08-15 00:12:32 +0200 |
commit | ef44e52829dd5da6d9c514af4ddb37ea6896e764 (patch) | |
tree | c8bc13bc0079fe29cf1dd3af943e59e3f617fa98 /v6-2-2-BZ-17645-fix-slow-DSO-sorting-behavior-in-dynamic-loader----Algorithm-changes.patch | |
parent | 9fd6bc6df6000c6f0b8df229b18b1ffd37f0fab3 (diff) | |
download | aur-ef44e52829dd5da6d9c514af4ddb37ea6896e764.tar.gz |
Update to patch v6
Diffstat (limited to 'v6-2-2-BZ-17645-fix-slow-DSO-sorting-behavior-in-dynamic-loader----Algorithm-changes.patch')
-rw-r--r-- | v6-2-2-BZ-17645-fix-slow-DSO-sorting-behavior-in-dynamic-loader----Algorithm-changes.patch | 456 |
1 files changed, 456 insertions, 0 deletions
diff --git a/v6-2-2-BZ-17645-fix-slow-DSO-sorting-behavior-in-dynamic-loader----Algorithm-changes.patch b/v6-2-2-BZ-17645-fix-slow-DSO-sorting-behavior-in-dynamic-loader----Algorithm-changes.patch new file mode 100644 index 000000000000..c83a2fb02c17 --- /dev/null +++ b/v6-2-2-BZ-17645-fix-slow-DSO-sorting-behavior-in-dynamic-loader----Algorithm-changes.patch @@ -0,0 +1,456 @@ +diff --git a/elf/dl-close.c b/elf/dl-close.c +index f39001cab9..a55b3a00fa 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -167,8 +167,6 @@ _dl_close_worker (struct link_map *map, bool force) + + bool any_tls = false; + const unsigned int nloaded = ns->_ns_nloaded; +- char used[nloaded]; +- char done[nloaded]; + struct link_map *maps[nloaded]; + + /* Run over the list and assign indexes to the link maps and enter +@@ -176,24 +174,21 @@ _dl_close_worker (struct link_map *map, bool force) + int idx = 0; + for (struct link_map *l = ns->_ns_loaded; l != NULL; l = l->l_next) + { ++ l->l_map_used = 0; ++ l->l_map_done = 0; + l->l_idx = idx; + maps[idx] = l; + ++idx; +- + } + assert (idx == nloaded); + +- /* Prepare the bitmaps. */ +- memset (used, '\0', sizeof (used)); +- memset (done, '\0', sizeof (done)); +- + /* Keep track of the lowest index link map we have covered already. */ + int done_index = -1; + while (++done_index < nloaded) + { + struct link_map *l = maps[done_index]; + +- if (done[done_index]) ++ if (l->l_map_done) + /* Already handled. */ + continue; + +@@ -204,12 +199,12 @@ _dl_close_worker (struct link_map *map, bool force) + /* See CONCURRENCY NOTES in cxa_thread_atexit_impl.c to know why + acquire is sufficient and correct. */ + && atomic_load_acquire (&l->l_tls_dtor_count) == 0 +- && !used[done_index]) ++ && !l->l_map_used) + continue; + + /* We need this object and we handle it now. */ +- done[done_index] = 1; +- used[done_index] = 1; ++ l->l_map_used = 1; ++ l->l_map_done = 1; + /* Signal the object is still needed. */ + l->l_idx = IDX_STILL_USED; + +@@ -225,9 +220,9 @@ _dl_close_worker (struct link_map *map, bool force) + { + assert ((*lp)->l_idx >= 0 && (*lp)->l_idx < nloaded); + +- if (!used[(*lp)->l_idx]) ++ if (!(*lp)->l_map_used) + { +- used[(*lp)->l_idx] = 1; ++ (*lp)->l_map_used = 1; + /* If we marked a new object as used, and we've + already processed it, then we need to go back + and process again from that point forward to +@@ -250,9 +245,9 @@ _dl_close_worker (struct link_map *map, bool force) + { + assert (jmap->l_idx >= 0 && jmap->l_idx < nloaded); + +- if (!used[jmap->l_idx]) ++ if (!jmap->l_map_used) + { +- used[jmap->l_idx] = 1; ++ jmap->l_map_used = 1; + if (jmap->l_idx - 1 < done_index) + done_index = jmap->l_idx - 1; + } +@@ -262,8 +257,7 @@ _dl_close_worker (struct link_map *map, bool force) + + /* Sort the entries. We can skip looking for the binary itself which is + at the front of the search list for the main namespace. */ +- _dl_sort_maps (maps + (nsid == LM_ID_BASE), nloaded - (nsid == LM_ID_BASE), +- used + (nsid == LM_ID_BASE), true); ++ _dl_sort_maps (maps, nloaded, (nsid == LM_ID_BASE), true); + + /* Call all termination functions at once. */ + #ifdef SHARED +@@ -280,7 +274,7 @@ _dl_close_worker (struct link_map *map, bool force) + /* All elements must be in the same namespace. */ + assert (imap->l_ns == nsid); + +- if (!used[i]) ++ if (!imap->l_map_used) + { + assert (imap->l_type == lt_loaded && !imap->l_nodelete_active); + +@@ -333,7 +327,7 @@ _dl_close_worker (struct link_map *map, bool force) + if (i < first_loaded) + first_loaded = i; + } +- /* Else used[i]. */ ++ /* Else imap->l_map_used. */ + else if (imap->l_type == lt_loaded) + { + struct r_scope_elem *new_list = NULL; +@@ -557,7 +551,7 @@ _dl_close_worker (struct link_map *map, bool force) + for (unsigned int i = first_loaded; i < nloaded; ++i) + { + struct link_map *imap = maps[i]; +- if (!used[i]) ++ if (!imap->l_map_used) + { + assert (imap->l_type == lt_loaded); + +diff --git a/elf/dl-deps.c b/elf/dl-deps.c +index 087a49b212..237d9636c5 100644 +--- a/elf/dl-deps.c ++++ b/elf/dl-deps.c +@@ -613,10 +613,9 @@ Filters not supported with LD_TRACE_PRELINKING")); + + /* If libc.so.6 is the main map, it participates in the sort, so + that the relocation order is correct regarding libc.so.6. */ +- if (l_initfini[0] == GL (dl_ns)[l_initfini[0]->l_ns].libc_map) +- _dl_sort_maps (l_initfini, nlist, NULL, false); +- else +- _dl_sort_maps (&l_initfini[1], nlist - 1, NULL, false); ++ _dl_sort_maps (l_initfini, nlist, ++ (l_initfini[0] != GL (dl_ns)[l_initfini[0]->l_ns].libc_map), ++ false); + + /* Terminate the list of dependencies. */ + l_initfini[nlist] = NULL; +diff --git a/elf/dl-fini.c b/elf/dl-fini.c +index 6dbdfe4b3e..c683884c35 100644 +--- a/elf/dl-fini.c ++++ b/elf/dl-fini.c +@@ -92,8 +92,7 @@ _dl_fini (void) + /* Now we have to do the sorting. We can skip looking for the + binary itself which is at the front of the search list for + the main namespace. */ +- _dl_sort_maps (maps + (ns == LM_ID_BASE), nmaps - (ns == LM_ID_BASE), +- NULL, true); ++ _dl_sort_maps (maps, nmaps, (ns == LM_ID_BASE), true); + + /* We do not rely on the linked list of loaded object anymore + from this point on. We have our own list here (maps). The +diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c +index d21770267a..91c5009ac4 100644 +--- a/elf/dl-sort-maps.c ++++ b/elf/dl-sort-maps.c +@@ -16,16 +16,24 @@ + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + ++#include <assert.h> + #include <ldsodefs.h> ++#include <elf/dl-tunables.h> + ++/* Note: this is the older, "original" sorting algorithm, being used as ++ default up to 2.32. + +-/* Sort array MAPS according to dependencies of the contained objects. +- Array USED, if non-NULL, is permutated along MAPS. If FOR_FINI this is +- called for finishing an object. */ +-void +-_dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used, +- bool for_fini) ++ Sort array MAPS according to dependencies of the contained objects. ++ If FOR_FINI is true, this is called for finishing an object. */ ++static void ++_dl_sort_maps_original (struct link_map **maps, unsigned int nmaps, ++ unsigned int skip, bool for_fini) + { ++ /* Allows caller to do the common optimization of skipping the first map, ++ usually the main binary. */ ++ maps += skip; ++ nmaps -= skip; ++ + /* A list of one element need not be sorted. */ + if (nmaps <= 1) + return; +@@ -66,14 +74,6 @@ _dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used, + (k - i) * sizeof (maps[0])); + maps[k] = thisp; + +- if (used != NULL) +- { +- char here_used = used[i]; +- memmove (&used[i], &used[i + 1], +- (k - i) * sizeof (used[0])); +- used[k] = here_used; +- } +- + if (seen[i + 1] > nmaps - i) + { + ++i; +@@ -120,3 +120,183 @@ _dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used, + next:; + } + } ++ ++#if !HAVE_TUNABLES ++/* In this case, just default to the original algorithm. */ ++strong_alias (_dl_sort_maps_original, _dl_sort_maps); ++#else ++ ++/* We use a recursive function due to its better clarity and ease of ++ implementation, as well as faster execution speed. We already use ++ alloca() for list allocation during the breadth-first search of ++ dependencies in _dl_map_object_deps(), and this should be on the ++ same order of worst-case stack usage. ++ ++ Note: the '*rpo' parameter is supposed to point to one past the ++ last element of the array where we save the sort results, and is ++ decremented before storing the current map at each level. */ ++ ++static void ++dfs_traversal (struct link_map ***rpo, struct link_map *map, ++ bool *do_reldeps) ++{ ++ if (map->l_visited) ++ return; ++ ++ map->l_visited = 1; ++ ++ if (map->l_initfini) ++ { ++ for (int i = 0; map->l_initfini[i] != NULL; i++) ++ { ++ struct link_map *dep = map->l_initfini[i]; ++ if (dep->l_visited == 0 ++ && dep->l_main_map == 0) ++ dfs_traversal (rpo, dep, do_reldeps); ++ } ++ } ++ ++ if (__glibc_unlikely (do_reldeps != NULL && map->l_reldeps != NULL)) ++ { ++ /* Indicate that we encountered relocation dependencies during ++ traversal. */ ++ *do_reldeps = true; ++ ++ for (int m = map->l_reldeps->act - 1; m >= 0; m--) ++ { ++ struct link_map *dep = map->l_reldeps->list[m]; ++ if (dep->l_visited == 0 ++ && dep->l_main_map == 0) ++ dfs_traversal (rpo, dep, do_reldeps); ++ } ++ } ++ ++ *rpo -= 1; ++ **rpo = map; ++} ++ ++/* Topologically sort array MAPS according to dependencies of the contained ++ objects. */ ++ ++static void ++_dl_sort_maps_dfs (struct link_map **maps, unsigned int nmaps, ++ unsigned int skip __attribute__ ((unused)), bool for_fini) ++{ ++ for (int i = nmaps - 1; i >= 0; i--) ++ maps[i]->l_visited = 0; ++ ++ /* We apply DFS traversal for each of maps[i] until the whole total order ++ is found and we're at the start of the Reverse-Postorder (RPO) sequence, ++ which is a topological sort. ++ ++ We go from maps[nmaps - 1] backwards towards maps[0] at this level. ++ Due to the breadth-first search (BFS) ordering we receive, going ++ backwards usually gives a more shallow depth-first recursion depth, ++ adding more stack usage safety. Also, combined with the natural ++ processing order of l_initfini[] at each node during DFS, this maintains ++ an ordering closer to the original link ordering in the sorting results ++ under most simpler cases. ++ ++ Another reason we order the top level backwards, it that maps[0] is ++ usually exactly the main object of which we're in the midst of ++ _dl_map_object_deps() processing, and maps[0]->l_initfini[] is still ++ blank. If we start the traversal from maps[0], since having no ++ dependencies yet filled in, maps[0] will always be immediately ++ incorrectly placed at the last place in the order (first in reverse). ++ Adjusting the order so that maps[0] is last traversed naturally avoids ++ this problem. ++ ++ Further, the old "optimization" of skipping the main object at maps[0] ++ from the call-site (i.e. _dl_sort_maps(maps+1,nmaps-1)) is in general ++ no longer valid, since traversing along object dependency-links ++ may "find" the main object even when it is not included in the initial ++ order (e.g. a dlopen()'ed shared object can have circular dependencies ++ linked back to itself). In such a case, traversing N-1 objects will ++ create a N-object result, and raise problems. ++ ++ To summarize, just passing in the full list, and iterating from back ++ to front makes things much more straightforward. */ ++ ++ /* Array to hold RPO sorting results, before we copy back to maps[]. */ ++ struct link_map *rpo[nmaps]; ++ ++ /* The 'head' position during each DFS iteration. Note that we start at ++ one past the last element due to first-decrement-then-store (see the ++ bottom of above dfs_traversal() routine). */ ++ struct link_map **rpo_head = &rpo[nmaps]; ++ ++ bool do_reldeps = false; ++ bool *do_reldeps_ref = (for_fini ? &do_reldeps : NULL); ++ ++ for (int i = nmaps - 1; i >= 0; i--) ++ { ++ dfs_traversal (&rpo_head, maps[i], do_reldeps_ref); ++ ++ /* We can break early if all objects are already placed. */ ++ if (rpo_head == rpo) ++ goto end; ++ } ++ assert (rpo_head == rpo); ++ ++ end: ++ /* Here we may do a second pass of sorting, using only l_initfini[] ++ static dependency links. This is avoided if !FOR_FINI or if we didn't ++ find any reldeps in the first DFS traversal. ++ ++ The reason we do this is: while it is unspecified how circular ++ dependencies should be handled, the presumed reasonable behavior is to ++ have destructors to respect static dependency links as much as possible, ++ overriding reldeps if needed. And the first sorting pass, which takes ++ l_initfini/l_reldeps links equally, may not preserve this priority. ++ ++ Hence we do a 2nd sorting pass, taking only DT_NEEDED links into account ++ (see how the do_reldeps argument to dfs_traversal() is NULL below). */ ++ if (do_reldeps) ++ { ++ for (int i = nmaps - 1; i >= 0; i--) ++ rpo[i]->l_visited = 0; ++ ++ struct link_map **maps_head = &maps[nmaps]; ++ for (int i = nmaps - 1; i >= 0; i--) ++ { ++ dfs_traversal (&maps_head, rpo[i], NULL); ++ ++ /* We can break early if all objects are already placed. ++ The below memcpy is not needed in the do_reldeps case here, ++ since we wrote back to maps[] during DFS traversal. */ ++ if (maps_head == maps) ++ return; ++ } ++ assert (maps_head == maps); ++ return; ++ } ++ ++ memcpy (maps, rpo, sizeof (struct link_map *) * nmaps); ++} ++ ++void ++_dl_sort_maps (struct link_map **maps, unsigned int nmaps, ++ unsigned int skip, bool for_fini) ++{ ++ /* Index code for sorting algorithm currently in use. */ ++ static int32_t algorithm = 0; ++ if (__glibc_unlikely (algorithm == 0)) ++ algorithm = TUNABLE_GET (glibc, rtld, dynamic_sort, ++ int32_t, NULL); ++ ++ /* It can be tempting to use a static function pointer to store and call ++ the current selected sorting algorithm routine, but experimentation ++ shows that current processors still do not handle indirect branches ++ that efficiently, plus a static function pointer will involve ++ PTR_MANGLE/DEMANGLE, further impairing performance of small, common ++ input cases. A simple if-case with direct function calls appears to ++ be the fastest. */ ++ if (__glibc_likely (algorithm == 1)) ++ _dl_sort_maps_original (maps, nmaps, skip, for_fini); ++ else if (algorithm == 2) ++ _dl_sort_maps_dfs (maps, nmaps, skip, for_fini); ++ else ++ __builtin_unreachable (); ++} ++ ++#endif /* HAVE_TUNABLES. */ +diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list +index 8ddd4a2314..46ffb23784 100644 +--- a/elf/dl-tunables.list ++++ b/elf/dl-tunables.list +@@ -156,4 +156,13 @@ glibc { + security_level: SXID_IGNORE + } + } ++ ++ rtld { ++ dynamic_sort { ++ type: INT_32 ++ minval: 1 ++ maxval: 2 ++ default: 1 ++ } ++ } + } +diff --git a/elf/rtld.c b/elf/rtld.c +index d733359eaf..abdc13c13a 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1389,6 +1389,9 @@ dl_main (const ElfW(Phdr) *phdr, + main_map->l_name = (char *) ""; + *user_entry = main_map->l_entry; + ++ /* Set bit indicating this is the main program map. */ ++ main_map->l_main_map = 1; ++ + #ifdef HAVE_AUX_VECTOR + /* Adjust the on-stack auxiliary vector so that it looks like the + binary was executed directly. */ +diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp +index 9f66c52885..9bf572715f 100644 +--- a/elf/tst-rtld-list-tunables.exp ++++ b/elf/tst-rtld-list-tunables.exp +@@ -10,5 +10,6 @@ glibc.malloc.tcache_max: 0x0 (min: 0x0, max: 0x[f]+) + glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0x[f]+) + glibc.malloc.top_pad: 0x0 (min: 0x0, max: 0x[f]+) + glibc.malloc.trim_threshold: 0x0 (min: 0x0, max: 0x[f]+) ++glibc.rtld.dynamic_sort: 1 (min: 1, max: 2) + glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10) + glibc.rtld.optional_static_tls: 0x200 (min: 0x0, max: 0x[f]+) +diff --git a/include/link.h b/include/link.h +index 4af16cb596..50f45db243 100644 +--- a/include/link.h ++++ b/include/link.h +@@ -181,6 +181,11 @@ struct link_map + unsigned int l_init_called:1; /* Nonzero if DT_INIT function called. */ + unsigned int l_global:1; /* Nonzero if object in _dl_global_scope. */ + unsigned int l_reserved:2; /* Reserved for internal use. */ ++ unsigned int l_main_map:1; /* Nonzero for the map of the main program. */ ++ unsigned int l_visited:1; /* Used internally for map dependency ++ graph traversal. */ ++ unsigned int l_map_used:1; /* These two bits are used during traversal */ ++ unsigned int l_map_done:1; /* of maps in _dl_close_worker. */ + unsigned int l_phdr_allocated:1; /* Nonzero if the data structure pointed + to by `l_phdr' is allocated. */ + unsigned int l_soname_added:1; /* Nonzero if the SONAME is for sure in +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 9c15259236..ad48a0d7da 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1084,7 +1084,7 @@ extern void _dl_fini (void) attribute_hidden; + + /* Sort array MAPS according to dependencies of the contained objects. */ + extern void _dl_sort_maps (struct link_map **maps, unsigned int nmaps, +- char *used, bool for_fini) attribute_hidden; ++ unsigned int skip, bool for_fini) attribute_hidden; + + /* The dynamic linker calls this function before and having changing + any shared object mappings. The `r_state' member of `struct r_debug' |