diff options
author | sirlucjan | 2017-08-25 20:05:11 +0200 |
---|---|---|
committer | sirlucjan | 2017-08-25 20:05:11 +0200 |
commit | 5ef90c32e3856aa60245df7b81cf902e1bdc4602 (patch) | |
tree | 5fd779c6988f1d5c454ebda4439527146fba0093 | |
parent | 9c67733b4846365636cb2f94c1c97595dfa8378f (diff) | |
download | aur-5ef90c32e3856aa60245df7b81cf902e1bdc4602.tar.gz |
Some improvements
-rw-r--r-- | .SRCINFO | 10 | ||||
-rw-r--r-- | 0005-BFQ-update-to-v8r12.patch | 481 | ||||
-rw-r--r-- | 0006-BFQ-bugfix-for-v8r12.patch | 780 | ||||
-rw-r--r-- | PKGBUILD | 12 |
4 files changed, 12 insertions, 1271 deletions
@@ -1,6 +1,6 @@ pkgbase = linux-bfq pkgver = 4.11.12 - pkgrel = 3 + pkgrel = 4 url = http://algo.ing.unimo.it arch = i686 arch = x86_64 @@ -22,8 +22,8 @@ pkgbase = linux-bfq source = config.x86_64 source = 90-linux.hook source = linux.preset - source = 0005-BFQ-update-to-v8r12.patch - source = 0006-BFQ-bugfix-for-v8r12.patch + source = https://gitlab.com/sirlucjan/kernel-patches/raw/master/4.11/0005-BFQ-update-to-v8r12.patch + source = https://gitlab.com/sirlucjan/kernel-patches/raw/master/4.11/0006-BFQ-bugfix-for-v8r12.patch validpgpkeys = ABAF11C65A2970B130ABE3C479BE3E4300411886 validpgpkeys = 647F28654894E3BD457199BE38DBBDC86092693E sha512sums = 6610eed97ffb7207c71771198c36179b8244ace7222bebb109507720e26c5f17d918079a56d5febdd8605844d67fb2df0ebe910fa2f2f53690daf6e2a8ad09c3 @@ -39,8 +39,8 @@ pkgbase = linux-bfq sha512sums = 57addf780fc68d8e2914514e47d2edd27600cc0d1bf0c7d3786bc3e16ec9c6527eb8e9d95f156da8b77c11a53ac2a8f0d23360547a26350ebc3dca93721ebc42 sha512sums = d6faa67f3ef40052152254ae43fee031365d0b1524aa0718b659eb75afc21a3f79ea8d62d66ea311a800109bed545bc8f79e8752319cd378eef2cbd3a09aba22 sha512sums = 2dc6b0ba8f7dbf19d2446c5c5f1823587de89f4e28e9595937dd51a87755099656f2acec50e3e2546ea633ad1bfd1c722e0c2b91eef1d609103d8abdc0a7cbaf - sha512sums = b1f6306a27d7e25eb4ff3eb51cb1fe38b0ca035cff229537d1b9f68bdc25861f2fecdeeeb1582e34cd166ee4275e49e4c679247a4c36109b2dcd6d4fa9456d60 - sha512sums = 9a80a4824e6a8f34a9bcc687b1e1fa91300a5a5fb515db6022e7e43cbacbf2ad93dda1250f968e070be4f6e2429a21e9cf97838120915de9a1144745c34a7188 + sha512sums = 05e38539dc51ad64df02223174ba961264355a34fb703555fccfa18e296492c0622f191e522bcf341d6e7f8763c9e57f85ff8645a62e3b8a42446d17d190afb9 + sha512sums = 30c44c4b603f6ca15e0c58d98160a40a44c8212b94cd7a3457dbf0303d88962a960800f269334f55b4070a6d872d8d9dcccdbfea3ca2aaa389bef7051132495a pkgname = linux-bfq pkgdesc = Linux Kernel and modules with the BFQ scheduler. 
diff --git a/0005-BFQ-update-to-v8r12.patch b/0005-BFQ-update-to-v8r12.patch deleted file mode 100644 index f4535cbb718b..000000000000 --- a/0005-BFQ-update-to-v8r12.patch +++ /dev/null @@ -1,481 +0,0 @@ -From 493c15c90a12961dc3a61ac37ecf4c8839eb189e Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@linaro.org> -Date: Thu, 4 May 2017 17:47:13 +0200 -Subject: [PATCH 1/5] block, bfq: update wr_busy_queues if needed on a queue - split - -This commit fixes a bug triggered by a non-trivial sequence of -events. These events are briefly described in the next two -paragraphs. The impatient, or those who are familiar with queue -merging and splitting, can jump directly to the last paragraph. - -On each I/O-request arrival for a shared bfq_queue, i.e., for a -bfq_queue that is the result of the merge of two or more bfq_queues, -BFQ checks whether the shared bfq_queue has become seeky (i.e., if too -many random I/O requests have arrived for the bfq_queue; if the device -is non rotational, then random requests must also be small for the -bfq_queue to be tagged as seeky). If the shared bfq_queue is actually -detected as seeky, then a split occurs: the bfq I/O context of the -process that has issued the request is redirected from the shared -bfq_queue to a new non-shared bfq_queue. As a degenerate case, if the -shared bfq_queue actually happens to be shared only by one process -(because of previous splits), then no new bfq_queue is created: the -state of the shared bfq_queue is just changed from shared to non -shared. - -Regardless of whether a brand new non-shared bfq_queue is created, or -the pre-existing shared bfq_queue is just turned into a non-shared -bfq_queue, several parameters of the non-shared bfq_queue are set -(restored) to the original values they had when the bfq_queue -associated with the bfq I/O context of the process (that has just -issued an I/O request) was merged with the shared bfq_queue. One of -these parameters is the weight-raising state. 
- -If, on the split of a shared bfq_queue, -1) a pre-existing shared bfq_queue is turned into a non-shared -bfq_queue; -2) the previously shared bfq_queue happens to be busy; -3) the weight-raising state of the previously shared bfq_queue happens -to change; -the number of weight-raised busy queues changes. The field -wr_busy_queues must then be updated accordingly, but such an update -was missing. This commit adds the missing update. - -Signed-off-by: Paolo Valente <paolo.valente@linaro.org> ---- - block/bfq-iosched.c | 47 +++++++++++++++++++++++++++++++++++++++-------- - block/bfq-sched.c | 9 +++++++-- - 2 files changed, 46 insertions(+), 10 deletions(-) - -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index a56888ea9d94..5e8cbeab45a1 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -673,8 +673,12 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd) - } - - static void --bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) -+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, -+ struct bfq_io_cq *bic, bool bfq_already_existing) - { -+ unsigned int old_wr_coeff; -+ bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq); -+ - if (bic->saved_idle_window) - bfq_mark_bfqq_idle_window(bfqq); - else -@@ -685,6 +689,9 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) - else - bfq_clear_bfqq_IO_bound(bfqq); - -+ if (unlikely(busy)) -+ old_wr_coeff = bfqq->wr_coeff; -+ - bfqq->wr_coeff = bic->saved_wr_coeff; - bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt; - BUG_ON(time_is_after_jiffies(bfqq->wr_start_at_switch_to_srt)); -@@ -693,8 +700,8 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) - BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); - - if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) || -- time_is_before_jiffies(bfqq->last_wr_start_finish + -- bfqq->wr_cur_max_time))) { -+ 
time_is_before_jiffies(bfqq->last_wr_start_finish + -+ bfqq->wr_cur_max_time))) { - bfq_log_bfqq(bfqq->bfqd, bfqq, - "resume state: switching off wr (%lu + %lu < %lu)", - bfqq->last_wr_start_finish, bfqq->wr_cur_max_time, -@@ -702,8 +709,20 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) - - bfqq->wr_coeff = 1; - } -+ - /* make sure weight will be updated, however we got here */ - bfqq->entity.prio_changed = 1; -+ -+ if (likely(!busy)) -+ return; -+ -+ if (old_wr_coeff == 1 && bfqq->wr_coeff > 1) { -+ bfqd->wr_busy_queues++; -+ BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues); -+ } else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1) { -+ bfqd->wr_busy_queues--; -+ BUG_ON(bfqd->wr_busy_queues < 0); -+ } - } - - static int bfqq_process_refs(struct bfq_queue *bfqq) -@@ -1450,6 +1469,7 @@ static void bfq_add_request(struct request *rq) - bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); - - bfqd->wr_busy_queues++; -+ BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues); - bfqq->entity.prio_changed = 1; - bfq_log_bfqq(bfqd, bfqq, - "non-idle wrais starting, " -@@ -1689,8 +1709,10 @@ static void bfq_bfqq_end_wr(struct bfq_queue *bfqq) - { - BUG_ON(!bfqq); - -- if (bfq_bfqq_busy(bfqq)) -+ if (bfq_bfqq_busy(bfqq)) { - bfqq->bfqd->wr_busy_queues--; -+ BUG_ON(bfqq->bfqd->wr_busy_queues < 0); -+ } - bfqq->wr_coeff = 1; - bfqq->wr_cur_max_time = 0; - bfqq->last_wr_start_finish = jiffies; -@@ -2069,8 +2091,11 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, - new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish; - new_bfqq->wr_start_at_switch_to_srt = - bfqq->wr_start_at_switch_to_srt; -- if (bfq_bfqq_busy(new_bfqq)) -+ if (bfq_bfqq_busy(new_bfqq)) { - bfqd->wr_busy_queues++; -+ BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues); -+ } -+ - new_bfqq->entity.prio_changed = 1; - bfq_log_bfqq(bfqd, new_bfqq, - "wr start after merge with %d, rais_max_time %u", -@@ -2081,8 +2106,11 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, - 
if (bfqq->wr_coeff > 1) { /* bfqq has given its wr to new_bfqq */ - bfqq->wr_coeff = 1; - bfqq->entity.prio_changed = 1; -- if (bfq_bfqq_busy(bfqq)) -+ if (bfq_bfqq_busy(bfqq)) { - bfqd->wr_busy_queues--; -+ BUG_ON(bfqd->wr_busy_queues < 0); -+ } -+ - } - - bfq_log_bfqq(bfqd, new_bfqq, "merge_bfqqs: wr_busy %d", -@@ -4553,7 +4581,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, - const int is_sync = rq_is_sync(rq); - struct bfq_queue *bfqq; - unsigned long flags; -- bool split = false; -+ bool bfqq_already_existing = false, split = false; - - spin_lock_irqsave(q->queue_lock, flags); - bfq_check_ioprio_change(bic, bio); -@@ -4613,6 +4641,8 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, - split = true; - if (!bfqq) - goto new_queue; -+ else -+ bfqq_already_existing = true; - } - } - -@@ -4638,7 +4668,8 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, - * queue, restore the idle window and the possible - * weight raising period. 
- */ -- bfq_bfqq_resume_state(bfqq, bic); -+ bfq_bfqq_resume_state(bfqq, bfqd, bic, -+ bfqq_already_existing); - } - } - -diff --git a/block/bfq-sched.c b/block/bfq-sched.c -index 1fde0702bfef..36fb773c0832 100644 ---- a/block/bfq-sched.c -+++ b/block/bfq-sched.c -@@ -1988,8 +1988,10 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, - bfq_weights_tree_remove(bfqd, &bfqq->entity, - &bfqd->queue_weights_tree); - -- if (bfqq->wr_coeff > 1) -+ if (bfqq->wr_coeff > 1) { - bfqd->wr_busy_queues--; -+ BUG_ON(bfqd->wr_busy_queues < 0); -+ } - - bfqg_stats_update_dequeue(bfqq_group(bfqq)); - -@@ -2018,6 +2020,9 @@ static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) - bfq_weights_tree_add(bfqd, &bfqq->entity, - &bfqd->queue_weights_tree); - -- if (bfqq->wr_coeff > 1) -+ if (bfqq->wr_coeff > 1) { - bfqd->wr_busy_queues++; -+ BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues); -+ } -+ - } - -From 69373c8ac58080b60ca461689db6afb200d962e2 Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@linaro.org> -Date: Tue, 9 May 2017 12:52:16 +0200 -Subject: [PATCH 2/5] block, bfq: stress that low_latency must be off to get - max throughput - -The introduction of the BFQ and Kyber I/O schedulers has triggered a -new wave of I/O benchmarks. Unfortunately, comments and discussions on -these benchmarks confirm that there is still little awareness that it -is very hard to achieve, at the same time, a low latency and a high -throughput. In particular, virtually all benchmarks measure -throughput, or throughput-related figures of merit, but, for BFQ, they -use the scheduler in its default configuration. This configuration is -geared, instead, toward a low latency. This is evidently a sign that -BFQ documentation is still too unclear on this important aspect. This -commit addresses this issue by stressing how BFQ configuration must be -(easily) changed if the only goal is maximum throughput. 
- -Signed-off-by: Paolo Valente <paolo.valente@linaro.org> ---- - Documentation/block/bfq-iosched.txt | 17 ++++++++++++++++- - block/bfq-iosched.c | 5 +++++ - 2 files changed, 21 insertions(+), 1 deletion(-) - -diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt -index 13b5248eba7e..0539e87962ed 100644 ---- a/Documentation/block/bfq-iosched.txt -+++ b/Documentation/block/bfq-iosched.txt -@@ -11,6 +11,13 @@ controllers), BFQ's main features are: - groups (switching back to time distribution when needed to keep - throughput high). - -+In its default configuration, BFQ privileges latency over -+throughput. So, when needed for achieving a lower latency, BFQ builds -+schedules that may lead to a lower throughput. If your main or only -+goal, for a given device, is to achieve the maximum-possible -+throughput at all times, then do switch off all low-latency heuristics -+for that device, by setting low_latency to 0. Full details in Section 3. -+ - On average CPUs, the current version of BFQ can handle devices - performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a - reference, 30-50 KIOPS correspond to very high bandwidths with -@@ -374,11 +381,19 @@ default, low latency mode is enabled. If enabled, interactive and soft - real-time applications are privileged and experience a lower latency, - as explained in more detail in the description of how BFQ works. - --DO NOT enable this mode if you need full control on bandwidth -+DISABLE this mode if you need full control on bandwidth - distribution. In fact, if it is enabled, then BFQ automatically - increases the bandwidth share of privileged applications, as the main - means to guarantee a lower latency to them. - -+In addition, as already highlighted at the beginning of this document, -+DISABLE this mode if your only goal is to achieve a high throughput. -+In fact, privileging the I/O of some application over the rest may -+entail a lower throughput. 
To achieve the highest-possible throughput -+on a non-rotational device, setting slice_idle to 0 may be needed too -+(at the cost of giving up any strong guarantee on fairness and low -+latency). -+ - timeout_sync - ------------ - -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index 5e8cbeab45a1..cdf608e76048 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -36,6 +36,11 @@ - * boost the throughput), and yet guarantee a low latency to - * interactive and soft real-time applications. - * -+ * NOTE: if the main or only goal, with a given device, is to achieve -+ * the maximum-possible throughput at all times, then do switch off -+ * all low-latency heuristics for that device, by setting low_latency -+ * to 0. -+ * - * BFQ is described in [1], where also a reference to the initial, more - * theoretical paper on BFQ can be found. The interested reader can find - * in the latter paper full details on the main algorithm, as well as - -From eb93d3e00badadb619cc8f62c1c04d1bf61e2890 Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@linaro.org> -Date: Fri, 12 May 2017 10:19:52 +0200 -Subject: [PATCH 3/5] Fix commit "don't dereference bic before null checking - it" - -Signed-off-by: Paolo Valente <paolo.valente@linaro.org> ---- - block/bfq-iosched.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index cdf608e76048..c98dff9637c0 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -4589,7 +4589,6 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, - bool bfqq_already_existing = false, split = false; - - spin_lock_irqsave(q->queue_lock, flags); -- bfq_check_ioprio_change(bic, bio); - - if (!bic) - goto queue_fail; - -From 8a119d7f3e592372a52d87093b2b4eb90330f055 Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@linaro.org> -Date: Wed, 14 Jun 2017 15:35:44 +0200 -Subject: [PATCH 4/5] block, bfq: don't change ioprio class for a bfq_queue on - a 
service tree - -On each deactivation or re-scheduling (after being served) of a -bfq_queue, BFQ invokes the function __bfq_entity_update_weight_prio(), -to perform pending updates of ioprio, weight and ioprio class for the -bfq_queue. BFQ also invokes this function on I/O-request dispatches, -to raise or lower weights more quickly when needed, thereby improving -latency. However, the entity representing the bfq_queue may be on the -active (sub)tree of a service tree when this happens, and, although -with a very low probability, the bfq_queue may happen to also have a -pending change of its ioprio class. If both conditions hold when -__bfq_entity_update_weight_prio() is invoked, then the entity moves to -a sort of hybrid state: the new service tree for the entity, as -returned by bfq_entity_service_tree(), differs from service tree on -which the entity still is. The functions that handle activations and -deactivations of entities do not cope with such a hybrid state (and -would need to become more complex to cope). - -This commit addresses this issue by just making -__bfq_entity_update_weight_prio() not perform also a possible pending -change of ioprio class, when invoked on an I/O-request dispatch for a -bfq_queue. Such a change is thus postponed to when -__bfq_entity_update_weight_prio() is invoked on deactivation or -re-scheduling of the bfq_queue. 
- -Reported-by: Marco Piazza <mpiazza@gmail.com> -Reported-by: Laurentiu Nicola <lnicola@dend.ro> -Signed-off-by: Paolo Valente <paolo.valente@linaro.org> -Tested-by: Marco Piazza <mpiazza@gmail.com> ---- - block/bfq-iosched.c | 14 ++++++++++---- - block/bfq-sched.c | 38 ++++++++++++++++++++++++++++++++++---- - 2 files changed, 44 insertions(+), 8 deletions(-) - -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index c98dff9637c0..c390711ea523 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -3689,11 +3689,17 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) - } - } - } -- /* Update weight both if it must be raised and if it must be lowered */ -+ /* -+ * To improve latency (for this or other queues), immediately -+ * update weight both if it must be raised and if it must be -+ * lowered. Since, entity may be on some active tree here, and -+ * might have a pending change of its ioprio class, invoke -+ * next function with the last parameter unset (see the -+ * comments on the function). -+ */ - if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1)) -- __bfq_entity_update_weight_prio( -- bfq_entity_service_tree(entity), -- entity); -+ __bfq_entity_update_weight_prio(bfq_entity_service_tree(entity), -+ entity, false); - } - - /* -diff --git a/block/bfq-sched.c b/block/bfq-sched.c -index 36fb773c0832..d9a107dadc6d 100644 ---- a/block/bfq-sched.c -+++ b/block/bfq-sched.c -@@ -766,9 +766,28 @@ static void bfq_forget_idle(struct bfq_service_tree *st) - bfq_put_idle_entity(st, first_idle); - } - -+/* -+ * Update weight and priority of entity. If update_class_too is true, -+ * then update the ioprio_class of entity too. -+ * -+ * The reason why the update of ioprio_class is controlled through the -+ * last parameter is as follows. Changing the ioprio class of an -+ * entity implies changing the destination service trees for that -+ * entity. 
If such a change occurred when the entity is already on one -+ * of the service trees for its previous class, then the state of the -+ * entity would become more complex: none of the new possible service -+ * trees for the entity, according to bfq_entity_service_tree(), would -+ * match any of the possible service trees on which the entity -+ * is. Complex operations involving these trees, such as entity -+ * activations and deactivations, should take into account this -+ * additional complexity. To avoid this issue, this function is -+ * invoked with update_class_too unset in the points in the code where -+ * entity may happen to be on some tree. -+ */ - static struct bfq_service_tree * - __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, -- struct bfq_entity *entity) -+ struct bfq_entity *entity, -+ bool update_class_too) - { - struct bfq_service_tree *new_st = old_st; - -@@ -813,9 +832,15 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, - bfq_weight_to_ioprio(entity->orig_weight); - } - -- if (bfqq) -+ if (bfqq && update_class_too) - bfqq->ioprio_class = bfqq->new_ioprio_class; -- entity->prio_changed = 0; -+ -+ /* -+ * Reset prio_changed only if the ioprio_class change -+ * is not pending any longer. -+ */ -+ if (!bfqq || bfqq->ioprio_class == bfqq->new_ioprio_class) -+ entity->prio_changed = 0; - - /* - * NOTE: here we may be changing the weight too early, -@@ -964,7 +989,12 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - struct bfq_sched_data *sd = entity->sched_data; - -- st = __bfq_entity_update_weight_prio(st, entity); -+ /* -+ * When this function is invoked, entity is not in any service -+ * tree, then it is safe to invoke next function with the last -+ * parameter set (see the comments on the function). 
-+ */ -+ st = __bfq_entity_update_weight_prio(st, entity, true); - bfq_calc_finish(entity, entity->budget); - - /* - -From 2024487bd20c823bb5f96fe7ffbccf136628a2f7 Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@linaro.org> -Date: Mon, 3 Jul 2017 22:00:29 +0200 -Subject: [PATCH 5/5] BFQ-v8r12 - -Signed-off-by: Paolo Valente <paolo.valente@linaro.org> ---- - block/bfq-iosched.c | 2 +- - block/bfq.h | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index c390711ea523..ea92c7461b89 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -5300,7 +5300,7 @@ static struct blkcg_policy blkcg_policy_bfq = { - static int __init bfq_init(void) - { - int ret; -- char msg[60] = "BFQ I/O-scheduler: v8r11"; -+ char msg[60] = "BFQ I/O-scheduler: v8r12"; - - #ifdef CONFIG_BFQ_GROUP_IOSCHED - ret = blkcg_policy_register(&blkcg_policy_bfq); -diff --git a/block/bfq.h b/block/bfq.h -index 5f08990be66e..fe7816832035 100644 ---- a/block/bfq.h -+++ b/block/bfq.h -@@ -1,5 +1,5 @@ - /* -- * BFQ v8r11 for 4.11.0: data structures and common functions prototypes. -+ * BFQ v8r12 for 4.11.0: data structures and common functions prototypes. 
- * - * Based on ideas and code from CFQ: - * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> diff --git a/0006-BFQ-bugfix-for-v8r12.patch b/0006-BFQ-bugfix-for-v8r12.patch deleted file mode 100644 index 09317c62fb45..000000000000 --- a/0006-BFQ-bugfix-for-v8r12.patch +++ /dev/null @@ -1,780 +0,0 @@ -From 518e5275d6057ee8b3b4eeebaaa779c5aca7a15c Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@linaro.org> -Date: Thu, 20 Jul 2017 10:46:39 +0200 -Subject: [PATCH 1/4] Add extra checks related to entity scheduling - -- extra checks related to ioprio-class changes -- specific check on st->idle in __bfq_requeue_entity - -Signed-off-by: Paolo Valente <paolo.valente@linaro.org> ---- - block/bfq-sched.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/block/bfq-sched.c b/block/bfq-sched.c -index 90d2856358a1..b6eb25887262 100644 ---- a/block/bfq-sched.c -+++ b/block/bfq-sched.c -@@ -812,6 +812,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, - } - #endif - -+ BUG_ON(entity->tree && update_class_too); - BUG_ON(old_st->wsum < entity->weight); - old_st->wsum -= entity->weight; - -@@ -883,8 +884,10 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, - - new_st->wsum += entity->weight; - -- if (new_st != old_st) -+ if (new_st != old_st) { -+ BUG_ON(!update_class_too); - entity->start = new_st->vtime; -+ } - } - - return new_st; -@@ -993,6 +996,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, - * tree, then it is safe to invoke next function with the last - * parameter set (see the comments on the function). - */ -+ BUG_ON(entity->tree); - st = __bfq_entity_update_weight_prio(st, entity, true); - bfq_calc_finish(entity, entity->budget); - -@@ -1113,9 +1117,11 @@ static void __bfq_activate_entity(struct bfq_entity *entity, - * check for that. - */ - bfq_idle_extract(st, entity); -+ BUG_ON(entity->tree); - entity->start = bfq_gt(min_vstart, entity->finish) ? 
- min_vstart : entity->finish; - } else { -+ BUG_ON(entity->tree); - /* - * The finish time of the entity may be invalid, and - * it is in the past for sure, otherwise the queue -@@ -1203,6 +1209,7 @@ static void __bfq_requeue_entity(struct bfq_entity *entity) - */ - bfq_calc_finish(entity, entity->service); - entity->start = entity->finish; -+ BUG_ON(entity->tree && entity->tree == &st->idle); - BUG_ON(entity->tree && entity->tree != &st->active); - /* - * In addition, if the entity had more than one child - -From 6af8852b69527087d11c81a4dcb49d24f297dbce Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@linaro.org> -Date: Fri, 21 Jul 2017 12:08:57 +0200 -Subject: [PATCH 2/4] block, bfq: reset in_service_entity if it becomes idle - -BFQ implements hierarchical scheduling by representing each group of -queues with a generic parent entity. For each parent entity, BFQ -maintains an in_service_entity pointer: if one of the child entities -happens to be in service, in_service_entity points to it. The -resetting of these pointers happens only on queue expirations: when -the in-service queue is expired, i.e., stops to be the queue in -service, BFQ resets all in_service_entity pointers along the -parent-entity path from this queue to the root entity. - -Functions handling the scheduling of entities assume, naturally, that -in-service entities are active, i.e., have pending I/O requests (or, -as a special case, even if they have no pending requests, they are -expected to receive a new request very soon, with the scheduler idling -the storage device while waiting for such an event). Unfortunately, -the above resetting scheme of the in_service_entity pointers may cause -this assumption to be violated. For example, the in-service queue may -happen to remain without requests because of a request merge. In this -case the queue does become idle, and all related data structures are -updated accordingly. 
But in_service_entity still points to the queue -in the parent entity. This inconsistency may even propagate to -higher-level parent entities, if they happen to become idle as well, -as a consequence of the leaf queue becoming idle. For this queue and -parent entities, scheduling functions have an undefined behaviour, -and, as reported, may easily lead to kernel crashes or hangs. - -This commit addresses this issue by simply resetting the -in_service_entity field also when it is detected to point to an entity -becoming idle (regardless of why the entity becomes idle). - -Reported-by: Laurentiu Nicola <lnicola@dend.ro> -Signed-off-by: Paolo Valente <paolo.valente@linaro.org> -Tested-by: Laurentiu Nicola <lnicola@dend.ro> ---- - block/bfq-sched.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/block/bfq-sched.c b/block/bfq-sched.c -index b6eb25887262..fdf1c713d050 100644 ---- a/block/bfq-sched.c -+++ b/block/bfq-sched.c -@@ -1336,8 +1336,10 @@ static bool __bfq_deactivate_entity(struct bfq_entity *entity, - - BUG_ON(is_in_service && entity->tree && entity->tree != &st->active); - -- if (is_in_service) -+ if (is_in_service) { - bfq_calc_finish(entity, entity->service); -+ sd->in_service_entity = NULL; -+ } - - if (entity->tree == &st->active) - bfq_active_extract(st, entity); - -From 52ce0ffe68b5e3fefaed21cfb32f104202eddc25 Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@linaro.org> -Date: Fri, 28 Jul 2017 21:09:51 +0200 -Subject: [PATCH 3/4] block, bfq: consider also in_service_entity to state - whether an entity is active - -Groups of BFQ queues are represented by generic entities in BFQ. When -a queue belonging to a parent entity is deactivated, the parent entity -may need to be deactivated too, in case the deactivated queue was the -only active queue for the parent entity. This deactivation may need to -be propagated upwards if the entity belongs, in its turn, to a further -higher-level entity, and so on. 
In particular, the upward propagation -of deactivation stops at the first parent entity that remains active -even if one of its child entities has been deactivated. - -To decide whether the last non-deactivation condition holds for a -parent entity, BFQ checks whether the field next_in_service is still -not NULL for the parent entity, after the deactivation of one of its -child entities. If it is not NULL, then there are certainly other active -entities in the parent entity, and deactivations can stop. - -Unfortunately, this check misses a corner case: if in_service_entity -is not NULL, then next_in_service may happen to be NULL, although the -parent entity is evidently active. This happens if: 1) the entity -pointed to by in_service_entity is the only active entity in the parent -entity, and 2) according to the definition of next_in_service, the -in_service_entity cannot be considered as next_in_service. See the -comments on the definition of next_in_service for details on this -second point. - -Hitting the above corner case causes crashes. 
- -To address this issue, this commit: -1) Extends the above check on only next_in_service to controlling both -next_in_service and in_service_entity (if any of them is not NULL, -then no further deactivation is performed) -2) Improves the (important) comments on how next_in_service is defined -and updated; in particular it fixes a few rather obscure paragraphs - -Reported-by: Eric Wheeler <bfq-sched@lists.ewheeler.net> -Reported-by: Rick Yiu <rick_yiu@htc.com> -Reported-by: Tom X Nguyen <tom81094@gmail.com> -Signed-off-by: Paolo Valente <paolo.valente@linaro.org> -Tested-by: Eric Wheeler <bfq-sched@lists.ewheeler.net> -Tested-by: Rick Yiu <rick_yiu@htc.com> -Tested-by: Laurentiu Nicola <lnicola@dend.ro> -Tested-by: Tom X Nguyen <tom81094@gmail.com> ---- - block/bfq-sched.c | 140 ++++++++++++++++++++++++++++++------------------------ - block/bfq.h | 23 +++++++-- - 2 files changed, 95 insertions(+), 68 deletions(-) - -diff --git a/block/bfq-sched.c b/block/bfq-sched.c -index fdf1c713d050..be985d9d5f17 100644 ---- a/block/bfq-sched.c -+++ b/block/bfq-sched.c -@@ -196,21 +196,23 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service) - - /* - * This function tells whether entity stops being a candidate for next -- * service, according to the following logic. -+ * service, according to the restrictive definition of the field -+ * next_in_service. In particular, this function is invoked for an -+ * entity that is about to be set in service. - * -- * This function is invoked for an entity that is about to be set in -- * service. If such an entity is a queue, then the entity is no longer -- * a candidate for next service (i.e, a candidate entity to serve -- * after the in-service entity is expired). The function then returns -- * true. -+ * If entity is a queue, then the entity is no longer a candidate for -+ * next service according to the that definition, because entity is -+ * about to become the in-service queue. 
This function then returns -+ * true if entity is a queue. - * -- * In contrast, the entity could stil be a candidate for next service -- * if it is not a queue, and has more than one child. In fact, even if -- * one of its children is about to be set in service, other children -- * may still be the next to serve. As a consequence, a non-queue -- * entity is not a candidate for next-service only if it has only one -- * child. And only if this condition holds, then the function returns -- * true for a non-queue entity. -+ * In contrast, entity could still be a candidate for next service if -+ * it is not a queue, and has more than one active child. In fact, -+ * even if one of its children is about to be set in service, other -+ * active children may still be the next to serve, for the parent -+ * entity, even according to the above definition. As a consequence, a -+ * non-queue entity is not a candidate for next-service only if it has -+ * only one active child. And only if this condition holds, then this -+ * function returns true for a non-queue entity. - */ - static bool bfq_no_longer_next_in_service(struct bfq_entity *entity) - { -@@ -223,6 +225,18 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity) - - BUG_ON(bfqg == ((struct bfq_data *)(bfqg->bfqd))->root_group); - BUG_ON(bfqg->active_entities == 0); -+ /* -+ * The field active_entities does not always contain the -+ * actual number of active children entities: it happens to -+ * not account for the in-service entity in case the latter is -+ * removed from its active tree (which may get done after -+ * invoking the function bfq_no_longer_next_in_service in -+ * bfq_get_next_queue). Fortunately, here, i.e., while -+ * bfq_no_longer_next_in_service is not yet completed in -+ * bfq_get_next_queue, bfq_active_extract has not yet been -+ * invoked, and thus active_entities still coincides with the -+ * actual number of active entities. 
-+ */ - if (bfqg->active_entities == 1) - return true; - -@@ -1089,7 +1103,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, - * one of its children receives a new request. - * - * Basically, this function updates the timestamps of entity and -- * inserts entity into its active tree, ater possible extracting it -+ * inserts entity into its active tree, ater possibly extracting it - * from its idle tree. - */ - static void __bfq_activate_entity(struct bfq_entity *entity, -@@ -1213,7 +1227,7 @@ static void __bfq_requeue_entity(struct bfq_entity *entity) - BUG_ON(entity->tree && entity->tree != &st->active); - /* - * In addition, if the entity had more than one child -- * when set in service, then was not extracted from -+ * when set in service, then it was not extracted from - * the active tree. This implies that the position of - * the entity in the active tree may need to be - * changed now, because we have just updated the start -@@ -1221,9 +1235,8 @@ static void __bfq_requeue_entity(struct bfq_entity *entity) - * time in a moment (the requeueing is then, more - * precisely, a repositioning in this case). To - * implement this repositioning, we: 1) dequeue the -- * entity here, 2) update the finish time and -- * requeue the entity according to the new -- * timestamps below. -+ * entity here, 2) update the finish time and requeue -+ * the entity according to the new timestamps below. - */ - if (entity->tree) - bfq_active_extract(st, entity); -@@ -1270,9 +1283,9 @@ static void __bfq_activate_requeue_entity(struct bfq_entity *entity, - - - /** -- * bfq_activate_entity - activate or requeue an entity representing a bfq_queue, -- * and activate, requeue or reposition all ancestors -- * for which such an update becomes necessary. -+ * bfq_activate_requeue_entity - activate or requeue an entity representing a bfq_queue, -+ * and activate, requeue or reposition all ancestors -+ * for which such an update becomes necessary. 
- * @entity: the entity to activate. - * @non_blocking_wait_rq: true if this entity was waiting for a request - * @requeue: true if this is a requeue, which implies that bfqq is -@@ -1308,9 +1321,9 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity, - * @ins_into_idle_tree: if false, the entity will not be put into the - * idle tree. - * -- * Deactivates an entity, independently from its previous state. Must -+ * Deactivates an entity, independently of its previous state. Must - * be invoked only if entity is on a service tree. Extracts the entity -- * from that tree, and if necessary and allowed, puts it on the idle -+ * from that tree, and if necessary and allowed, puts it into the idle - * tree. - */ - static bool __bfq_deactivate_entity(struct bfq_entity *entity, -@@ -1359,7 +1372,7 @@ static bool __bfq_deactivate_entity(struct bfq_entity *entity, - /** - * bfq_deactivate_entity - deactivate an entity representing a bfq_queue. - * @entity: the entity to deactivate. -- * @ins_into_idle_tree: true if the entity can be put on the idle tree -+ * @ins_into_idle_tree: true if the entity can be put into the idle tree - */ - static void bfq_deactivate_entity(struct bfq_entity *entity, - bool ins_into_idle_tree, -@@ -1406,16 +1419,29 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, - */ - bfq_update_next_in_service(sd, NULL); - -- if (sd->next_in_service) { -+ if (sd->next_in_service || sd->in_service_entity) { - /* -- * The parent entity is still backlogged, -- * because next_in_service is not NULL. So, no -- * further upwards deactivation must be -- * performed. Yet, next_in_service has -- * changed. Then the schedule does need to be -- * updated upwards. -+ * The parent entity is still active, because -+ * either next_in_service or in_service_entity -+ * is not NULL. So, no further upwards -+ * deactivation must be performed. Yet, -+ * next_in_service has changed. Then the -+ * schedule does need to be updated upwards. 
-+ * -+ * NOTE If in_service_entity is not NULL, then -+ * next_in_service may happen to be NULL, -+ * although the parent entity is evidently -+ * active. This happens if 1) the entity -+ * pointed by in_service_entity is the only -+ * active entity in the parent entity, and 2) -+ * according to the definition of -+ * next_in_service, the in_service_entity -+ * cannot be considered as -+ * next_in_service. See the comments on the -+ * definition of next_in_service for details. - */ - BUG_ON(sd->next_in_service == entity); -+ BUG_ON(sd->in_service_entity == entity); - break; - } - -@@ -1806,45 +1832,33 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) - - /* - * If entity is no longer a candidate for next -- * service, then we extract it from its active tree, -- * for the following reason. To further boost the -- * throughput in some special case, BFQ needs to know -- * which is the next candidate entity to serve, while -- * there is already an entity in service. In this -- * respect, to make it easy to compute/update the next -- * candidate entity to serve after the current -- * candidate has been set in service, there is a case -- * where it is necessary to extract the current -- * candidate from its service tree. Such a case is -- * when the entity just set in service cannot be also -- * a candidate for next service. Details about when -- * this conditions holds are reported in the comments -- * on the function bfq_no_longer_next_in_service() -- * invoked below. -+ * service, then it must be extracted from its active -+ * tree, so as to make sure that it won't be -+ * considered when computing next_in_service. See the -+ * comments on the function -+ * bfq_no_longer_next_in_service() for details. 
- */ - if (bfq_no_longer_next_in_service(entity)) - bfq_active_extract(bfq_entity_service_tree(entity), - entity); - - /* -- * For the same reason why we may have just extracted -- * entity from its active tree, we may need to update -- * next_in_service for the sched_data of entity too, -- * regardless of whether entity has been extracted. -- * In fact, even if entity has not been extracted, a -- * descendant entity may get extracted. Such an event -- * would cause a change in next_in_service for the -- * level of the descendant entity, and thus possibly -- * back to upper levels. -+ * Even if entity is not to be extracted according to -+ * the above check, a descendant entity may get -+ * extracted in one of the next iterations of this -+ * loop. Such an event could cause a change in -+ * next_in_service for the level of the descendant -+ * entity, and thus possibly back to this level. - * -- * We cannot perform the resulting needed update -- * before the end of this loop, because, to know which -- * is the correct next-to-serve candidate entity for -- * each level, we need first to find the leaf entity -- * to set in service. In fact, only after we know -- * which is the next-to-serve leaf entity, we can -- * discover whether the parent entity of the leaf -- * entity becomes the next-to-serve, and so on. -+ * However, we cannot perform the resulting needed -+ * update of next_in_service for this level before the -+ * end of the whole loop, because, to know which is -+ * the correct next-to-serve candidate entity for each -+ * level, we need first to find the leaf entity to set -+ * in service. In fact, only after we know which is -+ * the next-to-serve leaf entity, we can discover -+ * whether the parent entity of the leaf entity -+ * becomes the next-to-serve, and so on. 
- */ - - /* Log some information */ -diff --git a/block/bfq.h b/block/bfq.h -index 77dc72c35fbf..ffa833863d88 100644 ---- a/block/bfq.h -+++ b/block/bfq.h -@@ -68,17 +68,30 @@ struct bfq_service_tree { - * - * bfq_sched_data is the basic scheduler queue. It supports three - * ioprio_classes, and can be used either as a toplevel queue or as an -- * intermediate queue on a hierarchical setup. @next_in_service -- * points to the active entity of the sched_data service trees that -- * will be scheduled next. It is used to reduce the number of steps -- * needed for each hierarchical-schedule update. -+ * intermediate queue in a hierarchical setup. - * - * The supported ioprio_classes are the same as in CFQ, in descending - * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. - * Requests from higher priority queues are served before all the - * requests from lower priority queues; among requests of the same - * queue requests are served according to B-WF2Q+. -- * All the fields are protected by the queue lock of the containing bfqd. -+ * -+ * The schedule is implemented by the service trees, plus the field -+ * @next_in_service, which points to the entity on the active trees -+ * that will be served next, if 1) no changes in the schedule occurs -+ * before the current in-service entity is expired, 2) the in-service -+ * queue becomes idle when it expires, and 3) if the entity pointed by -+ * in_service_entity is not a queue, then the in-service child entity -+ * of the entity pointed by in_service_entity becomes idle on -+ * expiration. This peculiar definition allows for the following -+ * optimization, not yet exploited: while a given entity is still in -+ * service, we already know which is the best candidate for next -+ * service among the other active entitities in the same parent -+ * entity. We can then quickly compare the timestamps of the -+ * in-service entity with those of such best candidate. 
-+ * -+ * All the fields are protected by the queue lock of the containing -+ * bfqd. - */ - struct bfq_sched_data { - struct bfq_entity *in_service_entity; /* entity in service */ - -From ef8d4eb597ec77088b5d8cd08ff5a9145c47a57c Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@linaro.org> -Date: Thu, 4 May 2017 10:53:43 +0200 -Subject: [PATCH 4/4] block, bfq: improve and refactor throughput-boosting - logic - -When a queue associated with a process remains empty, there are cases -where throughput gets boosted if the device is idled to await the -arrival of a new I/O request for that queue. Currently, BFQ assumes -that one of these cases is when the device has no internal queueing -(regardless of the properties of the I/O being served). Unfortunately, -this condition has proved to be too general. So, this commit refines it -as "the device has no internal queueing and is rotational". - -This refinement provides a significant throughput boost with random -I/O, on flash-based storage without internal queueing. For example, on -a HiKey board, throughput increases by up to 125%, growing, e.g., from -6.9MB/s to 15.6MB/s with two or three random readers in parallel. - -This commit also refactors the code related to device idling, for the -following reason. Finding the change that provides the above large -improvement has been slightly more difficult than it had to be, -because the logic that decides whether to idle the device is still -scattered across three functions. Almost all of the logic is in the -function bfq_bfqq_may_idle, but (1) part of the decision is made in -bfq_update_idle_window, and (2) the function bfq_bfqq_must_idle may -switch off idling regardless of the output of bfq_bfqq_may_idle. In -addition, both bfq_update_idle_window and bfq_bfqq_must_idle make -their decisions as a function of parameters that are used, for similar -purposes, also in bfq_bfqq_may_idle. This commit addresses this issue -by moving all the logic into bfq_bfqq_may_idle. 
- -Signed-off-by: Paolo Valente <paolo.valente@linaro.org> -Signed-off-by: Luca Miccio <lucmiccio@gmail.com> ---- - block/bfq-iosched.c | 141 ++++++++++++++++++++++++++++------------------------ - block/bfq.h | 12 ++--- - 2 files changed, 83 insertions(+), 70 deletions(-) - -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index ea92c7461b89..e75df37a1cd3 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -684,10 +684,10 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, - unsigned int old_wr_coeff; - bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq); - -- if (bic->saved_idle_window) -- bfq_mark_bfqq_idle_window(bfqq); -+ if (bic->saved_has_short_ttime) -+ bfq_mark_bfqq_has_short_ttime(bfqq); - else -- bfq_clear_bfqq_idle_window(bfqq); -+ bfq_clear_bfqq_has_short_ttime(bfqq); - - if (bic->saved_IO_bound) - bfq_mark_bfqq_IO_bound(bfqq); -@@ -2047,7 +2047,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) - if (!bic) - return; - -- bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); -+ bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq); - bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); - bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); - bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); -@@ -3214,9 +3214,9 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, - } - - bfq_log_bfqq(bfqd, bfqq, -- "expire (%d, slow %d, num_disp %d, idle_win %d, weight %d)", -+ "expire (%d, slow %d, num_disp %d, short_ttime %d, weight %d)", - reason, slow, bfqq->dispatched, -- bfq_bfqq_idle_window(bfqq), entity->weight); -+ bfq_bfqq_has_short_ttime(bfqq), entity->weight); - - /* - * Increase, decrease or leave budget unchanged according to -@@ -3298,7 +3298,10 @@ static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) - static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) - { - struct bfq_data *bfqd = bfqq->bfqd; -- bool idling_boosts_thr, idling_boosts_thr_without_issues, -+ bool 
rot_without_queueing = -+ !blk_queue_nonrot(bfqd->queue) && !bfqd->hw_tag, -+ bfqq_sequential_and_IO_bound, -+ idling_boosts_thr, idling_boosts_thr_without_issues, - idling_needed_for_service_guarantees, - asymmetric_scenario; - -@@ -3306,27 +3309,44 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) - return true; - - /* -+ * Idling is performed only if slice_idle > 0. In addition, we -+ * do not idle if -+ * (a) bfqq is async -+ * (b) bfqq is in the idle io prio class: in this case we do -+ * not idle because we want to minimize the bandwidth that -+ * queues in this class can steal to higher-priority queues -+ */ -+ if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_sync(bfqq) || -+ bfq_class_idle(bfqq)) -+ return false; -+ -+ bfqq_sequential_and_IO_bound = !BFQQ_SEEKY(bfqq) && -+ bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_has_short_ttime(bfqq); -+ /* - * The next variable takes into account the cases where idling - * boosts the throughput. - * - * The value of the variable is computed considering, first, that - * idling is virtually always beneficial for the throughput if: -- * (a) the device is not NCQ-capable, or -- * (b) regardless of the presence of NCQ, the device is rotational -- * and the request pattern for bfqq is I/O-bound and sequential. -+ * (a) the device is not NCQ-capable and rotational, or -+ * (b) regardless of the presence of NCQ, the device is rotational and -+ * the request pattern for bfqq is I/O-bound and sequential, or -+ * (c) regardless of whether it is rotational, the device is -+ * not NCQ-capable and the request pattern for bfqq is -+ * I/O-bound and sequential. - * - * Secondly, and in contrast to the above item (b), idling an - * NCQ-capable flash-based device would not boost the - * throughput even with sequential I/O; rather it would lower - * the throughput in proportion to how fast the device - * is. 
Accordingly, the next variable is true if any of the -- * above conditions (a) and (b) is true, and, in particular, -- * happens to be false if bfqd is an NCQ-capable flash-based -- * device. -+ * above conditions (a), (b) or (c) is true, and, in -+ * particular, happens to be false if bfqd is an NCQ-capable -+ * flash-based device. - */ -- idling_boosts_thr = !bfqd->hw_tag || -- (!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) && -- bfq_bfqq_idle_window(bfqq)); -+ idling_boosts_thr = rot_without_queueing || -+ ((!blk_queue_nonrot(bfqd->queue) || !bfqd->hw_tag) && -+ bfqq_sequential_and_IO_bound); - - /* - * The value of the next variable, -@@ -3497,12 +3517,10 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) - asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq); - - /* -- * We have now all the components we need to compute the return -- * value of the function, which is true only if both the following -- * conditions hold: -- * 1) bfqq is sync, because idling make sense only for sync queues; -- * 2) idling either boosts the throughput (without issues), or -- * is necessary to preserve service guarantees. -+ * We have now all the components we need to compute the -+ * return value of the function, which is true only if idling -+ * either boosts the throughput (without issues), or is -+ * necessary to preserve service guarantees. 
- */ - bfq_log_bfqq(bfqd, bfqq, "may_idle: sync %d idling_boosts_thr %d", - bfq_bfqq_sync(bfqq), idling_boosts_thr); -@@ -3514,9 +3532,8 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) - bfq_bfqq_IO_bound(bfqq), - idling_needed_for_service_guarantees); - -- return bfq_bfqq_sync(bfqq) && -- (idling_boosts_thr_without_issues || -- idling_needed_for_service_guarantees); -+ return idling_boosts_thr_without_issues || -+ idling_needed_for_service_guarantees; - } - - /* -@@ -3532,10 +3549,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) - */ - static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) - { -- struct bfq_data *bfqd = bfqq->bfqd; -- -- return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 && -- bfq_bfqq_may_idle(bfqq); -+ return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_may_idle(bfqq); - } - - /* -@@ -3994,7 +4008,6 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, - case IOPRIO_CLASS_IDLE: - bfqq->new_ioprio_class = IOPRIO_CLASS_IDLE; - bfqq->new_ioprio = 7; -- bfq_clear_bfqq_idle_window(bfqq); - break; - } - -@@ -4058,8 +4071,14 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, - bfq_set_next_ioprio_data(bfqq, bic); - - if (is_sync) { -+ /* -+ * No need to mark as has_short_ttime if in -+ * idle_class, because no device idling is performed -+ * for queues in idle class -+ */ - if (!bfq_class_idle(bfqq)) -- bfq_mark_bfqq_idle_window(bfqq); -+ /* tentatively mark as has_short_ttime */ -+ bfq_mark_bfqq_has_short_ttime(bfqq); - bfq_mark_bfqq_sync(bfqq); - bfq_mark_bfqq_just_created(bfqq); - } else -@@ -4195,18 +4214,19 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq, - blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT); - } - --/* -- * Disable idle window if the process thinks too long or seeks so much that -- * it doesn't matter. 
-- */ --static void bfq_update_idle_window(struct bfq_data *bfqd, -- struct bfq_queue *bfqq, -- struct bfq_io_cq *bic) -+static void bfq_update_has_short_ttime(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ struct bfq_io_cq *bic) - { -- int enable_idle; -+ bool has_short_ttime = true; - -- /* Don't idle for async or idle io prio class. */ -- if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) -+ /* -+ * No need to update has_short_ttime if bfqq is async or in -+ * idle io prio class, or if bfq_slice_idle is zero, because -+ * no device idling is performed for bfqq in this case. -+ */ -+ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq) || -+ bfqd->bfq_slice_idle == 0) - return; - - /* Idle window just restored, statistics are meaningless. */ -@@ -4214,27 +4234,22 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, - bfqd->bfq_wr_min_idle_time)) - return; - -- enable_idle = bfq_bfqq_idle_window(bfqq); -- -+ /* Think time is infinite if no process is linked to -+ * bfqq. Otherwise check average think time to -+ * decide whether to mark as has_short_ttime -+ */ - if (atomic_read(&bic->icq.ioc->active_ref) == 0 || -- bfqd->bfq_slice_idle == 0 || -- (bfqd->hw_tag && BFQQ_SEEKY(bfqq) && -- bfqq->wr_coeff == 1)) -- enable_idle = 0; -- else if (bfq_sample_valid(bic->ttime.ttime_samples)) { -- if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle && -- bfqq->wr_coeff == 1) -- enable_idle = 0; -- else -- enable_idle = 1; -- } -- bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d", -- enable_idle); -+ (bfq_sample_valid(bic->ttime.ttime_samples) && -+ bic->ttime.ttime_mean > bfqd->bfq_slice_idle)) -+ has_short_ttime = false; -+ -+ bfq_log_bfqq(bfqd, bfqq, "update_has_short_ttime: has_short_ttime %d", -+ has_short_ttime); - -- if (enable_idle) -- bfq_mark_bfqq_idle_window(bfqq); -+ if (has_short_ttime) -+ bfq_mark_bfqq_has_short_ttime(bfqq); - else -- bfq_clear_bfqq_idle_window(bfqq); -+ bfq_clear_bfqq_has_short_ttime(bfqq); - } - - /* -@@ -4250,14 +4265,12 @@ 
static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, - bfqq->meta_pending++; - - bfq_update_io_thinktime(bfqd, bic); -+ bfq_update_has_short_ttime(bfqd, bfqq, bic); - bfq_update_io_seektime(bfqd, bfqq, rq); -- if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || -- !BFQQ_SEEKY(bfqq)) -- bfq_update_idle_window(bfqd, bfqq, bic); - - bfq_log_bfqq(bfqd, bfqq, -- "rq_enqueued: idle_window=%d (seeky %d)", -- bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq)); -+ "rq_enqueued: has_short_ttime=%d (seeky %d)", -+ bfq_bfqq_has_short_ttime(bfqq), BFQQ_SEEKY(bfqq)); - - bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); - -diff --git a/block/bfq.h b/block/bfq.h -index ffa833863d88..1f2c7518d507 100644 ---- a/block/bfq.h -+++ b/block/bfq.h -@@ -349,11 +349,11 @@ struct bfq_io_cq { - #endif - - /* -- * Snapshot of the idle window before merging; taken to -- * remember this value while the queue is merged, so as to be -- * able to restore it in case of split. -+ * Snapshot of the has_short_time flag before merging; taken -+ * to remember its value while the queue is merged, so as to -+ * be able to restore it in case of split. - */ -- bool saved_idle_window; -+ bool saved_has_short_ttime; - /* - * Same purpose as the previous two fields for the I/O bound - * classification of a queue. 
-@@ -610,7 +610,7 @@ enum bfqq_state_flags { - */ - BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ - BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ -- BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ -+ BFQ_BFQQ_FLAG_has_short_ttime, /* queue has a short think time */ - BFQ_BFQQ_FLAG_sync, /* synchronous queue */ - BFQ_BFQQ_FLAG_IO_bound, /* - * bfqq has timed-out at least once -@@ -649,7 +649,7 @@ BFQ_BFQQ_FNS(wait_request); - BFQ_BFQQ_FNS(non_blocking_wait_rq); - BFQ_BFQQ_FNS(must_alloc); - BFQ_BFQQ_FNS(fifo_expire); --BFQ_BFQQ_FNS(idle_window); -+BFQ_BFQQ_FNS(has_short_ttime); - BFQ_BFQQ_FNS(sync); - BFQ_BFQQ_FNS(IO_bound); - BFQ_BFQQ_FNS(in_large_burst); @@ -52,7 +52,7 @@ pkgbase=linux-bfq # pkgname=('linux-bfq' 'linux-bfq-headers' 'linux-bfq-docs') _srcname=linux-4.11 pkgver=4.11.12 -pkgrel=3 +pkgrel=4 arch=('i686' 'x86_64') url="http://algo.ing.unimo.it" license=('GPL2') @@ -62,6 +62,8 @@ _bfqrel=v7r11 _bfqver=v8r11 _bfqpath="http://algo.ing.unimo.it/people/paolo/disk_sched/patches/4.11.0-${_bfqver}" #_bfqpath="https://pf.natalenko.name/mirrors/bfq/4.11.0-${_bfqver}" +#_lucjanpath="https://raw.githubusercontent.com/sirlucjan/lucjan-kernels/master/patches/4.11" +_lucjanpath="https://gitlab.com/sirlucjan/kernel-patches/raw/master/4.11" _gcc_patch="enable_additional_cpu_optimizations_for_gcc_v4.9+_kernel_v3.15+.patch" source=("http://www.kernel.org/pub/linux/kernel/v4.x/${_srcname}.tar.xz" @@ -80,8 +82,8 @@ source=("http://www.kernel.org/pub/linux/kernel/v4.x/${_srcname}.tar.xz" # standard config files for mkinitcpio ramdisk 'linux.preset' # patches from https://github.com/linusw/linux-bfq/commits/bfq-v8 - '0005-BFQ-update-to-v8r12.patch' - '0006-BFQ-bugfix-for-v8r12.patch') + "${_lucjanpath}/0005-BFQ-update-to-v8r12.patch" + "${_lucjanpath}/0006-BFQ-bugfix-for-v8r12.patch") _kernelname=${pkgbase#linux} @@ -437,8 +439,8 @@ sha512sums=('6610eed97ffb7207c71771198c36179b8244ace7222bebb109507720e26c5f17d91 
'57addf780fc68d8e2914514e47d2edd27600cc0d1bf0c7d3786bc3e16ec9c6527eb8e9d95f156da8b77c11a53ac2a8f0d23360547a26350ebc3dca93721ebc42' 'd6faa67f3ef40052152254ae43fee031365d0b1524aa0718b659eb75afc21a3f79ea8d62d66ea311a800109bed545bc8f79e8752319cd378eef2cbd3a09aba22' '2dc6b0ba8f7dbf19d2446c5c5f1823587de89f4e28e9595937dd51a87755099656f2acec50e3e2546ea633ad1bfd1c722e0c2b91eef1d609103d8abdc0a7cbaf' - 'b1f6306a27d7e25eb4ff3eb51cb1fe38b0ca035cff229537d1b9f68bdc25861f2fecdeeeb1582e34cd166ee4275e49e4c679247a4c36109b2dcd6d4fa9456d60' - '9a80a4824e6a8f34a9bcc687b1e1fa91300a5a5fb515db6022e7e43cbacbf2ad93dda1250f968e070be4f6e2429a21e9cf97838120915de9a1144745c34a7188') + '05e38539dc51ad64df02223174ba961264355a34fb703555fccfa18e296492c0622f191e522bcf341d6e7f8763c9e57f85ff8645a62e3b8a42446d17d190afb9' + '30c44c4b603f6ca15e0c58d98160a40a44c8212b94cd7a3457dbf0303d88962a960800f269334f55b4070a6d872d8d9dcccdbfea3ca2aaa389bef7051132495a') validpgpkeys=( 'ABAF11C65A2970B130ABE3C479BE3E4300411886' # Linus Torvalds |