-rw-r--r--  .SRCINFO                       |   14
-rw-r--r--  0005-BFQ-Fix.patch             |   33
-rw-r--r--  0005-BFQ-update-to-v8r6.patch  | 2806
-rw-r--r--  0006-BFQ-Fix.patch             |   24
-rw-r--r--  PKGBUILD                       |   20
-rw-r--r--  config                         |    3
-rw-r--r--  config.x86_64                  |    3

7 files changed, 2824 insertions(+), 79 deletions(-)
diff --git a/.SRCINFO b/.SRCINFO
index 20f637797111..5f0ed1830f9c 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,6 +1,6 @@
pkgbase = linux-bfq
pkgver = 4.8.15
- pkgrel = 1
+ pkgrel = 2
url = http://algo.ing.unimo.it
arch = i686
arch = x86_64
@@ -25,8 +25,7 @@ pkgbase = linux-bfq
source = linux.preset
source = net_handle_no_dst_on_skb_in_icmp6_send.patch
source = 0001-x86-fpu-Fix-invalid-FPU-ptrace-state-after-execve.patch
- source = 0005-BFQ-Fix.patch
- source = 0006-BFQ-Fix.patch
+ source = 0005-BFQ-update-to-v8r6.patch
validpgpkeys = ABAF11C65A2970B130ABE3C479BE3E4300411886
validpgpkeys = 647F28654894E3BD457199BE38DBBDC86092693E
sha512sums = a48a065f21e1c7c4de4cf8ca47b8b8d9a70f86b64e7cfa6e01be490f78895745b9c8790734b1d22182cf1f930fb87eaaa84e62ec8cc1f64ac4be9b949e7c0358
@@ -37,16 +36,15 @@ pkgbase = linux-bfq
sha512sums = dc0649dfe2a5ce8e8879a62df29a4a1959eb1a84e5d896a9cb119d6a85a9bad1b17135371799e0be96532e17c66043d298e4a14b18fad3078a1f425108f888c9
sha512sums = 135afcffee2439e2260a71202658dce9ec5f555de3291b2d49a5cffdd953c6d7851b8a90e961576895555142a618e52220a7e4a46521ca4ba19d37df71887581
sha512sums = 87ae76889ab84ced46c237374c124786551982f8ff7325102af9153aed1ab7be72bec4db1f7516426c5ee34c5ce17221679eb2f129c5cbdeec05c0d3cb7f785d
- sha512sums = 8e8f407262f3f573654e7f0fceec6075fd2da0258a384206d28e5f59698168609e575f3d855adb8bb476b0684da409dab22f7ddc0f00e9c10c67debe4409c30b
+ sha512sums = 6046432243b8f0497b43ea2c5fe3fde5135e058f2be25c02773ead93e2ab3b525b6f7c1c4898b9e67ac2bec0959b972829e997e79fabf6ac87e006700dfaf987
sha512sums = d9d28e02e964704ea96645a5107f8b65cae5f4fb4f537e224e5e3d087fd296cb770c29ac76e0ce95d173bc420ea87fb8f187d616672a60a0cae618b0ef15b8c8
- sha512sums = 643cad54fc54f4790e1d28dbc60f775f117fa3e25e15aff418b0406cc7a9b49db5d67fc02ff7056a951a2d64d5bbc58306c40fa0d588460664c2921ad613745c
- sha512sums = 94fbb7e67b77bb8911b69cdb23bbb74317e75a4ed786e4c6486bf2927042f9374e932ea879d1b250837e42c628abf7082fd6f895a2257c5d3ad652f6a520306a
+ sha512sums = dc9ad074791a9edb3414a71480525a1c4a75b257137c754749c251e45ef376e313d3310766fedcabcd392837a8022bbcd97bfcd17c42feae63f71b4c4064b73d
+ sha512sums = bcc657cd2366b55ba303d4d091ab9958e4f97121e7fdd64013a90be7fb6b19ac28ca2e8973496935475b9a8f8386bcbed8924981a20bb51cf3fc5c6f4f14b22a
sha512sums = d6faa67f3ef40052152254ae43fee031365d0b1524aa0718b659eb75afc21a3f79ea8d62d66ea311a800109bed545bc8f79e8752319cd378eef2cbd3a09aba22
sha512sums = 2dc6b0ba8f7dbf19d2446c5c5f1823587de89f4e28e9595937dd51a87755099656f2acec50e3e2546ea633ad1bfd1c722e0c2b91eef1d609103d8abdc0a7cbaf
sha512sums = c53bd47527adbd2599a583e05a7d24f930dc4e86b1de017486588205ad6f262a51a4551593bc7a1218c96541ea073ea03b770278d947b1cd0d2801311fcc80e5
sha512sums = 002d5e0ccfa5824c1750912a6400ff722672d6587b74ba66b1127cf1228048f604bba107617cf4f4477884039af4d4d196cf1b74cefe43b0bddc82270f11940d
- sha512sums = 3889679e288d51f6fecc7ed6581ccde34acbf1e4861f5c9ca237a1ad13158502757d3fc457d7b6caf1c8c99c9dba842265004154a71cffb8ec14e1030e49e312
- sha512sums = 3c3f3b6407d9a1a63cd91c2b5c35e6c932afa5bf33f1b2e8a530dbd9eacda8c8f956616c4cc9228657624da58e354ba5714252237d84ff3386fd65cf44f06ddc
+ sha512sums = 98612f37013d1b41c76887bc5ed80f70753f6fc859748038dc83f3b53aa89464321951c6c21b364f56b2d4860e594c2091240c70ff70ce616d6b3fdadb817934
pkgname = linux-bfq
pkgdesc = Linux Kernel and modules with the BFQ scheduler.
diff --git a/0005-BFQ-Fix.patch b/0005-BFQ-Fix.patch
deleted file mode 100644
index ad66ca69a2ba..000000000000
--- a/0005-BFQ-Fix.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From 69f18bb587a4805b2b18bb4ba91dced87a8fda06 Mon Sep 17 00:00:00 2001
-From: Paolo Valente <paolo.valente@linaro.org>
-Date: Sat, 22 Oct 2016 15:26:33 +0200
-Subject: [PATCH 86/86] BUGFIX: Replace max wrongly used for modulo numbers
-
-Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
----
- block/bfq-iosched.c | 10 +++++++---
- 1 file changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
-index eef6ff4..c161ff0 100644
---- a/block/bfq-iosched.c
-+++ b/block/bfq-iosched.c
-@@ -2179,9 +2179,13 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
- * not only expires, but also remains with no
- * request.
- */
-- bfqq->last_wr_start_finish += jiffies -
-- max_t(unsigned long, bfqq->last_wr_start_finish,
-- bfqq->budget_timeout);
-+ if (time_after(bfqq->budget_timeout,
-+ bfqq->last_wr_start_finish))
-+ bfqq->last_wr_start_finish +=
-+ jiffies - bfqq->budget_timeout;
-+ else
-+ bfqq->last_wr_start_finish = jiffies;
-+
- if (time_is_after_jiffies(bfqq->last_wr_start_finish)) {
- pr_crit(
- "BFQ WARNING:last %lu budget %lu jiffies %lu",
-Contact GitHub API Training Shop Blog About
-
diff --git a/0005-BFQ-update-to-v8r6.patch b/0005-BFQ-update-to-v8r6.patch
new file mode 100644
index 000000000000..1b42af900abe
--- /dev/null
+++ b/0005-BFQ-update-to-v8r6.patch
@@ -0,0 +1,2806 @@
+From 1acff6b391cd626a495e42c9782bf2139b9f330f Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Wed, 19 Oct 2016 20:11:42 +0200
+Subject: [PATCH 01/24] Documentation/block: add bfq-iosched.txt
+
+Documentation of BFQ benefits, inner workings, interface and tunables.
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ Documentation/block/00-INDEX | 2 +
+ Documentation/block/bfq-iosched.txt | 508 ++++++++++++++++++++++++++++++++++++
+ 2 files changed, 510 insertions(+)
+ create mode 100644 Documentation/block/bfq-iosched.txt
+
+diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX
+index e55103a..8d55b4b 100644
+--- a/Documentation/block/00-INDEX
++++ b/Documentation/block/00-INDEX
+@@ -1,5 +1,7 @@
+ 00-INDEX
+ - This file
++bfq-iosched.txt
++ - BFQ IO scheduler and its tunables
+ biodoc.txt
+ - Notes on the Generic Block Layer Rewrite in Linux 2.5
+ biovecs.txt
+diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
+new file mode 100644
+index 0000000..67e1269
+--- /dev/null
++++ b/Documentation/block/bfq-iosched.txt
+@@ -0,0 +1,508 @@
++BFQ (Budget Fair Queueing)
++==========================
++
++BFQ is a proportional-share I/O scheduler, with some extra
++low-latency capabilities. In addition to cgroups support (blkio or io
++controllers), BFQ's main features are:
++- BFQ guarantees high system and application responsiveness, and a
++ low latency for time-sensitive applications, such as audio or video
++ players;
++- BFQ distributes bandwidth, and not just time, among processes or
++ groups (switching back to time distribution when needed to keep
++ throughput high).
++
++On average CPUs, the current version of BFQ can handle devices
++performing at most ~30 KIOPS; at most ~50 KIOPS on faster CPUs. As a
++reference, 30-50 KIOPS correspond to very high bandwidths with
++sequential I/O (e.g., 8-12 GB/s if I/O requests are 256 KB large), and
++to 120-200 MB/s with 4KB random I/O.
++
++The table of contents follows. Impatient readers can jump straight to Section 3.
++
++CONTENTS
++
++1. When may BFQ be useful?
++ 1-1 Personal systems
++ 1-2 Server systems
++2. How does BFQ work?
++3. What are BFQ's tunables?
++4. Group scheduling with BFQ
++ 4-1 Service guarantees provided
++ 4-2 Interface
++
++1. When may BFQ be useful?
++==========================
++
++BFQ provides the following benefits on personal and server systems.
++
++1-1 Personal systems
++--------------------
++
++Low latency for interactive applications
++
++Regardless of the actual background workload, BFQ guarantees that, for
++interactive tasks, the storage device is virtually as responsive as if
++it was idle. For example, even if one or more of the following
++background workloads are being executed:
++- one or more large files are being read, written or copied,
++- a tree of source files is being compiled,
++- one or more virtual machines are performing I/O,
++- a software update is in progress,
++- indexing daemons are scanning filesystems and updating their
++ databases,
++starting an application or loading a file from within an application
++takes about the same time as if the storage device was idle. As a
++comparison, with CFQ, NOOP or DEADLINE, and under the same conditions,
++applications experience high latencies, or even become unresponsive
++until the background workload terminates (even on SSDs).
++
++Low latency for soft real-time applications
++
++Soft real-time applications, such as audio and video
++players/streamers, also enjoy low latency and a low drop rate,
++regardless of the background I/O workload. As a consequence, these
++applications suffer almost no glitches due to the background workload.
++
++Higher speed for code-development tasks
++
++If some additional workload happens to be executed in parallel, then
++BFQ executes the I/O-related components of typical code-development
++tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
++NOOP or DEADLINE.
++
++High throughput
++
++On hard disks, BFQ achieves up to 30% higher throughput than CFQ, and
++up to 150% higher throughput than DEADLINE and NOOP, with all the
++sequential workloads considered in our tests. With random workloads,
++and with all the workloads on flash-based devices, BFQ achieves,
++instead, about the same throughput as the other schedulers.
++
++Strong fairness/bandwidth guarantees
++
++BFQ distributes the device throughput, and not just the device time,
++as desired among I/O-bound applications, with any workload and
++regardless of the device parameters. BFQ switches to time-based
++distribution (only) for processes that would otherwise cause a
++throughput loss.
++
++1-2 Server systems
++------------------
++
++Most benefits for server systems follow from the same service
++properties as above. In particular, regardless of whether additional,
++possibly heavy workloads are being served, BFQ guarantees:
++
++. audio and video-streaming with zero or very low jitter and drop
++ rate;
++
++. fast retrieval of web pages and embedded objects;
++
++. real-time recording of data in live-dumping applications (e.g.,
++ packet logging);
++
++. responsiveness in local and remote access to a server.
++
++
++2. How does BFQ work?
++=====================
++
++BFQ is a proportional-share I/O scheduler, whose general structure,
++plus a lot of code, are borrowed from CFQ.
++
++- Each process doing I/O on a device is associated with a weight and a
++ (bfq_)queue.
++
++- BFQ grants exclusive access to the device, for a while, to one queue
++ (process) at a time, and implements this service model by
++ associating every queue with a budget, measured in number of
++ sectors.
++
++ - After a queue is granted access to the device, the budget of the
++ queue is decremented, on each request dispatch, by the size of the
++ request.
++
++ - The in-service queue is expired, i.e., its service is suspended,
++ only if one of the following events occurs: 1) the queue finishes
++ its budget, 2) the queue empties, 3) a "budget timeout" fires.
++
++ - The budget timeout prevents processes doing random I/O from
++ holding the device for too long and dramatically reducing
++ throughput.
++
++ - Actually, as in CFQ, a queue associated with a process issuing
++ sync requests may not be expired immediately when it empties. In
++ contrast, BFQ may idle the device for a short time interval,
++ giving the process the chance to go on being served if it issues
++ a new request in time. Device idling typically boosts the
++ throughput on rotational devices, if processes do synchronous
++ and sequential I/O. In addition, under BFQ, device idling is
++ also instrumental in guaranteeing the desired throughput
++ fraction to processes issuing sync requests (see the description
++ of the slice_idle tunable in this document, or [1, 2], for more
++ details).
++
++ - With respect to idling for service guarantees, if several
++ processes are competing for the device at the same time, but
++ all processes (and groups, after the following commit) have
++ the same weight, then BFQ guarantees the expected throughput
++ distribution without ever idling the device. Throughput is
++ thus as high as possible in this common scenario.
++
++ - If low-latency mode is enabled (default configuration), BFQ
++ executes some special heuristics to detect interactive and soft
++ real-time applications (e.g., video or audio players/streamers),
++ and to reduce their latency. The most important action taken to
++ achieve this goal is to give to the queues associated with these
++ applications more than their fair share of the device
++    throughput. For brevity, we call "weight-raising" the whole
++    set of actions taken by BFQ to privilege these queues. In
++ particular, BFQ provides a milder form of weight-raising for
++ interactive applications, and a stronger form for soft real-time
++ applications.
++
++ - BFQ automatically deactivates idling for queues born in a burst of
++ queue creations. In fact, these queues are usually associated with
++ the processes of applications and services that benefit mostly
++ from a high throughput. Examples are systemd during boot, or git
++ grep.
++
++  - Like CFQ, BFQ merges queues performing interleaved I/O, i.e.,
++    performing random I/O that becomes mostly sequential if
++    merged. Unlike CFQ, BFQ achieves this goal with a more
++    reactive mechanism, called Early Queue Merge (EQM). EQM is so
++    responsive in detecting interleaved I/O (cooperating processes)
++    that it enables BFQ to achieve a high throughput, by queue
++    merging, even for queues for which CFQ needs a different
++    mechanism, preemption, to get a high throughput. As such, EQM is a
++    unified mechanism to achieve a high throughput with interleaved
++    I/O.
++
++ - Queues are scheduled according to a variant of WF2Q+, named
++ B-WF2Q+, and implemented using an augmented rb-tree to preserve an
++ O(log N) overall complexity. See [2] for more details. B-WF2Q+ is
++ also ready for hierarchical scheduling. However, for a cleaner
++ logical breakdown, the code that enables and completes
++ hierarchical support is provided in the next commit, which focuses
++ exactly on this feature.
++
++ - B-WF2Q+ guarantees a tight deviation with respect to an ideal,
++ perfectly fair, and smooth service. In particular, B-WF2Q+
++ guarantees that each queue receives a fraction of the device
++ throughput proportional to its weight, even if the throughput
++ fluctuates, and regardless of: the device parameters, the current
++ workload and the budgets assigned to the queue.
++
++    - The last property, budget independence (although perhaps
++      counterintuitive at first), is definitely beneficial, for
++      the following reasons:
++
++ - First, with any proportional-share scheduler, the maximum
++ deviation with respect to an ideal service is proportional to
++ the maximum budget (slice) assigned to queues. As a consequence,
++ BFQ can keep this deviation tight not only because of the
++ accurate service of B-WF2Q+, but also because BFQ *does not*
++ need to assign a larger budget to a queue to let the queue
++ receive a higher fraction of the device throughput.
++
++ - Second, BFQ is free to choose, for every process (queue), the
++ budget that best fits the needs of the process, or best
++ leverages the I/O pattern of the process. In particular, BFQ
++ updates queue budgets with a simple feedback-loop algorithm that
++ allows a high throughput to be achieved, while still providing
++ tight latency guarantees to time-sensitive applications. When
++ the in-service queue expires, this algorithm computes the next
++ budget of the queue so as to:
++
++ - Let large budgets be eventually assigned to the queues
++ associated with I/O-bound applications performing sequential
++ I/O: in fact, the longer these applications are served once
++        they get access to the device, the higher the throughput is.
++
++ - Let small budgets be eventually assigned to the queues
++ associated with time-sensitive applications (which typically
++ perform sporadic and short I/O), because, the smaller the
++ budget assigned to a queue waiting for service is, the sooner
++ B-WF2Q+ will serve that queue (Subsec 3.3 in [2]).
++
++- ioprio classes are served in strict priority order, i.e.,
++ lower-priority queues are not served as long as there are
++ higher-priority queues. Among queues in the same class, the
++ bandwidth is distributed in proportion to the weight of each
++  queue. A small amount of extra bandwidth is nevertheless guaranteed
++  to the Idle class, to prevent it from starving.
++
++
++3. What are BFQ's tunables?
++===========================
++
++The tunables back_seek_max, back_seek_penalty, fifo_expire_async and
++fifo_expire_sync below are the same as in CFQ. Their description is
++just copied from that for CFQ. Some considerations in the description
++of slice_idle are copied from CFQ too.
++
++per-process ioprio and weight
++-----------------------------
++
++Unless the cgroups interface is used, weights can be assigned to
++processes only indirectly, through I/O priorities, and according to
++the relation: weight = (IOPRIO_BE_NR - ioprio) * 10.
++
++slice_idle
++----------
++
++This parameter specifies how long BFQ should idle, waiting for the next
++I/O request, when certain sync BFQ queues become empty. By default
++slice_idle is a non-zero value. Idling has a double purpose: boosting
++throughput and making sure that the desired throughput distribution is
++respected (see the description of how BFQ works, and, if needed, the
++papers referred there).
++
++As for throughput, idling can be very helpful on highly seeky media
++like single-spindle SATA/SAS disks, where it can cut down on the
++overall number of seeks and improve throughput.
++
++Setting slice_idle to 0 will remove all the idling on queues and one
++should see an overall improved throughput on faster storage devices
++like multiple SATA/SAS disks in hardware RAID configuration.
++
++So depending on storage and workload, it might be useful to set
++slice_idle=0. In general, for SATA/SAS disks and software RAID of
++SATA/SAS disks, keeping slice_idle enabled should be useful. For any
++configuration where there are multiple spindles behind a single LUN
++(host-based hardware RAID controller or storage arrays), setting
++slice_idle=0 may result in better throughput and acceptable
++latencies.
++
++Idling is however necessary to have service guarantees enforced in
++case of differentiated weights or differentiated I/O-request lengths.
++To see why, suppose that a given BFQ queue A must get several I/O
++requests served for each request served for another queue B. Idling
++ensures that, if A makes a new I/O request slightly after becoming
++empty, then no request of B is dispatched in the middle, and thus A
++does not lose the possibility to get more than one request dispatched
++before the next request of B is dispatched. Note that idling
++guarantees the desired differentiated treatment of queues only in
++terms of I/O-request dispatches. To guarantee that the actual service
++order then corresponds to the dispatch order, the strict_guarantees
++tunable must be set too.
++
++There is an important flipside to idling: apart from the above cases,
++where it also benefits throughput, idling can severely reduce
++throughput. One important case is random workloads. Because of this
++issue, BFQ tends to avoid idling as much as possible, when it is not
++beneficial also for throughput. As a consequence of this behavior, and
++of further issues described for the strict_guarantees tunable,
++short-term service guarantees may be occasionally violated. And, in
++some cases, these guarantees may be more important than guaranteeing
++maximum throughput. For example, in video playing/streaming, a very
++low drop rate may be more important than maximum throughput. In these
++cases, consider setting the strict_guarantees parameter.
++
++strict_guarantees
++-----------------
++
++If this parameter is set (default: unset), then BFQ
++
++- always performs idling when the in-service queue becomes empty;
++
++- forces the device to serve one I/O request at a time, by dispatching a
++ new request only if there is no outstanding request.
++
++In the presence of differentiated weights or I/O-request sizes, both
++the above conditions are needed to guarantee that every BFQ queue
++receives its allotted share of the bandwidth. The first condition is
++needed for the reasons explained in the description of the slice_idle
++tunable. The second condition is needed because all modern storage
++devices reorder internally-queued requests, which may trivially break
++the service guarantees enforced by the I/O scheduler.
++
++Setting strict_guarantees may evidently affect throughput.
++
++back_seek_max
++-------------
++
++This specifies, in KiB, the maximum "distance" for backward seeking.
++The distance is the amount of space from the current head location to
++the sectors that lie behind it.
++
++This parameter allows the scheduler to anticipate requests in the "backward"
++direction and consider them as being the "next" if they are within this
++distance from the current head location.
++
++back_seek_penalty
++-----------------
++
++This parameter is used to compute the cost of backward seeking. If the
++backward distance of a request is just 1/back_seek_penalty of a "front"
++request's distance, then the seek cost of the two requests is equivalent.
++
++So the scheduler will not bias toward one or the other request (otherwise
++it would bias toward the front request). The default value of back_seek_penalty is 2.
++
++fifo_expire_async
++-----------------
++
++This parameter is used to set the timeout of asynchronous requests. Default
++value of this is 248ms.
++
++fifo_expire_sync
++----------------
++
++This parameter is used to set the timeout of synchronous requests. Default
++value of this is 124ms. To favor synchronous requests over asynchronous
++ones, this value should be decreased relative to fifo_expire_async.
++
++low_latency
++-----------
++
++This parameter is used to enable/disable BFQ's low latency mode. By
++default, low latency mode is enabled. If enabled, interactive and soft
++real-time applications are privileged and experience a lower latency,
++as explained in more detail in the description of how BFQ works.
++
++timeout_sync
++------------
++
++Maximum amount of device time that can be given to a task (queue) once
++it has been selected for service. On devices with costly seeks,
++increasing this time usually increases maximum throughput. On the
++opposite end, increasing this time coarsens the granularity of the
++short-term bandwidth and latency guarantees, especially if the
++following parameter is set to zero.
++
++max_budget
++----------
++
++Maximum amount of service, measured in sectors, that can be provided
++to a BFQ queue once it is set in service (of course within the limits
++of the above timeout). According to what said in the description of
++the algoritm, larger values increase the throughput in proportion to
++the percentage of sequential I/O requests issued. The price of larger
++values is that they coarsen the granularity of short-term bandwidth
++and latency guarantees.
++
++The default value is 0, which enables auto-tuning: BFQ sets max_budget
++to the maximum number of sectors that can be served during
++timeout_sync, according to the estimated peak rate.
++
++weights
++-------
++
++Read-only parameter, used to show the weights of the currently active
++BFQ queues.
++
++
++wr_ tunables
++------------
++
++BFQ exports a few parameters to control/tune the behavior of
++low-latency heuristics.
++
++wr_coeff
++
++Factor by which the weight of a weight-raised queue is multiplied. If
++the queue is deemed soft real-time, then the weight is further
++multiplied by an additional, constant factor.
++
++wr_max_time
++
++Maximum duration of a weight-raising period for an interactive task
++(ms). If set to zero (default value), then this value is computed
++automatically, as a function of the peak rate of the device. In any
++case, when the value of this parameter is read, it always reports the
++current duration, regardless of whether it has been set manually or
++computed automatically.
++
++wr_max_softrt_rate
++
++Maximum service rate below which a queue is deemed to be associated
++with a soft real-time application, and is then weight-raised
++accordingly (sectors/sec).
++
++wr_min_idle_time
++
++Minimum idle period after which interactive weight-raising may be
++reactivated for a queue (in ms).
++
++wr_rt_max_time
++
++Maximum weight-raising duration for soft real-time queues (in ms). The
++start time from which this duration is considered is automatically
++moved forward if the queue is detected to be still soft real-time
++before the current soft real-time weight-raising period finishes.
++
++wr_min_inter_arr_async
++
++Minimum period between I/O request arrivals after which weight-raising
++may be reactivated for an already busy async queue (in ms).
++
++
++4. Group scheduling with BFQ
++============================
++
++BFQ supports both cgroup-v1 and cgroup-v2 io controllers, namely blkio
++and io. In particular, BFQ supports weight-based proportional
++share.
++
++4-1 Service guarantees provided
++-------------------------------
++
++With BFQ, proportional share means true proportional share of the
++device bandwidth, according to group weights. For example, a group
++with weight 200 gets twice the bandwidth, and not just twice the time,
++of a group with weight 100.
++
++BFQ supports hierarchies (group trees) of any depth. Bandwidth is
++distributed among groups and processes in the expected way: for each
++group, the children of the group share the whole bandwidth of the
++group in proportion to their weights. In particular, this implies
++that, for each leaf group, every process of the group receives the
++same share of the whole group bandwidth, unless the ioprio of the
++process is modified.
++
++The resource-sharing guarantee for a group may partially or totally
++switch from bandwidth to time, if providing bandwidth guarantees to
++the group lowers the throughput too much. This switch occurs on a
++per-process basis: if a process of a leaf group causes throughput loss
++if served in such a way to receive its share of the bandwidth, then
++BFQ switches back to just time-based proportional share for that
++process.
++
++4-2 Interface
++-------------
++
++To get proportional sharing of bandwidth with BFQ for a given device,
++BFQ must of course be the active scheduler for that device.
++
++Within each group directory, the names of the files associated with
++BFQ-specific cgroup parameters and stats begin with the "bfq."
++prefix. So, with cgroups-v1 or cgroups-v2, the full prefix for
++BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group
++parameter to set the weight of a group with BFQ is blkio.bfq.weight
++or io.bfq.weight.
++
++Parameters to set
++-----------------
++
++For each group, there is only the following parameter to set.
++
++weight (namely blkio.bfq.weight or io.bfq.weight): the weight of the
++group inside its parent. Available values: 1..10000 (default 100). The
++linear mapping between ioprio and weights, described at the beginning
++of the tunable section, is still valid, but all weights higher than
++IOPRIO_BE_NR*10 are mapped to ioprio 0.
++
++
++[1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
++ Scheduler", Proceedings of the First Workshop on Mobile System
++ Technologies (MST-2015), May 2015.
++ http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
++
++[2] P. Valente and M. Andreolini, "Improving Application
++ Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of
++ the 5th Annual International Systems and Storage Conference
++ (SYSTOR '12), June 2012.
++ Slightly extended version:
++ http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite-
++ results.pdf
+
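As a worked illustration of the ioprio-to-weight relation documented above (weight = (IOPRIO_BE_NR - ioprio) * 10), here is a minimal user-space sketch. It assumes IOPRIO_BE_NR = 8, the value Linux uses for the best-effort class; the helper name is illustrative, not the kernel's symbol:

#include <stdio.h>

#define IOPRIO_BE_NR 8  /* number of best-effort priority levels in Linux */

/* Illustrative helper implementing the mapping rule from the doc above. */
static int bfq_ioprio_to_weight(int ioprio)
{
    return (IOPRIO_BE_NR - ioprio) * 10;
}

int main(void)
{
    int ioprio;

    /* ioprio 0 (highest BE priority) -> weight 80 ... ioprio 7 -> weight 10 */
    for (ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++)
        printf("ioprio %d -> weight %d\n", ioprio,
               bfq_ioprio_to_weight(ioprio));
    return 0;
}

Note that, per the documentation above, the cgroup interface accepts a wider weight range (1..10000); weights above IOPRIO_BE_NR * 10 simply map to ioprio 0.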
+From 11f0d8caa035ff4fe6fe02ebc0f2f14dc5473479 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Sat, 22 Oct 2016 15:26:33 +0200
+Subject: [PATCH 02/24] BUGFIX: Replace max wrongly used for modulo numbers
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index eef6ff4..c161ff0 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -2179,9 +2179,13 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
+ * not only expires, but also remains with no
+ * request.
+ */
+- bfqq->last_wr_start_finish += jiffies -
+- max_t(unsigned long, bfqq->last_wr_start_finish,
+- bfqq->budget_timeout);
++ if (time_after(bfqq->budget_timeout,
++ bfqq->last_wr_start_finish))
++ bfqq->last_wr_start_finish +=
++ jiffies - bfqq->budget_timeout;
++ else
++ bfqq->last_wr_start_finish = jiffies;
++
+ if (time_is_after_jiffies(bfqq->last_wr_start_finish)) {
+ pr_crit(
+ "BFQ WARNING:last %lu budget %lu jiffies %lu",
+
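The bugfix above replaces a max_t() on two jiffies values with time_after(). To see why a plain maximum is wrong for modulo (wrapping) numbers, here is a stand-alone sketch; the time_after() below reproduces the comparison idiom of include/linux/jiffies.h (minus the kernel's typecheck()), while the variable values are illustrative:

#include <stdio.h>

/* Wraparound-safe "a is later than b", as in include/linux/jiffies.h. */
#define time_after(a, b) ((long)((b) - (a)) < 0)

int main(void)
{
    /* budget_timeout lies just after a jiffies wrap, the other just before */
    unsigned long budget_timeout = 5;
    unsigned long last_wr_start_finish = -10UL;

    /* A plain max picks the numerically larger, pre-wrap value... */
    unsigned long by_max = last_wr_start_finish > budget_timeout ?
                           last_wr_start_finish : budget_timeout;

    printf("max picks %lu (stale pre-wrap value)\n", by_max);
    /* ...while time_after() correctly sees budget_timeout as later. */
    printf("time_after(budget_timeout, last) = %d\n",
           time_after(budget_timeout, last_wr_start_finish));
    return 0;
}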
+From eb59d4ac664f64372c440006b0cd27bf137a2025 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Mon, 24 Oct 2016 17:08:32 +0200
+Subject: [PATCH 03/24] Improve bfq-iosched.txt
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ Documentation/block/bfq-iosched.txt | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
+index 67e1269..8626b14 100644
+--- a/Documentation/block/bfq-iosched.txt
++++ b/Documentation/block/bfq-iosched.txt
+@@ -78,13 +78,15 @@ sequential workloads considered in our tests. With random workloads,
+ and with all the workloads on flash-based devices, BFQ achieves,
+ instead, about the same throughput as the other schedulers.
+
+-Strong fairness/bandwidth guarantees
++Strong fairness, bandwidth and delay guarantees
+
+ BFQ distributes the device throughput, and not just the device time,
+-as desired among I/O-bound applications, with any workload and
+-regardless of the device parameters. BFQ switches to time-based
+-distribution (only) for processes that would otherwise cause a
+-throughput loss.
++among I/O-bound applications in proportion to their weights, with any
++workload and regardless of the device parameters. From these bandwidth
++guarantees, it is possible to compute tight per-I/O-request delay
++guarantees by a simple formula. If not configured for strict service
++guarantees, BFQ switches to time-based resource sharing (only) for
++applications that would otherwise cause a throughput loss.
+
+ 1-2 Server systems
+ ------------------
+
+From a53d556f54f33de6e71f909748c6994ff90fa243 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Mon, 24 Oct 2016 20:36:53 +0200
+Subject: [PATCH 04/24] Improve help message in Kconfig.iosched
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/Kconfig.iosched | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
+index 6d92579..277b112 100644
+--- a/block/Kconfig.iosched
++++ b/block/Kconfig.iosched
+@@ -43,13 +43,10 @@ config IOSCHED_BFQ
+ tristate "BFQ I/O scheduler"
+ default n
+ ---help---
+- The BFQ I/O scheduler tries to distribute bandwidth among
+- all processes according to their weights.
+- It aims at distributing the bandwidth as desired, independently of
+- the disk parameters and with any workload. It also tries to
+- guarantee low latency to interactive and soft real-time
+- applications. If compiled built-in (saying Y here), BFQ can
+- be configured to support hierarchical scheduling.
++ The BFQ I/O scheduler distributes bandwidth among all
++ processes according to their weights, regardless of the
++ device parameters and with any workload. It also guarantees
++ a low latency to interactive and soft real-time applications.
+
+ config BFQ_GROUP_IOSCHED
+ bool "BFQ hierarchical scheduling support"
+
+From 710824a320c015b314abba3bc2982d0f3caa0acc Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Mon, 24 Oct 2016 20:37:59 +0200
+Subject: [PATCH 05/24] Remove stray disk word in first line
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index c161ff0..b5fdd45 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -1,5 +1,5 @@
+ /*
+- * Budget Fair Queueing (BFQ) disk scheduler.
++ * Budget Fair Queueing (BFQ) I/O scheduler.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+
+From a2f6ff7877e60f533cdf39e941fc45f7b5aafcee Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Mon, 24 Oct 2016 20:41:03 +0200
+Subject: [PATCH 06/24] BUGFIX: Remove wrong conversion in use of
+ bfq_fifo_expire
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index b5fdd45..a8c6887 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -4268,8 +4268,7 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
+
+ bfq_add_request(rq);
+
+- rq->fifo_time = ktime_get_ns() +
+- jiffies_to_nsecs(bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
++ rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
+ list_add_tail(&rq->queuelist, &bfqq->fifo);
+
+ bfq_rq_enqueued(bfqd, bfqq, rq);
+
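The removed jiffies_to_nsecs() call above was a unit bug: bfq_fifo_expire[] already holds nanoseconds (NSEC_PER_SEC / 4 and NSEC_PER_SEC / 8, as visible in a later hunk of this series), so converting it again multiplied the timeout by roughly NSEC_PER_SEC / HZ. A small sketch of the arithmetic, assuming HZ = 250 and a simplified model of jiffies_to_nsecs():

#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL
#define HZ 250ULL
/* Simplified model of the kernel helper: jiffies -> nanoseconds. */
#define jiffies_to_nsecs(j) ((j) * (NSEC_PER_SEC / HZ))

int main(void)
{
    /* fifo_expire for async requests is already in ns: 250 ms */
    unsigned long long fifo_expire_async = NSEC_PER_SEC / 4;

    printf("intended timeout:          %llu ns (250 ms)\n",
           fifo_expire_async);
    /* the buggy double conversion yields ~1e15 ns, about 11.5 days */
    printf("with the stray conversion: %llu ns\n",
           jiffies_to_nsecs(fifo_expire_async));
    return 0;
}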
+From c91736449d06052469c2c3abaf5d455b85e77417 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Tue, 25 Oct 2016 09:31:37 +0200
+Subject: [PATCH 07/24] bfq-iosched.txt: add description of preemption
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ Documentation/block/bfq-iosched.txt | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
+index 8626b14..8979408 100644
+--- a/Documentation/block/bfq-iosched.txt
++++ b/Documentation/block/bfq-iosched.txt
+@@ -227,6 +227,12 @@ plus a lot of code, are borrowed from CFQ.
+ budget assigned to a queue waiting for service is, the sooner
+ B-WF2Q+ will serve that queue (Subsec 3.3 in [2]).
+
++- If several processes are competing for the device at the same time,
++ but all processes and groups have the same weight, then BFQ
++ guarantees the expected throughput distribution without ever idling
++ the device. It uses preemption instead. Throughput is then much
++ higher in this common scenario.
++
+ - ioprio classes are served in strict priority order, i.e.,
+ lower-priority queues are not served as long as there are
+ higher-priority queues. Among queues in the same class, the
+
+From 2dc9393e7ff65d0e5340118402134cff03276b1c Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Sat, 5 Nov 2016 18:09:07 +0100
+Subject: [PATCH 08/24] Add parentheses to complex macros
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index a8c6887..88be34e 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -77,19 +77,19 @@
+ static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
+
+ /* Maximum backwards seek, in KiB. */
+-static const int bfq_back_max = 16 * 1024;
++static const int bfq_back_max = (16 * 1024);
+
+ /* Penalty of a backwards seek, in number of sectors. */
+ static const int bfq_back_penalty = 2;
+
+ /* Idling period duration, in ns. */
+-static u32 bfq_slice_idle = NSEC_PER_SEC / 125;
++static u32 bfq_slice_idle = (NSEC_PER_SEC / 125);
+
+ /* Minimum number of assigned budgets for which stats are safe to compute. */
+ static const int bfq_stats_min_budgets = 194;
+
+ /* Default maximum budget values, in sectors and number of requests. */
+-static const int bfq_default_max_budget = 16 * 1024;
++static const int bfq_default_max_budget = (16 * 1024);
+
+ /*
+ * Async to sync throughput distribution is controlled as follows:
+@@ -99,7 +99,7 @@ static const int bfq_default_max_budget = 16 * 1024;
+ static const int bfq_async_charge_factor = 10;
+
+ /* Default timeout values, in jiffies, approximating CFQ defaults. */
+-static const int bfq_timeout = HZ / 8;
++static const int bfq_timeout = (HZ / 8);
+
+ struct kmem_cache *bfq_pool;
+
+@@ -117,7 +117,7 @@ struct kmem_cache *bfq_pool;
+ /* Min number of samples required to perform peak-rate update */
+ #define BFQ_RATE_MIN_SAMPLES 32
+ /* Min observation time interval required to perform a peak-rate update (ns) */
+-#define BFQ_RATE_MIN_INTERVAL 300*NSEC_PER_MSEC
++#define BFQ_RATE_MIN_INTERVAL (300*NSEC_PER_MSEC)
+ /* Target observation time interval for a peak-rate update (ns) */
+ #define BFQ_RATE_REF_INTERVAL NSEC_PER_SEC
+
+
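The parentheses added above matter because a macro body expands textually into the caller's expression, where ordinary operator precedence applies. A minimal sketch of the hazard (constant names are illustrative, not the patch's):

#include <stdio.h>

#define NSEC_PER_MSEC 1000000ULL
#define RATE_MIN_INTERVAL_BAD   300*NSEC_PER_MSEC
#define RATE_MIN_INTERVAL_GOOD  (300*NSEC_PER_MSEC)

int main(void)
{
    unsigned long long elapsed = 600 * NSEC_PER_MSEC;  /* 600 ms */

    /* expands to: elapsed / 300 * NSEC_PER_MSEC -- divide, then multiply */
    printf("unparenthesized: %llu\n", elapsed / RATE_MIN_INTERVAL_BAD);
    /* expands to: elapsed / (300 * NSEC_PER_MSEC) -- the intended ratio */
    printf("parenthesized:   %llu\n", elapsed / RATE_MIN_INTERVAL_GOOD);
    return 0;
}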
+From 882f0607d82efe44d98634ce1fdcc4b602ccf5ec Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Sat, 5 Nov 2016 18:32:20 +0100
+Subject: [PATCH 09/24] Fix typo in bfq-iosched.txt
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ Documentation/block/bfq-iosched.txt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
+index 8979408..5ba67af 100644
+--- a/Documentation/block/bfq-iosched.txt
++++ b/Documentation/block/bfq-iosched.txt
+@@ -385,7 +385,7 @@ max_budget
+ Maximum amount of service, measured in sectors, that can be provided
+ to a BFQ queue once it is set in service (of course within the limits
+ of the above timeout). According to what said in the description of
+-the algoritm, larger values increase the throughput in proportion to
++the algorithm, larger values increase the throughput in proportion to
+ the percentage of sequential I/O requests issued. The price of larger
+ values is that they coarsen the granularity of short-term bandwidth
+ and latency guarantees.
+
+From 99b1da54ca162abdc9e62915050154b35bd9bc27 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Sat, 5 Nov 2016 18:34:58 +0100
+Subject: [PATCH 10/24] BFQ-v8r5
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 2 +-
+ block/bfq.h | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 88be34e..0f3081d 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -5212,7 +5212,7 @@ static struct blkcg_policy blkcg_policy_bfq = {
+ static int __init bfq_init(void)
+ {
+ int ret;
+- char msg[50] = "BFQ I/O-scheduler: v8r4";
++ char msg[50] = "BFQ I/O-scheduler: v8r5";
+
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ ret = blkcg_policy_register(&blkcg_policy_bfq);
+diff --git a/block/bfq.h b/block/bfq.h
+index ea1e7d8..b80abe0 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -1,5 +1,5 @@
+ /*
+- * BFQ-v8r4 for 4.8.0: data structures and common functions prototypes.
++ * BFQ-v8r5 for 4.8.0: data structures and common functions prototypes.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+
+From d825ef204aa934e1fd9ebd925276ca0e67783508 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Wed, 7 Dec 2016 19:31:37 +0100
+Subject: [PATCH 11/24] Improve documentation
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+
+Squash me
+---
+ Documentation/block/bfq-iosched.txt | 26 ++++++++++++++++++++------
+ block/Kconfig.iosched | 5 ++++-
+ 2 files changed, 24 insertions(+), 7 deletions(-)
+
+diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
+index 5ba67af..13b5248 100644
+--- a/Documentation/block/bfq-iosched.txt
++++ b/Documentation/block/bfq-iosched.txt
+@@ -252,9 +252,14 @@ of slice_idle are copied from CFQ too.
+ per-process ioprio and weight
+ -----------------------------
+
+-Unless the cgroups interface is used, weights can be assigned to
+-processes only indirectly, through I/O priorities, and according to
+-the relation: weight = (IOPRIO_BE_NR - ioprio) * 10.
++Unless the cgroups interface is used (see "4. BFQ group scheduling"),
++weights can be assigned to processes only indirectly, through I/O
++priorities, and according to the relation:
++weight = (IOPRIO_BE_NR - ioprio) * 10.
++
++Beware that, if low-latency is set, then BFQ automatically raises the
++weight of the queues associated with interactive and soft real-time
++applications. Unset this tunable if you need/want to control weights.
+
+ slice_idle
+ ----------
+@@ -369,6 +374,11 @@ default, low latency mode is enabled. If enabled, interactive and soft
+ real-time applications are privileged and experience a lower latency,
+ as explained in more detail in the description of how BFQ works.
+
++DO NOT enable this mode if you need full control on bandwidth
++distribution. In fact, if it is enabled, then BFQ automatically
++increases the bandwidth share of privileged applications, as the main
++means to guarantee a lower latency to them.
++
+ timeout_sync
+ ------------
+
+@@ -449,9 +459,9 @@ may be reactivated for an already busy async queue (in ms).
+ 4. Group scheduling with BFQ
+ ============================
+
+-BFQ supports both cgroup-v1 and cgroup-v2 io controllers, namely blkio
+-and io. In particular, BFQ supports weight-based proportional
+-share.
++BFQ supports both cgroups-v1 and cgroups-v2 io controllers, namely
++blkio and io. In particular, BFQ supports weight-based proportional
++share. To activate cgroups support, set BFQ_GROUP_IOSCHED.
+
+ 4-1 Service guarantees provided
+ -------------------------------
+@@ -501,6 +511,10 @@ linear mapping between ioprio and weights, described at the beginning
+ of the tunable section, is still valid, but all weights higher than
+ IOPRIO_BE_NR*10 are mapped to ioprio 0.
+
++Recall that, if low-latency is set, then BFQ automatically raises the
++weight of the queues associated with interactive and soft real-time
++applications. Unset this tunable if you need/want to control weights.
++
+
+ [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
+ Scheduler", Proceedings of the First Workshop on Mobile System
+diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
+index 277b112..f2cd945 100644
+--- a/block/Kconfig.iosched
++++ b/block/Kconfig.iosched
+@@ -47,13 +47,16 @@ config IOSCHED_BFQ
+ processes according to their weights, regardless of the
+ device parameters and with any workload. It also guarantees
+ a low latency to interactive and soft real-time applications.
++ Details in Documentation/block/bfq-iosched.txt
+
+ config BFQ_GROUP_IOSCHED
+ bool "BFQ hierarchical scheduling support"
+ depends on IOSCHED_BFQ && BLK_CGROUP
+ default n
+ ---help---
+- Enable hierarchical scheduling in BFQ, using the blkio controller.
++
++ Enable hierarchical scheduling in BFQ, using the blkio
++ (cgroups-v1) or io (cgroups-v2) controller.
+
+ choice
+ prompt "Default I/O scheduler"
+
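To make the documentation changes above concrete, here is a hedged sketch of setting a BFQ group weight through the cgroups-v2 interface described in bfq-iosched.txt. The cgroup path and group name are illustrative; it assumes the io controller is enabled, the group already exists, and BFQ is the active scheduler for the device:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
    /* illustrative path: io.bfq.weight of a hypothetical "media" group */
    const char *path = "/sys/fs/cgroup/media/io.bfq.weight";
    const char *weight = "200\n";  /* twice the default weight of 100 */
    int fd = open(path, O_WRONLY);

    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* per the doc, this group now gets twice the bandwidth (not just
     * twice the time) of a sibling group left at weight 100 */
    if (write(fd, weight, strlen(weight)) < 0)
        perror("write");
    close(fd);
    return 0;
}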
+From 880441682487350cd5b116c2cea830351f9c9478 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Wed, 7 Dec 2016 19:43:58 +0100
+Subject: [PATCH 12/24] Add and improve pointers in initial comments
+
+---
+ block/bfq-iosched.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 0f3081d..b07d251 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -14,6 +14,12 @@
+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
+ * file.
+ *
++ * BFQ is a proportional-share I/O scheduler, with some extra
++ * low-latency capabilities. BFQ also supports full hierarchical
++ * scheduling through cgroups. Next paragraphs provide an introduction
++ * on BFQ inner workings. Details on BFQ benefits and usage can be
++ * found in Documentation/block/bfq-iosched.txt.
++ *
+ * BFQ is a proportional-share storage-I/O scheduling algorithm based
+ * on the slice-by-slice service scheme of CFQ. But BFQ assigns
+ * budgets, measured in number of sectors, to processes instead of
+@@ -43,10 +49,10 @@
+ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N)
+ * complexity derives from the one introduced with EEVDF in [3].
+ *
+- * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness
+- * with the BFQ Disk I/O Scheduler'',
+- * Proceedings of the 5th Annual International Systems and Storage
+- * Conference (SYSTOR '12), June 2012.
++ * [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
++ * Scheduler", Proceedings of the First Workshop on Mobile System
++ * Technologies (MST-2015), May 2015.
++ * http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
+ *
+ * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf
+ *
+
+From 60dad3833c9afa52315508579cbb4bac7e55d3a3 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Fri, 9 Dec 2016 07:05:29 +0100
+Subject: [PATCH 13/24] Fix typo in comments on bfq_lookup_next_entity
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-sched.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index 45d63d3..9afeaab 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -1265,7 +1265,7 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
+ * NOTE: since we cache the next_in_service entity at each level of the
+ * hierarchy, the complexity of the lookup can be decreased with
+ * absolutely no effort just returning the cached next_in_service value;
+- * we prefer to do full lookups to test the consistency of * the data
++ * we prefer to do full lookups to test the consistency of the data
+ * structures.
+ */
+ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+
+From 15f446f3bc189df88c64366930b5e237c8d9d5d5 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Fri, 9 Dec 2016 09:46:41 +0100
+Subject: [PATCH 14/24] Fix false-positive warning for uninitialized var
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index b07d251..3043e5e 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -5044,7 +5044,7 @@ STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0,
+ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)\
+ { \
+ struct bfq_data *bfqd = e->elevator_data; \
+- unsigned long __data; \
++ unsigned long uninitialized_var(__data); \
+ int ret = bfq_var_store(&__data, (page), count); \
+ if (__data < (MIN)) \
+ __data = (MIN); \
+
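For context on the change above: in kernels of this era, uninitialized_var() was defined in the compiler headers as a self-initialization that silences gcc's false "may be used uninitialized" warning (the macro was removed from later kernels). A stand-alone sketch of the pattern, with a stand-in for the real bfq_var_store():

/* Old-kernel definition (compiler headers of this era); removed later. */
#define uninitialized_var(x) x = x

/* Stand-in for the real parser: always writes *var before returning. */
static int bfq_var_store(unsigned long *var, const char *page, int count)
{
    (void)page;
    (void)count;
    *var = 42;
    return 0;
}

int demo_store(const char *page, int count)
{
    /* gcc cannot prove that bfq_var_store() always writes __data, so a
     * plain declaration may trigger a false warning; the macro expands
     * to "__data = __data" and suppresses it without changing codegen. */
    unsigned long uninitialized_var(__data);
    int ret = bfq_var_store(&__data, page, count);

    return ret ? ret : (int)__data;
}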
+From 1430feb8c7194f72be0a8066ad6b996d78bf3e09 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Sun, 11 Dec 2016 11:46:13 +0100
+Subject: [PATCH 15/24] Improve and fix some bugs in hierarchical code
+
+This commit provides the following contributions.
+
+BUGFIX Removed the check that, when the new queue to set in service
+must be selected, the cached next_in_service entities coincide with
+the entities chosen by __bfq_lookup_next_entity. This check,
+issuing a warning on failure, was wrong, because the cached and the
+newly chosen entity could differ in case of a CLASS_IDLE timeout.
+
+EFFICIENCY IMPROVEMENT (this improvement is related to the above
+BUGFIX) The cached next_in_service entities are now really used to
+select the next queue to serve when the in-service queue
+expires. Before this change, the cached values were used only for
+extra (and in general wrong) consistency checks. This caused
+additional overhead instead of reducing it.
+
+EFFICIENCY IMPROVEMENT The next entity to serve, for each level of the
+hierarchy, is now updated on every event that may change it, i.e., on
+every activation or deactivation of any entity. This finer granularity
+is not strictly needed for correctness, because it is only on queue
+expirations that BFQ needs to know what are the next entities to
+serve. Yet this change makes it possible to implement optimizations in
+which it is necessary to know the next queue to serve before the
+in-service queue expires.
+
+SERVICE-ACCURACY IMPROVEMENT The per-device CLASS_IDLE service timeout
+has been turned into a much more accurate per-group timeout.
+
+CODE-QUALITY IMPROVEMENT The non-trivial parts touched by the above
+improvements have been partially rewritten, and enriched of comments,
+so as to improve their transparency and understandability.
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-cgroup.c | 2 +-
+ block/bfq-iosched.c | 2 +-
+ block/bfq-sched.c | 497 ++++++++++++++++++++++++++++++++++++----------------
+ block/bfq.h | 5 +-
+ 4 files changed, 348 insertions(+), 158 deletions(-)
+
+diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
+index b50ae8e..a04bc40 100644
+--- a/block/bfq-cgroup.c
++++ b/block/bfq-cgroup.c
+@@ -314,7 +314,7 @@ static void bfq_init_entity(struct bfq_entity *entity,
+ bfqq->ioprio_class = bfqq->new_ioprio_class;
+ bfqg_get(bfqg);
+ }
+- entity->parent = bfqg->my_entity;
++ entity->parent = bfqg->my_entity; /* NULL for root group */
+ entity->sched_data = &bfqg->sched_data;
+ }
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 3043e5e..2d00bd10 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -4763,6 +4763,7 @@ static void bfq_init_root_group(struct bfq_group *root_group,
+ root_group->rq_pos_tree = RB_ROOT;
+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
+ root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
++ root_group->sched_data.bfq_class_idle_last_service = jiffies;
+ }
+
+ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+@@ -4836,7 +4837,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ bfqd->bfq_back_max = bfq_back_max;
+ bfqd->bfq_back_penalty = bfq_back_penalty;
+ bfqd->bfq_slice_idle = bfq_slice_idle;
+- bfqd->bfq_class_idle_last_service = 0;
+ bfqd->bfq_timeout = bfq_timeout;
+
+ bfqd->bfq_requests_within_timer = 120;
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index 9afeaab..2f54ac5 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -15,16 +15,15 @@
+ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+-#define for_each_entity(entity) \
++/* both next loops stop at one of the child entities of the root group */
++#define for_each_entity(entity) \
+ for (; entity ; entity = entity->parent)
+
+ #define for_each_entity_safe(entity, parent) \
+ for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
+
+
+-static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+- int extract,
+- struct bfq_data *bfqd);
++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd);
+
+ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ {
+@@ -47,23 +46,18 @@ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ bfqg_entity->budget = next_in_service->budget;
+ }
+
+-static int bfq_update_next_in_service(struct bfq_sched_data *sd)
++/*
++ * Incomplete version that always returns true. It will return also
++ * false in its complete version, in case either next_in_service has
++ * not changed, or next_in_service has changed but in a way that will
++ * not influence upper levels.
++ */
++static bool bfq_update_next_in_service(struct bfq_sched_data *sd)
+ {
+ struct bfq_entity *next_in_service;
+ struct bfq_queue *bfqq;
+
+- if (sd->in_service_entity)
+- /* will update/requeue at the end of service */
+- return 0;
+-
+- /*
+- * NOTE: this can be improved in many ways, such as returning
+- * 1 (and thus propagating upwards the update) only when the
+- * budget changes, or caching the bfqq that will be scheduled
+- * next from this subtree. By now we worry more about
+- * correctness than about performance...
+- */
+- next_in_service = bfq_lookup_next_entity(sd, 0, NULL);
++ next_in_service = bfq_lookup_next_entity(sd);
+ sd->next_in_service = next_in_service;
+
+ if (next_in_service)
+@@ -75,6 +69,7 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
+ if (bfqq)
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+ "update_next_in_service: chosen this queue");
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ else {
+ struct bfq_group *bfqg =
+ container_of(next_in_service,
+@@ -83,15 +78,11 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+ "update_next_in_service: chosen this entity");
+ }
++#endif
+ exit:
+- return 1;
++ return true;
+ }
+
+-static void bfq_check_next_in_service(struct bfq_sched_data *sd,
+- struct bfq_entity *entity)
+-{
+- WARN_ON(sd->next_in_service != entity);
+-}
+ #else
+ #define for_each_entity(entity) \
+ for (; entity ; entity = NULL)
+@@ -104,11 +95,6 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
+ return 0;
+ }
+
+-static void bfq_check_next_in_service(struct bfq_sched_data *sd,
+- struct bfq_entity *entity)
+-{
+-}
+-
+ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ {
+ }
+@@ -851,23 +837,59 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
+ BUG_ON(!sd);
+ BUG_ON(!st);
+ if (entity == sd->in_service_entity) {
+- BUG_ON(entity->tree);
+ /*
+- * If we are requeueing the current entity we have
+- * to take care of not charging to it service it has
+- * not received.
++ * We are requeueing the current in-service entity,
++ * because of the requeueing of a just-expired
++ * non-idle leaf entity in the path originating from
++ * this entity. In fact, in this case, then all
++ * entities in this path need to be requeued again for
++ * next service.
++ *
++ * Before requeueing, the start time of the entity
++ * must be moved forward to account for the service
++ * that the entity has received while in service. The
++ * finish time will then be updated according to this
++ * new value of the start time, and to the budget of
++ * the entity.
+ */
+ bfq_calc_finish(entity, entity->service);
+ entity->start = entity->finish;
+ sd->in_service_entity = NULL;
++ BUG_ON(entity->tree && entity->tree != &st->active);
++ /*
++ * In addition, if the entity had more than one child
++ * when set in service, then was not extracted from
++ * the active tree. This implies that the position of
++ * the entity in the active tree may need to be
++ * changed now, because we have just updated the start
++ * time of the entity, and we will update its finish
++ * time in a moment (the requeueing is then, more
++ * precisely, a repositioning in this case). To
++ * implement this repositioning, we: 1) dequeue the
++ * entity here, 2) update the finish time and
++ * requeue the entity according to the new
++ * timestamps below.
++ */
++ if (entity->tree)
++ bfq_active_extract(st, entity);
+ } else if (entity->tree == &st->active) {
+ /*
+- * Requeueing an entity due to a change of some
+- * next_in_service entity below it. We reuse the
+- * old start time.
++ * The entity is already active, and not in
++ * service. In this case, this function gets called
++ * only if the next_in_service entity below this
++ * entity has changed, and this change has caused the
++ * budget of this entity to change, which, finally
++ * implies that the finish time of this entity must be
++ * updated. Such an update may cause the scheduling,
++ * i.e., the position in the active tree, of this
++ * entity to change. We handle this fact by: 1)
++ * dequeueing the entity here, 2) updating the finish
++ * time and requeueing the entity according to the new
++ * timestamps below. This is the same approach as the
++ * non-extracted-entity sub-case above.
+ */
+ bfq_active_extract(st, entity);
+- } else {
++ } else { /* This is a 'true' activation, not a requeueing. */
+ unsigned long long min_vstart;
+
+ /* See comments on bfq_fqq_update_budg_for_activation */
+@@ -977,6 +999,11 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
+ entity->start <= st->vtime ? "" : "non ", st);
+ #endif
+ }
++ BUG_ON(RB_EMPTY_ROOT(&st->active));
++ BUG_ON(&st->active != &sd->service_tree->active &&
++ &st->active != &(sd->service_tree+1)->active &&
++ &st->active != &(sd->service_tree+2)->active);
++
+ }
+
+ /**
+@@ -996,13 +1023,20 @@ static void bfq_activate_entity(struct bfq_entity *entity,
+ __bfq_activate_entity(entity, non_blocking_wait_rq);
+
+ sd = entity->sched_data;
+- if (!bfq_update_next_in_service(sd))
++ BUG_ON(RB_EMPTY_ROOT(&sd->service_tree->active) &&
++ RB_EMPTY_ROOT(&(sd->service_tree+1)->active) &&
++ RB_EMPTY_ROOT(&(sd->service_tree+2)->active));
++
++ if (!bfq_update_next_in_service(sd)) {
++ BUG_ON(!sd->next_in_service);
+ /*
+ * No need to propagate the activation to the
+ * upper entities, as they will be updated when
+ * the in-service entity is rescheduled.
+ */
+ break;
++ }
++ BUG_ON(!sd->next_in_service);
+ }
+ }
+
+@@ -1016,31 +1050,33 @@ static void bfq_activate_entity(struct bfq_entity *entity,
+ * any scheduler tree, extract it from that tree, and if necessary
+ * and if the caller did not specify @requeue, put it on the idle tree.
+ *
+- * Return %1 if the caller should update the entity hierarchy, i.e.,
++ * Return %true if the caller should update the entity hierarchy, i.e.,
+ * if the entity was in service or if it was the next_in_service for
+- * its sched_data; return %0 otherwise.
++ * its sched_data; return %false otherwise.
+ */
+-static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
++static bool __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+ {
+ struct bfq_sched_data *sd = entity->sched_data;
+ struct bfq_service_tree *st;
+ int was_in_service;
+- int ret = 0;
++ bool ret = false;
+
+ if (sd == NULL || !entity->on_st) /* never activated, or inactive */
+- return 0;
++ return false;
+
+ st = bfq_entity_service_tree(entity);
+ was_in_service = entity == sd->in_service_entity;
+
+- BUG_ON(was_in_service && entity->tree);
++ BUG_ON(was_in_service && entity->tree && entity->tree != &st->active);
+
+ if (was_in_service) {
+ bfq_calc_finish(entity, entity->service);
+ sd->in_service_entity = NULL;
+- } else if (entity->tree == &st->active)
++ }
++
++ if (entity->tree == &st->active)
+ bfq_active_extract(st, entity);
+- else if (entity->tree == &st->idle)
++ else if (!was_in_service && entity->tree == &st->idle)
+ bfq_idle_extract(st, entity);
+ else if (entity->tree)
+ BUG();
+@@ -1124,19 +1160,13 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+ }
+
+ /**
+- * bfq_update_vtime - update vtime if necessary.
++ * bfq_calc_vtime_jump - compute the value to which the vtime should jump,
++ * if needed, to have at least one entity eligible.
+ * @st: the service tree to act upon.
+ *
+- * If necessary update the service tree vtime to have at least one
+- * eligible entity, skipping to its start time. Assumes that the
+- * active tree of the device is not empty.
+- *
+- * NOTE: this hierarchical implementation updates vtimes quite often,
+- * we may end up with reactivated processes getting timestamps after a
+- * vtime skip done because we needed a ->first_active entity on some
+- * intermediate node.
++ * Assumes that st is not empty.
+ */
+-static void bfq_update_vtime(struct bfq_service_tree *st)
++static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st)
+ {
+ struct bfq_entity *entry;
+ struct rb_node *node = st->active.rb_node;
+@@ -1144,22 +1174,30 @@ static void bfq_update_vtime(struct bfq_service_tree *st)
+ entry = rb_entry(node, struct bfq_entity, rb_node);
+ if (bfq_gt(entry->min_start, st->vtime)) {
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entry);
+- st->vtime = entry->min_start;
+
+ if (bfqq)
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+- "update_vtime: new vtime %llu %p",
+- ((st->vtime>>10)*1000)>>12, st);
++ "calc_vtime_jump: new value %llu",
++ entry->min_start);
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ else {
+ struct bfq_group *bfqg =
+ container_of(entry, struct bfq_group, entity);
+
+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+- "update_vtime: new vtime %llu %p",
+- ((st->vtime>>10)*1000)>>12, st);
++ "calc_vtime_jump: new value %llu",
++ entry->min_start);
+ }
+ #endif
++ return entry->min_start;
++ }
++ return st->vtime;
++}
++
++static void bfq_update_vtime(struct bfq_service_tree *st, u64 new_value)
++{
++ if (new_value > st->vtime) {
++ st->vtime = new_value;
+ bfq_forget_idle(st);
+ }
+ }
+@@ -1168,6 +1206,7 @@ static void bfq_update_vtime(struct bfq_service_tree *st)
+ * bfq_first_active_entity - find the eligible entity with
+ * the smallest finish time
+ * @st: the service tree to select from.
++ * @vtime: the system virtual time to use as a reference for eligibility
+ *
+ * This function searches the first schedulable entity, starting from the
+ * root of the tree and going on the left every time on this side there is
+@@ -1175,7 +1214,8 @@ static void bfq_update_vtime(struct bfq_service_tree *st)
+ * the right is followed only if a) the left subtree contains no eligible
+ * entities and b) no eligible entity has been found yet.
+ */
+-static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
++static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st,
++ u64 vtime)
+ {
+ struct bfq_entity *entry, *first = NULL;
+ struct rb_node *node = st->active.rb_node;
+@@ -1183,15 +1223,15 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
+ while (node) {
+ entry = rb_entry(node, struct bfq_entity, rb_node);
+ left:
+- if (!bfq_gt(entry->start, st->vtime))
++ if (!bfq_gt(entry->start, vtime))
+ first = entry;
+
+- BUG_ON(bfq_gt(entry->min_start, st->vtime));
++ BUG_ON(bfq_gt(entry->min_start, vtime));
+
+ if (node->rb_left) {
+ entry = rb_entry(node->rb_left,
+ struct bfq_entity, rb_node);
+- if (!bfq_gt(entry->min_start, st->vtime)) {
++ if (!bfq_gt(entry->min_start, vtime)) {
+ node = node->rb_left;
+ goto left;
+ }
+@@ -1209,28 +1249,71 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
+ * __bfq_lookup_next_entity - return the first eligible entity in @st.
+ * @st: the service tree.
+ *
+- * Update the virtual time in @st and return the first eligible entity
+- * it contains.
++ * If there is no in-service entity for the sched_data st belongs to,
++ * then return the entity that will be set in service if:
++ * 1) the parent entity this st belongs to is set in service;
++ * 2) no entity belonging to such parent entity undergoes a state change
++ * that would influence the timestamps of the entity (e.g., becomes idle,
++ * becomes backlogged, changes its budget, ...).
++ *
++ * In this first case, update the virtual time in @st too (see the
++ * comments on this update inside the function).
++ *
++ * In contrast, if there is an in-service entity, then return the
++ * entity that would be set in service if not only the above
++ * conditions, but also the next one held true: the currently
++ * in-service entity, on expiration,
++ * 1) gets a finish time equal to the current one, or
++ * 2) is not eligible any more, or
++ * 3) is idle.
+ */
+ static struct bfq_entity *
+-__bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
++__bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service
++#if 0
++ , bool force
++#endif
++ )
+ {
+- struct bfq_entity *entity, *new_next_in_service = NULL;
++ struct bfq_entity *entity
++#if 0
++ , *new_next_in_service = NULL
++#endif
++ ;
++ u64 new_vtime;
+ struct bfq_queue *bfqq;
+
+ if (RB_EMPTY_ROOT(&st->active))
+ return NULL;
+
+- bfq_update_vtime(st);
+- entity = bfq_first_active_entity(st);
+- BUG_ON(bfq_gt(entity->start, st->vtime));
++ /*
++ * Get the value of the system virtual time for which at
++ * least one entity is eligible.
++ */
++ new_vtime = bfq_calc_vtime_jump(st);
++
++ /*
++ * If there is no in-service entity for the sched_data this
++ * active tree belongs to, then push the system virtual time
++ * up to the value that guarantees that at least one entity is
++ * eligible. If, instead, there is an in-service entity, then
++ * do not make any such update, because there is already an
++ * eligible entity, namely the in-service one (even if the
++ * entity is not on st, because it was extracted when set in
++ * service).
++ */
++ if (!in_service)
++ bfq_update_vtime(st, new_vtime);
+
++ entity = bfq_first_active_entity(st, new_vtime);
++ BUG_ON(bfq_gt(entity->start, new_vtime));
++
++ /* Log some information */
+ bfqq = bfq_entity_to_bfqq(entity);
+ if (bfqq)
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+ "__lookup_next: start %llu vtime %llu st %p",
+ ((entity->start>>10)*1000)>>12,
+- ((st->vtime>>10)*1000)>>12, st);
++ ((new_vtime>>10)*1000)>>12, st);
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ else {
+ struct bfq_group *bfqg =
+@@ -1239,20 +1322,11 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+ "__lookup_next: start %llu vtime %llu st %p",
+ ((entity->start>>10)*1000)>>12,
+- ((st->vtime>>10)*1000)>>12, st);
++ ((new_vtime>>10)*1000)>>12, st);
+ }
+ #endif
+
+- /*
+- * If the chosen entity does not match with the sched_data's
+- * next_in_service and we are forcedly serving the IDLE priority
+- * class tree, bubble up budget update.
+- */
+- if (unlikely(force && entity != entity->sched_data->next_in_service)) {
+- new_next_in_service = entity;
+- for_each_entity(new_next_in_service)
+- bfq_update_budget(new_next_in_service);
+- }
++ BUG_ON(!entity);
+
+ return entity;
+ }
+@@ -1260,90 +1334,70 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
+ /**
+ * bfq_lookup_next_entity - return the first eligible entity in @sd.
+ * @sd: the sched_data.
+- * @extract: if true the returned entity will be also extracted from @sd.
+ *
+- * NOTE: since we cache the next_in_service entity at each level of the
+- * hierarchy, the complexity of the lookup can be decreased with
+- * absolutely no effort just returning the cached next_in_service value;
+- * we prefer to do full lookups to test the consistency of the data
+- * structures.
++ * This function is invoked when there has been a change in the trees
++ * for sd, and we need to know what the new next entity is after this
++ * change.
+ */
+-static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+- int extract,
+- struct bfq_data *bfqd)
++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd)
+ {
+ struct bfq_service_tree *st = sd->service_tree;
+- struct bfq_entity *entity;
+- int i = 0;
+-
+- BUG_ON(sd->in_service_entity);
++ struct bfq_service_tree *idle_class_st = st + (BFQ_IOPRIO_CLASSES - 1);
++ struct bfq_entity *entity = NULL;
++ struct bfq_queue *bfqq;
++ int class_idx = 0;
+
++ BUG_ON(!sd);
++ BUG_ON(!st);
+ /*
+ * Choose from idle class, if needed to guarantee a minimum
+- * bandwidth to this class. This should also mitigate
++ * bandwidth to this class (and if there is some active entity
++ * in idle class). This should also mitigate
+ * priority-inversion problems in case a low priority task is
+ * holding file system resources.
+ */
+- if (bfqd &&
+- jiffies - bfqd->bfq_class_idle_last_service >
+- BFQ_CL_IDLE_TIMEOUT) {
+- entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1,
+- true);
+- if (entity) {
+- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ if (time_is_before_jiffies(sd->bfq_class_idle_last_service +
++ BFQ_CL_IDLE_TIMEOUT)) {
++ if (!RB_EMPTY_ROOT(&idle_class_st->active))
++ class_idx = BFQ_IOPRIO_CLASSES - 1;
++ /* About to be served if backlogged, or not yet backlogged */
++ sd->bfq_class_idle_last_service = jiffies;
++ }
+
+- if (bfqq)
+- bfq_log_bfqq(bfqd, bfqq,
+- "idle chosen from st %p %d",
+- st + BFQ_IOPRIO_CLASSES - 1,
+- BFQ_IOPRIO_CLASSES - 1);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- else {
+- struct bfq_group *bfqg =
+- container_of(entity, struct bfq_group, entity);
++ /*
++ * Find the next entity to serve for the highest-priority
++ * class, unless the idle class needs to be served.
++ */
++ for (; class_idx < BFQ_IOPRIO_CLASSES; class_idx++) {
++ entity = __bfq_lookup_next_entity(st + class_idx,
++ sd->in_service_entity);
+
+- bfq_log_bfqg(bfqd, bfqg,
+- "idle chosen from st %p %d",
+- st + BFQ_IOPRIO_CLASSES - 1,
+- BFQ_IOPRIO_CLASSES - 1);
+- }
+-#endif
+- i = BFQ_IOPRIO_CLASSES - 1;
+- bfqd->bfq_class_idle_last_service = jiffies;
+- sd->next_in_service = entity;
+- }
++ if (entity)
++ break;
+ }
+- for (; i < BFQ_IOPRIO_CLASSES; i++) {
+- entity = __bfq_lookup_next_entity(st + i, false);
+- if (entity) {
+- if (bfqd != NULL) {
+- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+- if (bfqq)
+- bfq_log_bfqq(bfqd, bfqq,
+- "chosen from st %p %d",
+- st + i, i);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- else {
+- struct bfq_group *bfqg =
+- container_of(entity, struct bfq_group, entity);
++ BUG_ON(!entity &&
++ (!RB_EMPTY_ROOT(&st->active) || !RB_EMPTY_ROOT(&(st+1)->active) ||
++ !RB_EMPTY_ROOT(&(st+2)->active)));
+
+- bfq_log_bfqg(bfqd, bfqg,
+- "chosen from st %p %d",
+- st + i, i);
+- }
+-#endif
+- }
++ if (!entity)
++ return NULL;
+
+- if (extract) {
+- bfq_check_next_in_service(sd, entity);
+- bfq_active_extract(st + i, entity);
+- sd->in_service_entity = entity;
+- sd->next_in_service = NULL;
+- }
+- break;
+- }
++ /* Log some information */
++ bfqq = bfq_entity_to_bfqq(entity);
++ if (bfqq)
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "chosen from st %p %d",
++ st + class_idx, class_idx);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "chosen from st %p %d",
++ st + class_idx, class_idx);
+ }
++#endif
+
+ return entity;
+ }
+@@ -1356,6 +1410,43 @@ static bool next_queue_may_preempt(struct bfq_data *bfqd)
+ }
+
+ /*
++ * This function tells whether entity stops being a candidate for next
++ * service, according to the following logic.
++ *
++ * This function is invoked for an entity that is about to be set in
++ * service. If such an entity is a queue, then the entity is no longer
++ * a candidate for next service (i.e., a candidate entity to serve
++ * after the in-service entity is expired). The function then returns
++ * true.
++ *
++ * In contrast, the entity could still be a candidate for next service
++ * if it is not a queue, and has more than one child. In fact, even if
++ * one of its children is about to be set in service, other children
++ * may still be the next to serve. As a consequence, a non-queue
++ * entity stops being a candidate for next service only if it has
++ * exactly one child. And only if this condition holds does the
++ * function return true for a non-queue entity.
++ */
++static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
++{
++ struct bfq_group *bfqg;
++
++ if (bfq_entity_to_bfqq(entity))
++ return true;
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg = container_of(entity, struct bfq_group, entity);
++
++ BUG_ON(bfqg == ((struct bfq_data *)(bfqg->bfqd))->root_group);
++ BUG_ON(bfqg->active_entities == 0);
++ if (bfqg->active_entities == 1)
++ return true;
++#endif
++
++ return false;
++}
++
++/*
+ * Get next queue for service.
+ */
+ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+@@ -1369,6 +1460,11 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+ if (bfqd->busy_queues == 0)
+ return NULL;
+
++ /*
++ * Traverse the path from the root to the leaf entity to
++ * serve. Set in service all the entities visited along the
++ * way.
++ */
+ sd = &bfqd->root_group->sched_data;
+ for (; sd ; sd = entity->my_sched_data) {
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+@@ -1378,13 +1474,96 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+
+ bfq_log_bfqg(bfqd, bfqg,
+ "get_next_queue: lookup in this group");
+- } else
++ if (!sd->next_in_service)
++ pr_crit("get_next_queue: lookup in this group");
++ } else {
+ bfq_log_bfqg(bfqd, bfqd->root_group,
+ "get_next_queue: lookup in root group");
++ if (!sd->next_in_service)
++ pr_crit("get_next_queue: lookup in root group");
++ }
+ #endif
+
+- entity = bfq_lookup_next_entity(sd, 1, bfqd);
++ BUG_ON(!sd->next_in_service);
++
++ /*
++ * WARNING. We are about to set the in-service entity
++ * to sd->next_in_service, i.e., to the (cached) value
++ * returned by bfq_lookup_next_entity(sd) the last
++ * time it was invoked, i.e., the last time when the
++ * service order in sd changed as a consequence of the
++ * activation or deactivation of an entity. In this
++ * respect, if we execute bfq_lookup_next_entity(sd)
++ * in this very moment, it may, although with low
++ * probability, yield a different entity than that
++ * pointed to by sd->next_in_service. This rare event
++ * happens in case there was no CLASS_IDLE entity to
++ * serve for sd when bfq_lookup_next_entity(sd) was
++ * invoked for the last time, while there is now one
++ * such entity.
++ *
++ * If the above event happens, then the scheduling of
++ * such entity in CLASS_IDLE is postponed until the
++ * service of the sd->next_in_service entity
++ * finishes. In fact, when the latter is expired,
++ * bfq_lookup_next_entity(sd) gets called again,
++ * exactly to update sd->next_in_service.
++ */
++
++ /* Make next_in_service entity become in_service_entity */
++ entity = sd->next_in_service;
++ sd->in_service_entity = entity;
++
++ /*
++ * Reset the accumulator of the amount of service that
++ * the entity is about to receive.
++ */
++ entity->service = 0;
++
++ /*
++ * If entity is no longer a candidate for next
++ * service, then we extract it from its active tree,
++ * for the following reason. To further boost the
++ * throughput in some special case, BFQ needs to know
++ * which is the next candidate entity to serve, while
++ * there is already an entity in service. In this
++ * respect, to make it easy to compute/update the next
++ * candidate entity to serve after the current
++ * candidate has been set in service, there is a case
++ * where it is necessary to extract the current
++ * candidate from its service tree. Such a case is
++ * when the entity just set in service cannot be also
++ * a candidate for next service. Details about when
++ * this condition holds are reported in the comments
++ * on the function bfq_no_longer_next_in_service()
++ * invoked below.
++ */
++ if (bfq_no_longer_next_in_service(entity))
++ bfq_active_extract(bfq_entity_service_tree(entity),
++ entity);
++
++ /*
++ * For the same reason why we may have just extracted
++ * entity from its active tree, we may need to update
++ * next_in_service for the sched_data of entity too,
++ * regardless of whether entity has been extracted.
++ * In fact, even if entity has not been extracted, a
++ * descendant entity may get extracted. Such an event
++ * would cause a change in next_in_service for the
++ * level of the descendant entity, and thus possibly
++ * back to upper levels.
++ *
++ * We cannot perform the resulting needed update
++ * before the end of this loop, because, to know which
++ * is the correct next-to-serve candidate entity for
++ * each level, we need first to find the leaf entity
++ * to set in service. In fact, only after we know
++ * which is the next-to-serve leaf entity, we can
++ * discover whether the parent entity of the leaf
++ * entity becomes the next-to-serve, and so on.
++ */
+
++ /* Log some information */
+ bfqq = bfq_entity_to_bfqq(entity);
+ if (bfqq)
+ bfq_log_bfqq(bfqd, bfqq,
+@@ -1401,13 +1580,23 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+ }
+ #endif
+
+- BUG_ON(!entity);
+- entity->service = 0;
+ }
+
++ BUG_ON(!entity);
+ bfqq = bfq_entity_to_bfqq(entity);
+ BUG_ON(!bfqq);
+
++ /*
++ * We can finally update all next-to-serve entities along the
++ * path from the leaf entity just set in service to the root.
++ */
++ for_each_entity(entity) {
++ struct bfq_sched_data *sd = entity->sched_data;
++
++ if (!bfq_update_next_in_service(sd))
++ break;
++ }
++
+ return bfqq;
+ }
+
+diff --git a/block/bfq.h b/block/bfq.h
+index b80abe0..6b28030 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -88,6 +88,9 @@ struct bfq_sched_data {
+ struct bfq_entity *next_in_service;
+ /* array of service trees, one per ioprio_class */
+ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
++ /* last time CLASS_IDLE was served */
++ unsigned long bfq_class_idle_last_service;
++
+ };
+
+ /**
+@@ -487,8 +490,6 @@ struct bfq_data {
+ unsigned int bfq_back_max;
+ /* maximum idling time */
+ u32 bfq_slice_idle;
+- /* last time CLASS_IDLE was served */
+- u64 bfq_class_idle_last_service;
+
+ /* user-configured max budget value (0 for auto-tuning) */
+ int bfq_user_max_budget;
+
+From b5734696e18577dda6ef6571af7cb3718937d2f5 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Wed, 14 Dec 2016 17:56:25 +0100
+Subject: [PATCH 16/24] BFQ v8r6-rc1
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 2 +-
+ block/bfq.h | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 2d00bd10..72443bd 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -5218,7 +5218,7 @@ static struct blkcg_policy blkcg_policy_bfq = {
+ static int __init bfq_init(void)
+ {
+ int ret;
+- char msg[50] = "BFQ I/O-scheduler: v8r5";
++ char msg[60] = "BFQ I/O-scheduler: v8r6-rc1";
+
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ ret = blkcg_policy_register(&blkcg_policy_bfq);
+diff --git a/block/bfq.h b/block/bfq.h
+index 6b28030..d4d784f 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -1,5 +1,5 @@
+ /*
+- * BFQ-v8r5 for 4.8.0: data structures and common functions prototypes.
++ * BFQ v8r6-rc1 for 4.9.0: data structures and common functions prototypes.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+
+From a7c98629904dcc103d676c2b8e2ceaff4ca19ca2 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Wed, 14 Dec 2016 18:07:41 +0100
+Subject: [PATCH 17/24] Port and improve CFQ commit 41647e7a
+
+BFQ currently uses the same logic for detecting seeky queues on
+rotational disks and on SSDs. This logic is appropriate for the
+former, as it takes into account only the inter-request distance,
+which is the dominant latency factor on a rotational device. Yet
+things change with flash-based devices, where serving a large
+request still yields a high throughput, even if the request is far
+from the previously served one. This commit extends seeky detection
+to take this fact into account on flash-based devices. In
+particular, this commit is an improved port of the original CFQ
+commit 41647e7a.
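+
+As a rough, self-contained sketch of the resulting check (not part of
+the patch itself; the names are illustrative and the thresholds merely
+mirror BFQQ_SEEK_THR and BFQQ_SECT_THR_NONROT from the hunk below):
+
+    #include <stdbool.h>
+    #include <stdint.h>
+
+    #define SEEK_THR_SECTORS (8 * 100) /* inter-request distance bound */
+    #define SECT_THR_NONROT  (2 * 32)  /* request-size bound on SSDs */
+
+    static bool request_is_seeky(uint64_t sdist, uint32_t rq_sectors,
+                                 bool queue_nonrot)
+    {
+        /* Far from the previous request... */
+        if (sdist <= SEEK_THR_SECTORS)
+            return false;
+        /* ...and, on a non-rotational device, also small: a large
+         * request keeps SSD throughput high regardless of distance. */
+        return !queue_nonrot || rq_sectors < SECT_THR_NONROT;
+    }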
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 72443bd..153292f 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -117,6 +117,7 @@ struct kmem_cache *bfq_pool;
+ #define BFQ_HW_QUEUE_SAMPLES 32
+
+ #define BFQQ_SEEK_THR (sector_t)(8 * 100)
++#define BFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
+ #define BFQQ_CLOSE_THR (sector_t)(8 * 1024)
+ #define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8)
+
+@@ -4117,7 +4118,9 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ {
+ bfqq->seek_history <<= 1;
+ bfqq->seek_history |=
+- get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR;
++ get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR &&
++ (!blk_queue_nonrot(bfqd->queue) ||
++ blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT);
+ }
+
+ /*
+
+From 5cacb798d523b9fa7e394ca3a369a0c196e0b022 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Thu, 15 Dec 2016 15:57:22 +0100
+Subject: [PATCH 18/24] Provide more details in switching-off-wr message
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 153292f..a5853c7 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -656,7 +656,9 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+ time_is_before_jiffies(bfqq->last_wr_start_finish +
+ bfqq->wr_cur_max_time))) {
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+- "resume state: switching off wr");
++ "resume state: switching off wr (%u + %u < %u)",
++ bfqq->last_wr_start_finish, bfqq->wr_cur_max_time,
++ jiffies);
+
+ bfqq->wr_coeff = 1;
+ }
+
+From 6f62391c914a25818f839b932a9e1b16ee28c1f1 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Thu, 15 Dec 2016 16:10:17 +0100
+Subject: [PATCH 19/24] Remove useless parameter from bfq_del_bfqq_busy
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 6 +++---
+ block/bfq-sched.c | 5 ++---
+ 2 files changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index a5853c7..0d54cc1 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -656,7 +656,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+ time_is_before_jiffies(bfqq->last_wr_start_finish +
+ bfqq->wr_cur_max_time))) {
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+- "resume state: switching off wr (%u + %u < %u)",
++ "resume state: switching off wr (%lu + %lu < %lu)",
+ bfqq->last_wr_start_finish, bfqq->wr_cur_max_time,
+ jiffies);
+
+@@ -1525,7 +1525,7 @@ static void bfq_remove_request(struct request *rq)
+ BUG_ON(bfqq->entity.budget < 0);
+
+ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) {
+- bfq_del_bfqq_busy(bfqd, bfqq, 1);
++ bfq_del_bfqq_busy(bfqd, bfqq);
+
+ /* bfqq emptied. In normal operation, when
+ * bfqq is empty, bfqq->entity.service and
+@@ -2662,7 +2662,7 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ */
+ bfqq->budget_timeout = jiffies;
+
+- bfq_del_bfqq_busy(bfqd, bfqq, 1);
++ bfq_del_bfqq_busy(bfqd, bfqq);
+ } else {
+ bfq_activate_bfqq(bfqd, bfqq);
+ /*
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index 2f54ac5..fae28ee 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -1635,8 +1635,7 @@ static void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
+ * Called when the bfqq no longer has requests pending, remove it from
+ * the service tree.
+ */
+-static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+- int requeue)
++static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ BUG_ON(!bfq_bfqq_busy(bfqq));
+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
+@@ -1660,7 +1659,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+
+ BUG_ON(bfqq->entity.budget < 0);
+
+- bfq_deactivate_bfqq(bfqd, bfqq, requeue);
++ bfq_deactivate_bfqq(bfqd, bfqq, 1);
+
+ BUG_ON(bfqq->entity.budget < 0);
+ }
+
+From 1469db5fec95e3bfad0fe60dc1a843b0c5de4efa Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Thu, 15 Dec 2016 16:40:09 +0100
+Subject: [PATCH 20/24] Further improve code and comments for hierarchical code
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-cgroup.c | 6 +++---
+ block/bfq-iosched.c | 2 +-
+ block/bfq-sched.c | 45 ++++++++++++++++++++++++++++++---------------
+ 3 files changed, 34 insertions(+), 19 deletions(-)
+
+diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
+index a04bc40..bd31da5 100644
+--- a/block/bfq-cgroup.c
++++ b/block/bfq-cgroup.c
+@@ -552,7 +552,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq));
+
+ if (bfq_bfqq_busy(bfqq))
+- bfq_deactivate_bfqq(bfqd, bfqq, 0);
++ bfq_deactivate_bfqq(bfqd, bfqq, false);
+ else if (entity->on_st) {
+ BUG_ON(&bfq_entity_service_tree(entity)->idle !=
+ entity->tree);
+@@ -664,7 +664,7 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st)
+ struct bfq_entity *entity = st->first_idle;
+
+ for (; entity ; entity = st->first_idle)
+- __bfq_deactivate_entity(entity, 0);
++ __bfq_deactivate_entity(entity, false);
+ }
+
+ /**
+@@ -769,7 +769,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
+ BUG_ON(bfqg->sched_data.next_in_service);
+ BUG_ON(bfqg->sched_data.in_service_entity);
+
+- __bfq_deactivate_entity(entity, 0);
++ __bfq_deactivate_entity(entity, false);
+ bfq_put_async_queues(bfqd, bfqg);
+ BUG_ON(entity->tree);
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 0d54cc1..7a56603 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -4738,7 +4738,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
+
+ BUG_ON(bfqd->in_service_queue);
+ list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
+- bfq_deactivate_bfqq(bfqd, bfqq, 0);
++ bfq_deactivate_bfqq(bfqd, bfqq, false);
+
+ spin_unlock_irq(q->queue_lock);
+
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index fae28ee..52911cc 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -841,9 +841,9 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
+ * We are requeueing the current in-service entity,
+ * because of the requeueing of a just-expired
+ * non-idle leaf entity in the path originating from
+- * this entity. In fact, in this case, then all
+- * entities in this path need to be requeued again for
+- * next service.
++ * this entity. In fact, in this case, all entities in
++ * this path need to be requeued again for next
++ * service.
+ *
+ * Before requeueing, the start time of the entity
+ * must be moved forward to account for the service
+@@ -882,7 +882,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
+ * implies that the finish time of this entity must be
+ * updated. Such an update may cause the scheduling,
+ * i.e., the position in the active tree, of this
+- * entity to change. We handle this fact by: 1)
++ * entity to change. We handle this change by: 1)
+ * dequeueing the entity here, 2) updating the finish
+ * time and requeueing the entity according to the new
+ * timestamps below. This is the same approach as the
+@@ -1054,7 +1054,7 @@ static void bfq_activate_entity(struct bfq_entity *entity,
+ * if the entity was in service or if it was the next_in_service for
+ * its sched_data; return %false otherwise.
+ */
+-static bool __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
++static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue)
+ {
+ struct bfq_sched_data *sd = entity->sched_data;
+ struct bfq_service_tree *st;
+@@ -1118,26 +1118,41 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+ if (sd->next_in_service)
+ /*
+ * The parent entity is still backlogged,
+- * because next_in_service is not NULL, and
++ * because next_in_service is not NULL. So, no
++ * upwards deactivation is needed. Yet,
+ * next_in_service has been updated (see
+- * comment on the body of the above if):
+- * upwards update of the schedule is needed.
++ * comment on the body of the above if). Then
++ * the schedule needs to be updated upwards.
+ */
+- goto update;
++ goto update_schedule;
+
+ /*
+- * If we get here, then the parent is no more backlogged and
+- * we want to propagate the deactivation upwards.
++ * If we get here, then the parent is no more
++ * backlogged and we need to propagate the
++ * deactivation upwards. Then let the loop go on.
+ */
+- requeue = 1;
++
++ /*
++ * Also let parent be queued into the idle tree on
++ * deactivation, to preserve service guarantees, and
++ * assuming that whoever invoked this function does not
++ * need parent entities to be removed from any tree as well.
++ */
++ requeue = true;
+ }
+
+ return;
+
+-update:
++update_schedule:
+ entity = parent;
+ for_each_entity(entity) {
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ /*
++ * Invoke __bfq_activate_entity on entity, even if
++ * already active, to update its position in the
++ * active tree (because sd->next_in_service has
++ * changed)
++ */
+ __bfq_activate_entity(entity, false);
+
+ sd = entity->sched_data;
+@@ -1613,7 +1628,7 @@ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+ }
+
+ static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+- int requeue)
++ bool requeue)
+ {
+ struct bfq_entity *entity = &bfqq->entity;
+
+@@ -1659,7 +1674,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+
+ BUG_ON(bfqq->entity.budget < 0);
+
+- bfq_deactivate_bfqq(bfqd, bfqq, 1);
++ bfq_deactivate_bfqq(bfqd, bfqq, true);
+
+ BUG_ON(bfqq->entity.budget < 0);
+ }
+
+From d12d41bee45b988e1ae4f8dcece4bcecb5ad74a9 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Thu, 15 Dec 2016 22:11:39 +0100
+Subject: [PATCH 21/24] Optimize update of next_in_service entity
+
+If the update of the next_in_service candidate entity is triggered by
+the activation of an entity, then it is not necessary to perform full
+lookups in the active trees to update next_in_service. In fact, it is
+enough to check whether the just-activated entity has a higher
+priority than next_in_service, or, even if it has the same priority as
+next_in_service, is eligible and has a lower virtual finish time than
+next_in_service. If this compound condition holds, then the new entity
+becomes the new next_in_service. Otherwise no change is needed. This
+commit implements this optimization.
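+
+As a stand-alone model of the compound condition (not the kernel code;
+the struct and function names here are made up, while the wrap-safe
+comparison mirrors bfq_gt):
+
+    #include <stdbool.h>
+    #include <stdint.h>
+
+    struct ent { unsigned int class_idx; uint64_t start, finish; };
+
+    /* Wrap-safe timestamp comparison, as in bfq_gt(). */
+    static bool ts_gt(uint64_t a, uint64_t b)
+    {
+        return (int64_t)(a - b) > 0;
+    }
+
+    /* Should nw replace cur as the cached next_in_service, given the
+     * virtual time of nw's service tree? */
+    static bool replaces_next(const struct ent *nw, const struct ent *cur,
+                              uint64_t vtime)
+    {
+        if (!cur)
+            return true; /* no candidate cached yet */
+        if (nw->class_idx < cur->class_idx)
+            return true; /* strictly higher-priority class */
+        return nw->class_idx == cur->class_idx &&
+               !ts_gt(nw->start, vtime) &&     /* eligible */
+               ts_gt(cur->finish, nw->finish); /* lower finish time */
+    }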
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-sched.c | 167 ++++++++++++++++++++++++++++++++++++++++--------------
+ block/bfq.h | 11 +++-
+ 2 files changed, 134 insertions(+), 44 deletions(-)
+
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index 52911cc..09e494b 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -14,6 +14,18 @@
+
+ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+
++/**
++ * bfq_gt - compare two timestamps.
++ * @a: first ts.
++ * @b: second ts.
++ *
++ * Return @a > @b, dealing with wrapping correctly.
++ */
++static int bfq_gt(u64 a, u64 b)
++{
++ return (s64)(a - b) > 0;
++}
++
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ /* both next loops stop at one of the child entities of the root group */
+ #define for_each_entity(entity) \
+@@ -46,18 +58,99 @@ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ bfqg_entity->budget = next_in_service->budget;
+ }
+
+-/*
+- * Incomplete version that always returns true. It will return also
+- * false in its complete version, in case either next_in_service has
+- * not changed, or next_in_service has changed but in a way that will
+- * not influence upper levels.
++static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
++{
++ struct rb_node *node = tree->rb_node;
++
++ return rb_entry(node, struct bfq_entity, rb_node);
++}
++
++/**
++ * bfq_update_next_in_service - update sd->next_in_service
++ * @sd: sched_data for which to perform the update.
++ * @new_entity: if not NULL, pointer to the entity whose activation
++ * triggered the invocation of this function.
++ *
++ * This function is called to update sd->next_in_service, which, in
++ * its turn, may change as a consequence of the insertion or
++ * extraction of an entity into/from one of the active trees of
++ * sd. These insertions/extractions occur as a consequence of
++ * activations/deactivations of entities, with some activations being
++ * 'true' activations, and other activations being requeueings (i.e.,
++ * implementing the second, requeueing phase of the mechanism used to
++ * reposition an entity in its active tree; see comments on
++ * __bfq_activate_entity for details). In both the last two activation
++ * sub-cases, new_entity points to the just activated or requeued
++ * entity.
++ *
++ * This is a still incomplete version of this function, which always
++ * returns true. It will also return false in its complete version, in
++ * case either next_in_service has not changed, or next_in_service has
++ * changed but in a way that will not influence upper levels.
+ */
+-static bool bfq_update_next_in_service(struct bfq_sched_data *sd)
++static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
++ struct bfq_entity *new_entity)
+ {
+- struct bfq_entity *next_in_service;
++ struct bfq_entity *next_in_service = sd->next_in_service;
+ struct bfq_queue *bfqq;
+
+- next_in_service = bfq_lookup_next_entity(sd);
++ /*
++ * If this update is triggered by the activation of a new
++ * entity, then a full lookup in the active tree can be
++ * avoided. In fact, it is enough to check whether the
++ * just-activated entity has a higher priority than
++ * sd->next_in_service, or, even if it has the same priority
++ * as sd->next_in_service, is eligible and has a lower virtual
++ * finish time than sd->next_in_service. If this compound
++ * condition holds, then the new entity becomes the new
++ * next_in_service. Otherwise no change is needed.
++ */
++ if (new_entity) {
++ /*
++ * Flag used to decide whether to replace
++ * sd->next_in_service with new_entity. Tentatively
++ * set to true, and left as true if
++ * sd->next_in_service is NULL.
++ */
++ bool replace_next = true;
++
++ if (new_entity == sd->next_in_service)
++ return false;
++
++ /*
++ * If there is already a next_in_service candidate
++ * entity, then compare class priorities or timestamps
++ * to decide whether to replace sd->next_in_service with
++ * new_entity.
++ */
++ if (next_in_service) {
++ unsigned int new_entity_class_idx =
++ bfq_class_idx(new_entity);
++ struct bfq_service_tree *st =
++ sd->service_tree + new_entity_class_idx;
++
++ /*
++ * For efficiency, evaluate the most likely
++ * sub-condition first.
++ */
++ replace_next =
++ (new_entity_class_idx ==
++ bfq_class_idx(next_in_service)
++ &&
++ !bfq_gt(new_entity->start, st->vtime)
++ &&
++ bfq_gt(next_in_service->finish,
++ new_entity->finish))
++ ||
++ new_entity_class_idx <
++ bfq_class_idx(next_in_service);
++ }
++
++ if (replace_next)
++ next_in_service = new_entity;
++ } else /* invoked because of a deactivation: lookup needed */
++ next_in_service = bfq_lookup_next_entity(sd);
++
+ sd->next_in_service = next_in_service;
+
+ if (next_in_service)
+@@ -109,18 +202,6 @@ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ */
+ #define WFQ_SERVICE_SHIFT 22
+
+-/**
+- * bfq_gt - compare two timestamps.
+- * @a: first ts.
+- * @b: second ts.
+- *
+- * Return @a > @b, dealing with wrapping correctly.
+- */
+-static int bfq_gt(u64 a, u64 b)
+-{
+- return (s64)(a - b) > 0;
+-}
+-
+ static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
+ {
+ struct bfq_queue *bfqq = NULL;
+@@ -816,15 +897,18 @@ static void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ }
+
+ /**
+- * __bfq_activate_entity - activate an entity.
+- * @entity: the entity being activated.
+- * @non_blocking_wait_rq: true if this entity was waiting for a request
++ * __bfq_activate_entity - handle activation or requeueing of an entity.
++ * @entity: the entity being activated or requeued.
++ * @non_blocking_wait_rq: true if entity was waiting for a request
+ *
+- * Called whenever an entity is activated, i.e., it is not active and one
+- * of its children receives a new request, or has to be reactivated due to
+- * budget exhaustion. It uses the current budget of the entity (and the
+- * service received if @entity is active) of the queue to calculate its
+- * timestamps.
++ * Called whenever an entity is activated, which happens either if the
++ * entity is not active and one of its children receives a new request
++ * (true activation), or if the entity needs to be repositioned in its
++ * active tree (requeueing, as explained in detail in the comments
++ * inside the function).
++ *
++ * Basically, this function updates the timestamps of entity and
++ * (re)inserts entity into its active tree.
+ */
+ static void __bfq_activate_entity(struct bfq_entity *entity,
+ bool non_blocking_wait_rq)
+@@ -1027,7 +1111,7 @@ static void bfq_activate_entity(struct bfq_entity *entity,
+ RB_EMPTY_ROOT(&(sd->service_tree+1)->active) &&
+ RB_EMPTY_ROOT(&(sd->service_tree+2)->active));
+
+- if (!bfq_update_next_in_service(sd)) {
++ if (!bfq_update_next_in_service(sd, entity)) {
+ BUG_ON(!sd->next_in_service);
+ /*
+ * No need to propagate the activation to the
+@@ -1081,8 +1165,8 @@ static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue)
+ else if (entity->tree)
+ BUG();
+
+- if (was_in_service || sd->next_in_service == entity)
+- ret = bfq_update_next_in_service(sd);
++ if (sd->next_in_service == entity)
++ ret = bfq_update_next_in_service(sd, NULL);
+
+ if (!requeue || !bfq_gt(entity->finish, st->vtime))
+ bfq_forget_entity(st, entity);
+@@ -1169,7 +1253,7 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+ "invoking udpdate_next for this entity");
+ }
+ #endif
+- if (!bfq_update_next_in_service(sd))
++ if (!bfq_update_next_in_service(sd, entity))
+ break;
+ }
+ }
+@@ -1183,28 +1267,27 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+ */
+ static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st)
+ {
+- struct bfq_entity *entry;
+- struct rb_node *node = st->active.rb_node;
++ struct bfq_entity *root_entity = bfq_root_active_entity(&st->active);
+
+- entry = rb_entry(node, struct bfq_entity, rb_node);
+- if (bfq_gt(entry->min_start, st->vtime)) {
+- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entry);
++ if (bfq_gt(root_entity->min_start, st->vtime)) {
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(root_entity);
+
+ if (bfqq)
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+ "calc_vtime_jump: new value %llu",
+- entry->min_start);
++ root_entity->min_start);
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ else {
+ struct bfq_group *bfqg =
+- container_of(entry, struct bfq_group, entity);
++ container_of(root_entity, struct bfq_group,
++ entity);
+
+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+ "calc_vtime_jump: new value %llu",
+- entry->min_start);
++ root_entity->min_start);
+ }
+ #endif
+- return entry->min_start;
++ return root_entity->min_start;
+ }
+ return st->vtime;
+ }
+@@ -1608,7 +1691,7 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+ for_each_entity(entity) {
+ struct bfq_sched_data *sd = entity->sched_data;
+
+- if (!bfq_update_next_in_service(sd))
++ if (!bfq_update_next_in_service(sd, NULL))
+ break;
+ }
+
+diff --git a/block/bfq.h b/block/bfq.h
+index d4d784f..9c68d8b 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -806,13 +806,20 @@ struct bfq_group {
+
+ static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
+
++static unsigned int bfq_class_idx(struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++
++ return bfqq ? bfqq->ioprio_class - 1 :
++ BFQ_DEFAULT_GRP_CLASS - 1;
++}
++
+ static struct bfq_service_tree *
+ bfq_entity_service_tree(struct bfq_entity *entity)
+ {
+ struct bfq_sched_data *sched_data = entity->sched_data;
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+- unsigned int idx = bfqq ? bfqq->ioprio_class - 1 :
+- BFQ_DEFAULT_GRP_CLASS - 1;
++ unsigned int idx = bfq_class_idx(entity);
+
+ BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
+ BUG_ON(sched_data == NULL);
+
+From 634414378918d47830a12f2c0e0a6c526d227502 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Thu, 15 Dec 2016 22:15:42 +0100
+Subject: [PATCH 22/24] Fix bug causing occasional loss of weight raising
+
+When a bfq_queue, say bfqq, is split after a merging with another
+bfq_queue, BFQ checks whether it has to restore for bfqq the
+weight-raising state that bfqq had before being merged. In
+particular, the weight-raising is restored only if, according to the
+weight-raising duration decided for bfqq when it started to be
+weight-raised (before being merged), bfqq would not have already
+finished its weight-raising period.
+
+Yet, by mistake, such a duration is not saved when bfqq is merged. So,
+if bfqq is freed and reallocated when it is split, then this duration
+is wrongly set to zero on the split. As a consequence, the
+weight-raising state of bfqq is wrongly not restored, which causes BFQ
+to fail to guarantee a low latency to bfqq.
+
+This commit fixes this bug by saving the weight-raising duration when
+bfqq is merged, and correctly restoring it when bfqq is split.
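+
+A minimal sketch of the intended save/restore pairing (the structs are
+hypothetical trimmed stand-ins for the bfqq and bic state; only the
+last field is the one this commit starts saving):
+
+    struct wr_state {
+        unsigned long wr_coeff;
+        unsigned long last_wr_start_finish;
+        unsigned int  wr_cur_max_time; /* was lost across a merge */
+    };
+
+    static void save_wr_state(struct wr_state *saved,
+                              const struct wr_state *q)
+    {
+        saved->wr_coeff = q->wr_coeff;
+        saved->last_wr_start_finish = q->last_wr_start_finish;
+        saved->wr_cur_max_time = q->wr_cur_max_time; /* the fix */
+    }
+
+    static void resume_wr_state(struct wr_state *q,
+                                const struct wr_state *saved)
+    {
+        q->wr_coeff = saved->wr_coeff;
+        q->last_wr_start_finish = saved->last_wr_start_finish;
+        q->wr_cur_max_time = saved->wr_cur_max_time; /* the fix */
+    }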
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 2 ++
+ block/bfq.h | 1 +
+ 2 files changed, 3 insertions(+)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 7a56603..7aa3d55 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -650,6 +650,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+ bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
+ BUG_ON(time_is_after_jiffies(bfqq->wr_start_at_switch_to_srt));
+ bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
++ bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
+ BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish));
+
+ if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) ||
+@@ -1988,6 +1989,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
+ bic->saved_wr_coeff = bfqq->wr_coeff;
+ bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt;
+ bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
++ bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
+ BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish));
+ }
+
+diff --git a/block/bfq.h b/block/bfq.h
+index 9c68d8b..1a03521 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -366,6 +366,7 @@ struct bfq_io_cq {
+ unsigned long saved_wr_coeff;
+ unsigned long saved_last_wr_start_finish;
+ unsigned long saved_wr_start_at_switch_to_srt;
++ unsigned int saved_wr_cur_max_time;
+ };
+
+ enum bfq_device_speed {
+
+From cb826b5dbc9fcf524cd672857bcbcb68c6aa7236 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Fri, 16 Dec 2016 17:57:00 +0100
+Subject: [PATCH 23/24] Fix wrong reset of in-service entities in commit
+ 1430feb8
+
+In-service entities were reset through indirect logic, which
+also happened to be buggy in some cases. This commit fixes the
+bug by replacing that indirect logic with direct logic: when the
+in-service queue is reset, all involved entities are immediately
+reset as well, through a bubble-up loop.
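+
+A stand-alone model of the bubble-up loop (hypothetical trimmed types;
+the real code walks the parent chain with for_each_entity):
+
+    struct sched_data;
+
+    struct entity {
+        struct entity *parent;
+        struct sched_data *sched_data;
+    };
+
+    struct sched_data {
+        struct entity *in_service_entity;
+    };
+
+    /* On expiration of the in-service queue, clear the cached
+     * in-service entity at every level, from the leaf to the root. */
+    static void reset_in_service(struct entity *entity)
+    {
+        for (; entity; entity = entity->parent)
+            entity->sched_data->in_service_entity = NULL;
+    }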
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 5 ++---
+ block/bfq-sched.c | 45 +++++++++++++++++++++++++++++----------------
+ 2 files changed, 31 insertions(+), 19 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 7aa3d55..4d70d5e 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -2643,8 +2643,6 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ BUG_ON(bfqq != bfqd->in_service_queue);
+
+- __bfq_bfqd_reset_in_service(bfqd);
+-
+ /*
+ * If this bfqq is shared between multiple processes, check
+ * to make sure that those processes are still issuing I/Os
+@@ -2672,6 +2670,8 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ */
+ bfq_pos_tree_add_move(bfqd, bfqq);
+ }
++
++ __bfq_bfqd_reset_in_service(bfqd);
+ }
+
+ /**
+@@ -3819,7 +3819,6 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
+ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0);
+ BUG_ON(bfqq->entity.tree);
+ BUG_ON(bfq_bfqq_busy(bfqq));
+- BUG_ON(bfqq->bfqd->in_service_queue == bfqq);
+
+ if (bfq_bfqq_sync(bfqq))
+ /*
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index 09e494b..d9e63712 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -923,22 +923,28 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
+ if (entity == sd->in_service_entity) {
+ /*
+ * We are requeueing the current in-service entity,
+- * because of the requeueing of a just-expired
+- * non-idle leaf entity in the path originating from
+- * this entity. In fact, in this case, all entities in
+- * this path need to be requeued again for next
+- * service.
++ * which may have to be done for one of the following
++ * reasons:
++ * - entity represents the in-service queue, and the
++ * in-service queue is being requeued after an
++ * expiration;
++ * - entity represents a group, and its budget has
++ * changed because one of its child entities has
++ * just been either activated or requeued for some
++ * reason; the timestamps of the entity then need to
++ * be updated, and the entity needs to be enqueued
++ * or repositioned accordingly.
+ *
+- * Before requeueing, the start time of the entity
+- * must be moved forward to account for the service
+- * that the entity has received while in service. The
++ * In particular, before requeueing, the start time of
++ * the entity must be moved forward to account for the
++ * service that the entity has received while in
++ * service. This is done by the next instructions. The
+ * finish time will then be updated according to this
+ * new value of the start time, and to the budget of
+ * the entity.
+ */
+ bfq_calc_finish(entity, entity->service);
+ entity->start = entity->finish;
+- sd->in_service_entity = NULL;
+ BUG_ON(entity->tree && entity->tree != &st->active);
+ /*
+ * In addition, if the entity had more than one child
+@@ -1001,7 +1007,8 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
+ st->wsum += entity->weight;
+ bfq_get_entity(entity);
+
+- BUG_ON(entity->on_st);
++ BUG_ON(entity->on_st && bfqq);
++ BUG_ON(entity->on_st && !bfqq);
+ entity->on_st = 1;
+ }
+ }
+@@ -1153,10 +1160,8 @@ static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue)
+
+ BUG_ON(was_in_service && entity->tree && entity->tree != &st->active);
+
+- if (was_in_service) {
++ if (was_in_service)
+ bfq_calc_finish(entity, entity->service);
+- sd->in_service_entity = NULL;
+- }
+
+ if (entity->tree == &st->active)
+ bfq_active_extract(st, entity);
+@@ -1173,7 +1178,6 @@ static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue)
+ else
+ bfq_idle_insert(st, entity);
+
+- BUG_ON(sd->in_service_entity == entity);
+ BUG_ON(sd->next_in_service == entity);
+
+ return ret;
+@@ -1700,6 +1704,8 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+
+ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+ {
++ struct bfq_entity *entity = &bfqd->in_service_queue->entity;
++
+ if (bfqd->in_service_bic) {
+ put_io_context(bfqd->in_service_bic->icq.ioc);
+ bfqd->in_service_bic = NULL;
+@@ -1708,6 +1714,10 @@ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+ bfq_clear_bfqq_wait_request(bfqd->in_service_queue);
+ hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
+ bfqd->in_service_queue = NULL;
++
++ /* Reset in_service_entity along the path from entity to the root */
++ for_each_entity(entity)
++ entity->sched_data->in_service_entity = NULL;
+ }
+
+ static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -1715,13 +1725,17 @@ static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ {
+ struct bfq_entity *entity = &bfqq->entity;
+
+- BUG_ON(bfqq == bfqd->in_service_queue);
+ bfq_deactivate_entity(entity, requeue);
+ }
+
+ static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ struct bfq_entity *entity = &bfqq->entity;
++ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
++
++ BUG_ON(bfqq != bfqd->in_service_queue &&
++ entity->tree != &st->active && entity->tree != &st->idle &&
++ entity->on_st);
+
+ bfq_activate_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq));
+ bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
+@@ -1737,7 +1751,6 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ BUG_ON(!bfq_bfqq_busy(bfqq));
+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
+- BUG_ON(bfqq == bfqd->in_service_queue);
+
+ bfq_log_bfqq(bfqd, bfqq, "del from busy");
+
+
+From 316c31562f169ccb8752045217aead1688229315 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Fri, 16 Dec 2016 18:01:52 +0100
+Subject: [PATCH 24/24] Add code to redirect trace log to console
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq.h | 38 ++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 38 insertions(+)
+
+diff --git a/block/bfq.h b/block/bfq.h
+index 1a03521..a09804e 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -648,6 +648,43 @@ BFQ_BFQQ_FNS(softrt_update);
+ #undef BFQ_BFQQ_FNS
+
+ /* Logging facilities. */
++#ifdef CONFIG_BFQ_REDIRECT_TO_CONSOLE
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
++static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
++
++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
++ char __pbuf[128]; \
++ \
++ assert_spin_locked((bfqd)->queue->queue_lock); \
++ blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \
++ pr_crit("bfq%d%c %s " fmt "\n", \
++ (bfqq)->pid, \
++ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
++ __pbuf, ##args); \
++} while (0)
++
++#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
++ char __pbuf[128]; \
++ \
++ blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf)); \
++ pr_crit("%s " fmt "\n", __pbuf, ##args); \
++} while (0)
++
++#else /* CONFIG_BFQ_GROUP_IOSCHED */
++
++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
++ pr_crit("bfq%d%c " fmt "\n", (bfqq)->pid, \
++ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
++ ##args)
++#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0)
++
++#endif /* CONFIG_BFQ_GROUP_IOSCHED */
++
++#define bfq_log(bfqd, fmt, args...) \
++ pr_crit("bfq " fmt "\n", ##args)
++
++#else /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
+@@ -682,6 +719,7 @@ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
+
+ #define bfq_log(bfqd, fmt, args...) \
+ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
++#endif /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */
+
+ /* Expiration reasons. */
+ enum bfqq_expiration {
+
diff --git a/0006-BFQ-Fix.patch b/0006-BFQ-Fix.patch
deleted file mode 100644
index fa361f2778e5..000000000000
--- a/0006-BFQ-Fix.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-From 69f18bb587a4805b2b18bb4ba91dced87a8fda06 Mon Sep 17 00:00:00 2001
-From: Paolo Valente <paolo.valente@linaro.org>
-Date: Sat, 22 Oct 2016 15:26:33 +0200
-Subject: [PATCH 86/86] BUGFIX: Remove wrong conversion in use of bfq_fifo_expirebug
-
-Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
----
- block/bfq-iosched.c | 3 +--
- 1 file changed, 1 insertions(+), 2 deletions(-)
-
-diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
-index eef6ff4..c161ff0 100644
---- a/block/bfq-iosched.c
-+++ b/block/bfq-iosched.c
-@@ -4268,8 +4268,7 @@
-
- bfq_add_request(rq);
-
-- rq->fifo_time = ktime_get_ns() +
-- jiffies_to_nsecs(bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
-+ rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
- list_add_tail(&rq->queuelist, &bfqq->fifo);
-
- bfq_rq_enqueued(bfqd, bfqq, rq);
diff --git a/PKGBUILD b/PKGBUILD
index 085c3c34ca3f..d625f8cc1140 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -52,7 +52,7 @@ pkgbase=linux-bfq
# pkgname=('linux-bfq' 'linux-bfq-headers' 'linux-bfq-docs')
_srcname=linux-4.8
pkgver=4.8.15
-pkgrel=1
+pkgrel=2
arch=('i686' 'x86_64')
url="http://algo.ing.unimo.it"
license=('GPL2')
@@ -83,8 +83,7 @@ source=("http://www.kernel.org/pub/linux/kernel/v4.x/${_srcname}.tar.xz"
'net_handle_no_dst_on_skb_in_icmp6_send.patch'
'0001-x86-fpu-Fix-invalid-FPU-ptrace-state-after-execve.patch'
# patches from https://github.com/linusw/linux-bfq/commits/bfq-v8
- '0005-BFQ-Fix.patch'
- '0006-BFQ-Fix.patch')
+ '0005-BFQ-update-to-v8r6.patch')
_kernelname=${pkgbase#linux}
@@ -112,7 +111,7 @@ prepare() {
### Patch source with BFQ
msg "Patching source with BFQ patches"
- for p in "${srcdir}"/000{1,2,3,4,5,6}-*BFQ*.patch; do
+ for p in "${srcdir}"/000{1,2,3,4,5}-*BFQ*.patch; do
msg " $p"
patch -Np1 -i "$p"
done
@@ -193,7 +192,9 @@ prepare() {
if [ -n "$_localmodcfg" ]; then
msg "If you have modprobe-db installed, running it in recall mode now"
if [ -e /usr/bin/modprobed-db ]; then
- [[ ! -x /usr/bin/sudo ]] && echo "Cannot call modprobe with sudo. Install via pacman -S sudo and configure to work with this user." && exit 1
+ [[ -x /usr/bin/sudo ]] || {
+ echo "Cannot call modprobe with sudo. Install sudo and configure it to work with this user."
+ exit 1; }
sudo /usr/bin/modprobed-db recall
fi
msg "Running Steven Rostedt's make localmodconfig now"
@@ -461,16 +462,15 @@ sha512sums=('a48a065f21e1c7c4de4cf8ca47b8b8d9a70f86b64e7cfa6e01be490f78895745b9c
'dc0649dfe2a5ce8e8879a62df29a4a1959eb1a84e5d896a9cb119d6a85a9bad1b17135371799e0be96532e17c66043d298e4a14b18fad3078a1f425108f888c9'
'135afcffee2439e2260a71202658dce9ec5f555de3291b2d49a5cffdd953c6d7851b8a90e961576895555142a618e52220a7e4a46521ca4ba19d37df71887581'
'87ae76889ab84ced46c237374c124786551982f8ff7325102af9153aed1ab7be72bec4db1f7516426c5ee34c5ce17221679eb2f129c5cbdeec05c0d3cb7f785d'
- '8e8f407262f3f573654e7f0fceec6075fd2da0258a384206d28e5f59698168609e575f3d855adb8bb476b0684da409dab22f7ddc0f00e9c10c67debe4409c30b'
+ '6046432243b8f0497b43ea2c5fe3fde5135e058f2be25c02773ead93e2ab3b525b6f7c1c4898b9e67ac2bec0959b972829e997e79fabf6ac87e006700dfaf987'
'd9d28e02e964704ea96645a5107f8b65cae5f4fb4f537e224e5e3d087fd296cb770c29ac76e0ce95d173bc420ea87fb8f187d616672a60a0cae618b0ef15b8c8'
- '643cad54fc54f4790e1d28dbc60f775f117fa3e25e15aff418b0406cc7a9b49db5d67fc02ff7056a951a2d64d5bbc58306c40fa0d588460664c2921ad613745c'
- '94fbb7e67b77bb8911b69cdb23bbb74317e75a4ed786e4c6486bf2927042f9374e932ea879d1b250837e42c628abf7082fd6f895a2257c5d3ad652f6a520306a'
+ 'dc9ad074791a9edb3414a71480525a1c4a75b257137c754749c251e45ef376e313d3310766fedcabcd392837a8022bbcd97bfcd17c42feae63f71b4c4064b73d'
+ 'bcc657cd2366b55ba303d4d091ab9958e4f97121e7fdd64013a90be7fb6b19ac28ca2e8973496935475b9a8f8386bcbed8924981a20bb51cf3fc5c6f4f14b22a'
'd6faa67f3ef40052152254ae43fee031365d0b1524aa0718b659eb75afc21a3f79ea8d62d66ea311a800109bed545bc8f79e8752319cd378eef2cbd3a09aba22'
'2dc6b0ba8f7dbf19d2446c5c5f1823587de89f4e28e9595937dd51a87755099656f2acec50e3e2546ea633ad1bfd1c722e0c2b91eef1d609103d8abdc0a7cbaf'
'c53bd47527adbd2599a583e05a7d24f930dc4e86b1de017486588205ad6f262a51a4551593bc7a1218c96541ea073ea03b770278d947b1cd0d2801311fcc80e5'
'002d5e0ccfa5824c1750912a6400ff722672d6587b74ba66b1127cf1228048f604bba107617cf4f4477884039af4d4d196cf1b74cefe43b0bddc82270f11940d'
- '3889679e288d51f6fecc7ed6581ccde34acbf1e4861f5c9ca237a1ad13158502757d3fc457d7b6caf1c8c99c9dba842265004154a71cffb8ec14e1030e49e312'
- '3c3f3b6407d9a1a63cd91c2b5c35e6c932afa5bf33f1b2e8a530dbd9eacda8c8f956616c4cc9228657624da58e354ba5714252237d84ff3386fd65cf44f06ddc')
+ '98612f37013d1b41c76887bc5ed80f70753f6fc859748038dc83f3b53aa89464321951c6c21b364f56b2d4860e594c2091240c70ff70ce616d6b3fdadb817934')
validpgpkeys=(
'ABAF11C65A2970B130ABE3C479BE3E4300411886' # Linus Torvalds
diff --git a/config b/config
index 1899356f2d2f..788f14dd440e 100644
--- a/config
+++ b/config
@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
-# Linux/x86 4.8.10-1 Kernel Configuration
+# Linux/x86 4.8.15-1 Kernel Configuration
#
# CONFIG_64BIT is not set
CONFIG_X86_32=y
@@ -445,7 +445,6 @@ CONFIG_M686=y
# CONFIG_MHASWELL is not set
# CONFIG_MBROADWELL is not set
# CONFIG_MSKYLAKE is not set
-# CONFIG_MNATIVE is not set
CONFIG_X86_GENERIC=y
CONFIG_X86_INTERNODE_CACHE_SHIFT=6
CONFIG_X86_L1_CACHE_SHIFT=6
diff --git a/config.x86_64 b/config.x86_64
index ba4d0d888574..246520db8b4f 100644
--- a/config.x86_64
+++ b/config.x86_64
@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
-# Linux/x86 4.8.10-1 Kernel Configuration
+# Linux/x86 4.8.15-1 Kernel Configuration
#
CONFIG_64BIT=y
CONFIG_X86_64=y
@@ -448,7 +448,6 @@ CONFIG_NO_BOOTMEM=y
# CONFIG_MBROADWELL is not set
# CONFIG_MSKYLAKE is not set
CONFIG_GENERIC_CPU=y
-# CONFIG_MNATIVE is not set
CONFIG_X86_INTERNODE_CACHE_SHIFT=6
CONFIG_X86_L1_CACHE_SHIFT=6
CONFIG_X86_TSC=y