-rw-r--r--  .SRCINFO                       |   14
-rw-r--r--  0005-BFQ-Fix.patch             |   33
-rw-r--r--  0005-BFQ-update-to-v8r6.patch  | 2806
-rw-r--r--  0006-BFQ-Fix.patch             |   24
-rw-r--r--  PKGBUILD                       |   20
-rw-r--r--  config                         |    3
-rw-r--r--  config.x86_64                  |    3

7 files changed, 2824 insertions(+), 79 deletions(-)
diff --git a/.SRCINFO b/.SRCINFO
index 20f637797111..5f0ed1830f9c 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,6 +1,6 @@
pkgbase = linux-bfq
pkgver = 4.8.15
- pkgrel = 1
+ pkgrel = 2
url = http://algo.ing.unimo.it
arch = i686
arch = x86_64
@@ -25,8 +25,7 @@ pkgbase = linux-bfq
source = linux.preset
source = net_handle_no_dst_on_skb_in_icmp6_send.patch
source = 0001-x86-fpu-Fix-invalid-FPU-ptrace-state-after-execve.patch
- source = 0005-BFQ-Fix.patch
- source = 0006-BFQ-Fix.patch
+ source = 0005-BFQ-update-to-v8r6.patch
validpgpkeys = ABAF11C65A2970B130ABE3C479BE3E4300411886
validpgpkeys = 647F28654894E3BD457199BE38DBBDC86092693E
sha512sums = a48a065f21e1c7c4de4cf8ca47b8b8d9a70f86b64e7cfa6e01be490f78895745b9c8790734b1d22182cf1f930fb87eaaa84e62ec8cc1f64ac4be9b949e7c0358
@@ -37,16 +36,15 @@ pkgbase = linux-bfq
sha512sums = dc0649dfe2a5ce8e8879a62df29a4a1959eb1a84e5d896a9cb119d6a85a9bad1b17135371799e0be96532e17c66043d298e4a14b18fad3078a1f425108f888c9
sha512sums = 135afcffee2439e2260a71202658dce9ec5f555de3291b2d49a5cffdd953c6d7851b8a90e961576895555142a618e52220a7e4a46521ca4ba19d37df71887581
sha512sums = 87ae76889ab84ced46c237374c124786551982f8ff7325102af9153aed1ab7be72bec4db1f7516426c5ee34c5ce17221679eb2f129c5cbdeec05c0d3cb7f785d
- sha512sums = 8e8f407262f3f573654e7f0fceec6075fd2da0258a384206d28e5f59698168609e575f3d855adb8bb476b0684da409dab22f7ddc0f00e9c10c67debe4409c30b
+ sha512sums = 6046432243b8f0497b43ea2c5fe3fde5135e058f2be25c02773ead93e2ab3b525b6f7c1c4898b9e67ac2bec0959b972829e997e79fabf6ac87e006700dfaf987
sha512sums = d9d28e02e964704ea96645a5107f8b65cae5f4fb4f537e224e5e3d087fd296cb770c29ac76e0ce95d173bc420ea87fb8f187d616672a60a0cae618b0ef15b8c8
- sha512sums = 643cad54fc54f4790e1d28dbc60f775f117fa3e25e15aff418b0406cc7a9b49db5d67fc02ff7056a951a2d64d5bbc58306c40fa0d588460664c2921ad613745c
- sha512sums = 94fbb7e67b77bb8911b69cdb23bbb74317e75a4ed786e4c6486bf2927042f9374e932ea879d1b250837e42c628abf7082fd6f895a2257c5d3ad652f6a520306a
+ sha512sums = dc9ad074791a9edb3414a71480525a1c4a75b257137c754749c251e45ef376e313d3310766fedcabcd392837a8022bbcd97bfcd17c42feae63f71b4c4064b73d
+ sha512sums = bcc657cd2366b55ba303d4d091ab9958e4f97121e7fdd64013a90be7fb6b19ac28ca2e8973496935475b9a8f8386bcbed8924981a20bb51cf3fc5c6f4f14b22a
sha512sums = d6faa67f3ef40052152254ae43fee031365d0b1524aa0718b659eb75afc21a3f79ea8d62d66ea311a800109bed545bc8f79e8752319cd378eef2cbd3a09aba22
sha512sums = 2dc6b0ba8f7dbf19d2446c5c5f1823587de89f4e28e9595937dd51a87755099656f2acec50e3e2546ea633ad1bfd1c722e0c2b91eef1d609103d8abdc0a7cbaf
sha512sums = c53bd47527adbd2599a583e05a7d24f930dc4e86b1de017486588205ad6f262a51a4551593bc7a1218c96541ea073ea03b770278d947b1cd0d2801311fcc80e5
sha512sums = 002d5e0ccfa5824c1750912a6400ff722672d6587b74ba66b1127cf1228048f604bba107617cf4f4477884039af4d4d196cf1b74cefe43b0bddc82270f11940d
- sha512sums = 3889679e288d51f6fecc7ed6581ccde34acbf1e4861f5c9ca237a1ad13158502757d3fc457d7b6caf1c8c99c9dba842265004154a71cffb8ec14e1030e49e312
- sha512sums = 3c3f3b6407d9a1a63cd91c2b5c35e6c932afa5bf33f1b2e8a530dbd9eacda8c8f956616c4cc9228657624da58e354ba5714252237d84ff3386fd65cf44f06ddc
+ sha512sums = 98612f37013d1b41c76887bc5ed80f70753f6fc859748038dc83f3b53aa89464321951c6c21b364f56b2d4860e594c2091240c70ff70ce616d6b3fdadb817934
pkgname = linux-bfq
pkgdesc = Linux Kernel and modules with the BFQ scheduler.
diff --git a/0005-BFQ-Fix.patch b/0005-BFQ-Fix.patch
deleted file mode 100644
index ad66ca69a2ba..000000000000
--- a/0005-BFQ-Fix.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From 69f18bb587a4805b2b18bb4ba91dced87a8fda06 Mon Sep 17 00:00:00 2001
-From: Paolo Valente <paolo.valente@linaro.org>
-Date: Sat, 22 Oct 2016 15:26:33 +0200
-Subject: [PATCH 86/86] BUGFIX: Replace max wrongly used for modulo numbers
-
-Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
----
- block/bfq-iosched.c | 10 +++++++---
- 1 file changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
-index eef6ff4..c161ff0 100644
---- a/block/bfq-iosched.c
-+++ b/block/bfq-iosched.c
-@@ -2179,9 +2179,13 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
- * not only expires, but also remains with no
- * request.
- */
-- bfqq->last_wr_start_finish += jiffies -
-- max_t(unsigned long, bfqq->last_wr_start_finish,
-- bfqq->budget_timeout);
-+ if (time_after(bfqq->budget_timeout,
-+ bfqq->last_wr_start_finish))
-+ bfqq->last_wr_start_finish +=
-+ jiffies - bfqq->budget_timeout;
-+ else
-+ bfqq->last_wr_start_finish = jiffies;
-+
- if (time_is_after_jiffies(bfqq->last_wr_start_finish)) {
- pr_crit(
- "BFQ WARNING:last %lu budget %lu jiffies %lu",
-Contact GitHub API Training Shop Blog About
-
diff --git a/0005-BFQ-update-to-v8r6.patch b/0005-BFQ-update-to-v8r6.patch
new file mode 100644
index 000000000000..1b42af900abe
--- /dev/null
+++ b/0005-BFQ-update-to-v8r6.patch
@@ -0,0 +1,2806 @@
+From 1acff6b391cd626a495e42c9782bf2139b9f330f Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Wed, 19 Oct 2016 20:11:42 +0200
+Subject: [PATCH 01/24] Documentation/block: add bfq-iosched.txt
+
+Documentation of BFQ benefits, inner workings, interface and tunables.
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ Documentation/block/00-INDEX | 2 +
+ Documentation/block/bfq-iosched.txt | 508 ++++++++++++++++++++++++++++++++++++
+ 2 files changed, 510 insertions(+)
+ create mode 100644 Documentation/block/bfq-iosched.txt
+
+diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX
+index e55103a..8d55b4b 100644
+--- a/Documentation/block/00-INDEX
++++ b/Documentation/block/00-INDEX
+@@ -1,5 +1,7 @@
+ 00-INDEX
+ - This file
++bfq-iosched.txt
++ - BFQ IO scheduler and its tunables
+ biodoc.txt
+ - Notes on the Generic Block Layer Rewrite in Linux 2.5
+ biovecs.txt
+diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
+new file mode 100644
+index 0000000..67e1269
+--- /dev/null
++++ b/Documentation/block/bfq-iosched.txt
+@@ -0,0 +1,508 @@
++BFQ (Budget Fair Queueing)
++==========================
++
++BFQ is a proportional-share I/O scheduler, with some extra
++low-latency capabilities. In addition to cgroups support (blkio or io
++controllers), BFQ's main features are:
++- BFQ guarantees high system and application responsiveness, and a
++ low latency for time-sensitive applications, such as audio or video
++ players;
++- BFQ distributes bandwidth, and not just time, among processes or
++ groups (switching back to time distribution when needed to keep
++ throughput high).
++
++On average CPUs, the current version of BFQ can handle devices
++performing at most ~30 KIOPS; at most ~50 KIOPS on faster CPUs. As a
++reference, 30-50 KIOPS correspond to very high bandwidths with
++sequential I/O (e.g., 8-12 GB/s if I/O requests are 256 KB large), and
++to 120-200 MB/s with 4KB random I/O.
++
++The table of contents follows. Impatient readers can jump straight to Section 3.
++
++CONTENTS
++
++1. When may BFQ be useful?
++ 1-1 Personal systems
++ 1-2 Server systems
++2. How does BFQ work?
++3. What are BFQ's tunables?
++4. Group scheduling with BFQ
++ 4-1 Service guarantees provided
++ 4-2 Interface
++
++1. When may BFQ be useful?
++==========================
++
++BFQ provides the following benefits on personal and server systems.
++
++1-1 Personal systems
++--------------------
++
++Low latency for interactive applications
++
++Regardless of the actual background workload, BFQ guarantees that, for
++interactive tasks, the storage device is virtually as responsive as if
++it was idle. For example, even if one or more of the following
++background workloads are being executed:
++- one or more large files are being read, written or copied,
++- a tree of source files is being compiled,
++- one or more virtual machines are performing I/O,
++- a software update is in progress,
++- indexing daemons are scanning filesystems and updating their
++ databases,
++starting an application or loading a file from within an application
++takes about the same time as if the storage device was idle. As a
++comparison, with CFQ, NOOP or DEADLINE, and under the same conditions,
++applications experience high latencies, or even become unresponsive
++until the background workload terminates (even on SSDs).
++
++Low latency for soft real-time applications
++
++Soft real-time applications, such as audio and video
++players/streamers, also enjoy low latency and a low drop rate,
++regardless of the background I/O workload. As a consequence, these
++applications suffer almost no glitches due to the background workload.
++
++Higher speed for code-development tasks
++
++If some additional workload happens to be executed in parallel, then
++BFQ executes the I/O-related components of typical code-development
++tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
++NOOP or DEADLINE.
++
++High throughput
++
++On hard disks, BFQ achieves up to 30% higher throughput than CFQ, and
++up to 150% higher throughput than DEADLINE and NOOP, with all the
++sequential workloads considered in our tests. With random workloads,
++and with all the workloads on flash-based devices, BFQ achieves,
++instead, about the same throughput as the other schedulers.
++
++Strong fairness/bandwidth guarantees
++
++BFQ distributes the device throughput, and not just the device time,
++as desired among I/O-bound applications, with any workload and
++regardless of the device parameters. BFQ switches to time-based
++distribution (only) for processes that would otherwise cause a
++throughput loss.
++
++1-2 Server systems
++------------------
++
++Most benefits for server systems follow from the same service
++properties as above. In particular, regardless of whether additional,
++possibly heavy workloads are being served, BFQ guarantees:
++
++. audio and video-streaming with zero or very low jitter and drop
++ rate;
++
++. fast retrieval of web pages and embedded objects;
++
++. real-time recording of data in live-dumping applications (e.g.,
++ packet logging);
++
++. responsiveness in local and remote access to a server.
++
++
++2. How does BFQ work?
++=====================
++
++BFQ is a proportional-share I/O scheduler, whose general structure,
++plus a lot of code, are borrowed from CFQ.
++
++- Each process doing I/O on a device is associated with a weight and a
++ (bfq_)queue.
++
++- BFQ grants exclusive access to the device, for a while, to one queue
++ (process) at a time, and implements this service model by
++ associating every queue with a budget, measured in number of
++ sectors.
++
++ - After a queue is granted access to the device, the budget of the
++ queue is decremented, on each request dispatch, by the size of the
++ request.
++
++ - The in-service queue is expired, i.e., its service is suspended,
++ only if one of the following events occurs: 1) the queue finishes
++ its budget, 2) the queue empties, 3) a "budget timeout" fires.
++
++ - The budget timeout prevents processes doing random I/O from
++ holding the device for too long and dramatically reducing
++ throughput.
++
++ - Actually, as in CFQ, a queue associated with a process issuing
++ sync requests may not be expired immediately when it empties. In
++ contrast, BFQ may idle the device for a short time interval,
++ giving the process the chance to go on being served if it issues
++ a new request in time. Device idling typically boosts the
++ throughput on rotational devices, if processes do synchronous
++ and sequential I/O. In addition, under BFQ, device idling is
++ also instrumental in guaranteeing the desired throughput
++ fraction to processes issuing sync requests (see the description
++ of the slice_idle tunable in this document, or [1, 2], for more
++ details).
++
++ - With respect to idling for service guarantees, if several
++ processes are competing for the device at the same time, but
++ all processes (and groups, after the following commit) have
++ the same weight, then BFQ guarantees the expected throughput
++ distribution without ever idling the device. Throughput is
++ thus as high as possible in this common scenario.
++
++ - If low-latency mode is enabled (default configuration), BFQ
++ executes some special heuristics to detect interactive and soft
++ real-time applications (e.g., video or audio players/streamers),
++ and to reduce their latency. The most important action taken to
++ achieve this goal is to give to the queues associated with these
++ applications more than their fair share of the device
++    throughput. For brevity, we call "weight-raising" the whole
++    set of actions taken by BFQ to privilege these queues. In
++ particular, BFQ provides a milder form of weight-raising for
++ interactive applications, and a stronger form for soft real-time
++ applications.
++
++ - BFQ automatically deactivates idling for queues born in a burst of
++ queue creations. In fact, these queues are usually associated with
++ the processes of applications and services that benefit mostly
++ from a high throughput. Examples are systemd during boot, or git
++ grep.
++
++  - Like CFQ, BFQ merges queues performing interleaved I/O, i.e.,
++    performing random I/O that becomes mostly sequential if
++    merged. Unlike CFQ, BFQ achieves this goal with a more
++    reactive mechanism, called Early Queue Merge (EQM). EQM is so
++    responsive in detecting interleaved I/O (cooperating processes)
++    that it enables BFQ to achieve a high throughput, by queue
++    merging, even for queues for which CFQ needs a different
++    mechanism, preemption, to get a high throughput. As such, EQM is a
++    unified mechanism to achieve a high throughput with interleaved
++    I/O.
++
++ - Queues are scheduled according to a variant of WF2Q+, named
++ B-WF2Q+, and implemented using an augmented rb-tree to preserve an
++ O(log N) overall complexity. See [2] for more details. B-WF2Q+ is
++ also ready for hierarchical scheduling. However, for a cleaner
++ logical breakdown, the code that enables and completes
++ hierarchical support is provided in the next commit, which focuses
++ exactly on this feature.
++
++ - B-WF2Q+ guarantees a tight deviation with respect to an ideal,
++ perfectly fair, and smooth service. In particular, B-WF2Q+
++ guarantees that each queue receives a fraction of the device
++ throughput proportional to its weight, even if the throughput
++ fluctuates, and regardless of: the device parameters, the current
++ workload and the budgets assigned to the queue.
++
++    - The last property, budget independence (although perhaps
++      counterintuitive at first), is definitely beneficial, for
++      the following reasons:
++
++ - First, with any proportional-share scheduler, the maximum
++ deviation with respect to an ideal service is proportional to
++ the maximum budget (slice) assigned to queues. As a consequence,
++ BFQ can keep this deviation tight not only because of the
++ accurate service of B-WF2Q+, but also because BFQ *does not*
++ need to assign a larger budget to a queue to let the queue
++ receive a higher fraction of the device throughput.
++
++ - Second, BFQ is free to choose, for every process (queue), the
++ budget that best fits the needs of the process, or best
++ leverages the I/O pattern of the process. In particular, BFQ
++ updates queue budgets with a simple feedback-loop algorithm that
++ allows a high throughput to be achieved, while still providing
++ tight latency guarantees to time-sensitive applications. When
++ the in-service queue expires, this algorithm computes the next
++ budget of the queue so as to:
++
++ - Let large budgets be eventually assigned to the queues
++ associated with I/O-bound applications performing sequential
++ I/O: in fact, the longer these applications are served once
++        they get access to the device, the higher the throughput is.
++
++ - Let small budgets be eventually assigned to the queues
++ associated with time-sensitive applications (which typically
++ perform sporadic and short I/O), because, the smaller the
++ budget assigned to a queue waiting for service is, the sooner
++ B-WF2Q+ will serve that queue (Subsec 3.3 in [2]).
++
++- ioprio classes are served in strict priority order, i.e.,
++ lower-priority queues are not served as long as there are
++ higher-priority queues. Among queues in the same class, the
++ bandwidth is distributed in proportion to the weight of each
++  queue. A small amount of extra bandwidth is nevertheless guaranteed
++  to the Idle class, to prevent it from starving.
++
++
++3. What are BFQ's tunables?
++===========================
++
++The tunables back_seek_max, back_seek_penalty, fifo_expire_async and
++fifo_expire_sync below are the same as in CFQ. Their description is
++just copied from that for CFQ. Some considerations in the description
++of slice_idle are copied from CFQ too.
++
++per-process ioprio and weight
++-----------------------------
++
++Unless the cgroups interface is used, weights can be assigned to
++processes only indirectly, through I/O priorities, and according to
++the relation: weight = (IOPRIO_BE_NR - ioprio) * 10.
++
++slice_idle
++----------
++
++This parameter specifies how long BFQ should idle, waiting for the next
++I/O request, when certain sync BFQ queues become empty. By default
++slice_idle is a non-zero value. Idling has a double purpose: boosting
++throughput and making sure that the desired throughput distribution is
++respected (see the description of how BFQ works, and, if needed, the
++papers referred there).
++
++As for throughput, idling can be very helpful on highly seeky media
++like single-spindle SATA/SAS disks, where it can cut down on the
++overall number of seeks and improve throughput.
++
++Setting slice_idle to 0 will remove all the idling on queues and one
++should see an overall improved throughput on faster storage devices
++like multiple SATA/SAS disks in hardware RAID configuration.
++
++So depending on storage and workload, it might be useful to set
++slice_idle=0. In general, for SATA/SAS disks and software RAID of
++SATA/SAS disks, keeping slice_idle enabled should be useful. For any
++configuration where there are multiple spindles behind a single LUN
++(host-based hardware RAID controller or storage arrays), setting
++slice_idle=0 may result in better throughput and acceptable
++latencies.
++
++Idling is however necessary to have service guarantees enforced in
++case of differentiated weights or differentiated I/O-request lengths.
++To see why, suppose that a given BFQ queue A must get several I/O
++requests served for each request served for another queue B. Idling
++ensures that, if A makes a new I/O request slightly after becoming
++empty, then no request of B is dispatched in the middle, and thus A
++does not lose the possibility to get more than one request dispatched
++before the next request of B is dispatched. Note that idling
++guarantees the desired differentiated treatment of queues only in
++terms of I/O-request dispatches. To guarantee that the actual service
++order then corresponds to the dispatch order, the strict_guarantees
++tunable must be set too.
++
++There is an important flipside to idling: apart from the above cases,
++where it also benefits throughput, idling can severely reduce
++throughput. One important case is random workloads. Because of this
++issue, BFQ tends to avoid idling as much as possible, when it is not
++beneficial also for throughput. As a consequence of this behavior, and
++of further issues described for the strict_guarantees tunable,
++short-term service guarantees may be occasionally violated. And, in
++some cases, these guarantees may be more important than guaranteeing
++maximum throughput. For example, in video playing/streaming, a very
++low drop rate may be more important than maximum throughput. In these
++cases, consider setting the strict_guarantees parameter.
++
++strict_guarantees
++-----------------
++
++If this parameter is set (default: unset), then BFQ
++
++- always performs idling when the in-service queue becomes empty;
++
++- forces the device to serve one I/O request at a time, by dispatching a
++ new request only if there is no outstanding request.
++
++In the presence of differentiated weights or I/O-request sizes, both
++the above conditions are needed to guarantee that every BFQ queue
++receives its allotted share of the bandwidth. The first condition is
++needed for the reasons explained in the description of the slice_idle
++tunable. The second condition is needed because all modern storage
++devices reorder internally-queued requests, which may trivially break
++the service guarantees enforced by the I/O scheduler.
++
++Setting strict_guarantees may evidently affect throughput.
++
++back_seek_max
++-------------
++
++This specifies, in KiB, the maximum "distance" for backward seeking.
++The distance is the amount of space from the current head location to
++the sectors that lie behind it.
++
++This parameter allows the scheduler to anticipate requests in the "backward"
++direction and consider them as being the "next" if they are within this
++distance from the current head location.
++
++back_seek_penalty
++-----------------
++
++This parameter is used to compute the cost of backward seeking. If the
++backward distance of a request is just 1/back_seek_penalty of a "front"
++request's distance, then the seek cost of the two requests is equivalent.
++
++So the scheduler will not bias toward one or the other request (otherwise
++it would bias toward the front request). The default value of back_seek_penalty is 2.
++
++fifo_expire_async
++-----------------
++
++This parameter is used to set the timeout of asynchronous requests. Default
++value of this is 248ms.
++
++fifo_expire_sync
++----------------
++
++This parameter is used to set the timeout of synchronous requests. Default
++value of this is 124ms. To favor synchronous requests over asynchronous
++ones, this value should be decreased relative to fifo_expire_async.
++
++low_latency
++-----------
++
++This parameter is used to enable/disable BFQ's low latency mode. By
++default, low latency mode is enabled. If enabled, interactive and soft
++real-time applications are privileged and experience a lower latency,
++as explained in more detail in the description of how BFQ works.
++
++timeout_sync
++------------
++
++Maximum amount of device time that can be given to a task (queue) once
++it has been selected for service. On devices with costly seeks,
++increasing this time usually increases maximum throughput. On the
++opposite end, increasing this time coarsens the granularity of the
++short-term bandwidth and latency guarantees, especially if the
++following parameter is set to zero.
++
++max_budget
++----------
++
++Maximum amount of service, measured in sectors, that can be provided
++to a BFQ queue once it is set in service (of course within the limits
++of the above timeout). According to what said in the description of
++the algoritm, larger values increase the throughput in proportion to
++the percentage of sequential I/O requests issued. The price of larger
++values is that they coarsen the granularity of short-term bandwidth
++and latency guarantees.
++
++The default value is 0, which enables auto-tuning: BFQ sets max_budget
++to the maximum number of sectors that can be served during
++timeout_sync, according to the estimated peak rate.
++
++weights
++-------
++
++Read-only parameter, used to show the weights of the currently active
++BFQ queues.
++
++
++wr_ tunables
++------------
++
++BFQ exports a few parameters to control/tune the behavior of
++low-latency heuristics.
++
++wr_coeff
++
++Factor by which the weight of a weight-raised queue is multiplied. If
++the queue is deemed soft real-time, then the weight is further
++multiplied by an additional, constant factor.
++
++wr_max_time
++
++Maximum duration of a weight-raising period for an interactive task
++(ms). If set to zero (default value), then this value is computed
++automatically, as a function of the peak rate of the device. In any
++case, when the value of this parameter is read, it always reports the
++current duration, regardless of whether it has been set manually or
++computed automatically.
++
++wr_max_softrt_rate
++
++Maximum service rate below which a queue is deemed to be associated
++with a soft real-time application, and is then weight-raised
++accordingly (sectors/sec).
++
++wr_min_idle_time
++
++Minimum idle period after which interactive weight-raising may be
++reactivated for a queue (in ms).
++
++wr_rt_max_time
++
++Maximum weight-raising duration for soft real-time queues (in ms). The
++start time from which this duration is considered is automatically
++moved forward if the queue is detected to be still soft real-time
++before the current soft real-time weight-raising period finishes.
++
++wr_min_inter_arr_async
++
++Minimum period between I/O request arrivals after which weight-raising
++may be reactivated for an already busy async queue (in ms).
++
++
++4. Group scheduling with BFQ
++============================
++
++BFQ supports both cgroup-v1 and cgroup-v2 io controllers, namely blkio
++and io. In particular, BFQ supports weight-based proportional
++share.
++
++4-1 Service guarantees provided
++-------------------------------
++
++With BFQ, proportional share means true proportional share of the
++device bandwidth, according to group weights. For example, a group
++with weight 200 gets twice the bandwidth, and not just twice the time,
++of a group with weight 100.
++
++BFQ supports hierarchies (group trees) of any depth. Bandwidth is
++distributed among groups and processes in the expected way: for each
++group, the children of the group share the whole bandwidth of the
++group in proportion to their weights. In particular, this implies
++that, for each leaf group, every process of the group receives the
++same share of the whole group bandwidth, unless the ioprio of the
++process is modified.
++
++The resource-sharing guarantee for a group may partially or totally
++switch from bandwidth to time, if providing bandwidth guarantees to
++the group lowers the throughput too much. This switch occurs on a
++per-process basis: if a process of a leaf group causes throughput loss
++if served in such a way to receive its share of the bandwidth, then
++BFQ switches back to just time-based proportional share for that
++process.
++
++4-2 Interface
++-------------
++
++To get proportional sharing of bandwidth with BFQ for a given device,
++BFQ must of course be the active scheduler for that device.
++
++Within each group directory, the names of the files associated with
++BFQ-specific cgroup parameters and stats begin with the "bfq."
++prefix. So, with cgroups-v1 or cgroups-v2, the full prefix for
++BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group
++parameter to set the weight of a group with BFQ is blkio.bfq.weight
++or io.bfq.weight.
++
++Parameters to set
++-----------------
++
++For each group, there is only the following parameter to set.
++
++weight (namely blkio.bfq.weight or io.bfq.weight): the weight of the
++group inside its parent. Available values: 1..10000 (default 100). The
++linear mapping between ioprio and weights, described at the beginning
++of the tunable section, is still valid, but all weights higher than
++IOPRIO_BE_NR*10 are mapped to ioprio 0.
++
++
++[1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
++ Scheduler", Proceedings of the First Workshop on Mobile System
++ Technologies (MST-2015), May 2015.
++ http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
++
++[2] P. Valente and M. Andreolini, "Improving Application
++ Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of
++ the 5th Annual International Systems and Storage Conference
++ (SYSTOR '12), June 2012.
++ Slightly extended version:
++ http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite-
++ results.pdf
+
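As a worked illustration of the ioprio-to-weight relation documented above (weight = (IOPRIO_BE_NR - ioprio) * 10), here is a minimal user-space sketch. It assumes IOPRIO_BE_NR = 8, the value Linux uses for the best-effort class; the helper name is illustrative, not the kernel's symbol:

#include <stdio.h>

#define IOPRIO_BE_NR 8  /* number of best-effort priority levels in Linux */

/* Illustrative helper implementing the mapping rule from the doc above. */
static int bfq_ioprio_to_weight(int ioprio)
{
    return (IOPRIO_BE_NR - ioprio) * 10;
}

int main(void)
{
    int ioprio;

    /* ioprio 0 (highest BE priority) -> weight 80 ... ioprio 7 -> weight 10 */
    for (ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++)
        printf("ioprio %d -> weight %d\n", ioprio,
               bfq_ioprio_to_weight(ioprio));
    return 0;
}

Note that, per the documentation above, the cgroup interface accepts a wider weight range (1..10000); weights above IOPRIO_BE_NR * 10 simply map to ioprio 0.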
+From 11f0d8caa035ff4fe6fe02ebc0f2f14dc5473479 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Sat, 22 Oct 2016 15:26:33 +0200
+Subject: [PATCH 02/24] BUGFIX: Replace max wrongly used for modulo numbers
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index eef6ff4..c161ff0 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -2179,9 +2179,13 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
+ * not only expires, but also remains with no
+ * request.
+ */
+- bfqq->last_wr_start_finish += jiffies -
+- max_t(unsigned long, bfqq->last_wr_start_finish,
+- bfqq->budget_timeout);
++ if (time_after(bfqq->budget_timeout,
++ bfqq->last_wr_start_finish))
++ bfqq->last_wr_start_finish +=
++ jiffies - bfqq->budget_timeout;
++ else
++ bfqq->last_wr_start_finish = jiffies;
++
+ if (time_is_after_jiffies(bfqq->last_wr_start_finish)) {
+ pr_crit(
+ "BFQ WARNING:last %lu budget %lu jiffies %lu",
+
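The bugfix above replaces a max_t() on two jiffies values with time_after(). To see why a plain maximum is wrong for modulo (wrapping) numbers, here is a stand-alone sketch; the time_after() below reproduces the comparison idiom of include/linux/jiffies.h (minus the kernel's typecheck()), while the variable values are illustrative:

#include <stdio.h>

/* Wraparound-safe "a is later than b", as in include/linux/jiffies.h. */
#define time_after(a, b) ((long)((b) - (a)) < 0)

int main(void)
{
    /* budget_timeout lies just after a jiffies wrap, the other just before */
    unsigned long budget_timeout = 5;
    unsigned long last_wr_start_finish = -10UL;

    /* A plain max picks the numerically larger, pre-wrap value... */
    unsigned long by_max = last_wr_start_finish > budget_timeout ?
                           last_wr_start_finish : budget_timeout;

    printf("max picks %lu (stale pre-wrap value)\n", by_max);
    /* ...while time_after() correctly sees budget_timeout as later. */
    printf("time_after(budget_timeout, last) = %d\n",
           time_after(budget_timeout, last_wr_start_finish));
    return 0;
}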
+From eb59d4ac664f64372c440006b0cd27bf137a2025 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Mon, 24 Oct 2016 17:08:32 +0200
+Subject: [PATCH 03/24] Improve bfq-iosched.txt
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ Documentation/block/bfq-iosched.txt | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
+index 67e1269..8626b14 100644
+--- a/Documentation/block/bfq-iosched.txt
++++ b/Documentation/block/bfq-iosched.txt
+@@ -78,13 +78,15 @@ sequential workloads considered in our tests. With random workloads,
+ and with all the workloads on flash-based devices, BFQ achieves,
+ instead, about the same throughput as the other schedulers.
+
+-Strong fairness/bandwidth guarantees
++Strong fairness, bandwidth and delay guarantees
+
+ BFQ distributes the device throughput, and not just the device time,
+-as desired among I/O-bound applications, with any workload and
+-regardless of the device parameters. BFQ switches to time-based
+-distribution (only) for processes that would otherwise cause a
+-throughput loss.
++among I/O-bound applications in proportion to their weights, with any
++workload and regardless of the device parameters. From these bandwidth
++guarantees, it is possible to compute tight per-I/O-request delay
++guarantees by a simple formula. If not configured for strict service
++guarantees, BFQ switches to time-based resource sharing (only) for
++applications that would otherwise cause a throughput loss.
+
+ 1-2 Server systems
+ ------------------
+
+From a53d556f54f33de6e71f909748c6994ff90fa243 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Mon, 24 Oct 2016 20:36:53 +0200
+Subject: [PATCH 04/24] Improve help message in Kconfig.iosched
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/Kconfig.iosched | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
+index 6d92579..277b112 100644
+--- a/block/Kconfig.iosched
++++ b/block/Kconfig.iosched
+@@ -43,13 +43,10 @@ config IOSCHED_BFQ
+ tristate "BFQ I/O scheduler"
+ default n
+ ---help---
+- The BFQ I/O scheduler tries to distribute bandwidth among
+- all processes according to their weights.
+- It aims at distributing the bandwidth as desired, independently of
+- the disk parameters and with any workload. It also tries to
+- guarantee low latency to interactive and soft real-time
+- applications. If compiled built-in (saying Y here), BFQ can
+- be configured to support hierarchical scheduling.
++ The BFQ I/O scheduler distributes bandwidth among all
++ processes according to their weights, regardless of the
++ device parameters and with any workload. It also guarantees
++ a low latency to interactive and soft real-time applications.
+
+ config BFQ_GROUP_IOSCHED
+ bool "BFQ hierarchical scheduling support"
+
+From 710824a320c015b314abba3bc2982d0f3caa0acc Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Mon, 24 Oct 2016 20:37:59 +0200
+Subject: [PATCH 05/24] Remove stray disk word in first line
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index c161ff0..b5fdd45 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -1,5 +1,5 @@
+ /*
+- * Budget Fair Queueing (BFQ) disk scheduler.
++ * Budget Fair Queueing (BFQ) I/O scheduler.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+
+From a2f6ff7877e60f533cdf39e941fc45f7b5aafcee Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Mon, 24 Oct 2016 20:41:03 +0200
+Subject: [PATCH 06/24] BUGFIX: Remove wrong conversion in use of
+ bfq_fifo_expire
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index b5fdd45..a8c6887 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -4268,8 +4268,7 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
+
+ bfq_add_request(rq);
+
+- rq->fifo_time = ktime_get_ns() +
+- jiffies_to_nsecs(bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
++ rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
+ list_add_tail(&rq->queuelist, &bfqq->fifo);
+
+ bfq_rq_enqueued(bfqd, bfqq, rq);
+
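The removed jiffies_to_nsecs() call above was a unit bug: bfq_fifo_expire[] already holds nanoseconds (NSEC_PER_SEC / 4 and NSEC_PER_SEC / 8, as visible in a later hunk of this series), so converting it again multiplied the timeout by roughly NSEC_PER_SEC / HZ. A small sketch of the arithmetic, assuming HZ = 250 and a simplified model of jiffies_to_nsecs():

#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL
#define HZ 250ULL
/* Simplified model of the kernel helper: jiffies -> nanoseconds. */
#define jiffies_to_nsecs(j) ((j) * (NSEC_PER_SEC / HZ))

int main(void)
{
    /* fifo_expire for async requests is already in ns: 250 ms */
    unsigned long long fifo_expire_async = NSEC_PER_SEC / 4;

    printf("intended timeout:          %llu ns (250 ms)\n",
           fifo_expire_async);
    /* the buggy double conversion yields ~1e15 ns, about 11.5 days */
    printf("with the stray conversion: %llu ns\n",
           jiffies_to_nsecs(fifo_expire_async));
    return 0;
}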
+From c91736449d06052469c2c3abaf5d455b85e77417 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Tue, 25 Oct 2016 09:31:37 +0200
+Subject: [PATCH 07/24] bfq-iosched.txt: add description of preemption
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ Documentation/block/bfq-iosched.txt | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
+index 8626b14..8979408 100644
+--- a/Documentation/block/bfq-iosched.txt
++++ b/Documentation/block/bfq-iosched.txt
+@@ -227,6 +227,12 @@ plus a lot of code, are borrowed from CFQ.
+ budget assigned to a queue waiting for service is, the sooner
+ B-WF2Q+ will serve that queue (Subsec 3.3 in [2]).
+
++- If several processes are competing for the device at the same time,
++ but all processes and groups have the same weight, then BFQ
++ guarantees the expected throughput distribution without ever idling
++ the device. It uses preemption instead. Throughput is then much
++ higher in this common scenario.
++
+ - ioprio classes are served in strict priority order, i.e.,
+ lower-priority queues are not served as long as there are
+ higher-priority queues. Among queues in the same class, the
+
+From 2dc9393e7ff65d0e5340118402134cff03276b1c Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Sat, 5 Nov 2016 18:09:07 +0100
+Subject: [PATCH 08/24] Add parentheses to complex macros
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index a8c6887..88be34e 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -77,19 +77,19 @@
+ static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
+
+ /* Maximum backwards seek, in KiB. */
+-static const int bfq_back_max = 16 * 1024;
++static const int bfq_back_max = (16 * 1024);
+
+ /* Penalty of a backwards seek, in number of sectors. */
+ static const int bfq_back_penalty = 2;
+
+ /* Idling period duration, in ns. */
+-static u32 bfq_slice_idle = NSEC_PER_SEC / 125;
++static u32 bfq_slice_idle = (NSEC_PER_SEC / 125);
+
+ /* Minimum number of assigned budgets for which stats are safe to compute. */
+ static const int bfq_stats_min_budgets = 194;
+
+ /* Default maximum budget values, in sectors and number of requests. */
+-static const int bfq_default_max_budget = 16 * 1024;
++static const int bfq_default_max_budget = (16 * 1024);
+
+ /*
+ * Async to sync throughput distribution is controlled as follows:
+@@ -99,7 +99,7 @@ static const int bfq_default_max_budget = 16 * 1024;
+ static const int bfq_async_charge_factor = 10;
+
+ /* Default timeout values, in jiffies, approximating CFQ defaults. */
+-static const int bfq_timeout = HZ / 8;
++static const int bfq_timeout = (HZ / 8);
+
+ struct kmem_cache *bfq_pool;
+
+@@ -117,7 +117,7 @@ struct kmem_cache *bfq_pool;
+ /* Min number of samples required to perform peak-rate update */
+ #define BFQ_RATE_MIN_SAMPLES 32
+ /* Min observation time interval required to perform a peak-rate update (ns) */
+-#define BFQ_RATE_MIN_INTERVAL 300*NSEC_PER_MSEC
++#define BFQ_RATE_MIN_INTERVAL (300*NSEC_PER_MSEC)
+ /* Target observation time interval for a peak-rate update (ns) */
+ #define BFQ_RATE_REF_INTERVAL NSEC_PER_SEC
+
+
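The parentheses added above matter because a macro body expands textually into the caller's expression, where ordinary operator precedence applies. A minimal sketch of the hazard (constant names are illustrative, not the patch's):

#include <stdio.h>

#define NSEC_PER_MSEC 1000000ULL
#define RATE_MIN_INTERVAL_BAD   300*NSEC_PER_MSEC
#define RATE_MIN_INTERVAL_GOOD  (300*NSEC_PER_MSEC)

int main(void)
{
    unsigned long long elapsed = 600 * NSEC_PER_MSEC;  /* 600 ms */

    /* expands to: elapsed / 300 * NSEC_PER_MSEC -- divide, then multiply */
    printf("unparenthesized: %llu\n", elapsed / RATE_MIN_INTERVAL_BAD);
    /* expands to: elapsed / (300 * NSEC_PER_MSEC) -- the intended ratio */
    printf("parenthesized:   %llu\n", elapsed / RATE_MIN_INTERVAL_GOOD);
    return 0;
}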
+From 882f0607d82efe44d98634ce1fdcc4b602ccf5ec Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Sat, 5 Nov 2016 18:32:20 +0100
+Subject: [PATCH 09/24] Fix typo in bfq-iosched.txt
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ Documentation/block/bfq-iosched.txt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
+index 8979408..5ba67af 100644
+--- a/Documentation/block/bfq-iosched.txt
++++ b/Documentation/block/bfq-iosched.txt
+@@ -385,7 +385,7 @@ max_budget
+ Maximum amount of service, measured in sectors, that can be provided
+ to a BFQ queue once it is set in service (of course within the limits
+ of the above timeout). According to what said in the description of
+-the algoritm, larger values increase the throughput in proportion to
++the algorithm, larger values increase the throughput in proportion to
+ the percentage of sequential I/O requests issued. The price of larger
+ values is that they coarsen the granularity of short-term bandwidth
+ and latency guarantees.
+
+From 99b1da54ca162abdc9e62915050154b35bd9bc27 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Sat, 5 Nov 2016 18:34:58 +0100
+Subject: [PATCH 10/24] BFQ-v8r5
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 2 +-
+ block/bfq.h | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 88be34e..0f3081d 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -5212,7 +5212,7 @@ static struct blkcg_policy blkcg_policy_bfq = {
+ static int __init bfq_init(void)
+ {
+ int ret;
+- char msg[50] = "BFQ I/O-scheduler: v8r4";
++ char msg[50] = "BFQ I/O-scheduler: v8r5";
+
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ ret = blkcg_policy_register(&blkcg_policy_bfq);
+diff --git a/block/bfq.h b/block/bfq.h
+index ea1e7d8..b80abe0 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -1,5 +1,5 @@
+ /*
+- * BFQ-v8r4 for 4.8.0: data structures and common functions prototypes.
++ * BFQ-v8r5 for 4.8.0: data structures and common functions prototypes.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+
+From d825ef204aa934e1fd9ebd925276ca0e67783508 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Wed, 7 Dec 2016 19:31:37 +0100
+Subject: [PATCH 11/24] Improve documentation
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+
+Squash me
+---
+ Documentation/block/bfq-iosched.txt | 26 ++++++++++++++++++++------
+ block/Kconfig.iosched | 5 ++++-
+ 2 files changed, 24 insertions(+), 7 deletions(-)
+
+diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
+index 5ba67af..13b5248 100644
+--- a/Documentation/block/bfq-iosched.txt
++++ b/Documentation/block/bfq-iosched.txt
+@@ -252,9 +252,14 @@ of slice_idle are copied from CFQ too.
+ per-process ioprio and weight
+ -----------------------------
+
+-Unless the cgroups interface is used, weights can be assigned to
+-processes only indirectly, through I/O priorities, and according to
+-the relation: weight = (IOPRIO_BE_NR - ioprio) * 10.
++Unless the cgroups interface is used (see "4. BFQ group scheduling"),
++weights can be assigned to processes only indirectly, through I/O
++priorities, and according to the relation:
++weight = (IOPRIO_BE_NR - ioprio) * 10.
++
++Beware that, if low-latency is set, then BFQ automatically raises the
++weight of the queues associated with interactive and soft real-time
++applications. Unset this tunable if you need/want to control weights.
+
+ slice_idle
+ ----------
+@@ -369,6 +374,11 @@ default, low latency mode is enabled. If enabled, interactive and soft
+ real-time applications are privileged and experience a lower latency,
+ as explained in more detail in the description of how BFQ works.
+
++DO NOT enable this mode if you need full control on bandwidth
++distribution. In fact, if it is enabled, then BFQ automatically
++increases the bandwidth share of privileged applications, as the main
++means to guarantee a lower latency to them.
++
+ timeout_sync
+ ------------
+
+@@ -449,9 +459,9 @@ may be reactivated for an already busy async queue (in ms).
+ 4. Group scheduling with BFQ
+ ============================
+
+-BFQ supports both cgroup-v1 and cgroup-v2 io controllers, namely blkio
+-and io. In particular, BFQ supports weight-based proportional
+-share.
++BFQ supports both cgroups-v1 and cgroups-v2 io controllers, namely
++blkio and io. In particular, BFQ supports weight-based proportional
++share. To activate cgroups support, set BFQ_GROUP_IOSCHED.
+
+ 4-1 Service guarantees provided
+ -------------------------------
+@@ -501,6 +511,10 @@ linear mapping between ioprio and weights, described at the beginning
+ of the tunable section, is still valid, but all weights higher than
+ IOPRIO_BE_NR*10 are mapped to ioprio 0.
+
++Recall that, if low-latency is set, then BFQ automatically raises the
++weight of the queues associated with interactive and soft real-time
++applications. Unset this tunable if you need/want to control weights.
++
+
+ [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
+ Scheduler", Proceedings of the First Workshop on Mobile System
+diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
+index 277b112..f2cd945 100644
+--- a/block/Kconfig.iosched
++++ b/block/Kconfig.iosched
+@@ -47,13 +47,16 @@ config IOSCHED_BFQ
+ processes according to their weights, regardless of the
+ device parameters and with any workload. It also guarantees
+ a low latency to interactive and soft real-time applications.
++ Details in Documentation/block/bfq-iosched.txt
+
+ config BFQ_GROUP_IOSCHED
+ bool "BFQ hierarchical scheduling support"
+ depends on IOSCHED_BFQ && BLK_CGROUP
+ default n
+ ---help---
+- Enable hierarchical scheduling in BFQ, using the blkio controller.
++
++ Enable hierarchical scheduling in BFQ, using the blkio
++ (cgroups-v1) or io (cgroups-v2) controller.
+
+ choice
+ prompt "Default I/O scheduler"
+
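To make the documentation changes above concrete, here is a hedged sketch of setting a BFQ group weight through the cgroups-v2 interface described in bfq-iosched.txt. The cgroup path and group name are illustrative; it assumes the io controller is enabled, the group already exists, and BFQ is the active scheduler for the device:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
    /* illustrative path: io.bfq.weight of a hypothetical "media" group */
    const char *path = "/sys/fs/cgroup/media/io.bfq.weight";
    const char *weight = "200\n";  /* twice the default weight of 100 */
    int fd = open(path, O_WRONLY);

    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* per the doc, this group now gets twice the bandwidth (not just
     * twice the time) of a sibling group left at weight 100 */
    if (write(fd, weight, strlen(weight)) < 0)
        perror("write");
    close(fd);
    return 0;
}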
+From 880441682487350cd5b116c2cea830351f9c9478 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Wed, 7 Dec 2016 19:43:58 +0100
+Subject: [PATCH 12/24] Add and improve pointers in initial comments
+
+---
+ block/bfq-iosched.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 0f3081d..b07d251 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -14,6 +14,12 @@
+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
+ * file.
+ *
++ * BFQ is a proportional-share I/O scheduler, with some extra
++ * low-latency capabilities. BFQ also supports full hierarchical
++ * scheduling through cgroups. Next paragraphs provide an introduction
++ * on BFQ inner workings. Details on BFQ benefits and usage can be
++ * found in Documentation/block/bfq-iosched.txt.
++ *
+ * BFQ is a proportional-share storage-I/O scheduling algorithm based
+ * on the slice-by-slice service scheme of CFQ. But BFQ assigns
+ * budgets, measured in number of sectors, to processes instead of
+@@ -43,10 +49,10 @@
+ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N)
+ * complexity derives from the one introduced with EEVDF in [3].
+ *
+- * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness
+- * with the BFQ Disk I/O Scheduler'',
+- * Proceedings of the 5th Annual International Systems and Storage
+- * Conference (SYSTOR '12), June 2012.
++ * [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
++ * Scheduler", Proceedings of the First Workshop on Mobile System
++ * Technologies (MST-2015), May 2015.
++ * http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
+ *
+ * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf
+ *
+
+From 60dad3833c9afa52315508579cbb4bac7e55d3a3 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Fri, 9 Dec 2016 07:05:29 +0100
+Subject: [PATCH 13/24] Fix typo in comments on bfq_lookup_next_entity
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-sched.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index 45d63d3..9afeaab 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -1265,7 +1265,7 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
+ * NOTE: since we cache the next_in_service entity at each level of the
+ * hierarchy, the complexity of the lookup can be decreased with
+ * absolutely no effort just returning the cached next_in_service value;
+- * we prefer to do full lookups to test the consistency of * the data
++ * we prefer to do full lookups to test the consistency of the data
+ * structures.
+ */
+ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+
+From 15f446f3bc189df88c64366930b5e237c8d9d5d5 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Fri, 9 Dec 2016 09:46:41 +0100
+Subject: [PATCH 14/24] Fix false-positive warning for uninitialized var
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index b07d251..3043e5e 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -5044,7 +5044,7 @@ STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0,
+ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)\
+ { \
+ struct bfq_data *bfqd = e->elevator_data; \
+- unsigned long __data; \
++ unsigned long uninitialized_var(__data); \
+ int ret = bfq_var_store(&__data, (page), count); \
+ if (__data < (MIN)) \
+ __data = (MIN); \
+
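For context on the change above: in kernels of this era, uninitialized_var() was defined in the compiler headers as a self-initialization that silences gcc's false "may be used uninitialized" warning (the macro was removed from later kernels). A stand-alone sketch of the pattern, with a stand-in for the real bfq_var_store():

/* Old-kernel definition (compiler headers of this era); removed later. */
#define uninitialized_var(x) x = x

/* Stand-in for the real parser: always writes *var before returning. */
static int bfq_var_store(unsigned long *var, const char *page, int count)
{
    (void)page;
    (void)count;
    *var = 42;
    return 0;
}

int demo_store(const char *page, int count)
{
    /* gcc cannot prove that bfq_var_store() always writes __data, so a
     * plain declaration may trigger a false warning; the macro expands
     * to "__data = __data" and suppresses it without changing codegen. */
    unsigned long uninitialized_var(__data);
    int ret = bfq_var_store(&__data, page, count);

    return ret ? ret : (int)__data;
}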
+From 1430feb8c7194f72be0a8066ad6b996d78bf3e09 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Sun, 11 Dec 2016 11:46:13 +0100
+Subject: [PATCH 15/24] Improve and fix some bugs in hierarchical code
+
+This commit provides the following contributions.
+
+BUGFIX Removed the check that, when the new queue to set in service
+must be selected, the cached next_in_service entities coincide with
+the entities chosen by __bfq_lookup_next_entity. This check,
+issuing a warning on failure, was wrong, because the cached and the
+newly chosen entity could differ in case of a CLASS_IDLE timeout.
+
+EFFICIENCY IMPROVEMENT (this improvement is related to the above
+BUGFIX) The cached next_in_service entities are now really used to
+select the next queue to serve when the in-service queue
+expires. Before this change, the cached values were used only for
+extra (and in general wrong) consistency checks. This caused
+additional overhead instead of reducing it.
+
+EFFICIENCY IMPROVEMENT The next entity to serve, for each level of the
+hierarchy, is now updated on every event that may change it, i.e., on
+every activation or deactivation of any entity. This finer granularity
+is not strictly needed for correctness, because it is only on queue
+expirations that BFQ needs to know what are the next entities to
+serve. Yet this change makes it possible to implement optimizations in
+which it is necessary to know the next queue to serve before the
+in-service queue expires.
+
+SERVICE-ACCURACY IMPROVEMENT The per-device CLASS_IDLE service timeout
+has been turned into a much more accurate per-group timeout.
+
+CODE-QUALITY IMPROVEMENT The non-trivial parts touched by the above
+improvements have been partially rewritten, and enriched of comments,
+so as to improve their transparency and understandability.
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-cgroup.c | 2 +-
+ block/bfq-iosched.c | 2 +-
+ block/bfq-sched.c | 497 ++++++++++++++++++++++++++++++++++++----------------
+ block/bfq.h | 5 +-
+ 4 files changed, 348 insertions(+), 158 deletions(-)
+
+diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
+index b50ae8e..a04bc40 100644
+--- a/block/bfq-cgroup.c
++++ b/block/bfq-cgroup.c
+@@ -314,7 +314,7 @@ static void bfq_init_entity(struct bfq_entity *entity,
+ bfqq->ioprio_class = bfqq->new_ioprio_class;
+ bfqg_get(bfqg);
+ }
+- entity->parent = bfqg->my_entity;
++ entity->parent = bfqg->my_entity; /* NULL for root group */
+ entity->sched_data = &bfqg->sched_data;
+ }
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 3043e5e..2d00bd10 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -4763,6 +4763,7 @@ static void bfq_init_root_group(struct bfq_group *root_group,
+ root_group->rq_pos_tree = RB_ROOT;
+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
+ root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
++ root_group->sched_data.bfq_class_idle_last_service = jiffies;
+ }
+
+ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+@@ -4836,7 +4837,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ bfqd->bfq_back_max = bfq_back_max;
+ bfqd->bfq_back_penalty = bfq_back_penalty;
+ bfqd->bfq_slice_idle = bfq_slice_idle;
+- bfqd->bfq_class_idle_last_service = 0;
+ bfqd->bfq_timeout = bfq_timeout;
+
+ bfqd->bfq_requests_within_timer = 120;
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index 9afeaab..2f54ac5 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -15,16 +15,15 @@
+ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+-#define for_each_entity(entity) \
++/* both next loops stop at one of the child entities of the root group */
++#define for_each_entity(entity) \
+ for (; entity ; entity = entity->parent)
+
+ #define for_each_entity_safe(entity, parent) \
+ for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
+
+
+-static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+- int extract,
+- struct bfq_data *bfqd);
++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd);
+
+ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ {
+@@ -47,23 +46,18 @@ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ bfqg_entity->budget = next_in_service->budget;
+ }
+
+-static int bfq_update_next_in_service(struct bfq_sched_data *sd)
++/*
++ * Incomplete version that always returns true. It will return also
++ * false in its complete version, in case either next_in_service has
++ * not changed, or next_in_service has changed but in a way that will
++ * not influence upper levels.
++ */
++static bool bfq_update_next_in_service(struct bfq_sched_data *sd)
+ {
+ struct bfq_entity *next_in_service;
+ struct bfq_queue *bfqq;
+
+- if (sd->in_service_entity)
+- /* will update/requeue at the end of service */
+- return 0;
+-
+- /*
+- * NOTE: this can be improved in many ways, such as returning
+- * 1 (and thus propagating upwards the update) only when the
+- * budget changes, or caching the bfqq that will be scheduled
+- * next from this subtree. By now we worry more about
+- * correctness than about performance...
+- */
+- next_in_service = bfq_lookup_next_entity(sd, 0, NULL);
++ next_in_service = bfq_lookup_next_entity(sd);
+ sd->next_in_service = next_in_service;
+
+ if (next_in_service)
+@@ -75,6 +69,7 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
+ if (bfqq)
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+ "update_next_in_service: chosen this queue");
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ else {
+ struct bfq_group *bfqg =
+ container_of(next_in_service,
+@@ -83,15 +78,11 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+ "update_next_in_service: chosen this entity");
+ }
++#endif
+ exit:
+- return 1;
++ return true;
+ }
+
+-static void bfq_check_next_in_service(struct bfq_sched_data *sd,
+- struct bfq_entity *entity)
+-{
+- WARN_ON(sd->next_in_service != entity);
+-}
+ #else
+ #define for_each_entity(entity) \
+ for (; entity ; entity = NULL)
+@@ -104,11 +95,6 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
+ return 0;
+ }
+
+-static void bfq_check_next_in_service(struct bfq_sched_data *sd,
+- struct bfq_entity *entity)
+-{
+-}
+-
+ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ {
+ }
+@@ -851,23 +837,59 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
+ BUG_ON(!sd);
+ BUG_ON(!st);
+ if (entity == sd->in_service_entity) {
+- BUG_ON(entity->tree);
+ /*
+- * If we are requeueing the current entity we have
+- * to take care of not charging to it service it has
+- * not received.
++ * We are requeueing the current in-service entity,
++ * because of the requeueing of a just-expired
++ * non-idle leaf entity in the path originating from
++ * this entity. In fact, in this case, then all
++ * entities in this path need to be requeued again for
++ * next service.
++ *
++ * Before requeueing, the start time of the entity
++ * must be moved forward to account for the service
++ * that the entity has received while in service. The
++ * finish time will then be updated according to this
++ * new value of the start time, and to the budget of
++ * the entity.
+ */
+ bfq_calc_finish(entity, entity->service);
+ entity->start = entity->finish;
+ sd->in_service_entity = NULL;
++ BUG_ON(entity->tree && entity->tree != &st->active);
++ /*
++ * In addition, if the entity had more than one child
++ * when set in service, then was not extracted from
++ * the active tree. This implies that the position of
++ * the entity in the active tree may need to be
++ * changed now, because we have just updated the start
++ * time of the entity, and we will update its finish
++ * time in a moment (the requeueing is then, more
++ * precisely, a repositioning in this case). To
++ * implement this repositioning, we: 1) dequeue the
++ * entity here, 2) update the finish time and
++ * requeue the entity according to the new
++ * timestamps below.
++ */
++ if (entity->tree)
++ bfq_active_extract(st, entity);
+ } else if (entity->tree == &st->active) {
+ /*
+- * Requeueing an entity due to a change of some
+- * next_in_service entity below it. We reuse the
+- * old start time.
++ * The entity is already active, and not in
++ * service. In this case, this function gets called
++ * only if the next_in_service entity below this
++ * entity has changed, and this change has caused the
++ * budget of this entity to change, which, finally
++ * implies that the finish time of this entity must be
++ * updated. Such an update may cause the scheduling,
++ * i.e., the position in the active tree, of this
++ * entity to change. We handle this fact by: 1)
++ * dequeueing the entity here, 2) updating the finish
++ * time and requeueing the entity according to the new
++ * timestamps below. This is the same approach as the
++ * non-extracted-entity sub-case above.
+ */
+ bfq_active_extract(st, entity);
+- } else {
++ } else { /* This is a 'true' activation, not a requeueing. */
+ unsigned long long min_vstart;
+
+ /* See comments on bfq_fqq_update_budg_for_activation */
+@@ -977,6 +999,11 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
+ entity->start <= st->vtime ? "" : "non ", st);
+ #endif
+ }
++ BUG_ON(RB_EMPTY_ROOT(&st->active));
++ BUG_ON(&st->active != &sd->service_tree->active &&
++ &st->active != &(sd->service_tree+1)->active &&
++ &st->active != &(sd->service_tree+2)->active);
++
+ }
+
+ /**
+@@ -996,13 +1023,20 @@ static void bfq_activate_entity(struct bfq_entity *entity,
+ __bfq_activate_entity(entity, non_blocking_wait_rq);
+
+ sd = entity->sched_data;
+- if (!bfq_update_next_in_service(sd))
++ BUG_ON(RB_EMPTY_ROOT(&sd->service_tree->active) &&
++ RB_EMPTY_ROOT(&(sd->service_tree+1)->active) &&
++ RB_EMPTY_ROOT(&(sd->service_tree+2)->active));
++
++ if (!bfq_update_next_in_service(sd)) {
++ BUG_ON(!sd->next_in_service);
+ /*
+ * No need to propagate the activation to the
+ * upper entities, as they will be updated when
+ * the in-service entity is rescheduled.
+ */
+ break;
++ }
++ BUG_ON(!sd->next_in_service);
+ }
+ }
+
+@@ -1016,31 +1050,33 @@ static void bfq_activate_entity(struct bfq_entity *entity,
+ * any scheduler tree, extract it from that tree, and if necessary
+ * and if the caller did not specify @requeue, put it on the idle tree.
+ *
+- * Return %1 if the caller should update the entity hierarchy, i.e.,
++ * Return %true if the caller should update the entity hierarchy, i.e.,
+ * if the entity was in service or if it was the next_in_service for
+- * its sched_data; return %0 otherwise.
++ * its sched_data; return %false otherwise.
+ */
+-static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
++static bool __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+ {
+ struct bfq_sched_data *sd = entity->sched_data;
+ struct bfq_service_tree *st;
+ int was_in_service;
+- int ret = 0;
++ bool ret = false;
+
+ if (sd == NULL || !entity->on_st) /* never activated, or inactive */
+- return 0;
++ return false;
+
+ st = bfq_entity_service_tree(entity);
+ was_in_service = entity == sd->in_service_entity;
+
+- BUG_ON(was_in_service && entity->tree);
++ BUG_ON(was_in_service && entity->tree && entity->tree != &st->active);
+
+ if (was_in_service) {
+ bfq_calc_finish(entity, entity->service);
+ sd->in_service_entity = NULL;
+- } else if (entity->tree == &st->active)
++ }
++
++ if (entity->tree == &st->active)
+ bfq_active_extract(st, entity);
+- else if (entity->tree == &st->idle)
++ else if (!was_in_service && entity->tree == &st->idle)
+ bfq_idle_extract(st, entity);
+ else if (entity->tree)
+ BUG();
+@@ -1124,19 +1160,13 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+ }
+
+ /**
+- * bfq_update_vtime - update vtime if necessary.
++ * bfq_calc_vtime_jump - compute the value to which the vtime should jump,
++ * if needed, to have at least one entity eligible.
+ * @st: the service tree to act upon.
+ *
+- * If necessary update the service tree vtime to have at least one
+- * eligible entity, skipping to its start time. Assumes that the
+- * active tree of the device is not empty.
+- *
+- * NOTE: this hierarchical implementation updates vtimes quite often,
+- * we may end up with reactivated processes getting timestamps after a
+- * vtime skip done because we needed a ->first_active entity on some
+- * intermediate node.
++ * Assumes that st is not empty.
+ */
+-static void bfq_update_vtime(struct bfq_service_tree *st)
++static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st)
+ {
+ struct bfq_entity *entry;
+ struct rb_node *node = st->active.rb_node;
+@@ -1144,22 +1174,30 @@ static void bfq_update_vtime(struct bfq_service_tree *st)
+ entry = rb_entry(node, struct bfq_entity, rb_node);
+ if (bfq_gt(entry->min_start, st->vtime)) {
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entry);
+- st->vtime = entry->min_start;
+
+ if (bfqq)
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+- "update_vtime: new vtime %llu %p",
+- ((st->vtime>>10)*1000)>>12, st);
++ "calc_vtime_jump: new value %llu",
++ entry->min_start);
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ else {
+ struct bfq_group *bfqg =
+ container_of(entry, struct bfq_group, entity);
+
+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+- "update_vtime: new vtime %llu %p",
+- ((st->vtime>>10)*1000)>>12, st);
++ "calc_vtime_jump: new value %llu",
++ entry->min_start);
+ }
+ #endif
++ return entry->min_start;
++ }
++ return st->vtime;
++}
++
++static void bfq_update_vtime(struct bfq_service_tree *st, u64 new_value)
++{
++ if (new_value > st->vtime) {
++ st->vtime = new_value;
+ bfq_forget_idle(st);
+ }
+ }
+@@ -1168,6 +1206,7 @@ static void bfq_update_vtime(struct bfq_service_tree *st)
+ * bfq_first_active_entity - find the eligible entity with
+ * the smallest finish time
+ * @st: the service tree to select from.
++ * @vtime: the system virtual time to use as a reference for eligibility
+ *
+ * This function searches the first schedulable entity, starting from the
+ * root of the tree and going on the left every time on this side there is
+@@ -1175,7 +1214,8 @@ static void bfq_update_vtime(struct bfq_service_tree *st)
+ * the right is followed only if a) the left subtree contains no eligible
+ * entities and b) no eligible entity has been found yet.
+ */
+-static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
++static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st,
++ u64 vtime)
+ {
+ struct bfq_entity *entry, *first = NULL;
+ struct rb_node *node = st->active.rb_node;
+@@ -1183,15 +1223,15 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
+ while (node) {
+ entry = rb_entry(node, struct bfq_entity, rb_node);
+ left:
+- if (!bfq_gt(entry->start, st->vtime))
++ if (!bfq_gt(entry->start, vtime))
+ first = entry;
+
+- BUG_ON(bfq_gt(entry->min_start, st->vtime));
++ BUG_ON(bfq_gt(entry->min_start, vtime));
+
+ if (node->rb_left) {
+ entry = rb_entry(node->rb_left,
+ struct bfq_entity, rb_node);
+- if (!bfq_gt(entry->min_start, st->vtime)) {
++ if (!bfq_gt(entry->min_start, vtime)) {
+ node = node->rb_left;
+ goto left;
+ }
+@@ -1209,28 +1249,71 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
+ * __bfq_lookup_next_entity - return the first eligible entity in @st.
+ * @st: the service tree.
+ *
+- * Update the virtual time in @st and return the first eligible entity
+- * it contains.
++ * If there is no in-service entity for the sched_data st belongs to,
++ * then return the entity that will be set in service if:
++ * 1) the parent entity this st belongs to is set in service;
++ * 2) no entity belonging to such parent entity undergoes a state change
++ * that would influence the timestamps of the entity (e.g., becomes idle,
++ * becomes backlogged, changes its budget, ...).
++ *
++ * In this first case, update the virtual time in @st too (see the
++ * comments on this update inside the function).
++ *
++ * In contrast, if there is an in-service entity, then return the
++ * entity that would be set in service if not only the above
++ * conditions, but also the next one held true: the currently
++ * in-service entity, on expiration,
++ * 1) gets a finish time equal to the current one, or
++ * 2) is not eligible any more, or
++ * 3) is idle.
+ */
+ static struct bfq_entity *
+-__bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
++__bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service
++#if 0
++ , bool force
++#endif
++ )
+ {
+- struct bfq_entity *entity, *new_next_in_service = NULL;
++ struct bfq_entity *entity
++#if 0
++ , *new_next_in_service = NULL
++#endif
++ ;
++ u64 new_vtime;
+ struct bfq_queue *bfqq;
+
+ if (RB_EMPTY_ROOT(&st->active))
+ return NULL;
+
+- bfq_update_vtime(st);
+- entity = bfq_first_active_entity(st);
+- BUG_ON(bfq_gt(entity->start, st->vtime));
++ /*
++ * Get the value of the system virtual time for which at
++ * least one entity is eligible.
++ */
++ new_vtime = bfq_calc_vtime_jump(st);
++
++ /*
++ * If there is no in-service entity for the sched_data this
++ * active tree belongs to, then push the system virtual time
++ * up to the value that guarantees that at least one entity is
++ * eligible. If, instead, there is an in-service entity, then
++ * do not make any such update, because there is already an
++ * eligible entity, namely the in-service one (even if the
++ * entity is not on st, because it was extracted when set in
++ * service).
++ */
++ if (!in_service)
++ bfq_update_vtime(st, new_vtime);
+
++ entity = bfq_first_active_entity(st, new_vtime);
++ BUG_ON(bfq_gt(entity->start, new_vtime));
++
++ /* Log some information */
+ bfqq = bfq_entity_to_bfqq(entity);
+ if (bfqq)
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+ "__lookup_next: start %llu vtime %llu st %p",
+ ((entity->start>>10)*1000)>>12,
+- ((st->vtime>>10)*1000)>>12, st);
++ ((new_vtime>>10)*1000)>>12, st);
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ else {
+ struct bfq_group *bfqg =
+@@ -1239,20 +1322,11 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+ "__lookup_next: start %llu vtime %llu st %p",
+ ((entity->start>>10)*1000)>>12,
+- ((st->vtime>>10)*1000)>>12, st);
++ ((new_vtime>>10)*1000)>>12, st);
+ }
+ #endif
+
+- /*
+- * If the chosen entity does not match with the sched_data's
+- * next_in_service and we are forcedly serving the IDLE priority
+- * class tree, bubble up budget update.
+- */
+- if (unlikely(force && entity != entity->sched_data->next_in_service)) {
+- new_next_in_service = entity;
+- for_each_entity(new_next_in_service)
+- bfq_update_budget(new_next_in_service);
+- }
++ BUG_ON(!entity);
+
+ return entity;
+ }
+@@ -1260,90 +1334,70 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
+ /**
+ * bfq_lookup_next_entity - return the first eligible entity in @sd.
+ * @sd: the sched_data.
+- * @extract: if true the returned entity will be also extracted from @sd.
+ *
+- * NOTE: since we cache the next_in_service entity at each level of the
+- * hierarchy, the complexity of the lookup can be decreased with
+- * absolutely no effort just returning the cached next_in_service value;
+- * we prefer to do full lookups to test the consistency of the data
+- * structures.
++ * This function is invoked when there has been a change in the trees
++ * for sd, and we need to know what the new next entity is after this
++ * change.
+ */
+-static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+- int extract,
+- struct bfq_data *bfqd)
++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd)
+ {
+ struct bfq_service_tree *st = sd->service_tree;
+- struct bfq_entity *entity;
+- int i = 0;
+-
+- BUG_ON(sd->in_service_entity);
++ struct bfq_service_tree *idle_class_st = st + (BFQ_IOPRIO_CLASSES - 1);
++ struct bfq_entity *entity = NULL;
++ struct bfq_queue *bfqq;
++ int class_idx = 0;
+
++ BUG_ON(!sd);
++ BUG_ON(!st);
+ /*
+ * Choose from idle class, if needed to guarantee a minimum
+- * bandwidth to this class. This should also mitigate
++ * bandwidth to this class (and if there is some active entity
++ * in idle class). This should also mitigate
+ * priority-inversion problems in case a low priority task is
+ * holding file system resources.
+ */
+- if (bfqd &&
+- jiffies - bfqd->bfq_class_idle_last_service >
+- BFQ_CL_IDLE_TIMEOUT) {
+- entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1,
+- true);
+- if (entity) {
+- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ if (time_is_before_jiffies(sd->bfq_class_idle_last_service +
++ BFQ_CL_IDLE_TIMEOUT)) {
++ if (!RB_EMPTY_ROOT(&idle_class_st->active))
++ class_idx = BFQ_IOPRIO_CLASSES - 1;
++ /* About to be served if backlogged, or not yet backlogged */
++ sd->bfq_class_idle_last_service = jiffies;
++ }
+
+- if (bfqq)
+- bfq_log_bfqq(bfqd, bfqq,
+- "idle chosen from st %p %d",
+- st + BFQ_IOPRIO_CLASSES - 1,
+- BFQ_IOPRIO_CLASSES - 1);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- else {
+- struct bfq_group *bfqg =
+- container_of(entity, struct bfq_group, entity);
++ /*
++ * Find the next entity to serve for the highest-priority
++ * class, unless the idle class needs to be served.
++ */
++ for (; class_idx < BFQ_IOPRIO_CLASSES; class_idx++) {
++ entity = __bfq_lookup_next_entity(st + class_idx,
++ sd->in_service_entity);
+
+- bfq_log_bfqg(bfqd, bfqg,
+- "idle chosen from st %p %d",
+- st + BFQ_IOPRIO_CLASSES - 1,
+- BFQ_IOPRIO_CLASSES - 1);
+- }
+-#endif
+- i = BFQ_IOPRIO_CLASSES - 1;
+- bfqd->bfq_class_idle_last_service = jiffies;
+- sd->next_in_service = entity;
+- }
++ if (entity)
++ break;
+ }
+- for (; i < BFQ_IOPRIO_CLASSES; i++) {
+- entity = __bfq_lookup_next_entity(st + i, false);
+- if (entity) {
+- if (bfqd != NULL) {
+- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+- if (bfqq)
+- bfq_log_bfqq(bfqd, bfqq,
+- "chosen from st %p %d",
+- st + i, i);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- else {
+- struct bfq_group *bfqg =
+- container_of(entity, struct bfq_group, entity);
++ BUG_ON(!entity &&
++ (!RB_EMPTY_ROOT(&st->active) || !RB_EMPTY_ROOT(&(st+1)->active) ||
++ !RB_EMPTY_ROOT(&(st+2)->active)));
+
+- bfq_log_bfqg(bfqd, bfqg,
+- "chosen from st %p %d",
+- st + i, i);
+- }
+-#endif
+- }
++ if (!entity)
++ return NULL;
+
+- if (extract) {
+- bfq_check_next_in_service(sd, entity);
+- bfq_active_extract(st + i, entity);
+- sd->in_service_entity = entity;
+- sd->next_in_service = NULL;
+- }
+- break;
+- }
++ /* Log some information */
++ bfqq = bfq_entity_to_bfqq(entity);
++ if (bfqq)
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "chosen from st %p %d",
++ st + class_idx, class_idx);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "chosen from st %p %d",
++ st + class_idx, class_idx);
+ }
++#endif
+
+ return entity;
+ }
+@@ -1356,6 +1410,43 @@ static bool next_queue_may_preempt(struct bfq_data *bfqd)
+ }
+
+ /*
++ * This function tells whether entity stops being a candidate for next
++ * service, according to the following logic.
++ *
++ * This function is invoked for an entity that is about to be set in
++ * service. If such an entity is a queue, then the entity is no longer
++ * a candidate for next service (i.e., a candidate entity to serve
++ * after the in-service entity is expired). The function then returns
++ * true.
++ *
++ * In contrast, the entity could still be a candidate for next service
++ * if it is not a queue, and has more than one child. In fact, even if
++ * one of its children is about to be set in service, other children
++ * may still be the next to serve. As a consequence, a non-queue
++ * entity stops being a candidate for next service only if it has
++ * exactly one child. And only if this condition holds does the
++ * function return true for a non-queue entity.
++ */
++static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
++{
++ struct bfq_group *bfqg;
++
++ if (bfq_entity_to_bfqq(entity))
++ return true;
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg = container_of(entity, struct bfq_group, entity);
++
++ BUG_ON(bfqg == ((struct bfq_data *)(bfqg->bfqd))->root_group);
++ BUG_ON(bfqg->active_entities == 0);
++ if (bfqg->active_entities == 1)
++ return true;
++#endif
++
++ return false;
++}
++
++/*
+ * Get next queue for service.
+ */
+ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+@@ -1369,6 +1460,11 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+ if (bfqd->busy_queues == 0)
+ return NULL;
+
++ /*
++ * Traverse the path from the root to the leaf entity to
++ * serve. Set in service all the entities visited along the
++ * way.
++ */
+ sd = &bfqd->root_group->sched_data;
+ for (; sd ; sd = entity->my_sched_data) {
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+@@ -1378,13 +1474,96 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+
+ bfq_log_bfqg(bfqd, bfqg,
+ "get_next_queue: lookup in this group");
+- } else
++ if (!sd->next_in_service)
++ pr_crit("get_next_queue: lookup in this group");
++ } else {
+ bfq_log_bfqg(bfqd, bfqd->root_group,
+ "get_next_queue: lookup in root group");
++ if (!sd->next_in_service)
++ pr_crit("get_next_queue: lookup in root group");
++ }
+ #endif
+
+- entity = bfq_lookup_next_entity(sd, 1, bfqd);
++ BUG_ON(!sd->next_in_service);
++
++ /*
++ * WARNING. We are about to set the in-service entity
++ * to sd->next_in_service, i.e., to the (cached) value
++ * returned by bfq_lookup_next_entity(sd) the last
++ * time it was invoked, i.e., the last time when the
++ * service order in sd changed as a consequence of the
++ * activation or deactivation of an entity. In this
++ * respect, if we execute bfq_lookup_next_entity(sd)
++ * in this very moment, it may, although with low
++ * probability, yield a different entity than that
++ * pointed to by sd->next_in_service. This rare event
++ * happens in case there was no CLASS_IDLE entity to
++ * serve for sd when bfq_lookup_next_entity(sd) was
++ * invoked for the last time, while there is now one
++ * such entity.
++ *
++ * If the above event happens, then the scheduling of
++ * such entity in CLASS_IDLE is postponed until the
++ * service of the sd->next_in_service entity
++ * finishes. In fact, when the latter is expired,
++ * bfq_lookup_next_entity(sd) gets called again,
++ * exactly to update sd->next_in_service.
++ */
++
++ /* Make next_in_service entity become in_service_entity */
++ entity = sd->next_in_service;
++ sd->in_service_entity = entity;
++
++ /*
++ * Reset the accumulator of the amount of service that
++ * the entity is about to receive.
++ */
++ entity->service = 0;
++
++ /*
++ * If entity is no longer a candidate for next
++ * service, then we extract it from its active tree,
++ * for the following reason. To further boost the
++ * throughput in some special case, BFQ needs to know
++ * which is the next candidate entity to serve, while
++ * there is already an entity in service. In this
++ * respect, to make it easy to compute/update the next
++ * candidate entity to serve after the current
++ * candidate has been set in service, there is a case
++ * where it is necessary to extract the current
++ * candidate from its service tree. Such a case is
++ * when the entity just set in service cannot be also
++ * a candidate for next service. Details about when
++ * this condition holds are reported in the comments
++ * on the function bfq_no_longer_next_in_service()
++ * invoked below.
++ */
++ if (bfq_no_longer_next_in_service(entity))
++ bfq_active_extract(bfq_entity_service_tree(entity),
++ entity);
++
++ /*
++ * For the same reason why we may have just extracted
++ * entity from its active tree, we may need to update
++ * next_in_service for the sched_data of entity too,
++ * regardless of whether entity has been extracted.
++ * In fact, even if entity has not been extracted, a
++ * descendant entity may get extracted. Such an event
++ * would cause a change in next_in_service for the
++ * level of the descendant entity, and thus possibly
++ * back to upper levels.
++ *
++ * We cannot perform the resulting needed update
++ * before the end of this loop, because, to know which
++ * is the correct next-to-serve candidate entity for
++ * each level, we need first to find the leaf entity
++ * to set in service. In fact, only after we know
++ * which is the next-to-serve leaf entity, we can
++ * discover whether the parent entity of the leaf
++ * entity becomes the next-to-serve, and so on.
++ */
+
++ /* Log some information */
+ bfqq = bfq_entity_to_bfqq(entity);
+ if (bfqq)
+ bfq_log_bfqq(bfqd, bfqq,
+@@ -1401,13 +1580,23 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+ }
+ #endif
+
+- BUG_ON(!entity);
+- entity->service = 0;
+ }
+
++ BUG_ON(!entity);
+ bfqq = bfq_entity_to_bfqq(entity);
+ BUG_ON(!bfqq);
+
++ /*
++ * We can finally update all next-to-serve entities along the
++ * path from the leaf entity just set in service to the root.
++ */
++ for_each_entity(entity) {
++ struct bfq_sched_data *sd = entity->sched_data;
++
++ if (!bfq_update_next_in_service(sd))
++ break;
++ }
++
+ return bfqq;
+ }
+
+diff --git a/block/bfq.h b/block/bfq.h
+index b80abe0..6b28030 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -88,6 +88,9 @@ struct bfq_sched_data {
+ struct bfq_entity *next_in_service;
+ /* array of service trees, one per ioprio_class */
+ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
++ /* last time CLASS_IDLE was served */
++ unsigned long bfq_class_idle_last_service;
++
+ };
+
+ /**
+@@ -487,8 +490,6 @@ struct bfq_data {
+ unsigned int bfq_back_max;
+ /* maximum idling time */
+ u32 bfq_slice_idle;
+- /* last time CLASS_IDLE was served */
+- u64 bfq_class_idle_last_service;
+
+ /* user-configured max budget value (0 for auto-tuning) */
+ int bfq_user_max_budget;
+
+From b5734696e18577dda6ef6571af7cb3718937d2f5 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Wed, 14 Dec 2016 17:56:25 +0100
+Subject: [PATCH 16/24] BFQ v8r6-rc1
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 2 +-
+ block/bfq.h | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 2d00bd10..72443bd 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -5218,7 +5218,7 @@ static struct blkcg_policy blkcg_policy_bfq = {
+ static int __init bfq_init(void)
+ {
+ int ret;
+- char msg[50] = "BFQ I/O-scheduler: v8r5";
++ char msg[60] = "BFQ I/O-scheduler: v8r6-rc1";
+
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ ret = blkcg_policy_register(&blkcg_policy_bfq);
+diff --git a/block/bfq.h b/block/bfq.h
+index 6b28030..d4d784f 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -1,5 +1,5 @@
+ /*
+- * BFQ-v8r5 for 4.8.0: data structures and common functions prototypes.
++ * BFQ v8r6-rc1 for 4.9.0: data structures and common functions prototypes.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+
+From a7c98629904dcc103d676c2b8e2ceaff4ca19ca2 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Wed, 14 Dec 2016 18:07:41 +0100
+Subject: [PATCH 17/24] Port and improve CFQ commit 41647e7a
+
+BFQ currently uses the same logic for detecting seeky queues on
+rotational disks and on SSDs. This logic is appropriate for the
+former, as it takes into account only the inter-request distance,
+which is the dominant latency factor on a rotational device. Yet
+things change with flash-based devices, where serving a large
+request still yields a high throughput, even if the request is far
+from the previously served one. This commit extends seeky detection
+to take this fact into account on flash-based devices. In
+particular, this commit is an improved port of the original CFQ
+commit 41647e7a.
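+
+As a rough, self-contained sketch of the resulting check (not part of
+the patch itself; the names are illustrative and the thresholds merely
+mirror BFQQ_SEEK_THR and BFQQ_SECT_THR_NONROT from the hunk below):
+
+    #include <stdbool.h>
+    #include <stdint.h>
+
+    #define SEEK_THR_SECTORS (8 * 100) /* inter-request distance bound */
+    #define SECT_THR_NONROT  (2 * 32)  /* request-size bound on SSDs */
+
+    static bool request_is_seeky(uint64_t sdist, uint32_t rq_sectors,
+                                 bool queue_nonrot)
+    {
+        /* Far from the previous request... */
+        if (sdist <= SEEK_THR_SECTORS)
+            return false;
+        /* ...and, on a non-rotational device, also small: a large
+         * request keeps SSD throughput high regardless of distance. */
+        return !queue_nonrot || rq_sectors < SECT_THR_NONROT;
+    }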
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 72443bd..153292f 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -117,6 +117,7 @@ struct kmem_cache *bfq_pool;
+ #define BFQ_HW_QUEUE_SAMPLES 32
+
+ #define BFQQ_SEEK_THR (sector_t)(8 * 100)
++#define BFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
+ #define BFQQ_CLOSE_THR (sector_t)(8 * 1024)
+ #define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8)
+
+@@ -4117,7 +4118,9 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ {
+ bfqq->seek_history <<= 1;
+ bfqq->seek_history |=
+- get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR;
++ get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR &&
++ (!blk_queue_nonrot(bfqd->queue) ||
++ blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT);
+ }
+
+ /*
+
+From 5cacb798d523b9fa7e394ca3a369a0c196e0b022 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Thu, 15 Dec 2016 15:57:22 +0100
+Subject: [PATCH 18/24] Provide more details in switching-off-wr message
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 153292f..a5853c7 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -656,7 +656,9 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+ time_is_before_jiffies(bfqq->last_wr_start_finish +
+ bfqq->wr_cur_max_time))) {
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+- "resume state: switching off wr");
++ "resume state: switching off wr (%u + %u < %u)",
++ bfqq->last_wr_start_finish, bfqq->wr_cur_max_time,
++ jiffies);
+
+ bfqq->wr_coeff = 1;
+ }
+
+From 6f62391c914a25818f839b932a9e1b16ee28c1f1 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Thu, 15 Dec 2016 16:10:17 +0100
+Subject: [PATCH 19/24] Remove useless parameter from bfq_del_bfqq_busy
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 6 +++---
+ block/bfq-sched.c | 5 ++---
+ 2 files changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index a5853c7..0d54cc1 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -656,7 +656,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+ time_is_before_jiffies(bfqq->last_wr_start_finish +
+ bfqq->wr_cur_max_time))) {
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+- "resume state: switching off wr (%u + %u < %u)",
++ "resume state: switching off wr (%lu + %lu < %lu)",
+ bfqq->last_wr_start_finish, bfqq->wr_cur_max_time,
+ jiffies);
+
+@@ -1525,7 +1525,7 @@ static void bfq_remove_request(struct request *rq)
+ BUG_ON(bfqq->entity.budget < 0);
+
+ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) {
+- bfq_del_bfqq_busy(bfqd, bfqq, 1);
++ bfq_del_bfqq_busy(bfqd, bfqq);
+
+ /* bfqq emptied. In normal operation, when
+ * bfqq is empty, bfqq->entity.service and
+@@ -2662,7 +2662,7 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ */
+ bfqq->budget_timeout = jiffies;
+
+- bfq_del_bfqq_busy(bfqd, bfqq, 1);
++ bfq_del_bfqq_busy(bfqd, bfqq);
+ } else {
+ bfq_activate_bfqq(bfqd, bfqq);
+ /*
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index 2f54ac5..fae28ee 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -1635,8 +1635,7 @@ static void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
+ * Called when the bfqq no longer has requests pending, remove it from
+ * the service tree.
+ */
+-static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+- int requeue)
++static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ BUG_ON(!bfq_bfqq_busy(bfqq));
+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
+@@ -1660,7 +1659,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+
+ BUG_ON(bfqq->entity.budget < 0);
+
+- bfq_deactivate_bfqq(bfqd, bfqq, requeue);
++ bfq_deactivate_bfqq(bfqd, bfqq, 1);
+
+ BUG_ON(bfqq->entity.budget < 0);
+ }
+
+From 1469db5fec95e3bfad0fe60dc1a843b0c5de4efa Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Thu, 15 Dec 2016 16:40:09 +0100
+Subject: [PATCH 20/24] Further improve code and comments for hierarchical code
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-cgroup.c | 6 +++---
+ block/bfq-iosched.c | 2 +-
+ block/bfq-sched.c | 45 ++++++++++++++++++++++++++++++---------------
+ 3 files changed, 34 insertions(+), 19 deletions(-)
+
+diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
+index a04bc40..bd31da5 100644
+--- a/block/bfq-cgroup.c
++++ b/block/bfq-cgroup.c
+@@ -552,7 +552,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq));
+
+ if (bfq_bfqq_busy(bfqq))
+- bfq_deactivate_bfqq(bfqd, bfqq, 0);
++ bfq_deactivate_bfqq(bfqd, bfqq, false);
+ else if (entity->on_st) {
+ BUG_ON(&bfq_entity_service_tree(entity)->idle !=
+ entity->tree);
+@@ -664,7 +664,7 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st)
+ struct bfq_entity *entity = st->first_idle;
+
+ for (; entity ; entity = st->first_idle)
+- __bfq_deactivate_entity(entity, 0);
++ __bfq_deactivate_entity(entity, false);
+ }
+
+ /**
+@@ -769,7 +769,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
+ BUG_ON(bfqg->sched_data.next_in_service);
+ BUG_ON(bfqg->sched_data.in_service_entity);
+
+- __bfq_deactivate_entity(entity, 0);
++ __bfq_deactivate_entity(entity, false);
+ bfq_put_async_queues(bfqd, bfqg);
+ BUG_ON(entity->tree);
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 0d54cc1..7a56603 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -4738,7 +4738,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
+
+ BUG_ON(bfqd->in_service_queue);
+ list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
+- bfq_deactivate_bfqq(bfqd, bfqq, 0);
++ bfq_deactivate_bfqq(bfqd, bfqq, false);
+
+ spin_unlock_irq(q->queue_lock);
+
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index fae28ee..52911cc 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -841,9 +841,9 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
+ * We are requeueing the current in-service entity,
+ * because of the requeueing of a just-expired
+ * non-idle leaf entity in the path originating from
+- * this entity. In fact, in this case, then all
+- * entities in this path need to be requeued again for
+- * next service.
++ * this entity. In fact, in this case, all entities in
++ * this path need to be requeued again for next
++ * service.
+ *
+ * Before requeueing, the start time of the entity
+ * must be moved forward to account for the service
+@@ -882,7 +882,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
+ * implies that the finish time of this entity must be
+ * updated. Such an update may cause the scheduling,
+ * i.e., the position in the active tree, of this
+- * entity to change. We handle this fact by: 1)
++ * entity to change. We handle this change by: 1)
+ * dequeueing the entity here, 2) updating the finish
+ * time and requeueing the entity according to the new
+ * timestamps below. This is the same approach as the
+@@ -1054,7 +1054,7 @@ static void bfq_activate_entity(struct bfq_entity *entity,
+ * if the entity was in service or if it was the next_in_service for
+ * its sched_data; return %false otherwise.
+ */
+-static bool __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
++static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue)
+ {
+ struct bfq_sched_data *sd = entity->sched_data;
+ struct bfq_service_tree *st;
+@@ -1118,26 +1118,41 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+ if (sd->next_in_service)
+ /*
+ * The parent entity is still backlogged,
+- * because next_in_service is not NULL, and
++ * because next_in_service is not NULL. So, no
++ * upwards deactivation is needed. Yet,
+ * next_in_service has been updated (see
+- * comment on the body of the above if):
+- * upwards update of the schedule is needed.
++ * comment on the body of the above if). Then
++ * the schedule needs to be updated upwards.
+ */
+- goto update;
++ goto update_schedule;
+
+ /*
+- * If we get here, then the parent is no more backlogged and
+- * we want to propagate the deactivation upwards.
++ * If we get here, then the parent is no more
++ * backlogged and we need to propagate the
++ * deactivation upwards. Then let the loop go on.
+ */
+- requeue = 1;
++
++ /*
++ * Also let parent be queued into the idle tree on
++ * deactivation, to preserve service guarantees, and
++ * assuming that whoever invoked this function does not
++ * need parent entities to be removed from any tree as well.
++ */
++ requeue = true;
+ }
+
+ return;
+
+-update:
++update_schedule:
+ entity = parent;
+ for_each_entity(entity) {
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ /*
++ * Invoke __bfq_activate_entity on entity, even if
++ * already active, to update its position in the
++ * active tree (because sd->next_in_service has
++ * changed)
++ */
+ __bfq_activate_entity(entity, false);
+
+ sd = entity->sched_data;
+@@ -1613,7 +1628,7 @@ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+ }
+
+ static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+- int requeue)
++ bool requeue)
+ {
+ struct bfq_entity *entity = &bfqq->entity;
+
+@@ -1659,7 +1674,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+
+ BUG_ON(bfqq->entity.budget < 0);
+
+- bfq_deactivate_bfqq(bfqd, bfqq, 1);
++ bfq_deactivate_bfqq(bfqd, bfqq, true);
+
+ BUG_ON(bfqq->entity.budget < 0);
+ }
+
+From d12d41bee45b988e1ae4f8dcece4bcecb5ad74a9 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Thu, 15 Dec 2016 22:11:39 +0100
+Subject: [PATCH 21/24] Optimize update of next_in_service entity
+
+If the update of the next_in_service candidate entity is triggered by
+the activation of an entity, then it is not necessary to perform full
+lookups in the active trees to update next_in_service. In fact, it is
+enough to check whether the just-activated entity has a higher
+priority than next_in_service, or, even if it has the same priority as
+next_in_service, is eligible and has a lower virtual finish time than
+next_in_service. If this compound condition holds, then the new entity
+becomes the new next_in_service. Otherwise no change is needed. This
+commit implements this optimization.
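+
+As a stand-alone model of the compound condition (not the kernel code;
+the struct and function names here are made up, while the wrap-safe
+comparison mirrors bfq_gt):
+
+    #include <stdbool.h>
+    #include <stdint.h>
+
+    struct ent { unsigned int class_idx; uint64_t start, finish; };
+
+    /* Wrap-safe timestamp comparison, as in bfq_gt(). */
+    static bool ts_gt(uint64_t a, uint64_t b)
+    {
+        return (int64_t)(a - b) > 0;
+    }
+
+    /* Should nw replace cur as the cached next_in_service, given the
+     * virtual time of nw's service tree? */
+    static bool replaces_next(const struct ent *nw, const struct ent *cur,
+                              uint64_t vtime)
+    {
+        if (!cur)
+            return true; /* no candidate cached yet */
+        if (nw->class_idx < cur->class_idx)
+            return true; /* strictly higher-priority class */
+        return nw->class_idx == cur->class_idx &&
+               !ts_gt(nw->start, vtime) &&     /* eligible */
+               ts_gt(cur->finish, nw->finish); /* lower finish time */
+    }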
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-sched.c | 167 ++++++++++++++++++++++++++++++++++++++++--------------
+ block/bfq.h | 11 +++-
+ 2 files changed, 134 insertions(+), 44 deletions(-)
+
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index 52911cc..09e494b 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -14,6 +14,18 @@
+
+ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+
++/**
++ * bfq_gt - compare two timestamps.
++ * @a: first ts.
++ * @b: second ts.
++ *
++ * Return @a > @b, dealing with wrapping correctly.
++ */
++static int bfq_gt(u64 a, u64 b)
++{
++ return (s64)(a - b) > 0;
++}
++
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ /* both next loops stop at one of the child entities of the root group */
+ #define for_each_entity(entity) \
+@@ -46,18 +58,99 @@ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ bfqg_entity->budget = next_in_service->budget;
+ }
+
+-/*
+- * Incomplete version that always returns true. It will return also
+- * false in its complete version, in case either next_in_service has
+- * not changed, or next_in_service has changed but in a way that will
+- * not influence upper levels.
++static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
++{
++ struct rb_node *node = tree->rb_node;
++
++ return rb_entry(node, struct bfq_entity, rb_node);
++}
++
++/**
++ * bfq_update_next_in_service - update sd->next_in_service
++ * @sd: sched_data for which to perform the update.
++ * @new_entity: if not NULL, pointer to the entity whose activation
++ * triggered the invocation of this function.
++ *
++ * This function is called to update sd->next_in_service, which, in
++ * its turn, may change as a consequence of the insertion or
++ * extraction of an entity into/from one of the active trees of
++ * sd. These insertions/extractions occur as a consequence of
++ * activations/deactivations of entities, with some activations being
++ * 'true' activations, and other activations being requeueings (i.e.,
++ * implementing the second, requeueing phase of the mechanism used to
++ * reposition an entity in its active tree; see comments on
++ * __bfq_activate_entity for details). In both the last two activation
++ * sub-cases, new_entity points to the just activated or requeued
++ * entity.
++ *
++ * This is a still incomplete version of this function, which always
++ * returns true. It will also return false in its complete version, in
++ * case either next_in_service has not changed, or next_in_service has
++ * changed but in a way that will not influence upper levels.
+ */
+-static bool bfq_update_next_in_service(struct bfq_sched_data *sd)
++static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
++ struct bfq_entity *new_entity)
+ {
+- struct bfq_entity *next_in_service;
++ struct bfq_entity *next_in_service = sd->next_in_service;
+ struct bfq_queue *bfqq;
+
+- next_in_service = bfq_lookup_next_entity(sd);
++ /*
++ * If this update is triggered by the activation of a new
++ * entity, then a full lookup in the active tree can be
++ * avoided. In fact, it is enough to check whether the
++ * just-activated entity has a higher priority than
++ * sd->next_in_service, or, even if it has the same priority
++ * as sd->next_in_service, is eligible and has a lower virtual
++ * finish time than sd->next_in_service. If this compound
++ * condition holds, then the new entity becomes the new
++ * next_in_service. Otherwise no change is needed.
++ */
++ if (new_entity) {
++ /*
++ * Flag used to decide whether to replace
++ * sd->next_in_service with new_entity. Tentatively
++ * set to true, and left as true if
++ * sd->next_in_service is NULL.
++ */
++ bool replace_next = true;
++
++ if (new_entity == sd->next_in_service)
++ return false;
++
++ /*
++ * If there is already a next_in_service candidate
++ * entity, then compare class priorities or timestamps
++ * to decide whether to replace sd->next_in_service with
++ * new_entity.
++ */
++ if (next_in_service) {
++ unsigned int new_entity_class_idx =
++ bfq_class_idx(new_entity);
++ struct bfq_service_tree *st =
++ sd->service_tree + new_entity_class_idx;
++
++ /*
++ * For efficiency, evaluate the most likely
++ * sub-condition first.
++ */
++ replace_next =
++ (new_entity_class_idx ==
++ bfq_class_idx(next_in_service)
++ &&
++ !bfq_gt(new_entity->start, st->vtime)
++ &&
++ bfq_gt(next_in_service->finish,
++ new_entity->finish))
++ ||
++ new_entity_class_idx <
++ bfq_class_idx(next_in_service);
++ }
++
++ if (replace_next)
++ next_in_service = new_entity;
++ } else /* invoked because of a deactivation: lookup needed */
++ next_in_service = bfq_lookup_next_entity(sd);
++
+ sd->next_in_service = next_in_service;
+
+ if (next_in_service)
+@@ -109,18 +202,6 @@ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ */
+ #define WFQ_SERVICE_SHIFT 22
+
+-/**
+- * bfq_gt - compare two timestamps.
+- * @a: first ts.
+- * @b: second ts.
+- *
+- * Return @a > @b, dealing with wrapping correctly.
+- */
+-static int bfq_gt(u64 a, u64 b)
+-{
+- return (s64)(a - b) > 0;
+-}
+-
+ static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
+ {
+ struct bfq_queue *bfqq = NULL;
+@@ -816,15 +897,18 @@ static void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ }
+
+ /**
+- * __bfq_activate_entity - activate an entity.
+- * @entity: the entity being activated.
+- * @non_blocking_wait_rq: true if this entity was waiting for a request
++ * __bfq_activate_entity - handle activation or requeueing of an entity.
++ * @entity: the entity being activated or requeued.
++ * @non_blocking_wait_rq: true if entity was waiting for a request
+ *
+- * Called whenever an entity is activated, i.e., it is not active and one
+- * of its children receives a new request, or has to be reactivated due to
+- * budget exhaustion. It uses the current budget of the entity (and the
+- * service received if @entity is active) of the queue to calculate its
+- * timestamps.
++ * Called whenever an entity is activated, which happens either if the
++ * entity is not active and one of its children receives a new request
++ * (true activation), or if the entity needs to be repositioned in its
++ * active tree (requeueing, as explained in detail in the comments
++ * inside the function).
++ *
++ * Basically, this function updates the timestamps of entity and
++ * (re)inserts entity into its active tree.
+ */
+ static void __bfq_activate_entity(struct bfq_entity *entity,
+ bool non_blocking_wait_rq)
+@@ -1027,7 +1111,7 @@ static void bfq_activate_entity(struct bfq_entity *entity,
+ RB_EMPTY_ROOT(&(sd->service_tree+1)->active) &&
+ RB_EMPTY_ROOT(&(sd->service_tree+2)->active));
+
+- if (!bfq_update_next_in_service(sd)) {
++ if (!bfq_update_next_in_service(sd, entity)) {
+ BUG_ON(!sd->next_in_service);
+ /*
+ * No need to propagate the activation to the
+@@ -1081,8 +1165,8 @@ static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue)
+ else if (entity->tree)
+ BUG();
+
+- if (was_in_service || sd->next_in_service == entity)
+- ret = bfq_update_next_in_service(sd);
++ if (sd->next_in_service == entity)
++ ret = bfq_update_next_in_service(sd, NULL);
+
+ if (!requeue || !bfq_gt(entity->finish, st->vtime))
+ bfq_forget_entity(st, entity);
+@@ -1169,7 +1253,7 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+ "invoking udpdate_next for this entity");
+ }
+ #endif
+- if (!bfq_update_next_in_service(sd))
++ if (!bfq_update_next_in_service(sd, entity))
+ break;
+ }
+ }
+@@ -1183,28 +1267,27 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+ */
+ static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st)
+ {
+- struct bfq_entity *entry;
+- struct rb_node *node = st->active.rb_node;
++ struct bfq_entity *root_entity = bfq_root_active_entity(&st->active);
+
+- entry = rb_entry(node, struct bfq_entity, rb_node);
+- if (bfq_gt(entry->min_start, st->vtime)) {
+- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entry);
++ if (bfq_gt(root_entity->min_start, st->vtime)) {
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(root_entity);
+
+ if (bfqq)
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+ "calc_vtime_jump: new value %llu",
+- entry->min_start);
++ root_entity->min_start);
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ else {
+ struct bfq_group *bfqg =
+- container_of(entry, struct bfq_group, entity);
++ container_of(root_entity, struct bfq_group,
++ entity);
+
+ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+ "calc_vtime_jump: new value %llu",
+- entry->min_start);
++ root_entity->min_start);
+ }
+ #endif
+- return entry->min_start;
++ return root_entity->min_start;
+ }
+ return st->vtime;
+ }
+@@ -1608,7 +1691,7 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+ for_each_entity(entity) {
+ struct bfq_sched_data *sd = entity->sched_data;
+
+- if (!bfq_update_next_in_service(sd))
++ if (!bfq_update_next_in_service(sd, NULL))
+ break;
+ }
+
+diff --git a/block/bfq.h b/block/bfq.h
+index d4d784f..9c68d8b 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -806,13 +806,20 @@ struct bfq_group {
+
+ static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
+
++static unsigned int bfq_class_idx(struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++
++ return bfqq ? bfqq->ioprio_class - 1 :
++ BFQ_DEFAULT_GRP_CLASS - 1;
++}
++
+ static struct bfq_service_tree *
+ bfq_entity_service_tree(struct bfq_entity *entity)
+ {
+ struct bfq_sched_data *sched_data = entity->sched_data;
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+- unsigned int idx = bfqq ? bfqq->ioprio_class - 1 :
+- BFQ_DEFAULT_GRP_CLASS - 1;
++ unsigned int idx = bfq_class_idx(entity);
+
+ BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
+ BUG_ON(sched_data == NULL);
+
+From 634414378918d47830a12f2c0e0a6c526d227502 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Thu, 15 Dec 2016 22:15:42 +0100
+Subject: [PATCH 22/24] Fix bug causing occasional loss of weight raising
+
+When a bfq_queue, say bfqq, is split after a merging with another
+bfq_queue, BFQ checks whether it has to restore for bfqq the
+weight-raising state that bfqq had before being merged. In
+particular, the weight-raising is restored only if, according to the
+weight-raising duration decided for bfqq when it started to be
+weight-raised (before being merged), bfqq would not have already
+finished its weight-raising period.
+
+Yet, by mistake, such a duration is not saved when bfqq is merged. So,
+if bfqq is freed and reallocated when it is split, then this duration
+is wrongly set to zero on the split. As a consequence, the
+weight-raising state of bfqq is wrongly not restored, which causes BFQ
+to fail to guarantee a low latency to bfqq.
+
+This commit fixes this bug by saving the weight-raising duration when
+bfqq is merged, and correctly restoring it when bfqq is split.
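+
+A minimal sketch of the intended save/restore pairing (the structs are
+hypothetical trimmed stand-ins for the bfqq and bic state; only the
+last field is the one this commit starts saving):
+
+    struct wr_state {
+        unsigned long wr_coeff;
+        unsigned long last_wr_start_finish;
+        unsigned int  wr_cur_max_time; /* was lost across a merge */
+    };
+
+    static void save_wr_state(struct wr_state *saved,
+                              const struct wr_state *q)
+    {
+        saved->wr_coeff = q->wr_coeff;
+        saved->last_wr_start_finish = q->last_wr_start_finish;
+        saved->wr_cur_max_time = q->wr_cur_max_time; /* the fix */
+    }
+
+    static void resume_wr_state(struct wr_state *q,
+                                const struct wr_state *saved)
+    {
+        q->wr_coeff = saved->wr_coeff;
+        q->last_wr_start_finish = saved->last_wr_start_finish;
+        q->wr_cur_max_time = saved->wr_cur_max_time; /* the fix */
+    }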
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 2 ++
+ block/bfq.h | 1 +
+ 2 files changed, 3 insertions(+)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 7a56603..7aa3d55 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -650,6 +650,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+ bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
+ BUG_ON(time_is_after_jiffies(bfqq->wr_start_at_switch_to_srt));
+ bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
++ bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
+ BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish));
+
+ if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) ||
+@@ -1988,6 +1989,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
+ bic->saved_wr_coeff = bfqq->wr_coeff;
+ bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt;
+ bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
++ bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
+ BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish));
+ }
+
+diff --git a/block/bfq.h b/block/bfq.h
+index 9c68d8b..1a03521 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -366,6 +366,7 @@ struct bfq_io_cq {
+ unsigned long saved_wr_coeff;
+ unsigned long saved_last_wr_start_finish;
+ unsigned long saved_wr_start_at_switch_to_srt;
++ unsigned int saved_wr_cur_max_time;
+ };
+
+ enum bfq_device_speed {
+
+From cb826b5dbc9fcf524cd672857bcbcb68c6aa7236 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Fri, 16 Dec 2016 17:57:00 +0100
+Subject: [PATCH 23/24] Fix wrong reset of in-service entities in commit
+ 1430feb8
+
+In-service entities were reset through indirect logic, which
+also happened to be buggy in some cases. This commit fixes the
+bug by replacing that indirect logic with direct logic: when the
+in-service queue is reset, all involved entities are immediately
+reset as well, through a bubble-up loop.
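+
+A stand-alone model of the bubble-up loop (hypothetical trimmed types;
+the real code walks the parent chain with for_each_entity):
+
+    struct sched_data;
+
+    struct entity {
+        struct entity *parent;
+        struct sched_data *sched_data;
+    };
+
+    struct sched_data {
+        struct entity *in_service_entity;
+    };
+
+    /* On expiration of the in-service queue, clear the cached
+     * in-service entity at every level, from the leaf to the root. */
+    static void reset_in_service(struct entity *entity)
+    {
+        for (; entity; entity = entity->parent)
+            entity->sched_data->in_service_entity = NULL;
+    }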
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq-iosched.c | 5 ++---
+ block/bfq-sched.c | 45 +++++++++++++++++++++++++++++----------------
+ 2 files changed, 31 insertions(+), 19 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 7aa3d55..4d70d5e 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -2643,8 +2643,6 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ BUG_ON(bfqq != bfqd->in_service_queue);
+
+- __bfq_bfqd_reset_in_service(bfqd);
+-
+ /*
+ * If this bfqq is shared between multiple processes, check
+ * to make sure that those processes are still issuing I/Os
+@@ -2672,6 +2670,8 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ */
+ bfq_pos_tree_add_move(bfqd, bfqq);
+ }
++
++ __bfq_bfqd_reset_in_service(bfqd);
+ }
+
+ /**
+@@ -3819,7 +3819,6 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
+ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0);
+ BUG_ON(bfqq->entity.tree);
+ BUG_ON(bfq_bfqq_busy(bfqq));
+- BUG_ON(bfqq->bfqd->in_service_queue == bfqq);
+
+ if (bfq_bfqq_sync(bfqq))
+ /*
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index 09e494b..d9e63712 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -923,22 +923,28 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
+ if (entity == sd->in_service_entity) {
+ /*
+ * We are requeueing the current in-service entity,
+- * because of the requeueing of a just-expired
+- * non-idle leaf entity in the path originating from
+- * this entity. In fact, in this case, all entities in
+- * this path need to be requeued again for next
+- * service.
++ * which may have to be done for one of the following
++ * reasons:
++ * - entity represents the in-service queue, and the
++ * in-service queue is being requeued after an
++ * expiration;
++ * - entity represents a group, and its budget has
++ * changed because one of its child entities has
++ * just been either activated or requeued for some
++ * reason; the timestamps of the entity then need to
++ * be updated, and the entity needs to be enqueued
++ * or repositioned accordingly.
+ *
+- * Before requeueing, the start time of the entity
+- * must be moved forward to account for the service
+- * that the entity has received while in service. The
++ * In particular, before requeueing, the start time of
++ * the entity must be moved forward to account for the
++ * service that the entity has received while in
++ * service. This is done by the next instructions. The
+ * finish time will then be updated according to this
+ * new value of the start time, and to the budget of
+ * the entity.
+ */
+ bfq_calc_finish(entity, entity->service);
+ entity->start = entity->finish;
+- sd->in_service_entity = NULL;
+ BUG_ON(entity->tree && entity->tree != &st->active);
+ /*
+ * In addition, if the entity had more than one child
+@@ -1001,7 +1007,8 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
+ st->wsum += entity->weight;
+ bfq_get_entity(entity);
+
+- BUG_ON(entity->on_st);
++ BUG_ON(entity->on_st && bfqq);
++ BUG_ON(entity->on_st && !bfqq);
+ entity->on_st = 1;
+ }
+ }
+@@ -1153,10 +1160,8 @@ static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue)
+
+ BUG_ON(was_in_service && entity->tree && entity->tree != &st->active);
+
+- if (was_in_service) {
++ if (was_in_service)
+ bfq_calc_finish(entity, entity->service);
+- sd->in_service_entity = NULL;
+- }
+
+ if (entity->tree == &st->active)
+ bfq_active_extract(st, entity);
+@@ -1173,7 +1178,6 @@ static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue)
+ else
+ bfq_idle_insert(st, entity);
+
+- BUG_ON(sd->in_service_entity == entity);
+ BUG_ON(sd->next_in_service == entity);
+
+ return ret;
+@@ -1700,6 +1704,8 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+
+ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+ {
++ struct bfq_entity *entity = &bfqd->in_service_queue->entity;
++
+ if (bfqd->in_service_bic) {
+ put_io_context(bfqd->in_service_bic->icq.ioc);
+ bfqd->in_service_bic = NULL;
+@@ -1708,6 +1714,10 @@ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+ bfq_clear_bfqq_wait_request(bfqd->in_service_queue);
+ hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
+ bfqd->in_service_queue = NULL;
++
++ /* Reset in_service_entity along the path from entity to the root */
++ for_each_entity(entity)
++ entity->sched_data->in_service_entity = NULL;
+ }
+
+ static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -1715,13 +1725,17 @@ static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ {
+ struct bfq_entity *entity = &bfqq->entity;
+
+- BUG_ON(bfqq == bfqd->in_service_queue);
+ bfq_deactivate_entity(entity, requeue);
+ }
+
+ static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ struct bfq_entity *entity = &bfqq->entity;
++ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
++
++ BUG_ON(bfqq != bfqd->in_service_queue &&
++ entity->tree != &st->active && entity->tree != &st->idle &&
++ entity->on_st);
+
+ bfq_activate_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq));
+ bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
+@@ -1737,7 +1751,6 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ BUG_ON(!bfq_bfqq_busy(bfqq));
+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
+- BUG_ON(bfqq == bfqd->in_service_queue);
+
+ bfq_log_bfqq(bfqd, bfqq, "del from busy");
+
+
+From 316c31562f169ccb8752045217aead1688229315 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Fri, 16 Dec 2016 18:01:52 +0100
+Subject: [PATCH 24/24] Add code to redirect trace log to console
+
+Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
+---
+ block/bfq.h | 38 ++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 38 insertions(+)
+
+diff --git a/block/bfq.h b/block/bfq.h
+index 1a03521..a09804e 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -648,6 +648,43 @@ BFQ_BFQQ_FNS(softrt_update);
+ #undef BFQ_BFQQ_FNS
+
+ /* Logging facilities. */
++#ifdef CONFIG_BFQ_REDIRECT_TO_CONSOLE
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
++static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
++
++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
++ char __pbuf[128]; \
++ \
++ assert_spin_locked((bfqd)->queue->queue_lock); \
++ blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \
++ pr_crit("bfq%d%c %s " fmt "\n", \
++ (bfqq)->pid, \
++ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
++ __pbuf, ##args); \
++} while (0)
++
++#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
++ char __pbuf[128]; \
++ \
++ blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf)); \
++ pr_crit("%s " fmt "\n", __pbuf, ##args); \
++} while (0)
++
++#else /* CONFIG_BFQ_GROUP_IOSCHED */
++
++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
++ pr_crit("bfq%d%c " fmt "\n", (bfqq)->pid, \
++ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
++ ##args)
++#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0)
++
++#endif /* CONFIG_BFQ_GROUP_IOSCHED */
++
++#define bfq_log(bfqd, fmt, args...) \
++ pr_crit("bfq " fmt "\n", ##args)
++
++#else /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
+@@ -682,6 +719,7 @@ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
+
+ #define bfq_log(bfqd, fmt, args...) \
+ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
++#endif /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */
+
+ /* Expiration reasons. */
+ enum bfqq_expiration {
+
diff --git a/0006-BFQ-Fix.patch b/0006-BFQ-Fix.patch
deleted file mode 100644
index fa361f2778e5..000000000000
--- a/0006-BFQ-Fix.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-From 69f18bb587a4805b2b18bb4ba91dced87a8fda06 Mon Sep 17 00:00:00 2001
-From: Paolo Valente <paolo.valente@linaro.org>
-Date: Sat, 22 Oct 2016 15:26:33 +0200
-Subject: [PATCH 86/86] BUGFIX: Remove wrong conversion in use of bfq_fifo_expirebug
-
-Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
----
- block/bfq-iosched.c | 3 +--
- 1 file changed, 1 insertions(+), 2 deletions(-)
-
-diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
-index eef6ff4..c161ff0 100644
---- a/block/bfq-iosched.c
-+++ b/block/bfq-iosched.c
-@@ -4268,8 +4268,7 @@
-
- bfq_add_request(rq);
-
-- rq->fifo_time = ktime_get_ns() +
-- jiffies_to_nsecs(bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
-+ rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
- list_add_tail(&rq->queuelist, &bfqq->fifo);
-
- bfq_rq_enqueued(bfqd, bfqq, rq);
diff --git a/PKGBUILD b/PKGBUILD
index 085c3c34ca3f..d625f8cc1140 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -52,7 +52,7 @@ pkgbase=linux-bfq
# pkgname=('linux-bfq' 'linux-bfq-headers' 'linux-bfq-docs')
_srcname=linux-4.8
pkgver=4.8.15
-pkgrel=1
+pkgrel=2
arch=('i686' 'x86_64')
url="http://algo.ing.unimo.it"
license=('GPL2')
@@ -83,8 +83,7 @@ source=("http://www.kernel.org/pub/linux/kernel/v4.x/${_srcname}.tar.xz"
'net_handle_no_dst_on_skb_in_icmp6_send.patch'
'0001-x86-fpu-Fix-invalid-FPU-ptrace-state-after-execve.patch'
# patches from https://github.com/linusw/linux-bfq/commits/bfq-v8
- '0005-BFQ-Fix.patch'
- '0006-BFQ-Fix.patch')
+ '0005-BFQ-update-to-v8r6.patch')
_kernelname=${pkgbase#linux}
@@ -112,7 +111,7 @@ prepare() {
### Patch source with BFQ
msg "Patching source with BFQ patches"
- for p in "${srcdir}"/000{1,2,3,4,5,6}-*BFQ*.patch; do
+ for p in "${srcdir}"/000{1,2,3,4,5}-*BFQ*.patch; do
msg " $p"
patch -Np1 -i "$p"
done
@@ -193,7 +192,9 @@ prepare() {
if [ -n "$_localmodcfg" ]; then
msg "If you have modprobe-db installed, running it in recall mode now"
if [ -e /usr/bin/modprobed-db ]; then
- [[ ! -x /usr/bin/sudo ]] && echo "Cannot call modprobe with sudo. Install via pacman -S sudo and configure to work with this user." && exit 1
+ [[ -x /usr/bin/sudo ]] || {
+ echo "Cannot call modprobe with sudo. Install sudo and configure it to work with this user."
+ exit 1; }
sudo /usr/bin/modprobed-db recall
fi
msg "Running Steven Rostedt's make localmodconfig now"
@@ -461,16 +462,15 @@ sha512sums=('a48a065f21e1c7c4de4cf8ca47b8b8d9a70f86b64e7cfa6e01be490f78895745b9c
'dc0649dfe2a5ce8e8879a62df29a4a1959eb1a84e5d896a9cb119d6a85a9bad1b17135371799e0be96532e17c66043d298e4a14b18fad3078a1f425108f888c9'
'135afcffee2439e2260a71202658dce9ec5f555de3291b2d49a5cffdd953c6d7851b8a90e961576895555142a618e52220a7e4a46521ca4ba19d37df71887581'
'87ae76889ab84ced46c237374c124786551982f8ff7325102af9153aed1ab7be72bec4db1f7516426c5ee34c5ce17221679eb2f129c5cbdeec05c0d3cb7f785d'
- '8e8f407262f3f573654e7f0fceec6075fd2da0258a384206d28e5f59698168609e575f3d855adb8bb476b0684da409dab22f7ddc0f00e9c10c67debe4409c30b'
+ '6046432243b8f0497b43ea2c5fe3fde5135e058f2be25c02773ead93e2ab3b525b6f7c1c4898b9e67ac2bec0959b972829e997e79fabf6ac87e006700dfaf987'
'd9d28e02e964704ea96645a5107f8b65cae5f4fb4f537e224e5e3d087fd296cb770c29ac76e0ce95d173bc420ea87fb8f187d616672a60a0cae618b0ef15b8c8'
- '643cad54fc54f4790e1d28dbc60f775f117fa3e25e15aff418b0406cc7a9b49db5d67fc02ff7056a951a2d64d5bbc58306c40fa0d588460664c2921ad613745c'
- '94fbb7e67b77bb8911b69cdb23bbb74317e75a4ed786e4c6486bf2927042f9374e932ea879d1b250837e42c628abf7082fd6f895a2257c5d3ad652f6a520306a'
+ 'dc9ad074791a9edb3414a71480525a1c4a75b257137c754749c251e45ef376e313d3310766fedcabcd392837a8022bbcd97bfcd17c42feae63f71b4c4064b73d'
+ 'bcc657cd2366b55ba303d4d091ab9958e4f97121e7fdd64013a90be7fb6b19ac28ca2e8973496935475b9a8f8386bcbed8924981a20bb51cf3fc5c6f4f14b22a'
'd6faa67f3ef40052152254ae43fee031365d0b1524aa0718b659eb75afc21a3f79ea8d62d66ea311a800109bed545bc8f79e8752319cd378eef2cbd3a09aba22'
'2dc6b0ba8f7dbf19d2446c5c5f1823587de89f4e28e9595937dd51a87755099656f2acec50e3e2546ea633ad1bfd1c722e0c2b91eef1d609103d8abdc0a7cbaf'
'c53bd47527adbd2599a583e05a7d24f930dc4e86b1de017486588205ad6f262a51a4551593bc7a1218c96541ea073ea03b770278d947b1cd0d2801311fcc80e5'
'002d5e0ccfa5824c1750912a6400ff722672d6587b74ba66b1127cf1228048f604bba107617cf4f4477884039af4d4d196cf1b74cefe43b0bddc82270f11940d'
- '3889679e288d51f6fecc7ed6581ccde34acbf1e4861f5c9ca237a1ad13158502757d3fc457d7b6caf1c8c99c9dba842265004154a71cffb8ec14e1030e49e312'
- '3c3f3b6407d9a1a63cd91c2b5c35e6c932afa5bf33f1b2e8a530dbd9eacda8c8f956616c4cc9228657624da58e354ba5714252237d84ff3386fd65cf44f06ddc')
+ '98612f37013d1b41c76887bc5ed80f70753f6fc859748038dc83f3b53aa89464321951c6c21b364f56b2d4860e594c2091240c70ff70ce616d6b3fdadb817934')
validpgpkeys=(
'ABAF11C65A2970B130ABE3C479BE3E4300411886' # Linus Torvalds
diff --git a/config b/config
index 1899356f2d2f..788f14dd440e 100644
--- a/config
+++ b/config
@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
-# Linux/x86 4.8.10-1 Kernel Configuration
+# Linux/x86 4.8.15-1 Kernel Configuration
#
# CONFIG_64BIT is not set
CONFIG_X86_32=y
@@ -445,7 +445,6 @@ CONFIG_M686=y
# CONFIG_MHASWELL is not set
# CONFIG_MBROADWELL is not set
# CONFIG_MSKYLAKE is not set
-# CONFIG_MNATIVE is not set
CONFIG_X86_GENERIC=y
CONFIG_X86_INTERNODE_CACHE_SHIFT=6
CONFIG_X86_L1_CACHE_SHIFT=6
diff --git a/config.x86_64 b/config.x86_64
index ba4d0d888574..246520db8b4f 100644
--- a/config.x86_64
+++ b/config.x86_64
@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
-# Linux/x86 4.8.10-1 Kernel Configuration
+# Linux/x86 4.8.15-1 Kernel Configuration
#
CONFIG_64BIT=y
CONFIG_X86_64=y
@@ -448,7 +448,6 @@ CONFIG_NO_BOOTMEM=y
# CONFIG_MBROADWELL is not set
# CONFIG_MSKYLAKE is not set
CONFIG_GENERIC_CPU=y
-# CONFIG_MNATIVE is not set
CONFIG_X86_INTERNODE_CACHE_SHIFT=6
CONFIG_X86_L1_CACHE_SHIFT=6
CONFIG_X86_TSC=y