diff options
-rw-r--r-- | .SRCINFO | 14 | ||||
-rw-r--r-- | 0005-BFQ-Fix.patch | 33 | ||||
-rw-r--r-- | 0005-BFQ-update-to-v8r6.patch | 2806 | ||||
-rw-r--r-- | 0006-BFQ-Fix.patch | 24 | ||||
-rw-r--r-- | PKGBUILD | 20 | ||||
-rw-r--r-- | config | 3 | ||||
-rw-r--r-- | config.x86_64 | 3 |
7 files changed, 2824 insertions, 79 deletions
@@ -1,6 +1,6 @@ pkgbase = linux-bfq pkgver = 4.8.15 - pkgrel = 1 + pkgrel = 2 url = http://algo.ing.unimo.it arch = i686 arch = x86_64 @@ -25,8 +25,7 @@ pkgbase = linux-bfq source = linux.preset source = net_handle_no_dst_on_skb_in_icmp6_send.patch source = 0001-x86-fpu-Fix-invalid-FPU-ptrace-state-after-execve.patch - source = 0005-BFQ-Fix.patch - source = 0006-BFQ-Fix.patch + source = 0005-BFQ-update-to-v8r6.patch validpgpkeys = ABAF11C65A2970B130ABE3C479BE3E4300411886 validpgpkeys = 647F28654894E3BD457199BE38DBBDC86092693E sha512sums = a48a065f21e1c7c4de4cf8ca47b8b8d9a70f86b64e7cfa6e01be490f78895745b9c8790734b1d22182cf1f930fb87eaaa84e62ec8cc1f64ac4be9b949e7c0358 @@ -37,16 +36,15 @@ pkgbase = linux-bfq sha512sums = dc0649dfe2a5ce8e8879a62df29a4a1959eb1a84e5d896a9cb119d6a85a9bad1b17135371799e0be96532e17c66043d298e4a14b18fad3078a1f425108f888c9 sha512sums = 135afcffee2439e2260a71202658dce9ec5f555de3291b2d49a5cffdd953c6d7851b8a90e961576895555142a618e52220a7e4a46521ca4ba19d37df71887581 sha512sums = 87ae76889ab84ced46c237374c124786551982f8ff7325102af9153aed1ab7be72bec4db1f7516426c5ee34c5ce17221679eb2f129c5cbdeec05c0d3cb7f785d - sha512sums = 8e8f407262f3f573654e7f0fceec6075fd2da0258a384206d28e5f59698168609e575f3d855adb8bb476b0684da409dab22f7ddc0f00e9c10c67debe4409c30b + sha512sums = 6046432243b8f0497b43ea2c5fe3fde5135e058f2be25c02773ead93e2ab3b525b6f7c1c4898b9e67ac2bec0959b972829e997e79fabf6ac87e006700dfaf987 sha512sums = d9d28e02e964704ea96645a5107f8b65cae5f4fb4f537e224e5e3d087fd296cb770c29ac76e0ce95d173bc420ea87fb8f187d616672a60a0cae618b0ef15b8c8 - sha512sums = 643cad54fc54f4790e1d28dbc60f775f117fa3e25e15aff418b0406cc7a9b49db5d67fc02ff7056a951a2d64d5bbc58306c40fa0d588460664c2921ad613745c - sha512sums = 94fbb7e67b77bb8911b69cdb23bbb74317e75a4ed786e4c6486bf2927042f9374e932ea879d1b250837e42c628abf7082fd6f895a2257c5d3ad652f6a520306a + sha512sums = dc9ad074791a9edb3414a71480525a1c4a75b257137c754749c251e45ef376e313d3310766fedcabcd392837a8022bbcd97bfcd17c42feae63f71b4c4064b73d + sha512sums = bcc657cd2366b55ba303d4d091ab9958e4f97121e7fdd64013a90be7fb6b19ac28ca2e8973496935475b9a8f8386bcbed8924981a20bb51cf3fc5c6f4f14b22a sha512sums = d6faa67f3ef40052152254ae43fee031365d0b1524aa0718b659eb75afc21a3f79ea8d62d66ea311a800109bed545bc8f79e8752319cd378eef2cbd3a09aba22 sha512sums = 2dc6b0ba8f7dbf19d2446c5c5f1823587de89f4e28e9595937dd51a87755099656f2acec50e3e2546ea633ad1bfd1c722e0c2b91eef1d609103d8abdc0a7cbaf sha512sums = c53bd47527adbd2599a583e05a7d24f930dc4e86b1de017486588205ad6f262a51a4551593bc7a1218c96541ea073ea03b770278d947b1cd0d2801311fcc80e5 sha512sums = 002d5e0ccfa5824c1750912a6400ff722672d6587b74ba66b1127cf1228048f604bba107617cf4f4477884039af4d4d196cf1b74cefe43b0bddc82270f11940d - sha512sums = 3889679e288d51f6fecc7ed6581ccde34acbf1e4861f5c9ca237a1ad13158502757d3fc457d7b6caf1c8c99c9dba842265004154a71cffb8ec14e1030e49e312 - sha512sums = 3c3f3b6407d9a1a63cd91c2b5c35e6c932afa5bf33f1b2e8a530dbd9eacda8c8f956616c4cc9228657624da58e354ba5714252237d84ff3386fd65cf44f06ddc + sha512sums = 98612f37013d1b41c76887bc5ed80f70753f6fc859748038dc83f3b53aa89464321951c6c21b364f56b2d4860e594c2091240c70ff70ce616d6b3fdadb817934 pkgname = linux-bfq pkgdesc = Linux Kernel and modules with the BFQ scheduler. diff --git a/0005-BFQ-Fix.patch b/0005-BFQ-Fix.patch deleted file mode 100644 index ad66ca69a2ba..000000000000 --- a/0005-BFQ-Fix.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 69f18bb587a4805b2b18bb4ba91dced87a8fda06 Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@linaro.org> -Date: Sat, 22 Oct 2016 15:26:33 +0200 -Subject: [PATCH 86/86] BUGFIX: Replace max wrongly used for modulo numbers - -Signed-off-by: Paolo Valente <paolo.valente@linaro.org> ---- - block/bfq-iosched.c | 10 +++++++--- - 1 file changed, 7 insertions(+), 3 deletions(-) - -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index eef6ff4..c161ff0 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -2179,9 +2179,13 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd, - * not only expires, but also remains with no - * request. - */ -- bfqq->last_wr_start_finish += jiffies - -- max_t(unsigned long, bfqq->last_wr_start_finish, -- bfqq->budget_timeout); -+ if (time_after(bfqq->budget_timeout, -+ bfqq->last_wr_start_finish)) -+ bfqq->last_wr_start_finish += -+ jiffies - bfqq->budget_timeout; -+ else -+ bfqq->last_wr_start_finish = jiffies; -+ - if (time_is_after_jiffies(bfqq->last_wr_start_finish)) { - pr_crit( - "BFQ WARNING:last %lu budget %lu jiffies %lu", -Contact GitHub API Training Shop Blog About - diff --git a/0005-BFQ-update-to-v8r6.patch b/0005-BFQ-update-to-v8r6.patch new file mode 100644 index 000000000000..1b42af900abe --- /dev/null +++ b/0005-BFQ-update-to-v8r6.patch @@ -0,0 +1,2806 @@ +From 1acff6b391cd626a495e42c9782bf2139b9f330f Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Wed, 19 Oct 2016 20:11:42 +0200 +Subject: [PATCH 01/24] Documentation/block: add bfq-iosched.txt + +Documentation of BFQ benefits, inner workings, interface and tunables. + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + Documentation/block/00-INDEX | 2 + + Documentation/block/bfq-iosched.txt | 508 ++++++++++++++++++++++++++++++++++++ + 2 files changed, 510 insertions(+) + create mode 100644 Documentation/block/bfq-iosched.txt + +diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX +index e55103a..8d55b4b 100644 +--- a/Documentation/block/00-INDEX ++++ b/Documentation/block/00-INDEX +@@ -1,5 +1,7 @@ + 00-INDEX + - This file ++bfq-iosched.txt ++ - BFQ IO scheduler and its tunables + biodoc.txt + - Notes on the Generic Block Layer Rewrite in Linux 2.5 + biovecs.txt +diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt +new file mode 100644 +index 0000000..67e1269 +--- /dev/null ++++ b/Documentation/block/bfq-iosched.txt +@@ -0,0 +1,508 @@ ++BFQ (Budget Fair Queueing) ++========================== ++ ++BFQ is a proportional-share I/O scheduler, with some extra ++low-latency capabilities. In addition to cgroups support (blkio or io ++controllers), BFQ's main features are: ++- BFQ guarantees a high system and application responsiveness, and a ++ low latency for time-sensitive applications, such as audio or video ++ players; ++- BFQ distributes bandwidth, and not just time, among processes or ++ groups (switching back to time distribution when needed to keep ++ throughput high). ++ ++On average CPUs, the current version of BFQ can handle devices ++performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a ++reference, 30-50 KIOPS correspond to very high bandwidths with ++sequential I/O (e.g., 8-12 GB/s if I/O requests are 256 KB large), and ++to 120-200 MB/s with 4KB random I/O. ++ ++The table of contents follow. Impatients can just jump to Section 3. ++ ++CONTENTS ++ ++1. When may BFQ be useful? ++ 1-1 Personal systems ++ 1-2 Server systems ++2. How does BFQ work? ++3. What are BFQ's tunable? ++4. BFQ group scheduling ++ 4-1 Service guarantees provided ++ 4-2 Interface ++ ++1. When may BFQ be useful? ++========================== ++ ++BFQ provides the following benefits on personal and server systems. ++ ++1-1 Personal systems ++-------------------- ++ ++Low latency for interactive applications ++ ++Regardless of the actual background workload, BFQ guarantees that, for ++interactive tasks, the storage device is virtually as responsive as if ++it was idle. For example, even if one or more of the following ++background workloads are being executed: ++- one or more large files are being read, written or copied, ++- a tree of source files is being compiled, ++- one or more virtual machines are performing I/O, ++- a software update is in progress, ++- indexing daemons are scanning filesystems and updating their ++ databases, ++starting an application or loading a file from within an application ++takes about the same time as if the storage device was idle. As a ++comparison, with CFQ, NOOP or DEADLINE, and in the same conditions, ++applications experience high latencies, or even become unresponsive ++until the background workload terminates (also on SSDs). ++ ++Low latency for soft real-time applications ++ ++Also soft real-time applications, such as audio and video ++players/streamers, enjoy a low latency and a low drop rate, regardless ++of the background I/O workload. As a consequence, these applications ++do not suffer from almost any glitch due to the background workload. ++ ++Higher speed for code-development tasks ++ ++If some additional workload happens to be executed in parallel, then ++BFQ executes the I/O-related components of typical code-development ++tasks (compilation, checkout, merge, ...) much more quickly than CFQ, ++NOOP or DEADLINE. ++ ++High throughput ++ ++On hard disks, BFQ achieves up to 30% higher throughput than CFQ, and ++up to 150% higher throughput than DEADLINE and NOOP, with all the ++sequential workloads considered in our tests. With random workloads, ++and with all the workloads on flash-based devices, BFQ achieves, ++instead, about the same throughput as the other schedulers. ++ ++Strong fairness/bandwidth guarantees ++ ++BFQ distributes the device throughput, and not just the device time, ++as desired among I/O-bound applications, with any workload and ++regardless of the device parameters. BFQ switches to time-based ++distribution (only) for processes that would otherwise cause a ++throughput loss. ++ ++1-2 Server systems ++------------------ ++ ++Most benefits for server systems follow from the same service ++properties as above. In particular, regardless of whether additional, ++possibly heavy workloads are being served, BFQ guarantees: ++ ++. audio and video-streaming with zero or very low jitter and drop ++ rate; ++ ++. fast retrieval of WEB pages and embedded objects; ++ ++. real-time recording of data in live-dumping applications (e.g., ++ packet logging); ++ ++. responsiveness in local and remote access to a server. ++ ++ ++2. How does BFQ work? ++===================== ++ ++BFQ is a proportional-share I/O scheduler, whose general structure, ++plus a lot of code, are borrowed from CFQ. ++ ++- Each process doing I/O on a device is associated with a weight and a ++ (bfq_)queue. ++ ++- BFQ grants exclusive access to the device, for a while, to one queue ++ (process) at a time, and implements this service model by ++ associating every queue with a budget, measured in number of ++ sectors. ++ ++ - After a queue is granted access to the device, the budget of the ++ queue is decremented, on each request dispatch, by the size of the ++ request. ++ ++ - The in-service queue is expired, i.e., its service is suspended, ++ only if one of the following events occurs: 1) the queue finishes ++ its budget, 2) the queue empties, 3) a "budget timeout" fires. ++ ++ - The budget timeout prevents processes doing random I/O from ++ holding the device for too long and dramatically reducing ++ throughput. ++ ++ - Actually, as in CFQ, a queue associated with a process issuing ++ sync requests may not be expired immediately when it empties. In ++ contrast, BFQ may idle the device for a short time interval, ++ giving the process the chance to go on being served if it issues ++ a new request in time. Device idling typically boosts the ++ throughput on rotational devices, if processes do synchronous ++ and sequential I/O. In addition, under BFQ, device idling is ++ also instrumental in guaranteeing the desired throughput ++ fraction to processes issuing sync requests (see the description ++ of the slice_idle tunable in this document, or [1, 2], for more ++ details). ++ ++ - With respect to idling for service guarantees, if several ++ processes are competing for the device at the same time, but ++ all processes (and groups, after the following commit) have ++ the same weight, then BFQ guarantees the expected throughput ++ distribution without ever idling the device. Throughput is ++ thus as high as possible in this common scenario. ++ ++ - If low-latency mode is enabled (default configuration), BFQ ++ executes some special heuristics to detect interactive and soft ++ real-time applications (e.g., video or audio players/streamers), ++ and to reduce their latency. The most important action taken to ++ achieve this goal is to give to the queues associated with these ++ applications more than their fair share of the device ++ throughput. For brevity, we call just "weight-raising" the whole ++ sets of actions taken by BFQ to privilege these queues. In ++ particular, BFQ provides a milder form of weight-raising for ++ interactive applications, and a stronger form for soft real-time ++ applications. ++ ++ - BFQ automatically deactivates idling for queues born in a burst of ++ queue creations. In fact, these queues are usually associated with ++ the processes of applications and services that benefit mostly ++ from a high throughput. Examples are systemd during boot, or git ++ grep. ++ ++ - As CFQ, BFQ merges queues performing interleaved I/O, i.e., ++ performing random I/O that becomes mostly sequential if ++ merged. Differently from CFQ, BFQ achieves this goal with a more ++ reactive mechanism, called Early Queue Merge (EQM). EQM is so ++ responsive in detecting interleaved I/O (cooperating processes), ++ that it enables BFQ to achieve a high throughput, by queue ++ merging, even for queues for which CFQ needs a different ++ mechanism, preemption, to get a high throughput. As such EQM is a ++ unified mechanism to achieve a high throughput with interleaved ++ I/O. ++ ++ - Queues are scheduled according to a variant of WF2Q+, named ++ B-WF2Q+, and implemented using an augmented rb-tree to preserve an ++ O(log N) overall complexity. See [2] for more details. B-WF2Q+ is ++ also ready for hierarchical scheduling. However, for a cleaner ++ logical breakdown, the code that enables and completes ++ hierarchical support is provided in the next commit, which focuses ++ exactly on this feature. ++ ++ - B-WF2Q+ guarantees a tight deviation with respect to an ideal, ++ perfectly fair, and smooth service. In particular, B-WF2Q+ ++ guarantees that each queue receives a fraction of the device ++ throughput proportional to its weight, even if the throughput ++ fluctuates, and regardless of: the device parameters, the current ++ workload and the budgets assigned to the queue. ++ ++ - The last, budget-independence, property (although probably ++ counterintuitive in the first place) is definitely beneficial, for ++ the following reasons: ++ ++ - First, with any proportional-share scheduler, the maximum ++ deviation with respect to an ideal service is proportional to ++ the maximum budget (slice) assigned to queues. As a consequence, ++ BFQ can keep this deviation tight not only because of the ++ accurate service of B-WF2Q+, but also because BFQ *does not* ++ need to assign a larger budget to a queue to let the queue ++ receive a higher fraction of the device throughput. ++ ++ - Second, BFQ is free to choose, for every process (queue), the ++ budget that best fits the needs of the process, or best ++ leverages the I/O pattern of the process. In particular, BFQ ++ updates queue budgets with a simple feedback-loop algorithm that ++ allows a high throughput to be achieved, while still providing ++ tight latency guarantees to time-sensitive applications. When ++ the in-service queue expires, this algorithm computes the next ++ budget of the queue so as to: ++ ++ - Let large budgets be eventually assigned to the queues ++ associated with I/O-bound applications performing sequential ++ I/O: in fact, the longer these applications are served once ++ got access to the device, the higher the throughput is. ++ ++ - Let small budgets be eventually assigned to the queues ++ associated with time-sensitive applications (which typically ++ perform sporadic and short I/O), because, the smaller the ++ budget assigned to a queue waiting for service is, the sooner ++ B-WF2Q+ will serve that queue (Subsec 3.3 in [2]). ++ ++- ioprio classes are served in strict priority order, i.e., ++ lower-priority queues are not served as long as there are ++ higher-priority queues. Among queues in the same class, the ++ bandwidth is distributed in proportion to the weight of each ++ queue. A very thin extra bandwidth is however guaranteed to ++ the Idle class, to prevent it from starving. ++ ++ ++3. What are BFQ's tunable? ++========================== ++ ++The tunables back_seek-max, back_seek_penalty, fifo_expire_async and ++fifo_expire_sync below are the same as in CFQ. Their description is ++just copied from that for CFQ. Some considerations in the description ++of slice_idle are copied from CFQ too. ++ ++per-process ioprio and weight ++----------------------------- ++ ++Unless the cgroups interface is used, weights can be assigned to ++processes only indirectly, through I/O priorities, and according to ++the relation: weight = (IOPRIO_BE_NR - ioprio) * 10. ++ ++slice_idle ++---------- ++ ++This parameter specifies how long BFQ should idle for next I/O ++request, when certain sync BFQ queues become empty. By default ++slice_idle is a non-zero value. Idling has a double purpose: boosting ++throughput and making sure that the desired throughput distribution is ++respected (see the description of how BFQ works, and, if needed, the ++papers referred there). ++ ++As for throughput, idling can be very helpful on highly seeky media ++like single spindle SATA/SAS disks where we can cut down on overall ++number of seeks and see improved throughput. ++ ++Setting slice_idle to 0 will remove all the idling on queues and one ++should see an overall improved throughput on faster storage devices ++like multiple SATA/SAS disks in hardware RAID configuration. ++ ++So depending on storage and workload, it might be useful to set ++slice_idle=0. In general for SATA/SAS disks and software RAID of ++SATA/SAS disks keeping slice_idle enabled should be useful. For any ++configurations where there are multiple spindles behind single LUN ++(Host based hardware RAID controller or for storage arrays), setting ++slice_idle=0 might end up in better throughput and acceptable ++latencies. ++ ++Idling is however necessary to have service guarantees enforced in ++case of differentiated weights or differentiated I/O-request lengths. ++To see why, suppose that a given BFQ queue A must get several I/O ++requests served for each request served for another queue B. Idling ++ensures that, if A makes a new I/O request slightly after becoming ++empty, then no request of B is dispatched in the middle, and thus A ++does not lose the possibility to get more than one request dispatched ++before the next request of B is dispatched. Note that idling ++guarantees the desired differentiated treatment of queues only in ++terms of I/O-request dispatches. To guarantee that the actual service ++order then corresponds to the dispatch order, the strict_guarantees ++tunable must be set too. ++ ++There is an important flipside for idling: apart from the above cases ++where it is beneficial also for throughput, idling can severely impact ++throughput. One important case is random workload. Because of this ++issue, BFQ tends to avoid idling as much as possible, when it is not ++beneficial also for throughput. As a consequence of this behavior, and ++of further issues described for the strict_guarantees tunable, ++short-term service guarantees may be occasionally violated. And, in ++some cases, these guarantees may be more important than guaranteeing ++maximum throughput. For example, in video playing/streaming, a very ++low drop rate may be more important than maximum throughput. In these ++cases, consider setting the strict_guarantees parameter. ++ ++strict_guarantees ++----------------- ++ ++If this parameter is set (default: unset), then BFQ ++ ++- always performs idling when the in-service queue becomes empty; ++ ++- forces the device to serve one I/O request at a time, by dispatching a ++ new request only if there is no outstanding request. ++ ++In the presence of differentiated weights or I/O-request sizes, both ++the above conditions are needed to guarantee that every BFQ queue ++receives its allotted share of the bandwidth. The first condition is ++needed for the reasons explained in the description of the slice_idle ++tunable. The second condition is needed because all modern storage ++devices reorder internally-queued requests, which may trivially break ++the service guarantees enforced by the I/O scheduler. ++ ++Setting strict_guarantees may evidently affect throughput. ++ ++back_seek_max ++------------- ++ ++This specifies, given in Kbytes, the maximum "distance" for backward seeking. ++The distance is the amount of space from the current head location to the ++sectors that are backward in terms of distance. ++ ++This parameter allows the scheduler to anticipate requests in the "backward" ++direction and consider them as being the "next" if they are within this ++distance from the current head location. ++ ++back_seek_penalty ++----------------- ++ ++This parameter is used to compute the cost of backward seeking. If the ++backward distance of request is just 1/back_seek_penalty from a "front" ++request, then the seeking cost of two requests is considered equivalent. ++ ++So scheduler will not bias toward one or the other request (otherwise scheduler ++will bias toward front request). Default value of back_seek_penalty is 2. ++ ++fifo_expire_async ++----------------- ++ ++This parameter is used to set the timeout of asynchronous requests. Default ++value of this is 248ms. ++ ++fifo_expire_sync ++---------------- ++ ++This parameter is used to set the timeout of synchronous requests. Default ++value of this is 124ms. In case to favor synchronous requests over asynchronous ++one, this value should be decreased relative to fifo_expire_async. ++ ++low_latency ++----------- ++ ++This parameter is used to enable/disable BFQ's low latency mode. By ++default, low latency mode is enabled. If enabled, interactive and soft ++real-time applications are privileged and experience a lower latency, ++as explained in more detail in the description of how BFQ works. ++ ++timeout_sync ++------------ ++ ++Maximum amount of device time that can be given to a task (queue) once ++it has been selected for service. On devices with costly seeks, ++increasing this time usually increases maximum throughput. On the ++opposite end, increasing this time coarsens the granularity of the ++short-term bandwidth and latency guarantees, especially if the ++following parameter is set to zero. ++ ++max_budget ++---------- ++ ++Maximum amount of service, measured in sectors, that can be provided ++to a BFQ queue once it is set in service (of course within the limits ++of the above timeout). According to what said in the description of ++the algoritm, larger values increase the throughput in proportion to ++the percentage of sequential I/O requests issued. The price of larger ++values is that they coarsen the granularity of short-term bandwidth ++and latency guarantees. ++ ++The default value is 0, which enables auto-tuning: BFQ sets max_budget ++to the maximum number of sectors that can be served during ++timeout_sync, according to the estimated peak rate. ++ ++weights ++------- ++ ++Read-only parameter, used to show the weights of the currently active ++BFQ queues. ++ ++ ++wr_ tunables ++------------ ++ ++BFQ exports a few parameters to control/tune the behavior of ++low-latency heuristics. ++ ++wr_coeff ++ ++Factor by which the weight of a weight-raised queue is multiplied. If ++the queue is deemed soft real-time, then the weight is further ++multiplied by an additional, constant factor. ++ ++wr_max_time ++ ++Maximum duration of a weight-raising period for an interactive task ++(ms). If set to zero (default value), then this value is computed ++automatically, as a function of the peak rate of the device. In any ++case, when the value of this parameter is read, it always reports the ++current duration, regardless of whether it has been set manually or ++computed automatically. ++ ++wr_max_softrt_rate ++ ++Maximum service rate below which a queue is deemed to be associated ++with a soft real-time application, and is then weight-raised ++accordingly (sectors/sec). ++ ++wr_min_idle_time ++ ++Minimum idle period after which interactive weight-raising may be ++reactivated for a queue (in ms). ++ ++wr_rt_max_time ++ ++Maximum weight-raising duration for soft real-time queues (in ms). The ++start time from which this duration is considered is automatically ++moved forward if the queue is detected to be still soft real-time ++before the current soft real-time weight-raising period finishes. ++ ++wr_min_inter_arr_async ++ ++Minimum period between I/O request arrivals after which weight-raising ++may be reactivated for an already busy async queue (in ms). ++ ++ ++4. Group scheduling with BFQ ++============================ ++ ++BFQ supports both cgroup-v1 and cgroup-v2 io controllers, namely blkio ++and io. In particular, BFQ supports weight-based proportional ++share. ++ ++4-1 Service guarantees provided ++------------------------------- ++ ++With BFQ, proportional share means true proportional share of the ++device bandwidth, according to group weights. For example, a group ++with weight 200 gets twice the bandwidth, and not just twice the time, ++of a group with weight 100. ++ ++BFQ supports hierarchies (group trees) of any depth. Bandwidth is ++distributed among groups and processes in the expected way: for each ++group, the children of the group share the whole bandwidth of the ++group in proportion to their weights. In particular, this implies ++that, for each leaf group, every process of the group receives the ++same share of the whole group bandwidth, unless the ioprio of the ++process is modified. ++ ++The resource-sharing guarantee for a group may partially or totally ++switch from bandwidth to time, if providing bandwidth guarantees to ++the group lowers the throughput too much. This switch occurs on a ++per-process basis: if a process of a leaf group causes throughput loss ++if served in such a way to receive its share of the bandwidth, then ++BFQ switches back to just time-based proportional share for that ++process. ++ ++4-2 Interface ++------------- ++ ++To get proportional sharing of bandwidth with BFQ for a given device, ++BFQ must of course be the active scheduler for that device. ++ ++Within each group directory, the names of the files associated with ++BFQ-specific cgroup parameters and stats begin with the "bfq." ++prefix. So, with cgroups-v1 or cgroups-v2, the full prefix for ++BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group ++parameter to set the weight of a group with BFQ is blkio.bfq.weight ++or io.bfq.weight. ++ ++Parameters to set ++----------------- ++ ++For each group, there is only the following parameter to set. ++ ++weight (namely blkio.bfq.weight or io.bfq-weight): the weight of the ++group inside its parent. Available values: 1..10000 (default 100). The ++linear mapping between ioprio and weights, described at the beginning ++of the tunable section, is still valid, but all weights higher than ++IOPRIO_BE_NR*10 are mapped to ioprio 0. ++ ++ ++[1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O ++ Scheduler", Proceedings of the First Workshop on Mobile System ++ Technologies (MST-2015), May 2015. ++ http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf ++ ++[2] P. Valente and M. Andreolini, "Improving Application ++ Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of ++ the 5th Annual International Systems and Storage Conference ++ (SYSTOR '12), June 2012. ++ Slightly extended version: ++ http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite- ++ results.pdf + +From 11f0d8caa035ff4fe6fe02ebc0f2f14dc5473479 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Sat, 22 Oct 2016 15:26:33 +0200 +Subject: [PATCH 02/24] BUGFIX: Replace max wrongly used for modulo numbers + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-iosched.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index eef6ff4..c161ff0 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -2179,9 +2179,13 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd, + * not only expires, but also remains with no + * request. + */ +- bfqq->last_wr_start_finish += jiffies - +- max_t(unsigned long, bfqq->last_wr_start_finish, +- bfqq->budget_timeout); ++ if (time_after(bfqq->budget_timeout, ++ bfqq->last_wr_start_finish)) ++ bfqq->last_wr_start_finish += ++ jiffies - bfqq->budget_timeout; ++ else ++ bfqq->last_wr_start_finish = jiffies; ++ + if (time_is_after_jiffies(bfqq->last_wr_start_finish)) { + pr_crit( + "BFQ WARNING:last %lu budget %lu jiffies %lu", + +From eb59d4ac664f64372c440006b0cd27bf137a2025 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Mon, 24 Oct 2016 17:08:32 +0200 +Subject: [PATCH 03/24] Improve bfq-iosched.txt + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + Documentation/block/bfq-iosched.txt | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt +index 67e1269..8626b14 100644 +--- a/Documentation/block/bfq-iosched.txt ++++ b/Documentation/block/bfq-iosched.txt +@@ -78,13 +78,15 @@ sequential workloads considered in our tests. With random workloads, + and with all the workloads on flash-based devices, BFQ achieves, + instead, about the same throughput as the other schedulers. + +-Strong fairness/bandwidth guarantees ++Strong fairness, bandwidth and delay guarantees + + BFQ distributes the device throughput, and not just the device time, +-as desired among I/O-bound applications, with any workload and +-regardless of the device parameters. BFQ switches to time-based +-distribution (only) for processes that would otherwise cause a +-throughput loss. ++among I/O-bound applications in proportion their weights, with any ++workload and regardless of the device parameters. From these bandwidth ++guarantees, it is possible to compute tight per-I/O-request delay ++guarantees by a simple formula. If not configured for strict service ++guarantees, BFQ switches to time-based resource sharing (only) for ++applications that would otherwise cause a throughput loss. + + 1-2 Server systems + ------------------ + +From a53d556f54f33de6e71f909748c6994ff90fa243 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Mon, 24 Oct 2016 20:36:53 +0200 +Subject: [PATCH 04/24] Improve help message in Kconfig.iosched + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/Kconfig.iosched | 11 ++++------- + 1 file changed, 4 insertions(+), 7 deletions(-) + +diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched +index 6d92579..277b112 100644 +--- a/block/Kconfig.iosched ++++ b/block/Kconfig.iosched +@@ -43,13 +43,10 @@ config IOSCHED_BFQ + tristate "BFQ I/O scheduler" + default n + ---help--- +- The BFQ I/O scheduler tries to distribute bandwidth among +- all processes according to their weights. +- It aims at distributing the bandwidth as desired, independently of +- the disk parameters and with any workload. It also tries to +- guarantee low latency to interactive and soft real-time +- applications. If compiled built-in (saying Y here), BFQ can +- be configured to support hierarchical scheduling. ++ The BFQ I/O scheduler distributes bandwidth among all ++ processes according to their weights, regardless of the ++ device parameters and with any workload. It also guarantees ++ a low latency to interactive and soft real-time applications. + + config BFQ_GROUP_IOSCHED + bool "BFQ hierarchical scheduling support" + +From 710824a320c015b314abba3bc2982d0f3caa0acc Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Mon, 24 Oct 2016 20:37:59 +0200 +Subject: [PATCH 05/24] Remove stray disk word in first line + +SIgned-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-iosched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index c161ff0..b5fdd45 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -1,5 +1,5 @@ + /* +- * Budget Fair Queueing (BFQ) disk scheduler. ++ * Budget Fair Queueing (BFQ) I/O scheduler. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> + +From a2f6ff7877e60f533cdf39e941fc45f7b5aafcee Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Mon, 24 Oct 2016 20:41:03 +0200 +Subject: [PATCH 06/24] BUGFIX: Remove wrong conversion in use of + bfq_fifo_expire + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-iosched.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index b5fdd45..a8c6887 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -4268,8 +4268,7 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq) + + bfq_add_request(rq); + +- rq->fifo_time = ktime_get_ns() + +- jiffies_to_nsecs(bfqd->bfq_fifo_expire[rq_is_sync(rq)]); ++ rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; + list_add_tail(&rq->queuelist, &bfqq->fifo); + + bfq_rq_enqueued(bfqd, bfqq, rq); + +From c91736449d06052469c2c3abaf5d455b85e77417 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Tue, 25 Oct 2016 09:31:37 +0200 +Subject: [PATCH 07/24] bfq-iosched.txt: add description of preemption + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + Documentation/block/bfq-iosched.txt | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt +index 8626b14..8979408 100644 +--- a/Documentation/block/bfq-iosched.txt ++++ b/Documentation/block/bfq-iosched.txt +@@ -227,6 +227,12 @@ plus a lot of code, are borrowed from CFQ. + budget assigned to a queue waiting for service is, the sooner + B-WF2Q+ will serve that queue (Subsec 3.3 in [2]). + ++- If several processes are competing for the device at the same time, ++ but all processes and groups have the same weight, then BFQ ++ guarantees the expected throughput distribution without ever idling ++ the device. It uses preemption instead. Throughput is then much ++ higher in this common scenario. ++ + - ioprio classes are served in strict priority order, i.e., + lower-priority queues are not served as long as there are + higher-priority queues. Among queues in the same class, the + +From 2dc9393e7ff65d0e5340118402134cff03276b1c Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Sat, 5 Nov 2016 18:09:07 +0100 +Subject: [PATCH 08/24] Add parentheses to complex macros + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-iosched.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index a8c6887..88be34e 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -77,19 +77,19 @@ + static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 }; + + /* Maximum backwards seek, in KiB. */ +-static const int bfq_back_max = 16 * 1024; ++static const int bfq_back_max = (16 * 1024); + + /* Penalty of a backwards seek, in number of sectors. */ + static const int bfq_back_penalty = 2; + + /* Idling period duration, in ns. */ +-static u32 bfq_slice_idle = NSEC_PER_SEC / 125; ++static u32 bfq_slice_idle = (NSEC_PER_SEC / 125); + + /* Minimum number of assigned budgets for which stats are safe to compute. */ + static const int bfq_stats_min_budgets = 194; + + /* Default maximum budget values, in sectors and number of requests. */ +-static const int bfq_default_max_budget = 16 * 1024; ++static const int bfq_default_max_budget = (16 * 1024); + + /* + * Async to sync throughput distribution is controlled as follows: +@@ -99,7 +99,7 @@ static const int bfq_default_max_budget = 16 * 1024; + static const int bfq_async_charge_factor = 10; + + /* Default timeout values, in jiffies, approximating CFQ defaults. */ +-static const int bfq_timeout = HZ / 8; ++static const int bfq_timeout = (HZ / 8); + + struct kmem_cache *bfq_pool; + +@@ -117,7 +117,7 @@ struct kmem_cache *bfq_pool; + /* Min number of samples required to perform peak-rate update */ + #define BFQ_RATE_MIN_SAMPLES 32 + /* Min observation time interval required to perform a peak-rate update (ns) */ +-#define BFQ_RATE_MIN_INTERVAL 300*NSEC_PER_MSEC ++#define BFQ_RATE_MIN_INTERVAL (300*NSEC_PER_MSEC) + /* Target observation time interval for a peak-rate update (ns) */ + #define BFQ_RATE_REF_INTERVAL NSEC_PER_SEC + + +From 882f0607d82efe44d98634ce1fdcc4b602ccf5ec Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Sat, 5 Nov 2016 18:32:20 +0100 +Subject: [PATCH 09/24] Fix type in bfq-iosched.txt + +Sigend-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + Documentation/block/bfq-iosched.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt +index 8979408..5ba67af 100644 +--- a/Documentation/block/bfq-iosched.txt ++++ b/Documentation/block/bfq-iosched.txt +@@ -385,7 +385,7 @@ max_budget + Maximum amount of service, measured in sectors, that can be provided + to a BFQ queue once it is set in service (of course within the limits + of the above timeout). According to what said in the description of +-the algoritm, larger values increase the throughput in proportion to ++the algorithm, larger values increase the throughput in proportion to + the percentage of sequential I/O requests issued. The price of larger + values is that they coarsen the granularity of short-term bandwidth + and latency guarantees. + +From 99b1da54ca162abdc9e62915050154b35bd9bc27 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Sat, 5 Nov 2016 18:34:58 +0100 +Subject: [PATCH 10/24] BFQ-v8r5 + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-iosched.c | 2 +- + block/bfq.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index 88be34e..0f3081d 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -5212,7 +5212,7 @@ static struct blkcg_policy blkcg_policy_bfq = { + static int __init bfq_init(void) + { + int ret; +- char msg[50] = "BFQ I/O-scheduler: v8r4"; ++ char msg[50] = "BFQ I/O-scheduler: v8r5"; + + #ifdef CONFIG_BFQ_GROUP_IOSCHED + ret = blkcg_policy_register(&blkcg_policy_bfq); +diff --git a/block/bfq.h b/block/bfq.h +index ea1e7d8..b80abe0 100644 +--- a/block/bfq.h ++++ b/block/bfq.h +@@ -1,5 +1,5 @@ + /* +- * BFQ-v8r4 for 4.8.0: data structures and common functions prototypes. ++ * BFQ-v8r5 for 4.8.0: data structures and common functions prototypes. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> + +From d825ef204aa934e1fd9ebd925276ca0e67783508 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Wed, 7 Dec 2016 19:31:37 +0100 +Subject: [PATCH 11/24] Improve documentation + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> + +Squash me +--- + Documentation/block/bfq-iosched.txt | 26 ++++++++++++++++++++------ + block/Kconfig.iosched | 5 ++++- + 2 files changed, 24 insertions(+), 7 deletions(-) + +diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt +index 5ba67af..13b5248 100644 +--- a/Documentation/block/bfq-iosched.txt ++++ b/Documentation/block/bfq-iosched.txt +@@ -252,9 +252,14 @@ of slice_idle are copied from CFQ too. + per-process ioprio and weight + ----------------------------- + +-Unless the cgroups interface is used, weights can be assigned to +-processes only indirectly, through I/O priorities, and according to +-the relation: weight = (IOPRIO_BE_NR - ioprio) * 10. ++Unless the cgroups interface is used (see "4. BFQ group scheduling"), ++weights can be assigned to processes only indirectly, through I/O ++priorities, and according to the relation: ++weight = (IOPRIO_BE_NR - ioprio) * 10. ++ ++Beware that, if low-latency is set, then BFQ automatically raises the ++weight of the queues associated with interactive and soft real-time ++applications. Unset this tunable if you need/want to control weights. + + slice_idle + ---------- +@@ -369,6 +374,11 @@ default, low latency mode is enabled. If enabled, interactive and soft + real-time applications are privileged and experience a lower latency, + as explained in more detail in the description of how BFQ works. + ++DO NOT enable this mode if you need full control on bandwidth ++distribution. In fact, if it is enabled, then BFQ automatically ++increases the bandwidth share of privileged applications, as the main ++means to guarantee a lower latency to them. ++ + timeout_sync + ------------ + +@@ -449,9 +459,9 @@ may be reactivated for an already busy async queue (in ms). + 4. Group scheduling with BFQ + ============================ + +-BFQ supports both cgroup-v1 and cgroup-v2 io controllers, namely blkio +-and io. In particular, BFQ supports weight-based proportional +-share. ++BFQ supports both cgroups-v1 and cgroups-v2 io controllers, namely ++blkio and io. In particular, BFQ supports weight-based proportional ++share. To activate cgroups support, set BFQ_GROUP_IOSCHED. + + 4-1 Service guarantees provided + ------------------------------- +@@ -501,6 +511,10 @@ linear mapping between ioprio and weights, described at the beginning + of the tunable section, is still valid, but all weights higher than + IOPRIO_BE_NR*10 are mapped to ioprio 0. + ++Recall that, if low-latency is set, then BFQ automatically raises the ++weight of the queues associated with interactive and soft real-time ++applications. Unset this tunable if you need/want to control weights. ++ + + [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O + Scheduler", Proceedings of the First Workshop on Mobile System +diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched +index 277b112..f2cd945 100644 +--- a/block/Kconfig.iosched ++++ b/block/Kconfig.iosched +@@ -47,13 +47,16 @@ config IOSCHED_BFQ + processes according to their weights, regardless of the + device parameters and with any workload. It also guarantees + a low latency to interactive and soft real-time applications. ++ Details in Documentation/block/bfq-iosched.txt + + config BFQ_GROUP_IOSCHED + bool "BFQ hierarchical scheduling support" + depends on IOSCHED_BFQ && BLK_CGROUP + default n + ---help--- +- Enable hierarchical scheduling in BFQ, using the blkio controller. ++ ++ Enable hierarchical scheduling in BFQ, using the blkio ++ (cgroups-v1) or io (cgroups-v2) controller. + + choice + prompt "Default I/O scheduler" + +From 880441682487350cd5b116c2cea830351f9c9478 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Wed, 7 Dec 2016 19:43:58 +0100 +Subject: [PATCH 12/24] Add and improve pointers in initial comments + +--- + block/bfq-iosched.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index 0f3081d..b07d251 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -14,6 +14,12 @@ + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ + * file. + * ++ * BFQ is a proportional-share I/O scheduler, with some extra ++ * low-latency capabilities. BFQ also supports full hierarchical ++ * scheduling through cgroups. Next paragraphs provide an introduction ++ * on BFQ inner workings. Details on BFQ benefits and usage can be ++ * found in Documentation/block/bfq-iosched.txt. ++ * + * BFQ is a proportional-share storage-I/O scheduling algorithm based + * on the slice-by-slice service scheme of CFQ. But BFQ assigns + * budgets, measured in number of sectors, to processes instead of +@@ -43,10 +49,10 @@ + * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) + * complexity derives from the one introduced with EEVDF in [3]. + * +- * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness +- * with the BFQ Disk I/O Scheduler'', +- * Proceedings of the 5th Annual International Systems and Storage +- * Conference (SYSTOR '12), June 2012. ++ * [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O ++ * Scheduler", Proceedings of the First Workshop on Mobile System ++ * Technologies (MST-2015), May 2015. ++ * http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf + * + * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf + * + +From 60dad3833c9afa52315508579cbb4bac7e55d3a3 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Fri, 9 Dec 2016 07:05:29 +0100 +Subject: [PATCH 13/24] Fix typo in comments on bfq_lookup_next_entity + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-sched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/bfq-sched.c b/block/bfq-sched.c +index 45d63d3..9afeaab 100644 +--- a/block/bfq-sched.c ++++ b/block/bfq-sched.c +@@ -1265,7 +1265,7 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool force) + * NOTE: since we cache the next_in_service entity at each level of the + * hierarchy, the complexity of the lookup can be decreased with + * absolutely no effort just returning the cached next_in_service value; +- * we prefer to do full lookups to test the consistency of * the data ++ * we prefer to do full lookups to test the consistency of the data + * structures. + */ + static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, + +From 15f446f3bc189df88c64366930b5e237c8d9d5d5 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Fri, 9 Dec 2016 09:46:41 +0100 +Subject: [PATCH 14/24] Fix false-positive warning for uninitialized var + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-iosched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index b07d251..3043e5e 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -5044,7 +5044,7 @@ STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0, + static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)\ + { \ + struct bfq_data *bfqd = e->elevator_data; \ +- unsigned long __data; \ ++ unsigned long uninitialized_var(__data); \ + int ret = bfq_var_store(&__data, (page), count); \ + if (__data < (MIN)) \ + __data = (MIN); \ + +From 1430feb8c7194f72be0a8066ad6b996d78bf3e09 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Sun, 11 Dec 2016 11:46:13 +0100 +Subject: [PATCH 15/24] Improve and fix some bugs in hierarchical code + +This commit provides the following contributions. + +BUGFIX Removed the check that, when the new queue to set in service +must be selected, the cached next_in_service entities coincide with +the entities chosen by __bfq_lookup_next_entity. This check, +issuing a warning on failure, was wrong, because the cached and the +newly chosen entity could differ in case of a CLASS_IDLE timeout. + +EFFICIENCY IMPROVEMENT (this improvement is related to the above +BUGFIX) The cached next_in_service entities are now really used to +select the next queue to serve when the in-service queue +expires. Before this change, the cached values were used only for +extra (and in general wrong) consistency checks. This caused +additional overhead instead of reducing it. + +EFFICIENCY IMPROVEMENT The next entity to serve, for each level of the +hierarchy, is now updated on every event that may change it, i.e., on +every activation or deactivation of any entity. This finer granularity +is not strictly needed for corectness, because it is only on queue +expirations that BFQ needs to know what are the next entities to +serve. Yet this change makes it possible to implement optimizations in +which it is necessary to know the next queue to serve before the +in-service queue expires. + +SERVICE-ACCURACY IMPROVEMENT The per-device CLASS_IDLE service timeout +has been turned into a much more accurate per-group timeout. + +CODE-QUALITY IMPROVEMENT The non-trivial parts touched by the above +improvements have been partially rewritten, and enriched of comments, +so as to improve their transparency and understandability. + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-cgroup.c | 2 +- + block/bfq-iosched.c | 2 +- + block/bfq-sched.c | 497 ++++++++++++++++++++++++++++++++++++---------------- + block/bfq.h | 5 +- + 4 files changed, 348 insertions(+), 158 deletions(-) + +diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c +index b50ae8e..a04bc40 100644 +--- a/block/bfq-cgroup.c ++++ b/block/bfq-cgroup.c +@@ -314,7 +314,7 @@ static void bfq_init_entity(struct bfq_entity *entity, + bfqq->ioprio_class = bfqq->new_ioprio_class; + bfqg_get(bfqg); + } +- entity->parent = bfqg->my_entity; ++ entity->parent = bfqg->my_entity; /* NULL for root group */ + entity->sched_data = &bfqg->sched_data; + } + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index 3043e5e..2d00bd10 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -4763,6 +4763,7 @@ static void bfq_init_root_group(struct bfq_group *root_group, + root_group->rq_pos_tree = RB_ROOT; + for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) + root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; ++ root_group->sched_data.bfq_class_idle_last_service = jiffies; + } + + static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) +@@ -4836,7 +4837,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) + bfqd->bfq_back_max = bfq_back_max; + bfqd->bfq_back_penalty = bfq_back_penalty; + bfqd->bfq_slice_idle = bfq_slice_idle; +- bfqd->bfq_class_idle_last_service = 0; + bfqd->bfq_timeout = bfq_timeout; + + bfqd->bfq_requests_within_timer = 120; +diff --git a/block/bfq-sched.c b/block/bfq-sched.c +index 9afeaab..2f54ac5 100644 +--- a/block/bfq-sched.c ++++ b/block/bfq-sched.c +@@ -15,16 +15,15 @@ + static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); + + #ifdef CONFIG_BFQ_GROUP_IOSCHED +-#define for_each_entity(entity) \ ++/* both next loops stop at one of the child entities of the root group */ ++#define for_each_entity(entity) \ + for (; entity ; entity = entity->parent) + + #define for_each_entity_safe(entity, parent) \ + for (; entity && ({ parent = entity->parent; 1; }); entity = parent) + + +-static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, +- int extract, +- struct bfq_data *bfqd); ++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd); + + static void bfq_update_budget(struct bfq_entity *next_in_service) + { +@@ -47,23 +46,18 @@ static void bfq_update_budget(struct bfq_entity *next_in_service) + bfqg_entity->budget = next_in_service->budget; + } + +-static int bfq_update_next_in_service(struct bfq_sched_data *sd) ++/* ++ * Incomplete version that always returns true. It will return also ++ * false in its complete version, in case either next_in_service has ++ * not changed, or next_in_service has changed but in a way that will ++ * not influence upper levels. ++ */ ++static bool bfq_update_next_in_service(struct bfq_sched_data *sd) + { + struct bfq_entity *next_in_service; + struct bfq_queue *bfqq; + +- if (sd->in_service_entity) +- /* will update/requeue at the end of service */ +- return 0; +- +- /* +- * NOTE: this can be improved in many ways, such as returning +- * 1 (and thus propagating upwards the update) only when the +- * budget changes, or caching the bfqq that will be scheduled +- * next from this subtree. By now we worry more about +- * correctness than about performance... +- */ +- next_in_service = bfq_lookup_next_entity(sd, 0, NULL); ++ next_in_service = bfq_lookup_next_entity(sd); + sd->next_in_service = next_in_service; + + if (next_in_service) +@@ -75,6 +69,7 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd) + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, + "update_next_in_service: chosen this queue"); ++#ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = + container_of(next_in_service, +@@ -83,15 +78,11 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd) + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "update_next_in_service: chosen this entity"); + } ++#endif + exit: +- return 1; ++ return true; + } + +-static void bfq_check_next_in_service(struct bfq_sched_data *sd, +- struct bfq_entity *entity) +-{ +- WARN_ON(sd->next_in_service != entity); +-} + #else + #define for_each_entity(entity) \ + for (; entity ; entity = NULL) +@@ -104,11 +95,6 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd) + return 0; + } + +-static void bfq_check_next_in_service(struct bfq_sched_data *sd, +- struct bfq_entity *entity) +-{ +-} +- + static void bfq_update_budget(struct bfq_entity *next_in_service) + { + } +@@ -851,23 +837,59 @@ static void __bfq_activate_entity(struct bfq_entity *entity, + BUG_ON(!sd); + BUG_ON(!st); + if (entity == sd->in_service_entity) { +- BUG_ON(entity->tree); + /* +- * If we are requeueing the current entity we have +- * to take care of not charging to it service it has +- * not received. ++ * We are requeueing the current in-service entity, ++ * because of the requeueing of a just-expired ++ * non-idle leaf entity in the path originating from ++ * this entity. In fact, in this case, then all ++ * entities in this path need to be requeued again for ++ * next service. ++ * ++ * Before requeueing, the start time of the entity ++ * must be moved forward to account for the service ++ * that the entity has received while in service. The ++ * finish time will then be updated according to this ++ * new value of the start time, and to the budget of ++ * the entity. + */ + bfq_calc_finish(entity, entity->service); + entity->start = entity->finish; + sd->in_service_entity = NULL; ++ BUG_ON(entity->tree && entity->tree != &st->active); ++ /* ++ * In addition, if the entity had more than one child ++ * when set in service, then was not extracted from ++ * the active tree. This implies that the position of ++ * the entity in the active tree may need to be ++ * changed now, because we have just updated the start ++ * time of the entity, and we will update its finish ++ * time in a moment (the requeueing is then, more ++ * precisely, a repositioning in this case). To ++ * implement this repositioning, we: 1) dequeue the ++ * entity here, 2) update the finish time and ++ * requeue the entity according to the new ++ * timestamps below. ++ */ ++ if (entity->tree) ++ bfq_active_extract(st, entity); + } else if (entity->tree == &st->active) { + /* +- * Requeueing an entity due to a change of some +- * next_in_service entity below it. We reuse the +- * old start time. ++ * The entity is already active, and not in ++ * service. In this case, this function gets called ++ * only if the next_in_service entity below this ++ * entity has changed, and this change has caused the ++ * budget of this entity to change, which, finally ++ * implies that the finish time of this entity must be ++ * updated. Such an update may cause the scheduling, ++ * i.e., the position in the active tree, of this ++ * entity to change. We handle this fact by: 1) ++ * dequeueing the entity here, 2) updating the finish ++ * time and requeueing the entity according to the new ++ * timestamps below. This is the same approach as the ++ * non-extracted-entity sub-case above. + */ + bfq_active_extract(st, entity); +- } else { ++ } else { /* This is a 'true' activation, not a requeueing. */ + unsigned long long min_vstart; + + /* See comments on bfq_fqq_update_budg_for_activation */ +@@ -977,6 +999,11 @@ static void __bfq_activate_entity(struct bfq_entity *entity, + entity->start <= st->vtime ? "" : "non ", st); + #endif + } ++ BUG_ON(RB_EMPTY_ROOT(&st->active)); ++ BUG_ON(&st->active != &sd->service_tree->active && ++ &st->active != &(sd->service_tree+1)->active && ++ &st->active != &(sd->service_tree+2)->active); ++ + } + + /** +@@ -996,13 +1023,20 @@ static void bfq_activate_entity(struct bfq_entity *entity, + __bfq_activate_entity(entity, non_blocking_wait_rq); + + sd = entity->sched_data; +- if (!bfq_update_next_in_service(sd)) ++ BUG_ON(RB_EMPTY_ROOT(&sd->service_tree->active) && ++ RB_EMPTY_ROOT(&(sd->service_tree+1)->active) && ++ RB_EMPTY_ROOT(&(sd->service_tree+2)->active)); ++ ++ if (!bfq_update_next_in_service(sd)) { ++ BUG_ON(!sd->next_in_service); + /* + * No need to propagate the activation to the + * upper entities, as they will be updated when + * the in-service entity is rescheduled. + */ + break; ++ } ++ BUG_ON(!sd->next_in_service); + } + } + +@@ -1016,31 +1050,33 @@ static void bfq_activate_entity(struct bfq_entity *entity, + * any scheduler tree, extract it from that tree, and if necessary + * and if the caller did not specify @requeue, put it on the idle tree. + * +- * Return %1 if the caller should update the entity hierarchy, i.e., ++ * Return %true if the caller should update the entity hierarchy, i.e., + * if the entity was in service or if it was the next_in_service for +- * its sched_data; return %0 otherwise. ++ * its sched_data; return %false otherwise. + */ +-static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) ++static bool __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) + { + struct bfq_sched_data *sd = entity->sched_data; + struct bfq_service_tree *st; + int was_in_service; +- int ret = 0; ++ bool ret = false; + + if (sd == NULL || !entity->on_st) /* never activated, or inactive */ +- return 0; ++ return false; + + st = bfq_entity_service_tree(entity); + was_in_service = entity == sd->in_service_entity; + +- BUG_ON(was_in_service && entity->tree); ++ BUG_ON(was_in_service && entity->tree && entity->tree != &st->active); + + if (was_in_service) { + bfq_calc_finish(entity, entity->service); + sd->in_service_entity = NULL; +- } else if (entity->tree == &st->active) ++ } ++ ++ if (entity->tree == &st->active) + bfq_active_extract(st, entity); +- else if (entity->tree == &st->idle) ++ else if (!was_in_service && entity->tree == &st->idle) + bfq_idle_extract(st, entity); + else if (entity->tree) + BUG(); +@@ -1124,19 +1160,13 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) + } + + /** +- * bfq_update_vtime - update vtime if necessary. ++ * bfq_calc_vtime_jump - compute the value to which the vtime should jump, ++ * if needed, to have at least one entity eligible. + * @st: the service tree to act upon. + * +- * If necessary update the service tree vtime to have at least one +- * eligible entity, skipping to its start time. Assumes that the +- * active tree of the device is not empty. +- * +- * NOTE: this hierarchical implementation updates vtimes quite often, +- * we may end up with reactivated processes getting timestamps after a +- * vtime skip done because we needed a ->first_active entity on some +- * intermediate node. ++ * Assumes that st is not empty. + */ +-static void bfq_update_vtime(struct bfq_service_tree *st) ++static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st) + { + struct bfq_entity *entry; + struct rb_node *node = st->active.rb_node; +@@ -1144,22 +1174,30 @@ static void bfq_update_vtime(struct bfq_service_tree *st) + entry = rb_entry(node, struct bfq_entity, rb_node); + if (bfq_gt(entry->min_start, st->vtime)) { + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entry); +- st->vtime = entry->min_start; + + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "update_vtime: new vtime %llu %p", +- ((st->vtime>>10)*1000)>>12, st); ++ "calc_vtime_jump: new value %llu", ++ entry->min_start); + #ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = + container_of(entry, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, +- "update_vtime: new vtime %llu %p", +- ((st->vtime>>10)*1000)>>12, st); ++ "calc_vtime_jump: new value %llu", ++ entry->min_start); + } + #endif ++ return entry->min_start; ++ } ++ return st->vtime; ++} ++ ++static void bfq_update_vtime(struct bfq_service_tree *st, u64 new_value) ++{ ++ if (new_value > st->vtime) { ++ st->vtime = new_value; + bfq_forget_idle(st); + } + } +@@ -1168,6 +1206,7 @@ static void bfq_update_vtime(struct bfq_service_tree *st) + * bfq_first_active_entity - find the eligible entity with + * the smallest finish time + * @st: the service tree to select from. ++ * @vtime: the system virtual to use as a reference for eligibility + * + * This function searches the first schedulable entity, starting from the + * root of the tree and going on the left every time on this side there is +@@ -1175,7 +1214,8 @@ static void bfq_update_vtime(struct bfq_service_tree *st) + * the right is followed only if a) the left subtree contains no eligible + * entities and b) no eligible entity has been found yet. + */ +-static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) ++static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st, ++ u64 vtime) + { + struct bfq_entity *entry, *first = NULL; + struct rb_node *node = st->active.rb_node; +@@ -1183,15 +1223,15 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) + while (node) { + entry = rb_entry(node, struct bfq_entity, rb_node); + left: +- if (!bfq_gt(entry->start, st->vtime)) ++ if (!bfq_gt(entry->start, vtime)) + first = entry; + +- BUG_ON(bfq_gt(entry->min_start, st->vtime)); ++ BUG_ON(bfq_gt(entry->min_start, vtime)); + + if (node->rb_left) { + entry = rb_entry(node->rb_left, + struct bfq_entity, rb_node); +- if (!bfq_gt(entry->min_start, st->vtime)) { ++ if (!bfq_gt(entry->min_start, vtime)) { + node = node->rb_left; + goto left; + } +@@ -1209,28 +1249,71 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) + * __bfq_lookup_next_entity - return the first eligible entity in @st. + * @st: the service tree. + * +- * Update the virtual time in @st and return the first eligible entity +- * it contains. ++ * If there is no in-service entity for the sched_data st belongs to, ++ * then return the entity that will be set in service if: ++ * 1) the parent entity this st belongs to is set in service; ++ * 2) no entity belonging to such parent entity undergoes a state change ++ * that would influence the timestamps of the entity (e.g., becomes idle, ++ * becomes backlogged, changes its budget, ...). ++ * ++ * In this first case, update the virtual time in @st too (see the ++ * comments on this update inside the function). ++ * ++ * In constrast, if there is an in-service entity, then return the ++ * entity that would be set in service if not only the above ++ * conditions, but also the next one held true: the currently ++ * in-service entity, on expiration, ++ * 1) gets a finish time equal to the current one, or ++ * 2) is not eligible any more, or ++ * 3) is idle. + */ + static struct bfq_entity * +-__bfq_lookup_next_entity(struct bfq_service_tree *st, bool force) ++__bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service ++#if 0 ++ , bool force ++#endif ++ ) + { +- struct bfq_entity *entity, *new_next_in_service = NULL; ++ struct bfq_entity *entity ++#if 0 ++ , *new_next_in_service = NULL ++#endif ++ ; ++ u64 new_vtime; + struct bfq_queue *bfqq; + + if (RB_EMPTY_ROOT(&st->active)) + return NULL; + +- bfq_update_vtime(st); +- entity = bfq_first_active_entity(st); +- BUG_ON(bfq_gt(entity->start, st->vtime)); ++ /* ++ * Get the value of the system virtual time for which at ++ * least one entity is eligible. ++ */ ++ new_vtime = bfq_calc_vtime_jump(st); ++ ++ /* ++ * If there is no in-service entity for the sched_data this ++ * active tree belongs to, then push the system virtual time ++ * up to the value that guarantees that at least one entity is ++ * eligible. If, instead, there is an in-service entity, then ++ * do not make any such update, because there is already an ++ * eligible entity, namely the in-service one (even if the ++ * entity is not on st, because it was extracted when set in ++ * service). ++ */ ++ if (!in_service) ++ bfq_update_vtime(st, new_vtime); + ++ entity = bfq_first_active_entity(st, new_vtime); ++ BUG_ON(bfq_gt(entity->start, new_vtime)); ++ ++ /* Log some information */ + bfqq = bfq_entity_to_bfqq(entity); + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, + "__lookup_next: start %llu vtime %llu st %p", + ((entity->start>>10)*1000)>>12, +- ((st->vtime>>10)*1000)>>12, st); ++ ((new_vtime>>10)*1000)>>12, st); + #ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = +@@ -1239,20 +1322,11 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool force) + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "__lookup_next: start %llu vtime %llu st %p", + ((entity->start>>10)*1000)>>12, +- ((st->vtime>>10)*1000)>>12, st); ++ ((new_vtime>>10)*1000)>>12, st); + } + #endif + +- /* +- * If the chosen entity does not match with the sched_data's +- * next_in_service and we are forcedly serving the IDLE priority +- * class tree, bubble up budget update. +- */ +- if (unlikely(force && entity != entity->sched_data->next_in_service)) { +- new_next_in_service = entity; +- for_each_entity(new_next_in_service) +- bfq_update_budget(new_next_in_service); +- } ++ BUG_ON(!entity); + + return entity; + } +@@ -1260,90 +1334,70 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool force) + /** + * bfq_lookup_next_entity - return the first eligible entity in @sd. + * @sd: the sched_data. +- * @extract: if true the returned entity will be also extracted from @sd. + * +- * NOTE: since we cache the next_in_service entity at each level of the +- * hierarchy, the complexity of the lookup can be decreased with +- * absolutely no effort just returning the cached next_in_service value; +- * we prefer to do full lookups to test the consistency of the data +- * structures. ++ * This function is invoked when there has been a change in the trees ++ * for sd, and we need know what is the new next entity after this ++ * change. + */ +-static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, +- int extract, +- struct bfq_data *bfqd) ++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd) + { + struct bfq_service_tree *st = sd->service_tree; +- struct bfq_entity *entity; +- int i = 0; +- +- BUG_ON(sd->in_service_entity); ++ struct bfq_service_tree *idle_class_st = st + (BFQ_IOPRIO_CLASSES - 1); ++ struct bfq_entity *entity = NULL; ++ struct bfq_queue *bfqq; ++ int class_idx = 0; + ++ BUG_ON(!sd); ++ BUG_ON(!st); + /* + * Choose from idle class, if needed to guarantee a minimum +- * bandwidth to this class. This should also mitigate ++ * bandwidth to this class (and if there is some active entity ++ * in idle class). This should also mitigate + * priority-inversion problems in case a low priority task is + * holding file system resources. + */ +- if (bfqd && +- jiffies - bfqd->bfq_class_idle_last_service > +- BFQ_CL_IDLE_TIMEOUT) { +- entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, +- true); +- if (entity) { +- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ if (time_is_before_jiffies(sd->bfq_class_idle_last_service + ++ BFQ_CL_IDLE_TIMEOUT)) { ++ if (!RB_EMPTY_ROOT(&idle_class_st->active)) ++ class_idx = BFQ_IOPRIO_CLASSES - 1; ++ /* About to be served if backlogged, or not yet backlogged */ ++ sd->bfq_class_idle_last_service = jiffies; ++ } + +- if (bfqq) +- bfq_log_bfqq(bfqd, bfqq, +- "idle chosen from st %p %d", +- st + BFQ_IOPRIO_CLASSES - 1, +- BFQ_IOPRIO_CLASSES - 1); +-#ifdef CONFIG_BFQ_GROUP_IOSCHED +- else { +- struct bfq_group *bfqg = +- container_of(entity, struct bfq_group, entity); ++ /* ++ * Find the next entity to serve for the highest-priority ++ * class, unless the idle class needs to be served. ++ */ ++ for (; class_idx < BFQ_IOPRIO_CLASSES; class_idx++) { ++ entity = __bfq_lookup_next_entity(st + class_idx, ++ sd->in_service_entity); + +- bfq_log_bfqg(bfqd, bfqg, +- "idle chosen from st %p %d", +- st + BFQ_IOPRIO_CLASSES - 1, +- BFQ_IOPRIO_CLASSES - 1); +- } +-#endif +- i = BFQ_IOPRIO_CLASSES - 1; +- bfqd->bfq_class_idle_last_service = jiffies; +- sd->next_in_service = entity; +- } ++ if (entity) ++ break; + } +- for (; i < BFQ_IOPRIO_CLASSES; i++) { +- entity = __bfq_lookup_next_entity(st + i, false); +- if (entity) { +- if (bfqd != NULL) { +- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + +- if (bfqq) +- bfq_log_bfqq(bfqd, bfqq, +- "chosen from st %p %d", +- st + i, i); +-#ifdef CONFIG_BFQ_GROUP_IOSCHED +- else { +- struct bfq_group *bfqg = +- container_of(entity, struct bfq_group, entity); ++ BUG_ON(!entity && ++ (!RB_EMPTY_ROOT(&st->active) || !RB_EMPTY_ROOT(&(st+1)->active) || ++ !RB_EMPTY_ROOT(&(st+2)->active))); + +- bfq_log_bfqg(bfqd, bfqg, +- "chosen from st %p %d", +- st + i, i); +- } +-#endif +- } ++ if (!entity) ++ return NULL; + +- if (extract) { +- bfq_check_next_in_service(sd, entity); +- bfq_active_extract(st + i, entity); +- sd->in_service_entity = entity; +- sd->next_in_service = NULL; +- } +- break; +- } ++ /* Log some information */ ++ bfqq = bfq_entity_to_bfqq(entity); ++ if (bfqq) ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "chosen from st %p %d", ++ st + class_idx, class_idx); ++#ifdef CONFIG_BFQ_GROUP_IOSCHED ++ else { ++ struct bfq_group *bfqg = ++ container_of(entity, struct bfq_group, entity); ++ ++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, ++ "chosen from st %p %d", ++ st + class_idx, class_idx); + } ++#endif + + return entity; + } +@@ -1356,6 +1410,43 @@ static bool next_queue_may_preempt(struct bfq_data *bfqd) + } + + /* ++ * This function tells whether entity stops being a candidate for next ++ * service, according to the following logic. ++ * ++ * This function is invoked for an entity that is about to be set in ++ * service. If such an entity is a queue, then the entity is no longer ++ * a candidate for next service (i.e, a candidate entity to serve ++ * after the in-service entity is expired). The function then returns ++ * true. ++ * ++ * In contrast, the entity could stil be a candidate for next service ++ * if it is not a queue, and has more than one child. In fact, even if ++ * one of its children is about to be set in service, other children ++ * may still be the next to serve. As a consequence, a non-queue ++ * entity is not a candidate for next-service only if it has only one ++ * child. And only if this condition holds, then the function returns ++ * true for a non-queue entity. ++ */ ++static bool bfq_no_longer_next_in_service(struct bfq_entity *entity) ++{ ++ struct bfq_group *bfqg; ++ ++ if (bfq_entity_to_bfqq(entity)) ++ return true; ++ ++#ifdef CONFIG_BFQ_GROUP_IOSCHED ++ bfqg = container_of(entity, struct bfq_group, entity); ++ ++ BUG_ON(bfqg == ((struct bfq_data *)(bfqg->bfqd))->root_group); ++ BUG_ON(bfqg->active_entities == 0); ++ if (bfqg->active_entities == 1) ++ return true; ++#endif ++ ++ return false; ++} ++ ++/* + * Get next queue for service. + */ + static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) +@@ -1369,6 +1460,11 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) + if (bfqd->busy_queues == 0) + return NULL; + ++ /* ++ * Traverse the path from the root to the leaf entity to ++ * serve. Set in service all the entities visited along the ++ * way. ++ */ + sd = &bfqd->root_group->sched_data; + for (; sd ; sd = entity->my_sched_data) { + #ifdef CONFIG_BFQ_GROUP_IOSCHED +@@ -1378,13 +1474,96 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) + + bfq_log_bfqg(bfqd, bfqg, + "get_next_queue: lookup in this group"); +- } else ++ if (!sd->next_in_service) ++ pr_crit("get_next_queue: lookup in this group"); ++ } else { + bfq_log_bfqg(bfqd, bfqd->root_group, + "get_next_queue: lookup in root group"); ++ if (!sd->next_in_service) ++ pr_crit("get_next_queue: lookup in root group"); ++ } + #endif + +- entity = bfq_lookup_next_entity(sd, 1, bfqd); ++ BUG_ON(!sd->next_in_service); ++ ++ /* ++ * WARNING. We are about to set the in-service entity ++ * to sd->next_in_service, i.e., to the (cached) value ++ * returned by bfq_lookup_next_entity(sd) the last ++ * time it was invoked, i.e., the last time when the ++ * service order in sd changed as a consequence of the ++ * activation or deactivation of an entity. In this ++ * respect, if we execute bfq_lookup_next_entity(sd) ++ * in this very moment, it may, although with low ++ * probability, yield a different entity than that ++ * pointed to by sd->next_in_service. This rare event ++ * happens in case there was no CLASS_IDLE entity to ++ * serve for sd when bfq_lookup_next_entity(sd) was ++ * invoked for the last time, while there is now one ++ * such entity. ++ * ++ * If the above event happens, then the scheduling of ++ * such entity in CLASS_IDLE is postponed until the ++ * service of the sd->next_in_service entity ++ * finishes. In fact, when the latter is expired, ++ * bfq_lookup_next_entity(sd) gets called again, ++ * exactly to update sd->next_in_service. ++ */ ++ ++ /* Make next_in_service entity become in_service_entity */ ++ entity = sd->next_in_service; ++ sd->in_service_entity = entity; ++ ++ /* ++ * Reset the accumulator of the amount of service that ++ * the entity is about to receive. ++ */ ++ entity->service = 0; ++ ++ /* ++ * If entity is no longer a candidate for next ++ * service, then we extract it from its active tree, ++ * for the following reason. To further boost the ++ * throughput in some special case, BFQ needs to know ++ * which is the next candidate entity to serve, while ++ * there is already an entity in service. In this ++ * respect, to make it easy to compute/update the next ++ * candidate entity to serve after the current ++ * candidate has been set in service, there is a case ++ * where it is necessary to extract the current ++ * candidate from its service tree. Such a case is ++ * when the entity just set in service cannot be also ++ * a candidate for next service. Details about when ++ * this conditions holds are reported in the comments ++ * on the function bfq_no_longer_next_in_service() ++ * invoked below. ++ */ ++ if (bfq_no_longer_next_in_service(entity)) ++ bfq_active_extract(bfq_entity_service_tree(entity), ++ entity); ++ ++ /* ++ * For the same reason why we may have just extracted ++ * entity from its active tree, we may need to update ++ * next_in_service for the sched_data of entity too, ++ * regardless of whether entity has been extracted. ++ * In fact, even if entity has not been extracted, a ++ * descendant entity may get extracted. Such an event ++ * would cause a change in next_in_service for the ++ * level of the descendant entity, and thus possibly ++ * back to upper levels. ++ * ++ * We cannot perform the resulting needed update ++ * before the end of this loop, because, to know which ++ * is the correct next-to-serve candidate entity for ++ * each level, we need first to find the leaf entity ++ * to set in service. In fact, only after we know ++ * which is the next-to-serve leaf entity, we can ++ * discover whether the parent entity of the leaf ++ * entity becomes the next-to-serve, and so on. ++ */ + ++ /* Log some information */ + bfqq = bfq_entity_to_bfqq(entity); + if (bfqq) + bfq_log_bfqq(bfqd, bfqq, +@@ -1401,13 +1580,23 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) + } + #endif + +- BUG_ON(!entity); +- entity->service = 0; + } + ++ BUG_ON(!entity); + bfqq = bfq_entity_to_bfqq(entity); + BUG_ON(!bfqq); + ++ /* ++ * We can finally update all next-to-serve entities along the ++ * path from the leaf entity just set in service to the root. ++ */ ++ for_each_entity(entity) { ++ struct bfq_sched_data *sd = entity->sched_data; ++ ++ if(!bfq_update_next_in_service(sd)) ++ break; ++ } ++ + return bfqq; + } + +diff --git a/block/bfq.h b/block/bfq.h +index b80abe0..6b28030 100644 +--- a/block/bfq.h ++++ b/block/bfq.h +@@ -88,6 +88,9 @@ struct bfq_sched_data { + struct bfq_entity *next_in_service; + /* array of service trees, one per ioprio_class */ + struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; ++ /* last time CLASS_IDLE was served */ ++ unsigned long bfq_class_idle_last_service; ++ + }; + + /** +@@ -487,8 +490,6 @@ struct bfq_data { + unsigned int bfq_back_max; + /* maximum idling time */ + u32 bfq_slice_idle; +- /* last time CLASS_IDLE was served */ +- u64 bfq_class_idle_last_service; + + /* user-configured max budget value (0 for auto-tuning) */ + int bfq_user_max_budget; + +From b5734696e18577dda6ef6571af7cb3718937d2f5 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Wed, 14 Dec 2016 17:56:25 +0100 +Subject: [PATCH 16/24] BFQ v8r6-rc1 + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-iosched.c | 2 +- + block/bfq.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index 2d00bd10..72443bd 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -5218,7 +5218,7 @@ static struct blkcg_policy blkcg_policy_bfq = { + static int __init bfq_init(void) + { + int ret; +- char msg[50] = "BFQ I/O-scheduler: v8r5"; ++ char msg[60] = "BFQ I/O-scheduler: v8r6-rc1"; + + #ifdef CONFIG_BFQ_GROUP_IOSCHED + ret = blkcg_policy_register(&blkcg_policy_bfq); +diff --git a/block/bfq.h b/block/bfq.h +index 6b28030..d4d784f 100644 +--- a/block/bfq.h ++++ b/block/bfq.h +@@ -1,5 +1,5 @@ + /* +- * BFQ-v8r5 for 4.8.0: data structures and common functions prototypes. ++ * BFQ v8r6-rc1 for 4.9.0: data structures and common functions prototypes. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> + +From a7c98629904dcc103d676c2b8e2ceaff4ca19ca2 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Wed, 14 Dec 2016 18:07:41 +0100 +Subject: [PATCH 17/24] Port and improve CFQ commit 41647e7a + +BFQ currently used the same logic for detecting seeky queues for +for rotational disks and SSDs. This logic is appropriate for the +former, as it takes into account only inter-request distance, and +this is the dominant latency factor on a rotational device. Yet +things change with flash-based devices, where servign a large +requests still yields a high throughput, even the request is far +from the previous request served. This commits extends seeky +detection to take into accoutn also this fact with flash-based +devices. In particular, this commit is an improved port of the +original commit 41647e7a for CFQ. + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-iosched.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index 72443bd..153292f 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -117,6 +117,7 @@ struct kmem_cache *bfq_pool; + #define BFQ_HW_QUEUE_SAMPLES 32 + + #define BFQQ_SEEK_THR (sector_t)(8 * 100) ++#define BFQQ_SECT_THR_NONROT (sector_t)(2 * 32) + #define BFQQ_CLOSE_THR (sector_t)(8 * 1024) + #define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8) + +@@ -4117,7 +4118,9 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq, + { + bfqq->seek_history <<= 1; + bfqq->seek_history |= +- get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR; ++ get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR && ++ (!blk_queue_nonrot(bfqd->queue) || ++ blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT); + } + + /* + +From 5cacb798d523b9fa7e394ca3a369a0c196e0b022 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Thu, 15 Dec 2016 15:57:22 +0100 +Subject: [PATCH 18/24] Provide more details in switching-off-wr message + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-iosched.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index 153292f..a5853c7 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -656,7 +656,9 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) + time_is_before_jiffies(bfqq->last_wr_start_finish + + bfqq->wr_cur_max_time))) { + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "resume state: switching off wr"); ++ "resume state: switching off wr (%u + %u < %u)", ++ bfqq->last_wr_start_finish, bfqq->wr_cur_max_time, ++ jiffies); + + bfqq->wr_coeff = 1; + } + +From 6f62391c914a25818f839b932a9e1b16ee28c1f1 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Thu, 15 Dec 2016 16:10:17 +0100 +Subject: [PATCH 19/24] Remove useless parameter from bfq_del_bfqq_busy + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-iosched.c | 6 +++--- + block/bfq-sched.c | 5 ++--- + 2 files changed, 5 insertions(+), 6 deletions(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index a5853c7..0d54cc1 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -656,7 +656,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) + time_is_before_jiffies(bfqq->last_wr_start_finish + + bfqq->wr_cur_max_time))) { + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "resume state: switching off wr (%u + %u < %u)", ++ "resume state: switching off wr (%lu + %lu < %lu)", + bfqq->last_wr_start_finish, bfqq->wr_cur_max_time, + jiffies); + +@@ -1525,7 +1525,7 @@ static void bfq_remove_request(struct request *rq) + BUG_ON(bfqq->entity.budget < 0); + + if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) { +- bfq_del_bfqq_busy(bfqd, bfqq, 1); ++ bfq_del_bfqq_busy(bfqd, bfqq); + + /* bfqq emptied. In normal operation, when + * bfqq is empty, bfqq->entity.service and +@@ -2662,7 +2662,7 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) + */ + bfqq->budget_timeout = jiffies; + +- bfq_del_bfqq_busy(bfqd, bfqq, 1); ++ bfq_del_bfqq_busy(bfqd, bfqq); + } else { + bfq_activate_bfqq(bfqd, bfqq); + /* +diff --git a/block/bfq-sched.c b/block/bfq-sched.c +index 2f54ac5..fae28ee 100644 +--- a/block/bfq-sched.c ++++ b/block/bfq-sched.c +@@ -1635,8 +1635,7 @@ static void bfqg_stats_update_dequeue(struct bfq_group *bfqg); + * Called when the bfqq no longer has requests pending, remove it from + * the service tree. + */ +-static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, +- int requeue) ++static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) + { + BUG_ON(!bfq_bfqq_busy(bfqq)); + BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); +@@ -1660,7 +1659,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, + + BUG_ON(bfqq->entity.budget < 0); + +- bfq_deactivate_bfqq(bfqd, bfqq, requeue); ++ bfq_deactivate_bfqq(bfqd, bfqq, 1); + + BUG_ON(bfqq->entity.budget < 0); + } + +From 1469db5fec95e3bfad0fe60dc1a843b0c5de4efa Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Thu, 15 Dec 2016 16:40:09 +0100 +Subject: [PATCH 20/24] Further improve code and comments for hierarchical code + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-cgroup.c | 6 +++--- + block/bfq-iosched.c | 2 +- + block/bfq-sched.c | 45 ++++++++++++++++++++++++++++++--------------- + 3 files changed, 34 insertions(+), 19 deletions(-) + +diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c +index a04bc40..bd31da5 100644 +--- a/block/bfq-cgroup.c ++++ b/block/bfq-cgroup.c +@@ -552,7 +552,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, + BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq)); + + if (bfq_bfqq_busy(bfqq)) +- bfq_deactivate_bfqq(bfqd, bfqq, 0); ++ bfq_deactivate_bfqq(bfqd, bfqq, false); + else if (entity->on_st) { + BUG_ON(&bfq_entity_service_tree(entity)->idle != + entity->tree); +@@ -664,7 +664,7 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st) + struct bfq_entity *entity = st->first_idle; + + for (; entity ; entity = st->first_idle) +- __bfq_deactivate_entity(entity, 0); ++ __bfq_deactivate_entity(entity, false); + } + + /** +@@ -769,7 +769,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) + BUG_ON(bfqg->sched_data.next_in_service); + BUG_ON(bfqg->sched_data.in_service_entity); + +- __bfq_deactivate_entity(entity, 0); ++ __bfq_deactivate_entity(entity, false); + bfq_put_async_queues(bfqd, bfqg); + BUG_ON(entity->tree); + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index 0d54cc1..7a56603 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -4738,7 +4738,7 @@ static void bfq_exit_queue(struct elevator_queue *e) + + BUG_ON(bfqd->in_service_queue); + list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) +- bfq_deactivate_bfqq(bfqd, bfqq, 0); ++ bfq_deactivate_bfqq(bfqd, bfqq, false); + + spin_unlock_irq(q->queue_lock); + +diff --git a/block/bfq-sched.c b/block/bfq-sched.c +index fae28ee..52911cc 100644 +--- a/block/bfq-sched.c ++++ b/block/bfq-sched.c +@@ -841,9 +841,9 @@ static void __bfq_activate_entity(struct bfq_entity *entity, + * We are requeueing the current in-service entity, + * because of the requeueing of a just-expired + * non-idle leaf entity in the path originating from +- * this entity. In fact, in this case, then all +- * entities in this path need to be requeued again for +- * next service. ++ * this entity. In fact, in this case, all entities in ++ * this path need to be requeued again for next ++ * service. + * + * Before requeueing, the start time of the entity + * must be moved forward to account for the service +@@ -882,7 +882,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity, + * implies that the finish time of this entity must be + * updated. Such an update may cause the scheduling, + * i.e., the position in the active tree, of this +- * entity to change. We handle this fact by: 1) ++ * entity to change. We handle this change by: 1) + * dequeueing the entity here, 2) updating the finish + * time and requeueing the entity according to the new + * timestamps below. This is the same approach as the +@@ -1054,7 +1054,7 @@ static void bfq_activate_entity(struct bfq_entity *entity, + * if the entity was in service or if it was the next_in_service for + * its sched_data; return %false otherwise. + */ +-static bool __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) ++static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue) + { + struct bfq_sched_data *sd = entity->sched_data; + struct bfq_service_tree *st; +@@ -1118,26 +1118,41 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) + if (sd->next_in_service) + /* + * The parent entity is still backlogged, +- * because next_in_service is not NULL, and ++ * because next_in_service is not NULL. So, no ++ * upwards deactivation is needed. Yet, + * next_in_service has been updated (see +- * comment on the body of the above if): +- * upwards update of the schedule is needed. ++ * comment on the body of the above if). Then ++ * the schedule nees to be updated upwards. + */ +- goto update; ++ goto update_schedule; + + /* +- * If we get here, then the parent is no more backlogged and +- * we want to propagate the deactivation upwards. ++ * If we get here, then the parent is no more ++ * backlogged and we need to propagate the ++ * deactivation upwards. Then let the loop go on. + */ +- requeue = 1; ++ ++ /* ++ * Also let parent be queued into the idle tree on ++ * deactivation, to preserve service guarantees, and ++ * assuming that who invoked this function does not ++ * need parent entities too to be removed by any tree. ++ */ ++ requeue = true; + } + + return; + +-update: ++update_schedule: + entity = parent; + for_each_entity(entity) { + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ /* ++ * Invoke __bfq_activate_entity on entity, even if ++ * already active, to update its position in the ++ * active tree (because sd->next_in_service has ++ * changed) ++ */ + __bfq_activate_entity(entity, false); + + sd = entity->sched_data; +@@ -1613,7 +1628,7 @@ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) + } + + static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, +- int requeue) ++ bool requeue) + { + struct bfq_entity *entity = &bfqq->entity; + +@@ -1659,7 +1674,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) + + BUG_ON(bfqq->entity.budget < 0); + +- bfq_deactivate_bfqq(bfqd, bfqq, 1); ++ bfq_deactivate_bfqq(bfqd, bfqq, true); + + BUG_ON(bfqq->entity.budget < 0); + } + +From d12d41bee45b988e1ae4f8dcece4bcecb5ad74a9 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Thu, 15 Dec 2016 22:11:39 +0100 +Subject: [PATCH 21/24] Optimize update of next_in_service entity + +If the update of the next_in_service candidate entity is triggered by +the activation of an entity, then it is not necessary to perform full +lookups in the active trees to update next_in_service. In fact, it is +enough to check whether the just-activated entity has a higher +priority of next_in_service, or, even if it has the same priority as +next_in_service, is eligible and has a lower virtual finish time than +next_in_service. If this compound condition holds, then the new entity +becomes the new next_in_service. Otherwise no change is needed. This +commit implements this optimization. + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-sched.c | 167 ++++++++++++++++++++++++++++++++++++++++-------------- + block/bfq.h | 11 +++- + 2 files changed, 134 insertions(+), 44 deletions(-) + +diff --git a/block/bfq-sched.c b/block/bfq-sched.c +index 52911cc..09e494b 100644 +--- a/block/bfq-sched.c ++++ b/block/bfq-sched.c +@@ -14,6 +14,18 @@ + + static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); + ++/** ++ * bfq_gt - compare two timestamps. ++ * @a: first ts. ++ * @b: second ts. ++ * ++ * Return @a > @b, dealing with wrapping correctly. ++ */ ++static int bfq_gt(u64 a, u64 b) ++{ ++ return (s64)(a - b) > 0; ++} ++ + #ifdef CONFIG_BFQ_GROUP_IOSCHED + /* both next loops stop at one of the child entities of the root group */ + #define for_each_entity(entity) \ +@@ -46,18 +58,99 @@ static void bfq_update_budget(struct bfq_entity *next_in_service) + bfqg_entity->budget = next_in_service->budget; + } + +-/* +- * Incomplete version that always returns true. It will return also +- * false in its complete version, in case either next_in_service has +- * not changed, or next_in_service has changed but in a way that will +- * not influence upper levels. ++static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree) ++{ ++ struct rb_node *node = tree->rb_node; ++ ++ return rb_entry(node, struct bfq_entity, rb_node); ++} ++ ++/** ++ * bfq_update_next_in_service - update sd->next_in_service ++ * @sd: sched_data for which to perform the update. ++ * @new_entity: if not NULL, pointer to the entity whose activation ++ * triggered the invocation of this function. ++ * ++ * This function is called to update sd->next_in_service, which, in ++ * its turn, may change as a consequence of the insertion or ++ * extraction of an entity into/from one of the active trees of ++ * sd. These insertions/extractions occur as a consequence of ++ * activations/deactivations of entities, with some activations being ++ * 'true' activations, and other activations being requeueings (i.e., ++ * implementing the second, requeueing phase of the mechanism used to ++ * reposition an entity in its active tree; see comments on ++ * __bfq_activate_entity for details). In both the last two activation ++ * sub-cases, new_entity points to the just activated or requeued ++ * entity. ++ * ++ * This is a still incomplete version of this function, which always ++ * returns true. It will return also false in its complete version, in ++ * case either next_in_service has not changed, or next_in_service has ++ * changed but in a way that will not influence upper levels. + */ +-static bool bfq_update_next_in_service(struct bfq_sched_data *sd) ++static bool bfq_update_next_in_service(struct bfq_sched_data *sd, ++ struct bfq_entity *new_entity) + { +- struct bfq_entity *next_in_service; ++ struct bfq_entity *next_in_service = sd->next_in_service; + struct bfq_queue *bfqq; + +- next_in_service = bfq_lookup_next_entity(sd); ++ /* ++ * If this update is triggered by the activation of a new ++ * entity, then a full lookup in the active tree can be ++ * avoided. In fact, it is enough to check whether the ++ * just-activated entity has a higher priority of ++ * sd->next_in_service, or, even if it has the same priority ++ * as sd->next_in_service, is eligible and has a lower virtual ++ * finish time than sd->next_in_service. If this compound ++ * condition holds, then the new entity becomes the new ++ * next_in_service. Otherwise no change is needed. ++ */ ++ if (new_entity) { ++ /* ++ * Flag used to decide whether to replace ++ * sd->next_in_service with new_entity. Tentatively ++ * set to true, and left as true if ++ * sd->next_in_service is NULL. ++ */ ++ bool replace_next = true; ++ ++ if (new_entity == sd->next_in_service) ++ return false; ++ ++ /* ++ * If there is already a next_in_service candidate ++ * entity, then compare class priorities or timestamps ++ * to decide whether to replace sd->service_tree with ++ * new_entity. ++ */ ++ if (next_in_service) { ++ unsigned int new_entity_class_idx = ++ bfq_class_idx(new_entity); ++ struct bfq_service_tree *st = ++ sd->service_tree + new_entity_class_idx; ++ ++ /* ++ * For efficiency, evaluate the most likely ++ * sub-condition first. ++ */ ++ replace_next = ++ (new_entity_class_idx == ++ bfq_class_idx(next_in_service) ++ && ++ !bfq_gt(new_entity->start, st->vtime) ++ && ++ bfq_gt(next_in_service->finish, ++ new_entity->finish)) ++ || ++ new_entity_class_idx < ++ bfq_class_idx(next_in_service); ++ } ++ ++ if (replace_next) ++ next_in_service = new_entity; ++ } else /* invoked because of a deactivation: lookup needed */ ++ next_in_service = bfq_lookup_next_entity(sd); ++ + sd->next_in_service = next_in_service; + + if (next_in_service) +@@ -109,18 +202,6 @@ static void bfq_update_budget(struct bfq_entity *next_in_service) + */ + #define WFQ_SERVICE_SHIFT 22 + +-/** +- * bfq_gt - compare two timestamps. +- * @a: first ts. +- * @b: second ts. +- * +- * Return @a > @b, dealing with wrapping correctly. +- */ +-static int bfq_gt(u64 a, u64 b) +-{ +- return (s64)(a - b) > 0; +-} +- + static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) + { + struct bfq_queue *bfqq = NULL; +@@ -816,15 +897,18 @@ static void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq, + } + + /** +- * __bfq_activate_entity - activate an entity. +- * @entity: the entity being activated. +- * @non_blocking_wait_rq: true if this entity was waiting for a request ++ * __bfq_activate_entity - handle activation or requeueing of an entity. ++ * @entity: the entity being activated or requeued. ++ * @non_blocking_wait_rq: true if entity was waiting for a request + * +- * Called whenever an entity is activated, i.e., it is not active and one +- * of its children receives a new request, or has to be reactivated due to +- * budget exhaustion. It uses the current budget of the entity (and the +- * service received if @entity is active) of the queue to calculate its +- * timestamps. ++ * Called whenever an entity is activated, which happens either if the ++ * entity is not active and one of its children receives a new request ++ * (true activation), or if the entity needs to be repositioned in its ++ * active tree (requeueing, as explained in detail in the comments ++ * inside the function). ++ * ++ * Basically, this function updates the timestamps of entity and ++ * (re)inserts entity into its active tree. + */ + static void __bfq_activate_entity(struct bfq_entity *entity, + bool non_blocking_wait_rq) +@@ -1027,7 +1111,7 @@ static void bfq_activate_entity(struct bfq_entity *entity, + RB_EMPTY_ROOT(&(sd->service_tree+1)->active) && + RB_EMPTY_ROOT(&(sd->service_tree+2)->active)); + +- if (!bfq_update_next_in_service(sd)) { ++ if (!bfq_update_next_in_service(sd, entity)) { + BUG_ON(!sd->next_in_service); + /* + * No need to propagate the activation to the +@@ -1081,8 +1165,8 @@ static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue) + else if (entity->tree) + BUG(); + +- if (was_in_service || sd->next_in_service == entity) +- ret = bfq_update_next_in_service(sd); ++ if (sd->next_in_service == entity) ++ ret = bfq_update_next_in_service(sd, NULL); + + if (!requeue || !bfq_gt(entity->finish, st->vtime)) + bfq_forget_entity(st, entity); +@@ -1169,7 +1253,7 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) + "invoking udpdate_next for this entity"); + } + #endif +- if (!bfq_update_next_in_service(sd)) ++ if (!bfq_update_next_in_service(sd, entity)) + break; + } + } +@@ -1183,28 +1267,27 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) + */ + static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st) + { +- struct bfq_entity *entry; +- struct rb_node *node = st->active.rb_node; ++ struct bfq_entity *root_entity = bfq_root_active_entity(&st->active); + +- entry = rb_entry(node, struct bfq_entity, rb_node); +- if (bfq_gt(entry->min_start, st->vtime)) { +- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entry); ++ if (bfq_gt(root_entity->min_start, st->vtime)) { ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(root_entity); + + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, + "calc_vtime_jump: new value %llu", +- entry->min_start); ++ root_entity->min_start); + #ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = +- container_of(entry, struct bfq_group, entity); ++ container_of(root_entity, struct bfq_group, ++ entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "calc_vtime_jump: new value %llu", +- entry->min_start); ++ root_entity->min_start); + } + #endif +- return entry->min_start; ++ return root_entity->min_start; + } + return st->vtime; + } +@@ -1608,7 +1691,7 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) + for_each_entity(entity) { + struct bfq_sched_data *sd = entity->sched_data; + +- if(!bfq_update_next_in_service(sd)) ++ if(!bfq_update_next_in_service(sd, NULL)) + break; + } + +diff --git a/block/bfq.h b/block/bfq.h +index d4d784f..9c68d8b 100644 +--- a/block/bfq.h ++++ b/block/bfq.h +@@ -806,13 +806,20 @@ struct bfq_group { + + static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity); + ++static unsigned int bfq_class_idx(struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ ++ return bfqq ? bfqq->ioprio_class - 1 : ++ BFQ_DEFAULT_GRP_CLASS - 1; ++} ++ + static struct bfq_service_tree * + bfq_entity_service_tree(struct bfq_entity *entity) + { + struct bfq_sched_data *sched_data = entity->sched_data; + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); +- unsigned int idx = bfqq ? bfqq->ioprio_class - 1 : +- BFQ_DEFAULT_GRP_CLASS - 1; ++ unsigned int idx = bfq_class_idx(entity); + + BUG_ON(idx >= BFQ_IOPRIO_CLASSES); + BUG_ON(sched_data == NULL); + +From 634414378918d47830a12f2c0e0a6c526d227502 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Thu, 15 Dec 2016 22:15:42 +0100 +Subject: [PATCH 22/24] Fix bug causing occasional loss of weight raising + +When a bfq_queue, say bfqq, is split after a merging with another +bfq_queue, BFQ checks whether it has to restore for bfqq the +weight-raising state that bfqq had before being merged. In +particular, the weight-raising is restored only if, according to the +weight-raising duration decided for bfqq when it started to be +weight-raised (before being merged), bfqq would not have already +finished its weight-raising period. + +Yet, by mistake, such a duration is not saved when bfqq is merged. So, +if bfqq is freed and reallocated when it is split, then this duration +is wrongly set to zero on the split. As a consequence, the +weight-raising state of bfqq is wrongly not restored, which causes BFQ +to fail in guaranteeing a low latency to bfqq. + +This commit fixes this bug by saving weight-raising duration when bfqq +is merged, and correctly restoring it when bfqq is split. + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-iosched.c | 2 ++ + block/bfq.h | 1 + + 2 files changed, 3 insertions(+) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index 7a56603..7aa3d55 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -650,6 +650,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) + bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt; + BUG_ON(time_is_after_jiffies(bfqq->wr_start_at_switch_to_srt)); + bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish; ++ bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time; + BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); + + if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) || +@@ -1988,6 +1989,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) + bic->saved_wr_coeff = bfqq->wr_coeff; + bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt; + bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish; ++ bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time; + BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); + } + +diff --git a/block/bfq.h b/block/bfq.h +index 9c68d8b..1a03521 100644 +--- a/block/bfq.h ++++ b/block/bfq.h +@@ -366,6 +366,7 @@ struct bfq_io_cq { + unsigned long saved_wr_coeff; + unsigned long saved_last_wr_start_finish; + unsigned long saved_wr_start_at_switch_to_srt; ++ unsigned int saved_wr_cur_max_time; + }; + + enum bfq_device_speed { + +From cb826b5dbc9fcf524cd672857bcbcb68c6aa7236 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Fri, 16 Dec 2016 17:57:00 +0100 +Subject: [PATCH 23/24] Fix wrong reset of in-service entities in commit + 1430feb8 + +In-service entities were reset with an indirect logic, which +happened to be even buggy for some cases. This commit fixes +this bug by replacing this indirect logic with a direct logic, +in which all involved entities are immediately reset, with a +bubble-up loop, when the in-service queue is reset. + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq-iosched.c | 5 ++--- + block/bfq-sched.c | 45 +++++++++++++++++++++++++++++---------------- + 2 files changed, 31 insertions(+), 19 deletions(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index 7aa3d55..4d70d5e 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -2643,8 +2643,6 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) + { + BUG_ON(bfqq != bfqd->in_service_queue); + +- __bfq_bfqd_reset_in_service(bfqd); +- + /* + * If this bfqq is shared between multiple processes, check + * to make sure that those processes are still issuing I/Os +@@ -2672,6 +2670,8 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) + */ + bfq_pos_tree_add_move(bfqd, bfqq); + } ++ ++ __bfq_bfqd_reset_in_service(bfqd); + } + + /** +@@ -3819,7 +3819,6 @@ static void bfq_put_queue(struct bfq_queue *bfqq) + BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); + BUG_ON(bfqq->entity.tree); + BUG_ON(bfq_bfqq_busy(bfqq)); +- BUG_ON(bfqq->bfqd->in_service_queue == bfqq); + + if (bfq_bfqq_sync(bfqq)) + /* +diff --git a/block/bfq-sched.c b/block/bfq-sched.c +index 09e494b..d9e63712 100644 +--- a/block/bfq-sched.c ++++ b/block/bfq-sched.c +@@ -923,22 +923,28 @@ static void __bfq_activate_entity(struct bfq_entity *entity, + if (entity == sd->in_service_entity) { + /* + * We are requeueing the current in-service entity, +- * because of the requeueing of a just-expired +- * non-idle leaf entity in the path originating from +- * this entity. In fact, in this case, all entities in +- * this path need to be requeued again for next +- * service. ++ * which may have to be done for one of the following ++ * reasons: ++ * - entity represents the in-service queue, and the ++ * in-service queue is being requeued after an ++ * expiration; ++ * - entity represents a group, and its budget has ++ * changed because one of its child entities has ++ * just been either activated or requeued for some ++ * reason; the timestamps of the entity need then to ++ * be updated, and the entity needs to be enqueued ++ * or repositioned accordingly. + * +- * Before requeueing, the start time of the entity +- * must be moved forward to account for the service +- * that the entity has received while in service. The ++ * In particular, before requeueing, the start time of ++ * the entity must be moved forward to account for the ++ * service that the entity has received while in ++ * service. This is done by the next instructions. The + * finish time will then be updated according to this + * new value of the start time, and to the budget of + * the entity. + */ + bfq_calc_finish(entity, entity->service); + entity->start = entity->finish; +- sd->in_service_entity = NULL; + BUG_ON(entity->tree && entity->tree != &st->active); + /* + * In addition, if the entity had more than one child +@@ -1001,7 +1007,8 @@ static void __bfq_activate_entity(struct bfq_entity *entity, + st->wsum += entity->weight; + bfq_get_entity(entity); + +- BUG_ON(entity->on_st); ++ BUG_ON(entity->on_st && bfqq); ++ BUG_ON(entity->on_st && !bfqq); + entity->on_st = 1; + } + } +@@ -1153,10 +1160,8 @@ static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue) + + BUG_ON(was_in_service && entity->tree && entity->tree != &st->active); + +- if (was_in_service) { ++ if (was_in_service) + bfq_calc_finish(entity, entity->service); +- sd->in_service_entity = NULL; +- } + + if (entity->tree == &st->active) + bfq_active_extract(st, entity); +@@ -1173,7 +1178,6 @@ static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue) + else + bfq_idle_insert(st, entity); + +- BUG_ON(sd->in_service_entity == entity); + BUG_ON(sd->next_in_service == entity); + + return ret; +@@ -1700,6 +1704,8 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) + + static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) + { ++ struct bfq_entity *entity = &bfqd->in_service_queue->entity; ++ + if (bfqd->in_service_bic) { + put_io_context(bfqd->in_service_bic->icq.ioc); + bfqd->in_service_bic = NULL; +@@ -1708,6 +1714,10 @@ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) + bfq_clear_bfqq_wait_request(bfqd->in_service_queue); + hrtimer_try_to_cancel(&bfqd->idle_slice_timer); + bfqd->in_service_queue = NULL; ++ ++ /* Reset in_service_entity along the path from entity to the root */ ++ for_each_entity(entity) ++ entity->sched_data->in_service_entity = NULL; + } + + static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -1715,13 +1725,17 @@ static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, + { + struct bfq_entity *entity = &bfqq->entity; + +- BUG_ON(bfqq == bfqd->in_service_queue); + bfq_deactivate_entity(entity, requeue); + } + + static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) + { + struct bfq_entity *entity = &bfqq->entity; ++ struct bfq_service_tree *st = bfq_entity_service_tree(entity); ++ ++ BUG_ON(bfqq != bfqd->in_service_queue && ++ entity->tree != &st->active && entity->tree != &st->idle && ++ entity->on_st); + + bfq_activate_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq)); + bfq_clear_bfqq_non_blocking_wait_rq(bfqq); +@@ -1737,7 +1751,6 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) + { + BUG_ON(!bfq_bfqq_busy(bfqq)); + BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); +- BUG_ON(bfqq == bfqd->in_service_queue); + + bfq_log_bfqq(bfqd, bfqq, "del from busy"); + + +From 316c31562f169ccb8752045217aead1688229315 Mon Sep 17 00:00:00 2001 +From: Paolo Valente <paolo.valente@linaro.org> +Date: Fri, 16 Dec 2016 18:01:52 +0100 +Subject: [PATCH 24/24] Add code to redirect trace log to console + +Signed-off-by: Paolo Valente <paolo.valente@linaro.org> +--- + block/bfq.h | 38 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 38 insertions(+) + +diff --git a/block/bfq.h b/block/bfq.h +index 1a03521..a09804e 100644 +--- a/block/bfq.h ++++ b/block/bfq.h +@@ -648,6 +648,43 @@ BFQ_BFQQ_FNS(softrt_update); + #undef BFQ_BFQQ_FNS + + /* Logging facilities. */ ++#ifdef CONFIG_BFQ_REDIRECT_TO_CONSOLE ++#ifdef CONFIG_BFQ_GROUP_IOSCHED ++static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); ++static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); ++ ++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ ++ char __pbuf[128]; \ ++ \ ++ assert_spin_locked((bfqd)->queue->queue_lock); \ ++ blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \ ++ pr_crit("bfq%d%c %s " fmt "\n", \ ++ (bfqq)->pid, \ ++ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ ++ __pbuf, ##args); \ ++} while (0) ++ ++#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ ++ char __pbuf[128]; \ ++ \ ++ blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf)); \ ++ pr_crit("%s " fmt "\n", __pbuf, ##args); \ ++} while (0) ++ ++#else /* CONFIG_BFQ_GROUP_IOSCHED */ ++ ++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ ++ pr_crit("bfq%d%c " fmt "\n", (bfqq)->pid, \ ++ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ ++ ##args) ++#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) ++ ++#endif /* CONFIG_BFQ_GROUP_IOSCHED */ ++ ++#define bfq_log(bfqd, fmt, args...) \ ++ pr_crit("bfq " fmt "\n", ##args) ++ ++#else /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ + #ifdef CONFIG_BFQ_GROUP_IOSCHED + static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); + static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); +@@ -682,6 +719,7 @@ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); + + #define bfq_log(bfqd, fmt, args...) \ + blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) ++#endif /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ + + /* Expiration reasons. */ + enum bfqq_expiration { + diff --git a/0006-BFQ-Fix.patch b/0006-BFQ-Fix.patch deleted file mode 100644 index fa361f2778e5..000000000000 --- a/0006-BFQ-Fix.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 69f18bb587a4805b2b18bb4ba91dced87a8fda06 Mon Sep 17 00:00:00 2001 -From: Paolo Valente <paolo.valente@linaro.org> -Date: Sat, 22 Oct 2016 15:26:33 +0200 -Subject: [PATCH 86/86] BUGFIX: Remove wrong conversion in use of bfq_fifo_expirebug - -Signed-off-by: Paolo Valente <paolo.valente@linaro.org> ---- - block/bfq-iosched.c | 3 +-- - 1 file changed, 1 insertions(+), 2 deletions(-) - -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index eef6ff4..c161ff0 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -4268,8 +4268,7 @@ - - bfq_add_request(rq); - -- rq->fifo_time = ktime_get_ns() + -- jiffies_to_nsecs(bfqd->bfq_fifo_expire[rq_is_sync(rq)]); -+ rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; - list_add_tail(&rq->queuelist, &bfqq->fifo); - - bfq_rq_enqueued(bfqd, bfqq, rq); @@ -52,7 +52,7 @@ pkgbase=linux-bfq # pkgname=('linux-bfq' 'linux-bfq-headers' 'linux-bfq-docs') _srcname=linux-4.8 pkgver=4.8.15 -pkgrel=1 +pkgrel=2 arch=('i686' 'x86_64') url="http://algo.ing.unimo.it" license=('GPL2') @@ -83,8 +83,7 @@ source=("http://www.kernel.org/pub/linux/kernel/v4.x/${_srcname}.tar.xz" 'net_handle_no_dst_on_skb_in_icmp6_send.patch' '0001-x86-fpu-Fix-invalid-FPU-ptrace-state-after-execve.patch' # patches from https://github.com/linusw/linux-bfq/commits/bfq-v8 - '0005-BFQ-Fix.patch' - '0006-BFQ-Fix.patch') + '0005-BFQ-update-to-v8r6.patch') _kernelname=${pkgbase#linux} @@ -112,7 +111,7 @@ prepare() { ### Patch source with BFQ msg "Patching source with BFQ patches" - for p in "${srcdir}"/000{1,2,3,4,5,6}-*BFQ*.patch; do + for p in "${srcdir}"/000{1,2,3,4,5}-*BFQ*.patch; do msg " $p" patch -Np1 -i "$p" done @@ -193,7 +192,9 @@ prepare() { if [ -n "$_localmodcfg" ]; then msg "If you have modprobe-db installed, running it in recall mode now" if [ -e /usr/bin/modprobed-db ]; then - [[ ! -x /usr/bin/sudo ]] && echo "Cannot call modprobe with sudo. Install via pacman -S sudo and configure to work with this user." && exit 1 + [[ -x /usr/bin/sudo ]] || { + echo "Cannot call modprobe with sudo. Install sudo and configure it to work with this user." + exit 1; } sudo /usr/bin/modprobed-db recall fi msg "Running Steven Rostedt's make localmodconfig now" @@ -461,16 +462,15 @@ sha512sums=('a48a065f21e1c7c4de4cf8ca47b8b8d9a70f86b64e7cfa6e01be490f78895745b9c 'dc0649dfe2a5ce8e8879a62df29a4a1959eb1a84e5d896a9cb119d6a85a9bad1b17135371799e0be96532e17c66043d298e4a14b18fad3078a1f425108f888c9' '135afcffee2439e2260a71202658dce9ec5f555de3291b2d49a5cffdd953c6d7851b8a90e961576895555142a618e52220a7e4a46521ca4ba19d37df71887581' '87ae76889ab84ced46c237374c124786551982f8ff7325102af9153aed1ab7be72bec4db1f7516426c5ee34c5ce17221679eb2f129c5cbdeec05c0d3cb7f785d' - '8e8f407262f3f573654e7f0fceec6075fd2da0258a384206d28e5f59698168609e575f3d855adb8bb476b0684da409dab22f7ddc0f00e9c10c67debe4409c30b' + '6046432243b8f0497b43ea2c5fe3fde5135e058f2be25c02773ead93e2ab3b525b6f7c1c4898b9e67ac2bec0959b972829e997e79fabf6ac87e006700dfaf987' 'd9d28e02e964704ea96645a5107f8b65cae5f4fb4f537e224e5e3d087fd296cb770c29ac76e0ce95d173bc420ea87fb8f187d616672a60a0cae618b0ef15b8c8' - '643cad54fc54f4790e1d28dbc60f775f117fa3e25e15aff418b0406cc7a9b49db5d67fc02ff7056a951a2d64d5bbc58306c40fa0d588460664c2921ad613745c' - '94fbb7e67b77bb8911b69cdb23bbb74317e75a4ed786e4c6486bf2927042f9374e932ea879d1b250837e42c628abf7082fd6f895a2257c5d3ad652f6a520306a' + 'dc9ad074791a9edb3414a71480525a1c4a75b257137c754749c251e45ef376e313d3310766fedcabcd392837a8022bbcd97bfcd17c42feae63f71b4c4064b73d' + 'bcc657cd2366b55ba303d4d091ab9958e4f97121e7fdd64013a90be7fb6b19ac28ca2e8973496935475b9a8f8386bcbed8924981a20bb51cf3fc5c6f4f14b22a' 'd6faa67f3ef40052152254ae43fee031365d0b1524aa0718b659eb75afc21a3f79ea8d62d66ea311a800109bed545bc8f79e8752319cd378eef2cbd3a09aba22' '2dc6b0ba8f7dbf19d2446c5c5f1823587de89f4e28e9595937dd51a87755099656f2acec50e3e2546ea633ad1bfd1c722e0c2b91eef1d609103d8abdc0a7cbaf' 'c53bd47527adbd2599a583e05a7d24f930dc4e86b1de017486588205ad6f262a51a4551593bc7a1218c96541ea073ea03b770278d947b1cd0d2801311fcc80e5' '002d5e0ccfa5824c1750912a6400ff722672d6587b74ba66b1127cf1228048f604bba107617cf4f4477884039af4d4d196cf1b74cefe43b0bddc82270f11940d' - '3889679e288d51f6fecc7ed6581ccde34acbf1e4861f5c9ca237a1ad13158502757d3fc457d7b6caf1c8c99c9dba842265004154a71cffb8ec14e1030e49e312' - '3c3f3b6407d9a1a63cd91c2b5c35e6c932afa5bf33f1b2e8a530dbd9eacda8c8f956616c4cc9228657624da58e354ba5714252237d84ff3386fd65cf44f06ddc') + '98612f37013d1b41c76887bc5ed80f70753f6fc859748038dc83f3b53aa89464321951c6c21b364f56b2d4860e594c2091240c70ff70ce616d6b3fdadb817934') validpgpkeys=( 'ABAF11C65A2970B130ABE3C479BE3E4300411886' # Linus Torvalds @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/x86 4.8.10-1 Kernel Configuration +# Linux/x86 4.8.15-1 Kernel Configuration # # CONFIG_64BIT is not set CONFIG_X86_32=y @@ -445,7 +445,6 @@ CONFIG_M686=y # CONFIG_MHASWELL is not set # CONFIG_MBROADWELL is not set # CONFIG_MSKYLAKE is not set -# CONFIG_MNATIVE is not set CONFIG_X86_GENERIC=y CONFIG_X86_INTERNODE_CACHE_SHIFT=6 CONFIG_X86_L1_CACHE_SHIFT=6 diff --git a/config.x86_64 b/config.x86_64 index ba4d0d888574..246520db8b4f 100644 --- a/config.x86_64 +++ b/config.x86_64 @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/x86 4.8.10-1 Kernel Configuration +# Linux/x86 4.8.15-1 Kernel Configuration # CONFIG_64BIT=y CONFIG_X86_64=y @@ -448,7 +448,6 @@ CONFIG_NO_BOOTMEM=y # CONFIG_MBROADWELL is not set # CONFIG_MSKYLAKE is not set CONFIG_GENERIC_CPU=y -# CONFIG_MNATIVE is not set CONFIG_X86_INTERNODE_CACHE_SHIFT=6 CONFIG_X86_L1_CACHE_SHIFT=6 CONFIG_X86_TSC=y |