summarylogtreecommitdiffstats
path: root/0005-BFQ-update-to-v8r12.patch
blob: f4535cbb718bef711a6a4121c5b9f645292b4f55 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
From 493c15c90a12961dc3a61ac37ecf4c8839eb189e Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Thu, 4 May 2017 17:47:13 +0200
Subject: [PATCH 1/5] block, bfq: update wr_busy_queues if needed on a queue
 split

This commit fixes a bug triggered by a non-trivial sequence of
events. These events are briefly described in the next two
paragraphs. The impatiens, or those who are familiar with queue
merging and splitting, can jump directly to the last paragraph.

On each I/O-request arrival for a shared bfq_queue, i.e., for a
bfq_queue that is the result of the merge of two or more bfq_queues,
BFQ checks whether the shared bfq_queue has become seeky (i.e., if too
many random I/O requests have arrived for the bfq_queue; if the device
is non rotational, then random requests must be also small for the
bfq_queue to be tagged as seeky). If the shared bfq_queue is actually
detected as seeky, then a split occurs: the bfq I/O context of the
process that has issued the request is redirected from the shared
bfq_queue to a new non-shared bfq_queue. As a degenerate case, if the
shared bfq_queue actually happens to be shared only by one process
(because of previous splits), then no new bfq_queue is created: the
state of the shared bfq_queue is just changed from shared to non
shared.

Regardless of whether a brand new non-shared bfq_queue is created, or
the pre-existing shared bfq_queue is just turned into a non-shared
bfq_queue, several parameters of the non-shared bfq_queue are set
(restored) to the original values they had when the bfq_queue
associated with the bfq I/O context of the process (that has just
issued an I/O request) was merged with the shared bfq_queue. One of
these parameters is the weight-raising state.

If, on the split of a shared bfq_queue,
1) a pre-existing shared bfq_queue is turned into a non-shared
bfq_queue;
2) the previously shared bfq_queue happens to be busy;
3) the weight-raising state of the previously shared bfq_queue happens
to change;
the number of weight-raised busy queues changes. The field
wr_busy_queues must then be updated accordingly, but such an update
was missing. This commit adds the missing update.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 47 +++++++++++++++++++++++++++++++++++++++--------
 block/bfq-sched.c   |  9 +++++++--
 2 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index a56888ea9d94..5e8cbeab45a1 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -673,8 +673,12 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
 }
 
 static void
-bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
+		      struct bfq_io_cq *bic, bool bfq_already_existing)
 {
+	unsigned int old_wr_coeff;
+	bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq);
+
 	if (bic->saved_idle_window)
 		bfq_mark_bfqq_idle_window(bfqq);
 	else
@@ -685,6 +689,9 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 	else
 		bfq_clear_bfqq_IO_bound(bfqq);
 
+	if (unlikely(busy))
+		old_wr_coeff = bfqq->wr_coeff;
+
 	bfqq->wr_coeff = bic->saved_wr_coeff;
 	bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
 	BUG_ON(time_is_after_jiffies(bfqq->wr_start_at_switch_to_srt));
@@ -693,8 +700,8 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 	BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish));
 
 	if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) ||
-	    time_is_before_jiffies(bfqq->last_wr_start_finish +
-				   bfqq->wr_cur_max_time))) {
+				   time_is_before_jiffies(bfqq->last_wr_start_finish +
+							  bfqq->wr_cur_max_time))) {
 		bfq_log_bfqq(bfqq->bfqd, bfqq,
 			     "resume state: switching off wr (%lu + %lu < %lu)",
 			     bfqq->last_wr_start_finish, bfqq->wr_cur_max_time,
@@ -702,8 +709,20 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 
 		bfqq->wr_coeff = 1;
 	}
+
 	/* make sure weight will be updated, however we got here */
 	bfqq->entity.prio_changed = 1;
+
+	if (likely(!busy))
+		return;
+
+	if (old_wr_coeff == 1 && bfqq->wr_coeff > 1) {
+		bfqd->wr_busy_queues++;
+		BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues);
+	} else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1) {
+		bfqd->wr_busy_queues--;
+		BUG_ON(bfqd->wr_busy_queues < 0);
+	}
 }
 
 static int bfqq_process_refs(struct bfq_queue *bfqq)
@@ -1450,6 +1469,7 @@ static void bfq_add_request(struct request *rq)
 			bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
 
 			bfqd->wr_busy_queues++;
+			BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues);
 			bfqq->entity.prio_changed = 1;
 			bfq_log_bfqq(bfqd, bfqq,
 				     "non-idle wrais starting, "
@@ -1689,8 +1709,10 @@ static void bfq_bfqq_end_wr(struct bfq_queue *bfqq)
 {
 	BUG_ON(!bfqq);
 
-	if (bfq_bfqq_busy(bfqq))
+	if (bfq_bfqq_busy(bfqq)) {
 		bfqq->bfqd->wr_busy_queues--;
+		BUG_ON(bfqq->bfqd->wr_busy_queues < 0);
+	}
 	bfqq->wr_coeff = 1;
 	bfqq->wr_cur_max_time = 0;
 	bfqq->last_wr_start_finish = jiffies;
@@ -2069,8 +2091,11 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
 		new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish;
 		new_bfqq->wr_start_at_switch_to_srt =
 			bfqq->wr_start_at_switch_to_srt;
-		if (bfq_bfqq_busy(new_bfqq))
+		if (bfq_bfqq_busy(new_bfqq)) {
 			bfqd->wr_busy_queues++;
+			BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues);
+		}
+
 		new_bfqq->entity.prio_changed = 1;
 		bfq_log_bfqq(bfqd, new_bfqq,
 			     "wr start after merge with %d, rais_max_time %u",
@@ -2081,8 +2106,11 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
 	if (bfqq->wr_coeff > 1) { /* bfqq has given its wr to new_bfqq */
 		bfqq->wr_coeff = 1;
 		bfqq->entity.prio_changed = 1;
-		if (bfq_bfqq_busy(bfqq))
+		if (bfq_bfqq_busy(bfqq)) {
 			bfqd->wr_busy_queues--;
+			BUG_ON(bfqd->wr_busy_queues < 0);
+		}
+
 	}
 
 	bfq_log_bfqq(bfqd, new_bfqq, "merge_bfqqs: wr_busy %d",
@@ -4553,7 +4581,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
 	const int is_sync = rq_is_sync(rq);
 	struct bfq_queue *bfqq;
 	unsigned long flags;
-	bool split = false;
+	bool bfqq_already_existing = false, split = false;
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	bfq_check_ioprio_change(bic, bio);
@@ -4613,6 +4641,8 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
 			split = true;
 			if (!bfqq)
 				goto new_queue;
+			else
+				bfqq_already_existing = true;
 		}
 	}
 
@@ -4638,7 +4668,8 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
 			 * queue, restore the idle window and the possible
 			 * weight raising period.
 			 */
-			bfq_bfqq_resume_state(bfqq, bic);
+			bfq_bfqq_resume_state(bfqq, bfqd, bic,
+					      bfqq_already_existing);
 		}
 	}
 
diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index 1fde0702bfef..36fb773c0832 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -1988,8 +1988,10 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		bfq_weights_tree_remove(bfqd, &bfqq->entity,
 					&bfqd->queue_weights_tree);
 
-	if (bfqq->wr_coeff > 1)
+	if (bfqq->wr_coeff > 1) {
 		bfqd->wr_busy_queues--;
+		BUG_ON(bfqd->wr_busy_queues < 0);
+	}
 
 	bfqg_stats_update_dequeue(bfqq_group(bfqq));
 
@@ -2018,6 +2020,9 @@ static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 			bfq_weights_tree_add(bfqd, &bfqq->entity,
 					     &bfqd->queue_weights_tree);
 
-	if (bfqq->wr_coeff > 1)
+	if (bfqq->wr_coeff > 1) {
 		bfqd->wr_busy_queues++;
+		BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues);
+	}
+
 }

From 69373c8ac58080b60ca461689db6afb200d962e2 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Tue, 9 May 2017 12:52:16 +0200
Subject: [PATCH 2/5] block, bfq: stress that low_latency must be off to get
 max throughput

The introduction of the BFQ and Kyber I/O schedulers has triggered a
new wave of I/O benchmarks. Unfortunately, comments and discussions on
these benchmarks confirm that there is still little awareness that it
is very hard to achieve, at the same time, a low latency and a high
throughput. In particular, virtually all benchmarks measure
throughput, or throughput-related figures of merit, but, for BFQ, they
use the scheduler in its default configuration. This configuration is
geared, instead, toward a low latency. This is evidently a sign that
BFQ documentation is still too unclear on this important aspect. This
commit addresses this issue by stressing how BFQ configuration must be
(easily) changed if the only goal is maximum throughput.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 Documentation/block/bfq-iosched.txt | 17 ++++++++++++++++-
 block/bfq-iosched.c                 |  5 +++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 13b5248eba7e..0539e87962ed 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -11,6 +11,13 @@ controllers), BFQ's main features are:
   groups (switching back to time distribution when needed to keep
   throughput high).
 
+In its default configuration, BFQ privileges latency over
+throughput. So, when needed for achieving a lower latency, BFQ builds
+schedules that may lead to a lower throughput. If your main or only
+goal, for a given device, is to achieve the maximum-possible
+throughput at all times, then do switch off all low-latency heuristics
+for that device, by setting low_latency to 0. Full details in Section 3.
+
 On average CPUs, the current version of BFQ can handle devices
 performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a
 reference, 30-50 KIOPS correspond to very high bandwidths with
@@ -374,11 +381,19 @@ default, low latency mode is enabled. If enabled, interactive and soft
 real-time applications are privileged and experience a lower latency,
 as explained in more detail in the description of how BFQ works.
 
-DO NOT enable this mode if you need full control on bandwidth
+DISABLE this mode if you need full control on bandwidth
 distribution. In fact, if it is enabled, then BFQ automatically
 increases the bandwidth share of privileged applications, as the main
 means to guarantee a lower latency to them.
 
+In addition, as already highlighted at the beginning of this document,
+DISABLE this mode if your only goal is to achieve a high throughput.
+In fact, privileging the I/O of some application over the rest may
+entail a lower throughput. To achieve the highest-possible throughput
+on a non-rotational device, setting slice_idle to 0 may be needed too
+(at the cost of giving up any strong guarantee on fairness and low
+latency).
+
 timeout_sync
 ------------
 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 5e8cbeab45a1..cdf608e76048 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -36,6 +36,11 @@
  * boost the throughput), and yet guarantee a low latency to
  * interactive and soft real-time applications.
  *
+ * NOTE: if the main or only goal, with a given device, is to achieve
+ * the maximum-possible throughput at all times, then do switch off
+ * all low-latency heuristics for that device, by setting low_latency
+ * to 0.
+ *
  * BFQ is described in [1], where also a reference to the initial, more
  * theoretical paper on BFQ can be found. The interested reader can find
  * in the latter paper full details on the main algorithm, as well as

From eb93d3e00badadb619cc8f62c1c04d1bf61e2890 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Fri, 12 May 2017 10:19:52 +0200
Subject: [PATCH 3/5] Fix commit "don't dereference bic before null checking
 it"

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index cdf608e76048..c98dff9637c0 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4589,7 +4589,6 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
 	bool bfqq_already_existing = false, split = false;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	bfq_check_ioprio_change(bic, bio);
 
 	if (!bic)
 		goto queue_fail;

From 8a119d7f3e592372a52d87093b2b4eb90330f055 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Wed, 14 Jun 2017 15:35:44 +0200
Subject: [PATCH 4/5] block, bfq: don't change ioprio class for a bfq_queue on
 a service tree

On each deactivation or re-scheduling (after being served) of a
bfq_queue, BFQ invokes the function __bfq_entity_update_weight_prio(),
to perform pending updates of ioprio, weight and ioprio class for the
bfq_queue. BFQ also invokes this function on I/O-request dispatches,
to raise or lower weights more quickly when needed, thereby improving
latency. However, the entity representing the bfq_queue may be on the
active (sub)tree of a service tree when this happens, and, although
with a very low probability, the bfq_queue may happen to also have a
pending change of its ioprio class. If both conditions hold when
__bfq_entity_update_weight_prio() is invoked, then the entity moves to
a sort of hybrid state: the new service tree for the entity, as
returned by bfq_entity_service_tree(), differs from service tree on
which the entity still is. The functions that handle activations and
deactivations of entities do not cope with such a hybrid state (and
would need to become more complex to cope).

This commit addresses this issue by just making
__bfq_entity_update_weight_prio() not perform also a possible pending
change of ioprio class, when invoked on an I/O-request dispatch for a
bfq_queue. Such a change is thus postponed to when
__bfq_entity_update_weight_prio() is invoked on deactivation or
re-scheduling of the bfq_queue.

Reported-by: Marco Piazza <mpiazza@gmail.com>
Reported-by: Laurentiu Nicola <lnicola@dend.ro>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Tested-by: Marco Piazza <mpiazza@gmail.com>
---
 block/bfq-iosched.c | 14 ++++++++++----
 block/bfq-sched.c   | 38 ++++++++++++++++++++++++++++++++++----
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index c98dff9637c0..c390711ea523 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3689,11 +3689,17 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 			}
 		}
 	}
-	/* Update weight both if it must be raised and if it must be lowered */
+	/*
+	 * To improve latency (for this or other queues), immediately
+	 * update weight both if it must be raised and if it must be
+	 * lowered. Since, entity may be on some active tree here, and
+	 * might have a pending change of its ioprio class, invoke
+	 * next function with the last parameter unset (see the
+	 * comments on the function).
+	 */
 	if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
-		__bfq_entity_update_weight_prio(
-			bfq_entity_service_tree(entity),
-			entity);
+		__bfq_entity_update_weight_prio(bfq_entity_service_tree(entity),
+						entity, false);
 }
 
 /*
diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index 36fb773c0832..d9a107dadc6d 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -766,9 +766,28 @@ static void bfq_forget_idle(struct bfq_service_tree *st)
 		bfq_put_idle_entity(st, first_idle);
 }
 
+/*
+ * Update weight and priority of entity. If update_class_too is true,
+ * then update the ioprio_class of entity too.
+ *
+ * The reason why the update of ioprio_class is controlled through the
+ * last parameter is as follows. Changing the ioprio class of an
+ * entity implies changing the destination service trees for that
+ * entity. If such a change occurred when the entity is already on one
+ * of the service trees for its previous class, then the state of the
+ * entity would become more complex: none of the new possible service
+ * trees for the entity, according to bfq_entity_service_tree(), would
+ * match any of the possible service trees on which the entity
+ * is. Complex operations involving these trees, such as entity
+ * activations and deactivations, should take into account this
+ * additional complexity.  To avoid this issue, this function is
+ * invoked with update_class_too unset in the points in the code where
+ * entity may happen to be on some tree.
+ */
 static struct bfq_service_tree *
 __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
-			 struct bfq_entity *entity)
+				struct bfq_entity *entity,
+				bool update_class_too)
 {
 	struct bfq_service_tree *new_st = old_st;
 
@@ -813,9 +832,15 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
 				  bfq_weight_to_ioprio(entity->orig_weight);
 		}
 
-		if (bfqq)
+		if (bfqq && update_class_too)
 			bfqq->ioprio_class = bfqq->new_ioprio_class;
-		entity->prio_changed = 0;
+
+		/*
+		 * Reset prio_changed only if the ioprio_class change
+		 * is not pending any longer.
+		 */
+		if (!bfqq || bfqq->ioprio_class == bfqq->new_ioprio_class)
+			entity->prio_changed = 0;
 
 		/*
 		 * NOTE: here we may be changing the weight too early,
@@ -964,7 +989,12 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
 	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
 	struct bfq_sched_data *sd = entity->sched_data;
 
-	st = __bfq_entity_update_weight_prio(st, entity);
+	/*
+	 * When this function is invoked, entity is not in any service
+	 * tree, then it is safe to invoke next function with the last
+	 * parameter set (see the comments on the function).
+	 */
+	st = __bfq_entity_update_weight_prio(st, entity, true);
 	bfq_calc_finish(entity, entity->budget);
 
 	/*

From 2024487bd20c823bb5f96fe7ffbccf136628a2f7 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Mon, 3 Jul 2017 22:00:29 +0200
Subject: [PATCH 5/5] BFQ-v8r12

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 2 +-
 block/bfq.h         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index c390711ea523..ea92c7461b89 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5300,7 +5300,7 @@ static struct blkcg_policy blkcg_policy_bfq = {
 static int __init bfq_init(void)
 {
 	int ret;
-	char msg[60] = "BFQ I/O-scheduler: v8r11";
+	char msg[60] = "BFQ I/O-scheduler: v8r12";
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	ret = blkcg_policy_register(&blkcg_policy_bfq);
diff --git a/block/bfq.h b/block/bfq.h
index 5f08990be66e..fe7816832035 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -1,5 +1,5 @@
 /*
- * BFQ v8r11 for 4.11.0: data structures and common functions prototypes.
+ * BFQ v8r12 for 4.11.0: data structures and common functions prototypes.
  *
  * Based on ideas and code from CFQ:
  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>