summarylogtreecommitdiffstats
path: root/0005-BFQ-update-to-v8r7.patch
blob: 57b68a17cc9308be889c75accd0d5ec77cc74727 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
From 1acff6b391cd626a495e42c9782bf2139b9f330f Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Wed, 19 Oct 2016 20:11:42 +0200
Subject: [PATCH 01/31] Documentation/block: add bfq-iosched.txt

Documentation of BFQ benefits, inner workings, interface and tunables.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 Documentation/block/00-INDEX        |   2 +
 Documentation/block/bfq-iosched.txt | 508 ++++++++++++++++++++++++++++++++++++
 2 files changed, 510 insertions(+)
 create mode 100644 Documentation/block/bfq-iosched.txt

diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX
index e55103a..8d55b4b 100644
--- a/Documentation/block/00-INDEX
+++ b/Documentation/block/00-INDEX
@@ -1,5 +1,7 @@
 00-INDEX
 	- This file
+bfq-iosched.txt
+	- BFQ IO scheduler and its tunables
 biodoc.txt
 	- Notes on the Generic Block Layer Rewrite in Linux 2.5
 biovecs.txt
diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
new file mode 100644
index 0000000..67e1269
--- /dev/null
+++ b/Documentation/block/bfq-iosched.txt
@@ -0,0 +1,508 @@
+BFQ (Budget Fair Queueing)
+==========================
+
+BFQ is a proportional-share I/O scheduler, with some extra
+low-latency capabilities. In addition to cgroups support (blkio or io
+controllers), BFQ's main features are:
+- BFQ guarantees a high system and application responsiveness, and a
+  low latency for time-sensitive applications, such as audio or video
+  players;
+- BFQ distributes bandwidth, and not just time, among processes or
+  groups (switching back to time distribution when needed to keep
+  throughput high).
+
+On average CPUs, the current version of BFQ can handle devices
+performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a
+reference, 30-50 KIOPS correspond to very high bandwidths with
+sequential I/O (e.g., 8-12 GB/s if I/O requests are 256 KB large), and
+to 120-200 MB/s with 4KB random I/O.
+
+The table of contents follow. Impatients can just jump to Section 3.
+
+CONTENTS
+
+1. When may BFQ be useful?
+ 1-1 Personal systems
+ 1-2 Server systems
+2. How does BFQ work?
+3. What are BFQ's tunable?
+4. BFQ group scheduling
+ 4-1 Service guarantees provided
+ 4-2 Interface
+
+1. When may BFQ be useful?
+==========================
+
+BFQ provides the following benefits on personal and server systems.
+
+1-1 Personal systems
+--------------------
+
+Low latency for interactive applications
+
+Regardless of the actual background workload, BFQ guarantees that, for
+interactive tasks, the storage device is virtually as responsive as if
+it was idle. For example, even if one or more of the following
+background workloads are being executed:
+- one or more large files are being read, written or copied,
+- a tree of source files is being compiled,
+- one or more virtual machines are performing I/O,
+- a software update is in progress,
+- indexing daemons are scanning filesystems and updating their
+  databases,
+starting an application or loading a file from within an application
+takes about the same time as if the storage device was idle. As a
+comparison, with CFQ, NOOP or DEADLINE, and in the same conditions,
+applications experience high latencies, or even become unresponsive
+until the background workload terminates (also on SSDs).
+
+Low latency for soft real-time applications
+
+Also soft real-time applications, such as audio and video
+players/streamers, enjoy a low latency and a low drop rate, regardless
+of the background I/O workload. As a consequence, these applications
+do not suffer from almost any glitch due to the background workload.
+
+Higher speed for code-development tasks
+
+If some additional workload happens to be executed in parallel, then
+BFQ executes the I/O-related components of typical code-development
+tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
+NOOP or DEADLINE.
+
+High throughput
+
+On hard disks, BFQ achieves up to 30% higher throughput than CFQ, and
+up to 150% higher throughput than DEADLINE and NOOP, with all the
+sequential workloads considered in our tests. With random workloads,
+and with all the workloads on flash-based devices, BFQ achieves,
+instead, about the same throughput as the other schedulers.
+
+Strong fairness/bandwidth guarantees
+
+BFQ distributes the device throughput, and not just the device time,
+as desired among I/O-bound applications, with any workload and
+regardless of the device parameters. BFQ switches to time-based
+distribution (only) for processes that would otherwise cause a
+throughput loss.
+
+1-2 Server systems
+------------------
+
+Most benefits for server systems follow from the same service
+properties as above. In particular, regardless of whether additional,
+possibly heavy workloads are being served, BFQ guarantees:
+
+. audio and video-streaming with zero or very low jitter and drop
+  rate;
+
+. fast retrieval of WEB pages and embedded objects;
+
+. real-time recording of data in live-dumping applications (e.g.,
+  packet logging);
+
+. responsiveness in local and remote access to a server.
+
+
+2. How does BFQ work?
+=====================
+
+BFQ is a proportional-share I/O scheduler, whose general structure,
+plus a lot of code, are borrowed from CFQ.
+
+- Each process doing I/O on a device is associated with a weight and a
+  (bfq_)queue.
+
+- BFQ grants exclusive access to the device, for a while, to one queue
+  (process) at a time, and implements this service model by
+  associating every queue with a budget, measured in number of
+  sectors.
+
+  - After a queue is granted access to the device, the budget of the
+    queue is decremented, on each request dispatch, by the size of the
+    request.
+
+  - The in-service queue is expired, i.e., its service is suspended,
+    only if one of the following events occurs: 1) the queue finishes
+    its budget, 2) the queue empties, 3) a "budget timeout" fires.
+
+    - The budget timeout prevents processes doing random I/O from
+      holding the device for too long and dramatically reducing
+      throughput.
+
+    - Actually, as in CFQ, a queue associated with a process issuing
+      sync requests may not be expired immediately when it empties. In
+      contrast, BFQ may idle the device for a short time interval,
+      giving the process the chance to go on being served if it issues
+      a new request in time. Device idling typically boosts the
+      throughput on rotational devices, if processes do synchronous
+      and sequential I/O. In addition, under BFQ, device idling is
+      also instrumental in guaranteeing the desired throughput
+      fraction to processes issuing sync requests (see the description
+      of the slice_idle tunable in this document, or [1, 2], for more
+      details).
+
+      - With respect to idling for service guarantees, if several
+	processes are competing for the device at the same time, but
+	all processes (and groups, after the following commit) have
+	the same weight, then BFQ guarantees the expected throughput
+	distribution without ever idling the device. Throughput is
+	thus as high as possible in this common scenario.
+
+  - If low-latency mode is enabled (default configuration), BFQ
+    executes some special heuristics to detect interactive and soft
+    real-time applications (e.g., video or audio players/streamers),
+    and to reduce their latency. The most important action taken to
+    achieve this goal is to give to the queues associated with these
+    applications more than their fair share of the device
+    throughput. For brevity, we call just "weight-raising" the whole
+    sets of actions taken by BFQ to privilege these queues. In
+    particular, BFQ provides a milder form of weight-raising for
+    interactive applications, and a stronger form for soft real-time
+    applications.
+
+  - BFQ automatically deactivates idling for queues born in a burst of
+    queue creations. In fact, these queues are usually associated with
+    the processes of applications and services that benefit mostly
+    from a high throughput. Examples are systemd during boot, or git
+    grep.
+
+  - As CFQ, BFQ merges queues performing interleaved I/O, i.e.,
+    performing random I/O that becomes mostly sequential if
+    merged. Differently from CFQ, BFQ achieves this goal with a more
+    reactive mechanism, called Early Queue Merge (EQM). EQM is so
+    responsive in detecting interleaved I/O (cooperating processes),
+    that it enables BFQ to achieve a high throughput, by queue
+    merging, even for queues for which CFQ needs a different
+    mechanism, preemption, to get a high throughput. As such EQM is a
+    unified mechanism to achieve a high throughput with interleaved
+    I/O.
+
+  - Queues are scheduled according to a variant of WF2Q+, named
+    B-WF2Q+, and implemented using an augmented rb-tree to preserve an
+    O(log N) overall complexity.  See [2] for more details. B-WF2Q+ is
+    also ready for hierarchical scheduling. However, for a cleaner
+    logical breakdown, the code that enables and completes
+    hierarchical support is provided in the next commit, which focuses
+    exactly on this feature.
+
+  - B-WF2Q+ guarantees a tight deviation with respect to an ideal,
+    perfectly fair, and smooth service. In particular, B-WF2Q+
+    guarantees that each queue receives a fraction of the device
+    throughput proportional to its weight, even if the throughput
+    fluctuates, and regardless of: the device parameters, the current
+    workload and the budgets assigned to the queue.
+
+  - The last, budget-independence, property (although probably
+    counterintuitive in the first place) is definitely beneficial, for
+    the following reasons:
+
+    - First, with any proportional-share scheduler, the maximum
+      deviation with respect to an ideal service is proportional to
+      the maximum budget (slice) assigned to queues. As a consequence,
+      BFQ can keep this deviation tight not only because of the
+      accurate service of B-WF2Q+, but also because BFQ *does not*
+      need to assign a larger budget to a queue to let the queue
+      receive a higher fraction of the device throughput.
+
+    - Second, BFQ is free to choose, for every process (queue), the
+      budget that best fits the needs of the process, or best
+      leverages the I/O pattern of the process. In particular, BFQ
+      updates queue budgets with a simple feedback-loop algorithm that
+      allows a high throughput to be achieved, while still providing
+      tight latency guarantees to time-sensitive applications. When
+      the in-service queue expires, this algorithm computes the next
+      budget of the queue so as to:
+
+      - Let large budgets be eventually assigned to the queues
+	associated with I/O-bound applications performing sequential
+	I/O: in fact, the longer these applications are served once
+	got access to the device, the higher the throughput is.
+
+      - Let small budgets be eventually assigned to the queues
+	associated with time-sensitive applications (which typically
+	perform sporadic and short I/O), because, the smaller the
+	budget assigned to a queue waiting for service is, the sooner
+	B-WF2Q+ will serve that queue (Subsec 3.3 in [2]).
+
+- ioprio classes are served in strict priority order, i.e.,
+  lower-priority queues are not served as long as there are
+  higher-priority queues.  Among queues in the same class, the
+  bandwidth is distributed in proportion to the weight of each
+  queue. A very thin extra bandwidth is however guaranteed to
+  the Idle class, to prevent it from starving.
+
+
+3. What are BFQ's tunable?
+==========================
+
+The tunables back_seek-max, back_seek_penalty, fifo_expire_async and
+fifo_expire_sync below are the same as in CFQ. Their description is
+just copied from that for CFQ. Some considerations in the description
+of slice_idle are copied from CFQ too.
+
+per-process ioprio and weight
+-----------------------------
+
+Unless the cgroups interface is used, weights can be assigned to
+processes only indirectly, through I/O priorities, and according to
+the relation: weight = (IOPRIO_BE_NR - ioprio) * 10.
+
+slice_idle
+----------
+
+This parameter specifies how long BFQ should idle for next I/O
+request, when certain sync BFQ queues become empty. By default
+slice_idle is a non-zero value. Idling has a double purpose: boosting
+throughput and making sure that the desired throughput distribution is
+respected (see the description of how BFQ works, and, if needed, the
+papers referred there).
+
+As for throughput, idling can be very helpful on highly seeky media
+like single spindle SATA/SAS disks where we can cut down on overall
+number of seeks and see improved throughput.
+
+Setting slice_idle to 0 will remove all the idling on queues and one
+should see an overall improved throughput on faster storage devices
+like multiple SATA/SAS disks in hardware RAID configuration.
+
+So depending on storage and workload, it might be useful to set
+slice_idle=0.  In general for SATA/SAS disks and software RAID of
+SATA/SAS disks keeping slice_idle enabled should be useful. For any
+configurations where there are multiple spindles behind single LUN
+(Host based hardware RAID controller or for storage arrays), setting
+slice_idle=0 might end up in better throughput and acceptable
+latencies.
+
+Idling is however necessary to have service guarantees enforced in
+case of differentiated weights or differentiated I/O-request lengths.
+To see why, suppose that a given BFQ queue A must get several I/O
+requests served for each request served for another queue B. Idling
+ensures that, if A makes a new I/O request slightly after becoming
+empty, then no request of B is dispatched in the middle, and thus A
+does not lose the possibility to get more than one request dispatched
+before the next request of B is dispatched. Note that idling
+guarantees the desired differentiated treatment of queues only in
+terms of I/O-request dispatches. To guarantee that the actual service
+order then corresponds to the dispatch order, the strict_guarantees
+tunable must be set too.
+
+There is an important flipside for idling: apart from the above cases
+where it is beneficial also for throughput, idling can severely impact
+throughput. One important case is random workload. Because of this
+issue, BFQ tends to avoid idling as much as possible, when it is not
+beneficial also for throughput. As a consequence of this behavior, and
+of further issues described for the strict_guarantees tunable,
+short-term service guarantees may be occasionally violated. And, in
+some cases, these guarantees may be more important than guaranteeing
+maximum throughput. For example, in video playing/streaming, a very
+low drop rate may be more important than maximum throughput. In these
+cases, consider setting the strict_guarantees parameter.
+
+strict_guarantees
+-----------------
+
+If this parameter is set (default: unset), then BFQ
+
+- always performs idling when the in-service queue becomes empty;
+
+- forces the device to serve one I/O request at a time, by dispatching a
+  new request only if there is no outstanding request.
+
+In the presence of differentiated weights or I/O-request sizes, both
+the above conditions are needed to guarantee that every BFQ queue
+receives its allotted share of the bandwidth. The first condition is
+needed for the reasons explained in the description of the slice_idle
+tunable.  The second condition is needed because all modern storage
+devices reorder internally-queued requests, which may trivially break
+the service guarantees enforced by the I/O scheduler.
+
+Setting strict_guarantees may evidently affect throughput.
+
+back_seek_max
+-------------
+
+This specifies, given in Kbytes, the maximum "distance" for backward seeking.
+The distance is the amount of space from the current head location to the
+sectors that are backward in terms of distance.
+
+This parameter allows the scheduler to anticipate requests in the "backward"
+direction and consider them as being the "next" if they are within this
+distance from the current head location.
+
+back_seek_penalty
+-----------------
+
+This parameter is used to compute the cost of backward seeking. If the
+backward distance of request is just 1/back_seek_penalty from a "front"
+request, then the seeking cost of two requests is considered equivalent.
+
+So scheduler will not bias toward one or the other request (otherwise scheduler
+will bias toward front request). Default value of back_seek_penalty is 2.
+
+fifo_expire_async
+-----------------
+
+This parameter is used to set the timeout of asynchronous requests. Default
+value of this is 248ms.
+
+fifo_expire_sync
+----------------
+
+This parameter is used to set the timeout of synchronous requests. Default
+value of this is 124ms. In case to favor synchronous requests over asynchronous
+one, this value should be decreased relative to fifo_expire_async.
+
+low_latency
+-----------
+
+This parameter is used to enable/disable BFQ's low latency mode. By
+default, low latency mode is enabled. If enabled, interactive and soft
+real-time applications are privileged and experience a lower latency,
+as explained in more detail in the description of how BFQ works.
+
+timeout_sync
+------------
+
+Maximum amount of device time that can be given to a task (queue) once
+it has been selected for service. On devices with costly seeks,
+increasing this time usually increases maximum throughput. On the
+opposite end, increasing this time coarsens the granularity of the
+short-term bandwidth and latency guarantees, especially if the
+following parameter is set to zero.
+
+max_budget
+----------
+
+Maximum amount of service, measured in sectors, that can be provided
+to a BFQ queue once it is set in service (of course within the limits
+of the above timeout). According to what said in the description of
+the algoritm, larger values increase the throughput in proportion to
+the percentage of sequential I/O requests issued. The price of larger
+values is that they coarsen the granularity of short-term bandwidth
+and latency guarantees.
+
+The default value is 0, which enables auto-tuning: BFQ sets max_budget
+to the maximum number of sectors that can be served during
+timeout_sync, according to the estimated peak rate.
+
+weights
+-------
+
+Read-only parameter, used to show the weights of the currently active
+BFQ queues.
+
+
+wr_ tunables
+------------
+
+BFQ exports a few parameters to control/tune the behavior of
+low-latency heuristics.
+
+wr_coeff
+
+Factor by which the weight of a weight-raised queue is multiplied. If
+the queue is deemed soft real-time, then the weight is further
+multiplied by an additional, constant factor.
+
+wr_max_time
+
+Maximum duration of a weight-raising period for an interactive task
+(ms). If set to zero (default value), then this value is computed
+automatically, as a function of the peak rate of the device. In any
+case, when the value of this parameter is read, it always reports the
+current duration, regardless of whether it has been set manually or
+computed automatically.
+
+wr_max_softrt_rate
+
+Maximum service rate below which a queue is deemed to be associated
+with a soft real-time application, and is then weight-raised
+accordingly (sectors/sec).
+
+wr_min_idle_time
+
+Minimum idle period after which interactive weight-raising may be
+reactivated for a queue (in ms).
+
+wr_rt_max_time
+
+Maximum weight-raising duration for soft real-time queues (in ms). The
+start time from which this duration is considered is automatically
+moved forward if the queue is detected to be still soft real-time
+before the current soft real-time weight-raising period finishes.
+
+wr_min_inter_arr_async
+
+Minimum period between I/O request arrivals after which weight-raising
+may be reactivated for an already busy async queue (in ms).
+
+
+4. Group scheduling with BFQ
+============================
+
+BFQ supports both cgroup-v1 and cgroup-v2 io controllers, namely blkio
+and io. In particular, BFQ supports weight-based proportional
+share.
+
+4-1 Service guarantees provided
+-------------------------------
+
+With BFQ, proportional share means true proportional share of the
+device bandwidth, according to group weights. For example, a group
+with weight 200 gets twice the bandwidth, and not just twice the time,
+of a group with weight 100.
+
+BFQ supports hierarchies (group trees) of any depth. Bandwidth is
+distributed among groups and processes in the expected way: for each
+group, the children of the group share the whole bandwidth of the
+group in proportion to their weights. In particular, this implies
+that, for each leaf group, every process of the group receives the
+same share of the whole group bandwidth, unless the ioprio of the
+process is modified.
+
+The resource-sharing guarantee for a group may partially or totally
+switch from bandwidth to time, if providing bandwidth guarantees to
+the group lowers the throughput too much. This switch occurs on a
+per-process basis: if a process of a leaf group causes throughput loss
+if served in such a way to receive its share of the bandwidth, then
+BFQ switches back to just time-based proportional share for that
+process.
+
+4-2 Interface
+-------------
+
+To get proportional sharing of bandwidth with BFQ for a given device,
+BFQ must of course be the active scheduler for that device.
+
+Within each group directory, the names of the files associated with
+BFQ-specific cgroup parameters and stats begin with the "bfq."
+prefix. So, with cgroups-v1 or cgroups-v2, the full prefix for
+BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group
+parameter to set the weight of a group with BFQ is blkio.bfq.weight
+or io.bfq.weight.
+
+Parameters to set
+-----------------
+
+For each group, there is only the following parameter to set.
+
+weight (namely blkio.bfq.weight or io.bfq-weight): the weight of the
+group inside its parent. Available values: 1..10000 (default 100). The
+linear mapping between ioprio and weights, described at the beginning
+of the tunable section, is still valid, but all weights higher than
+IOPRIO_BE_NR*10 are mapped to ioprio 0.
+
+
+[1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
+    Scheduler", Proceedings of the First Workshop on Mobile System
+    Technologies (MST-2015), May 2015.
+    http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
+
+[2] P. Valente and M. Andreolini, "Improving Application
+    Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of
+    the 5th Annual International Systems and Storage Conference
+    (SYSTOR '12), June 2012.
+    Slightly extended version:
+    http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite-
+							results.pdf

From 11f0d8caa035ff4fe6fe02ebc0f2f14dc5473479 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Sat, 22 Oct 2016 15:26:33 +0200
Subject: [PATCH 02/31] BUGFIX: Replace max wrongly used for modulo numbers

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index eef6ff4..c161ff0 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2179,9 +2179,13 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
 			 * not only expires, but also remains with no
 			 * request.
 			 */
-			bfqq->last_wr_start_finish += jiffies -
-				max_t(unsigned long, bfqq->last_wr_start_finish,
-				      bfqq->budget_timeout);
+			if (time_after(bfqq->budget_timeout,
+				       bfqq->last_wr_start_finish))
+				bfqq->last_wr_start_finish +=
+					jiffies - bfqq->budget_timeout;
+			else
+				bfqq->last_wr_start_finish = jiffies;
+
 			if (time_is_after_jiffies(bfqq->last_wr_start_finish)) {
 			       pr_crit(
 			       "BFQ WARNING:last %lu budget %lu jiffies %lu",

From eb59d4ac664f64372c440006b0cd27bf137a2025 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Mon, 24 Oct 2016 17:08:32 +0200
Subject: [PATCH 03/31] Improve bfq-iosched.txt

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 Documentation/block/bfq-iosched.txt | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 67e1269..8626b14 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -78,13 +78,15 @@ sequential workloads considered in our tests. With random workloads,
 and with all the workloads on flash-based devices, BFQ achieves,
 instead, about the same throughput as the other schedulers.
 
-Strong fairness/bandwidth guarantees
+Strong fairness, bandwidth and delay guarantees
 
 BFQ distributes the device throughput, and not just the device time,
-as desired among I/O-bound applications, with any workload and
-regardless of the device parameters. BFQ switches to time-based
-distribution (only) for processes that would otherwise cause a
-throughput loss.
+among I/O-bound applications in proportion their weights, with any
+workload and regardless of the device parameters. From these bandwidth
+guarantees, it is possible to compute tight per-I/O-request delay
+guarantees by a simple formula. If not configured for strict service
+guarantees, BFQ switches to time-based resource sharing (only) for
+applications that would otherwise cause a throughput loss.
 
 1-2 Server systems
 ------------------

From a53d556f54f33de6e71f909748c6994ff90fa243 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Mon, 24 Oct 2016 20:36:53 +0200
Subject: [PATCH 04/31] Improve help message in Kconfig.iosched

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/Kconfig.iosched | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 6d92579..277b112 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -43,13 +43,10 @@ config IOSCHED_BFQ
 	tristate "BFQ I/O scheduler"
 	default n
 	---help---
-	  The BFQ I/O scheduler tries to distribute bandwidth among
-	  all processes according to their weights.
-	  It aims at distributing the bandwidth as desired, independently of
-	  the disk parameters and with any workload. It also tries to
-	  guarantee low latency to interactive and soft real-time
-	  applications. If compiled built-in (saying Y here), BFQ can
-	  be configured to support hierarchical scheduling.
+	The BFQ I/O scheduler distributes bandwidth among all
+	processes according to their weights, regardless of the
+	device parameters and with any workload. It also guarantees
+	a low latency to interactive and soft real-time applications.
 
 config BFQ_GROUP_IOSCHED
 	bool "BFQ hierarchical scheduling support"

From 710824a320c015b314abba3bc2982d0f3caa0acc Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Mon, 24 Oct 2016 20:37:59 +0200
Subject: [PATCH 05/31] Remove stray disk word in first line

SIgned-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index c161ff0..b5fdd45 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1,5 +1,5 @@
 /*
- * Budget Fair Queueing (BFQ) disk scheduler.
+ * Budget Fair Queueing (BFQ) I/O scheduler.
  *
  * Based on ideas and code from CFQ:
  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>

From a2f6ff7877e60f533cdf39e941fc45f7b5aafcee Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Mon, 24 Oct 2016 20:41:03 +0200
Subject: [PATCH 06/31] BUGFIX: Remove wrong conversion in use of
 bfq_fifo_expire

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index b5fdd45..a8c6887 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4268,8 +4268,7 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
 
 	bfq_add_request(rq);
 
-	rq->fifo_time = ktime_get_ns() +
-	  jiffies_to_nsecs(bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
+	rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
 	list_add_tail(&rq->queuelist, &bfqq->fifo);
 
 	bfq_rq_enqueued(bfqd, bfqq, rq);

From c91736449d06052469c2c3abaf5d455b85e77417 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Tue, 25 Oct 2016 09:31:37 +0200
Subject: [PATCH 07/31] bfq-iosched.txt: add description of preemption

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 Documentation/block/bfq-iosched.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 8626b14..8979408 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -227,6 +227,12 @@ plus a lot of code, are borrowed from CFQ.
 	budget assigned to a queue waiting for service is, the sooner
 	B-WF2Q+ will serve that queue (Subsec 3.3 in [2]).
 
+- If several processes are competing for the device at the same time,
+  but all processes and groups have the same weight, then BFQ
+  guarantees the expected throughput distribution without ever idling
+  the device. It uses preemption instead. Throughput is then much
+  higher in this common scenario.
+
 - ioprio classes are served in strict priority order, i.e.,
   lower-priority queues are not served as long as there are
   higher-priority queues.  Among queues in the same class, the

From 2dc9393e7ff65d0e5340118402134cff03276b1c Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Sat, 5 Nov 2016 18:09:07 +0100
Subject: [PATCH 08/31] Add parentheses to complex macros

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index a8c6887..88be34e 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -77,19 +77,19 @@
 static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
 
 /* Maximum backwards seek, in KiB. */
-static const int bfq_back_max = 16 * 1024;
+static const int bfq_back_max = (16 * 1024);
 
 /* Penalty of a backwards seek, in number of sectors. */
 static const int bfq_back_penalty = 2;
 
 /* Idling period duration, in ns. */
-static u32 bfq_slice_idle = NSEC_PER_SEC / 125;
+static u32 bfq_slice_idle = (NSEC_PER_SEC / 125);
 
 /* Minimum number of assigned budgets for which stats are safe to compute. */
 static const int bfq_stats_min_budgets = 194;
 
 /* Default maximum budget values, in sectors and number of requests. */
-static const int bfq_default_max_budget = 16 * 1024;
+static const int bfq_default_max_budget = (16 * 1024);
 
 /*
  * Async to sync throughput distribution is controlled as follows:
@@ -99,7 +99,7 @@ static const int bfq_default_max_budget = 16 * 1024;
 static const int bfq_async_charge_factor = 10;
 
 /* Default timeout values, in jiffies, approximating CFQ defaults. */
-static const int bfq_timeout = HZ / 8;
+static const int bfq_timeout = (HZ / 8);
 
 struct kmem_cache *bfq_pool;
 
@@ -117,7 +117,7 @@ struct kmem_cache *bfq_pool;
 /* Min number of samples required to perform peak-rate update */
 #define BFQ_RATE_MIN_SAMPLES	32
 /* Min observation time interval required to perform a peak-rate update (ns) */
-#define BFQ_RATE_MIN_INTERVAL	300*NSEC_PER_MSEC
+#define BFQ_RATE_MIN_INTERVAL	(300*NSEC_PER_MSEC)
 /* Target observation time interval for a peak-rate update (ns) */
 #define BFQ_RATE_REF_INTERVAL	NSEC_PER_SEC
 

From 882f0607d82efe44d98634ce1fdcc4b602ccf5ec Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Sat, 5 Nov 2016 18:32:20 +0100
Subject: [PATCH 09/31] Fix type in bfq-iosched.txt

Sigend-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 Documentation/block/bfq-iosched.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 8979408..5ba67af 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -385,7 +385,7 @@ max_budget
 Maximum amount of service, measured in sectors, that can be provided
 to a BFQ queue once it is set in service (of course within the limits
 of the above timeout). According to what said in the description of
-the algoritm, larger values increase the throughput in proportion to
+the algorithm, larger values increase the throughput in proportion to
 the percentage of sequential I/O requests issued. The price of larger
 values is that they coarsen the granularity of short-term bandwidth
 and latency guarantees.

From 99b1da54ca162abdc9e62915050154b35bd9bc27 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Sat, 5 Nov 2016 18:34:58 +0100
Subject: [PATCH 10/31] BFQ-v8r5

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 2 +-
 block/bfq.h         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 88be34e..0f3081d 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5212,7 +5212,7 @@ static struct blkcg_policy blkcg_policy_bfq = {
 static int __init bfq_init(void)
 {
 	int ret;
-	char msg[50] = "BFQ I/O-scheduler: v8r4";
+	char msg[50] = "BFQ I/O-scheduler: v8r5";
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	ret = blkcg_policy_register(&blkcg_policy_bfq);
diff --git a/block/bfq.h b/block/bfq.h
index ea1e7d8..b80abe0 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -1,5 +1,5 @@
 /*
- * BFQ-v8r4 for 4.8.0: data structures and common functions prototypes.
+ * BFQ-v8r5 for 4.8.0: data structures and common functions prototypes.
  *
  * Based on ideas and code from CFQ:
  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>

From d825ef204aa934e1fd9ebd925276ca0e67783508 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Wed, 7 Dec 2016 19:31:37 +0100
Subject: [PATCH 11/31] Improve documentation

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>

Squash me
---
 Documentation/block/bfq-iosched.txt | 26 ++++++++++++++++++++------
 block/Kconfig.iosched               |  5 ++++-
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 5ba67af..13b5248 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -252,9 +252,14 @@ of slice_idle are copied from CFQ too.
 per-process ioprio and weight
 -----------------------------
 
-Unless the cgroups interface is used, weights can be assigned to
-processes only indirectly, through I/O priorities, and according to
-the relation: weight = (IOPRIO_BE_NR - ioprio) * 10.
+Unless the cgroups interface is used (see "4. BFQ group scheduling"),
+weights can be assigned to processes only indirectly, through I/O
+priorities, and according to the relation:
+weight = (IOPRIO_BE_NR - ioprio) * 10.
+
+Beware that, if low-latency is set, then BFQ automatically raises the
+weight of the queues associated with interactive and soft real-time
+applications. Unset this tunable if you need/want to control weights.
 
 slice_idle
 ----------
@@ -369,6 +374,11 @@ default, low latency mode is enabled. If enabled, interactive and soft
 real-time applications are privileged and experience a lower latency,
 as explained in more detail in the description of how BFQ works.
 
+DO NOT enable this mode if you need full control on bandwidth
+distribution. In fact, if it is enabled, then BFQ automatically
+increases the bandwidth share of privileged applications, as the main
+means to guarantee a lower latency to them.
+
 timeout_sync
 ------------
 
@@ -449,9 +459,9 @@ may be reactivated for an already busy async queue (in ms).
 4. Group scheduling with BFQ
 ============================
 
-BFQ supports both cgroup-v1 and cgroup-v2 io controllers, namely blkio
-and io. In particular, BFQ supports weight-based proportional
-share.
+BFQ supports both cgroups-v1 and cgroups-v2 io controllers, namely
+blkio and io. In particular, BFQ supports weight-based proportional
+share. To activate cgroups support, set BFQ_GROUP_IOSCHED.
 
 4-1 Service guarantees provided
 -------------------------------
@@ -501,6 +511,10 @@ linear mapping between ioprio and weights, described at the beginning
 of the tunable section, is still valid, but all weights higher than
 IOPRIO_BE_NR*10 are mapped to ioprio 0.
 
+Recall that, if low-latency is set, then BFQ automatically raises the
+weight of the queues associated with interactive and soft real-time
+applications. Unset this tunable if you need/want to control weights.
+
 
 [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
     Scheduler", Proceedings of the First Workshop on Mobile System
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 277b112..f2cd945 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -47,13 +47,16 @@ config IOSCHED_BFQ
 	processes according to their weights, regardless of the
 	device parameters and with any workload. It also guarantees
 	a low latency to interactive and soft real-time applications.
+	Details in Documentation/block/bfq-iosched.txt
 
 config BFQ_GROUP_IOSCHED
 	bool "BFQ hierarchical scheduling support"
 	depends on IOSCHED_BFQ && BLK_CGROUP
 	default n
 	---help---
-	  Enable hierarchical scheduling in BFQ, using the blkio controller.
+
+	Enable hierarchical scheduling in BFQ, using the blkio
+	(cgroups-v1) or io (cgroups-v2) controller.
 
 choice
 	prompt "Default I/O scheduler"

From 880441682487350cd5b116c2cea830351f9c9478 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Wed, 7 Dec 2016 19:43:58 +0100
Subject: [PATCH 12/31] Add and improve pointers in initial comments

---
 block/bfq-iosched.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0f3081d..b07d251 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -14,6 +14,12 @@
  * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
  * file.
  *
+ * BFQ is a proportional-share I/O scheduler, with some extra
+ * low-latency capabilities. BFQ also supports full hierarchical
+ * scheduling through cgroups. Next paragraphs provide an introduction
+ * on BFQ inner workings. Details on BFQ benefits and usage can be
+ * found in Documentation/block/bfq-iosched.txt.
+ *
  * BFQ is a proportional-share storage-I/O scheduling algorithm based
  * on the slice-by-slice service scheme of CFQ. But BFQ assigns
  * budgets, measured in number of sectors, to processes instead of
@@ -43,10 +49,10 @@
  * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N)
  * complexity derives from the one introduced with EEVDF in [3].
  *
- * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness
- *     with the BFQ Disk I/O Scheduler'',
- *     Proceedings of the 5th Annual International Systems and Storage
- *     Conference (SYSTOR '12), June 2012.
+ * [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
+ *   Scheduler", Proceedings of the First Workshop on Mobile System
+ *   Technologies (MST-2015), May 2015.
+ *   http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
  *
  * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf
  *

From 60dad3833c9afa52315508579cbb4bac7e55d3a3 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Fri, 9 Dec 2016 07:05:29 +0100
Subject: [PATCH 13/31] Fix typo in comments on bfq_lookup_next_entity

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index 45d63d3..9afeaab 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -1265,7 +1265,7 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
  * NOTE: since we cache the next_in_service entity at each level of the
  * hierarchy, the complexity of the lookup can be decreased with
  * absolutely no effort just returning the cached next_in_service value;
- * we prefer to do full lookups to test the consistency of * the data
+ * we prefer to do full lookups to test the consistency of the data
  * structures.
  */
 static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,

From 15f446f3bc189df88c64366930b5e237c8d9d5d5 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Fri, 9 Dec 2016 09:46:41 +0100
Subject: [PATCH 14/31] Fix false-positive warning for uninitialized var

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index b07d251..3043e5e 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5044,7 +5044,7 @@ STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0,
 static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)\
 {									\
 	struct bfq_data *bfqd = e->elevator_data;			\
-	unsigned long __data;						\
+	unsigned long uninitialized_var(__data);			\
 	int ret = bfq_var_store(&__data, (page), count);		\
 	if (__data < (MIN))						\
 		__data = (MIN);						\

From 1430feb8c7194f72be0a8066ad6b996d78bf3e09 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Sun, 11 Dec 2016 11:46:13 +0100
Subject: [PATCH 15/31] Improve and fix some bugs in hierarchical code

This commit provides the following contributions.

BUGFIX Removed the check that, when the new queue to set in service
must be selected, the cached next_in_service entities coincide with
the entities chosen by __bfq_lookup_next_entity. This check,
issuing a warning on failure, was wrong, because the cached and the
newly chosen entity could differ in case of a CLASS_IDLE timeout.

EFFICIENCY IMPROVEMENT (this improvement is related to the above
BUGFIX) The cached next_in_service entities are now really used to
select the next queue to serve when the in-service queue
expires. Before this change, the cached values were used only for
extra (and in general wrong) consistency checks. This caused
additional overhead instead of reducing it.

EFFICIENCY IMPROVEMENT The next entity to serve, for each level of the
hierarchy, is now updated on every event that may change it, i.e., on
every activation or deactivation of any entity. This finer granularity
is not strictly needed for corectness, because it is only on queue
expirations that BFQ needs to know what are the next entities to
serve. Yet this change makes it possible to implement optimizations in
which it is necessary to know the next queue to serve before the
in-service queue expires.

SERVICE-ACCURACY IMPROVEMENT The per-device CLASS_IDLE service timeout
has been turned into a much more accurate per-group timeout.

CODE-QUALITY IMPROVEMENT The non-trivial parts touched by the above
improvements have been partially rewritten, and enriched of comments,
so as to improve their transparency and understandability.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-cgroup.c  |   2 +-
 block/bfq-iosched.c |   2 +-
 block/bfq-sched.c   | 497 ++++++++++++++++++++++++++++++++++++----------------
 block/bfq.h         |   5 +-
 4 files changed, 348 insertions(+), 158 deletions(-)

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index b50ae8e..a04bc40 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -314,7 +314,7 @@ static void bfq_init_entity(struct bfq_entity *entity,
 		bfqq->ioprio_class = bfqq->new_ioprio_class;
 		bfqg_get(bfqg);
 	}
-	entity->parent = bfqg->my_entity;
+	entity->parent = bfqg->my_entity; /* NULL for root group */
 	entity->sched_data = &bfqg->sched_data;
 }
 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 3043e5e..2d00bd10 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4763,6 +4763,7 @@ static void bfq_init_root_group(struct bfq_group *root_group,
 	root_group->rq_pos_tree = RB_ROOT;
 	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
 		root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
+	root_group->sched_data.bfq_class_idle_last_service = jiffies;
 }
 
 static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
@@ -4836,7 +4837,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 	bfqd->bfq_back_max = bfq_back_max;
 	bfqd->bfq_back_penalty = bfq_back_penalty;
 	bfqd->bfq_slice_idle = bfq_slice_idle;
-	bfqd->bfq_class_idle_last_service = 0;
 	bfqd->bfq_timeout = bfq_timeout;
 
 	bfqd->bfq_requests_within_timer = 120;
diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index 9afeaab..2f54ac5 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -15,16 +15,15 @@
 static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
-#define for_each_entity(entity)	\
+/* both next loops stop at one of the child entities of the root group */
+#define for_each_entity(entity)				\
 	for (; entity ; entity = entity->parent)
 
 #define for_each_entity_safe(entity, parent) \
 	for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
 
 
-static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
-						 int extract,
-						 struct bfq_data *bfqd);
+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd);
 
 static void bfq_update_budget(struct bfq_entity *next_in_service)
 {
@@ -47,23 +46,18 @@ static void bfq_update_budget(struct bfq_entity *next_in_service)
 		bfqg_entity->budget = next_in_service->budget;
 }
 
-static int bfq_update_next_in_service(struct bfq_sched_data *sd)
+/*
+ * Incomplete version that always returns true. It will return also
+ * false in its complete version, in case either next_in_service has
+ * not changed, or next_in_service has changed but in a way that will
+ * not influence upper levels.
+ */
+static bool bfq_update_next_in_service(struct bfq_sched_data *sd)
 {
 	struct bfq_entity *next_in_service;
 	struct bfq_queue *bfqq;
 
-	if (sd->in_service_entity)
-		/* will update/requeue at the end of service */
-		return 0;
-
-	/*
-	 * NOTE: this can be improved in many ways, such as returning
-	 * 1 (and thus propagating upwards the update) only when the
-	 * budget changes, or caching the bfqq that will be scheduled
-	 * next from this subtree.  By now we worry more about
-	 * correctness than about performance...
-	 */
-	next_in_service = bfq_lookup_next_entity(sd, 0, NULL);
+	next_in_service = bfq_lookup_next_entity(sd);
 	sd->next_in_service = next_in_service;
 
 	if (next_in_service)
@@ -75,6 +69,7 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
 	if (bfqq)
 		bfq_log_bfqq(bfqq->bfqd, bfqq,
 			     "update_next_in_service: chosen this queue");
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
 	else {
 		struct bfq_group *bfqg =
 			container_of(next_in_service,
@@ -83,15 +78,11 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
 		bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
 			     "update_next_in_service: chosen this entity");
 	}
+#endif
 exit:
-	return 1;
+	return true;
 }
 
-static void bfq_check_next_in_service(struct bfq_sched_data *sd,
-				      struct bfq_entity *entity)
-{
-	WARN_ON(sd->next_in_service != entity);
-}
 #else
 #define for_each_entity(entity)	\
 	for (; entity ; entity = NULL)
@@ -104,11 +95,6 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
 	return 0;
 }
 
-static void bfq_check_next_in_service(struct bfq_sched_data *sd,
-				      struct bfq_entity *entity)
-{
-}
-
 static void bfq_update_budget(struct bfq_entity *next_in_service)
 {
 }
@@ -851,23 +837,59 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
 	BUG_ON(!sd);
 	BUG_ON(!st);
 	if (entity == sd->in_service_entity) {
-		BUG_ON(entity->tree);
 		/*
-		 * If we are requeueing the current entity we have
-		 * to take care of not charging to it service it has
-		 * not received.
+		 * We are requeueing the current in-service entity,
+		 * because of the requeueing of a just-expired
+		 * non-idle leaf entity in the path originating from
+		 * this entity. In fact, in this case, then all
+		 * entities in this path need to be requeued again for
+		 * next service.
+		 *
+		 * Before requeueing, the start time of the entity
+		 * must be moved forward to account for the service
+		 * that the entity has received while in service. The
+		 * finish time will then be updated according to this
+		 * new value of the start time, and to the budget of
+		 * the entity.
 		 */
 		bfq_calc_finish(entity, entity->service);
 		entity->start = entity->finish;
 		sd->in_service_entity = NULL;
+		BUG_ON(entity->tree && entity->tree != &st->active);
+		/*
+		 * In addition, if the entity had more than one child
+		 * when set in service, then was not extracted from
+		 * the active tree. This implies that the position of
+		 * the entity in the active tree may need to be
+		 * changed now, because we have just updated the start
+		 * time of the entity, and we will update its finish
+		 * time in a moment (the requeueing is then, more
+		 * precisely, a repositioning in this case). To
+		 * implement this repositioning, we: 1) dequeue the
+		 * entity here, 2) update the finish time and
+		 * requeue the entity according to the new
+		 * timestamps below.
+		 */
+		if (entity->tree)
+			bfq_active_extract(st, entity);
 	} else if (entity->tree == &st->active) {
 		/*
-		 * Requeueing an entity due to a change of some
-		 * next_in_service entity below it.  We reuse the
-		 * old start time.
+		 * The entity is already active, and not in
+		 * service. In this case, this function gets called
+		 * only if the next_in_service entity below this
+		 * entity has changed, and this change has caused the
+		 * budget of this entity to change, which, finally
+		 * implies that the finish time of this entity must be
+		 * updated. Such an update may cause the scheduling,
+		 * i.e., the position in the active tree, of this
+		 * entity to change. We handle this fact by: 1)
+		 * dequeueing the entity here, 2) updating the finish
+		 * time and requeueing the entity according to the new
+		 * timestamps below. This is the same approach as the
+		 * non-extracted-entity sub-case above.
 		 */
 		bfq_active_extract(st, entity);
-	} else {
+	} else { /* This is a 'true' activation, not a requeueing. */
 		unsigned long long min_vstart;
 
 		/* See comments on bfq_fqq_update_budg_for_activation */
@@ -977,6 +999,11 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
 			     entity->start <= st->vtime ? "" : "non ", st);
 #endif
 	}
+	BUG_ON(RB_EMPTY_ROOT(&st->active));
+	BUG_ON(&st->active != &sd->service_tree->active &&
+	       &st->active != &(sd->service_tree+1)->active &&
+	       &st->active != &(sd->service_tree+2)->active);
+
 }
 
 /**
@@ -996,13 +1023,20 @@ static void bfq_activate_entity(struct bfq_entity *entity,
 		__bfq_activate_entity(entity, non_blocking_wait_rq);
 
 		sd = entity->sched_data;
-		if (!bfq_update_next_in_service(sd))
+		BUG_ON(RB_EMPTY_ROOT(&sd->service_tree->active) &&
+		       RB_EMPTY_ROOT(&(sd->service_tree+1)->active) &&
+		       RB_EMPTY_ROOT(&(sd->service_tree+2)->active));
+
+		if (!bfq_update_next_in_service(sd)) {
+			BUG_ON(!sd->next_in_service);
 			/*
 			 * No need to propagate the activation to the
 			 * upper entities, as they will be updated when
 			 * the in-service entity is rescheduled.
 			 */
 			break;
+		}
+		BUG_ON(!sd->next_in_service);
 	}
 }
 
@@ -1016,31 +1050,33 @@ static void bfq_activate_entity(struct bfq_entity *entity,
  * any scheduler tree, extract it from that tree, and if necessary
  * and if the caller did not specify @requeue, put it on the idle tree.
  *
- * Return %1 if the caller should update the entity hierarchy, i.e.,
+ * Return %true if the caller should update the entity hierarchy, i.e.,
  * if the entity was in service or if it was the next_in_service for
- * its sched_data; return %0 otherwise.
+ * its sched_data; return %false otherwise.
  */
-static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+static bool __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
 {
 	struct bfq_sched_data *sd = entity->sched_data;
 	struct bfq_service_tree *st;
 	int was_in_service;
-	int ret = 0;
+	bool ret = false;
 
 	if (sd == NULL || !entity->on_st) /* never activated, or inactive */
-		return 0;
+		return false;
 
 	st = bfq_entity_service_tree(entity);
 	was_in_service = entity == sd->in_service_entity;
 
-	BUG_ON(was_in_service && entity->tree);
+	BUG_ON(was_in_service && entity->tree && entity->tree != &st->active);
 
 	if (was_in_service) {
 		bfq_calc_finish(entity, entity->service);
 		sd->in_service_entity = NULL;
-	} else if (entity->tree == &st->active)
+	}
+
+	if (entity->tree == &st->active)
 		bfq_active_extract(st, entity);
-	else if (entity->tree == &st->idle)
+	else if (!was_in_service && entity->tree == &st->idle)
 		bfq_idle_extract(st, entity);
 	else if (entity->tree)
 		BUG();
@@ -1124,19 +1160,13 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
 }
 
 /**
- * bfq_update_vtime - update vtime if necessary.
+ * bfq_calc_vtime_jump - compute the value to which the vtime should jump,
+ *                       if needed, to have at least one entity eligible.
  * @st: the service tree to act upon.
  *
- * If necessary update the service tree vtime to have at least one
- * eligible entity, skipping to its start time.  Assumes that the
- * active tree of the device is not empty.
- *
- * NOTE: this hierarchical implementation updates vtimes quite often,
- * we may end up with reactivated processes getting timestamps after a
- * vtime skip done because we needed a ->first_active entity on some
- * intermediate node.
+ * Assumes that st is not empty.
  */
-static void bfq_update_vtime(struct bfq_service_tree *st)
+static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st)
 {
 	struct bfq_entity *entry;
 	struct rb_node *node = st->active.rb_node;
@@ -1144,22 +1174,30 @@ static void bfq_update_vtime(struct bfq_service_tree *st)
 	entry = rb_entry(node, struct bfq_entity, rb_node);
 	if (bfq_gt(entry->min_start, st->vtime)) {
 		struct bfq_queue *bfqq = bfq_entity_to_bfqq(entry);
-		st->vtime = entry->min_start;
 
 		if (bfqq)
 			bfq_log_bfqq(bfqq->bfqd, bfqq,
-				     "update_vtime: new vtime %llu %p",
-				     ((st->vtime>>10)*1000)>>12, st);
+				     "calc_vtime_jump: new value %llu",
+				     entry->min_start);
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 		else {
 			struct bfq_group *bfqg =
 				container_of(entry, struct bfq_group, entity);
 
 			bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
-				     "update_vtime: new vtime %llu %p",
-				     ((st->vtime>>10)*1000)>>12, st);
+				     "calc_vtime_jump: new value %llu",
+				     entry->min_start);
 		}
 #endif
+		return entry->min_start;
+	}
+	return st->vtime;
+}
+
+static void bfq_update_vtime(struct bfq_service_tree *st, u64 new_value)
+{
+	if (new_value > st->vtime) {
+		st->vtime = new_value;
 		bfq_forget_idle(st);
 	}
 }
@@ -1168,6 +1206,7 @@ static void bfq_update_vtime(struct bfq_service_tree *st)
  * bfq_first_active_entity - find the eligible entity with
  *                           the smallest finish time
  * @st: the service tree to select from.
+ * @vtime: the system virtual to use as a reference for eligibility
  *
  * This function searches the first schedulable entity, starting from the
  * root of the tree and going on the left every time on this side there is
@@ -1175,7 +1214,8 @@ static void bfq_update_vtime(struct bfq_service_tree *st)
  * the right is followed only if a) the left subtree contains no eligible
  * entities and b) no eligible entity has been found yet.
  */
-static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
+static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st,
+						  u64 vtime)
 {
 	struct bfq_entity *entry, *first = NULL;
 	struct rb_node *node = st->active.rb_node;
@@ -1183,15 +1223,15 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
 	while (node) {
 		entry = rb_entry(node, struct bfq_entity, rb_node);
 left:
-		if (!bfq_gt(entry->start, st->vtime))
+		if (!bfq_gt(entry->start, vtime))
 			first = entry;
 
-		BUG_ON(bfq_gt(entry->min_start, st->vtime));
+		BUG_ON(bfq_gt(entry->min_start, vtime));
 
 		if (node->rb_left) {
 			entry = rb_entry(node->rb_left,
 					 struct bfq_entity, rb_node);
-			if (!bfq_gt(entry->min_start, st->vtime)) {
+			if (!bfq_gt(entry->min_start, vtime)) {
 				node = node->rb_left;
 				goto left;
 			}
@@ -1209,28 +1249,71 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
  * __bfq_lookup_next_entity - return the first eligible entity in @st.
  * @st: the service tree.
  *
- * Update the virtual time in @st and return the first eligible entity
- * it contains.
+ * If there is no in-service entity for the sched_data st belongs to,
+ * then return the entity that will be set in service if:
+ * 1) the parent entity this st belongs to is set in service;
+ * 2) no entity belonging to such parent entity undergoes a state change
+ * that would influence the timestamps of the entity (e.g., becomes idle,
+ * becomes backlogged, changes its budget, ...).
+ *
+ * In this first case, update the virtual time in @st too (see the
+ * comments on this update inside the function).
+ *
+ * In constrast, if there is an in-service entity, then return the
+ * entity that would be set in service if not only the above
+ * conditions, but also the next one held true: the currently
+ * in-service entity, on expiration,
+ * 1) gets a finish time equal to the current one, or
+ * 2) is not eligible any more, or
+ * 3) is idle.
  */
 static struct bfq_entity *
-__bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
+__bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service
+#if 0
+			 , bool force
+#endif
+	)
 {
-	struct bfq_entity *entity, *new_next_in_service = NULL;
+	struct bfq_entity *entity
+#if 0
+		, *new_next_in_service = NULL
+#endif
+		;
+	u64 new_vtime;
 	struct bfq_queue *bfqq;
 
 	if (RB_EMPTY_ROOT(&st->active))
 		return NULL;
 
-	bfq_update_vtime(st);
-	entity = bfq_first_active_entity(st);
-	BUG_ON(bfq_gt(entity->start, st->vtime));
+	/*
+	 * Get the value of the system virtual time for which at
+	 * least one entity is eligible.
+	 */
+	new_vtime = bfq_calc_vtime_jump(st);
+
+	/*
+	 * If there is no in-service entity for the sched_data this
+	 * active tree belongs to, then push the system virtual time
+	 * up to the value that guarantees that at least one entity is
+	 * eligible. If, instead, there is an in-service entity, then
+	 * do not make any such update, because there is already an
+	 * eligible entity, namely the in-service one (even if the
+	 * entity is not on st, because it was extracted when set in
+	 * service).
+	 */
+	if (!in_service)
+		bfq_update_vtime(st, new_vtime);
 
+	entity = bfq_first_active_entity(st, new_vtime);
+	BUG_ON(bfq_gt(entity->start, new_vtime));
+
+	/* Log some information */
 	bfqq = bfq_entity_to_bfqq(entity);
 	if (bfqq)
 		bfq_log_bfqq(bfqq->bfqd, bfqq,
 			     "__lookup_next: start %llu vtime %llu st %p",
 			     ((entity->start>>10)*1000)>>12,
-			     ((st->vtime>>10)*1000)>>12, st);
+			     ((new_vtime>>10)*1000)>>12, st);
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	else {
 		struct bfq_group *bfqg =
@@ -1239,20 +1322,11 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
 		bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
 			     "__lookup_next: start %llu vtime %llu st %p",
 			     ((entity->start>>10)*1000)>>12,
-			     ((st->vtime>>10)*1000)>>12, st);
+			     ((new_vtime>>10)*1000)>>12, st);
 	}
 #endif
 
-	/*
-	 * If the chosen entity does not match with the sched_data's
-	 * next_in_service and we are forcedly serving the IDLE priority
-	 * class tree, bubble up budget update.
-	 */
-	if (unlikely(force && entity != entity->sched_data->next_in_service)) {
-		new_next_in_service = entity;
-		for_each_entity(new_next_in_service)
-			bfq_update_budget(new_next_in_service);
-	}
+	BUG_ON(!entity);
 
 	return entity;
 }
@@ -1260,90 +1334,70 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
 /**
  * bfq_lookup_next_entity - return the first eligible entity in @sd.
  * @sd: the sched_data.
- * @extract: if true the returned entity will be also extracted from @sd.
  *
- * NOTE: since we cache the next_in_service entity at each level of the
- * hierarchy, the complexity of the lookup can be decreased with
- * absolutely no effort just returning the cached next_in_service value;
- * we prefer to do full lookups to test the consistency of the data
- * structures.
+ * This function is invoked when there has been a change in the trees
+ * for sd, and we need know what is the new next entity after this
+ * change.
  */
-static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
-						 int extract,
-						 struct bfq_data *bfqd)
+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd)
 {
 	struct bfq_service_tree *st = sd->service_tree;
-	struct bfq_entity *entity;
-	int i = 0;
-
-	BUG_ON(sd->in_service_entity);
+	struct bfq_service_tree *idle_class_st = st + (BFQ_IOPRIO_CLASSES - 1);
+	struct bfq_entity *entity = NULL;
+	struct bfq_queue *bfqq;
+	int class_idx = 0;
 
+	BUG_ON(!sd);
+	BUG_ON(!st);
 	/*
 	 * Choose from idle class, if needed to guarantee a minimum
-	 * bandwidth to this class. This should also mitigate
+	 * bandwidth to this class (and if there is some active entity
+	 * in idle class). This should also mitigate
 	 * priority-inversion problems in case a low priority task is
 	 * holding file system resources.
 	 */
-	if (bfqd &&
-	    jiffies - bfqd->bfq_class_idle_last_service >
-	    BFQ_CL_IDLE_TIMEOUT) {
-		entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1,
-						  true);
-		if (entity) {
-			struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	if (time_is_before_jiffies(sd->bfq_class_idle_last_service +
+				   BFQ_CL_IDLE_TIMEOUT)) {
+		if (!RB_EMPTY_ROOT(&idle_class_st->active))
+			class_idx = BFQ_IOPRIO_CLASSES - 1;
+		/* About to be served if backlogged, or not yet backlogged */
+		sd->bfq_class_idle_last_service = jiffies;
+	}
 
-			if (bfqq)
-				bfq_log_bfqq(bfqd, bfqq,
-					     "idle chosen from st %p %d",
-					     st + BFQ_IOPRIO_CLASSES - 1,
-					BFQ_IOPRIO_CLASSES - 1);
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
-			else {
-				struct bfq_group *bfqg =
-				container_of(entity, struct bfq_group, entity);
+	/*
+	 * Find the next entity to serve for the highest-priority
+	 * class, unless the idle class needs to be served.
+	 */
+	for (; class_idx < BFQ_IOPRIO_CLASSES; class_idx++) {
+		entity = __bfq_lookup_next_entity(st + class_idx,
+						  sd->in_service_entity);
 
-				bfq_log_bfqg(bfqd, bfqg,
-					     "idle chosen from st %p %d",
-					     st + BFQ_IOPRIO_CLASSES - 1,
-					BFQ_IOPRIO_CLASSES - 1);
-			}
-#endif
-			i = BFQ_IOPRIO_CLASSES - 1;
-			bfqd->bfq_class_idle_last_service = jiffies;
-			sd->next_in_service = entity;
-		}
+		if (entity)
+			break;
 	}
-	for (; i < BFQ_IOPRIO_CLASSES; i++) {
-		entity = __bfq_lookup_next_entity(st + i, false);
-		if (entity) {
-			if (bfqd != NULL) {
-			struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
 
-			if (bfqq)
-				bfq_log_bfqq(bfqd, bfqq,
-					     "chosen from st %p %d",
-					     st + i, i);
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
-			else {
-				struct bfq_group *bfqg =
-				container_of(entity, struct bfq_group, entity);
+	BUG_ON(!entity &&
+	       (!RB_EMPTY_ROOT(&st->active) || !RB_EMPTY_ROOT(&(st+1)->active) ||
+		!RB_EMPTY_ROOT(&(st+2)->active)));
 
-				bfq_log_bfqg(bfqd, bfqg,
-					     "chosen from st %p %d",
-					     st + i, i);
-			}
-#endif
-			}
+	if (!entity)
+		return NULL;
 
-			if (extract) {
-				bfq_check_next_in_service(sd, entity);
-				bfq_active_extract(st + i, entity);
-				sd->in_service_entity = entity;
-				sd->next_in_service = NULL;
-			}
-			break;
-		}
+	/* Log some information */
+	bfqq = bfq_entity_to_bfqq(entity);
+	if (bfqq)
+		bfq_log_bfqq(bfqq->bfqd, bfqq, "chosen from st %p %d",
+			     st + class_idx, class_idx);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+	else {
+		struct bfq_group *bfqg =
+			container_of(entity, struct bfq_group, entity);
+
+		bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+			     "chosen from st %p %d",
+			     st + class_idx, class_idx);
 	}
+#endif
 
 	return entity;
 }
@@ -1356,6 +1410,43 @@ static bool next_queue_may_preempt(struct bfq_data *bfqd)
 }
 
 /*
+ * This function tells whether entity stops being a candidate for next
+ * service, according to the following logic.
+ *
+ * This function is invoked for an entity that is about to be set in
+ * service. If such an entity is a queue, then the entity is no longer
+ * a candidate for next service (i.e, a candidate entity to serve
+ * after the in-service entity is expired). The function then returns
+ * true.
+ *
+ * In contrast, the entity could stil be a candidate for next service
+ * if it is not a queue, and has more than one child. In fact, even if
+ * one of its children is about to be set in service, other children
+ * may still be the next to serve. As a consequence, a non-queue
+ * entity is not a candidate for next-service only if it has only one
+ * child. And only if this condition holds, then the function returns
+ * true for a non-queue entity.
+ */
+static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
+{
+	struct bfq_group *bfqg;
+
+	if (bfq_entity_to_bfqq(entity))
+		return true;
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+	bfqg = container_of(entity, struct bfq_group, entity);
+
+	BUG_ON(bfqg == ((struct bfq_data *)(bfqg->bfqd))->root_group);
+	BUG_ON(bfqg->active_entities == 0);
+	if (bfqg->active_entities == 1)
+		return true;
+#endif
+
+	return false;
+}
+
+/*
  * Get next queue for service.
  */
 static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
@@ -1369,6 +1460,11 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
 	if (bfqd->busy_queues == 0)
 		return NULL;
 
+	/*
+	 * Traverse the path from the root to the leaf entity to
+	 * serve. Set in service all the entities visited along the
+	 * way.
+	 */
 	sd = &bfqd->root_group->sched_data;
 	for (; sd ; sd = entity->my_sched_data) {
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
@@ -1378,13 +1474,96 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
 
 			bfq_log_bfqg(bfqd, bfqg,
 				     "get_next_queue: lookup in this group");
-		} else
+			if (!sd->next_in_service)
+				pr_crit("get_next_queue: lookup in this group");
+		} else {
 			bfq_log_bfqg(bfqd, bfqd->root_group,
 				     "get_next_queue: lookup in root group");
+			if (!sd->next_in_service)
+				pr_crit("get_next_queue: lookup in root group");
+		}
 #endif
 
-		entity = bfq_lookup_next_entity(sd, 1, bfqd);
+		BUG_ON(!sd->next_in_service);
+
+		/*
+		 * WARNING. We are about to set the in-service entity
+		 * to sd->next_in_service, i.e., to the (cached) value
+		 * returned by bfq_lookup_next_entity(sd) the last
+		 * time it was invoked, i.e., the last time when the
+		 * service order in sd changed as a consequence of the
+		 * activation or deactivation of an entity. In this
+		 * respect, if we execute bfq_lookup_next_entity(sd)
+		 * in this very moment, it may, although with low
+		 * probability, yield a different entity than that
+		 * pointed to by sd->next_in_service. This rare event
+		 * happens in case there was no CLASS_IDLE entity to
+		 * serve for sd when bfq_lookup_next_entity(sd) was
+		 * invoked for the last time, while there is now one
+		 * such entity.
+		 *
+		 * If the above event happens, then the scheduling of
+		 * such entity in CLASS_IDLE is postponed until the
+		 * service of the sd->next_in_service entity
+		 * finishes. In fact, when the latter is expired,
+		 * bfq_lookup_next_entity(sd) gets called again,
+		 * exactly to update sd->next_in_service.
+		 */
+
+		/* Make next_in_service entity become in_service_entity */
+		entity = sd->next_in_service;
+		sd->in_service_entity = entity;
+
+		/*
+		 * Reset the accumulator of the amount of service that
+		 * the entity is about to receive.
+		 */
+		entity->service = 0;
+
+		/*
+		 * If entity is no longer a candidate for next
+		 * service, then we extract it from its active tree,
+		 * for the following reason. To further boost the
+		 * throughput in some special case, BFQ needs to know
+		 * which is the next candidate entity to serve, while
+		 * there is already an entity in service. In this
+		 * respect, to make it easy to compute/update the next
+		 * candidate entity to serve after the current
+		 * candidate has been set in service, there is a case
+		 * where it is necessary to extract the current
+		 * candidate from its service tree. Such a case is
+		 * when the entity just set in service cannot be also
+		 * a candidate for next service. Details about when
+		 * this conditions holds are reported in the comments
+		 * on the function bfq_no_longer_next_in_service()
+		 * invoked below.
+		 */
+		if (bfq_no_longer_next_in_service(entity))
+			bfq_active_extract(bfq_entity_service_tree(entity),
+					   entity);
+
+		/*
+		 * For the same reason why we may have just extracted
+		 * entity from its active tree, we may need to update
+		 * next_in_service for the sched_data of entity too,
+		 * regardless of whether entity has been extracted.
+		 * In fact, even if entity has not been extracted, a
+		 * descendant entity may get extracted. Such an event
+		 * would cause a change in next_in_service for the
+		 * level of the descendant entity, and thus possibly
+		 * back to upper levels.
+		 *
+		 * We cannot perform the resulting needed update
+		 * before the end of this loop, because, to know which
+		 * is the correct next-to-serve candidate entity for
+		 * each level, we need first to find the leaf entity
+		 * to set in service. In fact, only after we know
+		 * which is the next-to-serve leaf entity, we can
+		 * discover whether the parent entity of the leaf
+		 * entity becomes the next-to-serve, and so on.
+		 */
 
+		/* Log some information */
 		bfqq = bfq_entity_to_bfqq(entity);
 		if (bfqq)
 			bfq_log_bfqq(bfqd, bfqq,
@@ -1401,13 +1580,23 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
 		}
 #endif
 
-		BUG_ON(!entity);
-		entity->service = 0;
 	}
 
+	BUG_ON(!entity);
 	bfqq = bfq_entity_to_bfqq(entity);
 	BUG_ON(!bfqq);
 
+	/*
+	 * We can finally update all next-to-serve entities along the
+	 * path from the leaf entity just set in service to the root.
+	 */
+	for_each_entity(entity) {
+		struct bfq_sched_data *sd = entity->sched_data;
+
+		if(!bfq_update_next_in_service(sd))
+			break;
+	}
+
 	return bfqq;
 }
 
diff --git a/block/bfq.h b/block/bfq.h
index b80abe0..6b28030 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -88,6 +88,9 @@ struct bfq_sched_data {
 	struct bfq_entity *next_in_service;
 	/* array of service trees, one per ioprio_class */
 	struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
+	/* last time CLASS_IDLE was served */
+	unsigned long bfq_class_idle_last_service;
+
 };
 
 /**
@@ -487,8 +490,6 @@ struct bfq_data {
 	unsigned int bfq_back_max;
 	/* maximum idling time */
 	u32 bfq_slice_idle;
-	/* last time CLASS_IDLE was served */
-	u64 bfq_class_idle_last_service;
 
 	/* user-configured max budget value (0 for auto-tuning) */
 	int bfq_user_max_budget;

From b5734696e18577dda6ef6571af7cb3718937d2f5 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Wed, 14 Dec 2016 17:56:25 +0100
Subject: [PATCH 16/31] BFQ v8r6-rc1

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 2 +-
 block/bfq.h         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 2d00bd10..72443bd 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5218,7 +5218,7 @@ static struct blkcg_policy blkcg_policy_bfq = {
 static int __init bfq_init(void)
 {
 	int ret;
-	char msg[50] = "BFQ I/O-scheduler: v8r5";
+	char msg[60] = "BFQ I/O-scheduler: v8r6-rc1";
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	ret = blkcg_policy_register(&blkcg_policy_bfq);
diff --git a/block/bfq.h b/block/bfq.h
index 6b28030..d4d784f 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -1,5 +1,5 @@
 /*
- * BFQ-v8r5 for 4.8.0: data structures and common functions prototypes.
+ * BFQ v8r6-rc1 for 4.9.0: data structures and common functions prototypes.
  *
  * Based on ideas and code from CFQ:
  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>

From a7c98629904dcc103d676c2b8e2ceaff4ca19ca2 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Wed, 14 Dec 2016 18:07:41 +0100
Subject: [PATCH 17/31] Port and improve CFQ commit 41647e7a

BFQ currently used the same logic for detecting seeky queues for
for rotational disks and SSDs. This logic is appropriate for the
former, as it takes into account only inter-request distance, and
this is the dominant latency factor on a rotational device. Yet
things change with flash-based devices, where servign a large
requests still yields a high throughput, even the request is far
from the previous request served. This commits extends seeky
detection to take into accoutn also this fact with flash-based
devices. In particular, this commit is an improved port of the
original commit 41647e7a for CFQ.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 72443bd..153292f 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -117,6 +117,7 @@ struct kmem_cache *bfq_pool;
 #define BFQ_HW_QUEUE_SAMPLES	32
 
 #define BFQQ_SEEK_THR		(sector_t)(8 * 100)
+#define BFQQ_SECT_THR_NONROT	(sector_t)(2 * 32)
 #define BFQQ_CLOSE_THR		(sector_t)(8 * 1024)
 #define BFQQ_SEEKY(bfqq)	(hweight32(bfqq->seek_history) > 32/8)
 
@@ -4117,7 +4118,9 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 {
 	bfqq->seek_history <<= 1;
 	bfqq->seek_history |=
-		get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR;
+		get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR &&
+		(!blk_queue_nonrot(bfqd->queue) ||
+		 blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT);
 }
 
 /*

From 5cacb798d523b9fa7e394ca3a369a0c196e0b022 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Thu, 15 Dec 2016 15:57:22 +0100
Subject: [PATCH 18/31] Provide more details in switching-off-wr message

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 153292f..a5853c7 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -656,7 +656,9 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 	    time_is_before_jiffies(bfqq->last_wr_start_finish +
 				   bfqq->wr_cur_max_time))) {
 		bfq_log_bfqq(bfqq->bfqd, bfqq,
-		    "resume state: switching off wr");
+			     "resume state: switching off wr (%u + %u < %u)",
+			     bfqq->last_wr_start_finish, bfqq->wr_cur_max_time,
+			     jiffies);
 
 		bfqq->wr_coeff = 1;
 	}

From 6f62391c914a25818f839b932a9e1b16ee28c1f1 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Thu, 15 Dec 2016 16:10:17 +0100
Subject: [PATCH 19/31] Remove useless parameter from bfq_del_bfqq_busy

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 6 +++---
 block/bfq-sched.c   | 5 ++---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index a5853c7..0d54cc1 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -656,7 +656,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 	    time_is_before_jiffies(bfqq->last_wr_start_finish +
 				   bfqq->wr_cur_max_time))) {
 		bfq_log_bfqq(bfqq->bfqd, bfqq,
-			     "resume state: switching off wr (%u + %u < %u)",
+			     "resume state: switching off wr (%lu + %lu < %lu)",
 			     bfqq->last_wr_start_finish, bfqq->wr_cur_max_time,
 			     jiffies);
 
@@ -1525,7 +1525,7 @@ static void bfq_remove_request(struct request *rq)
 		BUG_ON(bfqq->entity.budget < 0);
 
 		if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) {
-			bfq_del_bfqq_busy(bfqd, bfqq, 1);
+			bfq_del_bfqq_busy(bfqd, bfqq);
 
 			/* bfqq emptied. In normal operation, when
 			 * bfqq is empty, bfqq->entity.service and
@@ -2662,7 +2662,7 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 			 */
 			bfqq->budget_timeout = jiffies;
 
-		bfq_del_bfqq_busy(bfqd, bfqq, 1);
+		bfq_del_bfqq_busy(bfqd, bfqq);
 	} else {
 		bfq_activate_bfqq(bfqd, bfqq);
 		/*
diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index 2f54ac5..fae28ee 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -1635,8 +1635,7 @@ static void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
  * Called when the bfqq no longer has requests pending, remove it from
  * the service tree.
  */
-static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
-			      int requeue)
+static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
 	BUG_ON(!bfq_bfqq_busy(bfqq));
 	BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
@@ -1660,7 +1659,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 
 	BUG_ON(bfqq->entity.budget < 0);
 
-	bfq_deactivate_bfqq(bfqd, bfqq, requeue);
+	bfq_deactivate_bfqq(bfqd, bfqq, 1);
 
 	BUG_ON(bfqq->entity.budget < 0);
 }

From 1469db5fec95e3bfad0fe60dc1a843b0c5de4efa Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Thu, 15 Dec 2016 16:40:09 +0100
Subject: [PATCH 20/31] Further improve code and comments for hierarchical code

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-cgroup.c  |  6 +++---
 block/bfq-iosched.c |  2 +-
 block/bfq-sched.c   | 45 ++++++++++++++++++++++++++++++---------------
 3 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index a04bc40..bd31da5 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -552,7 +552,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq));
 
 	if (bfq_bfqq_busy(bfqq))
-		bfq_deactivate_bfqq(bfqd, bfqq, 0);
+		bfq_deactivate_bfqq(bfqd, bfqq, false);
 	else if (entity->on_st) {
 		BUG_ON(&bfq_entity_service_tree(entity)->idle !=
 		       entity->tree);
@@ -664,7 +664,7 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st)
 	struct bfq_entity *entity = st->first_idle;
 
 	for (; entity ; entity = st->first_idle)
-		__bfq_deactivate_entity(entity, 0);
+		__bfq_deactivate_entity(entity, false);
 }
 
 /**
@@ -769,7 +769,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
 	BUG_ON(bfqg->sched_data.next_in_service);
 	BUG_ON(bfqg->sched_data.in_service_entity);
 
-	__bfq_deactivate_entity(entity, 0);
+	__bfq_deactivate_entity(entity, false);
 	bfq_put_async_queues(bfqd, bfqg);
 	BUG_ON(entity->tree);
 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0d54cc1..7a56603 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4738,7 +4738,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
 
 	BUG_ON(bfqd->in_service_queue);
 	list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
-		bfq_deactivate_bfqq(bfqd, bfqq, 0);
+		bfq_deactivate_bfqq(bfqd, bfqq, false);
 
 	spin_unlock_irq(q->queue_lock);
 
diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index fae28ee..52911cc 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -841,9 +841,9 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
 		 * We are requeueing the current in-service entity,
 		 * because of the requeueing of a just-expired
 		 * non-idle leaf entity in the path originating from
-		 * this entity. In fact, in this case, then all
-		 * entities in this path need to be requeued again for
-		 * next service.
+		 * this entity. In fact, in this case, all entities in
+		 * this path need to be requeued again for next
+		 * service.
 		 *
 		 * Before requeueing, the start time of the entity
 		 * must be moved forward to account for the service
@@ -882,7 +882,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
 		 * implies that the finish time of this entity must be
 		 * updated. Such an update may cause the scheduling,
 		 * i.e., the position in the active tree, of this
-		 * entity to change. We handle this fact by: 1)
+		 * entity to change. We handle this change by: 1)
 		 * dequeueing the entity here, 2) updating the finish
 		 * time and requeueing the entity according to the new
 		 * timestamps below. This is the same approach as the
@@ -1054,7 +1054,7 @@ static void bfq_activate_entity(struct bfq_entity *entity,
  * if the entity was in service or if it was the next_in_service for
  * its sched_data; return %false otherwise.
  */
-static bool __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue)
 {
 	struct bfq_sched_data *sd = entity->sched_data;
 	struct bfq_service_tree *st;
@@ -1118,26 +1118,41 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
 		if (sd->next_in_service)
 			/*
 			 * The parent entity is still backlogged,
-			 * because next_in_service is not NULL, and
+			 * because next_in_service is not NULL. So, no
+			 * upwards deactivation is needed.  Yet,
 			 * next_in_service has been updated (see
-			 * comment on the body of the above if):
-			 * upwards update of the schedule is needed.
+			 * comment on the body of the above if).  Then
+			 * the schedule nees to be updated upwards.
 			 */
-			goto update;
+			goto update_schedule;
 
 		/*
-		 * If we get here, then the parent is no more backlogged and
-		 * we want to propagate the deactivation upwards.
+		 * If we get here, then the parent is no more
+		 * backlogged and we need to propagate the
+		 * deactivation upwards. Then let the loop go on.
 		 */
-		requeue = 1;
+
+		/*
+		 * Also let parent be queued into the idle tree on
+		 * deactivation, to preserve service guarantees, and
+		 * assuming that who invoked this function does not
+		 * need parent entities too to be removed by any tree.
+		 */
+		requeue = true;
 	}
 
 	return;
 
-update:
+update_schedule:
 	entity = parent;
 	for_each_entity(entity) {
 		struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+		/*
+		 * Invoke __bfq_activate_entity on entity, even if
+		 * already active, to update its position in the
+		 * active tree (because sd->next_in_service has
+		 * changed)
+		 */
 		__bfq_activate_entity(entity, false);
 
 		sd = entity->sched_data;
@@ -1613,7 +1628,7 @@ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
 }
 
 static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
-				int requeue)
+				bool requeue)
 {
 	struct bfq_entity *entity = &bfqq->entity;
 
@@ -1659,7 +1674,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 
 	BUG_ON(bfqq->entity.budget < 0);
 
-	bfq_deactivate_bfqq(bfqd, bfqq, 1);
+	bfq_deactivate_bfqq(bfqd, bfqq, true);
 
 	BUG_ON(bfqq->entity.budget < 0);
 }

From d12d41bee45b988e1ae4f8dcece4bcecb5ad74a9 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Thu, 15 Dec 2016 22:11:39 +0100
Subject: [PATCH 21/31] Optimize update of next_in_service entity

If the update of the next_in_service candidate entity is triggered by
the activation of an entity, then it is not necessary to perform full
lookups in the active trees to update next_in_service. In fact, it is
enough to check whether the just-activated entity has a higher
priority of next_in_service, or, even if it has the same priority as
next_in_service, is eligible and has a lower virtual finish time than
next_in_service. If this compound condition holds, then the new entity
becomes the new next_in_service. Otherwise no change is needed. This
commit implements this optimization.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-sched.c | 167 ++++++++++++++++++++++++++++++++++++++++--------------
 block/bfq.h       |  11 +++-
 2 files changed, 134 insertions(+), 44 deletions(-)

diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index 52911cc..09e494b 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -14,6 +14,18 @@
 
 static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
 
+/**
+ * bfq_gt - compare two timestamps.
+ * @a: first ts.
+ * @b: second ts.
+ *
+ * Return @a > @b, dealing with wrapping correctly.
+ */
+static int bfq_gt(u64 a, u64 b)
+{
+	return (s64)(a - b) > 0;
+}
+
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 /* both next loops stop at one of the child entities of the root group */
 #define for_each_entity(entity)				\
@@ -46,18 +58,99 @@ static void bfq_update_budget(struct bfq_entity *next_in_service)
 		bfqg_entity->budget = next_in_service->budget;
 }
 
-/*
- * Incomplete version that always returns true. It will return also
- * false in its complete version, in case either next_in_service has
- * not changed, or next_in_service has changed but in a way that will
- * not influence upper levels.
+static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
+{
+	struct rb_node *node = tree->rb_node;
+
+	return rb_entry(node, struct bfq_entity, rb_node);
+}
+
+/**
+ * bfq_update_next_in_service - update sd->next_in_service
+ * @sd: sched_data for which to perform the update.
+ * @new_entity: if not NULL, pointer to the entity whose activation
+ *		triggered the invocation of this function.
+ *
+ * This function is called to update sd->next_in_service, which, in
+ * its turn, may change as a consequence of the insertion or
+ * extraction of an entity into/from one of the active trees of
+ * sd. These insertions/extractions occur as a consequence of
+ * activations/deactivations of entities, with some activations being
+ * 'true' activations, and other activations being requeueings (i.e.,
+ * implementing the second, requeueing phase of the mechanism used to
+ * reposition an entity in its active tree; see comments on
+ * __bfq_activate_entity for details). In both the last two activation
+ * sub-cases, new_entity points to the just activated or requeued
+ * entity.
+ *
+ * This is a still incomplete version of this function, which always
+ * returns true. It will return also false in its complete version, in
+ * case either next_in_service has not changed, or next_in_service has
+ * changed but in a way that will not influence upper levels.
  */
-static bool bfq_update_next_in_service(struct bfq_sched_data *sd)
+static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
+				       struct bfq_entity *new_entity)
 {
-	struct bfq_entity *next_in_service;
+	struct bfq_entity *next_in_service = sd->next_in_service;
 	struct bfq_queue *bfqq;
 
-	next_in_service = bfq_lookup_next_entity(sd);
+	/*
+	 * If this update is triggered by the activation of a new
+	 * entity, then a full lookup in the active tree can be
+	 * avoided. In fact, it is enough to check whether the
+	 * just-activated entity has a higher priority of
+	 * sd->next_in_service, or, even if it has the same priority
+	 * as sd->next_in_service, is eligible and has a lower virtual
+	 * finish time than sd->next_in_service. If this compound
+	 * condition holds, then the new entity becomes the new
+	 * next_in_service. Otherwise no change is needed.
+	 */
+	if (new_entity) {
+		/*
+		 * Flag used to decide whether to replace
+		 * sd->next_in_service with new_entity. Tentatively
+		 * set to true, and left as true if
+		 * sd->next_in_service is NULL.
+		 */
+		bool replace_next = true;
+
+		if (new_entity == sd->next_in_service)
+			return false;
+
+		/*
+		 * If there is already a next_in_service candidate
+		 * entity, then compare class priorities or timestamps
+		 * to decide whether to replace sd->service_tree with
+		 * new_entity.
+		 */
+		if (next_in_service) {
+			unsigned int new_entity_class_idx =
+				bfq_class_idx(new_entity);
+			struct bfq_service_tree *st =
+				sd->service_tree + new_entity_class_idx;
+
+			/*
+			 * For efficiency, evaluate the most likely
+			 * sub-condition first.
+			 */
+			replace_next =
+				(new_entity_class_idx ==
+				 bfq_class_idx(next_in_service)
+				 &&
+				 !bfq_gt(new_entity->start, st->vtime)
+				 &&
+				 bfq_gt(next_in_service->finish,
+					new_entity->finish))
+				||
+				new_entity_class_idx <
+				bfq_class_idx(next_in_service);
+		}
+
+		if (replace_next)
+			next_in_service = new_entity;
+	} else /* invoked because of a deactivation: lookup needed */
+		next_in_service = bfq_lookup_next_entity(sd);
+
 	sd->next_in_service = next_in_service;
 
 	if (next_in_service)
@@ -109,18 +202,6 @@ static void bfq_update_budget(struct bfq_entity *next_in_service)
  */
 #define WFQ_SERVICE_SHIFT	22
 
-/**
- * bfq_gt - compare two timestamps.
- * @a: first ts.
- * @b: second ts.
- *
- * Return @a > @b, dealing with wrapping correctly.
- */
-static int bfq_gt(u64 a, u64 b)
-{
-	return (s64)(a - b) > 0;
-}
-
 static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
 {
 	struct bfq_queue *bfqq = NULL;
@@ -816,15 +897,18 @@ static void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 }
 
 /**
- * __bfq_activate_entity - activate an entity.
- * @entity: the entity being activated.
- * @non_blocking_wait_rq: true if this entity was waiting for a request
+ * __bfq_activate_entity - handle activation or requeueing of an entity.
+ * @entity: the entity being activated or requeued.
+ * @non_blocking_wait_rq: true if entity was waiting for a request
  *
- * Called whenever an entity is activated, i.e., it is not active and one
- * of its children receives a new request, or has to be reactivated due to
- * budget exhaustion.  It uses the current budget of the entity (and the
- * service received if @entity is active) of the queue to calculate its
- * timestamps.
+ * Called whenever an entity is activated, which happens either if the
+ * entity is not active and one of its children receives a new request
+ * (true activation), or if the entity needs to be repositioned in its
+ * active tree (requeueing, as explained in detail in the comments
+ * inside the function).
+ *
+ * Basically, this function updates the timestamps of entity and
+ * (re)inserts entity into its active tree.
  */
 static void __bfq_activate_entity(struct bfq_entity *entity,
 				  bool non_blocking_wait_rq)
@@ -1027,7 +1111,7 @@ static void bfq_activate_entity(struct bfq_entity *entity,
 		       RB_EMPTY_ROOT(&(sd->service_tree+1)->active) &&
 		       RB_EMPTY_ROOT(&(sd->service_tree+2)->active));
 
-		if (!bfq_update_next_in_service(sd)) {
+		if (!bfq_update_next_in_service(sd, entity)) {
 			BUG_ON(!sd->next_in_service);
 			/*
 			 * No need to propagate the activation to the
@@ -1081,8 +1165,8 @@ static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue)
 	else if (entity->tree)
 		BUG();
 
-	if (was_in_service || sd->next_in_service == entity)
-		ret = bfq_update_next_in_service(sd);
+	if (sd->next_in_service == entity)
+		ret = bfq_update_next_in_service(sd, NULL);
 
 	if (!requeue || !bfq_gt(entity->finish, st->vtime))
 		bfq_forget_entity(st, entity);
@@ -1169,7 +1253,7 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
 				     "invoking udpdate_next for this entity");
 		}
 #endif
-		if (!bfq_update_next_in_service(sd))
+		if (!bfq_update_next_in_service(sd, entity))
 			break;
 	}
 }
@@ -1183,28 +1267,27 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
  */
 static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st)
 {
-	struct bfq_entity *entry;
-	struct rb_node *node = st->active.rb_node;
+	struct bfq_entity *root_entity = bfq_root_active_entity(&st->active);
 
-	entry = rb_entry(node, struct bfq_entity, rb_node);
-	if (bfq_gt(entry->min_start, st->vtime)) {
-		struct bfq_queue *bfqq = bfq_entity_to_bfqq(entry);
+	if (bfq_gt(root_entity->min_start, st->vtime)) {
+		struct bfq_queue *bfqq = bfq_entity_to_bfqq(root_entity);
 
 		if (bfqq)
 			bfq_log_bfqq(bfqq->bfqd, bfqq,
 				     "calc_vtime_jump: new value %llu",
-				     entry->min_start);
+				     root_entity->min_start);
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 		else {
 			struct bfq_group *bfqg =
-				container_of(entry, struct bfq_group, entity);
+				container_of(root_entity, struct bfq_group,
+					     entity);
 
 			bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
 				     "calc_vtime_jump: new value %llu",
-				     entry->min_start);
+				     root_entity->min_start);
 		}
 #endif
-		return entry->min_start;
+		return root_entity->min_start;
 	}
 	return st->vtime;
 }
@@ -1608,7 +1691,7 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
 	for_each_entity(entity) {
 		struct bfq_sched_data *sd = entity->sched_data;
 
-		if(!bfq_update_next_in_service(sd))
+		if(!bfq_update_next_in_service(sd, NULL))
 			break;
 	}
 
diff --git a/block/bfq.h b/block/bfq.h
index d4d784f..9c68d8b 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -806,13 +806,20 @@ struct bfq_group {
 
 static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
 
+static unsigned int bfq_class_idx(struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+	return bfqq ? bfqq->ioprio_class - 1 :
+		BFQ_DEFAULT_GRP_CLASS - 1;
+}
+
 static struct bfq_service_tree *
 bfq_entity_service_tree(struct bfq_entity *entity)
 {
 	struct bfq_sched_data *sched_data = entity->sched_data;
 	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
-	unsigned int idx = bfqq ? bfqq->ioprio_class - 1 :
-				  BFQ_DEFAULT_GRP_CLASS - 1;
+	unsigned int idx = bfq_class_idx(entity);
 
 	BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
 	BUG_ON(sched_data == NULL);

From 634414378918d47830a12f2c0e0a6c526d227502 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Thu, 15 Dec 2016 22:15:42 +0100
Subject: [PATCH 22/31] Fix bug causing occasional loss of weight raising

When a bfq_queue, say bfqq, is split after a merging with another
bfq_queue, BFQ checks whether it has to restore for bfqq the
weight-raising state that bfqq had before being merged.  In
particular, the weight-raising is restored only if, according to the
weight-raising duration decided for bfqq when it started to be
weight-raised (before being merged), bfqq would not have already
finished its weight-raising period.

Yet, by mistake, such a duration is not saved when bfqq is merged. So,
if bfqq is freed and reallocated when it is split, then this duration
is wrongly set to zero on the split. As a consequence, the
weight-raising state of bfqq is wrongly not restored, which causes BFQ
to fail in guaranteeing a low latency to bfqq.

This commit fixes this bug by saving weight-raising duration when bfqq
is merged, and correctly restoring it when bfqq is split.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 2 ++
 block/bfq.h         | 1 +
 2 files changed, 3 insertions(+)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 7a56603..7aa3d55 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -650,6 +650,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 	bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
 	BUG_ON(time_is_after_jiffies(bfqq->wr_start_at_switch_to_srt));
 	bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
+	bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
 	BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish));
 
 	if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) ||
@@ -1988,6 +1989,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
 	bic->saved_wr_coeff = bfqq->wr_coeff;
 	bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt;
 	bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
+	bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
 	BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish));
 }
 
diff --git a/block/bfq.h b/block/bfq.h
index 9c68d8b..1a03521 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -366,6 +366,7 @@ struct bfq_io_cq {
 	unsigned long saved_wr_coeff;
 	unsigned long saved_last_wr_start_finish;
 	unsigned long saved_wr_start_at_switch_to_srt;
+	unsigned int saved_wr_cur_max_time;
 };
 
 enum bfq_device_speed {

From a63703d1fcce41e4d0b36e366c333b56fdd04fe3 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Fri, 16 Dec 2016 17:57:00 +0100
Subject: [PATCH 23/31] Fix wrong reset of in-service entities

In-service entities were reset with an indirect logic, which
happened to be even buggy for some cases. This commit fixes
this bug in two important steps. First, by replacing this
indirect logic with a direct logic, in which all involved
entities are immediately reset, with a bubble-up loop, when
the in-service queue is reset. Second, by restructuring the
code related to this change, so as to become not only correct
with respect to this change, but also cleaner and hopefully
clearer.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-cgroup.c  |   2 +-
 block/bfq-iosched.c |  20 ++-
 block/bfq-sched.c   | 499 +++++++++++++++++++++++++++++++++-------------------
 block/bfq.h         |   6 +-
 4 files changed, 332 insertions(+), 195 deletions(-)

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index bd31da5..bbaecd0 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -552,7 +552,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq));
 
 	if (bfq_bfqq_busy(bfqq))
-		bfq_deactivate_bfqq(bfqd, bfqq, false);
+		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
 	else if (entity->on_st) {
 		BUG_ON(&bfq_entity_service_tree(entity)->idle !=
 		       entity->tree);
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 7aa3d55..23e4c94 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -600,7 +600,7 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
 		entity->budget = new_budget;
 		bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu",
 					 new_budget);
-		bfq_activate_bfqq(bfqd, bfqq);
+		bfq_requeue_bfqq(bfqd, bfqq);
 	}
 }
 
@@ -1526,7 +1526,7 @@ static void bfq_remove_request(struct request *rq)
 		BUG_ON(bfqq->entity.budget < 0);
 
 		if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) {
-			bfq_del_bfqq_busy(bfqd, bfqq);
+			bfq_del_bfqq_busy(bfqd, bfqq, false);
 
 			/* bfqq emptied. In normal operation, when
 			 * bfqq is empty, bfqq->entity.service and
@@ -2643,8 +2643,6 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
 	BUG_ON(bfqq != bfqd->in_service_queue);
 
-	__bfq_bfqd_reset_in_service(bfqd);
-
 	/*
 	 * If this bfqq is shared between multiple processes, check
 	 * to make sure that those processes are still issuing I/Os
@@ -2664,14 +2662,21 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 			 */
 			bfqq->budget_timeout = jiffies;
 
-		bfq_del_bfqq_busy(bfqd, bfqq);
+		bfq_del_bfqq_busy(bfqd, bfqq, true);
 	} else {
-		bfq_activate_bfqq(bfqd, bfqq);
+		bfq_requeue_bfqq(bfqd, bfqq);
 		/*
 		 * Resort priority tree of potential close cooperators.
 		 */
 		bfq_pos_tree_add_move(bfqd, bfqq);
 	}
+
+	/*
+	 * All in-service entities must have been properly deactivated
+	 * or requeued before executing the next function, which
+	 * resets all in-service entites as no more in service.
+	 */
+	__bfq_bfqd_reset_in_service(bfqd);
 }
 
 /**
@@ -3819,7 +3824,6 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
 	BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0);
 	BUG_ON(bfqq->entity.tree);
 	BUG_ON(bfq_bfqq_busy(bfqq));
-	BUG_ON(bfqq->bfqd->in_service_queue == bfqq);
 
 	if (bfq_bfqq_sync(bfqq))
 		/*
@@ -4740,7 +4744,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
 
 	BUG_ON(bfqd->in_service_queue);
 	list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
-		bfq_deactivate_bfqq(bfqd, bfqq, false);
+		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
 
 	spin_unlock_irq(q->queue_lock);
 
diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index 09e494b..3b8e05b 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -79,9 +79,9 @@ static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
  * 'true' activations, and other activations being requeueings (i.e.,
  * implementing the second, requeueing phase of the mechanism used to
  * reposition an entity in its active tree; see comments on
- * __bfq_activate_entity for details). In both the last two activation
- * sub-cases, new_entity points to the just activated or requeued
- * entity.
+ * __bfq_activate_entity and __bfq_requeue_entity for details). In
+ * both the last two activation sub-cases, new_entity points to the
+ * just activated or requeued entity.
  *
  * This is a still incomplete version of this function, which always
  * returns true. It will return also false in its complete version, in
@@ -658,7 +658,7 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
 
 	BUG_ON(!entity->on_st);
 
-	entity->on_st = 0;
+	entity->on_st = false;
 	st->wsum -= entity->weight;
 	if (bfqq) {
 		sd = entity->sched_data;
@@ -896,115 +896,12 @@ static void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 			max_t(int, 0, tot_serv_to_charge - entity->service));
 }
 
-/**
- * __bfq_activate_entity - handle activation or requeueing of an entity.
- * @entity: the entity being activated or requeued.
- * @non_blocking_wait_rq: true if entity was waiting for a request
- *
- * Called whenever an entity is activated, which happens either if the
- * entity is not active and one of its children receives a new request
- * (true activation), or if the entity needs to be repositioned in its
- * active tree (requeueing, as explained in detail in the comments
- * inside the function).
- *
- * Basically, this function updates the timestamps of entity and
- * (re)inserts entity into its active tree.
- */
-static void __bfq_activate_entity(struct bfq_entity *entity,
-				  bool non_blocking_wait_rq)
+static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
+					struct bfq_service_tree *st,
+					bool backshifted)
 {
-	struct bfq_sched_data *sd = entity->sched_data;
-	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
 	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
-	bool backshifted = false;
-
-	BUG_ON(!sd);
-	BUG_ON(!st);
-	if (entity == sd->in_service_entity) {
-		/*
-		 * We are requeueing the current in-service entity,
-		 * because of the requeueing of a just-expired
-		 * non-idle leaf entity in the path originating from
-		 * this entity. In fact, in this case, all entities in
-		 * this path need to be requeued again for next
-		 * service.
-		 *
-		 * Before requeueing, the start time of the entity
-		 * must be moved forward to account for the service
-		 * that the entity has received while in service. The
-		 * finish time will then be updated according to this
-		 * new value of the start time, and to the budget of
-		 * the entity.
-		 */
-		bfq_calc_finish(entity, entity->service);
-		entity->start = entity->finish;
-		sd->in_service_entity = NULL;
-		BUG_ON(entity->tree && entity->tree != &st->active);
-		/*
-		 * In addition, if the entity had more than one child
-		 * when set in service, then was not extracted from
-		 * the active tree. This implies that the position of
-		 * the entity in the active tree may need to be
-		 * changed now, because we have just updated the start
-		 * time of the entity, and we will update its finish
-		 * time in a moment (the requeueing is then, more
-		 * precisely, a repositioning in this case). To
-		 * implement this repositioning, we: 1) dequeue the
-		 * entity here, 2) update the finish time and
-		 * requeue the entity according to the new
-		 * timestamps below.
-		 */
-		if (entity->tree)
-			bfq_active_extract(st, entity);
-	} else if (entity->tree == &st->active) {
-		/*
-		 * The entity is already active, and not in
-		 * service. In this case, this function gets called
-		 * only if the next_in_service entity below this
-		 * entity has changed, and this change has caused the
-		 * budget of this entity to change, which, finally
-		 * implies that the finish time of this entity must be
-		 * updated. Such an update may cause the scheduling,
-		 * i.e., the position in the active tree, of this
-		 * entity to change. We handle this change by: 1)
-		 * dequeueing the entity here, 2) updating the finish
-		 * time and requeueing the entity according to the new
-		 * timestamps below. This is the same approach as the
-		 * non-extracted-entity sub-case above.
-		 */
-		bfq_active_extract(st, entity);
-	} else { /* This is a 'true' activation, not a requeueing. */
-		unsigned long long min_vstart;
-
-		/* See comments on bfq_fqq_update_budg_for_activation */
-		if (non_blocking_wait_rq && bfq_gt(st->vtime, entity->finish)) {
-			backshifted = true;
-			min_vstart = entity->finish;
-		} else
-			min_vstart = st->vtime;
-
-		if (entity->tree == &st->idle) {
-			/*
-			 * Must be on the idle tree, bfq_idle_extract() will
-			 * check for that.
-			 */
-			bfq_idle_extract(st, entity);
-			entity->start = bfq_gt(min_vstart, entity->finish) ?
-				min_vstart : entity->finish;
-		} else {
-			/*
-			 * The finish time of the entity may be invalid, and
-			 * it is in the past for sure, otherwise the queue
-			 * would have been on the idle tree.
-			 */
-			entity->start = min_vstart;
-			st->wsum += entity->weight;
-			bfq_get_entity(entity);
-
-			BUG_ON(entity->on_st);
-			entity->on_st = 1;
-		}
-	}
+	struct bfq_sched_data *sd = entity->sched_data;
 
 	st = __bfq_entity_update_weight_prio(st, entity);
 	bfq_calc_finish(entity, entity->budget);
@@ -1087,37 +984,215 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
 	BUG_ON(&st->active != &sd->service_tree->active &&
 	       &st->active != &(sd->service_tree+1)->active &&
 	       &st->active != &(sd->service_tree+2)->active);
+}
+
+/**
+ * __bfq_activate_entity - handle activation of entity.
+ * @entity: the entity being activated.
+ * @non_blocking_wait_rq: true if entity was waiting for a request
+ *
+ * Called for a 'true' activation, i.e., if entity is not active and
+ * one of its children receives a new request.
+ *
+ * Basically, this function updates the timestamps of entity and
+ * inserts entity into its active tree, ater possible extracting it
+ * from its idle tree.
+ */
+static void __bfq_activate_entity(struct bfq_entity *entity,
+				  bool non_blocking_wait_rq)
+{
+	struct bfq_sched_data *sd = entity->sched_data;
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	bool backshifted = false;
+	unsigned long long min_vstart;
+
+	BUG_ON(!sd);
+	BUG_ON(!st);
+
+	/* See comments on bfq_fqq_update_budg_for_activation */
+	if (non_blocking_wait_rq && bfq_gt(st->vtime, entity->finish)) {
+		backshifted = true;
+		min_vstart = entity->finish;
+	} else
+		min_vstart = st->vtime;
+
+	if (entity->tree == &st->idle) {
+		/*
+		 * Must be on the idle tree, bfq_idle_extract() will
+		 * check for that.
+		 */
+		bfq_idle_extract(st, entity);
+		entity->start = bfq_gt(min_vstart, entity->finish) ?
+			min_vstart : entity->finish;
+	} else {
+		/*
+		 * The finish time of the entity may be invalid, and
+		 * it is in the past for sure, otherwise the queue
+		 * would have been on the idle tree.
+		 */
+		entity->start = min_vstart;
+		st->wsum += entity->weight;
+		bfq_get_entity(entity);
+
+		BUG_ON(entity->on_st && bfqq);
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+		if (entity->on_st && !bfqq) {
+			struct bfq_group *bfqg =
+				container_of(entity, struct bfq_group,
+					     entity);
+
+			bfq_log_bfqg((struct bfq_data *)bfqg->bfqd,
+				     bfqg,
+				     "activate bug, class %d in_service %p",
+				     bfq_class_idx(entity), sd->in_service_entity);
+		}
+#endif
+		BUG_ON(entity->on_st && !bfqq);
+		entity->on_st = true;
+	}
+
+	bfq_update_fin_time_enqueue(entity, st, backshifted);
+}
+
+/**
+ * __bfq_requeue_entity - handle requeueing or repositioning of an entity.
+ * @entity: the entity being requeued or repositioned.
+ *
+ * Requeueing is needed if this entity stops being served, which
+ * happens if a leaf descendant entity has expired. On the other hand,
+ * repositioning is needed if the next_inservice_entity for the child
+ * entity has changed. See the comments inside the function for
+ * details.
+ *
+ * Basically, this function: 1) removes entity from its active tree if
+ * present there, 2) updates the timestamps of entity and 3) inserts
+ * entity back into its active tree (in the new, right position for
+ * the new values of the timestamps).
+ */
+static void __bfq_requeue_entity(struct bfq_entity *entity)
+{
+	struct bfq_sched_data *sd = entity->sched_data;
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+
+	BUG_ON(!sd);
+	BUG_ON(!st);
+
+	BUG_ON(entity != sd->in_service_entity &&
+	       entity->tree != &st->active);
+
+	if (entity == sd->in_service_entity) {
+		/*
+		 * We are requeueing the current in-service entity,
+		 * which may have to be done for one of the following
+		 * reasons:
+		 * - entity represents the in-service queue, and the
+		 *   in-service queue is being requeued after an
+		 *   expiration;
+		 * - entity represents a group, and its budget has
+		 *   changed because one of its child entities has
+		 *   just been either activated or requeued for some
+		 *   reason; the timestamps of the entity need then to
+		 *   be updated, and the entity needs to be enqueued
+		 *   or repositioned accordingly.
+		 *
+		 * In particular, before requeueing, the start time of
+		 * the entity must be moved forward to account for the
+		 * service that the entity has received while in
+		 * service. This is done by the next instructions. The
+		 * finish time will then be updated according to this
+		 * new value of the start time, and to the budget of
+		 * the entity.
+		 */
+		bfq_calc_finish(entity, entity->service);
+		entity->start = entity->finish;
+		BUG_ON(entity->tree && entity->tree != &st->active);
+		/*
+		 * In addition, if the entity had more than one child
+		 * when set in service, then was not extracted from
+		 * the active tree. This implies that the position of
+		 * the entity in the active tree may need to be
+		 * changed now, because we have just updated the start
+		 * time of the entity, and we will update its finish
+		 * time in a moment (the requeueing is then, more
+		 * precisely, a repositioning in this case). To
+		 * implement this repositioning, we: 1) dequeue the
+		 * entity here, 2) update the finish time and
+		 * requeue the entity according to the new
+		 * timestamps below.
+		 */
+		if (entity->tree)
+			bfq_active_extract(st, entity);
+	} else { /* The entity is already active, and not in service */
+		/*
+		 * In this case, this function gets called only if the
+		 * next_in_service entity below this entity has
+		 * changed, and this change has caused the budget of
+		 * this entity to change, which, finally implies that
+		 * the finish time of this entity must be
+		 * updated. Such an update may cause the scheduling,
+		 * i.e., the position in the active tree, of this
+		 * entity to change. We handle this change by: 1)
+		 * dequeueing the entity here, 2) updating the finish
+		 * time and requeueing the entity according to the new
+		 * timestamps below. This is the same approach as the
+		 * non-extracted-entity sub-case above.
+		 */
+		bfq_active_extract(st, entity);
+	}
 
+	bfq_update_fin_time_enqueue(entity, st, false);
 }
 
+static void __bfq_activate_requeue_entity(struct bfq_entity *entity,
+					  struct bfq_sched_data *sd,
+					  bool non_blocking_wait_rq)
+{
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+
+	if (sd->in_service_entity == entity || entity->tree == &st->active)
+		 /*
+		  * in service or already queued on the active tree,
+		  * requeue or reposition
+		  */
+		__bfq_requeue_entity(entity);
+	else
+		/*
+		 * Not in service and not queued on its active tree:
+		 * the activity is idle and this is a true activation.
+		 */
+		__bfq_activate_entity(entity, non_blocking_wait_rq);
+}
+
+
 /**
- * bfq_activate_entity - activate an entity and its ancestors if necessary.
+ * bfq_activate_entity - activate or requeue an entity representing a bfq_queue,
+ *			 and activate, requeue or reposition all ancestors
+ *			 for which such an update becomes necessary.
  * @entity: the entity to activate.
  * @non_blocking_wait_rq: true if this entity was waiting for a request
- *
- * Activate @entity and all the entities on the path from it to the root.
+ * @requeue: true if this is a requeue, which implies that bfqq is
+ *	     being expired; thus ALL its ancestors stop being served and must
+ *	     therefore be requeued
  */
-static void bfq_activate_entity(struct bfq_entity *entity,
-				bool non_blocking_wait_rq)
+static void bfq_activate_requeue_entity(struct bfq_entity *entity,
+					bool non_blocking_wait_rq,
+					bool requeue)
 {
 	struct bfq_sched_data *sd;
 
 	for_each_entity(entity) {
 		BUG_ON(!entity);
-		__bfq_activate_entity(entity, non_blocking_wait_rq);
-
 		sd = entity->sched_data;
+		__bfq_activate_requeue_entity(entity, sd, non_blocking_wait_rq);
+
 		BUG_ON(RB_EMPTY_ROOT(&sd->service_tree->active) &&
 		       RB_EMPTY_ROOT(&(sd->service_tree+1)->active) &&
 		       RB_EMPTY_ROOT(&(sd->service_tree+2)->active));
 
-		if (!bfq_update_next_in_service(sd, entity)) {
+		if (!bfq_update_next_in_service(sd, entity) && !requeue) {
 			BUG_ON(!sd->next_in_service);
-			/*
-			 * No need to propagate the activation to the
-			 * upper entities, as they will be updated when
-			 * the in-service entity is rescheduled.
-			 */
 			break;
 		}
 		BUG_ON(!sd->next_in_service);
@@ -1127,36 +1202,30 @@ static void bfq_activate_entity(struct bfq_entity *entity,
 /**
  * __bfq_deactivate_entity - deactivate an entity from its service tree.
  * @entity: the entity to deactivate.
- * @requeue: if false, the entity will not be put into the idle tree.
- *
- * Deactivate an entity, independently from its previous state.  If the
- * entity was not on a service tree just return, otherwise if it is on
- * any scheduler tree, extract it from that tree, and if necessary
- * and if the caller did not specify @requeue, put it on the idle tree.
+ * @ins_into_idle_tree: if false, the entity will not be put into the
+ *			idle tree.
  *
- * Return %true if the caller should update the entity hierarchy, i.e.,
- * if the entity was in service or if it was the next_in_service for
- * its sched_data; return %false otherwise.
+ * Deactivates an entity, independently from its previous state.  Must
+ * be invoked only if entity is on a service tree. Extracts the entity
+ * from that tree, and if necessary and allowed, puts it on the idle
+ * tree.
  */
-static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue)
+static bool __bfq_deactivate_entity(struct bfq_entity *entity,
+				    bool ins_into_idle_tree)
 {
 	struct bfq_sched_data *sd = entity->sched_data;
-	struct bfq_service_tree *st;
-	int was_in_service;
-	bool ret = false;
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+	bool was_in_service = entity == sd->in_service_entity;
 
-	if (sd == NULL || !entity->on_st) /* never activated, or inactive */
+	if (!entity->on_st) { /* entity never activated, or already inactive */
+		BUG_ON(entity == entity->sched_data->in_service_entity);
 		return false;
-
-	st = bfq_entity_service_tree(entity);
-	was_in_service = entity == sd->in_service_entity;
+	}
 
 	BUG_ON(was_in_service && entity->tree && entity->tree != &st->active);
 
-	if (was_in_service) {
+	if (was_in_service)
 		bfq_calc_finish(entity, entity->service);
-		sd->in_service_entity = NULL;
-	}
 
 	if (entity->tree == &st->active)
 		bfq_active_extract(st, entity);
@@ -1165,26 +1234,22 @@ static bool __bfq_deactivate_entity(struct bfq_entity *entity, bool requeue)
 	else if (entity->tree)
 		BUG();
 
-	if (sd->next_in_service == entity)
-		ret = bfq_update_next_in_service(sd, NULL);
-
-	if (!requeue || !bfq_gt(entity->finish, st->vtime))
+	if (!ins_into_idle_tree || !bfq_gt(entity->finish, st->vtime))
 		bfq_forget_entity(st, entity);
 	else
 		bfq_idle_insert(st, entity);
 
-	BUG_ON(sd->in_service_entity == entity);
-	BUG_ON(sd->next_in_service == entity);
-
-	return ret;
+	return true;
 }
 
 /**
- * bfq_deactivate_entity - deactivate an entity.
+ * bfq_deactivate_entity - deactivate an entity representing a bfq_queue.
  * @entity: the entity to deactivate.
- * @requeue: true if the entity can be put on the idle tree
+ * @ins_into_idle_tree: true if the entity can be put on the idle tree
  */
-static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+static void bfq_deactivate_entity(struct bfq_entity *entity,
+				  bool ins_into_idle_tree,
+				  bool expiration)
 {
 	struct bfq_sched_data *sd;
 	struct bfq_entity *parent;
@@ -1192,54 +1257,91 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
 	for_each_entity_safe(entity, parent) {
 		sd = entity->sched_data;
 
-		if (!__bfq_deactivate_entity(entity, requeue))
+		BUG_ON(sd == NULL); /*
+				     * It would mean that this is the
+				     * root group.
+				     */
+
+		BUG_ON(expiration && entity != sd->in_service_entity);
+
+		BUG_ON(entity != sd->in_service_entity &&
+		       entity->tree ==
+		       &bfq_entity_service_tree(entity)->active &&
+		       !sd->next_in_service);
+
+		if (!__bfq_deactivate_entity(entity, ins_into_idle_tree)) {
 			/*
-			 * next_in_service has not been changed, so
-			 * no upwards update is needed
+			 * Entity is not any tree any more, so, this
+			 * deactivation is a no-op, and there is
+			 * nothing to change for upper-level entities
+			 * (in case of expiration, this can never
+			 * happen).
 			 */
-			break;
+			BUG_ON(expiration); /*
+					     * entity cannot be already out of
+					     * any tree
+					     */
+			return;
+		}
 
-		if (sd->next_in_service)
+		if (sd->next_in_service == entity)
+			/*
+			 * entity was the next_in_service entity,
+			 * then, since entity has just been
+			 * deactivated, a new one must be found.
+			 */
+			bfq_update_next_in_service(sd, NULL);
+
+		if (sd->next_in_service) {
 			/*
 			 * The parent entity is still backlogged,
 			 * because next_in_service is not NULL. So, no
-			 * upwards deactivation is needed.  Yet,
-			 * next_in_service has been updated (see
-			 * comment on the body of the above if).  Then
-			 * the schedule nees to be updated upwards.
+			 * further upwards deactivation must be
+			 * performed.  Yet, next_in_service has
+			 * changed.  Then the schedule does need to be
+			 * updated upwards.
 			 */
-			goto update_schedule;
+			BUG_ON(sd->next_in_service == entity);
+			break;
+		}
 
 		/*
 		 * If we get here, then the parent is no more
 		 * backlogged and we need to propagate the
-		 * deactivation upwards. Then let the loop go on.
+		 * deactivation upwards. Thus let the loop go on.
 		 */
 
 		/*
 		 * Also let parent be queued into the idle tree on
 		 * deactivation, to preserve service guarantees, and
 		 * assuming that who invoked this function does not
-		 * need parent entities too to be removed by any tree.
+		 * need parent entities too to be removed completely.
 		 */
-		requeue = true;
+		ins_into_idle_tree = true;
 	}
 
-	return;
-
-update_schedule:
+	/*
+	 * If the deactivation loop is fully executed, then there are
+	 * no more entities to touch and next loop is not executed at
+	 * all. Otherwise, requeue remaining entities if they are
+	 * about to stop receiving service, or reposition them if this
+	 * is not the case.
+	 */
 	entity = parent;
 	for_each_entity(entity) {
 		struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
 		/*
-		 * Invoke __bfq_activate_entity on entity, even if
-		 * already active, to update its position in the
+		 * Invoke __bfq_requeue_entity on entity, even if
+		 * already active, to requeue/reposition it in the
 		 * active tree (because sd->next_in_service has
 		 * changed)
 		 */
-		__bfq_activate_entity(entity, false);
+		__bfq_requeue_entity(entity);
 
 		sd = entity->sched_data;
+		BUG_ON(expiration && sd->in_service_entity != entity);
+
 		if (bfqq)
 			bfq_log_bfqq(bfqq->bfqd, bfqq,
 				     "invoking udpdate_next for this queue");
@@ -1253,7 +1355,13 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
 				     "invoking udpdate_next for this entity");
 		}
 #endif
-		if (!bfq_update_next_in_service(sd, entity))
+		if (!bfq_update_next_in_service(sd, entity) &&
+		    !expiration)
+			/*
+			 * next_in_service unchanged, and no
+			 * requeueing needed for expiration: stop
+			 * here.
+			 */
 			break;
 	}
 }
@@ -1700,6 +1808,8 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
 
 static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
 {
+	struct bfq_entity *entity = &bfqd->in_service_queue->entity;
+
 	if (bfqd->in_service_bic) {
 		put_io_context(bfqd->in_service_bic->icq.ioc);
 		bfqd->in_service_bic = NULL;
@@ -1708,36 +1818,59 @@ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
 	bfq_clear_bfqq_wait_request(bfqd->in_service_queue);
 	hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
 	bfqd->in_service_queue = NULL;
+
+	/*
+	 * When this function is called, all in-service entities have
+	 * been properly deactivated or requeued, so we can safely
+	 * execute the final step: reset in_service_entity along the
+	 * path from entity to the root.
+	 */
+	for_each_entity(entity)
+		entity->sched_data->in_service_entity = NULL;
 }
 
 static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
-				bool requeue)
+				bool ins_into_idle_tree, bool expiration)
 {
 	struct bfq_entity *entity = &bfqq->entity;
 
-	BUG_ON(bfqq == bfqd->in_service_queue);
-	bfq_deactivate_entity(entity, requeue);
+	bfq_deactivate_entity(entity, ins_into_idle_tree, expiration);
 }
 
 static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
 	struct bfq_entity *entity = &bfqq->entity;
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+
+	BUG_ON(bfqq == bfqd->in_service_queue);
+	BUG_ON(entity->tree != &st->active && entity->tree != &st->idle &&
+	       entity->on_st);
 
-	bfq_activate_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq));
+	bfq_activate_requeue_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq),
+				    false);
 	bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
 }
 
+static void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	bfq_activate_requeue_entity(entity, false,
+				    bfqq == bfqd->in_service_queue);
+}
+
 static void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
 
 /*
  * Called when the bfqq no longer has requests pending, remove it from
- * the service tree.
+ * the service tree. As a special case, it can be invoked during an
+ * expiration.
  */
-static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			      bool expiration)
 {
 	BUG_ON(!bfq_bfqq_busy(bfqq));
 	BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
-	BUG_ON(bfqq == bfqd->in_service_queue);
 
 	bfq_log_bfqq(bfqd, bfqq, "del from busy");
 
@@ -1757,7 +1890,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 
 	BUG_ON(bfqq->entity.budget < 0);
 
-	bfq_deactivate_bfqq(bfqd, bfqq, true);
+	bfq_deactivate_bfqq(bfqd, bfqq, true, expiration);
 
 	BUG_ON(bfqq->entity.budget < 0);
 }
diff --git a/block/bfq.h b/block/bfq.h
index 1a03521..f90d534 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -140,10 +140,10 @@ struct bfq_entity {
 	struct bfq_weight_counter *weight_counter;
 
 	/*
-	 * flag, true if the entity is on a tree (either the active or
-	 * the idle one of its service_tree).
+	 * Flag, true if the entity is on a tree (either the active or
+	 * the idle one of its service_tree) or is in service.
 	 */
-	int on_st;
+	bool on_st;
 
 	u64 finish; /* B-WF2Q+ finish timestamp (aka F_i) */
 	u64 start;  /* B-WF2Q+ start timestamp (aka S_i) */

From 6ef4dcd808aa50f7ba49bc0fde68b72a8a68028c Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Fri, 16 Dec 2016 18:01:52 +0100
Subject: [PATCH 24/31] Add code to redirect trace log to console

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq.h | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/block/bfq.h b/block/bfq.h
index f90d534..d98403e 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -648,6 +648,43 @@ BFQ_BFQQ_FNS(softrt_update);
 #undef BFQ_BFQQ_FNS
 
 /* Logging facilities. */
+#ifdef CONFIG_BFQ_REDIRECT_TO_CONSOLE
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	do {			\
+	char __pbuf[128];						\
+									\
+	assert_spin_locked((bfqd)->queue->queue_lock);			\
+	blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \
+	pr_crit("bfq%d%c %s " fmt "\n", 			\
+		(bfqq)->pid,						\
+		bfq_bfqq_sync((bfqq)) ? 'S' : 'A',			\
+		__pbuf, ##args);					\
+} while (0)
+
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)	do {			\
+	char __pbuf[128];						\
+									\
+	blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf));		\
+	pr_crit("%s " fmt "\n", __pbuf, ##args);	\
+} while (0)
+
+#else /* CONFIG_BFQ_GROUP_IOSCHED */
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)		\
+	pr_crit("bfq%d%c " fmt "\n", (bfqq)->pid,		\
+		bfq_bfqq_sync((bfqq)) ? 'S' : 'A',	\
+		##args)
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)		do {} while (0)
+
+#endif /* CONFIG_BFQ_GROUP_IOSCHED */
+
+#define bfq_log(bfqd, fmt, args...) \
+	pr_crit("bfq " fmt "\n", ##args)
+
+#else /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
 static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
@@ -682,6 +719,7 @@ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
 
 #define bfq_log(bfqd, fmt, args...) \
 	blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
+#endif /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */
 
 /* Expiration reasons. */
 enum bfqq_expiration {

From f7e139123c8becd5ac6de6b62d395f5d8e94e498 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Sun, 18 Dec 2016 22:45:07 +0100
Subject: [PATCH 25/31] BFQ v8r6-rc2

---
 block/bfq-iosched.c | 2 +-
 block/bfq.h         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 23e4c94..7036f0b 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5229,7 +5229,7 @@ static struct blkcg_policy blkcg_policy_bfq = {
 static int __init bfq_init(void)
 {
 	int ret;
-	char msg[60] = "BFQ I/O-scheduler: v8r6-rc1";
+	char msg[60] = "BFQ I/O-scheduler: v8r6-rc2";
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	ret = blkcg_policy_register(&blkcg_policy_bfq);
diff --git a/block/bfq.h b/block/bfq.h
index d98403e..4dd4931 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -1,5 +1,5 @@
 /*
- * BFQ v8r6-rc1 for 4.9.0: data structures and common functions prototypes.
+ * BFQ v8r6-rc2 for 4.9.0: data structures and common functions prototypes.
  *
  * Based on ideas and code from CFQ:
  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>

From 2d790c793eab404abd5aadce0981b97c498c996b Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Mon, 19 Dec 2016 08:12:26 +0100
Subject: [PATCH 26/31] Fix bug in optimized update of next_in_service entity

There was a case where bfq_update_next_in_service did not update
next_in_service, even if it might need to be changed: in case of
requeueing or repositioning of the entity that happened to be
pointed exactly by next_in_service. This could result in violation
of service guarantees, because, after a change of timestamps for
such an entity, it might be the case that next_in_service had to
point to a different entity. This commit fixes this bug.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-sched.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index 3b8e05b..41fd5c9 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -68,8 +68,9 @@ static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
 /**
  * bfq_update_next_in_service - update sd->next_in_service
  * @sd: sched_data for which to perform the update.
- * @new_entity: if not NULL, pointer to the entity whose activation
- *		triggered the invocation of this function.
+ * @new_entity: if not NULL, pointer to the entity whose activation,
+ *		requeueing or repositionig triggered the invocation of
+ *		this function.
  *
  * This function is called to update sd->next_in_service, which, in
  * its turn, may change as a consequence of the insertion or
@@ -95,17 +96,18 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
 	struct bfq_queue *bfqq;
 
 	/*
-	 * If this update is triggered by the activation of a new
-	 * entity, then a full lookup in the active tree can be
-	 * avoided. In fact, it is enough to check whether the
-	 * just-activated entity has a higher priority of
+	 * If this update is triggered by the activation, requeueing
+	 * or repositiong of an entity that does not coincide with
+	 * sd->next_in_service, then a full lookup in the active tree
+	 * can be avoided. In fact, it is enough to check whether the
+	 * just-modified entity has a higher priority than
 	 * sd->next_in_service, or, even if it has the same priority
 	 * as sd->next_in_service, is eligible and has a lower virtual
 	 * finish time than sd->next_in_service. If this compound
 	 * condition holds, then the new entity becomes the new
 	 * next_in_service. Otherwise no change is needed.
 	 */
-	if (new_entity) {
+	if (new_entity && new_entity != sd->next_in_service) {
 		/*
 		 * Flag used to decide whether to replace
 		 * sd->next_in_service with new_entity. Tentatively
@@ -114,9 +116,6 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
 		 */
 		bool replace_next = true;
 
-		if (new_entity == sd->next_in_service)
-			return false;
-
 		/*
 		 * If there is already a next_in_service candidate
 		 * entity, then compare class priorities or timestamps

From 86fed232c6e9c6c54bb4e0035c5641977d734cc6 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Mon, 19 Dec 2016 09:11:50 +0100
Subject: [PATCH 27/31] Stop bubble-up of next_in_service update if possible

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-sched.c | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index 41fd5c9..4af77f4 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -37,11 +37,16 @@ static int bfq_gt(u64 a, u64 b)
 
 static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd);
 
-static void bfq_update_budget(struct bfq_entity *next_in_service)
+/*
+ * Returns true if this budget changes may let next_in_service->parent
+ * become the next_in_service entity for its parent entity.
+ */
+static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
 {
 	struct bfq_entity *bfqg_entity;
 	struct bfq_group *bfqg;
 	struct bfq_sched_data *group_sd;
+	bool ret = false;
 
 	BUG_ON(!next_in_service);
 
@@ -54,8 +59,13 @@ static void bfq_update_budget(struct bfq_entity *next_in_service)
 	 * as it must never become an in-service entity.
 	 */
 	bfqg_entity = bfqg->my_entity;
-	if (bfqg_entity)
+	if (bfqg_entity) {
+		if (bfqg_entity->budget > next_in_service->budget)
+			ret = true;
 		bfqg_entity->budget = next_in_service->budget;
+	}
+
+	return ret;
 }
 
 static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
@@ -84,16 +94,16 @@ static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
  * both the last two activation sub-cases, new_entity points to the
  * just activated or requeued entity.
  *
- * This is a still incomplete version of this function, which always
- * returns true. It will return also false in its complete version, in
- * case either next_in_service has not changed, or next_in_service has
- * changed but in a way that will not influence upper levels.
+ * Returns true if sd->next_in_service changes in such a way that
+ * entity->parent may become the next_in_service for its parent
+ * entity.
  */
 static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
 				       struct bfq_entity *new_entity)
 {
 	struct bfq_entity *next_in_service = sd->next_in_service;
 	struct bfq_queue *bfqq;
+	bool parent_sched_may_change = false;
 
 	/*
 	 * If this update is triggered by the activation, requeueing
@@ -150,12 +160,15 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
 	} else /* invoked because of a deactivation: lookup needed */
 		next_in_service = bfq_lookup_next_entity(sd);
 
+	if (next_in_service) {
+		parent_sched_may_change = !sd->next_in_service ||
+			bfq_update_parent_budget(next_in_service);
+	}
+
 	sd->next_in_service = next_in_service;
 
-	if (next_in_service)
-		bfq_update_budget(next_in_service);
-	else
-		goto exit;
+	if (!next_in_service)
+		return parent_sched_may_change;
 
 	bfqq = bfq_entity_to_bfqq(next_in_service);
 	if (bfqq)
@@ -171,8 +184,7 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
 			     "update_next_in_service: chosen this entity");
 	}
 #endif
-exit:
-	return true;
+	return parent_sched_may_change;
 }
 
 #else
@@ -187,7 +199,7 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
 	return 0;
 }
 
-static void bfq_update_budget(struct bfq_entity *next_in_service)
+static void bfq_update_parent_budget(struct bfq_entity *next_in_service)
 {
 }
 #endif

From eb70b7cbd454b508d3d81cac89f4c4655f8ff7a1 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Mon, 19 Dec 2016 11:15:38 +0100
Subject: [PATCH 28/31] BFQ v8r6-rc3

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 2 +-
 block/bfq.h         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 7036f0b..be8aa7f 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5229,7 +5229,7 @@ static struct blkcg_policy blkcg_policy_bfq = {
 static int __init bfq_init(void)
 {
 	int ret;
-	char msg[60] = "BFQ I/O-scheduler: v8r6-rc2";
+	char msg[60] = "BFQ I/O-scheduler: v8r6-rc3";
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	ret = blkcg_policy_register(&blkcg_policy_bfq);
diff --git a/block/bfq.h b/block/bfq.h
index 4dd4931..b285bc4 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -1,5 +1,5 @@
 /*
- * BFQ v8r6-rc2 for 4.9.0: data structures and common functions prototypes.
+ * BFQ v8r6-rc3 for 4.9.0: data structures and common functions prototypes.
  *
  * Based on ideas and code from CFQ:
  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>

From 73765731ad69d43823c2784dba8176a1c8da88d8 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Fri, 23 Dec 2016 11:15:21 +0100
Subject: [PATCH 29/31] BFQ-v8r6

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 2 +-
 block/bfq.h         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index be8aa7f..0f9cd53 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5229,7 +5229,7 @@ static struct blkcg_policy blkcg_policy_bfq = {
 static int __init bfq_init(void)
 {
 	int ret;
-	char msg[60] = "BFQ I/O-scheduler: v8r6-rc3";
+	char msg[60] = "BFQ I/O-scheduler: v8r6";
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	ret = blkcg_policy_register(&blkcg_policy_bfq);
diff --git a/block/bfq.h b/block/bfq.h
index b285bc4..57ab51d 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -1,5 +1,5 @@
 /*
- * BFQ v8r6-rc3 for 4.9.0: data structures and common functions prototypes.
+ * BFQ v8r6 for 4.9.0: data structures and common functions prototypes.
  *
  * Based on ideas and code from CFQ:
  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>

From a3d988a47da1968219016c053a112dd6794f1fdb Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Fri, 30 Dec 2016 10:13:59 +0100
Subject: [PATCH 30/31] BUGFIX: make BFQ compile also without hierarchical
 support

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-sched.c | 172 +++++++++++++++++++++++++++---------------------------
 1 file changed, 87 insertions(+), 85 deletions(-)

diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index 4af77f4..797bce7 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -26,48 +26,6 @@ static int bfq_gt(u64 a, u64 b)
 	return (s64)(a - b) > 0;
 }
 
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
-/* both next loops stop at one of the child entities of the root group */
-#define for_each_entity(entity)				\
-	for (; entity ; entity = entity->parent)
-
-#define for_each_entity_safe(entity, parent) \
-	for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
-
-
-static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd);
-
-/*
- * Returns true if this budget changes may let next_in_service->parent
- * become the next_in_service entity for its parent entity.
- */
-static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
-{
-	struct bfq_entity *bfqg_entity;
-	struct bfq_group *bfqg;
-	struct bfq_sched_data *group_sd;
-	bool ret = false;
-
-	BUG_ON(!next_in_service);
-
-	group_sd = next_in_service->sched_data;
-
-	bfqg = container_of(group_sd, struct bfq_group, sched_data);
-	/*
-	 * bfq_group's my_entity field is not NULL only if the group
-	 * is not the root group. We must not touch the root entity
-	 * as it must never become an in-service entity.
-	 */
-	bfqg_entity = bfqg->my_entity;
-	if (bfqg_entity) {
-		if (bfqg_entity->budget > next_in_service->budget)
-			ret = true;
-		bfqg_entity->budget = next_in_service->budget;
-	}
-
-	return ret;
-}
-
 static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
 {
 	struct rb_node *node = tree->rb_node;
@@ -75,6 +33,10 @@ static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
 	return rb_entry(node, struct bfq_entity, rb_node);
 }
 
+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd);
+
+static bool bfq_update_parent_budget(struct bfq_entity *next_in_service);
+
 /**
  * bfq_update_next_in_service - update sd->next_in_service
  * @sd: sched_data for which to perform the update.
@@ -187,22 +149,98 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
 	return parent_sched_may_change;
 }
 
-#else
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+/* both next loops stop at one of the child entities of the root group */
+#define for_each_entity(entity)				\
+	for (; entity ; entity = entity->parent)
+
+#define for_each_entity_safe(entity, parent) \
+	for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
+
+/*
+ * Returns true if this budget changes may let next_in_service->parent
+ * become the next_in_service entity for its parent entity.
+ */
+static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
+{
+	struct bfq_entity *bfqg_entity;
+	struct bfq_group *bfqg;
+	struct bfq_sched_data *group_sd;
+	bool ret = false;
+
+	BUG_ON(!next_in_service);
+
+	group_sd = next_in_service->sched_data;
+
+	bfqg = container_of(group_sd, struct bfq_group, sched_data);
+	/*
+	 * bfq_group's my_entity field is not NULL only if the group
+	 * is not the root group. We must not touch the root entity
+	 * as it must never become an in-service entity.
+	 */
+	bfqg_entity = bfqg->my_entity;
+	if (bfqg_entity) {
+		if (bfqg_entity->budget > next_in_service->budget)
+			ret = true;
+		bfqg_entity->budget = next_in_service->budget;
+	}
+
+	return ret;
+}
+
+/*
+ * This function tells whether entity stops being a candidate for next
+ * service, according to the following logic.
+ *
+ * This function is invoked for an entity that is about to be set in
+ * service. If such an entity is a queue, then the entity is no longer
+ * a candidate for next service (i.e, a candidate entity to serve
+ * after the in-service entity is expired). The function then returns
+ * true.
+ *
+ * In contrast, the entity could stil be a candidate for next service
+ * if it is not a queue, and has more than one child. In fact, even if
+ * one of its children is about to be set in service, other children
+ * may still be the next to serve. As a consequence, a non-queue
+ * entity is not a candidate for next-service only if it has only one
+ * child. And only if this condition holds, then the function returns
+ * true for a non-queue entity.
+ */
+static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
+{
+	struct bfq_group *bfqg;
+
+	if (bfq_entity_to_bfqq(entity))
+		return true;
+
+	bfqg = container_of(entity, struct bfq_group, entity);
+
+	BUG_ON(bfqg == ((struct bfq_data *)(bfqg->bfqd))->root_group);
+	BUG_ON(bfqg->active_entities == 0);
+	if (bfqg->active_entities == 1)
+		return true;
+
+	return false;
+}
+
+#else /* CONFIG_BFQ_GROUP_IOSCHED */
 #define for_each_entity(entity)	\
 	for (; entity ; entity = NULL)
 
 #define for_each_entity_safe(entity, parent) \
 	for (parent = NULL; entity ; entity = parent)
 
-static int bfq_update_next_in_service(struct bfq_sched_data *sd)
+static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
 {
-	return 0;
+	return false;
 }
 
-static void bfq_update_parent_budget(struct bfq_entity *next_in_service)
+static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
 {
+	return true;
 }
-#endif
+
+#endif /* CONFIG_BFQ_GROUP_IOSCHED */
 
 /*
  * Shift for timestamp calculations.  This actually limits the maximum
@@ -1369,7 +1407,8 @@ static void bfq_deactivate_entity(struct bfq_entity *entity,
 		if (!bfq_update_next_in_service(sd, entity) &&
 		    !expiration)
 			/*
-			 * next_in_service unchanged, and no
+			 * next_in_service unchanged or not causing
+			 * any change in entity->parent->sd, and no
 			 * requeueing needed for expiration: stop
 			 * here.
 			 */
@@ -1627,43 +1666,6 @@ static bool next_queue_may_preempt(struct bfq_data *bfqd)
 }
 
 /*
- * This function tells whether entity stops being a candidate for next
- * service, according to the following logic.
- *
- * This function is invoked for an entity that is about to be set in
- * service. If such an entity is a queue, then the entity is no longer
- * a candidate for next service (i.e, a candidate entity to serve
- * after the in-service entity is expired). The function then returns
- * true.
- *
- * In contrast, the entity could stil be a candidate for next service
- * if it is not a queue, and has more than one child. In fact, even if
- * one of its children is about to be set in service, other children
- * may still be the next to serve. As a consequence, a non-queue
- * entity is not a candidate for next-service only if it has only one
- * child. And only if this condition holds, then the function returns
- * true for a non-queue entity.
- */
-static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
-{
-	struct bfq_group *bfqg;
-
-	if (bfq_entity_to_bfqq(entity))
-		return true;
-
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
-	bfqg = container_of(entity, struct bfq_group, entity);
-
-	BUG_ON(bfqg == ((struct bfq_data *)(bfqg->bfqd))->root_group);
-	BUG_ON(bfqg->active_entities == 0);
-	if (bfqg->active_entities == 1)
-		return true;
-#endif
-
-	return false;
-}
-
-/*
  * Get next queue for service.
  */
 static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)

From f211d5cd6a36e0a1dfc598bec7d92bd48275aa52 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Fri, 30 Dec 2016 10:17:24 +0100
Subject: [PATCH 31/31] BFQ-v8r7

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
---
 block/bfq-iosched.c | 2 +-
 block/bfq.h         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0f9cd53..2a2c130 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5229,7 +5229,7 @@ static struct blkcg_policy blkcg_policy_bfq = {
 static int __init bfq_init(void)
 {
 	int ret;
-	char msg[60] = "BFQ I/O-scheduler: v8r6";
+	char msg[60] = "BFQ I/O-scheduler: v8r7";
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	ret = blkcg_policy_register(&blkcg_policy_bfq);
diff --git a/block/bfq.h b/block/bfq.h
index 57ab51d..bef8244 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -1,5 +1,5 @@
 /*
- * BFQ v8r6 for 4.9.0: data structures and common functions prototypes.
+ * BFQ v8r7 for 4.9.0: data structures and common functions prototypes.
  *
  * Based on ideas and code from CFQ:
  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
0