path: root/block.patch
author     alyptik  2017-01-07 13:48:30 -1000
committer  alyptik  2017-01-07 13:48:46 -1000
commit     aec4310300ab04ea4398c2bad4df6e3e9c8cfe62 (patch)
tree       3bbde5efc1622019ca402af072f4a4c29eb2dce9 /block.patch
parent     081ac052b27edd8b527c28de81c94c06c155596f (diff)
download   aur-aec4310300ab04ea4398c2bad4df6e3e9c8cfe62.tar.gz
Update to source version 4.9-rt1
Diffstat (limited to 'block.patch')
-rw-r--r--  block.patch  2650
1 file changed, 0 insertions(+), 2650 deletions(-)
diff --git a/block.patch b/block.patch
deleted file mode 100644
index 1f99d7869a0d..000000000000
--- a/block.patch
+++ /dev/null
@@ -1,2650 +0,0 @@
-To: LKML <linux-kernel@vger.kernel.org>, Jens Axboe <axboe@fb.com>
-From: =?UTF-8?Q?Holger_Hoffst=c3=a4tte?= <holger.hoffstaette@googlemail.com>
-Subject: [PATCH] loop: properly observe rotational flag of underlying device
-Organization: Applied Asynchrony, Inc.
-Date: Wed, 11 Nov 2015 16:21:51 +0100
-
-The loop driver always declares the rotational flag of its device as
-rotational, even when the device of the mapped file is nonrotational,
-as is the case with SSDs or on tmpfs. This can confuse filesystem tools
-which are SSD-aware; in my case I frequently forget to tell mkfs.btrfs
-that my loop device on tmpfs is nonrotational, and that I really don't
-need any automatic metadata redundancy.
-
-The attached patch fixes this by introspecting the rotational flag of the
-mapped file's underlying block device, if it exists. If the mapped file's
-filesystem has no associated block device - as is the case on e.g. tmpfs -
-we assume nonrotational storage. If there is a better way to identify such
-non-devices, I'd love to hear about it.
-
-Signed-off-by: Holger Hoffstätte <holger.hoffstaette@googlemail.com>
----
- drivers/block/loop.c | 19 +++++++++++++++++++
- 1 file changed, 19 insertions(+)
-
-diff --git a/drivers/block/loop.c b/drivers/block/loop.c
-index 423f4ca..2984aca 100644
---- a/drivers/block/loop.c
-+++ b/drivers/block/loop.c
-@@ -843,6 +843,24 @@ static void loop_config_discard(struct loop_device *lo)
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
- }
-
-+static void loop_update_rotational(struct loop_device *lo)
-+{
-+ struct file *file = lo->lo_backing_file;
-+ struct inode *file_inode = file->f_mapping->host;
-+ struct block_device *file_bdev = file_inode->i_sb->s_bdev;
-+ struct request_queue *q = lo->lo_queue;
-+ bool nonrot = true;
-+
-+ /* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
-+ if (file_bdev)
-+ nonrot = blk_queue_nonrot(bdev_get_queue(file_bdev));
-+
-+ if (nonrot)
-+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
-+ else
-+ queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
-+}
-+
- static void loop_unprepare_queue(struct loop_device *lo)
- {
- flush_kthread_worker(&lo->worker);
-@@ -939,6 +957,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
- if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
- blk_queue_flush(lo->lo_queue, REQ_FLUSH);
-
-+ loop_update_rotational(lo);
- loop_update_dio(lo);
- set_capacity(lo->lo_disk, size);
- bd_set_size(bdev, size << 9);
---
-2.6.3
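
A note on the loop patch above: it inherits the nonrotational flag from the block
device backing the mapped file's filesystem, and defaults to nonrotational when no
such device exists (tmpfs). Below is a minimal userspace sketch of the same check
via the standard sysfs attribute; the device names are only examples, not part of
the patch.

/*
 * Sketch: query the rotational attribute the loop patch above propagates,
 * defaulting to nonrotational when the attribute cannot be read (mirroring
 * the tmpfs fallback). Device names are examples.
 */
#include <stdio.h>

static int is_rotational(const char *dev)       /* e.g. "sda", "loop0" */
{
        char path[256];
        FILE *f;
        int rot = 0;                            /* assume nonrotational */

        snprintf(path, sizeof(path), "/sys/block/%s/queue/rotational", dev);
        f = fopen(path, "r");
        if (f) {
                if (fscanf(f, "%d", &rot) != 1)
                        rot = 0;
                fclose(f);
        }
        return rot;
}

int main(void)
{
        printf("loop0 rotational: %d\n", is_rotational("loop0"));
        return 0;
}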
-From 273d4cb9fc3d75b6b7f147d1a064f75a5412a76c Mon Sep 17 00:00:00 2001
-From: Jens Axboe <axboe@fb.com>
-Date: Wed, 27 Jul 2016 15:30:35 -0600
-Subject: block: add WRITE_BG
-
-This adds a new request flag, REQ_BG, that callers can use to tell
-the block layer that this is background (non-urgent) IO.
-
-Signed-off-by: Jens Axboe <axboe@fb.com>
----
- include/linux/blk_types.h | 4 +++-
- include/linux/fs.h | 3 +++
- 2 files changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
-index 436f43f..be4409b 100644
---- a/include/linux/blk_types.h
-+++ b/include/linux/blk_types.h
-@@ -155,6 +155,7 @@ enum rq_flag_bits {
- __REQ_INTEGRITY, /* I/O includes block integrity payload */
- __REQ_FUA, /* forced unit access */
- __REQ_PREFLUSH, /* request for cache flush */
-+ __REQ_BG, /* background activity */
-
- /* bio only flags */
- __REQ_RAHEAD, /* read ahead, can fail anytime */
-@@ -198,7 +199,7 @@ enum rq_flag_bits {
- (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
- #define REQ_COMMON_MASK \
- (REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \
-- REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE)
-+ REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE | REQ_BG)
- #define REQ_CLONE_MASK REQ_COMMON_MASK
-
- /* This mask is used for both bio and request merge checking */
-@@ -223,6 +224,7 @@ enum rq_flag_bits {
- #define REQ_COPY_USER (1ULL << __REQ_COPY_USER)
- #define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH)
- #define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ)
-+#define REQ_BG (1ULL << __REQ_BG)
- #define REQ_IO_STAT (1ULL << __REQ_IO_STAT)
- #define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE)
- #define REQ_PM (1ULL << __REQ_PM)
-diff --git a/include/linux/fs.h b/include/linux/fs.h
-index 901e25d..7c7951f 100644
---- a/include/linux/fs.h
-+++ b/include/linux/fs.h
-@@ -189,6 +189,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
- * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded
- * by a cache flush and data is guaranteed to be on
- * non-volatile media on completion.
-+ * WRITE_BG Background write. This is for background activity like
-+ * the periodic flush and background threshold writeback
- *
- */
- #define RW_MASK REQ_OP_WRITE
-@@ -202,6 +204,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
- #define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH)
- #define WRITE_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FUA)
- #define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA)
-+#define WRITE_BG (REQ_NOIDLE | REQ_BG)
-
- /*
- * Attribute flags. These should be or-ed together to figure out what
---
-cgit v0.11.2
-
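For illustration, a standalone sketch of the flag composition the WRITE_BG patch
above relies on. The bit positions below are made up; only the relationships
between REQ_BG, REQ_NOIDLE and WRITE_BG mirror the hunks above.

/*
 * Sketch of request-flag composition as in the WRITE_BG patch above.
 * Bit positions are illustrative, not the kernel's actual enum values.
 */
#include <stdio.h>

#define REQ_SYNC        (1u << 0)
#define REQ_NOIDLE      (1u << 1)
#define REQ_BG          (1u << 2)       /* background (non-urgent) IO */

#define WRITE_SYNC      (REQ_SYNC | REQ_NOIDLE)
#define WRITE_BG        (REQ_NOIDLE | REQ_BG)

int main(void)
{
        unsigned int flags = WRITE_BG;

        printf("background? %s\n", (flags & REQ_BG) ? "yes" : "no");
        printf("sync?       %s\n", (flags & REQ_SYNC) ? "yes" : "no");
        return 0;
}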
-From 33a170c4f076584bc05feb19efa7beb0ee099318 Mon Sep 17 00:00:00 2001
-From: Jens Axboe <axboe@fb.com>
-Date: Wed, 27 Jul 2016 15:24:08 -0600
-Subject: writeback: add wbc_to_write_flags()
-
-Add wbc_to_write_flags(), which returns the write modifier flags to use,
-based on a struct writeback_control. No functional changes in this
-patch, but it prepares us for factoring other wbc fields for write type.
-
-Signed-off-by: Jens Axboe <axboe@fb.com>
-Reviewed-by: Jan Kara <jack@suse.cz>
----
- fs/buffer.c | 2 +-
- fs/f2fs/data.c | 2 +-
- fs/f2fs/node.c | 2 +-
- fs/gfs2/meta_io.c | 3 +--
- fs/mpage.c | 2 +-
- fs/xfs/xfs_aops.c | 7 +++----
- include/linux/writeback.h | 8 ++++++++
- 7 files changed, 16 insertions(+), 10 deletions(-)
-
-diff --git a/fs/buffer.c b/fs/buffer.c
-index 9c8eb9b..6a5f1a0 100644
---- a/fs/buffer.c
-+++ b/fs/buffer.c
-@@ -1698,7 +1698,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
- struct buffer_head *bh, *head;
- unsigned int blocksize, bbits;
- int nr_underway = 0;
-- int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
-+ int write_flags = wbc_to_write_flags(wbc);
-
- head = create_page_buffers(page, inode,
- (1 << BH_Dirty)|(1 << BH_Uptodate));
-diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
-index ccb401e..cb0528b 100644
---- a/fs/f2fs/data.c
-+++ b/fs/f2fs/data.c
-@@ -1240,7 +1240,7 @@ static int f2fs_write_data_page(struct page *page,
- .sbi = sbi,
- .type = DATA,
- .op = REQ_OP_WRITE,
-- .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0,
-+ .op_flags = wbc_to_write_flags(wbc),
- .page = page,
- .encrypted_page = NULL,
- };
-diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
-index f75d197..c1713da 100644
---- a/fs/f2fs/node.c
-+++ b/fs/f2fs/node.c
-@@ -1561,7 +1561,7 @@ static int f2fs_write_node_page(struct page *page,
- .sbi = sbi,
- .type = NODE,
- .op = REQ_OP_WRITE,
-- .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0,
-+ .op_flags = wbc_to_write_flags(wbc),
- .page = page,
- .encrypted_page = NULL,
- };
-diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
-index 950b8be..7991c62 100644
---- a/fs/gfs2/meta_io.c
-+++ b/fs/gfs2/meta_io.c
-@@ -37,8 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
- {
- struct buffer_head *bh, *head;
- int nr_underway = 0;
-- int write_flags = REQ_META | REQ_PRIO |
-- (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
-+ int write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc);
-
- BUG_ON(!PageLocked(page));
- BUG_ON(!page_has_buffers(page));
-diff --git a/fs/mpage.c b/fs/mpage.c
-index d2413af..d6f1afe 100644
---- a/fs/mpage.c
-+++ b/fs/mpage.c
-@@ -489,7 +489,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
- struct buffer_head map_bh;
- loff_t i_size = i_size_read(inode);
- int ret = 0;
-- int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
-+ int op_flags = wbc_to_write_flags(wbc);
-
- if (page_has_buffers(page)) {
- struct buffer_head *head = page_buffers(page);
-diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
-index 7575cfc..a68645a 100644
---- a/fs/xfs/xfs_aops.c
-+++ b/fs/xfs/xfs_aops.c
-@@ -447,8 +447,8 @@ xfs_submit_ioend(
-
- ioend->io_bio->bi_private = ioend;
- ioend->io_bio->bi_end_io = xfs_end_bio;
-- bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
-- (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
-+ bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, wbc_to_write_flags(wbc));
-+
- /*
- * If we are failing the IO now, just mark the ioend with an
- * error and finish it. This will run IO completion immediately
-@@ -519,8 +519,7 @@ xfs_chain_bio(
-
- bio_chain(ioend->io_bio, new);
- bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
-- bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
-- (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
-+ bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, wbc_to_write_flags(wbc));
- submit_bio(ioend->io_bio);
- ioend->io_bio = new;
- }
-diff --git a/include/linux/writeback.h b/include/linux/writeback.h
-index fc1e16c..608afd3 100644
---- a/include/linux/writeback.h
-+++ b/include/linux/writeback.h
-@@ -100,6 +100,14 @@ struct writeback_control {
- #endif
- };
-
-+static inline int wbc_to_write_flags(struct writeback_control *wbc)
-+{
-+ if (wbc->sync_mode == WB_SYNC_ALL)
-+ return WRITE_SYNC;
-+
-+ return 0;
-+}
-+
- /*
- * A wb_domain represents a domain that wb's (bdi_writeback's) belong to
- * and are measured against each other in. There always is one global
---
-cgit v0.11.2
-
-From d6cf7bfd4d627114ba3e2cce96fa9468042a6fba Mon Sep 17 00:00:00 2001
-From: Jens Axboe <axboe@fb.com>
-Date: Thu, 14 Apr 2016 09:53:24 -0600
-Subject: writeback: use WRITE_BG for kupdate and background writeback
-
-If we're doing background type writes, then use the appropriate
-write command for that.
-
-Signed-off-by: Jens Axboe <axboe@fb.com>
----
- include/linux/writeback.h | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/include/linux/writeback.h b/include/linux/writeback.h
-index 608afd3..e53abf2 100644
---- a/include/linux/writeback.h
-+++ b/include/linux/writeback.h
-@@ -104,6 +104,8 @@ static inline int wbc_to_write_flags(struct writeback_control *wbc)
- {
- if (wbc->sync_mode == WB_SYNC_ALL)
- return WRITE_SYNC;
-+ else if (wbc->for_kupdate || wbc->for_background)
-+ return WRITE_BG;
-
- return 0;
- }
---
-cgit v0.11.2
-
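Taken together, the two writeback patches above reduce the per-caller decision to
one helper. A self-contained sketch of that mapping follows, using stub types and
made-up flag values in place of the kernel's definitions.

/*
 * Sketch of the wbc_to_write_flags() mapping after the two patches above.
 * struct writeback_control and the flag values are stand-ins.
 */
#include <stdio.h>

enum writeback_sync_modes { WB_SYNC_NONE, WB_SYNC_ALL };

struct writeback_control {
        enum writeback_sync_modes sync_mode;
        unsigned int for_kupdate:1;
        unsigned int for_background:1;
};

#define WRITE_SYNC      0x1             /* illustrative values */
#define WRITE_BG        0x2

static int wbc_to_write_flags(const struct writeback_control *wbc)
{
        if (wbc->sync_mode == WB_SYNC_ALL)
                return WRITE_SYNC;      /* data integrity writeback */
        else if (wbc->for_kupdate || wbc->for_background)
                return WRITE_BG;        /* periodic or background flush */

        return 0;
}

int main(void)
{
        struct writeback_control bg  = { .sync_mode = WB_SYNC_NONE, .for_background = 1 };
        struct writeback_control all = { .sync_mode = WB_SYNC_ALL };

        printf("background -> %d, sync -> %d\n",
               wbc_to_write_flags(&bg), wbc_to_write_flags(&all));
        return 0;
}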
-From cd38cff40da34de0bf78f8305c89bdfafc606e7f Mon Sep 17 00:00:00 2001
-From: Jens Axboe <axboe@fb.com>
-Date: Thu, 1 Sep 2016 10:20:33 -0600
-Subject: writeback: track if we're sleeping on progress in
- balance_dirty_pages()
-
-Note in the bdi_writeback structure whenever a task ends up sleeping
-waiting for progress. We can use that information in the lower layers
-to increase the priority of writes.
-
-Signed-off-by: Jens Axboe <axboe@fb.com>
----
- include/linux/backing-dev-defs.h | 2 ++
- mm/backing-dev.c | 1 +
- mm/page-writeback.c | 1 +
- 3 files changed, 4 insertions(+)
-
-diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
-index c357f27..dc5f76d 100644
---- a/include/linux/backing-dev-defs.h
-+++ b/include/linux/backing-dev-defs.h
-@@ -116,6 +116,8 @@ struct bdi_writeback {
- struct list_head work_list;
- struct delayed_work dwork; /* work item used for writeback */
-
-+ unsigned long dirty_sleep; /* last wait */
-+
- struct list_head bdi_node; /* anchored at bdi->wb_list */
-
- #ifdef CONFIG_CGROUP_WRITEBACK
-diff --git a/mm/backing-dev.c b/mm/backing-dev.c
-index 8fde443..3bfed5ab 100644
---- a/mm/backing-dev.c
-+++ b/mm/backing-dev.c
-@@ -310,6 +310,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
- spin_lock_init(&wb->work_lock);
- INIT_LIST_HEAD(&wb->work_list);
- INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
-+ wb->dirty_sleep = jiffies;
-
- wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
- if (!wb->congested)
-diff --git a/mm/page-writeback.c b/mm/page-writeback.c
-index f4cd7d8..98bc3fc 100644
---- a/mm/page-writeback.c
-+++ b/mm/page-writeback.c
-@@ -1778,6 +1778,7 @@ pause:
- pause,
- start_time);
- __set_current_state(TASK_KILLABLE);
-+ wb->dirty_sleep = now;
- io_schedule_timeout(pause);
-
- current->dirty_paused_when = now + pause;
---
-cgit v0.11.2
-
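The hunks above only record a timestamp; the wbt patch later in this file turns it
into a "did a task recently wait on dirty throttling?" test. Here is a userspace
sketch of that check, using time() seconds instead of kernel jiffies.

/*
 * Sketch of the check built on the dirty_sleep timestamp added above,
 * i.e. "a writer was throttled within the last second".
 */
#include <stdio.h>
#include <time.h>

static time_t dirty_sleep;                      /* last throttled wait */

static void record_dirty_wait(void)             /* wb->dirty_sleep = now */
{
        dirty_sleep = time(NULL);
}

static int recent_dirty_wait(void)              /* cf. wb_recent_wait() */
{
        return time(NULL) < dirty_sleep + 1;
}

int main(void)
{
        record_dirty_wait();
        printf("writer throttled recently? %d\n", recent_dirty_wait());
        return 0;
}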
-From a98f5ab3840c2e6008c478aafe5df055404acdd1 Mon Sep 17 00:00:00 2001
-From: Jens Axboe <axboe@fb.com>
-Date: Wed, 30 Mar 2016 10:21:08 -0600
-Subject: block: add code to track actual device queue depth
-
-For blk-mq, ->nr_requests does track queue depth, at least at init
-time. But for the older queue paths, it's simply a soft setting.
-On top of that, it's generally larger than the hardware setting
-on purpose, to allow backup of requests for merging.
-
-Fill a hole in struct request_queue with a 'queue_depth' member that
-drivers can set to more closely inform the block layer of the
-real queue depth.
-
-Signed-off-by: Jens Axboe <axboe@fb.com>
----
- block/blk-settings.c | 12 ++++++++++++
- drivers/scsi/scsi.c | 3 +++
- include/linux/blkdev.h | 11 +++++++++++
- 3 files changed, 26 insertions(+)
-
-diff --git a/block/blk-settings.c b/block/blk-settings.c
-index f679ae1..f7e122e 100644
---- a/block/blk-settings.c
-+++ b/block/blk-settings.c
-@@ -832,6 +832,18 @@ void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
- EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
-
- /**
-+ * blk_set_queue_depth - tell the block layer about the device queue depth
-+ * @q: the request queue for the device
-+ * @depth: queue depth
-+ *
-+ */
-+void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
-+{
-+ q->queue_depth = depth;
-+}
-+EXPORT_SYMBOL(blk_set_queue_depth);
-+
-+/**
- * blk_queue_write_cache - configure queue's write cache
- * @q: the request queue for the device
- * @wc: write back cache on or off
-diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
-index 1f36aca..f3de98a 100644
---- a/drivers/scsi/scsi.c
-+++ b/drivers/scsi/scsi.c
-@@ -621,6 +621,9 @@ int scsi_change_queue_depth(struct scsi_device *sdev, int depth)
- wmb();
- }
-
-+ if (sdev->request_queue)
-+ blk_set_queue_depth(sdev->request_queue, depth);
-+
- return sdev->queue_depth;
- }
- EXPORT_SYMBOL(scsi_change_queue_depth);
-diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
-index e79055c..1d12aa6 100644
---- a/include/linux/blkdev.h
-+++ b/include/linux/blkdev.h
-@@ -327,6 +327,8 @@ struct request_queue {
- struct blk_mq_ctx __percpu *queue_ctx;
- unsigned int nr_queues;
-
-+ unsigned int queue_depth;
-+
- /* hw dispatch queues */
- struct blk_mq_hw_ctx **queue_hw_ctx;
- unsigned int nr_hw_queues;
-@@ -683,6 +685,14 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
- return false;
- }
-
-+static inline unsigned int blk_queue_depth(struct request_queue *q)
-+{
-+ if (q->queue_depth)
-+ return q->queue_depth;
-+
-+ return q->nr_requests;
-+}
-+
- /*
- * q->prep_rq_fn return values
- */
-@@ -999,6 +1009,7 @@ extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
- extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
- extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
- extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
-+extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
- extern void blk_set_default_limits(struct queue_limits *lim);
- extern void blk_set_stacking_limits(struct queue_limits *lim);
- extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
---
-cgit v0.11.2
-
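The new blk_queue_depth() above is a simple preference: use the driver-reported
depth when one was set, otherwise fall back to the intentionally larger software
nr_requests value. A trivial standalone sketch:

/*
 * Sketch of the blk_queue_depth() fallback added above: prefer the
 * hardware-reported depth, else the larger software setting.
 */
#include <stdio.h>

struct queue_sketch {
        unsigned int queue_depth;       /* 0 if the driver never set one */
        unsigned int nr_requests;
};

static unsigned int queue_depth(const struct queue_sketch *q)
{
        return q->queue_depth ? q->queue_depth : q->nr_requests;
}

int main(void)
{
        struct queue_sketch scsi_like = { .queue_depth = 31, .nr_requests = 128 };
        struct queue_sketch legacy    = { .queue_depth = 0,  .nr_requests = 128 };

        printf("reported: %u, fallback: %u\n",
               queue_depth(&scsi_like), queue_depth(&legacy));
        return 0;
}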
-From a13cc5885ddd5582129869c1837821d6af6d48bb Mon Sep 17 00:00:00 2001
-From: Jens Axboe <axboe@fb.com>
-Date: Thu, 1 Sep 2016 10:22:41 -0600
-Subject: block: add scalable completion tracking of requests
-
-For legacy block, we simply track them in the request queue. For
-blk-mq, we track them on a per-sw queue basis, which we can then
-sum up through the hardware queues and finally to a per device
-state.
-
-The stats are tracked in, roughly, 0.1s interval windows.
-
-Add sysfs files to display the stats.
-
-Signed-off-by: Jens Axboe <axboe@fb.com>
----
- block/Makefile | 2 +-
- block/blk-core.c | 4 +
- block/blk-mq-sysfs.c | 47 ++++++++++
- block/blk-mq.c | 14 +++
- block/blk-mq.h | 3 +
- block/blk-stat.c | 220 ++++++++++++++++++++++++++++++++++++++++++++++
- block/blk-stat.h | 18 ++++
- block/blk-sysfs.c | 26 ++++++
- include/linux/blk_types.h | 12 +++
- include/linux/blkdev.h | 4 +
- 10 files changed, 349 insertions(+), 1 deletion(-)
- create mode 100644 block/blk-stat.c
- create mode 100644 block/blk-stat.h
-
-diff --git a/block/Makefile b/block/Makefile
-index 9eda232..3446e04 100644
---- a/block/Makefile
-+++ b/block/Makefile
-@@ -5,7 +5,7 @@
- obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
- blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
- blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-- blk-lib.o blk-mq.o blk-mq-tag.o \
-+ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
- blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
- genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
- badblocks.o partitions/
-diff --git a/block/blk-core.c b/block/blk-core.c
-index 36c7ac3..4075cbe 100644
---- a/block/blk-core.c
-+++ b/block/blk-core.c
-@@ -2475,6 +2475,8 @@ void blk_start_request(struct request *req)
- {
- blk_dequeue_request(req);
-
-+ req->issue_time = ktime_to_ns(ktime_get());
-+
- /*
- * We are now handing the request to the hardware, initialize
- * resid_len to full count and add the timeout handler.
-@@ -2542,6 +2544,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
-
- trace_block_rq_complete(req->q, req, nr_bytes);
-
-+ blk_stat_add(&req->q->rq_stats[rq_data_dir(req)], req);
-+
- if (!req->bio)
- return false;
-
-diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
-index fe822aa..b66bbf1 100644
---- a/block/blk-mq-sysfs.c
-+++ b/block/blk-mq-sysfs.c
-@@ -247,6 +247,47 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
- return ret;
- }
-
-+static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx)
-+{
-+ struct blk_mq_ctx *ctx;
-+ unsigned int i;
-+
-+ hctx_for_each_ctx(hctx, ctx, i) {
-+ blk_stat_init(&ctx->stat[0]);
-+ blk_stat_init(&ctx->stat[1]);
-+ }
-+}
-+
-+static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
-+ const char *page, size_t count)
-+{
-+ blk_mq_stat_clear(hctx);
-+ return count;
-+}
-+
-+static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
-+{
-+ return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
-+ pre, (long long) stat->nr_samples,
-+ (long long) stat->mean, (long long) stat->min,
-+ (long long) stat->max);
-+}
-+
-+static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page)
-+{
-+ struct blk_rq_stat stat[2];
-+ ssize_t ret;
-+
-+ blk_stat_init(&stat[0]);
-+ blk_stat_init(&stat[1]);
-+
-+ blk_hctx_stat_get(hctx, stat);
-+
-+ ret = print_stat(page, &stat[0], "read :");
-+ ret += print_stat(page + ret, &stat[1], "write:");
-+ return ret;
-+}
-+
- static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
- .attr = {.name = "dispatched", .mode = S_IRUGO },
- .show = blk_mq_sysfs_dispatched_show,
-@@ -304,6 +345,11 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
- .attr = {.name = "io_poll", .mode = S_IRUGO },
- .show = blk_mq_hw_sysfs_poll_show,
- };
-+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
-+ .attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
-+ .show = blk_mq_hw_sysfs_stat_show,
-+ .store = blk_mq_hw_sysfs_stat_store,
-+};
-
- static struct attribute *default_hw_ctx_attrs[] = {
- &blk_mq_hw_sysfs_queued.attr,
-@@ -314,6 +360,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
- &blk_mq_hw_sysfs_cpus.attr,
- &blk_mq_hw_sysfs_active.attr,
- &blk_mq_hw_sysfs_poll.attr,
-+ &blk_mq_hw_sysfs_stat.attr,
- NULL,
- };
-
-diff --git a/block/blk-mq.c b/block/blk-mq.c
-index 13f5a6c..712f141 100644
---- a/block/blk-mq.c
-+++ b/block/blk-mq.c
-@@ -29,6 +29,7 @@
- #include "blk.h"
- #include "blk-mq.h"
- #include "blk-mq-tag.h"
-+#include "blk-stat.h"
-
- static DEFINE_MUTEX(all_q_mutex);
- static LIST_HEAD(all_q_list);
-@@ -400,10 +401,19 @@ static void blk_mq_ipi_complete_request(struct request *rq)
- put_cpu();
- }
-
-+static void blk_mq_stat_add(struct request *rq)
-+{
-+ struct blk_rq_stat *stat = &rq->mq_ctx->stat[rq_data_dir(rq)];
-+
-+ blk_stat_add(stat, rq);
-+}
-+
- static void __blk_mq_complete_request(struct request *rq)
- {
- struct request_queue *q = rq->q;
-
-+ blk_mq_stat_add(rq);
-+
- if (!q->softirq_done_fn)
- blk_mq_end_request(rq, rq->errors);
- else
-@@ -447,6 +457,8 @@ void blk_mq_start_request(struct request *rq)
- if (unlikely(blk_bidi_rq(rq)))
- rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
-
-+ rq->issue_time = ktime_to_ns(ktime_get());
-+
- blk_add_timer(rq);
-
- /*
-@@ -1795,6 +1807,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
- spin_lock_init(&__ctx->lock);
- INIT_LIST_HEAD(&__ctx->rq_list);
- __ctx->queue = q;
-+ blk_stat_init(&__ctx->stat[0]);
-+ blk_stat_init(&__ctx->stat[1]);
-
- /* If the cpu isn't online, the cpu is mapped to first hctx */
- if (!cpu_online(i))
-diff --git a/block/blk-mq.h b/block/blk-mq.h
-index 9087b11..e107f70 100644
---- a/block/blk-mq.h
-+++ b/block/blk-mq.h
-@@ -1,6 +1,8 @@
- #ifndef INT_BLK_MQ_H
- #define INT_BLK_MQ_H
-
-+#include "blk-stat.h"
-+
- struct blk_mq_tag_set;
-
- struct blk_mq_ctx {
-@@ -20,6 +22,7 @@ struct blk_mq_ctx {
-
- /* incremented at completion time */
- unsigned long ____cacheline_aligned_in_smp rq_completed[2];
-+ struct blk_rq_stat stat[2];
-
- struct request_queue *queue;
- struct kobject kobj;
-diff --git a/block/blk-stat.c b/block/blk-stat.c
-new file mode 100644
-index 0000000..3965e8a
---- /dev/null
-+++ b/block/blk-stat.c
-@@ -0,0 +1,220 @@
-+/*
-+ * Block stat tracking code
-+ *
-+ * Copyright (C) 2016 Jens Axboe
-+ */
-+#include <linux/kernel.h>
-+#include <linux/blk-mq.h>
-+
-+#include "blk-stat.h"
-+#include "blk-mq.h"
-+
-+static void blk_stat_flush_batch(struct blk_rq_stat *stat)
-+{
-+ if (!stat->nr_batch)
-+ return;
-+ if (!stat->nr_samples)
-+ stat->mean = div64_s64(stat->batch, stat->nr_batch);
-+ else {
-+ stat->mean = div64_s64((stat->mean * stat->nr_samples) +
-+ stat->batch,
-+ stat->nr_samples + stat->nr_batch);
-+ }
-+
-+ stat->nr_samples += stat->nr_batch;
-+ stat->nr_batch = stat->batch = 0;
-+}
-+
-+void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
-+{
-+ if (!src->nr_samples)
-+ return;
-+
-+ blk_stat_flush_batch(src);
-+
-+ dst->min = min(dst->min, src->min);
-+ dst->max = max(dst->max, src->max);
-+
-+ if (!dst->nr_samples)
-+ dst->mean = src->mean;
-+ else {
-+ dst->mean = div64_s64((src->mean * src->nr_samples) +
-+ (dst->mean * dst->nr_samples),
-+ dst->nr_samples + src->nr_samples);
-+ }
-+ dst->nr_samples += src->nr_samples;
-+}
-+
-+static void blk_mq_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
-+{
-+ struct blk_mq_hw_ctx *hctx;
-+ struct blk_mq_ctx *ctx;
-+ uint64_t latest = 0;
-+ int i, j, nr;
-+
-+ blk_stat_init(&dst[0]);
-+ blk_stat_init(&dst[1]);
-+
-+ nr = 0;
-+ do {
-+ uint64_t newest = 0;
-+
-+ queue_for_each_hw_ctx(q, hctx, i) {
-+ hctx_for_each_ctx(hctx, ctx, j) {
-+ if (!ctx->stat[0].nr_samples &&
-+ !ctx->stat[1].nr_samples)
-+ continue;
-+ if (ctx->stat[0].time > newest)
-+ newest = ctx->stat[0].time;
-+ if (ctx->stat[1].time > newest)
-+ newest = ctx->stat[1].time;
-+ }
-+ }
-+
-+ /*
-+ * No samples
-+ */
-+ if (!newest)
-+ break;
-+
-+ if (newest > latest)
-+ latest = newest;
-+
-+ queue_for_each_hw_ctx(q, hctx, i) {
-+ hctx_for_each_ctx(hctx, ctx, j) {
-+ if (ctx->stat[0].time == newest) {
-+ blk_stat_sum(&dst[0], &ctx->stat[0]);
-+ nr++;
-+ }
-+ if (ctx->stat[1].time == newest) {
-+ blk_stat_sum(&dst[1], &ctx->stat[1]);
-+ nr++;
-+ }
-+ }
-+ }
-+ /*
-+ * If we race on finding an entry, just loop back again.
-+ * Should be very rare.
-+ */
-+ } while (!nr);
-+
-+ dst[0].time = dst[1].time = latest;
-+}
-+
-+void blk_queue_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
-+{
-+ if (q->mq_ops)
-+ blk_mq_stat_get(q, dst);
-+ else {
-+ memcpy(&dst[0], &q->rq_stats[0], sizeof(struct blk_rq_stat));
-+ memcpy(&dst[1], &q->rq_stats[1], sizeof(struct blk_rq_stat));
-+ }
-+}
-+
-+void blk_hctx_stat_get(struct blk_mq_hw_ctx *hctx, struct blk_rq_stat *dst)
-+{
-+ struct blk_mq_ctx *ctx;
-+ unsigned int i, nr;
-+
-+ nr = 0;
-+ do {
-+ uint64_t newest = 0;
-+
-+ hctx_for_each_ctx(hctx, ctx, i) {
-+ if (!ctx->stat[0].nr_samples &&
-+ !ctx->stat[1].nr_samples)
-+ continue;
-+
-+ if (ctx->stat[0].time > newest)
-+ newest = ctx->stat[0].time;
-+ if (ctx->stat[1].time > newest)
-+ newest = ctx->stat[1].time;
-+ }
-+
-+ if (!newest)
-+ break;
-+
-+ hctx_for_each_ctx(hctx, ctx, i) {
-+ if (ctx->stat[0].time == newest) {
-+ blk_stat_sum(&dst[0], &ctx->stat[0]);
-+ nr++;
-+ }
-+ if (ctx->stat[1].time == newest) {
-+ blk_stat_sum(&dst[1], &ctx->stat[1]);
-+ nr++;
-+ }
-+ }
-+ /*
-+ * If we race on finding an entry, just loop back again.
-+ * Should be very rare, as the window is only updated
-+ * occasionally
-+ */
-+ } while (!nr);
-+}
-+
-+static void __blk_stat_init(struct blk_rq_stat *stat, s64 time_now)
-+{
-+ stat->min = -1ULL;
-+ stat->max = stat->nr_samples = stat->mean = 0;
-+ stat->batch = stat->nr_batch = 0;
-+ stat->time = time_now & BLK_STAT_MASK;
-+}
-+
-+void blk_stat_init(struct blk_rq_stat *stat)
-+{
-+ __blk_stat_init(stat, ktime_to_ns(ktime_get()));
-+}
-+
-+static bool __blk_stat_is_current(struct blk_rq_stat *stat, s64 now)
-+{
-+ return (now & BLK_STAT_MASK) == (stat->time & BLK_STAT_MASK);
-+}
-+
-+bool blk_stat_is_current(struct blk_rq_stat *stat)
-+{
-+ return __blk_stat_is_current(stat, ktime_to_ns(ktime_get()));
-+}
-+
-+void blk_stat_add(struct blk_rq_stat *stat, struct request *rq)
-+{
-+ s64 now, value;
-+
-+ now = ktime_to_ns(ktime_get());
-+ if (now < rq->issue_time)
-+ return;
-+
-+ if (!__blk_stat_is_current(stat, now))
-+ __blk_stat_init(stat, now);
-+
-+ value = now - rq->issue_time;
-+ if (value > stat->max)
-+ stat->max = value;
-+ if (value < stat->min)
-+ stat->min = value;
-+
-+ if (stat->batch + value < stat->batch ||
-+ stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
-+ blk_stat_flush_batch(stat);
-+
-+ stat->batch += value;
-+ stat->nr_batch++;
-+}
-+
-+void blk_stat_clear(struct request_queue *q)
-+{
-+ if (q->mq_ops) {
-+ struct blk_mq_hw_ctx *hctx;
-+ struct blk_mq_ctx *ctx;
-+ int i, j;
-+
-+ queue_for_each_hw_ctx(q, hctx, i) {
-+ hctx_for_each_ctx(hctx, ctx, j) {
-+ blk_stat_init(&ctx->stat[0]);
-+ blk_stat_init(&ctx->stat[1]);
-+ }
-+ }
-+ } else {
-+ blk_stat_init(&q->rq_stats[0]);
-+ blk_stat_init(&q->rq_stats[1]);
-+ }
-+}
-diff --git a/block/blk-stat.h b/block/blk-stat.h
-new file mode 100644
-index 0000000..376a6cc
---- /dev/null
-+++ b/block/blk-stat.h
-@@ -0,0 +1,18 @@
-+#ifndef BLK_STAT_H
-+#define BLK_STAT_H
-+
-+/*
-+ * ~0.13s window as a power-of-2 (2^27 nsecs)
-+ */
-+#define BLK_STAT_NSEC 134217728ULL
-+#define BLK_STAT_MASK ~(BLK_STAT_NSEC - 1)
-+
-+void blk_stat_add(struct blk_rq_stat *, struct request *);
-+void blk_hctx_stat_get(struct blk_mq_hw_ctx *, struct blk_rq_stat *);
-+void blk_queue_stat_get(struct request_queue *, struct blk_rq_stat *);
-+void blk_stat_clear(struct request_queue *q);
-+void blk_stat_init(struct blk_rq_stat *);
-+void blk_stat_sum(struct blk_rq_stat *, struct blk_rq_stat *);
-+bool blk_stat_is_current(struct blk_rq_stat *);
-+
-+#endif
-diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
-index f87a7e7..0b9e435 100644
---- a/block/blk-sysfs.c
-+++ b/block/blk-sysfs.c
-@@ -384,6 +384,26 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page)
- return queue_var_show(blk_queue_dax(q), page);
- }
-
-+static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
-+{
-+ return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
-+ pre, (long long) stat->nr_samples,
-+ (long long) stat->mean, (long long) stat->min,
-+ (long long) stat->max);
-+}
-+
-+static ssize_t queue_stats_show(struct request_queue *q, char *page)
-+{
-+ struct blk_rq_stat stat[2];
-+ ssize_t ret;
-+
-+ blk_queue_stat_get(q, stat);
-+
-+ ret = print_stat(page, &stat[0], "read :");
-+ ret += print_stat(page + ret, &stat[1], "write:");
-+ return ret;
-+}
-+
- static struct queue_sysfs_entry queue_requests_entry = {
- .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
- .show = queue_requests_show,
-@@ -526,6 +546,11 @@ static struct queue_sysfs_entry queue_dax_entry = {
- .show = queue_dax_show,
- };
-
-+static struct queue_sysfs_entry queue_stats_entry = {
-+ .attr = {.name = "stats", .mode = S_IRUGO },
-+ .show = queue_stats_show,
-+};
-+
- static struct attribute *default_attrs[] = {
- &queue_requests_entry.attr,
- &queue_ra_entry.attr,
-@@ -553,6 +578,7 @@ static struct attribute *default_attrs[] = {
- &queue_poll_entry.attr,
- &queue_wc_entry.attr,
- &queue_dax_entry.attr,
-+ &queue_stats_entry.attr,
- NULL,
- };
-
-diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
-index be4409b..95fbfa1 100644
---- a/include/linux/blk_types.h
-+++ b/include/linux/blk_types.h
-@@ -266,4 +266,16 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
- return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
- }
-
-+#define BLK_RQ_STAT_BATCH 64
-+
-+struct blk_rq_stat {
-+ s64 mean;
-+ u64 min;
-+ u64 max;
-+ s32 nr_samples;
-+ s32 nr_batch;
-+ u64 batch;
-+ s64 time;
-+};
-+
- #endif /* __LINUX_BLK_TYPES_H */
-diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
-index 1d12aa6..259eba8 100644
---- a/include/linux/blkdev.h
-+++ b/include/linux/blkdev.h
-@@ -151,6 +151,7 @@ struct request {
- struct gendisk *rq_disk;
- struct hd_struct *part;
- unsigned long start_time;
-+ s64 issue_time;
- #ifdef CONFIG_BLK_CGROUP
- struct request_list *rl; /* rl this rq is alloced from */
- unsigned long long start_time_ns;
-@@ -414,6 +415,9 @@ struct request_queue {
-
- unsigned int nr_sorted;
- unsigned int in_flight[2];
-+
-+ struct blk_rq_stat rq_stats[2];
-+
- /*
- * Number of active block driver functions for which blk_drain_queue()
- * must wait. Must be incremented around functions that unlock the
---
-cgit v0.11.2
-
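The heart of the stats code above is rolling per-context results up into one:
minimum and maximum merge directly, while the means are combined weighted by
sample count (blk_stat_sum). A userspace sketch of that merge with plain 64-bit
arithmetic, names chosen only for the example:

/*
 * Sketch of blk_stat_sum()-style merging of per-context latency stats,
 * as done above when per-sw-queue stats are rolled up per device.
 */
#include <stdio.h>
#include <stdint.h>

struct rq_stat {
        int64_t  mean;
        uint64_t min, max;
        int64_t  nr_samples;
};

static void stat_sum(struct rq_stat *dst, const struct rq_stat *src)
{
        if (!src->nr_samples)
                return;

        dst->min = (dst->nr_samples && dst->min < src->min) ? dst->min : src->min;
        dst->max = dst->max > src->max ? dst->max : src->max;

        if (!dst->nr_samples)
                dst->mean = src->mean;
        else    /* weighted average of the two means */
                dst->mean = (src->mean * src->nr_samples + dst->mean * dst->nr_samples) /
                            (dst->nr_samples + src->nr_samples);

        dst->nr_samples += src->nr_samples;
}

int main(void)
{
        struct rq_stat total = { 0 }, a = { 100, 40, 300, 10 }, b = { 200, 90, 800, 30 };

        stat_sum(&total, &a);
        stat_sum(&total, &b);
        printf("samples=%lld mean=%lld min=%llu max=%llu\n",
               (long long)total.nr_samples, (long long)total.mean,
               (unsigned long long)total.min, (unsigned long long)total.max);
        return 0;
}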
-From 9a38b8e46f9f759dbb3fd81810579ac1013bf814 Mon Sep 17 00:00:00 2001
-From: Jens Axboe <axboe@fb.com>
-Date: Thu, 8 Sep 2016 11:07:16 -0600
-Subject: wbt: add general throttling mechanism
-
-We can hook this up to the block layer, to help throttle buffered
-writes. Or NFS can tap into it, to accomplish the same.
-
-wbt registers a few trace points that can be used to track what is
-happening in the system:
-
-wbt_lat: 259:0: latency 2446318
-wbt_stat: 259:0: rmean=2446318, rmin=2446318, rmax=2446318, rsamples=1,
- wmean=518866, wmin=15522, wmax=5330353, wsamples=57
-wbt_step: 259:0: step down: step=1, window=72727272, background=8, normal=16, max=32
-
-This shows a sync issue event (wbt_lat) that exceeded its time. wbt_stat
-dumps the current read/write stats for that window, and wbt_step shows a
-step down event where we now scale back writes. Each trace includes the
-device, 259:0 in this case.
-
-Signed-off-by: Jens Axboe <axboe@fb.com>
----
- include/linux/wbt.h | 120 ++++++++
- include/trace/events/wbt.h | 153 ++++++++++
- lib/Kconfig | 3 +
- lib/Makefile | 1 +
- lib/wbt.c | 681 +++++++++++++++++++++++++++++++++++++++++++++
- 5 files changed, 958 insertions(+)
- create mode 100644 include/linux/wbt.h
- create mode 100644 include/trace/events/wbt.h
- create mode 100644 lib/wbt.c
-
-diff --git a/include/linux/wbt.h b/include/linux/wbt.h
-new file mode 100644
-index 0000000..5ffcd14
---- /dev/null
-+++ b/include/linux/wbt.h
-@@ -0,0 +1,120 @@
-+#ifndef WB_THROTTLE_H
-+#define WB_THROTTLE_H
-+
-+#include <linux/atomic.h>
-+#include <linux/wait.h>
-+#include <linux/timer.h>
-+#include <linux/ktime.h>
-+
-+enum {
-+ ISSUE_STAT_TRACKED = 1ULL << 63,
-+ ISSUE_STAT_READ = 1ULL << 62,
-+ ISSUE_STAT_MASK = ISSUE_STAT_TRACKED | ISSUE_STAT_READ,
-+ ISSUE_STAT_TIME_MASK = ~ISSUE_STAT_MASK,
-+
-+ WBT_TRACKED = 1,
-+ WBT_READ = 2,
-+};
-+
-+struct wb_issue_stat {
-+ u64 time;
-+};
-+
-+static inline void wbt_issue_stat_set_time(struct wb_issue_stat *stat)
-+{
-+ stat->time = (stat->time & ISSUE_STAT_MASK) |
-+ (ktime_to_ns(ktime_get()) & ISSUE_STAT_TIME_MASK);
-+}
-+
-+static inline u64 wbt_issue_stat_get_time(struct wb_issue_stat *stat)
-+{
-+ return stat->time & ISSUE_STAT_TIME_MASK;
-+}
-+
-+static inline void wbt_mark_tracked(struct wb_issue_stat *stat)
-+{
-+ stat->time |= ISSUE_STAT_TRACKED;
-+}
-+
-+static inline void wbt_clear_state(struct wb_issue_stat *stat)
-+{
-+ stat->time &= ~(ISSUE_STAT_TRACKED | ISSUE_STAT_READ);
-+}
-+
-+static inline bool wbt_tracked(struct wb_issue_stat *stat)
-+{
-+ return (stat->time & ISSUE_STAT_TRACKED) != 0;
-+}
-+
-+static inline void wbt_mark_read(struct wb_issue_stat *stat)
-+{
-+ stat->time |= ISSUE_STAT_READ;
-+}
-+
-+static inline bool wbt_is_read(struct wb_issue_stat *stat)
-+{
-+ return (stat->time & ISSUE_STAT_READ) != 0;
-+}
-+
-+struct wb_stat_ops {
-+ void (*get)(void *, struct blk_rq_stat *);
-+ bool (*is_current)(struct blk_rq_stat *);
-+ void (*clear)(void *);
-+};
-+
-+struct rq_wb {
-+ /*
-+ * Settings that govern how we throttle
-+ */
-+ unsigned int wb_background; /* background writeback */
-+ unsigned int wb_normal; /* normal writeback */
-+ unsigned int wb_max; /* max throughput writeback */
-+ int scale_step;
-+ bool scaled_max;
-+
-+ u64 win_nsec; /* default window size */
-+ u64 cur_win_nsec; /* current window size */
-+
-+ /*
-+ * Number of consecutive periods where we don't have enough
-+ * information to make a firm scale up/down decision.
-+ */
-+ unsigned int unknown_cnt;
-+
-+ struct timer_list window_timer;
-+
-+ s64 sync_issue;
-+ void *sync_cookie;
-+
-+ unsigned int wc;
-+ unsigned int queue_depth;
-+
-+ unsigned long last_issue; /* last non-throttled issue */
-+ unsigned long last_comp; /* last non-throttled comp */
-+ unsigned long min_lat_nsec;
-+ struct backing_dev_info *bdi;
-+ struct request_queue *q;
-+ wait_queue_head_t wait;
-+ atomic_t inflight;
-+
-+ struct wb_stat_ops *stat_ops;
-+ void *ops_data;
-+};
-+
-+struct backing_dev_info;
-+
-+void __wbt_done(struct rq_wb *);
-+void wbt_done(struct rq_wb *, struct wb_issue_stat *);
-+unsigned int wbt_wait(struct rq_wb *, unsigned int, spinlock_t *);
-+struct rq_wb *wbt_init(struct backing_dev_info *, struct wb_stat_ops *, void *);
-+void wbt_exit(struct rq_wb *);
-+void wbt_update_limits(struct rq_wb *);
-+void wbt_requeue(struct rq_wb *, struct wb_issue_stat *);
-+void wbt_issue(struct rq_wb *, struct wb_issue_stat *);
-+void wbt_disable(struct rq_wb *);
-+void wbt_track(struct wb_issue_stat *, unsigned int);
-+
-+void wbt_set_queue_depth(struct rq_wb *, unsigned int);
-+void wbt_set_write_cache(struct rq_wb *, bool);
-+
-+#endif
-diff --git a/include/trace/events/wbt.h b/include/trace/events/wbt.h
-new file mode 100644
-index 0000000..926c7ee
---- /dev/null
-+++ b/include/trace/events/wbt.h
-@@ -0,0 +1,153 @@
-+#undef TRACE_SYSTEM
-+#define TRACE_SYSTEM wbt
-+
-+#if !defined(_TRACE_WBT_H) || defined(TRACE_HEADER_MULTI_READ)
-+#define _TRACE_WBT_H
-+
-+#include <linux/tracepoint.h>
-+#include <linux/wbt.h>
-+
-+/**
-+ * wbt_stat - trace stats for blk_wb
-+ * @stat: array of read/write stats
-+ */
-+TRACE_EVENT(wbt_stat,
-+
-+ TP_PROTO(struct backing_dev_info *bdi, struct blk_rq_stat *stat),
-+
-+ TP_ARGS(bdi, stat),
-+
-+ TP_STRUCT__entry(
-+ __array(char, name, 32)
-+ __field(s64, rmean)
-+ __field(u64, rmin)
-+ __field(u64, rmax)
-+ __field(s64, rnr_samples)
-+ __field(s64, rtime)
-+ __field(s64, wmean)
-+ __field(u64, wmin)
-+ __field(u64, wmax)
-+ __field(s64, wnr_samples)
-+ __field(s64, wtime)
-+ ),
-+
-+ TP_fast_assign(
-+ strncpy(__entry->name, dev_name(bdi->dev), 32);
-+ __entry->rmean = stat[0].mean;
-+ __entry->rmin = stat[0].min;
-+ __entry->rmax = stat[0].max;
-+ __entry->rnr_samples = stat[0].nr_samples;
-+ __entry->wmean = stat[1].mean;
-+ __entry->wmin = stat[1].min;
-+ __entry->wmax = stat[1].max;
-+ __entry->wnr_samples = stat[1].nr_samples;
-+ ),
-+
-+ TP_printk("%s: rmean=%llu, rmin=%llu, rmax=%llu, rsamples=%llu, "
-+ "wmean=%llu, wmin=%llu, wmax=%llu, wsamples=%llu\n",
-+ __entry->name, __entry->rmean, __entry->rmin, __entry->rmax,
-+ __entry->rnr_samples, __entry->wmean, __entry->wmin,
-+ __entry->wmax, __entry->wnr_samples)
-+);
-+
-+/**
-+ * wbt_lat - trace latency event
-+ * @lat: latency trigger
-+ */
-+TRACE_EVENT(wbt_lat,
-+
-+ TP_PROTO(struct backing_dev_info *bdi, unsigned long lat),
-+
-+ TP_ARGS(bdi, lat),
-+
-+ TP_STRUCT__entry(
-+ __array(char, name, 32)
-+ __field(unsigned long, lat)
-+ ),
-+
-+ TP_fast_assign(
-+ strncpy(__entry->name, dev_name(bdi->dev), 32);
-+ __entry->lat = div_u64(lat, 1000);
-+ ),
-+
-+ TP_printk("%s: latency %lluus\n", __entry->name,
-+ (unsigned long long) __entry->lat)
-+);
-+
-+/**
-+ * wbt_step - trace wb event step
-+ * @msg: context message
-+ * @step: the current scale step count
-+ * @window: the current monitoring window
-+ * @bg: the current background queue limit
-+ * @normal: the current normal writeback limit
-+ * @max: the current max throughput writeback limit
-+ */
-+TRACE_EVENT(wbt_step,
-+
-+ TP_PROTO(struct backing_dev_info *bdi, const char *msg,
-+ int step, unsigned long window, unsigned int bg,
-+ unsigned int normal, unsigned int max),
-+
-+ TP_ARGS(bdi, msg, step, window, bg, normal, max),
-+
-+ TP_STRUCT__entry(
-+ __array(char, name, 32)
-+ __field(const char *, msg)
-+ __field(int, step)
-+ __field(unsigned long, window)
-+ __field(unsigned int, bg)
-+ __field(unsigned int, normal)
-+ __field(unsigned int, max)
-+ ),
-+
-+ TP_fast_assign(
-+ strncpy(__entry->name, dev_name(bdi->dev), 32);
-+ __entry->msg = msg;
-+ __entry->step = step;
-+ __entry->window = div_u64(window, 1000);
-+ __entry->bg = bg;
-+ __entry->normal = normal;
-+ __entry->max = max;
-+ ),
-+
-+ TP_printk("%s: %s: step=%d, window=%luus, background=%u, normal=%u, max=%u\n",
-+ __entry->name, __entry->msg, __entry->step, __entry->window,
-+ __entry->bg, __entry->normal, __entry->max)
-+);
-+
-+/**
-+ * wbt_timer - trace wb timer event
-+ * @status: timer state status
-+ * @step: the current scale step count
-+ * @inflight: tracked writes inflight
-+ */
-+TRACE_EVENT(wbt_timer,
-+
-+ TP_PROTO(struct backing_dev_info *bdi, unsigned int status,
-+ int step, unsigned int inflight),
-+
-+ TP_ARGS(bdi, status, step, inflight),
-+
-+ TP_STRUCT__entry(
-+ __array(char, name, 32)
-+ __field(unsigned int, status)
-+ __field(int, step)
-+ __field(unsigned int, inflight)
-+ ),
-+
-+ TP_fast_assign(
-+ strncpy(__entry->name, dev_name(bdi->dev), 32);
-+ __entry->status = status;
-+ __entry->step = step;
-+ __entry->inflight = inflight;
-+ ),
-+
-+ TP_printk("%s: status=%u, step=%d, inflight=%u\n", __entry->name,
-+ __entry->status, __entry->step, __entry->inflight)
-+);
-+
-+#endif /* _TRACE_WBT_H */
-+
-+/* This part must be outside protection */
-+#include <trace/define_trace.h>
-diff --git a/lib/Kconfig b/lib/Kconfig
-index d79909d..c585e4c 100644
---- a/lib/Kconfig
-+++ b/lib/Kconfig
-@@ -550,4 +550,7 @@ config STACKDEPOT
- bool
- select STACKTRACE
-
-+config WBT
-+ bool
-+
- endmenu
-diff --git a/lib/Makefile b/lib/Makefile
-index 5dc77a8..23afd63 100644
---- a/lib/Makefile
-+++ b/lib/Makefile
-@@ -177,6 +177,7 @@ obj-$(CONFIG_SG_SPLIT) += sg_split.o
- obj-$(CONFIG_SG_POOL) += sg_pool.o
- obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
- obj-$(CONFIG_IRQ_POLL) += irq_poll.o
-+obj-$(CONFIG_WBT) += wbt.o
-
- obj-$(CONFIG_STACKDEPOT) += stackdepot.o
- KASAN_SANITIZE_stackdepot.o := n
-diff --git a/lib/wbt.c b/lib/wbt.c
-new file mode 100644
-index 0000000..a995703
---- /dev/null
-+++ b/lib/wbt.c
-@@ -0,0 +1,681 @@
-+/*
-+ * buffered writeback throttling, loosely based on CoDel. We can't drop
-+ * packets for IO scheduling, so the logic is something like this:
-+ *
-+ * - Monitor latencies in a defined window of time.
-+ * - If the minimum latency in the above window exceeds some target, increment
-+ * scaling step and scale down queue depth by a factor of 2x. The monitoring
-+ * window is then shrunk to 100 / sqrt(scaling step + 1).
-+ * - For any window where we don't have solid data on what the latencies
-+ * look like, retain status quo.
-+ * - If latencies look good, decrement scaling step.
-+ * - If we're only doing writes, allow the scaling step to go negative. This
-+ * will temporarily boost write performance, snapping back to a stable
-+ * scaling step of 0 if reads show up or the heavy writers finish. Unlike
-+ * positive scaling steps where we shrink the monitoring window, a negative
-+ * scaling step retains the default step==0 window size.
-+ *
-+ * Copyright (C) 2016 Jens Axboe
-+ *
-+ */
-+#include <linux/kernel.h>
-+#include <linux/blk_types.h>
-+#include <linux/slab.h>
-+#include <linux/backing-dev.h>
-+#include <linux/wbt.h>
-+
-+#define CREATE_TRACE_POINTS
-+#include <trace/events/wbt.h>
-+
-+enum {
-+ /*
-+ * Default setting, we'll scale up (to 75% of QD max) or down (min 1)
-+ * from here depending on device stats
-+ */
-+ RWB_DEF_DEPTH = 16,
-+
-+ /*
-+ * 100msec window
-+ */
-+ RWB_WINDOW_NSEC = 100 * 1000 * 1000ULL,
-+
-+ /*
-+ * Disregard stats, if we don't meet this minimum
-+ */
-+ RWB_MIN_WRITE_SAMPLES = 3,
-+
-+ /*
-+ * If we have this number of consecutive windows with not enough
-+ * information to scale up or down, scale up.
-+ */
-+ RWB_UNKNOWN_BUMP = 5,
-+};
-+
-+static inline bool rwb_enabled(struct rq_wb *rwb)
-+{
-+ return rwb && rwb->wb_normal != 0;
-+}
-+
-+/*
-+ * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
-+ * false if 'v' + 1 would be bigger than 'below'.
-+ */
-+static bool atomic_inc_below(atomic_t *v, int below)
-+{
-+ int cur = atomic_read(v);
-+
-+ for (;;) {
-+ int old;
-+
-+ if (cur >= below)
-+ return false;
-+ old = atomic_cmpxchg(v, cur, cur + 1);
-+ if (old == cur)
-+ break;
-+ cur = old;
-+ }
-+
-+ return true;
-+}
-+
-+static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
-+{
-+ if (rwb_enabled(rwb)) {
-+ const unsigned long cur = jiffies;
-+
-+ if (cur != *var)
-+ *var = cur;
-+ }
-+}
-+
-+/*
-+ * If a task was rate throttled in balance_dirty_pages() within the last
-+ * second or so, use that to indicate a higher cleaning rate.
-+ */
-+static bool wb_recent_wait(struct rq_wb *rwb)
-+{
-+ struct bdi_writeback *wb = &rwb->bdi->wb;
-+
-+ return time_before(jiffies, wb->dirty_sleep + HZ);
-+}
-+
-+void __wbt_done(struct rq_wb *rwb)
-+{
-+ int inflight, limit;
-+
-+ inflight = atomic_dec_return(&rwb->inflight);
-+
-+ /*
-+ * wbt got disabled with IO in flight. Wake up any potential
-+ * waiters, we don't have to do more than that.
-+ */
-+ if (unlikely(!rwb_enabled(rwb))) {
-+ wake_up_all(&rwb->wait);
-+ return;
-+ }
-+
-+ /*
-+ * If the device does write back caching, drop further down
-+ * before we wake people up.
-+ */
-+ if (rwb->wc && !wb_recent_wait(rwb))
-+ limit = 0;
-+ else
-+ limit = rwb->wb_normal;
-+
-+ /*
-+ * Don't wake anyone up if we are above the normal limit.
-+ */
-+ if (inflight && inflight >= limit)
-+ return;
-+
-+ if (waitqueue_active(&rwb->wait)) {
-+ int diff = limit - inflight;
-+
-+ if (!inflight || diff >= rwb->wb_background / 2)
-+ wake_up(&rwb->wait);
-+ }
-+}
-+
-+/*
-+ * Called on completion of a request. Note that it's also called when
-+ * a request is merged, when the request gets freed.
-+ */
-+void wbt_done(struct rq_wb *rwb, struct wb_issue_stat *stat)
-+{
-+ if (!rwb)
-+ return;
-+
-+ if (!wbt_tracked(stat)) {
-+ if (rwb->sync_cookie == stat) {
-+ rwb->sync_issue = 0;
-+ rwb->sync_cookie = NULL;
-+ }
-+
-+ if (wbt_is_read(stat))
-+ wb_timestamp(rwb, &rwb->last_comp);
-+ wbt_clear_state(stat);
-+ } else {
-+ WARN_ON_ONCE(stat == rwb->sync_cookie);
-+ __wbt_done(rwb);
-+ wbt_clear_state(stat);
-+ }
-+}
-+
-+/*
-+ * Return true, if we can't increase the depth further by scaling
-+ */
-+static bool calc_wb_limits(struct rq_wb *rwb)
-+{
-+ unsigned int depth;
-+ bool ret = false;
-+
-+ if (!rwb->min_lat_nsec) {
-+ rwb->wb_max = rwb->wb_normal = rwb->wb_background = 0;
-+ return false;
-+ }
-+
-+ /*
-+ * For QD=1 devices, this is a special case. It's important for those
-+ * to have one request ready when one completes, so force a depth of
-+ * 2 for those devices. On the backend, it'll be a depth of 1 anyway,
-+ * since the device can't have more than that in flight. If we're
-+ * scaling down, then keep a setting of 1/1/1.
-+ */
-+ if (rwb->queue_depth == 1) {
-+ if (rwb->scale_step > 0)
-+ rwb->wb_max = rwb->wb_normal = 1;
-+ else {
-+ rwb->wb_max = rwb->wb_normal = 2;
-+ ret = true;
-+ }
-+ rwb->wb_background = 1;
-+ } else {
-+ /*
-+ * scale_step == 0 is our default state. If we have suffered
-+ * latency spikes, step will be > 0, and we shrink the
-+ * allowed write depths. If step is < 0, we're only doing
-+ * writes, and we allow a temporarily higher depth to
-+ * increase performance.
-+ */
-+ depth = min_t(unsigned int, RWB_DEF_DEPTH, rwb->queue_depth);
-+ if (rwb->scale_step > 0)
-+ depth = 1 + ((depth - 1) >> min(31, rwb->scale_step));
-+ else if (rwb->scale_step < 0) {
-+ unsigned int maxd = 3 * rwb->queue_depth / 4;
-+
-+ depth = 1 + ((depth - 1) << -rwb->scale_step);
-+ if (depth > maxd) {
-+ depth = maxd;
-+ ret = true;
-+ }
-+ }
-+
-+ /*
-+ * Set our max/normal/bg queue depths based on how far
-+ * we have scaled down (->scale_step).
-+ */
-+ rwb->wb_max = depth;
-+ rwb->wb_normal = (rwb->wb_max + 1) / 2;
-+ rwb->wb_background = (rwb->wb_max + 3) / 4;
-+ }
-+
-+ return ret;
-+}
-+
-+static bool inline stat_sample_valid(struct blk_rq_stat *stat)
-+{
-+ /*
-+ * We need at least one read sample, and a minimum of
-+ * RWB_MIN_WRITE_SAMPLES. We require some write samples to know
-+ * that it's writes impacting us, and not just some sole read on
-+ * a device that is in a lower power state.
-+ */
-+ return stat[0].nr_samples >= 1 &&
-+ stat[1].nr_samples >= RWB_MIN_WRITE_SAMPLES;
-+}
-+
-+static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
-+{
-+ u64 now, issue = ACCESS_ONCE(rwb->sync_issue);
-+
-+ if (!issue || !rwb->sync_cookie)
-+ return 0;
-+
-+ now = ktime_to_ns(ktime_get());
-+ return now - issue;
-+}
-+
-+enum {
-+ LAT_OK = 1,
-+ LAT_UNKNOWN,
-+ LAT_UNKNOWN_WRITES,
-+ LAT_EXCEEDED,
-+};
-+
-+static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
-+{
-+ u64 thislat;
-+
-+ /*
-+ * If our stored sync issue exceeds the window size, or it
-+ * exceeds our min target AND we haven't logged any entries,
-+ * flag the latency as exceeded. wbt works off completion latencies,
-+ * but for a flooded device, a single sync IO can take a long time
-+ * to complete after being issued. If this time exceeds our
-+ * monitoring window AND we didn't see any other completions in that
-+ * window, then count that sync IO as a violation of the latency.
-+ */
-+ thislat = rwb_sync_issue_lat(rwb);
-+ if (thislat > rwb->cur_win_nsec ||
-+ (thislat > rwb->min_lat_nsec && !stat[0].nr_samples)) {
-+ trace_wbt_lat(rwb->bdi, thislat);
-+ return LAT_EXCEEDED;
-+ }
-+
-+ /*
-+ * No read/write mix, if stat isn't valid
-+ */
-+ if (!stat_sample_valid(stat)) {
-+ /*
-+ * If we had writes in this stat window and the window is
-+ * current, we're only doing writes. If a task recently
-+ * waited or still has writes in flights, consider us doing
-+ * just writes as well.
-+ */
-+ if ((stat[1].nr_samples && rwb->stat_ops->is_current(stat)) ||
-+ wb_recent_wait(rwb) || atomic_read(&rwb->inflight))
-+ return LAT_UNKNOWN_WRITES;
-+ return LAT_UNKNOWN;
-+ }
-+
-+ /*
-+ * If the 'min' latency exceeds our target, step down.
-+ */
-+ if (stat[0].min > rwb->min_lat_nsec) {
-+ trace_wbt_lat(rwb->bdi, stat[0].min);
-+ trace_wbt_stat(rwb->bdi, stat);
-+ return LAT_EXCEEDED;
-+ }
-+
-+ if (rwb->scale_step)
-+ trace_wbt_stat(rwb->bdi, stat);
-+
-+ return LAT_OK;
-+}
-+
-+static int latency_exceeded(struct rq_wb *rwb)
-+{
-+ struct blk_rq_stat stat[2];
-+
-+ rwb->stat_ops->get(rwb->ops_data, stat);
-+ return __latency_exceeded(rwb, stat);
-+}
-+
-+static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
-+{
-+ trace_wbt_step(rwb->bdi, msg, rwb->scale_step, rwb->cur_win_nsec,
-+ rwb->wb_background, rwb->wb_normal, rwb->wb_max);
-+}
-+
-+static void scale_up(struct rq_wb *rwb)
-+{
-+ /*
-+ * Hit max in previous round, stop here
-+ */
-+ if (rwb->scaled_max)
-+ return;
-+
-+ rwb->scale_step--;
-+ rwb->unknown_cnt = 0;
-+ rwb->stat_ops->clear(rwb->ops_data);
-+
-+ rwb->scaled_max = calc_wb_limits(rwb);
-+
-+ if (waitqueue_active(&rwb->wait))
-+ wake_up_all(&rwb->wait);
-+
-+ rwb_trace_step(rwb, "step up");
-+}
-+
-+/*
-+ * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
-+ * had a latency violation.
-+ */
-+static void scale_down(struct rq_wb *rwb, bool hard_throttle)
-+{
-+ /*
-+ * Stop scaling down when we've hit the limit. This also prevents
-+ * ->scale_step from going to crazy values, if the device can't
-+ * keep up.
-+ */
-+ if (rwb->wb_max == 1)
-+ return;
-+
-+ if (rwb->scale_step < 0 && hard_throttle)
-+ rwb->scale_step = 0;
-+ else
-+ rwb->scale_step++;
-+
-+ rwb->scaled_max = false;
-+ rwb->unknown_cnt = 0;
-+ rwb->stat_ops->clear(rwb->ops_data);
-+ calc_wb_limits(rwb);
-+ rwb_trace_step(rwb, "step down");
-+}
-+
-+static void rwb_arm_timer(struct rq_wb *rwb)
-+{
-+ unsigned long expires;
-+
-+ if (rwb->scale_step > 0) {
-+ /*
-+ * We should speed this up, using some variant of a fast
-+ * integer inverse square root calculation. Since we only do
-+ * this for every window expiration, it's not a huge deal,
-+ * though.
-+ */
-+ rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
-+ int_sqrt((rwb->scale_step + 1) << 8));
-+ } else {
-+ /*
-+ * For step < 0, we don't want to increase/decrease the
-+ * window size.
-+ */
-+ rwb->cur_win_nsec = rwb->win_nsec;
-+ }
-+
-+ expires = jiffies + nsecs_to_jiffies(rwb->cur_win_nsec);
-+ mod_timer(&rwb->window_timer, expires);
-+}
-+
-+static void wb_timer_fn(unsigned long data)
-+{
-+ struct rq_wb *rwb = (struct rq_wb *) data;
-+ int status, inflight;
-+
-+ inflight = atomic_read(&rwb->inflight);
-+
-+ status = latency_exceeded(rwb);
-+
-+ trace_wbt_timer(rwb->bdi, status, rwb->scale_step, inflight);
-+
-+ /*
-+ * If we exceeded the latency target, step down. If we did not,
-+ * step one level up. If we don't know enough to say either exceeded
-+ * or ok, then don't do anything.
-+ */
-+ switch (status) {
-+ case LAT_EXCEEDED:
-+ scale_down(rwb, true);
-+ break;
-+ case LAT_OK:
-+ scale_up(rwb);
-+ break;
-+ case LAT_UNKNOWN_WRITES:
-+ scale_up(rwb);
-+ break;
-+ case LAT_UNKNOWN:
-+ if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP)
-+ break;
-+ /*
-+ * We get here for two reasons:
-+ *
-+ * 1) We previously scaled down the depth, and we currently
-+ * don't have a valid read/write sample. For that case,
-+ * slowly return to center state (step == 0).
-+ * 2) We started at the center step, but don't have a valid
-+ * read/write sample, but we do have writes going on.
-+ * Allow step to go negative, to increase write perf.
-+ */
-+ if (rwb->scale_step > 0)
-+ scale_up(rwb);
-+ else if (rwb->scale_step < 0)
-+ scale_down(rwb, false);
-+ break;
-+ default:
-+ break;
-+ }
-+
-+ /*
-+ * Re-arm timer, if we have IO in flight
-+ */
-+ if (rwb->scale_step || inflight)
-+ rwb_arm_timer(rwb);
-+}
-+
-+void wbt_update_limits(struct rq_wb *rwb)
-+{
-+ rwb->scale_step = 0;
-+ rwb->scaled_max = false;
-+ calc_wb_limits(rwb);
-+
-+ if (waitqueue_active(&rwb->wait))
-+ wake_up_all(&rwb->wait);
-+}
-+
-+static bool close_io(struct rq_wb *rwb)
-+{
-+ const unsigned long now = jiffies;
-+
-+ return time_before(now, rwb->last_issue + HZ / 10) ||
-+ time_before(now, rwb->last_comp + HZ / 10);
-+}
-+
-+#define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO)
-+
-+static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
-+{
-+ unsigned int limit;
-+
-+ /*
-+ * At this point we know it's a buffered write. If REQ_SYNC is
-+ * set, then it's WB_SYNC_ALL writeback, and we'll use the max
-+ * limit for that. If the write is marked as a background write,
-+ * then use the idle limit, or go to normal if we haven't had
-+ * competing IO for a bit.
-+ */
-+ if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb))
-+ limit = rwb->wb_max;
-+ else if ((rw & REQ_BG) || close_io(rwb)) {
-+ /*
-+ * If less than 100ms since we completed unrelated IO,
-+ * limit us to half the depth for background writeback.
-+ */
-+ limit = rwb->wb_background;
-+ } else
-+ limit = rwb->wb_normal;
-+
-+ return limit;
-+}
-+
-+static inline bool may_queue(struct rq_wb *rwb, unsigned long rw)
-+{
-+ /*
-+ * inc it here even if disabled, since we'll dec it at completion.
-+ * this only happens if the task was sleeping in __wbt_wait(),
-+ * and someone turned it off at the same time.
-+ */
-+ if (!rwb_enabled(rwb)) {
-+ atomic_inc(&rwb->inflight);
-+ return true;
-+ }
-+
-+ return atomic_inc_below(&rwb->inflight, get_limit(rwb, rw));
-+}
-+
-+/*
-+ * Block if we will exceed our limit, or if we are currently waiting for
-+ * the timer to kick off queuing again.
-+ */
-+static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
-+{
-+ DEFINE_WAIT(wait);
-+
-+ if (may_queue(rwb, rw))
-+ return;
-+
-+ do {
-+ prepare_to_wait_exclusive(&rwb->wait, &wait,
-+ TASK_UNINTERRUPTIBLE);
-+
-+ if (may_queue(rwb, rw))
-+ break;
-+
-+ if (lock)
-+ spin_unlock_irq(lock);
-+
-+ io_schedule();
-+
-+ if (lock)
-+ spin_lock_irq(lock);
-+ } while (1);
-+
-+ finish_wait(&rwb->wait, &wait);
-+}
-+
-+static inline bool wbt_should_throttle(struct rq_wb *rwb, unsigned int rw)
-+{
-+ const int op = rw >> BIO_OP_SHIFT;
-+
-+ /*
-+ * If not a WRITE (or a discard), do nothing
-+ */
-+ if (!(op == REQ_OP_WRITE || op == REQ_OP_DISCARD))
-+ return false;
-+
-+ /*
-+ * Don't throttle WRITE_ODIRECT
-+ */
-+ if ((rw & (REQ_SYNC | REQ_NOIDLE)) == REQ_SYNC)
-+ return false;
-+
-+ return true;
-+}
-+
-+/*
-+ * Returns true if the IO request should be accounted, false if not.
-+ * May sleep, if we have exceeded the writeback limits. Caller can pass
-+ * in an irq held spinlock, if it holds one when calling this function.
-+ * If we do sleep, we'll release and re-grab it.
-+ */
-+unsigned int wbt_wait(struct rq_wb *rwb, unsigned int rw, spinlock_t *lock)
-+{
-+ unsigned int ret;
-+
-+ if (!rwb_enabled(rwb))
-+ return 0;
-+
-+ if ((rw >> BIO_OP_SHIFT) == REQ_OP_READ)
-+ ret = WBT_READ;
-+
-+ if (!wbt_should_throttle(rwb, rw)) {
-+ if (ret & WBT_READ)
-+ wb_timestamp(rwb, &rwb->last_issue);
-+ return ret;
-+ }
-+
-+ __wbt_wait(rwb, rw, lock);
-+
-+ if (!timer_pending(&rwb->window_timer))
-+ rwb_arm_timer(rwb);
-+
-+ return ret | WBT_TRACKED;
-+}
-+
-+void wbt_issue(struct rq_wb *rwb, struct wb_issue_stat *stat)
-+{
-+ if (!rwb_enabled(rwb))
-+ return;
-+
-+ wbt_issue_stat_set_time(stat);
-+
-+ /*
-+ * Track sync issue, in case it takes a long time to complete. Allows
-+ * us to react quicker, if a sync IO takes a long time to complete.
-+ * Note that this is just a hint. 'stat' can go away when the
-+ * request completes, so it's important we never dereference it. We
-+ * only use the address to compare with, which is why we store the
-+ * sync_issue time locally.
-+ */
-+ if (wbt_is_read(stat) && !rwb->sync_issue) {
-+ rwb->sync_cookie = stat;
-+ rwb->sync_issue = wbt_issue_stat_get_time(stat);
-+ }
-+}
-+
-+void wbt_track(struct wb_issue_stat *stat, unsigned int wb_acct)
-+{
-+ if (wb_acct & WBT_TRACKED)
-+ wbt_mark_tracked(stat);
-+ else if (wb_acct & WBT_READ)
-+ wbt_mark_read(stat);
-+}
-+
-+void wbt_requeue(struct rq_wb *rwb, struct wb_issue_stat *stat)
-+{
-+ if (!rwb_enabled(rwb))
-+ return;
-+ if (stat == rwb->sync_cookie) {
-+ rwb->sync_issue = 0;
-+ rwb->sync_cookie = NULL;
-+ }
-+}
-+
-+void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth)
-+{
-+ if (rwb) {
-+ rwb->queue_depth = depth;
-+ wbt_update_limits(rwb);
-+ }
-+}
-+
-+void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on)
-+{
-+ if (rwb)
-+ rwb->wc = write_cache_on;
-+}
-+
-+void wbt_disable(struct rq_wb *rwb)
-+{
-+ if (rwb) {
-+ del_timer_sync(&rwb->window_timer);
-+ rwb->win_nsec = rwb->min_lat_nsec = 0;
-+ wbt_update_limits(rwb);
-+ }
-+}
-+EXPORT_SYMBOL_GPL(wbt_disable);
-+
-+struct rq_wb *wbt_init(struct backing_dev_info *bdi, struct wb_stat_ops *ops,
-+ void *ops_data)
-+{
-+ struct rq_wb *rwb;
-+
-+ if (!ops->get || !ops->is_current || !ops->clear)
-+ return ERR_PTR(-EINVAL);
-+
-+ rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
-+ if (!rwb)
-+ return ERR_PTR(-ENOMEM);
-+
-+ atomic_set(&rwb->inflight, 0);
-+ init_waitqueue_head(&rwb->wait);
-+ setup_timer(&rwb->window_timer, wb_timer_fn, (unsigned long) rwb);
-+ rwb->wc = 1;
-+ rwb->queue_depth = RWB_DEF_DEPTH;
-+ rwb->last_comp = rwb->last_issue = jiffies;
-+ rwb->bdi = bdi;
-+ rwb->win_nsec = RWB_WINDOW_NSEC;
-+ rwb->stat_ops = ops;
-+ rwb->ops_data = ops_data;
-+ wbt_update_limits(rwb);
-+ return rwb;
-+}
-+
-+void wbt_exit(struct rq_wb *rwb)
-+{
-+ if (rwb) {
-+ del_timer_sync(&rwb->window_timer);
-+ kfree(rwb);
-+ }
-+}
---
-cgit v0.11.2
-
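The admission path in the patch above (get_limit()/may_queue()/__wbt_wait()) boils
down to one pattern: atomically take an in-flight slot only while below the current
limit, otherwise sleep until a completion wakes a waiter and retry. A minimal
userspace C analogue of that pattern, with invented names and pthreads standing in
for the kernel's waitqueue and atomic_inc_below(), might look like this:

/*
 * Userspace sketch of the may_queue()/__wbt_wait() admission pattern:
 * take an in-flight slot only while below the limit, else sleep and
 * retry when a completion signals. Illustration only; the names and
 * the pthread-based wait are stand-ins for the kernel primitives.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int inflight;
static int depth_limit = 4;	/* stand-in for rwb->wb_normal */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t waitq = PTHREAD_COND_INITIALIZER;

/* Increment *v only if the current value is still below 'below'. */
static bool inc_below(atomic_int *v, int below)
{
	int cur = atomic_load(v);

	do {
		if (cur >= below)
			return false;
	} while (!atomic_compare_exchange_weak(v, &cur, cur + 1));

	return true;
}

/* Analogue of __wbt_wait(): block until a slot is available. */
static void throttled_submit(void)
{
	pthread_mutex_lock(&lock);
	while (!inc_below(&inflight, depth_limit))
		pthread_cond_wait(&waitq, &lock);
	pthread_mutex_unlock(&lock);
	/* ... issue the write here ... */
}

/* Completion side: drop the slot, then wake one waiter. */
static void complete_one(void)
{
	atomic_fetch_sub(&inflight, 1);
	pthread_mutex_lock(&lock);
	pthread_cond_signal(&waitq);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	throttled_submit();
	complete_one();
	printf("inflight: %d\n", atomic_load(&inflight));	/* 0 */
	return 0;
}

The exclusive wait in the kernel version serves the same purpose as waking a single
waiter here: each completion admits at most one blocked writer.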
-From db3de07314ef350fceb90ade08474fe4eea5e665 Mon Sep 17 00:00:00 2001
-From: Jens Axboe <axboe@fb.com>
-Date: Thu, 8 Sep 2016 11:08:17 -0600
-Subject: writeback: throttle buffered writeback
-
-Test patch that throttles buffered writeback to make it a lot
-smoother, with way less impact on other system activity.
-Background writeback should be, by definition, background
-activity. The fact that we flush huge bundles of it at a time
-means that it potentially has heavy impacts on foreground workloads,
-which isn't ideal. We can't easily limit the sizes of writes that
-we do, since that would impact file system layout in the presence
-of delayed allocation. So just throttle back buffered writeback,
-unless someone is waiting for it.
-
-The algorithm for when to throttle takes its inspiration from the
-CoDel network scheduling algorithm. Like CoDel, blk-wb monitors
-the minimum latencies of requests over a window of time. In that
-window of time, if the minimum latency of any request exceeds a
-given target, then a scale count is incremented and the queue depth
-is shrunk. The next monitoring window is shrunk accordingly. Unlike
-CoDel, if we hit a window that exhibits good behavior, then we
-simply decrement the scale count and re-calculate the limits for that
-scale value. This prevents us from oscillating between a
-close-to-ideal value and max all the time, instead remaining in the
-windows where we get good behavior.
-
-Unlike CoDel, blk-wb allows the scale count to go negative. This
-happens if we primarily have writes going on. Unlike positive
-scale counts, this doesn't change the size of the monitoring window.
-When the heavy writers finish, blk-wb quickly snaps back to its
-stable state of a zero scale count.
-
-The patch registers two sysfs entries. The first one, 'wbt_window_usec',
-defines the window of monitoring. The second one, 'wbt_lat_usec',
-sets the latency target for the window. It defaults to 2 msec for
-non-rotational storage, and 75 msec for rotational storage. Setting
-this value to '0' disables blk-wb. Generally, a user would not have
-to touch these settings.
-
-We don't enable WBT on devices that are managed with CFQ, and have
-a non-root block cgroup attached. If we have a proportional share setup
-on this particular disk, then the wbt throttling will interfere with
-that. We don't have a strong need for wbt for that case, since we will
-rely on CFQ doing that for us.
-
-Signed-off-by: Jens Axboe <axboe@fb.com>
----
- Documentation/block/queue-sysfs.txt | 13 ++++
- block/Kconfig | 1 +
- block/blk-core.c | 20 +++++-
- block/blk-mq.c | 30 ++++++++-
- block/blk-settings.c | 3 +
- block/blk-stat.c | 5 +-
- block/blk-sysfs.c | 125 ++++++++++++++++++++++++++++++++++++
- block/cfq-iosched.c | 13 ++++
- include/linux/blkdev.h | 6 +-
- 9 files changed, 207 insertions(+), 9 deletions(-)
-
-diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
-index 2a39040..2847219 100644
---- a/Documentation/block/queue-sysfs.txt
-+++ b/Documentation/block/queue-sysfs.txt
-@@ -169,5 +169,18 @@ This is the number of bytes the device can write in a single write-same
- command. A value of '0' means write-same is not supported by this
- device.
-
-+wbt_lat_usec (RW)
-+-----------------
-+If the device is registered for writeback throttling, then this file shows
-+the target minimum read latency. If this latency is exceeded in a given
-+window of time (see wbt_window_usec), then the writeback throttling will start
-+scaling back writes.
-+
-+wbt_window_usec (RW)
-+--------------------
-+If the device is registered for writeback throttling, then this file shows
-+the value of the monitoring window in which we'll look at the target
-+latency. See wbt_lat_usec.
-+
-
- Jens Axboe <jens.axboe@oracle.com>, February 2009
-diff --git a/block/Kconfig b/block/Kconfig
-index 161491d..6da79e6 100644
---- a/block/Kconfig
-+++ b/block/Kconfig
-@@ -4,6 +4,7 @@
- menuconfig BLOCK
- bool "Enable the block layer" if EXPERT
- default y
-+ select WBT
- help
- Provide block layer support for the kernel.
-
-diff --git a/block/blk-core.c b/block/blk-core.c
-index 4075cbe..4f4ce05 100644
---- a/block/blk-core.c
-+++ b/block/blk-core.c
-@@ -33,6 +33,7 @@
- #include <linux/ratelimit.h>
- #include <linux/pm_runtime.h>
- #include <linux/blk-cgroup.h>
-+#include <linux/wbt.h>
-
- #define CREATE_TRACE_POINTS
- #include <trace/events/block.h>
-@@ -882,6 +883,8 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
-
- fail:
- blk_free_flush_queue(q->fq);
-+ wbt_exit(q->rq_wb);
-+ q->rq_wb = NULL;
- return NULL;
- }
- EXPORT_SYMBOL(blk_init_allocated_queue);
-@@ -1346,6 +1349,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
- blk_delete_timer(rq);
- blk_clear_rq_complete(rq);
- trace_block_rq_requeue(q, rq);
-+ wbt_requeue(q->rq_wb, &rq->wb_stat);
-
- if (rq->cmd_flags & REQ_QUEUED)
- blk_queue_end_tag(q, rq);
-@@ -1436,6 +1440,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
- /* this is a bio leak */
- WARN_ON(req->bio != NULL);
-
-+ wbt_done(q->rq_wb, &req->wb_stat);
-+
- /*
- * Request may not have originated from ll_rw_blk. if not,
- * it didn't come out of our reserved rq pools
-@@ -1667,6 +1673,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
- int el_ret, rw_flags = 0, where = ELEVATOR_INSERT_SORT;
- struct request *req;
- unsigned int request_count = 0;
-+ unsigned int wb_acct;
-
- /*
- * low level driver can indicate that it wants pages above a
-@@ -1719,6 +1726,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
- }
-
- get_rq:
-+ wb_acct = wbt_wait(q->rq_wb, bio->bi_opf, q->queue_lock);
-+
- /*
- * This sync check and mask will be re-done in init_request_from_bio(),
- * but we need to set it earlier to expose the sync flag to the
-@@ -1738,11 +1747,15 @@ get_rq:
- */
- req = get_request(q, bio_data_dir(bio), rw_flags, bio, GFP_NOIO);
- if (IS_ERR(req)) {
-+ if (wb_acct & WBT_TRACKED)
-+ __wbt_done(q->rq_wb);
- bio->bi_error = PTR_ERR(req);
- bio_endio(bio);
- goto out_unlock;
- }
-
-+ wbt_track(&req->wb_stat, wb_acct);
-+
- /*
- * After dropping the lock and possibly sleeping here, our request
- * may now be mergeable after it had proven unmergeable (above).
-@@ -2475,7 +2488,7 @@ void blk_start_request(struct request *req)
- {
- blk_dequeue_request(req);
-
-- req->issue_time = ktime_to_ns(ktime_get());
-+ wbt_issue(req->q->rq_wb, &req->wb_stat);
-
- /*
- * We are now handing the request to the hardware, initialize
-@@ -2713,9 +2726,10 @@ void blk_finish_request(struct request *req, int error)
-
- blk_account_io_done(req);
-
-- if (req->end_io)
-+ if (req->end_io) {
-+ wbt_done(req->q->rq_wb, &req->wb_stat);
- req->end_io(req, error);
-- else {
-+ } else {
- if (blk_bidi_rq(req))
- __blk_put_request(req->next_rq->q, req->next_rq);
-
-diff --git a/block/blk-mq.c b/block/blk-mq.c
-index 712f141..511289a 100644
---- a/block/blk-mq.c
-+++ b/block/blk-mq.c
-@@ -22,6 +22,7 @@
- #include <linux/sched/sysctl.h>
- #include <linux/delay.h>
- #include <linux/crash_dump.h>
-+#include <linux/wbt.h>
-
- #include <trace/events/block.h>
-
-@@ -319,6 +320,8 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
-
- if (rq->cmd_flags & REQ_MQ_INFLIGHT)
- atomic_dec(&hctx->nr_active);
-+
-+ wbt_done(q->rq_wb, &rq->wb_stat);
- rq->cmd_flags = 0;
-
- clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
-@@ -351,6 +354,7 @@ inline void __blk_mq_end_request(struct request *rq, int error)
- blk_account_io_done(rq);
-
- if (rq->end_io) {
-+ wbt_done(rq->q->rq_wb, &rq->wb_stat);
- rq->end_io(rq, error);
- } else {
- if (unlikely(blk_bidi_rq(rq)))
-@@ -457,7 +461,7 @@ void blk_mq_start_request(struct request *rq)
- if (unlikely(blk_bidi_rq(rq)))
- rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
-
-- rq->issue_time = ktime_to_ns(ktime_get());
-+ wbt_issue(q->rq_wb, &rq->wb_stat);
-
- blk_add_timer(rq);
-
-@@ -494,6 +498,7 @@ static void __blk_mq_requeue_request(struct request *rq)
- struct request_queue *q = rq->q;
-
- trace_block_rq_requeue(q, rq);
-+ wbt_requeue(q->rq_wb, &rq->wb_stat);
-
- if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
- if (q->dma_drain_size && blk_rq_bytes(rq))
-@@ -1312,6 +1317,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
- struct blk_plug *plug;
- struct request *same_queue_rq = NULL;
- blk_qc_t cookie;
-+ unsigned int wb_acct;
-
- blk_queue_bounce(q, &bio);
-
-@@ -1326,9 +1332,16 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
- blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
- return BLK_QC_T_NONE;
-
-+ wb_acct = wbt_wait(q->rq_wb, bio->bi_opf, NULL);
-+
- rq = blk_mq_map_request(q, bio, &data);
-- if (unlikely(!rq))
-+ if (unlikely(!rq)) {
-+ if (wb_acct & WBT_TRACKED)
-+ __wbt_done(q->rq_wb);
- return BLK_QC_T_NONE;
-+ }
-+
-+ wbt_track(&rq->wb_stat, wb_acct);
-
- cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
-
-@@ -1405,6 +1418,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
- struct blk_map_ctx data;
- struct request *rq;
- blk_qc_t cookie;
-+ unsigned int wb_acct;
-
- blk_queue_bounce(q, &bio);
-
-@@ -1421,9 +1435,16 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
- } else
- request_count = blk_plug_queued_count(q);
-
-+ wb_acct = wbt_wait(q->rq_wb, bio->bi_opf, NULL);
-+
- rq = blk_mq_map_request(q, bio, &data);
-- if (unlikely(!rq))
-+ if (unlikely(!rq)) {
-+ if (wb_acct & WBT_TRACKED)
-+ __wbt_done(q->rq_wb);
- return BLK_QC_T_NONE;
-+ }
-+
-+ wbt_track(&rq->wb_stat, wb_acct);
-
- cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
-
-@@ -2147,6 +2168,9 @@ void blk_mq_free_queue(struct request_queue *q)
- list_del_init(&q->all_q_node);
- mutex_unlock(&all_q_mutex);
-
-+ wbt_exit(q->rq_wb);
-+ q->rq_wb = NULL;
-+
- blk_mq_del_queue_tag_set(q);
-
- blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
-diff --git a/block/blk-settings.c b/block/blk-settings.c
-index f7e122e..746dc9f 100644
---- a/block/blk-settings.c
-+++ b/block/blk-settings.c
-@@ -840,6 +840,7 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
- void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
- {
- q->queue_depth = depth;
-+ wbt_set_queue_depth(q->rq_wb, depth);
- }
- EXPORT_SYMBOL(blk_set_queue_depth);
-
-@@ -863,6 +864,8 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
- else
- queue_flag_clear(QUEUE_FLAG_FUA, q);
- spin_unlock_irq(q->queue_lock);
-+
-+ wbt_set_write_cache(q->rq_wb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
- }
- EXPORT_SYMBOL_GPL(blk_queue_write_cache);
-
-diff --git a/block/blk-stat.c b/block/blk-stat.c
-index 3965e8a..bdb16d8 100644
---- a/block/blk-stat.c
-+++ b/block/blk-stat.c
-@@ -178,15 +178,16 @@ bool blk_stat_is_current(struct blk_rq_stat *stat)
- void blk_stat_add(struct blk_rq_stat *stat, struct request *rq)
- {
- s64 now, value;
-+ u64 rq_time = wbt_issue_stat_get_time(&rq->wb_stat);
-
- now = ktime_to_ns(ktime_get());
-- if (now < rq->issue_time)
-+ if (now < rq_time)
- return;
-
- if (!__blk_stat_is_current(stat, now))
- __blk_stat_init(stat, now);
-
-- value = now - rq->issue_time;
-+ value = now - rq_time;
- if (value > stat->max)
- stat->max = value;
- if (value < stat->min)
-diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
-index 0b9e435..85c3dc2 100644
---- a/block/blk-sysfs.c
-+++ b/block/blk-sysfs.c
-@@ -10,6 +10,7 @@
- #include <linux/blktrace_api.h>
- #include <linux/blk-mq.h>
- #include <linux/blk-cgroup.h>
-+#include <linux/wbt.h>
-
- #include "blk.h"
- #include "blk-mq.h"
-@@ -41,6 +42,19 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
- return count;
- }
-
-+static ssize_t queue_var_store64(u64 *var, const char *page)
-+{
-+ int err;
-+ u64 v;
-+
-+ err = kstrtou64(page, 10, &v);
-+ if (err < 0)
-+ return err;
-+
-+ *var = v;
-+ return 0;
-+}
-+
- static ssize_t queue_requests_show(struct request_queue *q, char *page)
- {
- return queue_var_show(q->nr_requests, (page));
-@@ -347,6 +361,58 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
- return ret;
- }
-
-+static ssize_t queue_wb_win_show(struct request_queue *q, char *page)
-+{
-+ if (!q->rq_wb)
-+ return -EINVAL;
-+
-+ return sprintf(page, "%llu\n", div_u64(q->rq_wb->win_nsec, 1000));
-+}
-+
-+static ssize_t queue_wb_win_store(struct request_queue *q, const char *page,
-+ size_t count)
-+{
-+ ssize_t ret;
-+ u64 val;
-+
-+ if (!q->rq_wb)
-+ return -EINVAL;
-+
-+ ret = queue_var_store64(&val, page);
-+ if (ret < 0)
-+ return ret;
-+
-+ q->rq_wb->win_nsec = val * 1000ULL;
-+ wbt_update_limits(q->rq_wb);
-+ return count;
-+}
-+
-+static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
-+{
-+ if (!q->rq_wb)
-+ return -EINVAL;
-+
-+ return sprintf(page, "%llu\n", div_u64(q->rq_wb->min_lat_nsec, 1000));
-+}
-+
-+static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
-+ size_t count)
-+{
-+ ssize_t ret;
-+ u64 val;
-+
-+ if (!q->rq_wb)
-+ return -EINVAL;
-+
-+ ret = queue_var_store64(&val, page);
-+ if (ret < 0)
-+ return ret;
-+
-+ q->rq_wb->min_lat_nsec = val * 1000ULL;
-+ wbt_update_limits(q->rq_wb);
-+ return count;
-+}
-+
- static ssize_t queue_wc_show(struct request_queue *q, char *page)
- {
- if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
-@@ -551,6 +617,18 @@ static struct queue_sysfs_entry queue_stats_entry = {
- .show = queue_stats_show,
- };
-
-+static struct queue_sysfs_entry queue_wb_lat_entry = {
-+ .attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR },
-+ .show = queue_wb_lat_show,
-+ .store = queue_wb_lat_store,
-+};
-+
-+static struct queue_sysfs_entry queue_wb_win_entry = {
-+ .attr = {.name = "wbt_window_usec", .mode = S_IRUGO | S_IWUSR },
-+ .show = queue_wb_win_show,
-+ .store = queue_wb_win_store,
-+};
-+
- static struct attribute *default_attrs[] = {
- &queue_requests_entry.attr,
- &queue_ra_entry.attr,
-@@ -579,6 +657,8 @@ static struct attribute *default_attrs[] = {
- &queue_wc_entry.attr,
- &queue_dax_entry.attr,
- &queue_stats_entry.attr,
-+ &queue_wb_lat_entry.attr,
-+ &queue_wb_win_entry.attr,
- NULL,
- };
-
-@@ -693,6 +773,49 @@ struct kobj_type blk_queue_ktype = {
- .release = blk_release_queue,
- };
-
-+static void blk_wb_stat_get(void *data, struct blk_rq_stat *stat)
-+{
-+ blk_queue_stat_get(data, stat);
-+}
-+
-+static void blk_wb_stat_clear(void *data)
-+{
-+ blk_stat_clear(data);
-+}
-+
-+static bool blk_wb_stat_is_current(struct blk_rq_stat *stat)
-+{
-+ return blk_stat_is_current(stat);
-+}
-+
-+static struct wb_stat_ops wb_stat_ops = {
-+ .get = blk_wb_stat_get,
-+ .is_current = blk_wb_stat_is_current,
-+ .clear = blk_wb_stat_clear,
-+};
-+
-+static void blk_wb_init(struct request_queue *q)
-+{
-+ struct rq_wb *rwb;
-+
-+ rwb = wbt_init(&q->backing_dev_info, &wb_stat_ops, q);
-+
-+ /*
-+ * If this fails, we don't get throttling
-+ */
-+ if (IS_ERR(rwb))
-+ return;
-+
-+ if (blk_queue_nonrot(q))
-+ rwb->min_lat_nsec = 2000000ULL;
-+ else
-+ rwb->min_lat_nsec = 75000000ULL;
-+
-+ wbt_set_queue_depth(rwb, blk_queue_depth(q));
-+ wbt_set_write_cache(rwb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
-+ q->rq_wb = rwb;
-+}
-+
- int blk_register_queue(struct gendisk *disk)
- {
- int ret;
-@@ -732,6 +855,8 @@ int blk_register_queue(struct gendisk *disk)
- if (q->mq_ops)
- blk_mq_register_disk(disk);
-
-+ blk_wb_init(q);
-+
- if (!q->request_fn)
- return 0;
-
-diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
-index cc2f6db..fdcd5999 100644
---- a/block/cfq-iosched.c
-+++ b/block/cfq-iosched.c
-@@ -3764,9 +3764,11 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
- struct cfq_data *cfqd = cic_to_cfqd(cic);
- struct cfq_queue *cfqq;
- uint64_t serial_nr;
-+ bool nonroot_cg;
-
- rcu_read_lock();
- serial_nr = bio_blkcg(bio)->css.serial_nr;
-+ nonroot_cg = bio_blkcg(bio) != &blkcg_root;
- rcu_read_unlock();
-
- /*
-@@ -3777,6 +3779,17 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
- return;
-
- /*
-+ * If we have a non-root cgroup, we can depend on that to
-+ * do proper throttling of writes. Turn off wbt for that
-+ * case.
-+ */
-+ if (nonroot_cg) {
-+ struct request_queue *q = cfqd->queue;
-+
-+ wbt_disable(q->rq_wb);
-+ }
-+
-+ /*
- * Drop reference to queues. New queues will be assigned in new
- * group upon arrival of fresh requests.
- */
-diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
-index 259eba8..45256d7 100644
---- a/include/linux/blkdev.h
-+++ b/include/linux/blkdev.h
-@@ -24,6 +24,7 @@
- #include <linux/rcupdate.h>
- #include <linux/percpu-refcount.h>
- #include <linux/scatterlist.h>
-+#include <linux/wbt.h>
-
- struct module;
- struct scsi_ioctl_command;
-@@ -37,6 +38,7 @@ struct bsg_job;
- struct blkcg_gq;
- struct blk_flush_queue;
- struct pr_ops;
-+struct rq_wb;
-
- #define BLKDEV_MIN_RQ 4
- #define BLKDEV_MAX_RQ 128 /* Default maximum */
-@@ -151,7 +153,7 @@ struct request {
- struct gendisk *rq_disk;
- struct hd_struct *part;
- unsigned long start_time;
-- s64 issue_time;
-+ struct wb_issue_stat wb_stat;
- #ifdef CONFIG_BLK_CGROUP
- struct request_list *rl; /* rl this rq is alloced from */
- unsigned long long start_time_ns;
-@@ -303,6 +305,8 @@ struct request_queue {
- int nr_rqs[2]; /* # allocated [a]sync rqs */
- int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
-
-+ struct rq_wb *rq_wb;
-+
- /*
- * If blkcg is not used, @q->root_rl serves all requests. If blkcg
- * is used, root blkg allocates from @q->root_rl and all other
---
-cgit v0.11.2
-
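For the sysfs knobs added in the patch above, the only subtlety is units: the files
are read and written in microseconds while rq_wb stores nanoseconds (win_nsec,
min_lat_nsec), which is what queue_wb_lat_store()/queue_wb_lat_show() do with
val * 1000ULL and div_u64(..., 1000). A small userspace sketch of that round trip,
using strtoull()/snprintf() as stand-ins for kstrtou64()/sprintf() and an invented
variable name:

/*
 * Sketch of the unit handling behind the wbt_lat_usec/wbt_window_usec
 * files: user space sees microseconds, rq_wb stores nanoseconds.
 * Plain C stand-ins only; not the kernel implementation.
 */
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

static uint64_t min_lat_nsec;	/* analogue of q->rq_wb->min_lat_nsec */

/* store path: microsecond string from sysfs -> nanoseconds */
static int wb_lat_store(const char *page)
{
	char *end;
	uint64_t usec;

	errno = 0;
	usec = strtoull(page, &end, 10);
	if (errno || end == page)
		return -EINVAL;

	min_lat_nsec = usec * 1000ULL;
	return 0;
}

/* show path: nanoseconds -> microseconds, printed back to sysfs */
static void wb_lat_show(char *page, size_t len)
{
	snprintf(page, len, "%" PRIu64 "\n", min_lat_nsec / 1000);
}

int main(void)
{
	char buf[32];

	wb_lat_store("75000");		/* 75 msec, the rotational default */
	wb_lat_show(buf, sizeof(buf));
	printf("%s", buf);		/* prints 75000 */
	return 0;
}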
-From 21c990f3ab1d3324ad3152cb94f86e6e0772b73c Mon Sep 17 00:00:00 2001
-From: Jens Axboe <axboe@fb.com>
-Date: Sat, 10 Sep 2016 10:06:26 -0600
-Subject: wbt: spelling check fix
-
-Signed-off-by: Jens Axboe <axboe@fb.com>
----
- lib/wbt.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/lib/wbt.c b/lib/wbt.c
-index a995703..5c507e5 100644
---- a/lib/wbt.c
-+++ b/lib/wbt.c
-@@ -1,5 +1,5 @@
- /*
-- * buffered writeback throttling. losely based on CoDel. We can't drop
-+ * buffered writeback throttling. loosely based on CoDel. We can't drop
- * packets for IO scheduling, so the logic is something like this:
- *
- * - Monitor latencies in a defined window of time.
---
-cgit v0.11.2
-
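Taken together, the blk-core.c and blk-mq.c hunks earlier in this file follow one
accounting contract: wbt_wait() returns tracking flags before the request is
allocated; if allocation fails, the caller undoes the accounting (the
"if (wb_acct & WBT_TRACKED) __wbt_done()" branches); otherwise the flags are stored
on the request with wbt_track() and released by wbt_done() at completion. A compact
userspace sketch of that contract, with invented names and flag values chosen only
for illustration:

/*
 * Sketch of the caller-side accounting contract around wbt_wait()/
 * wbt_track()/wbt_done(). Stand-in code only; the flag values and
 * helpers are not the kernel's.
 */
#include <stdbool.h>
#include <stdio.h>

#define WBT_TRACKED	(1U << 0)	/* write counted against the limit */
#define WBT_READ	(1U << 1)	/* read, only flagged, never throttled */

struct fake_request {
	unsigned int wb_acct;
};

static int inflight;

static unsigned int fake_wbt_wait(bool is_read)
{
	if (is_read)
		return WBT_READ;	/* reads are not throttled */
	inflight++;			/* throttled write takes a slot */
	return WBT_TRACKED;
}

static void fake_wbt_done(unsigned int wb_acct)
{
	if (wb_acct & WBT_TRACKED)
		inflight--;		/* release the slot */
}

static bool submit(bool is_read, bool alloc_fails)
{
	unsigned int wb_acct = fake_wbt_wait(is_read);
	struct fake_request rq;

	if (alloc_fails) {
		fake_wbt_done(wb_acct);	/* mirrors the __wbt_done() error path */
		return false;
	}

	rq.wb_acct = wb_acct;		/* mirrors wbt_track() */
	/* ... request runs ... */
	fake_wbt_done(rq.wb_acct);	/* mirrors wbt_done() at completion */
	return true;
}

int main(void)
{
	submit(false, false);
	submit(false, true);
	printf("inflight after both paths: %d\n", inflight);	/* 0 */
	return 0;
}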