summarylogtreecommitdiffstats
path: root/0001-futex.patch
blob: 72bf55a8f28d48fc5c41342798feafe73186c2ea (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
From 3c0db5afb3180941bf51dd7c12541514d774a250 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Thu, 13 Feb 2020 18:45:22 -0300
Subject: [PATCH 1/4] futex: Implement mechanism to wait on any of several
 futexes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a new futex operation, called FUTEX_WAIT_MULTIPLE, which allows
a thread to wait on several futexes at the same time, and be awoken by
any of them.  In a sense, it implements one of the features that was
supported by pooling on the old FUTEX_FD interface.

The use case lies in the Wine implementation of the Windows NT interface
WaitMultipleObjects. This Windows API function allows a thread to sleep
waiting on the first of a set of event sources (mutexes, timers, signal,
console input, etc) to signal.  Considering this is a primitive
synchronization operation for Windows applications, being able to quickly
signal events on the producer side, and quickly go to sleep on the
consumer side is essential for good performance of those running over Wine.

Wine developers have an implementation that uses eventfd, but it suffers
from FD exhaustion (there is applications that go to the order of
multi-milion FDs), and higher CPU utilization than this new operation.

The futex list is passed as an array of `struct futex_wait_block`
(pointer, value, bitset) to the kernel, which will enqueue all of them
and sleep if none was already triggered. It returns a hint of which
futex caused the wake up event to userspace, but the hint doesn't
guarantee that is the only futex triggered.  Before calling the syscall
again, userspace should traverse the list, trying to re-acquire any of
the other futexes, to prevent an immediate -EWOULDBLOCK return code from
the kernel.

This was tested using three mechanisms:

1) By reimplementing FUTEX_WAIT in terms of FUTEX_WAIT_MULTIPLE and
running the unmodified tools/testing/selftests/futex and a full linux
distro on top of this kernel.

2) By an example code that exercises the FUTEX_WAIT_MULTIPLE path on a
multi-threaded, event-handling setup.

3) By running the Wine fsync with Valve's Proton compatibility code
implementation and executing multi-threaded applications, in particular
modern games, on top of this implementation.

Changes were tested for the following ABIs: x86_64, i386 and x32.
Support for x32 applications is not implemented since it would
take a major rework adding a new entry point and splitting the current
futex 64 entry point in two and we can't change the current x32 syscall
number without breaking user space compatibility.

CC: Steven Rostedt <rostedt@goodmis.org>
Cc: Richard Yao <ryao@gentoo.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Co-developed-by: Zebediah Figura <z.figura12@gmail.com>
Signed-off-by: Zebediah Figura <z.figura12@gmail.com>
Co-developed-by: Steven Noonan <steven@valvesoftware.com>
Signed-off-by: Steven Noonan <steven@valvesoftware.com>
Co-developed-by: Pierre-Loup A. Griffais <pgriffais@valvesoftware.com>
Signed-off-by: Pierre-Loup A. Griffais <pgriffais@valvesoftware.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
[Added compatibility code]
Co-developed-by: André Almeida <andrealmeid@collabora.com>
Signed-off-by: André Almeida <andrealmeid@collabora.com>
---
 include/uapi/linux/futex.h |  20 ++
 kernel/futex.c             | 363 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 379 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index a89eb0acc..a3e760886 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -21,6 +21,7 @@
 #define FUTEX_WAKE_BITSET	10
 #define FUTEX_WAIT_REQUEUE_PI	11
 #define FUTEX_CMP_REQUEUE_PI	12
+#define FUTEX_WAIT_MULTIPLE	31
 
 #define FUTEX_PRIVATE_FLAG	128
 #define FUTEX_CLOCK_REALTIME	256
@@ -40,6 +41,8 @@
 					 FUTEX_PRIVATE_FLAG)
 #define FUTEX_CMP_REQUEUE_PI_PRIVATE	(FUTEX_CMP_REQUEUE_PI | \
 					 FUTEX_PRIVATE_FLAG)
+#define FUTEX_WAIT_MULTIPLE_PRIVATE	(FUTEX_WAIT_MULTIPLE | \
+					 FUTEX_PRIVATE_FLAG)
 
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
@@ -150,4 +153,21 @@ struct robust_list_head {
   (((op & 0xf) << 28) | ((cmp & 0xf) << 24)		\
    | ((oparg & 0xfff) << 12) | (cmparg & 0xfff))
 
+/*
+ * Maximum number of multiple futexes to wait for
+ */
+#define FUTEX_MULTIPLE_MAX_COUNT	128
+
+/**
+ * struct futex_wait_block - Block of futexes to be waited for
+ * @uaddr:	User address of the futex
+ * @val:	Futex value expected by userspace
+ * @bitset:	Bitset for the optional bitmasked wakeup
+ */
+struct futex_wait_block {
+	__u32 __user *uaddr;
+	__u32 val;
+	__u32 bitset;
+};
+
 #endif /* _UAPI_LINUX_FUTEX_H */
diff --git a/kernel/futex.c b/kernel/futex.c
index b59532862..2fbfb0b80 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -214,6 +214,8 @@ struct futex_pi_state {
  * @rt_waiter:		rt_waiter storage for use with requeue_pi
  * @requeue_pi_key:	the requeue_pi target futex key
  * @bitset:		bitset for the optional bitmasked wakeup
+ * @uaddr:             userspace address of futex
+ * @uval:              expected futex's value
  *
  * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
  * we can wake only the relevant ones (hashed queues may be shared).
@@ -236,6 +238,8 @@ struct futex_q {
 	struct rt_mutex_waiter *rt_waiter;
 	union futex_key *requeue_pi_key;
 	u32 bitset;
+	u32 __user *uaddr;
+	u32 uval;
 } __randomize_layout;
 
 static const struct futex_q futex_q_init = {
@@ -2346,6 +2350,29 @@ static int unqueue_me(struct futex_q *q)
 	return ret;
 }
 
+/**
+ * unqueue_multiple() - Remove several futexes from their futex_hash_bucket
+ * @q:	The list of futexes to unqueue
+ * @count: Number of futexes in the list
+ *
+ * Helper to unqueue a list of futexes. This can't fail.
+ *
+ * Return:
+ *  - >=0 - Index of the last futex that was awoken;
+ *  - -1  - If no futex was awoken
+ */
+static int unqueue_multiple(struct futex_q *q, int count)
+{
+	int ret = -1;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		if (!unqueue_me(&q[i]))
+			ret = i;
+	}
+	return ret;
+}
+
 /*
  * PI futexes can not be requeued and must remove themself from the
  * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
@@ -2709,6 +2736,211 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
 	return ret;
 }
 
+/**
+ * futex_wait_multiple_setup() - Prepare to wait and enqueue multiple futexes
+ * @qs:		The corresponding futex list
+ * @count:	The size of the lists
+ * @flags:	Futex flags (FLAGS_SHARED, etc.)
+ * @awaken:	Index of the last awoken futex
+ *
+ * Prepare multiple futexes in a single step and enqueue them. This may fail if
+ * the futex list is invalid or if any futex was already awoken. On success the
+ * task is ready to interruptible sleep.
+ *
+ * Return:
+ *  -  1 - One of the futexes was awaken by another thread
+ *  -  0 - Success
+ *  - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL
+ */
+static int futex_wait_multiple_setup(struct futex_q *qs, int count,
+				     unsigned int flags, int *awaken)
+{
+	struct futex_hash_bucket *hb;
+	int ret, i;
+	u32 uval;
+
+	/*
+	 * Enqueuing multiple futexes is tricky, because we need to
+	 * enqueue each futex in the list before dealing with the next
+	 * one to avoid deadlocking on the hash bucket.  But, before
+	 * enqueuing, we need to make sure that current->state is
+	 * TASK_INTERRUPTIBLE, so we don't absorb any awake events, which
+	 * cannot be done before the get_futex_key of the next key,
+	 * because it calls get_user_pages, which can sleep.  Thus, we
+	 * fetch the list of futexes keys in two steps, by first pinning
+	 * all the memory keys in the futex key, and only then we read
+	 * each key and queue the corresponding futex.
+	 */
+retry:
+	for (i = 0; i < count; i++) {
+		qs[i].key = FUTEX_KEY_INIT;
+		ret = get_futex_key(qs[i].uaddr, flags & FLAGS_SHARED,
+				    &qs[i].key, FUTEX_READ);
+		if (unlikely(ret)) {
+			for (--i; i >= 0; i--)
+				put_futex_key(&qs[i].key);
+			return ret;
+		}
+	}
+
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	for (i = 0; i < count; i++) {
+		struct futex_q *q = &qs[i];
+
+		hb = queue_lock(q);
+
+		ret = get_futex_value_locked(&uval, q->uaddr);
+		if (ret) {
+			/*
+			 * We need to try to handle the fault, which
+			 * cannot be done without sleep, so we need to
+			 * undo all the work already done, to make sure
+			 * we don't miss any wake ups.  Therefore, clean
+			 * up, handle the fault and retry from the
+			 * beginning.
+			 */
+			queue_unlock(hb);
+
+			/*
+			 * Keys 0..(i-1) are implicitly put
+			 * on unqueue_multiple.
+			 */
+			put_futex_key(&q->key);
+
+			*awaken = unqueue_multiple(qs, i);
+
+			__set_current_state(TASK_RUNNING);
+
+			/*
+			 * On a real fault, prioritize the error even if
+			 * some other futex was awoken.  Userspace gave
+			 * us a bad address, -EFAULT them.
+			 */
+			ret = get_user(uval, q->uaddr);
+			if (ret)
+				return ret;
+
+			/*
+			 * Even if the page fault was handled, If
+			 * something was already awaken, we can safely
+			 * give up and succeed to give a hint for userspace to
+			 * acquire the right futex faster.
+			 */
+			if (*awaken >= 0)
+				return 1;
+
+			goto retry;
+		}
+
+		if (uval != q->uval) {
+			queue_unlock(hb);
+
+			put_futex_key(&qs[i].key);
+
+			/*
+			 * If something was already awaken, we can
+			 * safely ignore the error and succeed.
+			 */
+			*awaken = unqueue_multiple(qs, i);
+			__set_current_state(TASK_RUNNING);
+			if (*awaken >= 0)
+				return 1;
+
+			return -EWOULDBLOCK;
+		}
+
+		/*
+		 * The bucket lock can't be held while dealing with the
+		 * next futex. Queue each futex at this moment so hb can
+		 * be unlocked.
+		 */
+		queue_me(&qs[i], hb);
+	}
+	return 0;
+}
+
+/**
+ * futex_wait_multiple() - Prepare to wait on and enqueue several futexes
+ * @qs:		The list of futexes to wait on
+ * @op:		Operation code from futex's syscall
+ * @count:	The number of objects
+ * @abs_time:	Timeout before giving up and returning to userspace
+ *
+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function
+ * sleeps on a group of futexes and returns on the first futex that
+ * triggered, or after the timeout has elapsed.
+ *
+ * Return:
+ *  - >=0 - Hint to the futex that was awoken
+ *  - <0  - On error
+ */
+static int futex_wait_multiple(struct futex_q *qs, int op,
+			       u32 count, ktime_t *abs_time)
+{
+	struct hrtimer_sleeper timeout, *to;
+	int ret, flags = 0, hint = 0;
+	unsigned int i;
+
+	if (!(op & FUTEX_PRIVATE_FLAG))
+		flags |= FLAGS_SHARED;
+
+	if (op & FUTEX_CLOCK_REALTIME)
+		flags |= FLAGS_CLOCKRT;
+
+	to = futex_setup_timer(abs_time, &timeout, flags, 0);
+	while (1) {
+		ret = futex_wait_multiple_setup(qs, count, flags, &hint);
+		if (ret) {
+			if (ret > 0) {
+				/* A futex was awaken during setup */
+				ret = hint;
+			}
+			break;
+		}
+
+		if (to)
+			hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
+
+		/*
+		 * Avoid sleeping if another thread already tried to
+		 * wake us.
+		 */
+		for (i = 0; i < count; i++) {
+			if (plist_node_empty(&qs[i].list))
+				break;
+		}
+
+		if (i == count && (!to || to->task))
+			freezable_schedule();
+
+		ret = unqueue_multiple(qs, count);
+
+		__set_current_state(TASK_RUNNING);
+
+		if (ret >= 0)
+			break;
+		if (to && !to->task) {
+			ret = -ETIMEDOUT;
+			break;
+		} else if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+		/*
+		 * The final case is a spurious wakeup, for
+		 * which just retry.
+		 */
+	}
+
+	if (to) {
+		hrtimer_cancel(&to->timer);
+		destroy_hrtimer_on_stack(&to->timer);
+	}
+
+	return ret;
+}
+
 static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
 		      ktime_t *abs_time, u32 bitset)
 {
@@ -3833,6 +4065,43 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	return -ENOSYS;
 }
 
+/**
+ * futex_read_wait_block - Read an array of futex_wait_block from userspace
+ * @uaddr:	Userspace address of the block
+ * @count:	Number of blocks to be read
+ *
+ * This function creates and allocate an array of futex_q (we zero it to
+ * initialize the fields) and then, for each futex_wait_block element from
+ * userspace, fill a futex_q element with proper values.
+ */
+inline struct futex_q *futex_read_wait_block(u32 __user *uaddr, u32 count)
+{
+	unsigned int i;
+	struct futex_q *qs;
+	struct futex_wait_block fwb;
+	struct futex_wait_block __user *entry =
+		(struct futex_wait_block __user *)uaddr;
+
+	if (!count || count > FUTEX_MULTIPLE_MAX_COUNT)
+		return ERR_PTR(-EINVAL);
+
+	qs = kcalloc(count, sizeof(*qs), GFP_KERNEL);
+	if (!qs)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < count; i++) {
+		if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) {
+			kfree(qs);
+			return ERR_PTR(-EFAULT);
+		}
+
+		qs[i].uaddr = fwb.uaddr;
+		qs[i].uval = fwb.val;
+		qs[i].bitset = fwb.bitset;
+	}
+
+	return qs;
+}
 
 SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 		struct __kernel_timespec __user *, utime, u32 __user *, uaddr2,
@@ -3845,7 +4114,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 
 	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
 		      cmd == FUTEX_WAIT_BITSET ||
-		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
+		      cmd == FUTEX_WAIT_REQUEUE_PI ||
+		      cmd == FUTEX_WAIT_MULTIPLE)) {
 		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
 			return -EFAULT;
 		if (get_timespec64(&ts, utime))
@@ -3854,7 +4124,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 			return -EINVAL;
 
 		t = timespec64_to_ktime(ts);
-		if (cmd == FUTEX_WAIT)
+		if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE)
 			t = ktime_add_safe(ktime_get(), t);
 		tp = &t;
 	}
@@ -3866,6 +4136,25 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
 		val2 = (u32) (unsigned long) utime;
 
+	if (cmd == FUTEX_WAIT_MULTIPLE) {
+		int ret;
+		struct futex_q *qs;
+
+#ifdef CONFIG_X86_X32
+		if (unlikely(in_x32_syscall()))
+			return -ENOSYS;
+#endif
+		qs = futex_read_wait_block(uaddr, val);
+
+		if (IS_ERR(qs))
+			return PTR_ERR(qs);
+
+		ret = futex_wait_multiple(qs, op, val, tp);
+		kfree(qs);
+
+		return ret;
+	}
+
 	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
 }
 
@@ -4028,6 +4317,58 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
 #endif /* CONFIG_COMPAT */
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
+/**
+ * struct compat_futex_wait_block - Block of futexes to be waited for
+ * @uaddr:	User address of the futex (compatible pointer)
+ * @val:	Futex value expected by userspace
+ * @bitset:	Bitset for the optional bitmasked wakeup
+ */
+struct compat_futex_wait_block {
+	compat_uptr_t	uaddr;
+	__u32 pad;
+	__u32 val;
+	__u32 bitset;
+};
+
+/**
+ * compat_futex_read_wait_block - Read an array of futex_wait_block from
+ * userspace
+ * @uaddr:	Userspace address of the block
+ * @count:	Number of blocks to be read
+ *
+ * This function does the same as futex_read_wait_block(), except that it
+ * converts the pointer to the futex from the compat version to the regular one.
+ */
+inline struct futex_q *compat_futex_read_wait_block(u32 __user *uaddr,
+						    u32 count)
+{
+	unsigned int i;
+	struct futex_q *qs;
+	struct compat_futex_wait_block fwb;
+	struct compat_futex_wait_block __user *entry =
+		(struct compat_futex_wait_block __user *)uaddr;
+
+	if (!count || count > FUTEX_MULTIPLE_MAX_COUNT)
+		return ERR_PTR(-EINVAL);
+
+	qs = kcalloc(count, sizeof(*qs), GFP_KERNEL);
+	if (!qs)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < count; i++) {
+		if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) {
+			kfree(qs);
+			return ERR_PTR(-EFAULT);
+		}
+
+		qs[i].uaddr = compat_ptr(fwb.uaddr);
+		qs[i].uval = fwb.val;
+		qs[i].bitset = fwb.bitset;
+	}
+
+	return qs;
+}
+
 SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
 		struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
 		u32, val3)
@@ -4039,14 +4380,15 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
 
 	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
 		      cmd == FUTEX_WAIT_BITSET ||
-		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
+		      cmd == FUTEX_WAIT_REQUEUE_PI ||
+		      cmd == FUTEX_WAIT_MULTIPLE)) {
 		if (get_old_timespec32(&ts, utime))
 			return -EFAULT;
 		if (!timespec64_valid(&ts))
 			return -EINVAL;
 
 		t = timespec64_to_ktime(ts);
-		if (cmd == FUTEX_WAIT)
+		if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE)
 			t = ktime_add_safe(ktime_get(), t);
 		tp = &t;
 	}
@@ -4054,6 +4396,19 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
 	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
 		val2 = (int) (unsigned long) utime;
 
+	if (cmd == FUTEX_WAIT_MULTIPLE) {
+		int ret;
+		struct futex_q *qs = compat_futex_read_wait_block(uaddr, val);
+
+		if (IS_ERR(qs))
+			return PTR_ERR(qs);
+
+		ret = futex_wait_multiple(qs, op, val, tp);
+		kfree(qs);
+
+		return ret;
+	}
+
 	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
 }
 #endif /* CONFIG_COMPAT_32BIT_TIME */
-- 
2.27.0.rc2


From 3b2db8473486da199622782b28a9de69c4446323 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Thu, 13 Feb 2020 18:45:23 -0300
Subject: [PATCH 2/4] selftests: futex: Add FUTEX_WAIT_MULTIPLE timeout test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add test for timeout when waiting for multiple futexes. Skip the test if
it's a x32 application and the kernel returned the approtiaded error,
since this ABI is not supported for this operation.

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Co-developed-by: André Almeida <andrealmeid@collabora.com>
Signed-off-by: André Almeida <andrealmeid@collabora.com>
---
 .../futex/functional/futex_wait_timeout.c     | 38 ++++++++++++++++++-
 .../selftests/futex/include/futextest.h       | 22 +++++++++++
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index ee55e6d38..2a63e1c2c 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -11,6 +11,7 @@
  *
  * HISTORY
  *      2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *      2019-Dec-13: Add WAIT_MULTIPLE test by Krisman <krisman@collabora.com>
  *
  *****************************************************************************/
 
@@ -41,6 +42,8 @@ int main(int argc, char *argv[])
 {
 	futex_t f1 = FUTEX_INITIALIZER;
 	struct timespec to;
+	time_t secs;
+	struct futex_wait_block fwb = {&f1, f1, 0};
 	int res, ret = RET_PASS;
 	int c;
 
@@ -65,7 +68,7 @@ int main(int argc, char *argv[])
 	}
 
 	ksft_print_header();
-	ksft_set_plan(1);
+	ksft_set_plan(2);
 	ksft_print_msg("%s: Block on a futex and wait for timeout\n",
 	       basename(argv[0]));
 	ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
@@ -79,8 +82,39 @@ int main(int argc, char *argv[])
 	if (!res || errno != ETIMEDOUT) {
 		fail("futex_wait returned %d\n", ret < 0 ? errno : ret);
 		ret = RET_FAIL;
+	} else
+		ksft_test_result_pass("futex_wait timeout succeeds\n");
+
+	info("Calling futex_wait_multiple on f1: %u @ %p\n", f1, &f1);
+
+	/* Setup absolute time */
+	ret = clock_gettime(CLOCK_REALTIME, &to);
+	secs = (to.tv_nsec + timeout_ns) / 1000000000;
+	to.tv_nsec = ((int64_t)to.tv_nsec + timeout_ns) % 1000000000;
+	to.tv_sec += secs;
+	info("to.tv_sec  = %ld\n", to.tv_sec);
+	info("to.tv_nsec = %ld\n", to.tv_nsec);
+
+	res = futex_wait_multiple(&fwb, 1, &to,
+				  FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME);
+
+#ifdef __ILP32__
+	if (res == -1 && errno == ENOSYS) {
+		ksft_test_result_skip("futex_wait_multiple not supported at x32\n");
+	} else {
+		ksft_test_result_fail("futex_wait_multiple returned %d\n",
+				      res < 0 ? errno : res);
+		ret = RET_FAIL;
 	}
+#else
+	if (!res || errno != ETIMEDOUT) {
+		ksft_test_result_fail("futex_wait_multiple returned %d\n",
+				      res < 0 ? errno : res);
+		ret = RET_FAIL;
+	} else
+		ksft_test_result_pass("futex_wait_multiple timeout succeeds\n");
+#endif /* __ILP32__ */
 
-	print_result(TEST_NAME, ret);
+	ksft_print_cnts();
 	return ret;
 }
diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h
index ddbcfc9b7..bb103bef4 100644
--- a/tools/testing/selftests/futex/include/futextest.h
+++ b/tools/testing/selftests/futex/include/futextest.h
@@ -38,6 +38,14 @@ typedef volatile u_int32_t futex_t;
 #ifndef FUTEX_CMP_REQUEUE_PI
 #define FUTEX_CMP_REQUEUE_PI		12
 #endif
+#ifndef FUTEX_WAIT_MULTIPLE
+#define FUTEX_WAIT_MULTIPLE		13
+struct futex_wait_block {
+	futex_t *uaddr;
+	futex_t val;
+	__u32 bitset;
+};
+#endif
 #ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE
 #define FUTEX_WAIT_REQUEUE_PI_PRIVATE	(FUTEX_WAIT_REQUEUE_PI | \
 					 FUTEX_PRIVATE_FLAG)
@@ -80,6 +88,20 @@ futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags)
 	return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
 }
 
+/**
+ * futex_wait_multiple() - block on several futexes with optional timeout
+ * @fwb:	wait block user space address
+ * @count:	number of entities at fwb
+ * @timeout:	absolute timeout
+ */
+static inline int
+futex_wait_multiple(struct futex_wait_block *fwb, int count,
+		    struct timespec *timeout, int opflags)
+{
+	return futex(fwb, FUTEX_WAIT_MULTIPLE, count, timeout, NULL, 0,
+		     opflags);
+}
+
 /**
  * futex_wake() - wake one or more tasks blocked on uaddr
  * @nr_wake:	wake up to this many tasks
-- 
2.27.0.rc2


From b285216bc7b31eeba9b0e8081fe312bac7514931 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Thu, 13 Feb 2020 18:45:24 -0300
Subject: [PATCH 3/4] selftests: futex: Add FUTEX_WAIT_MULTIPLE wouldblock test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add test for wouldblock return when waiting for multiple futexes. Skip
the test if it's a x32 application and the kernel returned the approtiaded
error, since this ABI is not supported for this operation.

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Co-developed-by: André Almeida <andrealmeid@collabora.com>
Signed-off-by: André Almeida <andrealmeid@collabora.com>
---
 .../futex/functional/futex_wait_wouldblock.c  | 28 +++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
index 0ae390ff8..bcbac0429 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -12,6 +12,7 @@
  *
  * HISTORY
  *      2009-Nov-14: Initial version by Gowrishankar <gowrishankar.m@in.ibm.com>
+ *      2019-Dec-13: Add WAIT_MULTIPLE test by Krisman <krisman@collabora.com>
  *
  *****************************************************************************/
 
@@ -40,6 +41,7 @@ int main(int argc, char *argv[])
 {
 	struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
 	futex_t f1 = FUTEX_INITIALIZER;
+	struct futex_wait_block fwb = {&f1, f1+1, 0};
 	int res, ret = RET_PASS;
 	int c;
 
@@ -61,7 +63,7 @@ int main(int argc, char *argv[])
 	}
 
 	ksft_print_header();
-	ksft_set_plan(1);
+	ksft_set_plan(2);
 	ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
 	       basename(argv[0]));
 
@@ -71,8 +73,30 @@ int main(int argc, char *argv[])
 		fail("futex_wait returned: %d %s\n",
 		     res ? errno : res, res ? strerror(errno) : "");
 		ret = RET_FAIL;
+	} else
+		ksft_test_result_pass("futex_wait wouldblock succeeds\n");
+
+	info("Calling futex_wait_multiple on f1: %u @ %p with val=%u\n",
+	     f1, &f1, f1+1);
+	res = futex_wait_multiple(&fwb, 1, NULL, FUTEX_PRIVATE_FLAG);
+
+#ifdef __ILP32__
+	if (res != -1 || errno != ENOSYS) {
+		ksft_test_result_fail("futex_wait_multiple returned %d\n",
+				      res < 0 ? errno : res);
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_skip("futex_wait_multiple not supported at x32\n");
+	}
+#else
+	if (!res || errno != EWOULDBLOCK) {
+		ksft_test_result_fail("futex_wait_multiple returned %d\n",
+				      res < 0 ? errno : res);
+		ret = RET_FAIL;
 	}
+	ksft_test_result_pass("futex_wait_multiple wouldblock succeeds\n");
+#endif /* __ILP32__ */
 
-	print_result(TEST_NAME, ret);
+	ksft_print_cnts();
 	return ret;
 }
-- 
2.27.0.rc2


From c2a6902699480877846a07e7567fbce94b370cce Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Thu, 13 Feb 2020 18:45:25 -0300
Subject: [PATCH 4/4] selftests: futex: Add FUTEX_WAIT_MULTIPLE wake up test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add test for wait at multiple futexes mechanism. Skip the test if it's a
x32 application and the kernel returned the approtiaded error, since this
ABI is not supported for this operation.

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Co-developed-by: André Almeida <andrealmeid@collabora.com>
Signed-off-by: André Almeida <andrealmeid@collabora.com>
---
 .../selftests/futex/functional/.gitignore     |   1 +
 .../selftests/futex/functional/Makefile       |   3 +-
 .../futex/functional/futex_wait_multiple.c    | 173 ++++++++++++++++++
 .../testing/selftests/futex/functional/run.sh |   3 +
 4 files changed, 179 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/futex/functional/futex_wait_multiple.c

diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index 0efcd494d..03a4bedce 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -6,3 +6,4 @@ futex_wait_private_mapped_file
 futex_wait_timeout
 futex_wait_uninitialized_heap
 futex_wait_wouldblock
+futex_wait_multiple
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 23207829e..26562f2d7 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -14,7 +14,8 @@ TEST_GEN_FILES := \
 	futex_requeue_pi_signal_restart \
 	futex_requeue_pi_mismatched_ops \
 	futex_wait_uninitialized_heap \
-	futex_wait_private_mapped_file
+	futex_wait_private_mapped_file \
+	futex_wait_multiple
 
 TEST_PROGS := run.sh
 
diff --git a/tools/testing/selftests/futex/functional/futex_wait_multiple.c b/tools/testing/selftests/futex/functional/futex_wait_multiple.c
new file mode 100644
index 000000000..b48422e79
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_multiple.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/******************************************************************************
+ *
+ *   Copyright © Collabora, Ltd., 2019
+ *
+ * DESCRIPTION
+ *      Test basic semantics of FUTEX_WAIT_MULTIPLE
+ *
+ * AUTHOR
+ *      Gabriel Krisman Bertazi <krisman@collabora.com>
+ *
+ * HISTORY
+ *      2019-Dec-13: Initial version by Krisman <krisman@collabora.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <pthread.h>
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-wait-multiple"
+#define timeout_ns 100000
+#define MAX_COUNT 128
+#define WAKE_WAIT_US 3000000
+
+int ret = RET_PASS;
+char *progname;
+futex_t f[MAX_COUNT] = {0};
+struct futex_wait_block fwb[MAX_COUNT];
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+void test_count_overflow(void)
+{
+	futex_t f = FUTEX_INITIALIZER;
+	struct futex_wait_block fwb[MAX_COUNT+1];
+	int res, i;
+
+	ksft_print_msg("%s: Test a too big number of futexes\n", progname);
+
+	for (i = 0; i < MAX_COUNT+1; i++) {
+		fwb[i].uaddr = &f;
+		fwb[i].val = f;
+		fwb[i].bitset = 0;
+	}
+
+	res = futex_wait_multiple(fwb, MAX_COUNT+1, NULL, FUTEX_PRIVATE_FLAG);
+
+#ifdef __ILP32__
+	if (res != -1 || errno != ENOSYS) {
+		ksft_test_result_fail("futex_wait_multiple returned %d\n",
+				      res < 0 ? errno : res);
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_skip("futex_wait_multiple not supported at x32\n");
+	}
+#else
+	if (res != -1 || errno != EINVAL) {
+		ksft_test_result_fail("futex_wait_multiple returned %d\n",
+				      res < 0 ? errno : res);
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_wait_multiple count overflow succeed\n");
+	}
+
+#endif /* __ILP32__ */
+}
+
+void *waiterfn(void *arg)
+{
+	int res;
+
+	res = futex_wait_multiple(fwb, MAX_COUNT, NULL, FUTEX_PRIVATE_FLAG);
+
+#ifdef __ILP32__
+	if (res != -1 || errno != ENOSYS) {
+		ksft_test_result_fail("futex_wait_multiple returned %d\n",
+				      res < 0 ? errno : res);
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_skip("futex_wait_multiple not supported at x32\n");
+	}
+#else
+	if (res < 0)
+		ksft_print_msg("waiter failed %d\n", res);
+
+	info("futex_wait_multiple: Got hint futex %d was freed\n", res);
+#endif /* __ILP32__ */
+
+	return NULL;
+}
+
+void test_fwb_wakeup(void)
+{
+	int res, i;
+	pthread_t waiter;
+
+	ksft_print_msg("%s: Test wake up in a list of futex\n", progname);
+
+	for (i = 0; i < MAX_COUNT; i++) {
+		fwb[i].uaddr = &f[i];
+		fwb[i].val = f[i];
+		fwb[i].bitset = 0xffffffff;
+	}
+
+	res = pthread_create(&waiter, NULL, waiterfn, NULL);
+	if (res) {
+		ksft_test_result_fail("Creating waiting thread failed");
+		ksft_exit_fail();
+	}
+
+	usleep(WAKE_WAIT_US);
+	res = futex_wake(&(f[MAX_COUNT-1]), 1, FUTEX_PRIVATE_FLAG);
+	if (res != 1) {
+		ksft_test_result_fail("Failed to wake thread res=%d\n", res);
+		ksft_exit_fail();
+	}
+
+	pthread_join(waiter, NULL);
+	ksft_test_result_pass("%s succeed\n", __func__);
+}
+
+int main(int argc, char *argv[])
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	progname = basename(argv[0]);
+
+	ksft_print_header();
+	ksft_set_plan(2);
+
+	test_count_overflow();
+
+#ifdef __ILP32__
+	// if it's a 32x binary, there's no futex to wakeup
+	ksft_test_result_skip("futex_wait_multiple not supported at x32\n");
+#else
+	test_fwb_wakeup();
+#endif /* __ILP32__ */
+
+	ksft_print_cnts();
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index 1acb6ace1..a8be94f28 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -73,3 +73,6 @@ echo
 echo
 ./futex_wait_uninitialized_heap $COLOR
 ./futex_wait_private_mapped_file $COLOR
+
+echo
+./futex_wait_multiple $COLOR
-- 
2.27.0.rc2