1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
|
diff --git a/drivers/pci/controller/intel-nvme-remap.c b/drivers/pci/controller/intel-nvme-remap.c
new file mode 100644
index 000000000000..e105e6f5cc91
--- /dev/null
+++ b/drivers/pci/controller/intel-nvme-remap.c
@@ -0,0 +1,462 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel remapped NVMe device support.
+ *
+ * Copyright (c) 2019 Endless Mobile, Inc.
+ * Author: Daniel Drake <drake@endlessm.com>
+ *
+ * Some products ship by default with the SATA controller in "RAID" or
+ * "Intel RST Premium With Intel Optane System Acceleration" mode. Under this
+ * mode, which we refer to as "remapped NVMe" mode, any installed NVMe
+ * devices disappear from the PCI bus, and instead their I/O memory becomes
+ * available within the AHCI device BARs.
+ *
+ * This scheme is understood to be a way of avoiding usage of the standard
+ * Windows NVMe driver under that OS, mandating usage of Intel's driver
+ * instead, which has better power management, and presumably offers
+ * some RAID/disk-caching solutions too.
+ *
+ * Here in this driver, we support the remapped NVMe mode by claiming the
+ * AHCI device and creating a fake PCIe root port. On the new bus, the
+ * original AHCI device is exposed with only minor tweaks. Then, fake PCI
+ * devices corresponding to the remapped NVMe devices are created. The usual
+ * ahci and nvme drivers are then expected to bind to these devices and
+ * operate as normal.
+ *
+ * The PCI configuration space for the NVMe devices is completely
+ * unavailable, so we fake a minimal one and hope for the best.
+ *
+ * Interrupts are shared between the AHCI and NVMe devices. For simplicity,
+ * we only support the legacy interrupt here, although MSI support
+ * could potentially be added later.
+ */
+
+#define MODULE_NAME "intel-nvme-remap"
+
+#include <linux/ahci-remap.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#define AHCI_PCI_BAR_STANDARD 5
+
+/* Driver state for one claimed AHCI device and the fake bus built on it. */
+struct nvme_remap_dev {
+ struct pci_dev *dev; /* AHCI device */
+ struct pci_bus *bus; /* our fake PCI bus */
+ struct pci_sysdata sysdata; /* bus sysdata; see nrdev_from_bus() */
+ int irq_base; /* our fake interrupts */
+
+ /*
+ * Set when an all-ones write to a BAR register is seen, so that the
+ * next config read returns the BAR size (standard PCI behaviour).
+ * A single flag serves every device on the fake bus, which relies on
+ * an all-ones BAR write being immediately followed by a read of the
+ * same register.
+ */
+ bool bar_sizing;
+
+ /*
+ * Copies of the AHCI device's resources, registered on our fake bus.
+ */
+ struct resource ahci_resources[PCI_NUM_RESOURCES];
+
+ /* Resources corresponding to the NVMe devices. */
+ struct resource remapped_dev_mem[AHCI_MAX_REMAP];
+
+ /* Number of remapped NVMe devices found. */
+ int num_remapped_devices;
+};
+
+/* Map our fake bus back to the nvme_remap_dev that embeds its sysdata. */
+static inline struct nvme_remap_dev *nrdev_from_bus(struct pci_bus *bus)
+{
+ return container_of(bus->sysdata, struct nvme_remap_dev, sysdata);
+}
+
+/******** PCI configuration space **********/
+
+/*
+ * Helper macros for tweaking returned contents of PCI configuration space.
+ * They use reg, len and value from the expansion site: value holds len
+ * bytes read starting at reg. If fixup_reg lies in that range, its bytes
+ * are overwritten with fixed_value, low byte first. Arguments are fully
+ * parenthesized as callers pass expressions such as "A | B".
+ */
+#define NR_FIX8(fixup_reg, fixed_value) do { \
+	if (reg <= (fixup_reg) && (fixup_reg) < reg + len) \
+		((u8 *) value)[(fixup_reg) - reg] = (u8) (fixed_value); \
+	} while (0)
+
+#define NR_FIX16(fixup_reg, fixed_value) do { \
+	NR_FIX8(fixup_reg, fixed_value); \
+	NR_FIX8((fixup_reg) + 1, (fixed_value) >> 8); \
+	} while (0)
+
+#define NR_FIX24(fixup_reg, fixed_value) do { \
+	NR_FIX8(fixup_reg, fixed_value); \
+	NR_FIX8((fixup_reg) + 1, (fixed_value) >> 8); \
+	NR_FIX8((fixup_reg) + 2, (fixed_value) >> 16); \
+	} while (0)
+
+#define NR_FIX32(fixup_reg, fixed_value) do { \
+	NR_FIX16(fixup_reg, (u16) (fixed_value)); \
+	NR_FIX16((fixup_reg) + 2, (fixed_value) >> 16); \
+	} while (0)
+
+/*
+ * Read PCI config space of the slot 0 (AHCI) device.
+ * The read is passed through to the underlying device (a failure there is
+ * returned as-is), and the results are then tweaked in some cases.
+ */
+static int nvme_remap_pci_read_slot0(struct pci_bus *bus, int reg,
+ int len, u32 *value)
+{
+ struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
+ struct pci_bus *ahci_dev_bus = nrdev->dev->bus;
+ int ret;
+
+ ret = ahci_dev_bus->ops->read(ahci_dev_bus, nrdev->dev->devfn,
+ reg, len, value);
+ if (ret)
+ return ret;
+
+ /*
+ * Adjust the device class, to prevent this driver from attempting to
+ * additionally probe the device we're simulating here.
+ */
+ NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_SATA_AHCI);
+
+ /*
+ * Unset interrupt pin, otherwise ACPI tries to find routing
+ * info for our virtual IRQ, fails, and complains.
+ */
+ NR_FIX8(PCI_INTERRUPT_PIN, 0);
+
+ /*
+ * Truncate the AHCI BAR to not include the region that covers the
+ * hidden devices. This will cause the ahci driver to successfully
+ * probe the new device (instead of handing it over to this driver).
+ */
+ if (nrdev->bar_sizing) {
+ NR_FIX32(PCI_BASE_ADDRESS_5, ~(SZ_16K - 1));
+ nrdev->bar_sizing = false;
+ }
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Read PCI config space of a remapped device.
+ * Since the original PCI config space is inaccessible, we provide a minimal,
+ * fake config space instead.
+ */
+static int nvme_remap_pci_read_remapped(struct pci_bus *bus, unsigned int port,
+					int reg, int len, u32 *value)
+{
+	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
+	struct resource *remapped_mem;
+
+	if (port > nrdev->num_remapped_devices)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	*value = 0;
+	remapped_mem = &nrdev->remapped_dev_mem[port - 1];
+
+	/* Set a Vendor ID, otherwise Linux assumes no device is present */
+	NR_FIX16(PCI_VENDOR_ID, PCI_VENDOR_ID_INTEL);
+
+	/* Always on & bus mastering; parens matter as the macro shifts this */
+	NR_FIX16(PCI_COMMAND, (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER));
+
+	/* Set class so that nvme driver probes us */
+	NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_EXPRESS);
+
+	if (nrdev->bar_sizing) {
+		NR_FIX32(PCI_BASE_ADDRESS_0,
+			 ~(resource_size(remapped_mem) - 1));
+		nrdev->bar_sizing = false;
+	} else {
+		resource_size_t mem_start = remapped_mem->start;
+
+		mem_start |= PCI_BASE_ADDRESS_MEM_TYPE_64;
+		NR_FIX32(PCI_BASE_ADDRESS_0, mem_start);
+		/* upper_32_bits() stays defined if resource_size_t is 32-bit */
+		NR_FIX32(PCI_BASE_ADDRESS_1, upper_32_bits(mem_start));
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* Config space read entry point: slot 0 is AHCI, other slots are NVMe. */
+static int nvme_remap_pci_read(struct pci_bus *bus, unsigned int devfn,
+			       int reg, int len, u32 *value)
+{
+	unsigned int slot = PCI_SLOT(devfn);
+
+	if (slot == 0)
+		return nvme_remap_pci_read_slot0(bus, reg, len, value);
+	return nvme_remap_pci_read_remapped(bus, slot, reg, len, value);
+}
+
+/*
+ * Write PCI config space of the slot 0 (AHCI) device.
+ * Only writes to the BAR registers are passed through to the real device;
+ * everything else is rejected. Otherwise, the ahci driver could make changes
+ * (e.g. unset PCI bus master) that would affect the NVMe devices.
+ */
+static int nvme_remap_pci_write_slot0(struct pci_bus *bus, int reg,
+				      int len, u32 value)
+{
+	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
+	struct pci_bus *parent = nrdev->dev->bus;
+
+	/* Anything outside the BAR registers is refused outright. */
+	if (reg < PCI_BASE_ADDRESS_0 || reg > PCI_BASE_ADDRESS_5)
+		return PCIBIOS_SET_FAILED;
+
+	/*
+	 * An all-ones BAR write means the size of the memory region is
+	 * being checked. Remember it so that the next read can answer
+	 * with an appropriate size.
+	 */
+	if (value == ~0)
+		nrdev->bar_sizing = true;
+
+	/* Forward the BAR write to the real AHCI device. */
+	return parent->ops->write(parent, nrdev->dev->devfn,
+				  reg, len, value);
+}
+
+/*
+ * Write PCI config space of a remapped device.
+ * The original config space is inaccessible, so every write is rejected
+ * apart from the all-ones BAR write used for size probing.
+ */
+static int nvme_remap_pci_write_remapped(struct pci_bus *bus,
+					 unsigned int port,
+					 int reg, int len, u32 value)
+{
+	struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
+	bool is_bar = reg >= PCI_BASE_ADDRESS_0 && reg <= PCI_BASE_ADDRESS_5;
+
+	if (port > nrdev->num_remapped_devices)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!is_bar || value != ~0)
+		return PCIBIOS_SET_FAILED;
+
+	/*
+	 * All-ones written to a BAR: the size of the memory region is
+	 * being checked. Flag this so that the next read replies with an
+	 * appropriate size.
+	 */
+	nrdev->bar_sizing = true;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* Config space write entry point: slot 0 is AHCI, other slots are NVMe. */
+static int nvme_remap_pci_write(struct pci_bus *bus, unsigned int devfn,
+				int reg, int len, u32 value)
+{
+	unsigned int slot = PCI_SLOT(devfn);
+
+	if (slot == 0)
+		return nvme_remap_pci_write_slot0(bus, reg, len, value);
+	return nvme_remap_pci_write_remapped(bus, slot, reg, len, value);
+}
+
+static struct pci_ops nvme_remap_pci_ops = {
+ .read = nvme_remap_pci_read, /* config space reads on our fake bus */
+ .write = nvme_remap_pci_write, /* config space writes on our fake bus */
+};
+
+
+/******** Initialization & exit **********/
+
+/*
+ * Pick a PCI domain ID for our fake bus: one more than the highest domain
+ * currently in use, and never below 0x10000 so that we cannot clash with
+ * ACPI _SEG domains (16 bits).
+ */
+static int find_free_domain(void)
+{
+	struct pci_bus *bus = pci_find_next_bus(NULL);
+	int highest = 0xffff;
+
+	for (; bus; bus = pci_find_next_bus(bus))
+		highest = max_t(int, highest, pci_domain_nr(bus));
+	return highest + 1;
+}
+
+/*
+ * Scan the AHCI BAR for remapped NVMe devices. Each one found gets a
+ * fixed memory resource, which is appended to the resources list.
+ * Returns 0 if at least one device was found, -ENODEV otherwise.
+ */
+static int find_remapped_devices(struct nvme_remap_dev *nrdev,
+				 struct list_head *resources)
+{
+	struct pci_dev *ahci = nrdev->dev;
+	resource_size_t bar_len = pci_resource_len(ahci, AHCI_PCI_BAR_STANDARD);
+	void __iomem *mmio;
+	int i, count = 0;
+	u32 cap;
+
+	mmio = pcim_iomap(ahci, AHCI_PCI_BAR_STANDARD, bar_len);
+	if (!mmio)
+		return -ENODEV;
+
+	/* Check if this device might have remapped nvme devices. */
+	if (bar_len < SZ_512K || !(readl(mmio + AHCI_VSCAP) & 1))
+		return -ENODEV;
+
+	cap = readq(mmio + AHCI_REMAP_CAP);
+	for (i = AHCI_MAX_REMAP - 1; i >= 0; i--) {
+		struct resource *mem;
+
+		/* Skip slots not flagged in cap or not of NVMe class. */
+		if (!(cap & (1 << i)) ||
+		    readl(mmio + ahci_remap_dcc(i)) != PCI_CLASS_STORAGE_EXPRESS)
+			continue;
+
+		/* We've found a remapped device */
+		mem = &nrdev->remapped_dev_mem[count++];
+		mem->start = pci_resource_start(ahci, AHCI_PCI_BAR_STANDARD) +
+			     ahci_remap_base(i);
+		mem->end = mem->start + AHCI_REMAP_N_SIZE - 1;
+		mem->flags = IORESOURCE_MEM | IORESOURCE_PCI_FIXED;
+		pci_add_resource(resources, mem);
+	}
+
+	pcim_iounmap(ahci, mmio);
+
+	if (count == 0)
+		return -ENODEV;
+
+	nrdev->num_remapped_devices = count;
+	dev_info(&ahci->dev, "Found %d remapped NVMe devices\n", count);
+	return 0;
+}
+
+/* devm action: stop and remove the fake root bus passed in as data. */
+static void nvme_remap_remove_root_bus(void *data)
+{
+	/* data is the struct pci_bus that probe registered this action for */
+	pci_stop_root_bus(data);
+	pci_remove_root_bus(data);
+}
+
+/* Claim the AHCI device and expose it plus remapped NVMe on a fake bus. */
+static int nvme_remap_probe(struct pci_dev *dev,
+			    const struct pci_device_id *id)
+{
+	struct nvme_remap_dev *nrdev;
+	LIST_HEAD(resources);
+	int i, ret;
+	struct pci_dev *child;
+
+	nrdev = devm_kzalloc(&dev->dev, sizeof(*nrdev), GFP_KERNEL);
+	if (!nrdev)
+		return -ENOMEM;
+
+	nrdev->sysdata.domain = find_free_domain();
+	nrdev->sysdata.nvme_remap_dev = dev;
+	nrdev->dev = dev;
+	pci_set_drvdata(dev, nrdev);
+
+	ret = pcim_enable_device(dev);
+	if (ret < 0)
+		return ret;
+	pci_set_master(dev);
+
+	ret = find_remapped_devices(nrdev, &resources);
+	if (ret)
+		return ret;
+
+	/* Add resources from the original AHCI device */
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		struct resource *res = &dev->resource[i];
+
+		if (res->start) {
+			struct resource *nr_res = &nrdev->ahci_resources[i];
+
+			nr_res->start = res->start;
+			nr_res->end = res->end;
+			nr_res->flags = res->flags;
+			pci_add_resource(&resources, nr_res);
+		}
+	}
+
+	/* Create virtual interrupts */
+	nrdev->irq_base = devm_irq_alloc_descs(&dev->dev, -1, 0,
+					       nrdev->num_remapped_devices + 1, 0);
+	if (nrdev->irq_base < 0)
+		return nrdev->irq_base;
+
+	/* Create and populate PCI bus */
+	nrdev->bus = pci_create_root_bus(&dev->dev, 0, &nvme_remap_pci_ops,
+					 &nrdev->sysdata, &resources);
+	if (!nrdev->bus)
+		return -ENODEV;
+
+	if (devm_add_action_or_reset(&dev->dev, nvme_remap_remove_root_bus,
+				     nrdev->bus))
+		return -ENOMEM;
+
+	/* We don't support sharing MSI interrupts between these devices */
+	nrdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
+	pci_scan_child_bus(nrdev->bus);
+
+	list_for_each_entry(child, &nrdev->bus->devices, bus_list) {
+		/*
+		 * Prevent PCI core from trying to move memory BARs around.
+		 * The hidden NVMe devices are at fixed locations.
+		 */
+		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+			struct resource *res = &child->resource[i];
+
+			if (res->flags & IORESOURCE_MEM)
+				res->flags |= IORESOURCE_PCI_FIXED;
+		}
+
+		/* Share the legacy IRQ between all devices */
+		child->irq = dev->irq;
+	}
+
+	pci_assign_unassigned_bus_resources(nrdev->bus);
+	pci_bus_add_devices(nrdev->bus);
+
+	return 0;
+}
+
+static const struct pci_device_id nvme_remap_ids[] = {
+	/*
+	 * Match every Intel RAID-class storage controller.
+	 *
+	 * The ahci driver matches some of the same devices. The two do not
+	 * fight over them: ahci only probes successfully when there are
+	 * *no* remapped NVMe devices, whereas this driver only probes
+	 * successfully when there *are* remapped NVMe devices that need
+	 * handling.
+	 */
+	{
+		PCI_VDEVICE(INTEL, PCI_ANY_ID),
+		.class_mask = 0xffffff00,
+		.class = PCI_CLASS_STORAGE_RAID << 8,
+	},
+	{ }	/* terminating entry */
+};
+MODULE_DEVICE_TABLE(pci, nvme_remap_ids);
+
+static struct pci_driver nvme_remap_drv = {
+ .name = MODULE_NAME,
+ .id_table = nvme_remap_ids,
+ .probe = nvme_remap_probe, /* no .remove here; probe uses devm/pcim */
+};
+module_pci_driver(nvme_remap_drv);
+
+MODULE_AUTHOR("Daniel Drake <drake@endlessm.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 3cac7e40456e..22b6a649c031 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -695,8 +695,6 @@ int xhci_run(struct usb_hcd *hcd)
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
"Finished xhci_run for USB2 roothub");
- set_bit(HCD_FLAG_DEFER_RH_REGISTER, &hcd->flags);
-
xhci_create_dbc_dev(xhci);
xhci_debugfs_init(xhci);
|