diff options
-rw-r--r-- | .SRCINFO | 22 | ||||
-rw-r--r-- | 1-2-mt76-mt7915-send-EAPOL-frames-at-lowest-rate.patch | 31 | ||||
-rw-r--r-- | 1-2-mt76-mt7921-robustify-hardware-initialization-flow.patch | 125 | ||||
-rw-r--r-- | 2-2-mt76-mt7921-send-EAPOL-frames-at-lowest-rate.patch | 31 | ||||
-rw-r--r-- | PCI-Add-more-NVIDIA-controllers-to-the-MSI-masking-q.patch | 24 | ||||
-rw-r--r-- | PKGBUILD | 25 | ||||
-rw-r--r-- | cpufreq-intel_pstate-ITMT-support-for-overclocked-sy.patch | 55 | ||||
-rw-r--r-- | iommu-intel-do-deep-dma-unmapping-to-avoid-kernel-fl.patch | 88 | ||||
-rw-r--r-- | lg-laptop-Recognize-more-models.patch | 39 | ||||
-rw-r--r-- | x86-ACPI-State-Optimize-C3-entry-on-AMD-CPUs.patch | 58 | ||||
-rw-r--r-- | x86-change-default-to-spec_store_bypass_disable-prct.patch | 255 | ||||
-rw-r--r-- | zstd-udpate-fixes.patch | 241 |
12 files changed, 1 insertions, 993 deletions
@@ -20,23 +20,12 @@ pkgbase = linux-xanmod-rog source = https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.15.tar.sign source = https://github.com/xanmod/linux/releases/download/5.15.14-xanmod1/patch-5.15.14-xanmod1.xz source = choose-gcc-optimization.sh - source = sphinx-workaround.patch - source = PCI-Add-more-NVIDIA-controllers-to-the-MSI-masking-q.patch - source = iommu-intel-do-deep-dma-unmapping-to-avoid-kernel-fl.patch - source = cpufreq-intel_pstate-ITMT-support-for-overclocked-sy.patch source = Bluetooth-btintel-Fix-bdaddress-comparison-with-garb.patch - source = lg-laptop-Recognize-more-models.patch - source = zstd-udpate-fixes.patch - source = x86-ACPI-State-Optimize-C3-entry-on-AMD-CPUs.patch - source = x86-change-default-to-spec_store_bypass_disable-prct.patch source = acpi-battery-Always-read-fresh-battery-state-on-update.patch source = cfg80211-dont-WARN-if-a-self-managed-device.patch source = HID-asus-Reduce-object-size-by-consolidating-calls.patch source = v16-asus-wmi-Add-support-for-custom-fan-curves.patch - source = 1-2-mt76-mt7915-send-EAPOL-frames-at-lowest-rate.patch - source = 2-2-mt76-mt7921-send-EAPOL-frames-at-lowest-rate.patch source = mt76-mt7921-enable-VO-tx-aggregation.patch - source = 1-2-mt76-mt7921-robustify-hardware-initialization-flow.patch source = 1-2-Bluetooth-btusb-Add-Mediatek-MT7921-support-for-Foxconn.patch source = 2-2-Bluetooth-btusb-Add-Mediatek-MT7921-support-for-IMC-Network.patch source = Bluetooth-btusb-Add-support-for-IMC-Networks-Mediatek-Chip.patch @@ -49,23 +38,12 @@ pkgbase = linux-xanmod-rog sha256sums = SKIP sha256sums = ea5dceb862b22e645d72e0d759b646643bc6569d0910f1711aedb66447962c76 sha256sums = 5b8eddb90671f3e8469a023b7ed0d3c5a9521f662affa1d541063e273b64dba8 - sha256sums = 52fc0fcd806f34e774e36570b2a739dbdf337f7ff679b1c1139bee54d03301eb - sha256sums = 20c0926964a4286486c3ae1dd05014767f42388316a7d60b4f52639536fbd615 - sha256sums = 6c48a3e96b49ddb6ece78f3abb1a8a889a77c097f93b14ab052f1e19eda05076 - 
sha256sums = 54f161457cc4de4ebd5770c16f14f7a47f1dbbc4f1af83dec651e6a8b1eaad56 sha256sums = 802e9f8d5c98088946b8ad5629e0732350b4b0fd91c24e12af31935791fcd556 - sha256sums = 80538b0f529ed32118dac933d50599c6e843ae31882361fd88cd40a1dc7d21f9 - sha256sums = d636bd74a71b2d898b20246e3c013b853fd1a462ed622e7e90302d53b4157428 - sha256sums = 923230ed8367e28adfdeed75d3cdba9eec6b781818c37f6f3d3eb64101d2e716 - sha256sums = cc401107f1bf7b7d8e8a78ee594f9db4b6fa252b7239b6aa88f678aef84d935c sha256sums = f7a4bf6293912bfc4a20743e58a5a266be8c4dbe3c1862d196d3a3b45f2f7c90 sha256sums = 3d8961438b5c8110588ff0b881d472fc71a4304d306808d78a4055a4150f351e sha256sums = 544464bf0807b324120767d55867f03014a9fda4e1804768ca341be902d7ade4 sha256sums = 0c422d8f420c1518aab1b980c6cdb6e029a4fa9cde1fd99a63670bb105a44f36 - sha256sums = 4bfbff4eba07fc9de2ce78097a4a269509468ba0e24c15a82905cd94e093ad55 - sha256sums = 021f8539ab2fb722b46937b95fdab22a2308236a24ecc1a9ea8db4853721dd39 sha256sums = 1ce9fd988201c4d2e48794c58acda5b768ec0fea1d29555e99d35cd2712281e4 - sha256sums = c368cc4eefff20b7ae904eec686b7e72b46ff02b32c8a4fbd6bd4039f087e7ba sha256sums = 236cdadf0b1472945c0d7570caeed7b95929aabed6872319c9d0969a819689e9 sha256sums = cc2aa580d69801aa1afb0d72ecf094fe13c797363d3d5928c868d3a389910b7b sha256sums = 292a7e32b248c7eee6e2f5407d609d03d985f367d329adb02b9d6dba1f85b44c diff --git a/1-2-mt76-mt7915-send-EAPOL-frames-at-lowest-rate.patch b/1-2-mt76-mt7915-send-EAPOL-frames-at-lowest-rate.patch deleted file mode 100644 index c2e18b71ae18..000000000000 --- a/1-2-mt76-mt7915-send-EAPOL-frames-at-lowest-rate.patch +++ /dev/null @@ -1,31 +0,0 @@ -From: Ryder Lee <ryder.lee@mediatek.com> -To: Felix Fietkau <nbd@nbd.name> -CC: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>, Shayne Chen - <shayne.chen@mediatek.com>, Evelyn Tsai <evelyn.tsai@mediatek.com>, Sean Wang - <sean.wang@mediatek.com>, <linux-wireless@vger.kernel.org>, - <linux-mediatek@lists.infradead.org>, Ryder Lee <ryder.lee@mediatek.com> -Subject: [PATCH 1/2] 
mt76: mt7915: send EAPOL frames at lowest rate -Date: Sat, 17 Jul 2021 13:05:48 +0800 - -The firmware rate control may choose the high rate for EAPOL frames, -so checking IEEE80211_TX_CTL_USE_MINRATE to use the lowest TX rate. - -Signed-off-by: Ryder Lee <ryder.lee@mediatek.com> ---- - drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c -index 2462704094b0..d47dd0f96bdb 100644 ---- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c -+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c -@@ -869,7 +869,8 @@ mt7915_mac_write_txwi_80211(struct mt7915_dev *dev, __le32 *txwi, - txwi[3] &= ~cpu_to_le32(MT_TXD3_PROTECT_FRAME); - } - -- if (!ieee80211_is_data(fc) || multicast) -+ if (!ieee80211_is_data(fc) || multicast || -+ info->flags & IEEE80211_TX_CTL_USE_MINRATE) - val |= MT_TXD2_FIX_RATE; - - txwi[2] |= cpu_to_le32(val); diff --git a/1-2-mt76-mt7921-robustify-hardware-initialization-flow.patch b/1-2-mt76-mt7921-robustify-hardware-initialization-flow.patch deleted file mode 100644 index f475476f8832..000000000000 --- a/1-2-mt76-mt7921-robustify-hardware-initialization-flow.patch +++ /dev/null @@ -1,125 +0,0 @@ -From: <sean.wang@mediatek.com> -To: <nbd@nbd.name>, <lorenzo.bianconi@redhat.com> -CC: <sean.wang@mediatek.com>, <Soul.Huang@mediatek.com>, - <YN.Chen@mediatek.com>, <Leon.Yen@mediatek.com>, - <Eric-SY.Chang@mediatek.com>, <Deren.Wu@mediatek.com>, <km.lin@mediatek.com>, - <robin.chiu@mediatek.com>, <ch.yeh@mediatek.com>, <posh.sun@mediatek.com>, - <ted.huang@mediatek.com>, <Eric.Liang@mediatek.com>, - <Stella.Chang@mediatek.com>, <jemele@google.com>, - <linux-wireless@vger.kernel.org>, <linux-mediatek@lists.infradead.org> -Subject: [PATCH 1/2] mt76: mt7921: robustify hardware initialization flow -Date: Tue, 14 Sep 2021 23:50:21 +0800 - -From: Sean Wang <sean.wang@mediatek.com> - -Robustify hardware 
initialization in the current driver probing flow -to get rid of the device is possibly lost after the machine boot due -to possible firmware abnormal state by trying to recover the failure -with more chances. - -Tested-by: Leon Yen <Leon.Yen@mediatek.com> -Tested-by: YN Chen <YN.Chen@mediatek.com> -Signed-off-by: Sean Wang <sean.wang@mediatek.com> ---- - .../net/wireless/mediatek/mt76/mt7921/init.c | 53 ++++++++++++++----- - .../wireless/mediatek/mt76/mt7921/mt7921.h | 1 + - 2 files changed, 41 insertions(+), 13 deletions(-) - -diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c -index 1f37e64b6038..d26166a612f0 100644 ---- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c -+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c -@@ -146,33 +146,60 @@ int mt7921_mac_init(struct mt7921_dev *dev) - return mt76_connac_mcu_set_rts_thresh(&dev->mt76, 0x92b, 0); - } - --static int mt7921_init_hardware(struct mt7921_dev *dev) -+static int __mt7921_init_hardware(struct mt7921_dev *dev) - { -- int ret, idx; -- -- ret = mt7921_dma_init(dev); -- if (ret) -- return ret; -- -- set_bit(MT76_STATE_INITIALIZED, &dev->mphy.state); -+ struct mt76_dev *mdev = &dev->mt76; -+ int ret; - - /* force firmware operation mode into normal state, - * which should be set before firmware download stage. 
- */ - mt76_wr(dev, MT_SWDEF_MODE, MT_SWDEF_NORMAL_MODE); -- - ret = mt7921_mcu_init(dev); - if (ret) -- return ret; -+ goto out; - - ret = mt7921_eeprom_init(dev); -- if (ret < 0) -- return ret; -+ if (ret) -+ goto out; - - ret = mt7921_mcu_set_eeprom(dev); -+ if (ret) -+ goto out; -+ -+ ret = mt7921_mac_init(dev); -+out: -+ if (ret && mdev->eeprom.data) { -+ devm_kfree(mdev->dev, mdev->eeprom.data); -+ mdev->eeprom.data = NULL; -+ } -+ -+ return ret; -+} -+ -+static int mt7921_init_hardware(struct mt7921_dev *dev) -+{ -+ int ret, idx, i; -+ -+ ret = mt7921_dma_init(dev); - if (ret) - return ret; - -+ set_bit(MT76_STATE_INITIALIZED, &dev->mphy.state); -+ -+ for (i = 0; i < MT7921_MCU_INIT_RETRY_COUNT; i++) { -+ ret = __mt7921_init_hardware(dev); -+ if (!ret) -+ break; -+ -+ mt7921_wpdma_reset(dev, true); -+ } -+ -+ if (i == MT7921_MCU_INIT_RETRY_COUNT) { -+ dev_err(dev->mt76.dev, "hardware init failed\n"); -+ return ret; -+ } -+ - /* Beacon and mgmt frames should occupy wcid 0 */ - idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7921_WTBL_STA - 1); - if (idx) -@@ -183,7 +210,7 @@ static int mt7921_init_hardware(struct mt7921_dev *dev) - dev->mt76.global_wcid.tx_info |= MT_WCID_TX_INFO_SET; - rcu_assign_pointer(dev->mt76.wcid[idx], &dev->mt76.global_wcid); - -- return mt7921_mac_init(dev); -+ return 0; - } - - int mt7921_register_device(struct mt7921_dev *dev) -diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h -index 6a47ba65b96e..cee7a2507224 100644 ---- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h -+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h -@@ -29,6 +29,7 @@ - #define MT7921_RX_MCU_RING_SIZE 512 - - #define MT7921_DRV_OWN_RETRY_COUNT 10 -+#define MT7921_MCU_INIT_RETRY_COUNT 10 - - #define MT7921_FIRMWARE_WM "mediatek/WIFI_RAM_CODE_MT7961_1.bin" - #define MT7921_ROM_PATCH "mediatek/WIFI_MT7961_patch_mcu_1_2_hdr.bin" diff --git 
a/2-2-mt76-mt7921-send-EAPOL-frames-at-lowest-rate.patch b/2-2-mt76-mt7921-send-EAPOL-frames-at-lowest-rate.patch deleted file mode 100644 index 0812eb4e785c..000000000000 --- a/2-2-mt76-mt7921-send-EAPOL-frames-at-lowest-rate.patch +++ /dev/null @@ -1,31 +0,0 @@ -From: Ryder Lee <ryder.lee@mediatek.com> -To: Felix Fietkau <nbd@nbd.name> -CC: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>, Shayne Chen - <shayne.chen@mediatek.com>, Evelyn Tsai <evelyn.tsai@mediatek.com>, Sean Wang - <sean.wang@mediatek.com>, <linux-wireless@vger.kernel.org>, - <linux-mediatek@lists.infradead.org>, Ryder Lee <ryder.lee@mediatek.com> -Subject: [PATCH 2/2] mt76: mt7921: send EAPOL frames at lowest rate -Date: Sat, 17 Jul 2021 13:05:49 +0800 - -The firmware rate control may choose the high rate for EAPOL frames, -so checking IEEE80211_TX_CTL_USE_MINRATE to use the lowest TX rate. - -Signed-off-by: Ryder Lee <ryder.lee@mediatek.com> ---- - drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c -index 7fe2e3a50428..eb0d98c8d5d8 100644 ---- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c -+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c -@@ -702,7 +702,8 @@ mt7921_mac_write_txwi_80211(struct mt7921_dev *dev, __le32 *txwi, - txwi[3] &= ~cpu_to_le32(MT_TXD3_PROTECT_FRAME); - } - -- if (!ieee80211_is_data(fc) || multicast) -+ if (!ieee80211_is_data(fc) || multicast || -+ info->flags & IEEE80211_TX_CTL_USE_MINRATE) - val |= MT_TXD2_FIX_RATE; - - txwi[2] |= cpu_to_le32(val); diff --git a/PCI-Add-more-NVIDIA-controllers-to-the-MSI-masking-q.patch b/PCI-Add-more-NVIDIA-controllers-to-the-MSI-masking-q.patch deleted file mode 100644 index a48dbd9c9c6b..000000000000 --- a/PCI-Add-more-NVIDIA-controllers-to-the-MSI-masking-q.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 01cf9c1f2ac7862230c7bc348ef32e349420f14b Mon Sep 17 00:00:00 2001 
-From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org> -Date: Thu, 18 Nov 2021 22:53:31 +0100 -Subject: [PATCH 4/4] PCI: Add more NVIDIA controllers to the MSI masking quirk - -For: https://bugs.archlinux.org/task/72734 -For: https://bugs.archlinux.org/task/72777 ---- - drivers/pci/quirks.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index 208fa03acdda..7fdb7e9c2e12 100644 ---- a/drivers/pci/quirks.c -+++ b/drivers/pci/quirks.c -@@ -5802,3 +5802,5 @@ static void nvidia_ion_ahci_fixup(struct pci_dev *pdev) - pdev->dev_flags |= PCI_DEV_FLAGS_HAS_MSI_MASKING; - } - DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab8, nvidia_ion_ahci_fixup); -+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab9, nvidia_ion_ahci_fixup); -+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0d88, nvidia_ion_ahci_fixup); --- -2.34.0 - @@ -97,7 +97,7 @@ _branch=${xanmod%%\.*\-*}.x # 5.x source=("https://cdn.kernel.org/pub/linux/kernel/v${_branch}/linux-${_major}.tar."{xz,sign} "https://github.com/xanmod/linux/releases/download/${xanmod}/patch-${xanmod}.xz" "choose-gcc-optimization.sh" - "sphinx-workaround.patch" + #"sphinx-workaround.patch" # apply incremental kernel updates ahead of official Xanmod release #"https://cdn.kernel.org/pub/linux/kernel/v5.x/patch-5.15.1.xz" @@ -105,11 +105,7 @@ source=("https://cdn.kernel.org/pub/linux/kernel/v${_branch}/linux-${_major}.tar #"Linux-5.15.7.patch.xz" # Arch: misc hotfixes - "PCI-Add-more-NVIDIA-controllers-to-the-MSI-masking-q.patch" - "iommu-intel-do-deep-dma-unmapping-to-avoid-kernel-fl.patch" - "cpufreq-intel_pstate-ITMT-support-for-overclocked-sy.patch" "Bluetooth-btintel-Fix-bdaddress-comparison-with-garb.patch" - "lg-laptop-Recognize-more-models.patch" # amd-pstate v5 included in Xanmod # multigenerational lru v5 included in Xanmod @@ -117,13 +113,8 @@ source=("https://cdn.kernel.org/pub/linux/kernel/v${_branch}/linux-${_major}.tar # 5.17 TCP csum optimization included in 
Xanmod # 5.16: zstd 1.4.10 update stack size regression fixes - "zstd-udpate-fixes.patch" - # 5.16: don't drop shared caches on C3 state transitions - "x86-ACPI-State-Optimize-C3-entry-on-AMD-CPUs.patch" - # 5.16 spectre defaults - "x86-change-default-to-spec_store_bypass_disable-prct.patch" # -- patch from Chromium developers; more accurately report battery state changes "acpi-battery-Always-read-fresh-battery-state-on-update.patch" @@ -136,10 +127,7 @@ source=("https://cdn.kernel.org/pub/linux/kernel/v${_branch}/linux-${_major}.tar "v16-asus-wmi-Add-support-for-custom-fan-curves.patch" # mediatek mt7921 bt/wifi patches - "1-2-mt76-mt7915-send-EAPOL-frames-at-lowest-rate.patch" - "2-2-mt76-mt7921-send-EAPOL-frames-at-lowest-rate.patch" "mt76-mt7921-enable-VO-tx-aggregation.patch" - "1-2-mt76-mt7921-robustify-hardware-initialization-flow.patch" "1-2-Bluetooth-btusb-Add-Mediatek-MT7921-support-for-Foxconn.patch" "2-2-Bluetooth-btusb-Add-Mediatek-MT7921-support-for-IMC-Network.patch" "Bluetooth-btusb-Add-support-for-IMC-Networks-Mediatek-Chip.patch" @@ -158,23 +146,12 @@ sha256sums=('57b2cf6991910e3b67a1b3490022e8a0674b6965c74c12da1e99d138d1991ee8' 'SKIP' 'ea5dceb862b22e645d72e0d759b646643bc6569d0910f1711aedb66447962c76' '5b8eddb90671f3e8469a023b7ed0d3c5a9521f662affa1d541063e273b64dba8' - '52fc0fcd806f34e774e36570b2a739dbdf337f7ff679b1c1139bee54d03301eb' - '20c0926964a4286486c3ae1dd05014767f42388316a7d60b4f52639536fbd615' - '6c48a3e96b49ddb6ece78f3abb1a8a889a77c097f93b14ab052f1e19eda05076' - '54f161457cc4de4ebd5770c16f14f7a47f1dbbc4f1af83dec651e6a8b1eaad56' '802e9f8d5c98088946b8ad5629e0732350b4b0fd91c24e12af31935791fcd556' - '80538b0f529ed32118dac933d50599c6e843ae31882361fd88cd40a1dc7d21f9' - 'd636bd74a71b2d898b20246e3c013b853fd1a462ed622e7e90302d53b4157428' - '923230ed8367e28adfdeed75d3cdba9eec6b781818c37f6f3d3eb64101d2e716' - 'cc401107f1bf7b7d8e8a78ee594f9db4b6fa252b7239b6aa88f678aef84d935c' 'f7a4bf6293912bfc4a20743e58a5a266be8c4dbe3c1862d196d3a3b45f2f7c90' 
'3d8961438b5c8110588ff0b881d472fc71a4304d306808d78a4055a4150f351e' '544464bf0807b324120767d55867f03014a9fda4e1804768ca341be902d7ade4' '0c422d8f420c1518aab1b980c6cdb6e029a4fa9cde1fd99a63670bb105a44f36' - '4bfbff4eba07fc9de2ce78097a4a269509468ba0e24c15a82905cd94e093ad55' - '021f8539ab2fb722b46937b95fdab22a2308236a24ecc1a9ea8db4853721dd39' '1ce9fd988201c4d2e48794c58acda5b768ec0fea1d29555e99d35cd2712281e4' - 'c368cc4eefff20b7ae904eec686b7e72b46ff02b32c8a4fbd6bd4039f087e7ba' '236cdadf0b1472945c0d7570caeed7b95929aabed6872319c9d0969a819689e9' 'cc2aa580d69801aa1afb0d72ecf094fe13c797363d3d5928c868d3a389910b7b' '292a7e32b248c7eee6e2f5407d609d03d985f367d329adb02b9d6dba1f85b44c' diff --git a/cpufreq-intel_pstate-ITMT-support-for-overclocked-sy.patch b/cpufreq-intel_pstate-ITMT-support-for-overclocked-sy.patch deleted file mode 100644 index 250b98e68fc6..000000000000 --- a/cpufreq-intel_pstate-ITMT-support-for-overclocked-sy.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 65b4f362db508f63e53d18e26bc3a574c2ca0c21 Mon Sep 17 00:00:00 2001 -From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> -Date: Thu, 18 Nov 2021 21:18:01 -0800 -Subject: [PATCH 1/3] cpufreq: intel_pstate: ITMT support for overclocked - system - -On systems with overclocking enabled, CPPC Highest Performance can be -hard coded to 0xff. In this case even if we have cores with different -highest performance, ITMT can't be enabled as the current implementation -depends on CPPC Highest Performance. - -On such systems we can use MSR_HWP_CAPABILITIES maximum performance field -when CPPC.Highest Performance is 0xff. - -Due to legacy reasons, we can't solely depend on MSR_HWP_CAPABILITIES as -in some older systems CPPC Highest Performance is the only way to identify -different performing cores. 
- -Reported-by: Michael Larabel <Michael@MichaelLarabel.com> -Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> ---- - drivers/cpufreq/intel_pstate.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index e15c3bc17a55..8a2c6b58b652 100644 ---- a/drivers/cpufreq/intel_pstate.c -+++ b/drivers/cpufreq/intel_pstate.c -@@ -335,6 +335,8 @@ static void intel_pstste_sched_itmt_work_fn(struct work_struct *work) - - static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn); - -+#define CPPC_MAX_PERF U8_MAX -+ - static void intel_pstate_set_itmt_prio(int cpu) - { - struct cppc_perf_caps cppc_perf; -@@ -345,6 +347,14 @@ static void intel_pstate_set_itmt_prio(int cpu) - if (ret) - return; - -+ /* -+ * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff. -+ * In this case we can't use CPPC.highest_perf to enable ITMT. -+ * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide. -+ */ -+ if (cppc_perf.highest_perf == CPPC_MAX_PERF) -+ cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached)); -+ - /* - * The priorities can be set regardless of whether or not - * sched_set_itmt_support(true) has been called and it is valid to --- -2.34.1 - diff --git a/iommu-intel-do-deep-dma-unmapping-to-avoid-kernel-fl.patch b/iommu-intel-do-deep-dma-unmapping-to-avoid-kernel-fl.patch deleted file mode 100644 index 4550d3382484..000000000000 --- a/iommu-intel-do-deep-dma-unmapping-to-avoid-kernel-fl.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 9e783ad2d0a1e90e8befb138fc2546db895a19ee Mon Sep 17 00:00:00 2001 -From: Ajay Garg <ajaygargnsit@gmail.com> -Date: Tue, 12 Oct 2021 19:26:53 +0530 -Subject: [PATCH 1/4] iommu: intel: do deep dma-unmapping, to avoid - kernel-flooding. 
- -Origins at : -https://lists.linuxfoundation.org/pipermail/iommu/2021-October/thread.html - -=== Changes from v1 => v2 === - -a) -Improved patch-description. - -b) -A more root-level fix, as suggested by - - 1. - Alex Williamson <alex.williamson@redhat.com> - - 2. - Lu Baolu <baolu.lu@linux.intel.com> - -=== Issue === - -Kernel-flooding is seen, when an x86_64 L1 guest (Ubuntu-21) is booted in qemu/kvm -on a x86_64 host (Ubuntu-21), with a host-pci-device attached. - -Following kind of logs, along with the stacktraces, cause the flood : - -...... - DMAR: ERROR: DMA PTE for vPFN 0x428ec already set (to 3f6ec003 not 3f6ec003) - DMAR: ERROR: DMA PTE for vPFN 0x428ed already set (to 3f6ed003 not 3f6ed003) - DMAR: ERROR: DMA PTE for vPFN 0x428ee already set (to 3f6ee003 not 3f6ee003) - DMAR: ERROR: DMA PTE for vPFN 0x428ef already set (to 3f6ef003 not 3f6ef003) - DMAR: ERROR: DMA PTE for vPFN 0x428f0 already set (to 3f6f0003 not 3f6f0003) -...... - -=== Current Behaviour, leading to the issue === - -Currently, when we do a dma-unmapping, we unmap/unlink the mappings, but -the pte-entries are not cleared. - -Thus, following sequencing would flood the kernel-logs : - -i) -A dma-unmapping makes the real/leaf-level pte-slot invalid, but the -pte-content itself is not cleared. - -ii) -Now, during some later dma-mapping procedure, as the pte-slot is about -to hold a new pte-value, the intel-iommu checks if a prior -pte-entry exists in the pte-slot. If it exists, it logs a kernel-error, -along with a corresponding stacktrace. - -iii) -Step ii) runs in abundance, and the kernel-logs run insane. - -=== Fix === - -We ensure that as part of a dma-unmapping, each (unmapped) pte-slot -is also cleared of its value/content (at the leaf-level, where the -real mapping from a iova => pfn mapping is stored). - -This completes a "deep" dma-unmapping. 
- -Signed-off-by: Ajay Garg <ajaygargnsit@gmail.com> -Link: https://lore.kernel.org/linux-iommu/20211012135653.3852-1-ajaygargnsit@gmail.com/ ---- - drivers/iommu/intel/iommu.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c -index d75f59ae28e6..485a8ea71394 100644 ---- a/drivers/iommu/intel/iommu.c -+++ b/drivers/iommu/intel/iommu.c -@@ -5090,6 +5090,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, - gather->freelist = domain_unmap(dmar_domain, start_pfn, - last_pfn, gather->freelist); - -+ dma_pte_clear_range(dmar_domain, start_pfn, last_pfn); -+ - if (dmar_domain->max_addr == iova + size) - dmar_domain->max_addr = iova; - --- -2.34.0 - diff --git a/lg-laptop-Recognize-more-models.patch b/lg-laptop-Recognize-more-models.patch deleted file mode 100644 index 904efce14b89..000000000000 --- a/lg-laptop-Recognize-more-models.patch +++ /dev/null @@ -1,39 +0,0 @@ -From c03af20354e338bb1e47eee01d535f23717a7608 Mon Sep 17 00:00:00 2001 -From: Matan Ziv-Av <matan@svgalib.org> -Date: Tue, 23 Nov 2021 22:14:55 +0200 -Subject: [PATCH 2/3] lg-laptop: Recognize more models - -LG uses 5 instead of 0 in the third digit (second digit after 2019) of the year string to indicate newer models in the same year. Handle this case as well. 
- -Signed-off-by: Matan Ziv-Av <matan@svgalib.org> -For: https://bugs.archlinux.org/task/71772 ---- - drivers/platform/x86/lg-laptop.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c -index 88b551caeaaf..d6f74d3a7605 100644 ---- a/drivers/platform/x86/lg-laptop.c -+++ b/drivers/platform/x86/lg-laptop.c -@@ -658,6 +658,18 @@ static int acpi_add(struct acpi_device *device) - if (product && strlen(product) > 4) - switch (product[4]) { - case '5': -+ if (strlen(product) > 5) -+ switch (product[5]) { -+ case 'N': -+ year = 2021; -+ break; -+ case '0': -+ year = 2016; -+ break; -+ default: -+ year = 2022; -+ } -+ break; - case '6': - year = 2016; - break; --- -2.34.1 - diff --git a/x86-ACPI-State-Optimize-C3-entry-on-AMD-CPUs.patch b/x86-ACPI-State-Optimize-C3-entry-on-AMD-CPUs.patch deleted file mode 100644 index ef0789b20f37..000000000000 --- a/x86-ACPI-State-Optimize-C3-entry-on-AMD-CPUs.patch +++ /dev/null @@ -1,58 +0,0 @@ -From: Deepak Sharma <deepak.sharma@amd.com> -To: <deepak.sharma@amd.com> -CC: "Rafael J. Wysocki" <rjw@rjwysocki.net>, - Len Brown <len.brown@intel.com>, Pavel Machek <pavel@ucw.cz>, - Thomas Gleixner <tglx@linutronix.de>, - "Ingo Molnar" <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>, - "maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)" <x86@kernel.org>, - "H. Peter Anvin" <hpa@zytor.com>, - "open list:SUSPEND TO RAM" <linux-pm@vger.kernel.org>, - "open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)" - <linux-kernel@vger.kernel.org> -Subject: [PATCH] x86/ACPI/State: Optimize C3 entry on AMD CPUs -Date: Wed, 18 Aug 2021 17:43:05 -0700 -Message-ID: <20210819004305.20203-1-deepak.sharma@amd.com> -List-ID: <linux-kernel.vger.kernel.org> -X-Mailing-List: linux-kernel@vger.kernel.org -List-Archive: <https://lore.kernel.org/lkml/> - -AMD CPU which support C3 shares cache. Its not necessary to flush the -caches in software before entering C3. 
This will cause performance drop -for the cores which share some caches. ARB_DIS is not used with current -AMD C state implementation. So set related flags correctly. - -Signed-off-by: Deepak Sharma <deepak.sharma@amd.com> ---- - arch/x86/kernel/acpi/cstate.c | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c -index 7de599eba7f0..62a5986d625a 100644 ---- a/arch/x86/kernel/acpi/cstate.c -+++ b/arch/x86/kernel/acpi/cstate.c -@@ -79,6 +79,21 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, - */ - flags->bm_control = 0; - } -+ if (c->x86_vendor == X86_VENDOR_AMD) { -+ /* -+ * For all AMD CPUs that support C3, caches should not be -+ * flushed by software while entering C3 type state. Set -+ * bm->check to 1 so that kernel doesn't need to execute -+ * cache flush operation. -+ */ -+ flags->bm_check = 1; -+ /* -+ * In current AMD C state implementation ARB_DIS is no longer -+ * used. So set bm_control to zero to indicate ARB_DIS is not -+ * required while entering C3 type state. -+ */ -+ flags->bm_control = 0; -+ } - } - EXPORT_SYMBOL(acpi_processor_power_init_bm_check); - --- -2.25.1 - - diff --git a/x86-change-default-to-spec_store_bypass_disable-prct.patch b/x86-change-default-to-spec_store_bypass_disable-prct.patch deleted file mode 100644 index a47f845e580b..000000000000 --- a/x86-change-default-to-spec_store_bypass_disable-prct.patch +++ /dev/null @@ -1,255 +0,0 @@ -From 1d4c03c1d9ea9ab1ee7ab0efbd05eec71b6a92d5 Mon Sep 17 00:00:00 2001 -From: Andrea Arcangeli <aarcange@redhat.com> -Date: Wed, 4 Nov 2020 18:50:54 -0500 -Subject: [PATCH] x86: change default to spec_store_bypass_disable=prctl - spectre_v2_user=prctl - -Switch the kernel default of SSBD and STIBP to the ones with -CONFIG_SECCOMP=n (i.e. spec_store_bypass_disable=prctl -spectre_v2_user=prctl) even if CONFIG_SECCOMP=y. 
- -Several motivations listed below: - -- If SMT is enabled the seccomp jail can still attack the rest of the - system even with spectre_v2_user=seccomp by using MDS-HT (except on - XEON PHI where MDS can be tamed with SMT left enabled, but that's a - special case). Setting STIBP became very expensive window dressing - after MDS-HT was discovered. - -- The seccomp jail cannot attack the kernel with spectre-v2-HT - regardless (even if STIBP is not set), but with MDS-HT the seccomp - jail can attack the kernel too. - -- With spec_store_bypass_disable=prctl the seccomp jail can attack the - other userland (guest or host mode) using spectre-v2-HT, but the - userland attack is already mitigated by both ASLR and pid namespaces - for host userland and through virt isolation with libkrun or - kata. (if anything, if somebody is worried about spectre-v2-HT it's - best to mount proc with hidepid=2,gid=proc on workstations where not - all apps may run under container runtimes, rather than slowing down - all seccomp jails, but the best is to add pid namespaces to the - seccomp jail). By contrast, MDS-HT is not mitigated and the seccomp - jail can still attack all other host and guest userland if SMT is - enabled even with spec_store_bypass_disable=seccomp. - -- If full security is required then MDS-HT must also be mitigated with - nosmt and then spectre_v2_user=prctl and spectre_v2_user=seccomp - would become identical. - -- Setting spectre_v2_user=seccomp is overall lower priority than - setting javascript.options.wasm false in about:config to protect - against remote wasm MDS-HT, instead of worrying about Spectre-v2-HT - and STIBP which again is already statistically well mitigated by - other means in userland and it's fully mitigated in kernel with - retpolines (unlike the wasm assist call with MDS-HT). - -- SSBD is needed to prevent reading the JIT memory and the primary - user being the OpenJDK. 
However the primary user of SSBD wouldn't be - covered by spec_store_bypass_disable=seccomp because it doesn't use - seccomp and the primary user also explicitly declined to set - PR_SET_SPECULATION_CTRL+PR_SPEC_STORE_BYPASS despite it easily - could. In fact it would need to set it only when the sandboxing - mechanism is enabled for javaws applets, but it still declined it by - declaring security within the same user address space as an - untenable objective for their JIT, even in the sandboxing case where - performance would be a lesser concern (for the record: I kind of - disagree in not setting PR_SPEC_STORE_BYPASS in the sandbox case and - I prefer to run javaws through a wrapper that sets - PR_SPEC_STORE_BYPASS if I need). In turn it can be inferred that - even if the primary user of SSBD would use seccomp, they would - invoke it with SECCOMP_FILTER_FLAG_SPEC_ALLOW by now. - -- runc/crun already set SECCOMP_FILTER_FLAG_SPEC_ALLOW by default, k8s - and podman have a default json seccomp allowlist that cannot be - slowed down, so for the #1 seccomp user this change is already a - noop. - -- systemd/sshd or other apps that use seccomp, if they really need - STIBP or SSBD, they need to explicitly set the - PR_SET_SPECULATION_CTRL by now. The stibp/ssbd seccomp blind - catch-all approach was done probably initially with a wishful - thinking objective to pretend to have a peace of mind that it could - magically fix it all. That was wishful thinking before MDS-HT was - discovered, but after MDS-HT has been discovered it become just - window dressing. - -- For qemu "-sandbox" seccomp jail it wouldn't make sense to set STIBP - or SSBD. SSBD doesn't help with KVM because there's no JIT (if it's - needed with TCG it should be an opt-in with - PR_SET_SPECULATION_CTRL+PR_SPEC_STORE_BYPASS and it shouldn't - slowdown KVM for nothing). 
For qemu+KVM STIBP would be even more - window dressing than it is for all other apps, because in the - qemu+KVM case there's not only the MDS attack to worry about with - SMT enabled. Even after disabling SMT, there's still a theoretical - spectre-v2 attack possible within the same thread context from guest - mode to host ring3 that the host kernel retpoline mitigation has no - theoretical chance to mitigate. On some kernels a - ibrs-always/ibrs-retpoline opt-in model is provided that will - enabled IBRS in the qemu host ring3 userland which fixes this - theoretical concern. Only after enabling IBRS in the host userland - it would then make sense to proceed and worry about STIBP and an - attack on the other host userland, but then again SMT would need to - be disabled for full security anyway, so that would render STIBP - again a noop. - -- last but not the least: the lack of "spec_store_bypass_disable=prctl - spectre_v2_user=prctl" means the moment a guest boots and - sshd/systemd runs, the guest kernel will write to SPEC_CTRL MSR - which will make the guest vmexit forever slower, forcing KVM to - issue a very slow rdmsr instruction at every vmexit. So the end - result is that SPEC_CTRL MSR is only available in GCE. Most other - public cloud providers don't expose SPEC_CTRL, which means that not - only STIBP/SSBD isn't available, but IBPB isn't available either - (which would cause no overhead to the guest or the hypervisor - because it's write only and requires no reading during vmexit). So - the current default already net loss in security (missing IBPB) - which means most public cloud providers cannot achieve a fully - secure guest with nosmt (and nosmt is enough to fully mitigate - MDS-HT). It also means GCE and is unfairly penalized in performance - because it provides the option to enable full security in the guest - as an opt-in (i.e. nosmt and IBPB). 
So this change will allow all - cloud providers to expose SPEC_CTRL without incurring any - hypervisor slowdown and at the same time it will remove the unfair - penalization of GCE performance for doing the right thing and it'll - allow to get full security with nosmt with IBPB being available (and - STIBP becoming meaningless). - -Example to put things in perspective: the STIBP enabled in seccomp has -never been about protecting apps using seccomp like sshd from an -attack from a malicious userland, but to the contrary it has always -been about protecting the system from an attack from sshd, after a -successful remote network exploit against sshd. In fact initially it -wasn't obvious STIBP would work both ways (STIBP was about preventing -the task that runs with STIBP to be attacked with spectre-v2-HT, but -accidentally in the STIBP case it also prevents the attack in the -other direction). In the hypothetical case that sshd has been remotely -exploited the last concern should be STIBP being set, because it'll be -still possible to obtain info even from the kernel by using MDS if -nosmt wasn't set (and if it was set, STIBP is a noop in the first -place). By contrast the kernel cannot leak anything with spectre-v2 HT -because of retpolines and the userland is mitigated by ASLR already -and ideally PID namespaces too. If anything it'd be worth checking if -sshd runs the seccomp thread under pid namespaces too if available in -the running kernel. SSBD also would be a noop for sshd, since sshd -uses no JIT. If sshd prefers to keep doing the STIBP window dressing -exercise, it still can even after this change of defaults by opting-in -with PR_SPEC_INDIRECT_BRANCH.
- -Ultimately setting SSBD and STIBP by default for all seccomp jails is -a bad sweet spot and bad default with more cons than pros that end up -reducing security in the public cloud (by giving an huge incentive to -not expose SPEC_CTRL which would be needed to get full security with -IBPB after setting nosmt in the guest) and by excessively hurting -performance to more secure apps using seccomp that end up having to -opt out with SECCOMP_FILTER_FLAG_SPEC_ALLOW. - -The following is the verified result of the new default with SMT -enabled: - -(gdb) print spectre_v2_user_stibp -$1 = SPECTRE_V2_USER_PRCTL -(gdb) print spectre_v2_user_ibpb -$2 = SPECTRE_V2_USER_PRCTL -(gdb) print ssb_mode -$3 = SPEC_STORE_BYPASS_PRCTL - -Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> -Signed-off-by: Kees Cook <keescook@chromium.org> -Link: https://lore.kernel.org/r/20201104235054.5678-1-aarcange@redhat.com -Acked-by: Josh Poimboeuf <jpoimboe@redhat.com> -Link: https://lore.kernel.org/lkml/AAA2EF2C-293D-4D5B-BFA6-FF655105CD84@redhat.com -Acked-by: Waiman Long <longman@redhat.com> -Link: https://lore.kernel.org/lkml/c0722838-06f7-da6b-138f-e0f26362f16a@redhat.com ---- - Documentation/admin-guide/hw-vuln/spectre.rst | 10 ++++------ - Documentation/admin-guide/kernel-parameters.txt | 5 ++--- - arch/x86/kernel/cpu/bugs.c | 4 ++-- - 3 files changed, 8 insertions(+), 11 deletions(-) - -diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst -index e05e581af5cf..19b897cb1d45 100644 ---- a/Documentation/admin-guide/hw-vuln/spectre.rst -+++ b/Documentation/admin-guide/hw-vuln/spectre.rst -@@ -490,9 +490,8 @@ Spectre variant 2 - - Restricting indirect branch speculation on a user program will - also prevent the program from launching a variant 2 attack -- on x86. All sand-boxed SECCOMP programs have indirect branch -- speculation restricted by default. 
Administrators can change -- that behavior via the kernel command line and sysfs control files. -+ on x86. Administrators can change that behavior via the kernel -+ command line and sysfs control files. - See :ref:`spectre_mitigation_control_command_line`. - - Programs that disable their indirect branch speculation will have -@@ -674,9 +673,8 @@ Mitigation selection guide - off by disabling their indirect branch speculation when they are run - (See :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`). - This prevents untrusted programs from polluting the branch target -- buffer. All programs running in SECCOMP sandboxes have indirect -- branch speculation restricted by default. This behavior can be -- changed via the kernel command line and sysfs control files. See -+ buffer. This behavior can be changed via the kernel command line -+ and sysfs control files. See - :ref:`spectre_mitigation_control_command_line`. - - 3. High security mode -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index bdb22006f713..b558005780d3 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -5265,8 +5265,7 @@ - auto - Kernel selects the mitigation depending on - the available CPU features and vulnerability. - -- Default mitigation: -- If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl" -+ Default mitigation: "prctl" - - Not specifying this option is equivalent to - spectre_v2_user=auto. -@@ -5310,7 +5309,7 @@ - will disable SSB unless they explicitly opt out. 
- - Default mitigations: -- X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl" -+ X86: "prctl" - - On powerpc the options are: - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index d41b70fe4918..8feb866623d0 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -721,11 +721,11 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) - case SPECTRE_V2_USER_CMD_FORCE: - mode = SPECTRE_V2_USER_STRICT; - break; -+ case SPECTRE_V2_USER_CMD_AUTO: - case SPECTRE_V2_USER_CMD_PRCTL: - case SPECTRE_V2_USER_CMD_PRCTL_IBPB: - mode = SPECTRE_V2_USER_PRCTL; - break; -- case SPECTRE_V2_USER_CMD_AUTO: - case SPECTRE_V2_USER_CMD_SECCOMP: - case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: - if (IS_ENABLED(CONFIG_SECCOMP)) -@@ -1132,7 +1132,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) - return mode; - - switch (cmd) { -- case SPEC_STORE_BYPASS_CMD_AUTO: - case SPEC_STORE_BYPASS_CMD_SECCOMP: - /* - * Choose prctl+seccomp as the default mode if seccomp is -@@ -1146,6 +1145,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) - case SPEC_STORE_BYPASS_CMD_ON: - mode = SPEC_STORE_BYPASS_DISABLE; - break; -+ case SPEC_STORE_BYPASS_CMD_AUTO: - case SPEC_STORE_BYPASS_CMD_PRCTL: - mode = SPEC_STORE_BYPASS_PRCTL; - break; --- -2.33.1 - diff --git a/zstd-udpate-fixes.patch b/zstd-udpate-fixes.patch deleted file mode 100644 index 70310ffd8409..000000000000 --- a/zstd-udpate-fixes.patch +++ /dev/null @@ -1,241 +0,0 @@ -From 9a8cd176f6ee90a3605d5dab75a4ab778e73f6f0 Mon Sep 17 00:00:00 2001 -From: Scott B <arglebargle@arglebargle.dev> -Date: Sat, 20 Nov 2021 12:14:56 -0800 -Subject: [PATCH] zstd udpate fixes - -Squashed commit of the following: - -commit 82290fbdaffe7519c8451d5a8fe20af6faddd665 -Author: Nick Terrell <terrelln@fb.com> -Date: Tue Nov 16 15:11:39 2021 -0800 - - lib: zstd: Don't add -O3 to cflags - - After the update to zstd-1.4.10 passing -O3 is no longer necessary to - get good 
performance from zstd. Using the default optimization level -O2 - is sufficient to get good performance. - - I've measured no significant change to compression speed, and a ~1% - decompression speed loss, which is acceptable. - - This fixes the reported parisc -Wframe-larger-than=1536 errors [0]. The - gcc-8-hppa-linux-gnu compiler performed very poorly with -O3, generating - stacks that are ~3KB. With -O2 these same functions generate stacks in - the < 100B, completely fixing the problem. Function size deltas are - listed below: - - ZSTD_compressBlock_fast_extDict_generic: 3800 -> 68 - ZSTD_compressBlock_fast: 2216 -> 40 - ZSTD_compressBlock_fast_dictMatchState: 1848 -> 64 - ZSTD_compressBlock_doubleFast_extDict_generic: 3744 -> 76 - ZSTD_fillDoubleHashTable: 3252 -> 0 - ZSTD_compressBlock_doubleFast: 5856 -> 36 - ZSTD_compressBlock_doubleFast_dictMatchState: 5380 -> 84 - ZSTD_compressBlock_lazy2: 2420 -> 72 - - Additionally, this improves the reported code bloat [1]. With gcc-11 - bloat-o-meter shows an 80KB code size improvement: - - ``` - > ../scripts/bloat-o-meter vmlinux.old vmlinux - add/remove: 31/8 grow/shrink: 24/155 up/down: 25734/-107924 (-82190) - Total: Before=6418562, After=6336372, chg -1.28% - ``` - - Compared to before the zstd-1.4.10 update we see a total code size - regression of 105KB, down from 374KB at v5.16-rc1: - - ``` - > ../scripts/bloat-o-meter vmlinux.old vmlinux - add/remove: 292/62 grow/shrink: 56/88 up/down: 235009/-127487 (107522) - Total: Before=6228850, After=6336372, chg +1.73% - ``` - - [0] https://lkml.org/lkml/2021/11/15/710 - [1] https://lkml.org/lkml/2021/11/14/189 - - Link: https://lore.kernel.org/r/20211117014949.1169186-4-nickrterrell@gmail.com/ - Link: https://lore.kernel.org/r/20211117201459.1194876-4-nickrterrell@gmail.com/ - - Reported-by: Geert Uytterhoeven <geert@linux-m68k.org> - Tested-by: Geert Uytterhoeven <geert@linux-m68k.org> - Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org> - Signed-off-by: Nick
Terrell <terrelln@fb.com> - -commit e764e672bf43ad4a3a3a85e79b267daaad406990 -Author: Nick Terrell <terrelln@fb.com> -Date: Mon Nov 15 20:33:08 2021 -0800 - - lib: zstd: Don't inline functions in zstd_opt.c - - `zstd_opt.c` contains the match finder for the highest compression - levels. These levels are already very slow, and are unlikely to be used - in the kernel. If they are used, they shouldn't be used in latency - sensitive workloads, so slowing them down shouldn't be a big deal. - - This saves 188 KB of the 288 KB regression reported by Geert Uytterhoeven [0]. - I've also opened an issue upstream [1] so that we can properly tackle - the code size issue in `zstd_opt.c` for all users, and can hopefully - remove this hack in the next zstd version we import. - - Bloat-o-meter output on x86-64: - - ``` - > ../scripts/bloat-o-meter vmlinux.old vmlinux - add/remove: 6/5 grow/shrink: 1/9 up/down: 16673/-209939 (-193266) - Function old new delta - ZSTD_compressBlock_opt_generic.constprop - 7559 +7559 - ZSTD_insertBtAndGetAllMatches - 6304 +6304 - ZSTD_insertBt1 - 1731 +1731 - ZSTD_storeSeq - 693 +693 - ZSTD_BtGetAllMatches - 255 +255 - ZSTD_updateRep - 128 +128 - ZSTD_updateTree 96 99 +3 - ZSTD_insertAndFindFirstIndexHash3 81 - -81 - ZSTD_setBasePrices.constprop 98 - -98 - ZSTD_litLengthPrice.constprop 138 - -138 - ZSTD_count 362 181 -181 - ZSTD_count_2segments 1407 938 -469 - ZSTD_insertBt1.constprop 2689 - -2689 - ZSTD_compressBlock_btultra2 19990 423 -19567 - ZSTD_compressBlock_btultra 19633 15 -19618 - ZSTD_initStats_ultra 19825 - -19825 - ZSTD_compressBlock_btopt 20374 12 -20362 - ZSTD_compressBlock_btopt_extDict 29984 12 -29972 - ZSTD_compressBlock_btultra_extDict 30718 15 -30703 - ZSTD_compressBlock_btopt_dictMatchState 32689 12 -32677 - ZSTD_compressBlock_btultra_dictMatchState 33574 15 -33559 - Total: Before=6611828, After=6418562, chg -2.92% - ``` - - [0] https://lkml.org/lkml/2021/11/14/189 - [1] https://github.com/facebook/zstd/issues/2862 - - Link: 
https://lore.kernel.org/r/20211117014949.1169186-3-nickrterrell@gmail.com/ - Link: https://lore.kernel.org/r/20211117201459.1194876-3-nickrterrell@gmail.com/ - - Reported-by: Geert Uytterhoeven <geert@linux-m68k.org> - Tested-by: Geert Uytterhoeven <geert@linux-m68k.org> - Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org> - Signed-off-by: Nick Terrell <terrelln@fb.com> - -commit e5fa8f8334f188f496e02c0c5dcb1f9234fec2fb -Author: Nick Terrell <terrelln@fb.com> -Date: Mon Nov 15 19:08:19 2021 -0800 - - lib: zstd: Fix unused variable warning - - The variable `litLengthSum` is only used by an `assert()`, so when - asserts are disabled the compiler doesn't see any usage and warns. - - This issue is already fixed upstream by PR #2838 [0]. It was reported - by the Kernel test robot in [1]. - - Another approach would be to change zstd's disabled `assert()` - definition to use the argument in a disabled branch, instead of - ignoring the argument. I've avoided this approach because there are - some small changes necessary to get zstd to build, and I would - want to thoroughly re-test for performance, since that is slightly - changing the code in every function in zstd. It seems like a - trivial change, but some functions are pretty sensitive to small - changes. However, I think it is a valid approach that I would - like to see upstream take, so I've opened Issue #2868 to attempt - this upstream. - - Lastly, I've chosen not to use __maybe_unused because all code - in lib/zstd/ must eventually be upstreamed. Upstream zstd can't - use __maybe_unused because it isn't portable across all compilers. 
- - [0] https://github.com/facebook/zstd/pull/2838 - [1] https://lore.kernel.org/linux-mm/202111120312.833wII4i-lkp@intel.com/T/ - [2] https://github.com/facebook/zstd/issues/2868 - - Link: https://lore.kernel.org/r/20211117014949.1169186-2-nickrterrell@gmail.com/ - Link: https://lore.kernel.org/r/20211117201459.1194876-2-nickrterrell@gmail.com/ - - Reported-by: kernel test robot <lkp@intel.com> - Signed-off-by: Nick Terrell <terrelln@fb.com> ---- - lib/zstd/Makefile | 2 -- - lib/zstd/common/compiler.h | 7 +++++++ - lib/zstd/compress/zstd_compress_superblock.c | 2 ++ - lib/zstd/compress/zstd_opt.c | 12 ++++++++++++ - 4 files changed, 21 insertions(+), 2 deletions(-) - -diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile -index 65218ec5b8f2..fc45339fc3a3 100644 ---- a/lib/zstd/Makefile -+++ b/lib/zstd/Makefile -@@ -11,8 +11,6 @@ - obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o - obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o - --ccflags-y += -O3 -- - zstd_compress-y := \ - zstd_compress_module.o \ - common/debug.o \ -diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h -index a1a051e4bce6..f5a9c70a228a 100644 ---- a/lib/zstd/common/compiler.h -+++ b/lib/zstd/common/compiler.h -@@ -16,6 +16,7 @@ - *********************************************************/ - /* force inlining */ - -+#if !defined(ZSTD_NO_INLINE) - #if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ - # define INLINE_KEYWORD inline - #else -@@ -24,6 +25,12 @@ - - #define FORCE_INLINE_ATTR __attribute__((always_inline)) - -+#else -+ -+#define INLINE_KEYWORD -+#define FORCE_INLINE_ATTR -+ -+#endif - - /* - On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC). 
-diff --git a/lib/zstd/compress/zstd_compress_superblock.c b/lib/zstd/compress/zstd_compress_superblock.c -index ee03e0aedb03..b0610b255653 100644 ---- a/lib/zstd/compress/zstd_compress_superblock.c -+++ b/lib/zstd/compress/zstd_compress_superblock.c -@@ -411,6 +411,8 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* - const seqDef* sp = sstart; - size_t matchLengthSum = 0; - size_t litLengthSum = 0; -+ /* Only used by assert(), suppress unused variable warnings in production. */ -+ (void)litLengthSum; - while (send-sp > 0) { - ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); - litLengthSum += seqLen.litLength; -diff --git a/lib/zstd/compress/zstd_opt.c b/lib/zstd/compress/zstd_opt.c -index 70adc7c2cc5e..09483f518dc3 100644 ---- a/lib/zstd/compress/zstd_opt.c -+++ b/lib/zstd/compress/zstd_opt.c -@@ -8,6 +8,18 @@ - * You may select, at your option, one of the above-listed licenses. - */ - -+/* -+ * Disable inlining for the optimal parser for the kernel build. -+ * It is unlikely to be used in the kernel, and where it is used -+ * latency shouldn't matter because it is very slow to begin with. -+ * We prefer a ~180KB binary size win over faster optimal parsing. -+ * -+ * TODO(https://github.com/facebook/zstd/issues/2862): -+ * Improve the code size of the optimal parser in general, so we -+ * don't need this hack for the kernel build. -+ */ -+#define ZSTD_NO_INLINE 1 -+ - #include "zstd_compress_internal.h" - #include "hist.h" - #include "zstd_opt.h" --- -2.34.0 - |