diff options
author | Scott B | 2021-11-19 11:36:42 -0800 |
---|---|---|
committer | Scott B | 2021-11-19 11:36:42 -0800 |
commit | dc9629e44241e52ccea16daba7137098b7a71e98 (patch) | |
tree | 3d595b039db024f82809cf655e4f4dd72cced4f3 | |
parent | afa1356c68cfa51f8607b5d5df9b9bb824fb4c35 (diff) | |
download | aur-dc9629e44241e52ccea16daba7137098b7a71e98.tar.gz |
patch: update amd-pstate to v4
-rw-r--r-- | .SRCINFO | 4 | ||||
-rw-r--r-- | PKGBUILD | 4 | ||||
-rw-r--r-- | revert-amd-pstate-v3.patch | 2047 | ||||
-rw-r--r-- | squashed-amd-pstate-v4-for-5.15.patch | 2403 |
4 files changed, 4458 insertions, 0 deletions
@@ -22,6 +22,8 @@ pkgbase = linux-xanmod-rog source = choose-gcc-optimization.sh source = sphinx-workaround.patch source = https://cdn.kernel.org/pub/linux/kernel/v5.x/incr/patch-5.15.2-3.xz + source = revert-amd-pstate-v3.patch + source = squashed-amd-pstate-v4-for-5.15.patch source = squashed-mm-multigenerational-lru-v5-for-5.15.y.patch source = x86-ACPI-State-Optimize-C3-entry-on-AMD-CPUs.patch source = acpi-battery-Always-read-fresh-battery-state-on-update.patch @@ -50,6 +52,8 @@ pkgbase = linux-xanmod-rog sha256sums = 1ac18cad2578df4a70f9346f7c6fccbb62f042a0ee0594817fdef9f2704904ee sha256sums = 52fc0fcd806f34e774e36570b2a739dbdf337f7ff679b1c1139bee54d03301eb sha256sums = 074f5dd036079f81fcc4a239b6d159528fa22a57e06d5658f8b5c4970de65c26 + sha256sums = 12d78853f582ccf8027dbd2cbc27b0fcde106202958c6be31054815bae7752ae + sha256sums = 8592fc434cc7d52b0fefe478de19d0af3d7e06406d8d09bd1bc85e6805738e68 sha256sums = 7675116fc7da55cfb9c6ceaaf120c5ebd285a13b1f1641ca810dd05516c88e0b sha256sums = 923230ed8367e28adfdeed75d3cdba9eec6b781818c37f6f3d3eb64101d2e716 sha256sums = f7a4bf6293912bfc4a20743e58a5a266be8c4dbe3c1862d196d3a3b45f2f7c90 @@ -124,6 +124,8 @@ source=("https://cdn.kernel.org/pub/linux/kernel/v${_branch}/linux-${_major}.tar #"Linux-5.15.3-rc4.patch" # amd-pstate v3 included in Xanmod + "revert-amd-pstate-v3.patch" + "squashed-amd-pstate-v4-for-5.15.patch" # multigenerational lru v5 "squashed-mm-multigenerational-lru-v5-for-5.15.y.patch" @@ -180,6 +182,8 @@ sha256sums=('57b2cf6991910e3b67a1b3490022e8a0674b6965c74c12da1e99d138d1991ee8' '1ac18cad2578df4a70f9346f7c6fccbb62f042a0ee0594817fdef9f2704904ee' '52fc0fcd806f34e774e36570b2a739dbdf337f7ff679b1c1139bee54d03301eb' '074f5dd036079f81fcc4a239b6d159528fa22a57e06d5658f8b5c4970de65c26' + '12d78853f582ccf8027dbd2cbc27b0fcde106202958c6be31054815bae7752ae' + '8592fc434cc7d52b0fefe478de19d0af3d7e06406d8d09bd1bc85e6805738e68' '7675116fc7da55cfb9c6ceaaf120c5ebd285a13b1f1641ca810dd05516c88e0b' 
'923230ed8367e28adfdeed75d3cdba9eec6b781818c37f6f3d3eb64101d2e716' 'f7a4bf6293912bfc4a20743e58a5a266be8c4dbe3c1862d196d3a3b45f2f7c90' diff --git a/revert-amd-pstate-v3.patch b/revert-amd-pstate-v3.patch new file mode 100644 index 000000000000..6df995e26b93 --- /dev/null +++ b/revert-amd-pstate-v3.patch @@ -0,0 +1,2047 @@ +From d7cc6b447c87327feeca43c5c14748f2a99731a6 Mon Sep 17 00:00:00 2001 +From: Scott B <arglebargle@arglebargle.dev> +Date: Fri, 19 Nov 2021 11:33:29 -0800 +Subject: [PATCH] revert amd-pstate v3 + +Squashed commit of the following: + +commit bf68016ac8eebc0877b66d1799b82d3b72140a0e +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:33:12 2021 -0800 + + Revert "x86/cpufreatures: add AMD Collaborative Processor Performance Control feature flag" + + This reverts commit a00d065f4b06d01582052b774050f3804f696a75. + +commit 612abb91d31e37db6d3df0875b3f0b37423f23b3 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:48 2021 -0800 + + Revert "x86/msr: add AMD CPPC MSR definitions" + + This reverts commit 5abba2f85d7e070decf56ef35ce6a414a0a8669b. + +commit 47d20a0cfe55618878200a8cb372d1ebc1dd6632 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:47 2021 -0800 + + Revert "ACPI: CPPC: implement support for SystemIO registers" + + This reverts commit 006bf956420e1a115671ffc65b8e4f8d3d9ca401. + +commit 5ab391b98f8c5c1762cfae82a7aa03893426726a +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:46 2021 -0800 + + Revert "ACPI: CPPC: Check present CPUs for determining _CPC is valid" + + This reverts commit 281d2f12565255e4b6ec89860e3991ed50baed63. + +commit 301948dc50b835a8998a0e4e0957583d1236325f +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:45 2021 -0800 + + Revert "ACPI: CPPC: add cppc enable register function" + + This reverts commit 507e3fa01595c1a1ba29c6100b086d5f393b0279. 
+ +commit 5fca2c8c0aabcd40a3ad46f598e40526d3f3a491 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:44 2021 -0800 + + Revert "cpufreq: amd: introduce a new amd pstate driver to support future processors" + + This reverts commit 7d9f5592ddfd7bf536b3ece185f6bc8ae3f19dd6. + +commit c601c1f849abc6a8a29098c72fccf5e4e8802710 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:43 2021 -0800 + + Revert "cpufreq: amd: add fast switch function for amd-pstate" + + This reverts commit 57448d13ce9afbd5672ea397439d2c60d6ff4415. + +commit 2902551e5ccb26b5eb7710570a1860fd27be8e93 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:42 2021 -0800 + + Revert "cpufreq: amd: add acpi cppc function as the backend for legacy processors" + + This reverts commit a97ebea71a980752c4852641dee71c1fc60a790a. + +commit 0eaa9aa7ac20eb7b6974eb0e9011cb96f4df7dc4 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:41 2021 -0800 + + Revert "cpufreq: amd: add trace for amd-pstate module" + + This reverts commit de13cc00ce1dc1affd4029461320d7d252b209b6. + +commit a28477b5740fb6187ef4dd414dfc9170ab6fc040 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:40 2021 -0800 + + Revert "cpufreq: amd: add boost mode support for amd-pstate" + + This reverts commit 9f1e5af9f6c16364167edae13cdac877742027c6. + +commit 7afe8a41a8a8f675d5451fb395c4f47430812519 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:38 2021 -0800 + + Revert "cpufreq: amd: add amd-pstate frequencies attributes" + + This reverts commit dd80c527df00ada7308e0bb3d3919622281ae180. + +commit 81161d18950f14278da7e0fec1a1529dc15964c6 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:37 2021 -0800 + + Revert "cpufreq: amd: add amd-pstate performance attributes" + + This reverts commit b89d41fa5e10df868a29444749b3e9811474697a. 
+ +commit 6b45d3c413cf0342c572196a26f13f8070161c85 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:36 2021 -0800 + + Revert "cpupower: add AMD P-state capability flag" + + This reverts commit 2be733db76f7b0d713f09c13e6c21dd168d4101d. + +commit fa55160194681aaee9151aa572427ba7c70bb68b +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:34 2021 -0800 + + Revert "cpupower: add the function to check amd-pstate enabled" + + This reverts commit 3223a43bae34c0db8c83798134170aabdfce656b. + +commit 7a273997a09314ebafa6fc7abe6f708a135fd3cc +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:32 2021 -0800 + + Revert "cpupower: initial AMD P-state capability" + + This reverts commit d5bc8e3e262cadd9cba755a5e83416695756d5e5. + +commit 30b1e6400b4b8a17e8213e2246b0c9478e1785af +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:31 2021 -0800 + + Revert "cpupower: add the function to get the sysfs value from specific table" + + This reverts commit d391c3172281e8ae86b298a8d5e6361dc53a6712. + +commit 316005334a8ec73dbc9c3b6c5e16abd0222a1f08 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:31 2021 -0800 + + Revert "cpupower: add amd-pstate sysfs definition and access helper" + + This reverts commit e843b0dcbe9d0ee85e08ce9a7f11dd37a41246f6. + +commit b29429095889ea938ea8ca21629b0d461e07f976 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:30 2021 -0800 + + Revert "cpupower: enable boost state support for amd-pstate module" + + This reverts commit 2e3c1f40a15f5497b5e99f3296afc6f112e59899. + +commit b6dd0dbb5d975f98ef4f46f3527b67a6b5b545c6 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:28 2021 -0800 + + Revert "cpupower: move print_speed function into misc helper" + + This reverts commit 0f68a352aab7ec550ef100f5a6b7f4e2fb52d457. 
+ +commit 894da3dbc15f6c2931fecae8ad910677f9d4bd52 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:26 2021 -0800 + + Revert "cpupower: print amd-pstate information on cpupower" + + This reverts commit 6d3e84c31f5c8873bb02ff2a10bd1fd073500651. + +commit d86216045739ea7f935c82b9b53063848c5a0170 +Author: Scott B <arglebargle@arglebargle.dev> +Date: Fri Nov 19 11:32:18 2021 -0800 + + Revert "Documentation: amd-pstate: add amd-pstate driver introduction" + + This reverts commit d0fe816e6098462c7985381a4096913cb179cbe1. +--- + Documentation/admin-guide/pm/amd-pstate.rst | 372 ---------- + .../admin-guide/pm/working-state.rst | 1 - + arch/x86/include/asm/cpufeatures.h | 1 - + arch/x86/include/asm/msr-index.h | 17 - + drivers/acpi/cppc_acpi.c | 93 +-- + drivers/cpufreq/Kconfig.x86 | 17 - + drivers/cpufreq/Makefile | 1 - + drivers/cpufreq/amd-pstate.c | 663 ------------------ + include/acpi/cppc_acpi.h | 5 - + include/trace/events/power.h | 46 -- + tools/power/cpupower/lib/cpufreq.c | 21 +- + tools/power/cpupower/lib/cpufreq.h | 12 - + tools/power/cpupower/utils/cpufreq-info.c | 68 +- + tools/power/cpupower/utils/helpers/amd.c | 87 --- + tools/power/cpupower/utils/helpers/cpuid.c | 13 - + tools/power/cpupower/utils/helpers/helpers.h | 22 - + tools/power/cpupower/utils/helpers/misc.c | 62 -- + 17 files changed, 61 insertions(+), 1440 deletions(-) + delete mode 100644 Documentation/admin-guide/pm/amd-pstate.rst + delete mode 100644 drivers/cpufreq/amd-pstate.c + +diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst +deleted file mode 100644 +index d9fcfd42f567..000000000000 +--- a/Documentation/admin-guide/pm/amd-pstate.rst ++++ /dev/null +@@ -1,372 +0,0 @@ +-.. SPDX-License-Identifier: GPL-2.0 +-.. 
include:: <isonum.txt> +- +-=============================================== +-``amd-pstate`` CPU Performance Scaling Driver +-=============================================== +- +-:Copyright: |copy| 2021 Advanced Micro Devices, Inc. +- +-:Author: Huang Rui <ray.huang@amd.com> +- +- +-Introduction +-=================== +- +-``amd-pstate`` is the AMD CPU performance scaling driver that introduces a +-new CPU frequency control mechanism on modern AMD APU and CPU series in +-Linux kernel. The new mechanism is based on Collaborative Processor +-Performance Control (CPPC) which provides finer grain frequency management +-than legacy ACPI hardware P-States. Current AMD CPU/APU platforms are using +-the ACPI P-states driver to manage CPU frequency and clocks with switching +-only in 3 P-states. CPPC replaces the ACPI P-states controls, allows a +-flexible, low-latency interface for the Linux kernel to directly +-communicate the performance hints to hardware. +- +-``amd-pstate`` leverages the Linux kernel governors such as ``schedutil``, +-``ondemand``, etc. to manage the performance hints which are provided by +-CPPC hardware functionality that internally follows the hardware +-specification (for details refer to AMD64 Architecture Programmer's Manual +-Volume 2: System Programming [1]_). Currently ``amd-pstate`` supports basic +-frequency control function according to kernel governors on some of the +-Zen2 and Zen3 processors, and we will implement more AMD specific functions +-in future after we verify them on the hardware and SBIOS. +- +- +-AMD CPPC Overview +-======================= +- +-Collaborative Processor Performance Control (CPPC) interface enumerates a +-continuous, abstract, and unit-less performance value in a scale that is +-not tied to a specific performance state / frequency. This is an ACPI +-standard [2]_ which software can specify application performance goals and +-hints as a relative target to the infrastructure limits. 
AMD processors +-provides the low latency register model (MSR) instead of AML code +-interpreter for performance adjustments. ``amd-pstate`` will initialize a +-``struct cpufreq_driver`` instance ``amd_pstate_driver`` with the callbacks +-to manage each performance update behavior. :: +- +- Highest Perf ------>+-----------------------+ +-----------------------+ +- | | | | +- | | | | +- | | Max Perf ---->| | +- | | | | +- | | | | +- Nominal Perf ------>+-----------------------+ +-----------------------+ +- | | | | +- | | | | +- | | | | +- | | | | +- | | | | +- | | | | +- | | Desired Perf ---->| | +- | | | | +- | | | | +- | | | | +- | | | | +- | | | | +- | | | | +- | | | | +- | | | | +- | | | | +- Lowest non- | | | | +- linear perf ------>+-----------------------+ +-----------------------+ +- | | | | +- | | Lowest perf ---->| | +- | | | | +- Lowest perf ------>+-----------------------+ +-----------------------+ +- | | | | +- | | | | +- | | | | +- 0 ------>+-----------------------+ +-----------------------+ +- +- AMD P-States Performance Scale +- +- +-.. _perf_cap: +- +-AMD CPPC Performance Capability +--------------------------------- +- +-Highest Performance (RO) +-......................... +- +-It is the absolute maximum performance an individual processor may reach, +-assuming ideal conditions. This performance level may not be sustainable +-for long durations and may only be achievable if other platform components +-are in a specific state; for example, it may require other processors be in +-an idle state. This would be equivalent to the highest frequencies +-supported by the processor. +- +-Nominal (Guaranteed) Performance (RO) +-...................................... +- +-It is the maximum sustained performance level of the processor, assuming +-ideal operating conditions. In absence of an external constraint (power, +-thermal, etc.) this is the performance level the processor is expected to +-be able to maintain continuously. 
All cores/processors are expected to be +-able to sustain their nominal performance state simultaneously. +- +-Lowest non-linear Performance (RO) +-................................... +- +-It is the lowest performance level at which nonlinear power savings are +-achieved, for example, due to the combined effects of voltage and frequency +-scaling. Above this threshold, lower performance levels should be generally +-more energy efficient than higher performance levels. This register +-effectively conveys the most efficient performance level to ``amd-pstate``. +- +-Lowest Performance (RO) +-........................ +- +-It is the absolute lowest performance level of the processor. Selecting a +-performance level lower than the lowest nonlinear performance level may +-cause an efficiency penalty but should reduce the instantaneous power +-consumption of the processor. +- +-AMD CPPC Performance Control +------------------------------- +- +-``amd-pstate`` passes performance goals through these registers. The +-register drives the behavior of the desired performance target. +- +-Minimum requested performance (RW) +-................................... +- +-``amd-pstate`` specifies the minimum allowed performance level. +- +-Maximum requested performance (RW) +-................................... +- +-``amd-pstate`` specifies a limit the maximum performance that is expected +-to be supplied by the hardware. +- +-Desired performance target (RW) +-................................... +- +-``amd-pstate`` specifies a desired target in the CPPC performance scale as +-a relative number. This can be expressed as percentage of nominal +-performance (infrastructure max). Below the nominal sustained performance +-level, desired performance expresses the average performance level of the +-processor subject to hardware. Above the nominal performance level, +-processor must provide at least nominal performance requested and go higher +-if current operating conditions allow. 
+- +-Energy Performance Preference (EPP) (RW) +-......................................... +- +-Provides a hint to the hardware if software wants to bias toward performance +-(0x0) or energy efficiency (0xff). +- +- +-Key Governors Support +-======================= +- +-``amd-pstate`` can be used with all the (generic) scaling governors listed +-by the ``scaling_available_governors`` policy attribute in ``sysfs``. Then, +-it is responsible for the configuration of policy objects corresponding to +-CPUs and provides the ``CPUFreq`` core (and the scaling governors attached +-to the policy objects) with accurate information on the maximum and minimum +-operating frequencies supported by the hardware. Users can check the +-``scaling_cur_freq`` information comes from the ``CPUFreq`` core. +- +-``amd-pstate`` mainly supports ``schedutil`` and ``ondemand`` for dynamic +-frequency control. It is to fine tune the processor configuration on +-``amd-pstate`` to the ``schedutil`` with CPU CFS scheduler. ``amd-pstate`` +-registers adjust_perf callback to implement the CPPC similar performance +-update behavior. It is initialized by ``sugov_start`` and then populate the +-CPU's update_util_data pointer to assign ``sugov_update_single_perf`` as +-the utilization update callback function in CPU scheduler. CPU scheduler +-will call ``cpufreq_update_util`` and assign the target performance +-according to the ``struct sugov_cpu`` that utilization update belongs to. +-Then ``amd-pstate`` updates the desired performance according to the CPU +-scheduler assigned. +- +- +-Processor Support +-======================= +- +-The ``amd-pstate`` initialization will fail if the _CPC in ACPI SBIOS is +-not existed at the detected processor, and it uses ``acpi_cpc_valid`` to +-check the _CPC existence. All Zen based processors support legacy ACPI +-hardware P-States function, so while the ``amd-pstate`` fails to be +-initialized, the kernel will fall back to initialize ``acpi-cpufreq`` +-driver. 
+- +-There are two types of hardware implementations for ``amd-pstate``: one is +-`Full MSR Support <perf_cap_>`_ and another is `Shared Memory Support +-<perf_cap_>`_. It can use :c:macro:`X86_FEATURE_AMD_CPPC` feature flag (for +-details refer to Processor Programming Reference (PPR) for AMD Family +-19h Model 21h, Revision B0 Processors [3]_) to indicate the different +-types. ``amd-pstate`` is to register different ``amd_pstate_perf_funcs`` +-instances for different hardware implementations. +- +-Currently, some of Zen2 and Zen3 processors support ``amd-pstate``. In the +-future, it will be supported on more and more AMD processors. +- +-Full MSR Support +------------------ +- +-Some new Zen3 processors such as Cezanne provide the MSR registers directly +-while the :c:macro:`X86_FEATURE_AMD_CPPC` CPU feature flag is set. +-``amd-pstate`` can handle the MSR register to implement the fast switch +-function in ``CPUFreq`` that can shrink latency of frequency control on the +-interrupt context. +- +-Shared Memory Support +----------------------- +- +-If :c:macro:`X86_FEATURE_AMD_CPPC` CPU feature flag is not set, that means +-the processor supports shared memory solution. In this case, ``amd-pstate`` +-uses the ``cppc_acpi`` helper methods to implement the callback functions +-of ``amd_pstate_perf_funcs``. +- +- +-AMD P-States and ACPI hardware P-States always can be supported in one +-processor. But AMD P-States has the higher priority and if it is enabled +-with :c:macro:`MSR_AMD_CPPC_ENABLE` or ``cppc_set_enable``, it will respond +-to the request from AMD P-States. +- +- +-User Space Interface in ``sysfs`` +-================================== +- +-``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to +-control its functionality at the system level. They located in the +-``/sys/devices/system/cpu/cpufreq/policyX/`` directory and affect all CPUs. 
:: +- +- root@hr-test1:/home/ray# ls /sys/devices/system/cpu/cpufreq/policy0/*amd* +- /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_highest_perf +- /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_lowest_nonlinear_freq +- /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_lowest_nonlinear_perf +- /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_lowest_perf +- /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_max_freq +- /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_min_freq +- /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_nominal_freq +- /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_nominal_perf +- +- +-``amd_pstate_highest_perf / amd_pstate_max_freq`` +- +-Maximum CPPC performance and CPU frequency that the driver is allowed to +-set in percent of the maximum supported CPPC performance level (the highest +-performance supported in `AMD CPPC Performance Capability <perf_cap_>`_). +-This attribute is read-only. +- +-``amd_pstate_nominal_perf / amd_pstate_nominal_freq`` +- +-Nominal CPPC performance and CPU frequency that the driver is allowed to +-set in percent of the maximum supported CPPC performance level (Please see +-nominal performance in `AMD CPPC Performance Capability <perf_cap_>`_). +-This attribute is read-only. +- +-``amd_pstate_lowest_nonlinear_perf / amd_pstate_lowest_nonlinear_freq`` +- +-The lowest non-linear CPPC performance and CPU frequency that the driver is +-allowed to set in percent of the maximum supported CPPC performance level +-(Please see the lowest non-linear performance in `AMD CPPC Performance +-Capability <perf_cap_>`_). +-This attribute is read-only. +- +-``amd_pstate_lowest_perf`` +- +-The lowest physical CPPC performance. The minimum CPU frequency can be read +-back from ``cpuinfo`` member of ``cpufreq_policy``, so we won't expose it +-here. +-This attribute is read-only. 
+- +- +-``amd-pstate`` vs ``acpi-cpufreq`` +-====================================== +- +-On majority of AMD platforms supported by ``acpi-cpufreq``, the ACPI tables +-provided by the platform firmware used for CPU performance scaling, but +-only provides 3 P-states on AMD processors. +-However, on modern AMD APU and CPU series, it provides the collaborative +-processor performance control according to ACPI protocol and customize this +-for AMD platforms. That is fine-grain and continuous frequency range +-instead of the legacy hardware P-states. ``amd-pstate`` is the kernel +-module which supports the new AMD P-States mechanism on most of future AMD +-platforms. The AMD P-States mechanism will be the more performance and energy +-efficiency frequency management method on AMD processors. +- +-``cpupower`` tool support for ``amd-pstate`` +-=============================================== +- +-``amd-pstate`` is supported on ``cpupower`` tool that can be used to dump the frequency +-information. And it is in progress to support more and more operations for new +-``amd-pstate`` module with this tool. :: +- +- root@hr-test1:/home/ray# cpupower frequency-info +- analyzing CPU 0: +- driver: amd-pstate +- CPUs which run at the same hardware frequency: 0 +- CPUs which need to have their frequency coordinated by software: 0 +- maximum transition latency: 131 us +- hardware limits: 400 MHz - 4.68 GHz +- available cpufreq governors: ondemand conservative powersave userspace performance schedutil +- current policy: frequency should be within 400 MHz and 4.68 GHz. +- The governor "schedutil" may decide which speed to use +- within this range. +- current CPU frequency: Unable to call hardware +- current CPU frequency: 4.02 GHz (asserted by call to kernel) +- boost state support: +- Supported: yes +- Active: yes +- AMD PSTATE Highest Performance: 166. Maximum Frequency: 4.68 GHz. +- AMD PSTATE Nominal Performance: 117. Nominal Frequency: 3.30 GHz. 
+- AMD PSTATE Lowest Non-linear Performance: 39. Lowest Non-linear Frequency: 1.10 GHz. +- AMD PSTATE Lowest Performance: 15. Lowest Frequency: 400 MHz. +- +- +-Diagnostics and Tuning +-======================= +- +-Trace Events +--------------- +- +-There are two static trace events that can be used for ``amd-pstate`` +-diagnostics. One of them is the cpu_frequency trace event generally used +-by ``CPUFreq``, and the other one is the ``amd_pstate_perf`` trace event +-specific to ``amd-pstate``. The following sequence of shell commands can +-be used to enable them and see their output (if the kernel is generally +-configured to support event tracing). :: +- +- root@hr-test1:/home/ray# cd /sys/kernel/tracing/ +- root@hr-test1:/sys/kernel/tracing# echo 1 > events/amd_cpu/enable +- root@hr-test1:/sys/kernel/tracing# cat trace +- # tracer: nop +- # +- # entries-in-buffer/entries-written: 47827/42233061 #P:2 +- # +- # _-----=> irqs-off +- # / _----=> need-resched +- # | / _---=> hardirq/softirq +- # || / _--=> preempt-depth +- # ||| / delay +- # TASK-PID CPU# |||| TIMESTAMP FUNCTION +- # | | | |||| | | +- <idle>-0 [015] dN... 4995.979886: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=15 changed=false fast_switch=true +- <idle>-0 [007] d.h.. 4995.979893: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=7 changed=false fast_switch=true +- cat-2161 [000] d.... 4995.980841: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=0 changed=false fast_switch=true +- sshd-2125 [004] d.s.. 4995.980968: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=4 changed=false fast_switch=true +- <idle>-0 [007] d.s.. 4995.980968: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=7 changed=false fast_switch=true +- <idle>-0 [003] d.s.. 4995.980971: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=3 changed=false fast_switch=true +- <idle>-0 [011] d.s.. 
4995.980996: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=11 changed=false fast_switch=true +- +-The cpu_frequency trace event will be triggered either by the ``schedutil`` scaling +-governor (for the policies it is attached to), or by the ``CPUFreq`` core (for the +-policies with other scaling governors). +- +- +-Reference +-=========== +- +-.. [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming, +- https://www.amd.com/system/files/TechDocs/24593.pdf +- +-.. [2] Advanced Configuration and Power Interface Specification, +- https://uefi.org/sites/default/files/resources/ACPI_Spec_6_4_Jan22.pdf +- +-.. [3] Processor Programming Reference (PPR) for AMD Family 19h Model 21h, Revision B0 Processors +- https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip +diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst +index 5d2757e2de65..f40994c422dc 100644 +--- a/Documentation/admin-guide/pm/working-state.rst ++++ b/Documentation/admin-guide/pm/working-state.rst +@@ -11,7 +11,6 @@ Working-State Power Management + intel_idle + cpufreq + intel_pstate +- amd-pstate + cpufreq_drivers + intel_epb + intel-speed-select +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index f23dc1abd485..d0ce5cfd3ac1 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -313,7 +313,6 @@ + #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ + #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ + #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. 
*/ +-#define X86_FEATURE_AMD_CPPC (13*32+27) /* Collaborative Processor Performance Control */ + + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index ce42e15cf303..a7c413432b33 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -486,23 +486,6 @@ + + #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f + +-/* AMD Collaborative Processor Performance Control MSRs */ +-#define MSR_AMD_CPPC_CAP1 0xc00102b0 +-#define MSR_AMD_CPPC_ENABLE 0xc00102b1 +-#define MSR_AMD_CPPC_CAP2 0xc00102b2 +-#define MSR_AMD_CPPC_REQ 0xc00102b3 +-#define MSR_AMD_CPPC_STATUS 0xc00102b4 +- +-#define CAP1_LOWEST_PERF(x) (((x) >> 0) & 0xff) +-#define CAP1_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) +-#define CAP1_NOMINAL_PERF(x) (((x) >> 16) & 0xff) +-#define CAP1_HIGHEST_PERF(x) (((x) >> 24) & 0xff) +- +-#define REQ_MAX_PERF(x) (((x) & 0xff) << 0) +-#define REQ_MIN_PERF(x) (((x) & 0xff) << 8) +-#define REQ_DES_PERF(x) (((x) & 0xff) << 16) +-#define REQ_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) +- + /* Fam 17h MSRs */ + #define MSR_F17H_IRPERF 0xc00000e9 + +diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c +index 2d2297ef5bf9..bd482108310c 100644 +--- a/drivers/acpi/cppc_acpi.c ++++ b/drivers/acpi/cppc_acpi.c +@@ -411,7 +411,7 @@ bool acpi_cpc_valid(void) + struct cpc_desc *cpc_ptr; + int cpu; + +- for_each_present_cpu(cpu) { ++ for_each_possible_cpu(cpu) { + cpc_ptr = per_cpu(cpc_desc_ptr, cpu); + if (!cpc_ptr) + return false; +@@ -759,24 +759,9 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) + goto out_free; + cpc_ptr->cpc_regs[i-2].sys_mem_vaddr = addr; + } +- } else if (gas_t->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { +- if (gas_t->access_width < 1 || gas_t->access_width > 3) { +- /* 1 = 8-bit, 2 = 16-bit, and 3 = 32-bit. 
SystemIO doesn't +- * implement 64-bit registers. +- */ +- pr_debug("Invalid access width %d for SystemIO register\n", +- gas_t->access_width); +- goto out_free; +- } +- if (gas_t->address & ~0xFFFFULL) { +- /* SystemIO registers use 16-bit integer addresses */ +- pr_debug("Invalid IO port %llu for SystemIO register\n", +- gas_t->address); +- goto out_free; +- } + } else { + if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) { +- /* Support only PCC, SystemMemory, SystemIO, and FFH type regs. */ ++ /* Support only PCC ,SYS MEM and FFH type regs */ + pr_debug("Unsupported register type: %d\n", gas_t->space_id); + goto out_free; + } +@@ -951,20 +936,7 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) + } + + *val = 0; +- +- if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { +- u32 width = 8 << (reg->access_width - 1); +- acpi_status status; +- +- status = acpi_os_read_port((acpi_io_address)reg->address, (u32 *)val, width); +- +- if (status != AE_OK) { +- pr_debug("Error: Failed to read SystemIO port %llx\n", reg->address); +- return -EFAULT; +- } +- +- return 0; +- } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) ++ if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + vaddr = reg_res->sys_mem_vaddr; +@@ -1003,19 +975,7 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); + struct cpc_reg *reg = ®_res->cpc_entry.reg; + +- if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { +- u32 width = 8 << (reg->access_width - 1); +- acpi_status status; +- +- status = acpi_os_write_port((acpi_io_address)reg->address, (u32)val, width); +- +- if (status != AE_OK) { +- pr_debug("Error: Failed to write SystemIO port %llx\n", reg->address); +- return -EFAULT; +- } +- +- return 0; +- } else if 
(reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) ++ if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + vaddr = reg_res->sys_mem_vaddr; +@@ -1275,51 +1235,6 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) + } + EXPORT_SYMBOL_GPL(cppc_get_perf_ctrs); + +-/** +- * cppc_set_enable - Set to enable CPPC on the processor by writing the +- * Continuous Performance Control package EnableRegister feild. +- * @cpu: CPU for which to enable CPPC register. +- * @enable: 0 - disable, 1 - enable CPPC feature on the processor. +- * +- * Return: 0 for success, -ERRNO or -EIO otherwise. +- */ +-int cppc_set_enable(int cpu, bool enable) +-{ +- int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); +- struct cpc_register_resource *enable_reg; +- struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); +- struct cppc_pcc_data *pcc_ss_data = NULL; +- int ret = -EINVAL; +- +- if (!cpc_desc) { +- pr_debug("No CPC descriptor for CPU:%d\n", cpu); +- return -EINVAL; +- } +- +- enable_reg = &cpc_desc->cpc_regs[ENABLE]; +- +- if (CPC_IN_PCC(enable_reg)) { +- +- if (pcc_ss_id < 0) +- return -EIO; +- +- ret = cpc_write(cpu, enable_reg, enable); +- if (ret) +- return ret; +- +- pcc_ss_data = pcc_data[pcc_ss_id]; +- +- down_write(&pcc_ss_data->pcc_lock); +- /* after writing CPC, transfer the ownership of PCC to platfrom */ +- ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE); +- up_write(&pcc_ss_data->pcc_lock); +- return ret; +- } +- +- return cpc_write(cpu, enable_reg, enable); +-} +-EXPORT_SYMBOL_GPL(cppc_set_enable); +- + /** + * cppc_set_perf - Set a CPU's performance controls. + * @cpu: CPU for which to set performance controls. 
+diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 +index 2e798b2c0bdb..92701a18bdd9 100644 +--- a/drivers/cpufreq/Kconfig.x86 ++++ b/drivers/cpufreq/Kconfig.x86 +@@ -34,23 +34,6 @@ config X86_PCC_CPUFREQ + + If in doubt, say N. + +-config X86_AMD_PSTATE +- bool "AMD Processor P-State driver" +- depends on X86 +- select ACPI_PROCESSOR if ACPI +- select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO +- select CPU_FREQ_GOV_SCHEDUTIL if SMP +- help +- This driver adds a CPUFreq driver which utilizes a fine grain +- processor performance freqency control range instead of legacy +- performance levels. This driver supports the AMD processors with +- _CPC object in the SBIOS. +- +- For details, take a look at: +- <file:Documentation/admin-guide/pm/amd-pstate.rst>. +- +- If in doubt, say N. +- + config X86_ACPI_CPUFREQ + tristate "ACPI Processor P-States driver" + depends on ACPI_PROCESSOR +diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile +index c8d307010922..48ee5859030c 100644 +--- a/drivers/cpufreq/Makefile ++++ b/drivers/cpufreq/Makefile +@@ -25,7 +25,6 @@ obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o + # speedstep-* is preferred over p4-clockmod. + + obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o +-obj-$(CONFIG_X86_AMD_PSTATE) += amd-pstate.o + obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o + obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o + obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o +diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c +deleted file mode 100644 +index 58ee50bf492b..000000000000 +--- a/drivers/cpufreq/amd-pstate.c ++++ /dev/null +@@ -1,663 +0,0 @@ +-/* +- * amd-pstate.c - AMD Processor P-state Frequency Driver +- * +- * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved. 
+- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version 2 +- * of the License, or (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License along with +- * this program; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +- * +- * Author: Huang Rui <ray.huang@amd.com> +- */ +- +-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +- +-#include <linux/kernel.h> +-#include <linux/module.h> +-#include <linux/init.h> +-#include <linux/smp.h> +-#include <linux/sched.h> +-#include <linux/cpufreq.h> +-#include <linux/compiler.h> +-#include <linux/dmi.h> +-#include <linux/slab.h> +-#include <linux/acpi.h> +-#include <linux/io.h> +-#include <linux/delay.h> +-#include <linux/uaccess.h> +-#include <linux/static_call.h> +-#include <trace/events/power.h> +- +-#include <acpi/processor.h> +-#include <acpi/cppc_acpi.h> +- +-#include <asm/msr.h> +-#include <asm/processor.h> +-#include <asm/cpufeature.h> +-#include <asm/cpu_device_id.h> +- +-#define AMD_PSTATE_TRANSITION_LATENCY 0x20000 +-#define AMD_PSTATE_TRANSITION_DELAY 500 +- +-static struct cpufreq_driver amd_pstate_driver; +- +-struct amd_cpudata { +- int cpu; +- +- struct freq_qos_request req[2]; +- +- u64 cppc_req_cached; +- +- u32 highest_perf; +- u32 nominal_perf; +- u32 lowest_nonlinear_perf; +- u32 lowest_perf; +- +- u32 max_freq; +- u32 min_freq; +- u32 nominal_freq; +- u32 lowest_nonlinear_freq; +- +- bool boost_supported; +-}; +- +-static inline int pstate_enable(bool enable) +-{ +- return 
wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable ? 1 : 0); +-} +- +-static int cppc_enable(bool enable) +-{ +- int cpu, ret = 0; +- +- for_each_online_cpu(cpu) { +- ret = cppc_set_enable(cpu, enable ? 1 : 0); +- if (ret) +- return ret; +- } +- +- return ret; +-} +- +-DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable); +- +-static inline int amd_pstate_enable(bool enable) +-{ +- return static_call(amd_pstate_enable)(enable); +-} +- +-static int pstate_init_perf(struct amd_cpudata *cpudata) +-{ +- u64 cap1; +- +- int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, +- &cap1); +- if (ret) +- return ret; +- +- /* +- * TODO: Introduce AMD specific power feature. +- * +- * CPPC entry doesn't indicate the highest performance in some ASICs. +- */ +- WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf()); +- +- WRITE_ONCE(cpudata->nominal_perf, CAP1_NOMINAL_PERF(cap1)); +- WRITE_ONCE(cpudata->lowest_nonlinear_perf, CAP1_LOWNONLIN_PERF(cap1)); +- WRITE_ONCE(cpudata->lowest_perf, CAP1_LOWEST_PERF(cap1)); +- +- return 0; +-} +- +-static int cppc_init_perf(struct amd_cpudata *cpudata) +-{ +- struct cppc_perf_caps cppc_perf; +- +- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); +- if (ret) +- return ret; +- +- WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf()); +- +- WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); +- WRITE_ONCE(cpudata->lowest_nonlinear_perf, +- cppc_perf.lowest_nonlinear_perf); +- WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); +- +- return 0; +-} +- +-DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf); +- +-static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) +-{ +- return static_call(amd_pstate_init_perf)(cpudata); +-} +- +-static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, +- u32 des_perf, u32 max_perf, bool fast_switch) +-{ +- if (fast_switch) +- wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); +- else +- wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, +- 
READ_ONCE(cpudata->cppc_req_cached)); +-} +- +-static void cppc_update_perf(struct amd_cpudata *cpudata, +- u32 min_perf, u32 des_perf, +- u32 max_perf, bool fast_switch) +-{ +- struct cppc_perf_ctrls perf_ctrls; +- +- perf_ctrls.max_perf = max_perf; +- perf_ctrls.min_perf = min_perf; +- perf_ctrls.desired_perf = des_perf; +- +- cppc_set_perf(cpudata->cpu, &perf_ctrls); +-} +- +-DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf); +- +-static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, +- u32 min_perf, u32 des_perf, +- u32 max_perf, bool fast_switch) +-{ +- static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, +- max_perf, fast_switch); +-} +- +-static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, +- u32 des_perf, u32 max_perf, bool fast_switch) +-{ +- u64 prev = READ_ONCE(cpudata->cppc_req_cached); +- u64 value = prev; +- +- value &= ~REQ_MIN_PERF(~0L); +- value |= REQ_MIN_PERF(min_perf); +- +- value &= ~REQ_DES_PERF(~0L); +- value |= REQ_DES_PERF(des_perf); +- +- value &= ~REQ_MAX_PERF(~0L); +- value |= REQ_MAX_PERF(max_perf); +- +- trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->cpu, +- (value != prev), fast_switch); +- +- if (value == prev) +- return; +- +- WRITE_ONCE(cpudata->cppc_req_cached, value); +- +- amd_pstate_update_perf(cpudata, min_perf, des_perf, +- max_perf, fast_switch); +-} +- +-static int amd_pstate_verify(struct cpufreq_policy_data *policy) +-{ +- cpufreq_verify_within_cpu_limits(policy); +- +- return 0; +-} +- +-static int amd_pstate_target(struct cpufreq_policy *policy, +- unsigned int target_freq, +- unsigned int relation) +-{ +- struct cpufreq_freqs freqs; +- struct amd_cpudata *cpudata = policy->driver_data; +- unsigned long amd_max_perf, amd_min_perf, amd_des_perf, +- amd_cap_perf; +- +- if (!cpudata->max_freq) +- return -ENODEV; +- +- amd_cap_perf = READ_ONCE(cpudata->highest_perf); +- amd_min_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); +- amd_max_perf = 
amd_cap_perf; +- +- freqs.old = policy->cur; +- freqs.new = target_freq; +- +- amd_des_perf = DIV_ROUND_CLOSEST(target_freq * amd_cap_perf, +- cpudata->max_freq); +- +- cpufreq_freq_transition_begin(policy, &freqs); +- amd_pstate_update(cpudata, amd_min_perf, amd_des_perf, +- amd_max_perf, false); +- cpufreq_freq_transition_end(policy, &freqs, false); +- +- return 0; +-} +- +-static void amd_pstate_adjust_perf(unsigned int cpu, +- unsigned long min_perf, +- unsigned long target_perf, +- unsigned long capacity) +-{ +- unsigned long amd_max_perf, amd_min_perf, amd_des_perf, +- amd_cap_perf, lowest_nonlinear_perf; +- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); +- struct amd_cpudata *cpudata = policy->driver_data; +- +- amd_cap_perf = READ_ONCE(cpudata->highest_perf); +- lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); +- +- if (target_perf < capacity) +- amd_des_perf = DIV_ROUND_UP(amd_cap_perf * target_perf, +- capacity); +- +- amd_min_perf = READ_ONCE(cpudata->highest_perf); +- if (min_perf < capacity) +- amd_min_perf = DIV_ROUND_UP(amd_cap_perf * min_perf, capacity); +- +- if (amd_min_perf < lowest_nonlinear_perf) +- amd_min_perf = lowest_nonlinear_perf; +- +- amd_max_perf = amd_cap_perf; +- if (amd_max_perf < amd_min_perf) +- amd_max_perf = amd_min_perf; +- +- amd_des_perf = clamp_t(unsigned long, amd_des_perf, +- amd_min_perf, amd_max_perf); +- +- amd_pstate_update(cpudata, amd_min_perf, amd_des_perf, +- amd_max_perf, true); +-} +- +-static int amd_get_min_freq(struct amd_cpudata *cpudata) +-{ +- struct cppc_perf_caps cppc_perf; +- +- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); +- if (ret) +- return ret; +- +- /* Switch to khz */ +- return cppc_perf.lowest_freq * 1000; +-} +- +-static int amd_get_max_freq(struct amd_cpudata *cpudata) +-{ +- struct cppc_perf_caps cppc_perf; +- u32 max_perf, max_freq, nominal_freq, nominal_perf; +- u64 boost_ratio; +- +- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); +- if (ret) +- 
return ret; +- +- nominal_freq = cppc_perf.nominal_freq; +- nominal_perf = READ_ONCE(cpudata->nominal_perf); +- max_perf = READ_ONCE(cpudata->highest_perf); +- +- boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT, +- nominal_perf); +- +- max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT; +- +- /* Switch to khz */ +- return max_freq * 1000; +-} +- +-static int amd_get_nominal_freq(struct amd_cpudata *cpudata) +-{ +- struct cppc_perf_caps cppc_perf; +- u32 nominal_freq; +- +- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); +- if (ret) +- return ret; +- +- nominal_freq = cppc_perf.nominal_freq; +- +- /* Switch to khz */ +- return nominal_freq * 1000; +-} +- +-static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) +-{ +- struct cppc_perf_caps cppc_perf; +- u32 lowest_nonlinear_freq, lowest_nonlinear_perf, +- nominal_freq, nominal_perf; +- u64 lowest_nonlinear_ratio; +- +- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); +- if (ret) +- return ret; +- +- nominal_freq = cppc_perf.nominal_freq; +- nominal_perf = READ_ONCE(cpudata->nominal_perf); +- +- lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; +- +- lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << +- SCHED_CAPACITY_SHIFT, nominal_perf); +- +- lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT; +- +- /* Switch to khz */ +- return lowest_nonlinear_freq * 1000; +-} +- +-static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) +-{ +- struct amd_cpudata *cpudata = policy->driver_data; +- int ret; +- +- if (!cpudata->boost_supported) { +- pr_err("Boost mode is not supported by this processor or SBIOS\n"); +- return -EINVAL; +- } +- +- if (state) +- policy->cpuinfo.max_freq = cpudata->max_freq; +- else +- policy->cpuinfo.max_freq = cpudata->nominal_freq; +- +- policy->max = policy->cpuinfo.max_freq; +- +- ret = freq_qos_update_request(&cpudata->req[1], +- policy->cpuinfo.max_freq); +- if (ret < 0) +- return 
ret; +- +- return 0; +-} +- +-static void amd_pstate_boost_init(struct amd_cpudata *cpudata) +-{ +- u32 highest_perf, nominal_perf; +- +- highest_perf = READ_ONCE(cpudata->highest_perf); +- nominal_perf = READ_ONCE(cpudata->nominal_perf); +- +- if (highest_perf <= nominal_perf) +- return; +- +- cpudata->boost_supported = true; +- amd_pstate_driver.boost_enabled = true; +-} +- +-static int amd_pstate_cpu_init(struct cpufreq_policy *policy) +-{ +- int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; +- unsigned int cpu = policy->cpu; +- struct device *dev; +- struct amd_cpudata *cpudata; +- +- dev = get_cpu_device(policy->cpu); +- if (!dev) +- return -ENODEV; +- +- cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL); +- if (!cpudata) +- return -ENOMEM; +- +- cpudata->cpu = cpu; +- +- ret = amd_pstate_init_perf(cpudata); +- if (ret) +- goto free_cpudata1; +- +- min_freq = amd_get_min_freq(cpudata); +- max_freq = amd_get_max_freq(cpudata); +- nominal_freq = amd_get_nominal_freq(cpudata); +- lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); +- +- if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { +- dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", +- min_freq, max_freq); +- ret = -EINVAL; +- goto free_cpudata1; +- } +- +- policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY; +- policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY; +- +- policy->min = min_freq; +- policy->max = max_freq; +- +- policy->cpuinfo.min_freq = min_freq; +- policy->cpuinfo.max_freq = max_freq; +- +- /* It will be updated by governor */ +- policy->cur = policy->cpuinfo.min_freq; +- +- if (boot_cpu_has(X86_FEATURE_AMD_CPPC)) +- policy->fast_switch_possible = true; +- +- ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0], +- FREQ_QOS_MIN, policy->cpuinfo.min_freq); +- if (ret < 0) { +- dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); +- goto free_cpudata1; +- } +- +- ret = 
freq_qos_add_request(&policy->constraints, &cpudata->req[1], +- FREQ_QOS_MAX, policy->cpuinfo.max_freq); +- if (ret < 0) { +- dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret); +- goto free_cpudata2; +- } +- +- /* Initial processor data capability frequencies */ +- cpudata->max_freq = max_freq; +- cpudata->min_freq = min_freq; +- cpudata->nominal_freq = nominal_freq; +- cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; +- +- policy->driver_data = cpudata; +- +- amd_pstate_boost_init(cpudata); +- +- return 0; +- +- freq_qos_remove_request(&cpudata->req[1]); +-free_cpudata2: +- freq_qos_remove_request(&cpudata->req[0]); +-free_cpudata1: +- kfree(cpudata); +- return ret; +-} +- +-static int amd_pstate_cpu_exit(struct cpufreq_policy *policy) +-{ +- struct amd_cpudata *cpudata; +- +- cpudata = policy->driver_data; +- +- freq_qos_remove_request(&cpudata->req[1]); +- freq_qos_remove_request(&cpudata->req[0]); +- kfree(cpudata); +- +- return 0; +-} +- +-/* Sysfs attributes */ +- +-/* This frequency is to indicate the maximum hardware frequency. +- * If boost is not active but supported, the frequency will be larger than the +- * one in cpuinfo. 
+- */ +-static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, +- char *buf) +-{ +- int max_freq; +- struct amd_cpudata *cpudata; +- +- cpudata = policy->driver_data; +- +- max_freq = amd_get_max_freq(cpudata); +- if (max_freq < 0) +- return max_freq; +- +- return sprintf(&buf[0], "%u\n", max_freq); +-} +- +-static ssize_t show_amd_pstate_nominal_freq(struct cpufreq_policy *policy, +- char *buf) +-{ +- int nominal_freq; +- struct amd_cpudata *cpudata; +- +- cpudata = policy->driver_data; +- +- nominal_freq = amd_get_nominal_freq(cpudata); +- if (nominal_freq < 0) +- return nominal_freq; +- +- return sprintf(&buf[0], "%u\n", nominal_freq); +-} +- +-static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, +- char *buf) +-{ +- int freq; +- struct amd_cpudata *cpudata; +- +- cpudata = policy->driver_data; +- +- freq = amd_get_lowest_nonlinear_freq(cpudata); +- if (freq < 0) +- return freq; +- +- return sprintf(&buf[0], "%u\n", freq); +-} +- +-static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, +- char *buf) +-{ +- u32 perf; +- struct amd_cpudata *cpudata = policy->driver_data; +- +- perf = READ_ONCE(cpudata->highest_perf); +- +- return sprintf(&buf[0], "%u\n", perf); +-} +- +-static ssize_t show_amd_pstate_nominal_perf(struct cpufreq_policy *policy, +- char *buf) +-{ +- u32 perf; +- struct amd_cpudata *cpudata = policy->driver_data; +- +- perf = READ_ONCE(cpudata->nominal_perf); +- +- return sprintf(&buf[0], "%u\n", perf); +-} +- +-static ssize_t show_amd_pstate_lowest_nonlinear_perf(struct cpufreq_policy *policy, +- char *buf) +-{ +- u32 perf; +- struct amd_cpudata *cpudata = policy->driver_data; +- +- perf = READ_ONCE(cpudata->lowest_nonlinear_perf); +- +- return sprintf(&buf[0], "%u\n", perf); +-} +- +-static ssize_t show_amd_pstate_lowest_perf(struct cpufreq_policy *policy, +- char *buf) +-{ +- u32 perf; +- struct amd_cpudata *cpudata = policy->driver_data; +- +- perf = 
READ_ONCE(cpudata->lowest_perf); +- +- return sprintf(&buf[0], "%u\n", perf); +-} +- +-cpufreq_freq_attr_ro(amd_pstate_max_freq); +-cpufreq_freq_attr_ro(amd_pstate_nominal_freq); +-cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); +- +-cpufreq_freq_attr_ro(amd_pstate_highest_perf); +-cpufreq_freq_attr_ro(amd_pstate_nominal_perf); +-cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_perf); +-cpufreq_freq_attr_ro(amd_pstate_lowest_perf); +- +-static struct freq_attr *amd_pstate_attr[] = { +- &amd_pstate_max_freq, +- &amd_pstate_nominal_freq, +- &amd_pstate_lowest_nonlinear_freq, +- &amd_pstate_highest_perf, +- &amd_pstate_nominal_perf, +- &amd_pstate_lowest_nonlinear_perf, +- &amd_pstate_lowest_perf, +- NULL, +-}; +- +-static struct cpufreq_driver amd_pstate_driver = { +- .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS, +- .verify = amd_pstate_verify, +- .target = amd_pstate_target, +- .init = amd_pstate_cpu_init, +- .exit = amd_pstate_cpu_exit, +- .set_boost = amd_pstate_set_boost, +- .name = "amd-pstate", +- .attr = amd_pstate_attr, +-}; +- +-static int __init amd_pstate_init(void) +-{ +- int ret; +- +- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) +- return -ENODEV; +- +- if (!acpi_cpc_valid()) { +- pr_debug("%s, the _CPC object is not present in SBIOS\n", +- __func__); +- return -ENODEV; +- } +- +- /* don't keep reloading if cpufreq_driver exists */ +- if (cpufreq_get_current_driver()) +- return -EEXIST; +- +- /* capability check */ +- if (boot_cpu_has(X86_FEATURE_AMD_CPPC)) { +- pr_debug("%s, AMD CPPC MSR based functionality is supported\n", +- __func__); +- amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; +- } else { +- static_call_update(amd_pstate_enable, cppc_enable); +- static_call_update(amd_pstate_init_perf, cppc_init_perf); +- static_call_update(amd_pstate_update_perf, cppc_update_perf); +- } +- +- /* enable amd pstate feature */ +- ret = amd_pstate_enable(true); +- if (ret) { +- pr_err("%s, failed to enable amd-pstate with return 
%d\n", +- __func__, ret); +- return ret; +- } +- +- ret = cpufreq_register_driver(&amd_pstate_driver); +- if (ret) { +- pr_err("%s, return %d\n", __func__, ret); +- return ret; +- } +- +- return 0; +-} +- +-device_initcall(amd_pstate_init); +- +-MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>"); +-MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); +-MODULE_LICENSE("GPL"); +diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h +index 92b7ea8d8f5e..bc159a9b4a73 100644 +--- a/include/acpi/cppc_acpi.h ++++ b/include/acpi/cppc_acpi.h +@@ -138,7 +138,6 @@ extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); + extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); + extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); + extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); +-extern int cppc_set_enable(int cpu, bool enable); + extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); + extern bool acpi_cpc_valid(void); + extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data); +@@ -163,10 +162,6 @@ static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) + { + return -ENOTSUPP; + } +-static inline int cppc_set_enable(int cpu, bool enable) +-{ +- return -ENOTSUPP; +-} + static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps) + { + return -ENOTSUPP; +diff --git a/include/trace/events/power.h b/include/trace/events/power.h +index c95c0b8d443d..af5018aa9517 100644 +--- a/include/trace/events/power.h ++++ b/include/trace/events/power.h +@@ -173,52 +173,6 @@ TRACE_EVENT(cpu_frequency_limits, + (unsigned long)__entry->cpu_id) + ); + +-TRACE_EVENT(amd_pstate_perf, +- +- TP_PROTO(unsigned long min_perf, +- unsigned long target_perf, +- unsigned long capacity, +- unsigned int cpu_id, +- bool changed, +- bool fast_switch +- ), +- +- TP_ARGS(min_perf, +- target_perf, +- capacity, +- cpu_id, +- changed, +- fast_switch +- ), +- +- 
TP_STRUCT__entry( +- __field(unsigned long, min_perf) +- __field(unsigned long, target_perf) +- __field(unsigned long, capacity) +- __field(unsigned int, cpu_id) +- __field(bool, changed) +- __field(bool, fast_switch) +- ), +- +- TP_fast_assign( +- __entry->min_perf = min_perf; +- __entry->target_perf = target_perf; +- __entry->capacity = capacity; +- __entry->cpu_id = cpu_id; +- __entry->changed = changed; +- __entry->fast_switch = fast_switch; +- ), +- +- TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s", +- (unsigned long)__entry->min_perf, +- (unsigned long)__entry->target_perf, +- (unsigned long)__entry->capacity, +- (unsigned int)__entry->cpu_id, +- (__entry->changed) ? "true" : "false", +- (__entry->fast_switch) ? "true" : "false" +- ) +-); +- + TRACE_EVENT(device_pm_callback_start, + + TP_PROTO(struct device *dev, const char *pm_ops, int event), +diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c +index 02719cc400a1..c3b56db8b921 100644 +--- a/tools/power/cpupower/lib/cpufreq.c ++++ b/tools/power/cpupower/lib/cpufreq.c +@@ -83,21 +83,20 @@ static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = { + [STATS_NUM_TRANSITIONS] = "stats/total_trans" + }; + +-unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu, +- const char **table, +- unsigned index, +- unsigned size) ++ ++static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, ++ enum cpufreq_value which) + { + unsigned long value; + unsigned int len; + char linebuf[MAX_LINE_LEN]; + char *endp; + +- if (!table && !table[index] && index >= size) ++ if (which >= MAX_CPUFREQ_VALUE_READ_FILES) + return 0; + +- len = sysfs_cpufreq_read_file(cpu, table[index], linebuf, +- sizeof(linebuf)); ++ len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which], ++ linebuf, sizeof(linebuf)); + + if (len == 0) + return 0; +@@ -110,14 +109,6 @@ unsigned long cpufreq_get_sysfs_value_from_table(unsigned int 
cpu, + return value; + } + +-static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, +- enum cpufreq_value which) +-{ +- return cpufreq_get_sysfs_value_from_table(cpu, cpufreq_value_files, +- which, +- MAX_CPUFREQ_VALUE_READ_FILES); +-} +- + /* read access to files which contain one string */ + + enum cpufreq_string { +diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h +index 107668c0c454..95f4fd9e2656 100644 +--- a/tools/power/cpupower/lib/cpufreq.h ++++ b/tools/power/cpupower/lib/cpufreq.h +@@ -203,18 +203,6 @@ int cpufreq_modify_policy_governor(unsigned int cpu, char *governor); + int cpufreq_set_frequency(unsigned int cpu, + unsigned long target_frequency); + +-/* +- * get the sysfs value from specific table +- * +- * Read the value with the sysfs file name from specific table. Does +- * only work if the cpufreq driver has the specific sysfs interfaces. +- */ +- +-unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu, +- const char **table, +- unsigned index, +- unsigned size); +- + #ifdef __cplusplus + } + #endif +diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c +index f828f3c35a6f..f9895e31ff5a 100644 +--- a/tools/power/cpupower/utils/cpufreq-info.c ++++ b/tools/power/cpupower/utils/cpufreq-info.c +@@ -84,6 +84,43 @@ static void proc_cpufreq_output(void) + } + + static int no_rounding; ++static void print_speed(unsigned long speed) ++{ ++ unsigned long tmp; ++ ++ if (no_rounding) { ++ if (speed > 1000000) ++ printf("%u.%06u GHz", ((unsigned int) speed/1000000), ++ ((unsigned int) speed%1000000)); ++ else if (speed > 1000) ++ printf("%u.%03u MHz", ((unsigned int) speed/1000), ++ (unsigned int) (speed%1000)); ++ else ++ printf("%lu kHz", speed); ++ } else { ++ if (speed > 1000000) { ++ tmp = speed%10000; ++ if (tmp >= 5000) ++ speed += 10000; ++ printf("%u.%02u GHz", ((unsigned int) speed/1000000), ++ ((unsigned int) (speed%1000000)/10000)); ++ } else if 
(speed > 100000) { ++ tmp = speed%1000; ++ if (tmp >= 500) ++ speed += 1000; ++ printf("%u MHz", ((unsigned int) speed/1000)); ++ } else if (speed > 1000) { ++ tmp = speed%100; ++ if (tmp >= 50) ++ speed += 100; ++ printf("%u.%01u MHz", ((unsigned int) speed/1000), ++ ((unsigned int) (speed%1000)/100)); ++ } ++ } ++ ++ return; ++} ++ + static void print_duration(unsigned long duration) + { + unsigned long tmp; +@@ -146,12 +183,9 @@ static int get_boost_mode_x86(unsigned int cpu) + printf(_(" Supported: %s\n"), support ? _("yes") : _("no")); + printf(_(" Active: %s\n"), active ? _("yes") : _("no")); + +- if (cpupower_cpu_info.vendor == X86_VENDOR_AMD && +- cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) { +- amd_pstate_show_perf_and_freq(cpu, no_rounding); +- } else if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD && +- cpupower_cpu_info.family >= 0x10) || +- cpupower_cpu_info.vendor == X86_VENDOR_HYGON) { ++ if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD && ++ cpupower_cpu_info.family >= 0x10) || ++ cpupower_cpu_info.vendor == X86_VENDOR_HYGON) { + ret = decode_pstates(cpu, b_states, pstates, &pstate_no); + if (ret) + return ret; +@@ -220,11 +254,11 @@ static int get_boost_mode(unsigned int cpu) + if (freqs) { + printf(_(" boost frequency steps: ")); + while (freqs->next) { +- print_speed(freqs->frequency, no_rounding); ++ print_speed(freqs->frequency); + printf(", "); + freqs = freqs->next; + } +- print_speed(freqs->frequency, no_rounding); ++ print_speed(freqs->frequency); + printf("\n"); + cpufreq_put_available_frequencies(freqs); + } +@@ -243,7 +277,7 @@ static int get_freq_kernel(unsigned int cpu, unsigned int human) + return -EINVAL; + } + if (human) { +- print_speed(freq, no_rounding); ++ print_speed(freq); + } else + printf("%lu", freq); + printf(_(" (asserted by call to kernel)\n")); +@@ -262,7 +296,7 @@ static int get_freq_hardware(unsigned int cpu, unsigned int human) + return -EINVAL; + } + if (human) { +- print_speed(freq, no_rounding); ++ 
print_speed(freq); + } else + printf("%lu", freq); + printf(_(" (asserted by call to hardware)\n")); +@@ -282,9 +316,9 @@ static int get_hardware_limits(unsigned int cpu, unsigned int human) + + if (human) { + printf(_(" hardware limits: ")); +- print_speed(min, no_rounding); ++ print_speed(min); + printf(" - "); +- print_speed(max, no_rounding); ++ print_speed(max); + printf("\n"); + } else { + printf("%lu %lu\n", min, max); +@@ -316,9 +350,9 @@ static int get_policy(unsigned int cpu) + return -EINVAL; + } + printf(_(" current policy: frequency should be within ")); +- print_speed(policy->min, no_rounding); ++ print_speed(policy->min); + printf(_(" and ")); +- print_speed(policy->max, no_rounding); ++ print_speed(policy->max); + + printf(".\n "); + printf(_("The governor \"%s\" may decide which speed to use\n" +@@ -402,7 +436,7 @@ static int get_freq_stats(unsigned int cpu, unsigned int human) + struct cpufreq_stats *stats = cpufreq_get_stats(cpu, &total_time); + while (stats) { + if (human) { +- print_speed(stats->frequency, no_rounding); ++ print_speed(stats->frequency); + printf(":%.2f%%", + (100.0 * stats->time_in_state) / total_time); + } else +@@ -452,11 +486,11 @@ static void debug_output_one(unsigned int cpu) + if (freqs) { + printf(_(" available frequency steps: ")); + while (freqs->next) { +- print_speed(freqs->frequency, no_rounding); ++ print_speed(freqs->frequency); + printf(", "); + freqs = freqs->next; + } +- print_speed(freqs->frequency, no_rounding); ++ print_speed(freqs->frequency); + printf("\n"); + cpufreq_put_available_frequencies(freqs); + } +diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c +index fa38d3da42ce..97f2c857048e 100644 +--- a/tools/power/cpupower/utils/helpers/amd.c ++++ b/tools/power/cpupower/utils/helpers/amd.c +@@ -8,9 +8,7 @@ + #include <pci/pci.h> + + #include "helpers/helpers.h" +-#include "cpufreq.h" + +-/* ACPI P-States Helper Functions for AMD Processors ***************/ + 
#define MSR_AMD_PSTATE_STATUS 0xc0010063 + #define MSR_AMD_PSTATE 0xc0010064 + #define MSR_AMD_PSTATE_LIMIT 0xc0010061 +@@ -148,89 +146,4 @@ int amd_pci_get_num_boost_states(int *active, int *states) + pci_cleanup(pci_acc); + return 0; + } +- +-/* ACPI P-States Helper Functions for AMD Processors ***************/ +- +-/* AMD P-States Helper Functions ***************/ +-enum amd_pstate_value { +- AMD_PSTATE_HIGHEST_PERF, +- AMD_PSTATE_NOMINAL_PERF, +- AMD_PSTATE_LOWEST_NONLINEAR_PERF, +- AMD_PSTATE_LOWEST_PERF, +- AMD_PSTATE_MAX_FREQ, +- AMD_PSTATE_NOMINAL_FREQ, +- AMD_PSTATE_LOWEST_NONLINEAR_FREQ, +- MAX_AMD_PSTATE_VALUE_READ_FILES +-}; +- +-static const char *amd_pstate_value_files[MAX_AMD_PSTATE_VALUE_READ_FILES] = { +- [AMD_PSTATE_HIGHEST_PERF] = "amd_pstate_highest_perf", +- [AMD_PSTATE_NOMINAL_PERF] = "amd_pstate_nominal_perf", +- [AMD_PSTATE_LOWEST_NONLINEAR_PERF] = "amd_pstate_lowest_nonlinear_perf", +- [AMD_PSTATE_LOWEST_PERF] = "amd_pstate_lowest_perf", +- [AMD_PSTATE_MAX_FREQ] = "amd_pstate_max_freq", +- [AMD_PSTATE_NOMINAL_FREQ] = "amd_pstate_nominal_freq", +- [AMD_PSTATE_LOWEST_NONLINEAR_FREQ] = "amd_pstate_lowest_nonlinear_freq", +-}; +- +-static unsigned long amd_pstate_get_data(unsigned int cpu, +- enum amd_pstate_value value) +-{ +- return cpufreq_get_sysfs_value_from_table(cpu, +- amd_pstate_value_files, +- value, +- MAX_AMD_PSTATE_VALUE_READ_FILES); +-} +- +-void amd_pstate_boost_init(unsigned int cpu, int *support, int *active) +-{ +- unsigned long highest_perf, nominal_perf, cpuinfo_min, +- cpuinfo_max, amd_pstate_max; +- +- highest_perf = amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF); +- nominal_perf = amd_pstate_get_data(cpu, AMD_PSTATE_NOMINAL_PERF); +- +- *support = highest_perf > nominal_perf ? 1 : 0; +- if (!(*support)) +- return; +- +- cpufreq_get_hardware_limits(cpu, &cpuinfo_min, &cpuinfo_max); +- amd_pstate_max = amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ); +- +- *active = cpuinfo_max == amd_pstate_max ? 
1 : 0; +-} +- +-void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding) +-{ +- unsigned long cpuinfo_max, cpuinfo_min; +- +- cpufreq_get_hardware_limits(cpu, &cpuinfo_min, &cpuinfo_max); +- +- printf(_(" AMD PSTATE Highest Performance: %lu. Maximum Frequency: "), +- amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF)); +- /* If boost isn't active, the cpuinfo_max doesn't indicate real max +- * frequency. So we read it back from amd-pstate sysfs entry. +- */ +- print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ), no_rounding); +- printf(".\n"); +- +- printf(_(" AMD PSTATE Nominal Performance: %lu. Nominal Frequency: "), +- amd_pstate_get_data(cpu, AMD_PSTATE_NOMINAL_PERF)); +- print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_NOMINAL_FREQ), +- no_rounding); +- printf(".\n"); +- +- printf(_(" AMD PSTATE Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "), +- amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_NONLINEAR_PERF)); +- print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_NONLINEAR_FREQ), +- no_rounding); +- printf(".\n"); +- +- printf(_(" AMD PSTATE Lowest Performance: %lu. 
Lowest Frequency: "), +- amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_PERF)); +- print_speed(cpuinfo_min, no_rounding); +- printf(".\n"); +-} +- +-/* AMD P-States Helper Functions ***************/ + #endif /* defined(__i386__) || defined(__x86_64__) */ +diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c +index 2a6dc104e76b..72eb43593180 100644 +--- a/tools/power/cpupower/utils/helpers/cpuid.c ++++ b/tools/power/cpupower/utils/helpers/cpuid.c +@@ -149,19 +149,6 @@ int get_cpu_info(struct cpupower_cpu_info *cpu_info) + if (ext_cpuid_level >= 0x80000008 && + cpuid_ebx(0x80000008) & (1 << 4)) + cpu_info->caps |= CPUPOWER_CAP_AMD_RDPRU; +- +- if (cpupower_amd_pstate_enabled()) { +- cpu_info->caps |= CPUPOWER_CAP_AMD_PSTATE; +- +- /* +- * If AMD P-state is enabled, the firmware will treat +- * AMD P-state function as high priority. +- */ +- cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB; +- cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB_MSR; +- cpu_info->caps &= ~CPUPOWER_CAP_AMD_HW_PSTATE; +- cpu_info->caps &= ~CPUPOWER_CAP_AMD_PSTATEDEF; +- } + } + + if (cpu_info->vendor == X86_VENDOR_INTEL) { +diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h +index 5f6862502dbf..33ffacee7fcb 100644 +--- a/tools/power/cpupower/utils/helpers/helpers.h ++++ b/tools/power/cpupower/utils/helpers/helpers.h +@@ -11,7 +11,6 @@ + + #include <libintl.h> + #include <locale.h> +-#include <stdbool.h> + + #include "helpers/bitmask.h" + #include <cpupower.h> +@@ -74,7 +73,6 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL, + #define CPUPOWER_CAP_AMD_HW_PSTATE 0x00000100 + #define CPUPOWER_CAP_AMD_PSTATEDEF 0x00000200 + #define CPUPOWER_CAP_AMD_CPB_MSR 0x00000400 +-#define CPUPOWER_CAP_AMD_PSTATE 0x00000800 + + #define CPUPOWER_AMD_CPBDIS 0x02000000 + +@@ -137,16 +135,6 @@ extern int decode_pstates(unsigned int cpu, int boost_states, + + extern int cpufreq_has_boost_support(unsigned int cpu, 
int *support, + int *active, int * states); +- +-/* AMD P-States stuff **************************/ +-extern bool cpupower_amd_pstate_enabled(void); +-extern void amd_pstate_boost_init(unsigned int cpu, +- int *support, int *active); +-extern void amd_pstate_show_perf_and_freq(unsigned int cpu, +- int no_rounding); +- +-/* AMD P-States stuff **************************/ +- + /* + * CPUID functions returning a single datum + */ +@@ -179,15 +167,6 @@ static inline int cpufreq_has_boost_support(unsigned int cpu, int *support, + int *active, int * states) + { return -1; } + +-static inline bool cpupower_amd_pstate_enabled(void) +-{ return false; } +-static void amd_pstate_boost_init(unsigned int cpu, +- int *support, int *active) +-{ return; } +-static inline void amd_pstate_show_perf_and_freq(unsigned int cpu, +- int no_rounding) +-{ return; } +- + /* cpuid and cpuinfo helpers **************************/ + + static inline unsigned int cpuid_eax(unsigned int op) { return 0; }; +@@ -205,6 +184,5 @@ extern struct bitmask *offline_cpus; + void get_cpustate(void); + void print_online_cpus(void); + void print_offline_cpus(void); +-void print_speed(unsigned long speed, int no_rounding); + + #endif /* __CPUPOWERUTILS_HELPERS__ */ +diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c +index d693c96cd09c..fc6e34511721 100644 +--- a/tools/power/cpupower/utils/helpers/misc.c ++++ b/tools/power/cpupower/utils/helpers/misc.c +@@ -3,11 +3,9 @@ + #include <stdio.h> + #include <errno.h> + #include <stdlib.h> +-#include <string.h> + + #include "helpers/helpers.h" + #include "helpers/sysfs.h" +-#include "cpufreq.h" + + #if defined(__i386__) || defined(__x86_64__) + +@@ -41,8 +39,6 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, + if (ret) + return ret; + } +- } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) { +- amd_pstate_boost_init(cpu, support, active); + } else if (cpupower_cpu_info.caps & 
CPUPOWER_CAP_INTEL_IDA) + *support = *active = 1; + return 0; +@@ -87,22 +83,6 @@ int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val) + return 0; + } + +-bool cpupower_amd_pstate_enabled(void) +-{ +- char *driver = cpufreq_get_driver(0); +- bool ret = false; +- +- if (!driver) +- return ret; +- +- if (!strcmp(driver, "amd-pstate")) +- ret = true; +- +- cpufreq_put_driver(driver); +- +- return ret; +-} +- + #endif /* #if defined(__i386__) || defined(__x86_64__) */ + + /* get_cpustate +@@ -164,45 +144,3 @@ void print_offline_cpus(void) + printf(_("cpupower set operation was not performed on them\n")); + } + } +- +-/* +- * print_speed +- * +- * Print the exact CPU frequency with appropriate unit +- */ +-void print_speed(unsigned long speed, int no_rounding) +-{ +- unsigned long tmp; +- +- if (no_rounding) { +- if (speed > 1000000) +- printf("%u.%06u GHz", ((unsigned int) speed/1000000), +- ((unsigned int) speed%1000000)); +- else if (speed > 1000) +- printf("%u.%03u MHz", ((unsigned int) speed/1000), +- (unsigned int) (speed%1000)); +- else +- printf("%lu kHz", speed); +- } else { +- if (speed > 1000000) { +- tmp = speed%10000; +- if (tmp >= 5000) +- speed += 10000; +- printf("%u.%02u GHz", ((unsigned int) speed/1000000), +- ((unsigned int) (speed%1000000)/10000)); +- } else if (speed > 100000) { +- tmp = speed%1000; +- if (tmp >= 500) +- speed += 1000; +- printf("%u MHz", ((unsigned int) speed/1000)); +- } else if (speed > 1000) { +- tmp = speed%100; +- if (tmp >= 50) +- speed += 100; +- printf("%u.%01u MHz", ((unsigned int) speed/1000), +- ((unsigned int) (speed%1000)/100)); +- } +- } +- +- return; +-} +-- +2.34.0 + diff --git a/squashed-amd-pstate-v4-for-5.15.patch b/squashed-amd-pstate-v4-for-5.15.patch new file mode 100644 index 000000000000..b03d497e5e50 --- /dev/null +++ b/squashed-amd-pstate-v4-for-5.15.patch @@ -0,0 +1,2403 @@ +From 17d7d23d10f8c1633c04e42162a1acbcdaa05067 Mon Sep 17 00:00:00 2001 +From: Scott B <arglebargle@arglebargle.dev> 
+Date: Fri, 19 Nov 2021 11:22:15 -0800 +Subject: [PATCH] squashed amd-pstate v4 for 5.15 + +Squashed commit of the following: + +commit d6d2439bbbc3632c507eb598d171ba12ec7d76a3 +Author: Jinzhou Su <Jinzhou.Su@amd.com> +Date: Fri Nov 12 11:00:42 2021 +0800 + + ACPI: CPPC: Add setting MIN/MAX perf in cppc_set_perf + + Except for desire perf, sometimes need to update the min/max perf + in CPC table. So reuse the code in cppc_set_perf. + + Signed-off-by: Jinzhou Su <Jinzhou.Su@amd.com> + +commit 06c2d215a2a6c142528c57184f1a0be238b7672b +Author: Huang Rui <ray.huang@amd.com> +Date: Thu Jun 10 23:40:18 2021 +0800 + + Documentation: amd-pstate: add amd-pstate driver introduction + + Introduce the amd-pstate driver design and implementation. + + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit fd04adb59f5652e60a9d9e8cc60dcf6aa88fdf9d +Author: Huang Rui <ray.huang@amd.com> +Date: Thu Jun 10 23:48:03 2021 +0800 + + cpupower: print amd-pstate information on cpupower + + amd-pstate kernel module is using the fine grain frequency instead of + acpi hardware pstate. So the performance and frequency values should be + printed in frequency-info. + + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit 9755c1b747b5b78acc5c5627d63b61626a43ec99 +Author: Huang Rui <ray.huang@amd.com> +Date: Sun Sep 26 16:26:06 2021 +0800 + + cpupower: move print_speed function into misc helper + + The print_speed can be as a common function, and expose it into misc + helper header. Then it can be used on other helper files as well. + + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit 42d694d0580ef222c7adce6244d2512ddc60cf51 +Author: Huang Rui <ray.huang@amd.com> +Date: Sun Jun 20 17:35:45 2021 +0800 + + cpupower: enable boost state support for amd-pstate module + + The legacy ACPI hardware P-States function has 3 P-States on ACPI table, + the CPU frequency only can be switched between the 3 P-States. 
While the + processor supports the boost state, it will have another boost state + that the frequency can be higher than P0 state, and the state can be + decoded by the function of decode_pstates() and read by + amd_pci_get_num_boost_states(). + + However, the new AMD P-States function is different than legacy ACPI + hardware P-State on AMD processors. That has a finer grain frequency + range between the highest and lowest frequency. And boost frequency is + actually the frequency which is mapped on highest performance ratio. The + similiar previous P0 frequency is mapped on nominal performance ratio. + If the highest performance on the processor is higher than nominal + performance, then we think the current processor supports the boost + state. And it uses amd_pstate_boost_init() to initialize boost for AMD + P-States function. + + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit 5b1e7b429abff86fb58e37cc603681fe07577676 +Author: Huang Rui <ray.huang@amd.com> +Date: Sun Jun 20 17:07:25 2021 +0800 + + cpupower: add amd-pstate sysfs definition and access helper + + Introduce the marco definitions and access helper function for + amd-pstate sysfs interfaces such as each performance goals and frequency + levels in amd helper file. They will be used to read the sysfs attribute + from amd-pstate cpufreq driver for cpupower utilities. + + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit 5928c99135752de8263cbc7b35320cc3bc58c6bc +Author: Huang Rui <ray.huang@amd.com> +Date: Sun Sep 26 10:55:53 2021 +0800 + + cpupower: add the function to get the sysfs value from specific table + + Expose the helper into cpufreq header, then cpufreq driver can use this + function to get the sysfs value if it has any specific sysfs interfaces. 
+ + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit 4a7abac990d6dce5ef98d781498204ed192c57c9 +Author: Huang Rui <ray.huang@amd.com> +Date: Sun Jun 27 22:40:14 2021 +0800 + + cpupower: initial AMD P-state capability + + If kernel starts the amd-pstate module, the cpupower will initial the + capability flag as CPUPOWER_CAP_AMD_PSTATE. And once amd-pstate + capability is set, it won't need to set legacy ACPI relative + capabilities anymore. + + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit 05bc63c7be388bf939f8b97684ed1c242e1e7c01 +Author: Huang Rui <ray.huang@amd.com> +Date: Sun Jun 27 22:25:39 2021 +0800 + + cpupower: add the function to check amd-pstate enabled + + The processor with amd-pstate function also supports legacy ACPI + hardware P-States feature as well. Once driver sets amd-pstate eanbled, + the processor will respond the finer grain amd-pstate feature instead of + legacy ACPI P-States. So it introduces the cpupower_amd_pstate_enabled() + to check whether the current kernel enables amd-pstate or acpi-cpufreq + module. + + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit dadbd9b137f1c985649b4319b045144be07701a9 +Author: Huang Rui <ray.huang@amd.com> +Date: Mon Jun 14 22:52:01 2021 +0800 + + cpupower: add AMD P-state capability flag + + Add AMD P-state capability flag in cpupower to indicate AMD new P-state + kernel module support on Ryzen processors. + + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit fbb4ae2347d5182d1fc98b2530b57992243e3f8a +Author: Huang Rui <ray.huang@amd.com> +Date: Sun Jun 20 15:01:08 2021 +0800 + + cpufreq: amd: add amd-pstate performance attributes + + Introduce sysfs attributes to get the different level amd-pstate + performances. 
+ + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit c1089d257734a7671893ecc7e7316a2a1b5c0677 +Author: Huang Rui <ray.huang@amd.com> +Date: Sun Jun 20 13:26:01 2021 +0800 + + cpufreq: amd: add amd-pstate frequencies attributes + + Introduce sysfs attributes to get the different level processor + frequencies. + + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit 68e484a961047057bfe6ebca8b16fe3663c8c532 +Author: Huang Rui <ray.huang@amd.com> +Date: Thu Jun 10 23:13:00 2021 +0800 + + cpufreq: amd: add boost mode support for amd-pstate + + If the sbios supports the boost mode of amd-pstate, let's switch to + boost enabled by default. + + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit e4ddb12f2df38d8f05d3a32b455658d403e779a0 +Author: Huang Rui <ray.huang@amd.com> +Date: Thu Jun 10 20:24:00 2021 +0800 + + cpufreq: amd: add trace for amd-pstate module + + Add trace event to monitor the performance value changes which is + controlled by cpu governors. + + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit 4896e1e8044d887eebfccf51b4a3621bda70958a +Author: Huang Rui <ray.huang@amd.com> +Date: Mon Aug 9 19:06:51 2021 +0800 + + cpufreq: amd: add acpi cppc function as the backend for legacy processors + + In some old Zen based processors, they are using the shared memory that + exposed from ACPI SBIOS. + + Signed-off-by: Jinzhou Su <Jinzhou.Su@amd.com> + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit a04f0d65d21470d60b011e4d63c4720b61f79c57 +Author: Huang Rui <ray.huang@amd.com> +Date: Fri Aug 13 18:43:47 2021 +0800 + + cpufreq: amd: add fast switch function for amd-pstate + + Introduce the fast switch function for amd-pstate on the AMD processors + which support the full MSR register control. It's able to decrease the + lattency on interrupt context. 
+ + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit c8b60cdcdeed47fa1d78df95b4bb1f9ebf9ca62c +Author: Huang Rui <ray.huang@amd.com> +Date: Thu Jun 10 18:04:45 2021 +0800 + + cpufreq: amd: introduce a new amd pstate driver to support future processors + + amd-pstate is the AMD CPU performance scaling driver that introduces a + new CPU frequency control mechanism on AMD Zen based CPU series in Linux + kernel. The new mechanism is based on Collaborative processor + performance control (CPPC) which is finer grain frequency management + than legacy ACPI hardware P-States. Current AMD CPU platforms are using + the ACPI P-states driver to manage CPU frequency and clocks with + switching only in 3 P-states. AMD P-States is to replace the ACPI + P-states controls, allows a flexible, low-latency interface for the + Linux kernel to directly communicate the performance hints to hardware. + + "amd-pstate" leverages the Linux kernel governors such as *schedutil*, + *ondemand*, etc. to manage the performance hints which are provided by CPPC + hardware functionality. The first version for amd-pstate is to support one + of the Zen3 processors, and we will support more in future after we verify + the hardware and SBIOS functionalities. + + There are two types of hardware implementations for amd-pstate: one is full + MSR support and another is shared memory support. It can use + X86_FEATURE_CPPC feature flag to distinguish the different types. + + Using the new AMD P-States method + kernel governors (*schedutil*, + *ondemand*, ...) to manage the frequency update is the most appropriate + bridge between AMD Zen based hardware processor and Linux kernel, the + processor is able to ajust to the most efficiency frequency according to + the kernel scheduler loading. 
+ + Performance Per Watt (PPW) Caculation: + + The PPW caculation is referred by below paper: + https://software.intel.com/content/dam/develop/external/us/en/documents/performance-per-what-paper.pdf + + Below formula is referred from below spec to measure the PPW: + + (F / t) / P = F * t / (t * E) = F / E, + + "F" is the number of frames per second. + "P" is power measurd in watts. + "E" is energy measured in joules. + + We use the RAPL interface with "perf" tool to get the energy data of the + package power. + + The data comparsions between amd-pstate and acpi-freq module are tested on + AMD Cezanne processor: + + 1) TBench CPU benchmark: + + +---------------------------------------------------------------------+ + | | + | TBench (Performance Per Watt) | + | Higher is better | + +-------------------+------------------------+------------------------+ + | | Performance Per Watt | Performance Per Watt | + | Kernel Module | (Schedutil) | (Ondemand) | + | | Unit: MB / (s * J) | Unit: MB / (s * J) | + +-------------------+------------------------+------------------------+ + | | | | + | acpi-cpufreq | 3.022 | 2.969 | + | | | | + +-------------------+------------------------+------------------------+ + | | | | + | amd-pstate | 3.131 | 3.284 | + | | | | + +-------------------+------------------------+------------------------+ + + 2) Gitsource CPU benchmark: + + +---------------------------------------------------------------------+ + | | + | Gitsource (Performance Per Watt) | + | Higher is better | + +-------------------+------------------------+------------------------+ + | | Performance Per Watt | Performance Per Watt | + | Kernel Module | (Schedutil) | (Ondemand) | + | | Unit: 1 / (s * J) | Unit: 1 / (s * J) | + +-------------------+------------------------+------------------------+ + | | | | + | acpi-cpufreq | 3.42172E-07 | 2.74508E-07 | + | | | | + +-------------------+------------------------+------------------------+ + | | | | + | amd-pstate | 4.09141E-07 | 
3.47610E-07 | + | | | | + +-------------------+------------------------+------------------------+ + + 3) Speedometer 2.0 CPU benchmark: + + +---------------------------------------------------------------------+ + | | + | Speedometer 2.0 (Performance Per Watt) | + | Higher is better | + +-------------------+------------------------+------------------------+ + | | Performance Per Watt | Performance Per Watt | + | Kernel Module | (Schedutil) | (Ondemand) | + | | Unit: 1 / (s * J) | Unit: 1 / (s * J) | + +-------------------+------------------------+------------------------+ + | | | | + | acpi-cpufreq | 0.116111767 | 0.110321664 | + | | | | + +-------------------+------------------------+------------------------+ + | | | | + | amd-pstate | 0.115825281 | 0.122024299 | + | | | | + +-------------------+------------------------+------------------------+ + + According to above average data, we can see this solution has shown better + performance per watt scaling on mobile CPU benchmarks in most of cases. + + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit fad952704f167c9ae8c4254df1f870533850a601 +Author: Jinzhou Su <Jinzhou.Su@amd.com> +Date: Mon Aug 9 19:04:17 2021 +0800 + + ACPI: CPPC: add cppc enable register function + + Add a new function to enable CPPC feature. This function + will write Continuous Performance Control package + EnableRegister field on the processor. + + CPPC EnableRegister register described in section 8.4.7.1 of ACPI 6.4: + This element is optional. If supported, contains a resource descriptor + with a single Register() descriptor that describes a register to which + OSPM writes a One to enable CPPC on this processor. Before this register + is set, the processor will be controlled by legacy mechanisms (ACPI + Pstates, firmware, etc.). + + This register will be used for AMD processors to enable amd-pstate + function instead of legacy ACPI P-States. 
+ + Signed-off-by: Jinzhou Su <Jinzhou.Su@amd.com> + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit d995363a09adc216d9f6d30fdaaeeba1f3abbd30 +Author: Mario Limonciello <mario.limonciello@amd.com> +Date: Wed Jul 28 05:48:34 2021 +0800 + + ACPI: CPPC: Check present CPUs for determining _CPC is valid + + As this is a static check, it should be based upon what is currently + present on the system. This makes probeing more deterministic. + + While local APIC flags field (lapic_flags) of cpu core in MADT table is + 0, then the cpu core won't be enabled. In this case, _CPC won't be found + in this core, and return back to _CPC invalid with walking through + possible cpus (include disable cpus). This is not expected, so switch to + check present CPUs instead. + + Reported-by: Jinzhou Su <Jinzhou.Su@amd.com> + Signed-off-by: Mario Limonciello <mario.limonciello@amd.com> + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit 664330769a48c0699102748f25ea22a42e0ab73a +Author: Steven Noonan <steven@valvesoftware.com> +Date: Wed Oct 27 01:11:54 2021 -0700 + + ACPI: CPPC: implement support for SystemIO registers + + According to the ACPI v6.2 (and later) specification, SystemIO can be + used for _CPC registers. This teaches cppc_acpi how to handle such + registers. + + This patch was tested using the amd_pstate driver on my Zephyrus G15 + (model GA503QS) using the current version 410 BIOS, which uses + a SystemIO register for the HighestPerformance element in _CPC. + + Signed-off-by: Steven Noonan <steven@valvesoftware.com> + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit 6d701c90560845ebcb245ba9d8b4bc40956f5bb5 +Author: Huang Rui <ray.huang@amd.com> +Date: Mon Jan 25 15:50:24 2021 +0800 + + x86/msr: add AMD CPPC MSR definitions + + AMD CPPC (Collaborative Processor Performance Control) function uses MSR + registers to manage the performance hints. So add the MSR register macro + here. 
+ + Signed-off-by: Huang Rui <ray.huang@amd.com> + +commit a52e705f391ac57a50a0992e98f4d87a2e68bdac +Author: Huang Rui <ray.huang@amd.com> +Date: Thu Jan 28 10:50:26 2021 +0800 + + x86/cpufreatures: add AMD Collaborative Processor Performance Control feature flag + + Add Collaborative Processor Performance Control feature flag for AMD + processors. + + This feature flag will be used on the following amd-pstate driver. The + amd-pstate driver has two approaches to implement the frequency control + behavior. That depends on the CPU hardware implementation. One is "Full + MSR Support" and another is "Shared Memory Support". The feature flag + indicates the current processors with "Full MSR Support". + + Acked-by: Borislav Petkov <bp@suse.de> + Signed-off-by: Huang Rui <ray.huang@amd.com> +--- + Documentation/admin-guide/pm/amd-pstate.rst | 373 ++++++++++ + .../admin-guide/pm/working-state.rst | 1 + + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/msr-index.h | 17 + + drivers/acpi/cppc_acpi.c | 114 ++- + drivers/cpufreq/Kconfig.x86 | 17 + + drivers/cpufreq/Makefile | 5 + + drivers/cpufreq/amd-pstate-trace.c | 2 + + drivers/cpufreq/amd-pstate-trace.h | 91 +++ + drivers/cpufreq/amd-pstate.c | 662 ++++++++++++++++++ + include/acpi/cppc_acpi.h | 5 + + tools/power/cpupower/lib/cpufreq.c | 21 +- + tools/power/cpupower/lib/cpufreq.h | 12 + + tools/power/cpupower/utils/cpufreq-info.c | 68 +- + tools/power/cpupower/utils/helpers/amd.c | 87 +++ + tools/power/cpupower/utils/helpers/cpuid.c | 13 + + tools/power/cpupower/utils/helpers/helpers.h | 22 + + tools/power/cpupower/utils/helpers/misc.c | 62 ++ + 18 files changed, 1503 insertions(+), 70 deletions(-) + create mode 100644 Documentation/admin-guide/pm/amd-pstate.rst + create mode 100644 drivers/cpufreq/amd-pstate-trace.c + create mode 100644 drivers/cpufreq/amd-pstate-trace.h + create mode 100644 drivers/cpufreq/amd-pstate.c + +diff --git a/Documentation/admin-guide/pm/amd-pstate.rst 
b/Documentation/admin-guide/pm/amd-pstate.rst +new file mode 100644 +index 000000000000..24a88476fc69 +--- /dev/null ++++ b/Documentation/admin-guide/pm/amd-pstate.rst +@@ -0,0 +1,373 @@ ++.. SPDX-License-Identifier: GPL-2.0 ++.. include:: <isonum.txt> ++ ++=============================================== ++``amd-pstate`` CPU Performance Scaling Driver ++=============================================== ++ ++:Copyright: |copy| 2021 Advanced Micro Devices, Inc. ++ ++:Author: Huang Rui <ray.huang@amd.com> ++ ++ ++Introduction ++=================== ++ ++``amd-pstate`` is the AMD CPU performance scaling driver that introduces a ++new CPU frequency control mechanism on modern AMD APU and CPU series in ++Linux kernel. The new mechanism is based on Collaborative Processor ++Performance Control (CPPC) which provides finer grain frequency management ++than legacy ACPI hardware P-States. Current AMD CPU/APU platforms are using ++the ACPI P-states driver to manage CPU frequency and clocks with switching ++only in 3 P-states. CPPC replaces the ACPI P-states controls, allows a ++flexible, low-latency interface for the Linux kernel to directly ++communicate the performance hints to hardware. ++ ++``amd-pstate`` leverages the Linux kernel governors such as ``schedutil``, ++``ondemand``, etc. to manage the performance hints which are provided by ++CPPC hardware functionality that internally follows the hardware ++specification (for details refer to AMD64 Architecture Programmer's Manual ++Volume 2: System Programming [1]_). Currently ``amd-pstate`` supports basic ++frequency control function according to kernel governors on some of the ++Zen2 and Zen3 processors, and we will implement more AMD specific functions ++in future after we verify them on the hardware and SBIOS. 
++ ++ ++AMD CPPC Overview ++======================= ++ ++Collaborative Processor Performance Control (CPPC) interface enumerates a ++continuous, abstract, and unit-less performance value in a scale that is ++not tied to a specific performance state / frequency. This is an ACPI ++standard [2]_ which software can specify application performance goals and ++hints as a relative target to the infrastructure limits. AMD processors ++provides the low latency register model (MSR) instead of AML code ++interpreter for performance adjustments. ``amd-pstate`` will initialize a ++``struct cpufreq_driver`` instance ``amd_pstate_driver`` with the callbacks ++to manage each performance update behavior. :: ++ ++ Highest Perf ------>+-----------------------+ +-----------------------+ ++ | | | | ++ | | | | ++ | | Max Perf ---->| | ++ | | | | ++ | | | | ++ Nominal Perf ------>+-----------------------+ +-----------------------+ ++ | | | | ++ | | | | ++ | | | | ++ | | | | ++ | | | | ++ | | | | ++ | | Desired Perf ---->| | ++ | | | | ++ | | | | ++ | | | | ++ | | | | ++ | | | | ++ | | | | ++ | | | | ++ | | | | ++ | | | | ++ Lowest non- | | | | ++ linear perf ------>+-----------------------+ +-----------------------+ ++ | | | | ++ | | Lowest perf ---->| | ++ | | | | ++ Lowest perf ------>+-----------------------+ +-----------------------+ ++ | | | | ++ | | | | ++ | | | | ++ 0 ------>+-----------------------+ +-----------------------+ ++ ++ AMD P-States Performance Scale ++ ++ ++.. _perf_cap: ++ ++AMD CPPC Performance Capability ++-------------------------------- ++ ++Highest Performance (RO) ++......................... ++ ++It is the absolute maximum performance an individual processor may reach, ++assuming ideal conditions. This performance level may not be sustainable ++for long durations and may only be achievable if other platform components ++are in a specific state; for example, it may require other processors be in ++an idle state. 
This would be equivalent to the highest frequencies ++supported by the processor. ++ ++Nominal (Guaranteed) Performance (RO) ++...................................... ++ ++It is the maximum sustained performance level of the processor, assuming ++ideal operating conditions. In absence of an external constraint (power, ++thermal, etc.) this is the performance level the processor is expected to ++be able to maintain continuously. All cores/processors are expected to be ++able to sustain their nominal performance state simultaneously. ++ ++Lowest non-linear Performance (RO) ++................................... ++ ++It is the lowest performance level at which nonlinear power savings are ++achieved, for example, due to the combined effects of voltage and frequency ++scaling. Above this threshold, lower performance levels should be generally ++more energy efficient than higher performance levels. This register ++effectively conveys the most efficient performance level to ``amd-pstate``. ++ ++Lowest Performance (RO) ++........................ ++ ++It is the absolute lowest performance level of the processor. Selecting a ++performance level lower than the lowest nonlinear performance level may ++cause an efficiency penalty but should reduce the instantaneous power ++consumption of the processor. ++ ++AMD CPPC Performance Control ++------------------------------ ++ ++``amd-pstate`` passes performance goals through these registers. The ++register drives the behavior of the desired performance target. ++ ++Minimum requested performance (RW) ++................................... ++ ++``amd-pstate`` specifies the minimum allowed performance level. ++ ++Maximum requested performance (RW) ++................................... ++ ++``amd-pstate`` specifies a limit the maximum performance that is expected ++to be supplied by the hardware. ++ ++Desired performance target (RW) ++................................... 
++ ++``amd-pstate`` specifies a desired target in the CPPC performance scale as ++a relative number. This can be expressed as percentage of nominal ++performance (infrastructure max). Below the nominal sustained performance ++level, desired performance expresses the average performance level of the ++processor subject to hardware. Above the nominal performance level, ++processor must provide at least nominal performance requested and go higher ++if current operating conditions allow. ++ ++Energy Performance Preference (EPP) (RW) ++......................................... ++ ++Provides a hint to the hardware if software wants to bias toward performance ++(0x0) or energy efficiency (0xff). ++ ++ ++Key Governors Support ++======================= ++ ++``amd-pstate`` can be used with all the (generic) scaling governors listed ++by the ``scaling_available_governors`` policy attribute in ``sysfs``. Then, ++it is responsible for the configuration of policy objects corresponding to ++CPUs and provides the ``CPUFreq`` core (and the scaling governors attached ++to the policy objects) with accurate information on the maximum and minimum ++operating frequencies supported by the hardware. Users can check the ++``scaling_cur_freq`` information comes from the ``CPUFreq`` core. ++ ++``amd-pstate`` mainly supports ``schedutil`` and ``ondemand`` for dynamic ++frequency control. It is to fine tune the processor configuration on ++``amd-pstate`` to the ``schedutil`` with CPU CFS scheduler. ``amd-pstate`` ++registers adjust_perf callback to implement the CPPC similar performance ++update behavior. It is initialized by ``sugov_start`` and then populate the ++CPU's update_util_data pointer to assign ``sugov_update_single_perf`` as ++the utilization update callback function in CPU scheduler. CPU scheduler ++will call ``cpufreq_update_util`` and assign the target performance ++according to the ``struct sugov_cpu`` that utilization update belongs to. 
++Then ``amd-pstate`` updates the desired performance according to the CPU ++scheduler assigned. ++ ++ ++Processor Support ++======================= ++ ++The ``amd-pstate`` initialization will fail if the _CPC in ACPI SBIOS is ++not existed at the detected processor, and it uses ``acpi_cpc_valid`` to ++check the _CPC existence. All Zen based processors support legacy ACPI ++hardware P-States function, so while the ``amd-pstate`` fails to be ++initialized, the kernel will fall back to initialize ``acpi-cpufreq`` ++driver. ++ ++There are two types of hardware implementations for ``amd-pstate``: one is ++`Full MSR Support <perf_cap_>`_ and another is `Shared Memory Support ++<perf_cap_>`_. It can use :c:macro:`X86_FEATURE_CPPC` feature flag (for ++details refer to Processor Programming Reference (PPR) for AMD Family ++19h Model 21h, Revision B0 Processors [3]_) to indicate the different ++types. ``amd-pstate`` is to register different ``amd_pstate_perf_funcs`` ++instances for different hardware implementations. ++ ++Currently, some of Zen2 and Zen3 processors support ``amd-pstate``. In the ++future, it will be supported on more and more AMD processors. ++ ++Full MSR Support ++----------------- ++ ++Some new Zen3 processors such as Cezanne provide the MSR registers directly ++while the :c:macro:`X86_FEATURE_CPPC` CPU feature flag is set. ++``amd-pstate`` can handle the MSR register to implement the fast switch ++function in ``CPUFreq`` that can shrink latency of frequency control on the ++interrupt context. ++ ++Shared Memory Support ++---------------------- ++ ++If :c:macro:`X86_FEATURE_CPPC` CPU feature flag is not set, that means the ++processor supports shared memory solution. In this case, ``amd-pstate`` ++uses the ``cppc_acpi`` helper methods to implement the callback functions ++of ``amd_pstate_perf_funcs``. ++ ++ ++AMD P-States and ACPI hardware P-States always can be supported in one ++processor. 
But AMD P-States has the higher priority and if it is enabled ++with :c:macro:`MSR_AMD_CPPC_ENABLE` or ``cppc_set_enable``, it will respond ++to the request from AMD P-States. ++ ++ ++User Space Interface in ``sysfs`` ++================================== ++ ++``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to ++control its functionality at the system level. They located in the ++``/sys/devices/system/cpu/cpufreq/policyX/`` directory and affect all CPUs. :: ++ ++ root@hr-test1:/home/ray# ls /sys/devices/system/cpu/cpufreq/policy0/*amd* ++ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_highest_perf ++ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_lowest_nonlinear_freq ++ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_lowest_nonlinear_perf ++ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_lowest_perf ++ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_max_freq ++ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_min_freq ++ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_nominal_freq ++ /sys/devices/system/cpu/cpufreq/policy0/amd_pstate_nominal_perf ++ ++ ++``amd_pstate_highest_perf / amd_pstate_max_freq`` ++ ++Maximum CPPC performance and CPU frequency that the driver is allowed to ++set in percent of the maximum supported CPPC performance level (the highest ++performance supported in `AMD CPPC Performance Capability <perf_cap_>`_). ++This attribute is read-only. ++ ++``amd_pstate_nominal_perf / amd_pstate_nominal_freq`` ++ ++Nominal CPPC performance and CPU frequency that the driver is allowed to ++set in percent of the maximum supported CPPC performance level (Please see ++nominal performance in `AMD CPPC Performance Capability <perf_cap_>`_). ++This attribute is read-only. 
++ ++``amd_pstate_lowest_nonlinear_perf / amd_pstate_lowest_nonlinear_freq`` ++ ++The lowest non-linear CPPC performance and CPU frequency that the driver is ++allowed to set in percent of the maximum supported CPPC performance level ++(please see the lowest non-linear performance in `AMD CPPC Performance ++Capability <perf_cap_>`_). ++This attribute is read-only. ++ ++``amd_pstate_lowest_perf`` ++ ++The lowest physical CPPC performance. The minimum CPU frequency can be read ++back from the ``cpuinfo`` member of ``cpufreq_policy``, so we won't expose it ++here. ++This attribute is read-only. ++ ++ ++``amd-pstate`` vs ``acpi-cpufreq`` ++====================================== ++ ++On the majority of AMD platforms supported by ``acpi-cpufreq``, the ACPI tables ++provided by the platform firmware are used for CPU performance scaling, but ++only provide 3 P-states on AMD processors. ++However, on modern AMD APU and CPU series, the hardware provides collaborative ++processor performance control according to the ACPI protocol and customizes this ++for AMD platforms. That is, a fine-grained and continuous frequency range ++instead of the legacy hardware P-states. ``amd-pstate`` is the kernel ++module which supports the new AMD P-States mechanism on most future AMD ++platforms. The AMD P-States mechanism will be the more performant and energy ++efficient frequency management method on AMD processors. ++ ++``cpupower`` tool support for ``amd-pstate`` ++=============================================== ++ ++``amd-pstate`` is supported by the ``cpupower`` tool, which can be used to dump frequency ++information. Development is in progress to support more and more operations for the new ++``amd-pstate`` module with this tool.
:: ++ ++ root@hr-test1:/home/ray# cpupower frequency-info ++ analyzing CPU 0: ++ driver: amd-pstate ++ CPUs which run at the same hardware frequency: 0 ++ CPUs which need to have their frequency coordinated by software: 0 ++ maximum transition latency: 131 us ++ hardware limits: 400 MHz - 4.68 GHz ++ available cpufreq governors: ondemand conservative powersave userspace performance schedutil ++ current policy: frequency should be within 400 MHz and 4.68 GHz. ++ The governor "schedutil" may decide which speed to use ++ within this range. ++ current CPU frequency: Unable to call hardware ++ current CPU frequency: 4.02 GHz (asserted by call to kernel) ++ boost state support: ++ Supported: yes ++ Active: yes ++ AMD PSTATE Highest Performance: 166. Maximum Frequency: 4.68 GHz. ++ AMD PSTATE Nominal Performance: 117. Nominal Frequency: 3.30 GHz. ++ AMD PSTATE Lowest Non-linear Performance: 39. Lowest Non-linear Frequency: 1.10 GHz. ++ AMD PSTATE Lowest Performance: 15. Lowest Frequency: 400 MHz. ++ ++ ++Diagnostics and Tuning ++======================= ++ ++Trace Events ++-------------- ++ ++There are two static trace events that can be used for ``amd-pstate`` ++diagnostics. One of them is the cpu_frequency trace event generally used ++by ``CPUFreq``, and the other one is the ``amd_pstate_perf`` trace event ++specific to ``amd-pstate``. The following sequence of shell commands can ++be used to enable them and see their output (if the kernel is generally ++configured to support event tracing). :: ++ ++ root@hr-test1:/home/ray# cd /sys/kernel/tracing/ ++ root@hr-test1:/sys/kernel/tracing# echo 1 > events/amd_cpu/enable ++ root@hr-test1:/sys/kernel/tracing# cat trace ++ # tracer: nop ++ # ++ # entries-in-buffer/entries-written: 47827/42233061 #P:2 ++ # ++ # _-----=> irqs-off ++ # / _----=> need-resched ++ # | / _---=> hardirq/softirq ++ # || / _--=> preempt-depth ++ # ||| / delay ++ # TASK-PID CPU# |||| TIMESTAMP FUNCTION ++ # | | | |||| | | ++ <idle>-0 [015] dN... 
4995.979886: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=15 changed=false fast_switch=true ++ <idle>-0 [007] d.h.. 4995.979893: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=7 changed=false fast_switch=true ++ cat-2161 [000] d.... 4995.980841: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=0 changed=false fast_switch=true ++ sshd-2125 [004] d.s.. 4995.980968: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=4 changed=false fast_switch=true ++ <idle>-0 [007] d.s.. 4995.980968: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=7 changed=false fast_switch=true ++ <idle>-0 [003] d.s.. 4995.980971: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=3 changed=false fast_switch=true ++ <idle>-0 [011] d.s.. 4995.980996: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=11 changed=false fast_switch=true ++ ++The cpu_frequency trace event will be triggered either by the ``schedutil`` scaling ++governor (for the policies it is attached to), or by the ``CPUFreq`` core (for the ++policies with other scaling governors). ++ ++ ++Reference ++=========== ++ ++.. [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming, ++ https://www.amd.com/system/files/TechDocs/24593.pdf ++ ++.. [2] Advanced Configuration and Power Interface Specification, ++ https://uefi.org/sites/default/files/resources/ACPI_Spec_6_4_Jan22.pdf ++ ++.. 
[3] Processor Programming Reference (PPR) for AMD Family 19h Model 21h, Revision B0 Processors ++ https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip ++ +diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst +index f40994c422dc..5d2757e2de65 100644 +--- a/Documentation/admin-guide/pm/working-state.rst ++++ b/Documentation/admin-guide/pm/working-state.rst +@@ -11,6 +11,7 @@ Working-State Power Management + intel_idle + cpufreq + intel_pstate ++ amd-pstate + cpufreq_drivers + intel_epb + intel-speed-select +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index d0ce5cfd3ac1..dec4dab82e0c 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -313,6 +313,7 @@ + #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ + #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ + #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. 
*/ ++#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ + + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index a7c413432b33..ce42e15cf303 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -486,6 +486,23 @@ + + #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f + ++/* AMD Collaborative Processor Performance Control MSRs */ ++#define MSR_AMD_CPPC_CAP1 0xc00102b0 ++#define MSR_AMD_CPPC_ENABLE 0xc00102b1 ++#define MSR_AMD_CPPC_CAP2 0xc00102b2 ++#define MSR_AMD_CPPC_REQ 0xc00102b3 ++#define MSR_AMD_CPPC_STATUS 0xc00102b4 ++ ++#define CAP1_LOWEST_PERF(x) (((x) >> 0) & 0xff) ++#define CAP1_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) ++#define CAP1_NOMINAL_PERF(x) (((x) >> 16) & 0xff) ++#define CAP1_HIGHEST_PERF(x) (((x) >> 24) & 0xff) ++ ++#define REQ_MAX_PERF(x) (((x) & 0xff) << 0) ++#define REQ_MIN_PERF(x) (((x) & 0xff) << 8) ++#define REQ_DES_PERF(x) (((x) & 0xff) << 16) ++#define REQ_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) ++ + /* Fam 17h MSRs */ + #define MSR_F17H_IRPERF 0xc00000e9 + +diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c +index bd482108310c..fc9d0f330eb3 100644 +--- a/drivers/acpi/cppc_acpi.c ++++ b/drivers/acpi/cppc_acpi.c +@@ -411,7 +411,7 @@ bool acpi_cpc_valid(void) + struct cpc_desc *cpc_ptr; + int cpu; + +- for_each_possible_cpu(cpu) { ++ for_each_present_cpu(cpu) { + cpc_ptr = per_cpu(cpc_desc_ptr, cpu); + if (!cpc_ptr) + return false; +@@ -759,9 +759,24 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) + goto out_free; + cpc_ptr->cpc_regs[i-2].sys_mem_vaddr = addr; + } ++ } else if (gas_t->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { ++ if (gas_t->access_width < 1 || gas_t->access_width > 3) { ++ /* 1 = 8-bit, 2 = 16-bit, and 3 = 32-bit. 
SystemIO doesn't ++ * implement 64-bit registers. ++ */ ++ pr_debug("Invalid access width %d for SystemIO register\n", ++ gas_t->access_width); ++ goto out_free; ++ } ++ if (gas_t->address & ~0xFFFFULL) { ++ /* SystemIO registers use 16-bit integer addresses */ ++ pr_debug("Invalid IO port %llu for SystemIO register\n", ++ gas_t->address); ++ goto out_free; ++ } + } else { + if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) { +- /* Support only PCC ,SYS MEM and FFH type regs */ ++ /* Support only PCC, SystemMemory, SystemIO, and FFH type regs. */ + pr_debug("Unsupported register type: %d\n", gas_t->space_id); + goto out_free; + } +@@ -936,7 +951,20 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) + } + + *val = 0; +- if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) ++ ++ if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { ++ u32 width = 8 << (reg->access_width - 1); ++ acpi_status status; ++ ++ status = acpi_os_read_port((acpi_io_address)reg->address, (u32 *)val, width); ++ ++ if (status != AE_OK) { ++ pr_debug("Error: Failed to read SystemIO port %llx\n", reg->address); ++ return -EFAULT; ++ } ++ ++ return 0; ++ } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + vaddr = reg_res->sys_mem_vaddr; +@@ -975,7 +1003,19 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); + struct cpc_reg *reg = &reg_res->cpc_entry.reg; + +- if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) ++ if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { ++ u32 width = 8 << (reg->access_width - 1); ++ acpi_status status; ++ ++ status = acpi_os_write_port((acpi_io_address)reg->address, (u32)val, width); ++ ++ if (status != AE_OK) { ++ pr_debug("Error: Failed to write SystemIO port %llx\n",
reg->address); ++ return -EFAULT; ++ } ++ ++ return 0; ++ } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + vaddr = reg_res->sys_mem_vaddr; +@@ -1235,6 +1275,51 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) + } + EXPORT_SYMBOL_GPL(cppc_get_perf_ctrs); + ++/** ++ * cppc_set_enable - Set to enable CPPC on the processor by writing the ++ * Continuous Performance Control package EnableRegister field. ++ * @cpu: CPU for which to enable CPPC register. ++ * @enable: 0 - disable, 1 - enable CPPC feature on the processor. ++ * ++ * Return: 0 for success, -ERRNO or -EIO otherwise. ++ */ ++int cppc_set_enable(int cpu, bool enable) ++{ ++ int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); ++ struct cpc_register_resource *enable_reg; ++ struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); ++ struct cppc_pcc_data *pcc_ss_data = NULL; ++ int ret = -EINVAL; ++ ++ if (!cpc_desc) { ++ pr_debug("No CPC descriptor for CPU:%d\n", cpu); ++ return -EINVAL; ++ } ++ ++ enable_reg = &cpc_desc->cpc_regs[ENABLE]; ++ ++ if (CPC_IN_PCC(enable_reg)) { ++ ++ if (pcc_ss_id < 0) ++ return -EIO; ++ ++ ret = cpc_write(cpu, enable_reg, enable); ++ if (ret) ++ return ret; ++ ++ pcc_ss_data = pcc_data[pcc_ss_id]; ++ ++ down_write(&pcc_ss_data->pcc_lock); ++ /* after writing CPC, transfer the ownership of PCC to platform */ ++ ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE); ++ up_write(&pcc_ss_data->pcc_lock); ++ return ret; ++ } ++ ++ return cpc_write(cpu, enable_reg, enable); ++} ++EXPORT_SYMBOL_GPL(cppc_set_enable); ++ + /** + * cppc_set_perf - Set a CPU's performance controls. + * @cpu: CPU for which to set performance controls.
+@@ -1245,7 +1330,7 @@ EXPORT_SYMBOL_GPL(cppc_get_perf_ctrs); + int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) + { + struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); +- struct cpc_register_resource *desired_reg; ++ struct cpc_register_resource *desired_reg, *min_reg, *max_reg; + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); + struct cppc_pcc_data *pcc_ss_data = NULL; + int ret = 0; +@@ -1256,6 +1341,8 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) + } + + desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF]; ++ min_reg = &cpc_desc->cpc_regs[MIN_PERF]; ++ max_reg = &cpc_desc->cpc_regs[MAX_PERF]; + + /* + * This is Phase-I where we want to write to CPC registers +@@ -1264,7 +1351,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) + * Since read_lock can be acquired by multiple CPUs simultaneously we + * achieve that goal here + */ +- if (CPC_IN_PCC(desired_reg)) { ++ if (CPC_IN_PCC(desired_reg) && CPC_IN_PCC(min_reg) && CPC_IN_PCC(max_reg)) { + if (pcc_ss_id < 0) { + pr_debug("Invalid pcc_ss_id\n"); + return -ENODEV; +@@ -1287,13 +1374,14 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) + cpc_desc->write_cmd_status = 0; + } + +- /* +- * Skip writing MIN/MAX until Linux knows how to come up with +- * useful values. 
+- */ +- cpc_write(cpu, desired_reg, perf_ctrls->desired_perf); ++ if (perf_ctrls->desired_perf) ++ cpc_write(cpu, desired_reg, perf_ctrls->desired_perf); ++ if (perf_ctrls->min_perf) ++ cpc_write(cpu, min_reg, perf_ctrls->min_perf); ++ if (perf_ctrls->max_perf) ++ cpc_write(cpu, max_reg, perf_ctrls->max_perf); + +- if (CPC_IN_PCC(desired_reg)) ++ if (CPC_IN_PCC(desired_reg) && CPC_IN_PCC(min_reg) && CPC_IN_PCC(max_reg)) + up_read(&pcc_ss_data->pcc_lock); /* END Phase-I */ + /* + * This is Phase-II where we transfer the ownership of PCC to Platform +@@ -1341,7 +1429,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) + * case during a CMD_READ and if there are pending writes it delivers + * the write command before servicing the read command + */ +- if (CPC_IN_PCC(desired_reg)) { ++ if (CPC_IN_PCC(desired_reg) && CPC_IN_PCC(min_reg) && CPC_IN_PCC(max_reg)) { + if (down_write_trylock(&pcc_ss_data->pcc_lock)) {/* BEGIN Phase-II */ + /* Update only if there are pending write commands */ + if (pcc_ss_data->pending_pcc_write_cmd) +diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 +index 92701a18bdd9..56620437b443 100644 +--- a/drivers/cpufreq/Kconfig.x86 ++++ b/drivers/cpufreq/Kconfig.x86 +@@ -34,6 +34,23 @@ config X86_PCC_CPUFREQ + + If in doubt, say N. + ++config X86_AMD_PSTATE ++ tristate "AMD Processor P-State driver" ++ depends on X86 ++ select ACPI_PROCESSOR if ACPI ++ select ACPI_CPPC_LIB if X86_64 && ACPI ++ select CPU_FREQ_GOV_SCHEDUTIL if SMP ++ help ++ This driver adds a CPUFreq driver which utilizes a fine grain ++ processor performance frequency control range instead of legacy ++ performance levels. This driver supports the AMD processors with ++ _CPC object in the SBIOS. ++ ++ For details, take a look at: ++ <file:Documentation/admin-guide/pm/amd-pstate.rst>. ++ ++ If in doubt, say N.
++ + config X86_ACPI_CPUFREQ + tristate "ACPI Processor P-States driver" + depends on ACPI_PROCESSOR +diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile +index 48ee5859030c..285de70af877 100644 +--- a/drivers/cpufreq/Makefile ++++ b/drivers/cpufreq/Makefile +@@ -17,6 +17,10 @@ obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o + obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o + obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o + ++# Traces ++CFLAGS_amd-pstate-trace.o := -I$(src) ++amd_pstate-y := amd-pstate.o amd-pstate-trace.o ++ + ################################################################################## + # x86 drivers. + # Link order matters. K8 is preferred to ACPI because of firmware bugs in early +@@ -25,6 +29,7 @@ obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o + # speedstep-* is preferred over p4-clockmod. + + obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o ++obj-$(CONFIG_X86_AMD_PSTATE) += amd_pstate.o + obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o + obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o + obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o +diff --git a/drivers/cpufreq/amd-pstate-trace.c b/drivers/cpufreq/amd-pstate-trace.c +new file mode 100644 +index 000000000000..891b696dcd69 +--- /dev/null ++++ b/drivers/cpufreq/amd-pstate-trace.c +@@ -0,0 +1,2 @@ ++#define CREATE_TRACE_POINTS ++#include "amd-pstate-trace.h" +diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h +new file mode 100644 +index 000000000000..cf978a7d53dd +--- /dev/null ++++ b/drivers/cpufreq/amd-pstate-trace.h +@@ -0,0 +1,91 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * amd-pstate-trace.h - AMD Processor P-state Frequency Driver Tracer ++ * ++ * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved. 
++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ * ++ * Author: Huang Rui <ray.huang@amd.com> ++ */ ++ ++#if !defined(_AMD_PSTATE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _AMD_PSTATE_TRACE_H ++ ++#include <linux/cpufreq.h> ++#include <linux/tracepoint.h> ++#include <linux/trace_events.h> ++ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM amd_cpu ++ ++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE amd-pstate-trace ++ ++#define TPS(x) tracepoint_string(x) ++ ++TRACE_EVENT(amd_pstate_perf, ++ ++ TP_PROTO(unsigned long min_perf, ++ unsigned long target_perf, ++ unsigned long capacity, ++ unsigned int cpu_id, ++ bool changed, ++ bool fast_switch ++ ), ++ ++ TP_ARGS(min_perf, ++ target_perf, ++ capacity, ++ cpu_id, ++ changed, ++ fast_switch ++ ), ++ ++ TP_STRUCT__entry( ++ __field(unsigned long, min_perf) ++ __field(unsigned long, target_perf) ++ __field(unsigned long, capacity) ++ __field(unsigned int, cpu_id) ++ __field(bool, changed) ++ __field(bool, fast_switch) ++ ), ++ ++ TP_fast_assign( ++ __entry->min_perf = min_perf; ++ __entry->target_perf = target_perf; ++ __entry->capacity = capacity; ++ __entry->cpu_id = cpu_id; ++ __entry->changed = changed; ++ __entry->fast_switch = fast_switch; ++ ), ++ ++ TP_printk("amd_min_perf=%lu amd_des_perf=%lu 
amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s", ++ (unsigned long)__entry->min_perf, ++ (unsigned long)__entry->target_perf, ++ (unsigned long)__entry->capacity, ++ (unsigned int)__entry->cpu_id, ++ (__entry->changed) ? "true" : "false", ++ (__entry->fast_switch) ? "true" : "false" ++ ) ++); ++ ++#endif /* _AMD_PSTATE_TRACE_H */ ++ ++/* This part must be outside protection */ ++#undef TRACE_INCLUDE_PATH ++#define TRACE_INCLUDE_PATH . ++ ++#include <trace/define_trace.h> +diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c +new file mode 100644 +index 000000000000..4ab2c4f20b1c +--- /dev/null ++++ b/drivers/cpufreq/amd-pstate.c +@@ -0,0 +1,662 @@ ++/* ++ * amd-pstate.c - AMD Processor P-state Frequency Driver ++ * ++ * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ * ++ * Author: Huang Rui <ray.huang@amd.com> ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include <linux/kernel.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/smp.h> ++#include <linux/sched.h> ++#include <linux/cpufreq.h> ++#include <linux/compiler.h> ++#include <linux/dmi.h> ++#include <linux/slab.h> ++#include <linux/acpi.h> ++#include <linux/io.h> ++#include <linux/delay.h> ++#include <linux/uaccess.h> ++#include <linux/static_call.h> ++ ++#include <acpi/processor.h> ++#include <acpi/cppc_acpi.h> ++ ++#include <asm/msr.h> ++#include <asm/processor.h> ++#include <asm/cpufeature.h> ++#include <asm/cpu_device_id.h> ++#include "amd-pstate-trace.h" ++ ++#define AMD_PSTATE_TRANSITION_LATENCY 0x20000 ++#define AMD_PSTATE_TRANSITION_DELAY 500 ++ ++static struct cpufreq_driver amd_pstate_driver; ++ ++struct amd_cpudata { ++ int cpu; ++ ++ struct freq_qos_request req[2]; ++ ++ u64 cppc_req_cached; ++ ++ u32 highest_perf; ++ u32 nominal_perf; ++ u32 lowest_nonlinear_perf; ++ u32 lowest_perf; ++ ++ u32 max_freq; ++ u32 min_freq; ++ u32 nominal_freq; ++ u32 lowest_nonlinear_freq; ++ ++ bool boost_supported; ++}; ++ ++static inline int pstate_enable(bool enable) ++{ ++ return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable ? 1 : 0); ++} ++ ++static int cppc_enable(bool enable) ++{ ++ int cpu, ret = 0; ++ ++ for_each_online_cpu(cpu) { ++ ret = cppc_set_enable(cpu, enable ? 1 : 0); ++ if (ret) ++ return ret; ++ } ++ ++ return ret; ++} ++ ++DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable); ++ ++static inline int amd_pstate_enable(bool enable) ++{ ++ return static_call(amd_pstate_enable)(enable); ++} ++ ++static int pstate_init_perf(struct amd_cpudata *cpudata) ++{ ++ u64 cap1; ++ ++ int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, ++ &cap1); ++ if (ret) ++ return ret; ++ ++ /* ++ * TODO: Introduce AMD specific power feature. ++ * ++ * CPPC entry doesn't indicate the highest performance in some ASICs. 
++ */ ++ WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf()); ++ ++ WRITE_ONCE(cpudata->nominal_perf, CAP1_NOMINAL_PERF(cap1)); ++ WRITE_ONCE(cpudata->lowest_nonlinear_perf, CAP1_LOWNONLIN_PERF(cap1)); ++ WRITE_ONCE(cpudata->lowest_perf, CAP1_LOWEST_PERF(cap1)); ++ ++ return 0; ++} ++ ++static int cppc_init_perf(struct amd_cpudata *cpudata) ++{ ++ struct cppc_perf_caps cppc_perf; ++ ++ int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); ++ if (ret) ++ return ret; ++ ++ WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf()); ++ ++ WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); ++ WRITE_ONCE(cpudata->lowest_nonlinear_perf, ++ cppc_perf.lowest_nonlinear_perf); ++ WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); ++ ++ return 0; ++} ++ ++DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf); ++ ++static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) ++{ ++ return static_call(amd_pstate_init_perf)(cpudata); ++} ++ ++static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, ++ u32 des_perf, u32 max_perf, bool fast_switch) ++{ ++ if (fast_switch) ++ wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); ++ else ++ wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, ++ READ_ONCE(cpudata->cppc_req_cached)); ++} ++ ++static void cppc_update_perf(struct amd_cpudata *cpudata, ++ u32 min_perf, u32 des_perf, ++ u32 max_perf, bool fast_switch) ++{ ++ struct cppc_perf_ctrls perf_ctrls; ++ ++ perf_ctrls.max_perf = max_perf; ++ perf_ctrls.min_perf = min_perf; ++ perf_ctrls.desired_perf = des_perf; ++ ++ cppc_set_perf(cpudata->cpu, &perf_ctrls); ++} ++ ++DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf); ++ ++static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, ++ u32 min_perf, u32 des_perf, ++ u32 max_perf, bool fast_switch) ++{ ++ static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, ++ max_perf, fast_switch); ++} ++ ++static void amd_pstate_update(struct amd_cpudata 
*cpudata, u32 min_perf, ++ u32 des_perf, u32 max_perf, bool fast_switch) ++{ ++ u64 prev = READ_ONCE(cpudata->cppc_req_cached); ++ u64 value = prev; ++ ++ value &= ~REQ_MIN_PERF(~0L); ++ value |= REQ_MIN_PERF(min_perf); ++ ++ value &= ~REQ_DES_PERF(~0L); ++ value |= REQ_DES_PERF(des_perf); ++ ++ value &= ~REQ_MAX_PERF(~0L); ++ value |= REQ_MAX_PERF(max_perf); ++ ++ trace_amd_pstate_perf(min_perf, des_perf, max_perf, ++ cpudata->cpu, (value != prev), fast_switch); ++ ++ if (value == prev) ++ return; ++ ++ WRITE_ONCE(cpudata->cppc_req_cached, value); ++ ++ amd_pstate_update_perf(cpudata, min_perf, des_perf, ++ max_perf, fast_switch); ++} ++ ++static int amd_pstate_verify(struct cpufreq_policy_data *policy) ++{ ++ cpufreq_verify_within_cpu_limits(policy); ++ ++ return 0; ++} ++ ++static int amd_pstate_target(struct cpufreq_policy *policy, ++ unsigned int target_freq, ++ unsigned int relation) ++{ ++ struct cpufreq_freqs freqs; ++ struct amd_cpudata *cpudata = policy->driver_data; ++ unsigned long amd_max_perf, amd_min_perf, amd_des_perf, ++ amd_cap_perf; ++ ++ if (!cpudata->max_freq) ++ return -ENODEV; ++ ++ amd_cap_perf = READ_ONCE(cpudata->highest_perf); ++ amd_min_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); ++ amd_max_perf = amd_cap_perf; ++ ++ freqs.old = policy->cur; ++ freqs.new = target_freq; ++ ++ amd_des_perf = DIV_ROUND_CLOSEST(target_freq * amd_cap_perf, ++ cpudata->max_freq); ++ ++ cpufreq_freq_transition_begin(policy, &freqs); ++ amd_pstate_update(cpudata, amd_min_perf, amd_des_perf, ++ amd_max_perf, false); ++ cpufreq_freq_transition_end(policy, &freqs, false); ++ ++ return 0; ++} ++ ++static void amd_pstate_adjust_perf(unsigned int cpu, ++ unsigned long min_perf, ++ unsigned long target_perf, ++ unsigned long capacity) ++{ ++ unsigned long amd_max_perf, amd_min_perf, amd_des_perf, ++ amd_cap_perf, lowest_nonlinear_perf; ++ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); ++ struct amd_cpudata *cpudata = policy->driver_data; ++ ++ amd_cap_perf 
= READ_ONCE(cpudata->highest_perf); ++ lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); ++ ++ if (target_perf < capacity) ++ amd_des_perf = DIV_ROUND_UP(amd_cap_perf * target_perf, ++ capacity); ++ ++ amd_min_perf = READ_ONCE(cpudata->highest_perf); ++ if (min_perf < capacity) ++ amd_min_perf = DIV_ROUND_UP(amd_cap_perf * min_perf, capacity); ++ ++ if (amd_min_perf < lowest_nonlinear_perf) ++ amd_min_perf = lowest_nonlinear_perf; ++ ++ amd_max_perf = amd_cap_perf; ++ if (amd_max_perf < amd_min_perf) ++ amd_max_perf = amd_min_perf; ++ ++ amd_des_perf = clamp_t(unsigned long, amd_des_perf, ++ amd_min_perf, amd_max_perf); ++ ++ amd_pstate_update(cpudata, amd_min_perf, amd_des_perf, ++ amd_max_perf, true); ++} ++ ++static int amd_get_min_freq(struct amd_cpudata *cpudata) ++{ ++ struct cppc_perf_caps cppc_perf; ++ ++ int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); ++ if (ret) ++ return ret; ++ ++ /* Switch to khz */ ++ return cppc_perf.lowest_freq * 1000; ++} ++ ++static int amd_get_max_freq(struct amd_cpudata *cpudata) ++{ ++ struct cppc_perf_caps cppc_perf; ++ u32 max_perf, max_freq, nominal_freq, nominal_perf; ++ u64 boost_ratio; ++ ++ int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); ++ if (ret) ++ return ret; ++ ++ nominal_freq = cppc_perf.nominal_freq; ++ nominal_perf = READ_ONCE(cpudata->nominal_perf); ++ max_perf = READ_ONCE(cpudata->highest_perf); ++ ++ boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT, ++ nominal_perf); ++ ++ max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT; ++ ++ /* Switch to khz */ ++ return max_freq * 1000; ++} ++ ++static int amd_get_nominal_freq(struct amd_cpudata *cpudata) ++{ ++ struct cppc_perf_caps cppc_perf; ++ u32 nominal_freq; ++ ++ int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); ++ if (ret) ++ return ret; ++ ++ nominal_freq = cppc_perf.nominal_freq; ++ ++ /* Switch to khz */ ++ return nominal_freq * 1000; ++} ++ ++static int amd_get_lowest_nonlinear_freq(struct amd_cpudata 
*cpudata) ++{ ++ struct cppc_perf_caps cppc_perf; ++ u32 lowest_nonlinear_freq, lowest_nonlinear_perf, ++ nominal_freq, nominal_perf; ++ u64 lowest_nonlinear_ratio; ++ ++ int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); ++ if (ret) ++ return ret; ++ ++ nominal_freq = cppc_perf.nominal_freq; ++ nominal_perf = READ_ONCE(cpudata->nominal_perf); ++ ++ lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; ++ ++ lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << ++ SCHED_CAPACITY_SHIFT, nominal_perf); ++ ++ lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT; ++ ++ /* Switch to khz */ ++ return lowest_nonlinear_freq * 1000; ++} ++ ++static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) ++{ ++ struct amd_cpudata *cpudata = policy->driver_data; ++ int ret; ++ ++ if (!cpudata->boost_supported) { ++ pr_err("Boost mode is not supported by this processor or SBIOS\n"); ++ return -EINVAL; ++ } ++ ++ if (state) ++ policy->cpuinfo.max_freq = cpudata->max_freq; ++ else ++ policy->cpuinfo.max_freq = cpudata->nominal_freq; ++ ++ policy->max = policy->cpuinfo.max_freq; ++ ++ ret = freq_qos_update_request(&cpudata->req[1], ++ policy->cpuinfo.max_freq); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++static void amd_pstate_boost_init(struct amd_cpudata *cpudata) ++{ ++ u32 highest_perf, nominal_perf; ++ ++ highest_perf = READ_ONCE(cpudata->highest_perf); ++ nominal_perf = READ_ONCE(cpudata->nominal_perf); ++ ++ if (highest_perf <= nominal_perf) ++ return; ++ ++ cpudata->boost_supported = true; ++ amd_pstate_driver.boost_enabled = true; ++} ++ ++static int amd_pstate_cpu_init(struct cpufreq_policy *policy) ++{ ++ int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; ++ unsigned int cpu = policy->cpu; ++ struct device *dev; ++ struct amd_cpudata *cpudata; ++ ++ dev = get_cpu_device(policy->cpu); ++ if (!dev) ++ return -ENODEV; ++ ++ cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL); ++ if (!cpudata) 
++ return -ENOMEM; ++ ++ cpudata->cpu = cpu; ++ ++ ret = amd_pstate_init_perf(cpudata); ++ if (ret) ++ goto free_cpudata1; ++ ++ min_freq = amd_get_min_freq(cpudata); ++ max_freq = amd_get_max_freq(cpudata); ++ nominal_freq = amd_get_nominal_freq(cpudata); ++ lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); ++ ++ if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { ++ dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", ++ min_freq, max_freq); ++ ret = -EINVAL; ++ goto free_cpudata1; ++ } ++ ++ policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY; ++ policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY; ++ ++ policy->min = min_freq; ++ policy->max = max_freq; ++ ++ policy->cpuinfo.min_freq = min_freq; ++ policy->cpuinfo.max_freq = max_freq; ++ ++ /* It will be updated by governor */ ++ policy->cur = policy->cpuinfo.min_freq; ++ ++ if (boot_cpu_has(X86_FEATURE_CPPC)) ++ policy->fast_switch_possible = true; ++ ++ ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0], ++ FREQ_QOS_MIN, policy->cpuinfo.min_freq); ++ if (ret < 0) { ++ dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); ++ goto free_cpudata1; ++ } ++ ++ ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1], ++ FREQ_QOS_MAX, policy->cpuinfo.max_freq); ++ if (ret < 0) { ++ dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret); ++ goto free_cpudata2; ++ } ++ ++ /* Initial processor data capability frequencies */ ++ cpudata->max_freq = max_freq; ++ cpudata->min_freq = min_freq; ++ cpudata->nominal_freq = nominal_freq; ++ cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; ++ ++ policy->driver_data = cpudata; ++ ++ amd_pstate_boost_init(cpudata); ++ ++ return 0; ++ ++free_cpudata2: ++ freq_qos_remove_request(&cpudata->req[0]); ++free_cpudata1: ++ kfree(cpudata); ++ return ret; ++} ++ ++static int amd_pstate_cpu_exit(struct cpufreq_policy *policy) ++{ ++ struct amd_cpudata *cpudata; ++ ++ cpudata = 
policy->driver_data; ++ ++ freq_qos_remove_request(&cpudata->req[1]); ++ freq_qos_remove_request(&cpudata->req[0]); ++ kfree(cpudata); ++ ++ return 0; ++} ++ ++/* Sysfs attributes */ ++ ++/* This frequency is to indicate the maximum hardware frequency. ++ * If boost is not active but supported, the frequency will be larger than the ++ * one in cpuinfo. ++ */ ++static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, ++ char *buf) ++{ ++ int max_freq; ++ struct amd_cpudata *cpudata; ++ ++ cpudata = policy->driver_data; ++ ++ max_freq = amd_get_max_freq(cpudata); ++ if (max_freq < 0) ++ return max_freq; ++ ++ return sprintf(&buf[0], "%u\n", max_freq); ++} ++ ++static ssize_t show_amd_pstate_nominal_freq(struct cpufreq_policy *policy, ++ char *buf) ++{ ++ int nominal_freq; ++ struct amd_cpudata *cpudata; ++ ++ cpudata = policy->driver_data; ++ ++ nominal_freq = amd_get_nominal_freq(cpudata); ++ if (nominal_freq < 0) ++ return nominal_freq; ++ ++ return sprintf(&buf[0], "%u\n", nominal_freq); ++} ++ ++static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, ++ char *buf) ++{ ++ int freq; ++ struct amd_cpudata *cpudata; ++ ++ cpudata = policy->driver_data; ++ ++ freq = amd_get_lowest_nonlinear_freq(cpudata); ++ if (freq < 0) ++ return freq; ++ ++ return sprintf(&buf[0], "%u\n", freq); ++} ++ ++static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, ++ char *buf) ++{ ++ u32 perf; ++ struct amd_cpudata *cpudata = policy->driver_data; ++ ++ perf = READ_ONCE(cpudata->highest_perf); ++ ++ return sprintf(&buf[0], "%u\n", perf); ++} ++ ++static ssize_t show_amd_pstate_nominal_perf(struct cpufreq_policy *policy, ++ char *buf) ++{ ++ u32 perf; ++ struct amd_cpudata *cpudata = policy->driver_data; ++ ++ perf = READ_ONCE(cpudata->nominal_perf); ++ ++ return sprintf(&buf[0], "%u\n", perf); ++} ++ ++static ssize_t show_amd_pstate_lowest_nonlinear_perf(struct cpufreq_policy *policy, ++ char *buf) ++{ ++ u32 perf; ++ struct 
amd_cpudata *cpudata = policy->driver_data; ++ ++ perf = READ_ONCE(cpudata->lowest_nonlinear_perf); ++ ++ return sprintf(&buf[0], "%u\n", perf); ++} ++ ++static ssize_t show_amd_pstate_lowest_perf(struct cpufreq_policy *policy, ++ char *buf) ++{ ++ u32 perf; ++ struct amd_cpudata *cpudata = policy->driver_data; ++ ++ perf = READ_ONCE(cpudata->lowest_perf); ++ ++ return sprintf(&buf[0], "%u\n", perf); ++} ++ ++cpufreq_freq_attr_ro(amd_pstate_max_freq); ++cpufreq_freq_attr_ro(amd_pstate_nominal_freq); ++cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); ++ ++cpufreq_freq_attr_ro(amd_pstate_highest_perf); ++cpufreq_freq_attr_ro(amd_pstate_nominal_perf); ++cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_perf); ++cpufreq_freq_attr_ro(amd_pstate_lowest_perf); ++ ++static struct freq_attr *amd_pstate_attr[] = { ++ &amd_pstate_max_freq, ++ &amd_pstate_nominal_freq, ++ &amd_pstate_lowest_nonlinear_freq, ++ &amd_pstate_highest_perf, ++ &amd_pstate_nominal_perf, ++ &amd_pstate_lowest_nonlinear_perf, ++ &amd_pstate_lowest_perf, ++ NULL, ++}; ++ ++static struct cpufreq_driver amd_pstate_driver = { ++ .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS, ++ .verify = amd_pstate_verify, ++ .target = amd_pstate_target, ++ .init = amd_pstate_cpu_init, ++ .exit = amd_pstate_cpu_exit, ++ .set_boost = amd_pstate_set_boost, ++ .name = "amd-pstate", ++ .attr = amd_pstate_attr, ++}; ++ ++static int __init amd_pstate_init(void) ++{ ++ int ret; ++ ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) ++ return -ENODEV; ++ ++ if (!acpi_cpc_valid()) { ++ pr_debug("%s, the _CPC object is not present in SBIOS\n", ++ __func__); ++ return -ENODEV; ++ } ++ ++ /* don't keep reloading if cpufreq_driver exists */ ++ if (cpufreq_get_current_driver()) ++ return -EEXIST; ++ ++ /* capability check */ ++ if (boot_cpu_has(X86_FEATURE_CPPC)) { ++ pr_debug("%s, AMD CPPC MSR based functionality is supported\n", ++ __func__); ++ amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; ++ } else { ++ 
static_call_update(amd_pstate_enable, cppc_enable); ++ static_call_update(amd_pstate_init_perf, cppc_init_perf); ++ static_call_update(amd_pstate_update_perf, cppc_update_perf); ++ } ++ ++ /* enable amd pstate feature */ ++ ret = amd_pstate_enable(true); ++ if (ret) { ++ pr_err("%s, failed to enable amd-pstate with return %d\n", ++ __func__, ret); ++ return ret; ++ } ++ ++ ret = cpufreq_register_driver(&amd_pstate_driver); ++ if (ret) { ++ pr_err("%s, return %d\n", __func__, ret); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++device_initcall(amd_pstate_init); ++ ++MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>"); ++MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); ++MODULE_LICENSE("GPL"); +diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h +index bc159a9b4a73..92b7ea8d8f5e 100644 +--- a/include/acpi/cppc_acpi.h ++++ b/include/acpi/cppc_acpi.h +@@ -138,6 +138,7 @@ extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); + extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); + extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); + extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); ++extern int cppc_set_enable(int cpu, bool enable); + extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); + extern bool acpi_cpc_valid(void); + extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data); +@@ -162,6 +163,10 @@ static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) + { + return -ENOTSUPP; + } ++static inline int cppc_set_enable(int cpu, bool enable) ++{ ++ return -ENOTSUPP; ++} + static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps) + { + return -ENOTSUPP; +diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c +index c3b56db8b921..02719cc400a1 100644 +--- a/tools/power/cpupower/lib/cpufreq.c ++++ b/tools/power/cpupower/lib/cpufreq.c +@@ -83,20 +83,21 @@ static const char 
*cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = { + [STATS_NUM_TRANSITIONS] = "stats/total_trans" + }; + +- +-static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, +- enum cpufreq_value which) ++unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu, ++ const char **table, ++ unsigned index, ++ unsigned size) + { + unsigned long value; + unsigned int len; + char linebuf[MAX_LINE_LEN]; + char *endp; + +- if (which >= MAX_CPUFREQ_VALUE_READ_FILES) ++ if (!table || index >= size || !table[index]) /* validate before use */ + return 0; + +- len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which], +- linebuf, sizeof(linebuf)); ++ len = sysfs_cpufreq_read_file(cpu, table[index], linebuf, ++ sizeof(linebuf)); + + if (len == 0) + return 0; +@@ -109,6 +110,14 @@ static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, + return value; + } + ++static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, ++ enum cpufreq_value which) ++{ ++ return cpufreq_get_sysfs_value_from_table(cpu, cpufreq_value_files, ++ which, ++ MAX_CPUFREQ_VALUE_READ_FILES); ++} ++ + /* read access to files which contain one string */ + + enum cpufreq_string { +diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h +index 95f4fd9e2656..107668c0c454 100644 +--- a/tools/power/cpupower/lib/cpufreq.h ++++ b/tools/power/cpupower/lib/cpufreq.h +@@ -203,6 +203,18 @@ int cpufreq_modify_policy_governor(unsigned int cpu, char *governor); + int cpufreq_set_frequency(unsigned int cpu, + unsigned long target_frequency); + ++/* ++ * get the sysfs value from specific table ++ * ++ * Read the value with the sysfs file name from specific table. Does ++ * only work if the cpufreq driver has the specific sysfs interfaces. 
++ */ ++ ++unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu, ++ const char **table, ++ unsigned index, ++ unsigned size); ++ + #ifdef __cplusplus + } + #endif +diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c +index f9895e31ff5a..f828f3c35a6f 100644 +--- a/tools/power/cpupower/utils/cpufreq-info.c ++++ b/tools/power/cpupower/utils/cpufreq-info.c +@@ -84,43 +84,6 @@ static void proc_cpufreq_output(void) + } + + static int no_rounding; +-static void print_speed(unsigned long speed) +-{ +- unsigned long tmp; +- +- if (no_rounding) { +- if (speed > 1000000) +- printf("%u.%06u GHz", ((unsigned int) speed/1000000), +- ((unsigned int) speed%1000000)); +- else if (speed > 1000) +- printf("%u.%03u MHz", ((unsigned int) speed/1000), +- (unsigned int) (speed%1000)); +- else +- printf("%lu kHz", speed); +- } else { +- if (speed > 1000000) { +- tmp = speed%10000; +- if (tmp >= 5000) +- speed += 10000; +- printf("%u.%02u GHz", ((unsigned int) speed/1000000), +- ((unsigned int) (speed%1000000)/10000)); +- } else if (speed > 100000) { +- tmp = speed%1000; +- if (tmp >= 500) +- speed += 1000; +- printf("%u MHz", ((unsigned int) speed/1000)); +- } else if (speed > 1000) { +- tmp = speed%100; +- if (tmp >= 50) +- speed += 100; +- printf("%u.%01u MHz", ((unsigned int) speed/1000), +- ((unsigned int) (speed%1000)/100)); +- } +- } +- +- return; +-} +- + static void print_duration(unsigned long duration) + { + unsigned long tmp; +@@ -183,9 +146,12 @@ static int get_boost_mode_x86(unsigned int cpu) + printf(_(" Supported: %s\n"), support ? _("yes") : _("no")); + printf(_(" Active: %s\n"), active ? 
_("yes") : _("no")); + +- if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD && +- cpupower_cpu_info.family >= 0x10) || +- cpupower_cpu_info.vendor == X86_VENDOR_HYGON) { ++ if (cpupower_cpu_info.vendor == X86_VENDOR_AMD && ++ cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) { ++ amd_pstate_show_perf_and_freq(cpu, no_rounding); ++ } else if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD && ++ cpupower_cpu_info.family >= 0x10) || ++ cpupower_cpu_info.vendor == X86_VENDOR_HYGON) { + ret = decode_pstates(cpu, b_states, pstates, &pstate_no); + if (ret) + return ret; +@@ -254,11 +220,11 @@ static int get_boost_mode(unsigned int cpu) + if (freqs) { + printf(_(" boost frequency steps: ")); + while (freqs->next) { +- print_speed(freqs->frequency); ++ print_speed(freqs->frequency, no_rounding); + printf(", "); + freqs = freqs->next; + } +- print_speed(freqs->frequency); ++ print_speed(freqs->frequency, no_rounding); + printf("\n"); + cpufreq_put_available_frequencies(freqs); + } +@@ -277,7 +243,7 @@ static int get_freq_kernel(unsigned int cpu, unsigned int human) + return -EINVAL; + } + if (human) { +- print_speed(freq); ++ print_speed(freq, no_rounding); + } else + printf("%lu", freq); + printf(_(" (asserted by call to kernel)\n")); +@@ -296,7 +262,7 @@ static int get_freq_hardware(unsigned int cpu, unsigned int human) + return -EINVAL; + } + if (human) { +- print_speed(freq); ++ print_speed(freq, no_rounding); + } else + printf("%lu", freq); + printf(_(" (asserted by call to hardware)\n")); +@@ -316,9 +282,9 @@ static int get_hardware_limits(unsigned int cpu, unsigned int human) + + if (human) { + printf(_(" hardware limits: ")); +- print_speed(min); ++ print_speed(min, no_rounding); + printf(" - "); +- print_speed(max); ++ print_speed(max, no_rounding); + printf("\n"); + } else { + printf("%lu %lu\n", min, max); +@@ -350,9 +316,9 @@ static int get_policy(unsigned int cpu) + return -EINVAL; + } + printf(_(" current policy: frequency should be within ")); +- 
print_speed(policy->min); ++ print_speed(policy->min, no_rounding); + printf(_(" and ")); +- print_speed(policy->max); ++ print_speed(policy->max, no_rounding); + + printf(".\n "); + printf(_("The governor \"%s\" may decide which speed to use\n" +@@ -436,7 +402,7 @@ static int get_freq_stats(unsigned int cpu, unsigned int human) + struct cpufreq_stats *stats = cpufreq_get_stats(cpu, &total_time); + while (stats) { + if (human) { +- print_speed(stats->frequency); ++ print_speed(stats->frequency, no_rounding); + printf(":%.2f%%", + (100.0 * stats->time_in_state) / total_time); + } else +@@ -486,11 +452,11 @@ static void debug_output_one(unsigned int cpu) + if (freqs) { + printf(_(" available frequency steps: ")); + while (freqs->next) { +- print_speed(freqs->frequency); ++ print_speed(freqs->frequency, no_rounding); + printf(", "); + freqs = freqs->next; + } +- print_speed(freqs->frequency); ++ print_speed(freqs->frequency, no_rounding); + printf("\n"); + cpufreq_put_available_frequencies(freqs); + } +diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c +index 97f2c857048e..fa38d3da42ce 100644 +--- a/tools/power/cpupower/utils/helpers/amd.c ++++ b/tools/power/cpupower/utils/helpers/amd.c +@@ -8,7 +8,9 @@ + #include <pci/pci.h> + + #include "helpers/helpers.h" ++#include "cpufreq.h" + ++/* ACPI P-States Helper Functions for AMD Processors ***************/ + #define MSR_AMD_PSTATE_STATUS 0xc0010063 + #define MSR_AMD_PSTATE 0xc0010064 + #define MSR_AMD_PSTATE_LIMIT 0xc0010061 +@@ -146,4 +148,89 @@ int amd_pci_get_num_boost_states(int *active, int *states) + pci_cleanup(pci_acc); + return 0; + } ++ ++/* ACPI P-States Helper Functions for AMD Processors ***************/ ++ ++/* AMD P-States Helper Functions ***************/ ++enum amd_pstate_value { ++ AMD_PSTATE_HIGHEST_PERF, ++ AMD_PSTATE_NOMINAL_PERF, ++ AMD_PSTATE_LOWEST_NONLINEAR_PERF, ++ AMD_PSTATE_LOWEST_PERF, ++ AMD_PSTATE_MAX_FREQ, ++ AMD_PSTATE_NOMINAL_FREQ, ++ 
AMD_PSTATE_LOWEST_NONLINEAR_FREQ, ++ MAX_AMD_PSTATE_VALUE_READ_FILES ++}; ++ ++static const char *amd_pstate_value_files[MAX_AMD_PSTATE_VALUE_READ_FILES] = { ++ [AMD_PSTATE_HIGHEST_PERF] = "amd_pstate_highest_perf", ++ [AMD_PSTATE_NOMINAL_PERF] = "amd_pstate_nominal_perf", ++ [AMD_PSTATE_LOWEST_NONLINEAR_PERF] = "amd_pstate_lowest_nonlinear_perf", ++ [AMD_PSTATE_LOWEST_PERF] = "amd_pstate_lowest_perf", ++ [AMD_PSTATE_MAX_FREQ] = "amd_pstate_max_freq", ++ [AMD_PSTATE_NOMINAL_FREQ] = "amd_pstate_nominal_freq", ++ [AMD_PSTATE_LOWEST_NONLINEAR_FREQ] = "amd_pstate_lowest_nonlinear_freq", ++}; ++ ++static unsigned long amd_pstate_get_data(unsigned int cpu, ++ enum amd_pstate_value value) ++{ ++ return cpufreq_get_sysfs_value_from_table(cpu, ++ amd_pstate_value_files, ++ value, ++ MAX_AMD_PSTATE_VALUE_READ_FILES); ++} ++ ++void amd_pstate_boost_init(unsigned int cpu, int *support, int *active) ++{ ++ unsigned long highest_perf, nominal_perf, cpuinfo_min, ++ cpuinfo_max, amd_pstate_max; ++ ++ highest_perf = amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF); ++ nominal_perf = amd_pstate_get_data(cpu, AMD_PSTATE_NOMINAL_PERF); ++ ++ *support = highest_perf > nominal_perf ? 1 : 0; ++ if (!(*support)) ++ return; ++ ++ cpufreq_get_hardware_limits(cpu, &cpuinfo_min, &cpuinfo_max); ++ amd_pstate_max = amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ); ++ ++ *active = cpuinfo_max == amd_pstate_max ? 1 : 0; ++} ++ ++void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding) ++{ ++ unsigned long cpuinfo_max, cpuinfo_min; ++ ++ cpufreq_get_hardware_limits(cpu, &cpuinfo_min, &cpuinfo_max); ++ ++ printf(_(" AMD PSTATE Highest Performance: %lu. Maximum Frequency: "), ++ amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF)); ++ /* If boost isn't active, the cpuinfo_max doesn't indicate real max ++ * frequency. So we read it back from amd-pstate sysfs entry. 
++ */ ++ print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ), no_rounding); ++ printf(".\n"); ++ ++ printf(_(" AMD PSTATE Nominal Performance: %lu. Nominal Frequency: "), ++ amd_pstate_get_data(cpu, AMD_PSTATE_NOMINAL_PERF)); ++ print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_NOMINAL_FREQ), ++ no_rounding); ++ printf(".\n"); ++ ++ printf(_(" AMD PSTATE Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "), ++ amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_NONLINEAR_PERF)); ++ print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_NONLINEAR_FREQ), ++ no_rounding); ++ printf(".\n"); ++ ++ printf(_(" AMD PSTATE Lowest Performance: %lu. Lowest Frequency: "), ++ amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_PERF)); ++ print_speed(cpuinfo_min, no_rounding); ++ printf(".\n"); ++} ++ ++/* AMD P-States Helper Functions ***************/ + #endif /* defined(__i386__) || defined(__x86_64__) */ +diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c +index 72eb43593180..2a6dc104e76b 100644 +--- a/tools/power/cpupower/utils/helpers/cpuid.c ++++ b/tools/power/cpupower/utils/helpers/cpuid.c +@@ -149,6 +149,19 @@ int get_cpu_info(struct cpupower_cpu_info *cpu_info) + if (ext_cpuid_level >= 0x80000008 && + cpuid_ebx(0x80000008) & (1 << 4)) + cpu_info->caps |= CPUPOWER_CAP_AMD_RDPRU; ++ ++ if (cpupower_amd_pstate_enabled()) { ++ cpu_info->caps |= CPUPOWER_CAP_AMD_PSTATE; ++ ++ /* ++ * If AMD P-state is enabled, the firmware will treat ++ * AMD P-state function as high priority. 
++ */ ++ cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB; ++ cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB_MSR; ++ cpu_info->caps &= ~CPUPOWER_CAP_AMD_HW_PSTATE; ++ cpu_info->caps &= ~CPUPOWER_CAP_AMD_PSTATEDEF; ++ } + } + + if (cpu_info->vendor == X86_VENDOR_INTEL) { +diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h +index 33ffacee7fcb..5f6862502dbf 100644 +--- a/tools/power/cpupower/utils/helpers/helpers.h ++++ b/tools/power/cpupower/utils/helpers/helpers.h +@@ -11,6 +11,7 @@ + + #include <libintl.h> + #include <locale.h> ++#include <stdbool.h> + + #include "helpers/bitmask.h" + #include <cpupower.h> +@@ -73,6 +74,7 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL, + #define CPUPOWER_CAP_AMD_HW_PSTATE 0x00000100 + #define CPUPOWER_CAP_AMD_PSTATEDEF 0x00000200 + #define CPUPOWER_CAP_AMD_CPB_MSR 0x00000400 ++#define CPUPOWER_CAP_AMD_PSTATE 0x00000800 + + #define CPUPOWER_AMD_CPBDIS 0x02000000 + +@@ -135,6 +137,16 @@ extern int decode_pstates(unsigned int cpu, int boost_states, + + extern int cpufreq_has_boost_support(unsigned int cpu, int *support, + int *active, int * states); ++ ++/* AMD P-States stuff **************************/ ++extern bool cpupower_amd_pstate_enabled(void); ++extern void amd_pstate_boost_init(unsigned int cpu, ++ int *support, int *active); ++extern void amd_pstate_show_perf_and_freq(unsigned int cpu, ++ int no_rounding); ++ ++/* AMD P-States stuff **************************/ ++ + /* + * CPUID functions returning a single datum + */ +@@ -167,6 +179,15 @@ static inline int cpufreq_has_boost_support(unsigned int cpu, int *support, + int *active, int * states) + { return -1; } + ++static inline bool cpupower_amd_pstate_enabled(void) ++{ return false; } ++static inline void amd_pstate_boost_init(unsigned int cpu, ++ int *support, int *active) ++{ return; } ++static inline void amd_pstate_show_perf_and_freq(unsigned int cpu, ++ int no_rounding) ++{ return; } ++ + /* cpuid and cpuinfo helpers 
**************************/ + + static inline unsigned int cpuid_eax(unsigned int op) { return 0; }; +@@ -184,5 +205,6 @@ extern struct bitmask *offline_cpus; + void get_cpustate(void); + void print_online_cpus(void); + void print_offline_cpus(void); ++void print_speed(unsigned long speed, int no_rounding); + + #endif /* __CPUPOWERUTILS_HELPERS__ */ +diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c +index fc6e34511721..d693c96cd09c 100644 +--- a/tools/power/cpupower/utils/helpers/misc.c ++++ b/tools/power/cpupower/utils/helpers/misc.c +@@ -3,9 +3,11 @@ + #include <stdio.h> + #include <errno.h> + #include <stdlib.h> ++#include <string.h> + + #include "helpers/helpers.h" + #include "helpers/sysfs.h" ++#include "cpufreq.h" + + #if defined(__i386__) || defined(__x86_64__) + +@@ -39,6 +41,8 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, + if (ret) + return ret; + } ++ } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) { ++ amd_pstate_boost_init(cpu, support, active); + } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA) + *support = *active = 1; + return 0; +@@ -83,6 +87,22 @@ int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val) + return 0; + } + ++bool cpupower_amd_pstate_enabled(void) ++{ ++ char *driver = cpufreq_get_driver(0); ++ bool ret = false; ++ ++ if (!driver) ++ return ret; ++ ++ if (!strcmp(driver, "amd-pstate")) ++ ret = true; ++ ++ cpufreq_put_driver(driver); ++ ++ return ret; ++} ++ + #endif /* #if defined(__i386__) || defined(__x86_64__) */ + + /* get_cpustate +@@ -144,3 +164,45 @@ void print_offline_cpus(void) + printf(_("cpupower set operation was not performed on them\n")); + } + } ++ ++/* ++ * print_speed ++ * ++ * Print the exact CPU frequency with appropriate unit ++ */ ++void print_speed(unsigned long speed, int no_rounding) ++{ ++ unsigned long tmp; ++ ++ if (no_rounding) { ++ if (speed > 1000000) ++ printf("%u.%06u GHz", ((unsigned 
int) speed/1000000), ++ ((unsigned int) speed%1000000)); ++ else if (speed > 1000) ++ printf("%u.%03u MHz", ((unsigned int) speed/1000), ++ (unsigned int) (speed%1000)); ++ else ++ printf("%lu kHz", speed); ++ } else { ++ if (speed > 1000000) { ++ tmp = speed%10000; ++ if (tmp >= 5000) ++ speed += 10000; ++ printf("%u.%02u GHz", ((unsigned int) speed/1000000), ++ ((unsigned int) (speed%1000000)/10000)); ++ } else if (speed > 100000) { ++ tmp = speed%1000; ++ if (tmp >= 500) ++ speed += 1000; ++ printf("%u MHz", ((unsigned int) speed/1000)); ++ } else if (speed > 1000) { ++ tmp = speed%100; ++ if (tmp >= 50) ++ speed += 100; ++ printf("%u.%01u MHz", ((unsigned int) speed/1000), ++ ((unsigned int) (speed%1000)/100)); ++ } ++ } ++ ++ return; ++} +-- +2.34.0 + |