mirror of https://github.com/torvalds/linux.git

Merge 6.13-rc7 into driver-core-next

We need the debugfs / driver-core fixes in here as well for testing and to
build on top of.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

commit dd19f4116e

.mailmap | 5
@@ -121,6 +121,8 @@ Ben Widawsky <bwidawsk@kernel.org> <benjamin.widawsky@intel.com>
 Benjamin Poirier <benjamin.poirier@gmail.com> <bpoirier@suse.de>
 Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@gmail.com>
 Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@redhat.com>
+Bingwu Zhang <xtex@aosc.io> <xtexchooser@duck.com>
+Bingwu Zhang <xtex@aosc.io> <xtex@xtexx.eu.org>
 Bjorn Andersson <andersson@kernel.org> <bjorn@kryo.se>
 Bjorn Andersson <andersson@kernel.org> <bjorn.andersson@linaro.org>
 Bjorn Andersson <andersson@kernel.org> <bjorn.andersson@sonymobile.com>
@@ -435,7 +437,7 @@ Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@puri.sm>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
 Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com> <martyna.szapar-mudlaw@intel.com>
-Mathieu Othacehe <m.othacehe@gmail.com> <othacehe@gnu.org>
+Mathieu Othacehe <othacehe@gnu.org> <m.othacehe@gmail.com>
 Mat Martineau <martineau@kernel.org> <mathew.j.martineau@linux.intel.com>
 Mat Martineau <martineau@kernel.org> <mathewm@codeaurora.org>
 Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
@@ -735,6 +737,7 @@ Wolfram Sang <wsa@kernel.org> <w.sang@pengutronix.de>
 Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de>
 Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
 Yanteng Si <si.yanteng@linux.dev> <siyanteng@loongson.cn>
+Ying Huang <huang.ying.caritas@gmail.com> <ying.huang@intel.com>
 Yusuke Goda <goda.yusuke@renesas.com>
 Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com>
 Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com>

CREDITS | 12
@@ -20,6 +20,10 @@ N: Thomas Abraham
 E: thomas.ab@samsung.com
 D: Samsung pin controller driver
 
+N: Jose Abreu
+E: jose.abreu@synopsys.com
+D: Synopsys DesignWare XPCS MDIO/PCS driver.
+
 N: Dragos Acostachioaie
 E: dragos@iname.com
 W: http://www.arbornet.org/~dragos
@@ -1428,6 +1432,10 @@ S: 8124 Constitution Apt. 7
 S: Sterling Heights, Michigan 48313
 S: USA
 
+N: Andy Gospodarek
+E: andy@greyhouse.net
+D: Maintenance and contributions to the network interface bonding driver.
+
 N: Wolfgang Grandegger
 E: wg@grandegger.com
 D: Controller Area Network (device drivers)
@@ -1812,6 +1820,10 @@ D: Author/maintainer of most DRM drivers (especially ATI, MGA)
 D: Core DRM templates, general DRM and 3D-related hacking
 S: No fixed address
 
+N: Woojung Huh
+E: woojung.huh@microchip.com
+D: Microchip LAN78XX USB Ethernet driver
+
 N: Kenn Humborg
 E: kenn@wombat.ie
 D: Mods to loop device to support sparse backing files
@@ -445,8 +445,10 @@ event code Key Notes
 0x1008  0x07  FN+F8  IBM: toggle screen expand
                      Lenovo: configure UltraNav,
                      or toggle screen expand.
-                     On newer platforms (2024+)
-                     replaced by 0x131f (see below)
+                     On 2024 platforms replaced by
+                     0x131f (see below) and on newer
+                     platforms (2025 +) keycode is
+                     replaced by 0x1401 (see below).
 
 0x1009  0x08  FN+F9  -
@@ -506,9 +508,11 @@ event code Key Notes
 
 0x1019  0x18  unknown
 
-0x131f  ...   FN+F8  Platform Mode change.
+0x131f  ...   FN+F8  Platform Mode change (2024 systems).
                      Implemented in driver.
 
+0x1401  ...   FN+F8  Platform Mode change (2025 + systems).
+                     Implemented in driver.
+
 ...     ...   ...
 
 0x1020  0x1F  unknown
@@ -436,7 +436,7 @@ AnonHugePmdMapped).
 The number of file transparent huge pages mapped to userspace is available
 by reading ShmemPmdMapped and ShmemHugePages fields in ``/proc/meminfo``.
 To identify what applications are mapping file transparent huge pages, it
-is necessary to read ``/proc/PID/smaps`` and count the FileHugeMapped fields
+is necessary to read ``/proc/PID/smaps`` and count the FilePmdMapped fields
 for each mapping.
 
 Note that reading the smaps file is expensive and reading it
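The renamed field can be tallied from userspace; a minimal illustrative sketch
(not part of the patch) that sums the FilePmdMapped entries for a given PID:

.. code-block:: c

   /* Illustrative only: sum FilePmdMapped across all mappings of a PID. */
   #include <stdio.h>

   int main(int argc, char **argv)
   {
           char path[64], line[256];
           unsigned long kb, total = 0;
           FILE *f;

           if (argc < 2)
                   return 1;
           snprintf(path, sizeof(path), "/proc/%s/smaps", argv[1]);
           f = fopen(path, "r");
           if (!f)
                   return 1;
           while (fgets(line, sizeof(line), f))
                   if (sscanf(line, "FilePmdMapped: %lu kB", &kb) == 1)
                           total += kb;
           fclose(f);
           printf("FilePmdMapped total: %lu kB\n", total);
           return 0;
   }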
@@ -251,9 +251,7 @@ performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
 In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
 table, so we need to expose it to sysfs. If boost is not active, but
 still supported, this maximum frequency will be larger than the one in
-``cpuinfo``. On systems that support preferred core, the driver will have
-different values for some cores than others and this will reflect the values
-advertised by the platform at bootup.
+``cpuinfo``.
 This attribute is read-only.
 
 ``amd_pstate_lowest_nonlinear_freq``
@@ -114,8 +114,9 @@ patternProperties:
         table that specifies the PPID to LIODN mapping. Needed if the PAMU is
         used. Value is a 12 bit value where value is a LIODN ID for this JR.
         This property is normally set by boot firmware.
-      $ref: /schemas/types.yaml#/definitions/uint32
-      maximum: 0xfff
+      $ref: /schemas/types.yaml#/definitions/uint32-array
+      items:
+        - maximum: 0xfff
 
   '^rtic@[0-9a-f]+$':
     type: object
@@ -186,8 +187,9 @@ patternProperties:
         Needed if the PAMU is used. Value is a 12 bit value where value
         is a LIODN ID for this JR. This property is normally set by boot
         firmware.
-      $ref: /schemas/types.yaml#/definitions/uint32
-      maximum: 0xfff
+      $ref: /schemas/types.yaml#/definitions/uint32-array
+      items:
+        - maximum: 0xfff
 
   fsl,rtic-region:
     description:
@@ -90,7 +90,7 @@ properties:
   adi,dsi-lanes:
     description: Number of DSI data lanes connected to the DSI host.
     $ref: /schemas/types.yaml#/definitions/uint32
-    enum: [ 1, 2, 3, 4 ]
+    enum: [ 2, 3, 4 ]
 
   "#sound-dai-cells":
     const: 0
@@ -42,6 +42,9 @@ properties:
   interrupts:
     maxItems: 1
 
+  '#sound-dai-cells':
+    const: 0
+
   ports:
     $ref: /schemas/graph.yaml#/properties/ports
     properties:
@@ -85,7 +88,21 @@ required:
   - ports
   - max-linkrate-mhz
 
-additionalProperties: false
+allOf:
+  - $ref: /schemas/sound/dai-common.yaml#
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              enum:
+                - mediatek,mt8188-dp-tx
+                - mediatek,mt8195-dp-tx
+    then:
+      properties:
+        '#sound-dai-cells': false
+
+unevaluatedProperties: false
 
 examples:
   - |
@@ -65,6 +65,7 @@ properties:
           - st,lsm9ds0-gyro
       - description: STMicroelectronics Magnetometers
         enum:
+          - st,iis2mdc
           - st,lis2mdl
           - st,lis3mdl-magn
           - st,lsm303agr-magn
@@ -82,7 +82,7 @@ examples:
 
         uimage@100000 {
             reg = <0x0100000 0x200000>;
-            compress = "lzma";
+            compression = "lzma";
         };
     };
@@ -81,7 +81,7 @@ properties:
       List of phandles, each pointing to the power supply for the
       corresponding pairset named in 'pairset-names'. This property
       aligns with IEEE 802.3-2022, Section 33.2.3 and 145.2.4.
-      PSE Pinout Alternatives (as per IEEE 802.3-2022 Table 145–3)
+      PSE Pinout Alternatives (as per IEEE 802.3-2022 Table 145-3)
       |-----------|---------------|---------------|---------------|---------------|
       | Conductor | Alternative A | Alternative A | Alternative B | Alternative B |
       |           | (MDI-X)       | (MDI)         | (X)           | (S)           |
@@ -35,6 +35,7 @@ properties:
 
   fsl,liodn:
     $ref: /schemas/types.yaml#/definitions/uint32-array
+    maxItems: 2
     description: See pamu.txt. Two LIODN(s). DQRR LIODN (DLIODN) and Frame LIODN
       (FLIODN)
@@ -69,6 +70,7 @@ patternProperties:
     type: object
     properties:
       fsl,liodn:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
         description: See pamu.txt, PAMU property used for static LIODN assignment
 
       fsl,iommu-parent:
@@ -51,7 +51,7 @@ properties:
     description: Power supply for AVDD, providing 1.8V.
 
   cpvdd-supply:
-    description: Power supply for CPVDD, providing 3.5V.
+    description: Power supply for CPVDD, providing 1.8V.
 
   hp-detect-gpios:
     description:
@@ -3,3 +3,853 @@
=================
Process Addresses
=================

.. toctree::
   :maxdepth: 3

Userland memory ranges are tracked by the kernel via Virtual Memory Areas or
'VMA's of type :c:struct:`!struct vm_area_struct`.

Each VMA describes a virtually contiguous memory range with identical
attributes, each described by a :c:struct:`!struct vm_area_struct` object.
Userland access outside of VMAs is invalid except in the case where an
adjacent stack VMA could be extended to contain the accessed address.

All VMAs are contained within one and only one virtual address space, described
by a :c:struct:`!struct mm_struct` object which is referenced by all tasks
(that is, threads) which share the virtual address space. We refer to this as
the :c:struct:`!mm`.

Each mm object contains a maple tree data structure which describes all VMAs
within the virtual address space.

.. note:: An exception to this is the 'gate' VMA which is provided by
          architectures which use :c:struct:`!vsyscall` and is a global static
          object which does not belong to any specific mm.
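To make this concrete, here is a minimal illustrative sketch (not part of this
document) that walks every VMA in an mm via the maple tree-backed VMA iterator,
holding the mmap read lock to keep the tree and the VMAs stable:

.. code-block:: c

   #include <linux/mm.h>

   /* Sketch: print the range of every VMA in the address space. */
   static void print_vma_ranges(struct mm_struct *mm)
   {
           VMA_ITERATOR(vmi, mm, 0);
           struct vm_area_struct *vma;

           mmap_read_lock(mm);
           for_each_vma(vmi, vma)
                   pr_info("VMA [%#lx, %#lx)\n", vma->vm_start, vma->vm_end);
           mmap_read_unlock(mm);
   }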
-------
Locking
-------

The kernel is designed to be highly scalable against concurrent read operations
on VMA **metadata** so a complicated set of locks are required to ensure memory
corruption does not occur.

.. note:: Locking VMAs for their metadata does not have any impact on the memory
          they describe nor the page tables that map them.

Terminology
-----------

* **mmap locks** - Each MM has a read/write semaphore :c:member:`!mmap_lock`
  which locks at a process address space granularity which can be acquired via
  :c:func:`!mmap_read_lock`, :c:func:`!mmap_write_lock` and variants.
* **VMA locks** - The VMA lock is at VMA granularity (of course) which behaves
  as a read/write semaphore in practice. A VMA read lock is obtained via
  :c:func:`!lock_vma_under_rcu` (and unlocked via :c:func:`!vma_end_read`) and a
  write lock via :c:func:`!vma_start_write` (all VMA write locks are unlocked
  automatically when the mmap write lock is released). To take a VMA write lock
  you **must** have already acquired an :c:func:`!mmap_write_lock`.
* **rmap locks** - When trying to access VMAs through the reverse mapping via a
  :c:struct:`!struct address_space` or :c:struct:`!struct anon_vma` object
  (reachable from a folio via :c:member:`!folio->mapping`), VMAs must be
  stabilised via :c:func:`!anon_vma_[try]lock_read` or
  :c:func:`!anon_vma_[try]lock_write` for anonymous memory and
  :c:func:`!i_mmap_[try]lock_read` or :c:func:`!i_mmap_[try]lock_write` for
  file-backed memory. We refer to these locks as the reverse mapping locks, or
  'rmap locks' for brevity.

We discuss page table locks separately in the dedicated section below.

The first thing **any** of these locks achieve is to **stabilise** the VMA
within the MM tree. That is, guaranteeing that the VMA object will not be
deleted from under you nor modified (except for some specific fields
described below).

Stabilising a VMA also keeps the address space described by it around.

Lock usage
----------

If you want to **read** VMA metadata fields or just keep the VMA stable, you
must do one of the following:

* Obtain an mmap read lock at the MM granularity via :c:func:`!mmap_read_lock`
  (or a suitable variant), unlocking it with a matching
  :c:func:`!mmap_read_unlock` when you're done with the VMA, *or*
* Try to obtain a VMA read lock via :c:func:`!lock_vma_under_rcu`. This tries to
  acquire the lock atomically so might fail, in which case fall-back logic is
  required to instead obtain an mmap read lock if this returns :c:macro:`!NULL`
  (see the sketch below this list), *or*
* Acquire an rmap lock before traversing the locked interval tree (whether
  anonymous or file-backed) to obtain the required VMA.
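A sketch combining the first two options (a hypothetical helper, not kernel
code; note the caller must pair the matching unlock as commented):

.. code-block:: c

   /* Hypothetical helper: stabilise the VMA covering addr. On success the
    * caller must drop the matching lock - vma_end_read() if *vma_locked,
    * mmap_read_unlock() otherwise. */
   static struct vm_area_struct *stabilise_vma(struct mm_struct *mm,
                                               unsigned long addr,
                                               bool *vma_locked)
   {
           struct vm_area_struct *vma;

           *vma_locked = true;
           vma = lock_vma_under_rcu(mm, addr);  /* optimistic, may fail */
           if (vma)
                   return vma;

           *vma_locked = false;
           mmap_read_lock(mm);                  /* fall back to mmap lock */
           vma = vma_lookup(mm, addr);
           if (!vma)
                   mmap_read_unlock(mm);
           return vma;
   }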
If you want to **write** VMA metadata fields, then things vary depending on the
field (we explore each VMA field in detail below). For the majority you must:

* Obtain an mmap write lock at the MM granularity via :c:func:`!mmap_write_lock`
  (or a suitable variant), unlocking it with a matching
  :c:func:`!mmap_write_unlock` when you're done with the VMA, *and*
* Obtain a VMA write lock via :c:func:`!vma_start_write` for each VMA you wish
  to modify, which will be released automatically when
  :c:func:`!mmap_write_unlock` is called (see the sketch below this list).
* If you want to be able to write to **any** field, you must also hide the VMA
  from the reverse mapping by obtaining an **rmap write lock**.
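A minimal sketch of the common write path, using :c:func:`!vm_flags_set` purely
as an example metadata update:

.. code-block:: c

   /* Sketch: mmap write lock first, then the per-VMA write lock. */
   mmap_write_lock(mm);
   vma_start_write(vma);
   vm_flags_set(vma, VM_LOCKED);   /* example metadata update */
   mmap_write_unlock(mm);          /* also drops all VMA write locks */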
VMA locks are special in that you must obtain an mmap **write** lock **first**
in order to obtain a VMA **write** lock. A VMA **read** lock however can be
obtained without any other lock (:c:func:`!lock_vma_under_rcu` will acquire then
release an RCU lock to look up the VMA for you).

This constrains the impact of writers on readers, as a writer can interact with
one VMA while a reader interacts with another simultaneously.

.. note:: The primary users of VMA read locks are page fault handlers, which
          means that without a VMA write lock, page faults will run concurrent
          with whatever you are doing.

Examining all valid lock states:
.. table::

   ========= ======== ========= ======= ===== =========== ==========
   mmap lock VMA lock rmap lock Stable? Read? Write most? Write all?
   ========= ======== ========= ======= ===== =========== ==========
   \-        \-       \-        N       N     N           N
   \-        R        \-        Y       Y     N           N
   \-        \-       R/W       Y       Y     N           N
   R/W       \-/R     \-/R/W    Y       Y     N           N
   W         W        \-/R      Y       Y     Y           N
   W         W        W         Y       Y     Y           Y
   ========= ======== ========= ======= ===== =========== ==========

.. warning:: While it's possible to obtain a VMA lock while holding an mmap read
             lock, attempting to do the reverse is invalid as it can result in
             deadlock - if another task already holds an mmap write lock and
             attempts to acquire a VMA write lock that will deadlock on the VMA
             read lock.

All of these locks behave as read/write semaphores in practice, so you can
obtain either a read or a write lock for each of these.

.. note:: Generally speaking, a read/write semaphore is a class of lock which
          permits concurrent readers. However a write lock can only be obtained
          once all readers have left the critical region (and pending readers
          made to wait).

          This renders read locks on a read/write semaphore concurrent with
          other readers and write locks exclusive against all others holding
          the semaphore.
VMA fields
^^^^^^^^^^

We can subdivide :c:struct:`!struct vm_area_struct` fields by their purpose,
which makes it easier to explore their locking characteristics:

.. note:: We exclude VMA lock-specific fields here to avoid confusion, as these
          are in effect an internal implementation detail.

.. table:: Virtual layout fields

   ===================== ========================================= ===========
   Field                 Description                               Write lock
   ===================== ========================================= ===========
   :c:member:`!vm_start` Inclusive start virtual address of range  mmap write,
                         VMA describes.                            VMA write,
                                                                   rmap write.
   :c:member:`!vm_end`   Exclusive end virtual address of range    mmap write,
                         VMA describes.                            VMA write,
                                                                   rmap write.
   :c:member:`!vm_pgoff` Describes the page offset into the file,  mmap write,
                         the original page offset within the       VMA write,
                         virtual address space (prior to any       rmap write.
                         :c:func:`!mremap`), or PFN if a PFN map
                         and the architecture does not support
                         :c:macro:`!CONFIG_ARCH_HAS_PTE_SPECIAL`.
   ===================== ========================================= ===========

These fields describe the size, start and end of the VMA, and as such cannot be
modified without first being hidden from the reverse mapping since these fields
are used to locate VMAs within the reverse mapping interval trees.

.. table:: Core fields

   ============================ ========================================= =========================
   Field                        Description                               Write lock
   ============================ ========================================= =========================
   :c:member:`!vm_mm`           Containing mm_struct.                     None - written once on
                                                                          initial map.
   :c:member:`!vm_page_prot`    Architecture-specific page table          mmap write, VMA write.
                                protection bits determined from VMA
                                flags.
   :c:member:`!vm_flags`        Read-only access to VMA flags describing  N/A
                                attributes of the VMA, in union with
                                private writable
                                :c:member:`!__vm_flags`.
   :c:member:`!__vm_flags`      Private, writable access to VMA flags     mmap write, VMA write.
                                field, updated by
                                :c:func:`!vm_flags_*` functions.
   :c:member:`!vm_file`         If the VMA is file-backed, points to a    None - written once on
                                struct file object describing the         initial map.
                                underlying file, if anonymous then
                                :c:macro:`!NULL`.
   :c:member:`!vm_ops`          If the VMA is file-backed, then either    None - Written once on
                                the driver or file-system provides a      initial map by
                                :c:struct:`!struct vm_operations_struct`  :c:func:`!f_ops->mmap()`.
                                object describing callbacks to be
                                invoked on VMA lifetime events.
   :c:member:`!vm_private_data` A :c:member:`!void *` field for           Handled by driver.
                                driver-specific metadata.
   ============================ ========================================= =========================

These are the core fields which describe the MM the VMA belongs to and its
attributes.

.. table:: Config-specific fields

   ================================= ===================== ======================================== ==============
   Field                             Configuration option  Description                              Write lock
   ================================= ===================== ======================================== ==============
   :c:member:`!anon_name`            CONFIG_ANON_VMA_NAME  A field for storing a                    mmap write,
                                                           :c:struct:`!struct anon_vma_name`        VMA write.
                                                           object providing a name for anonymous
                                                           mappings, or :c:macro:`!NULL` if none
                                                           is set or the VMA is file-backed. The
                                                           underlying object is reference counted
                                                           and can be shared across multiple VMAs
                                                           for scalability.
   :c:member:`!swap_readahead_info`  CONFIG_SWAP           Metadata used by the swap mechanism      mmap read,
                                                           to perform readahead. This field is      swap-specific
                                                           accessed atomically.                     lock.
   :c:member:`!vm_policy`            CONFIG_NUMA           :c:type:`!mempolicy` object which        mmap write,
                                                           describes the NUMA behaviour of the      VMA write.
                                                           VMA. The underlying object is reference
                                                           counted.
   :c:member:`!numab_state`          CONFIG_NUMA_BALANCING :c:type:`!vma_numab_state` object which  mmap read,
                                                           describes the current state of           numab-specific
                                                           NUMA balancing in relation to this VMA.  lock.
                                                           Updated under mmap read lock by
                                                           :c:func:`!task_numa_work`.
   :c:member:`!vm_userfaultfd_ctx`   CONFIG_USERFAULTFD    Userfaultfd context wrapper object of    mmap write,
                                                           type :c:type:`!vm_userfaultfd_ctx`,      VMA write.
                                                           either of zero size if userfaultfd is
                                                           disabled, or containing a pointer
                                                           to an underlying
                                                           :c:type:`!userfaultfd_ctx` object which
                                                           describes userfaultfd metadata.
   ================================= ===================== ======================================== ==============

These fields are present or not depending on whether the relevant kernel
configuration option is set.

.. table:: Reverse mapping fields

   =================================== ========================================== ============================
   Field                               Description                                Write lock
   =================================== ========================================== ============================
   :c:member:`!shared.rb`              A red/black tree node used, if the         mmap write, VMA write,
                                       mapping is file-backed, to place the VMA   i_mmap write.
                                       in the
                                       :c:member:`!struct address_space->i_mmap`
                                       red/black interval tree.
   :c:member:`!shared.rb_subtree_last` Metadata used for management of the        mmap write, VMA write,
                                       interval tree if the VMA is file-backed.   i_mmap write.
   :c:member:`!anon_vma_chain`         List of pointers to both forked/CoW'd      mmap read, anon_vma write.
                                       :c:type:`!anon_vma` objects and
                                       :c:member:`!vma->anon_vma` if it is
                                       non-:c:macro:`!NULL`.
   :c:member:`!anon_vma`               :c:type:`!anon_vma` object used by         When :c:macro:`NULL` and
                                       anonymous folios mapped exclusively to     setting non-:c:macro:`NULL`:
                                       this VMA. Initially set by                 mmap read, page_table_lock.
                                       :c:func:`!anon_vma_prepare` serialised
                                       by the :c:macro:`!page_table_lock`. This   When non-:c:macro:`NULL` and
                                       is set as soon as any page is faulted in.  setting :c:macro:`NULL`:
                                                                                  mmap write, VMA write,
                                                                                  anon_vma write.
   =================================== ========================================== ============================

These fields are used to both place the VMA within the reverse mapping, and for
anonymous mappings, to be able to access both related :c:struct:`!struct
anon_vma` objects and the :c:struct:`!struct anon_vma` in which folios mapped
exclusively to this VMA should reside.

.. note:: If a file-backed mapping is mapped with :c:macro:`!MAP_PRIVATE` set
          then it can be in both the :c:type:`!anon_vma` and :c:type:`!i_mmap`
          trees at the same time, so all of these fields might be utilised at
          once.
Page tables
-----------

We won't speak exhaustively on the subject but broadly speaking, page tables map
virtual addresses to physical ones through a series of page tables, each of
which contain entries with physical addresses for the next page table level
(along with flags), and at the leaf level the physical addresses of the
underlying physical data pages or a special entry such as a swap entry,
migration entry or other special marker. Offsets into these pages are provided
by the virtual address itself.

In Linux these are divided into five levels - PGD, P4D, PUD, PMD and PTE. Huge
pages might eliminate one or two of these levels, but when this is the case we
typically refer to the leaf level as the PTE level regardless.

.. note:: In instances where the architecture supports fewer page tables than
          five the kernel cleverly 'folds' page table levels, that is stubbing
          out functions related to the skipped levels. This allows us to
          conceptually act as if there were always five levels, even if the
          compiler might, in practice, eliminate any code relating to missing
          ones.

There are four key operations typically performed on page tables:

1. **Traversing** page tables - Simply reading page tables in order to traverse
   them. This only requires that the VMA is kept stable, so a lock which
   establishes this suffices for traversal (there are also lockless variants
   which eliminate even this requirement, such as :c:func:`!gup_fast`).
2. **Installing** page table mappings - Whether creating a new mapping or
   modifying an existing one in such a way as to change its identity. This
   requires that the VMA is kept stable via an mmap or VMA lock (explicitly not
   rmap locks).
3. **Zapping/unmapping** page table entries - This is what the kernel calls
   clearing page table mappings at the leaf level only, whilst leaving all page
   tables in place. This is a very common operation in the kernel performed on
   file truncation, the :c:macro:`!MADV_DONTNEED` operation via
   :c:func:`!madvise`, and others. This is performed by a number of functions
   including :c:func:`!unmap_mapping_range` and :c:func:`!unmap_mapping_pages`
   (a sketch follows the note after this list). The VMA need only be kept
   stable for this operation.
4. **Freeing** page tables - When finally the kernel removes page tables from a
   userland process (typically via :c:func:`!free_pgtables`) extreme care must
   be taken to ensure this is done safely, as this logic finally frees all page
   tables in the specified range, ignoring existing leaf entries (it assumes
   the caller has both zapped the range and prevented any further faults or
   modifications within it).

.. note:: Modifying mappings for reclaim or migration is performed under rmap
          lock as it, like zapping, does not fundamentally modify the identity
          of what is being mapped.
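As an illustration of zapping, a filesystem truncating its page cache might
clear leaf entries across every VMA mapping a file with a call along these
lines (a sketch, not drawn from this document):

.. code-block:: c

   /* Sketch: zap PTEs mapping the first 16 pages of a file from all VMAs
    * mapping it, leaving the page tables themselves in place. Passing
    * even_cows == 1 also zaps private CoW'd copies of the pages. */
   unmap_mapping_range(inode->i_mapping, 0, 16 << PAGE_SHIFT, 1);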
**Traversing** and **zapping** ranges can be performed holding any one of the
locks described in the terminology section above - that is the mmap lock, the
VMA lock or either of the reverse mapping locks.

That is - as long as you keep the relevant VMA **stable** - you are good to go
ahead and perform these operations on page tables (though internally, kernel
operations that perform writes also acquire internal page table locks to
serialise - see the page table implementation detail section for more details).

When **installing** page table entries, the mmap or VMA lock must be held to
keep the VMA stable. We explore why this is in the page table locking details
section below.

.. warning:: Page tables are normally only traversed in regions covered by VMAs.
             If you want to traverse page tables in areas that might not be
             covered by VMAs, heavier locking is required.
             See :c:func:`!walk_page_range_novma` for details.

**Freeing** page tables is an entirely internal memory management operation and
has special requirements (see the page freeing section below for more details).

.. warning:: When **freeing** page tables, it must not be possible for VMAs
             containing the ranges those page tables map to be accessible via
             the reverse mapping.

             The :c:func:`!free_pgtables` function removes the relevant VMAs
             from the reverse mappings, but no other VMAs can be permitted to be
             accessible and span the specified range.

Lock ordering
-------------

As we have multiple locks across the kernel which may or may not be taken at the
same time as explicit mm or VMA locks, we have to be wary of lock inversion, and
the **order** in which locks are acquired and released becomes very important.

.. note:: Lock inversion occurs when two threads need to acquire multiple locks,
          but in doing so inadvertently cause a mutual deadlock.

          For example, consider thread 1 which holds lock A and tries to acquire
          lock B, while thread 2 holds lock B and tries to acquire lock A.

          Both threads are now deadlocked on each other. However, had they
          attempted to acquire locks in the same order, one would have waited
          for the other to complete its work and no deadlock would have
          occurred.
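Sketched as kernel code with two hypothetical spinlocks:

.. code-block:: c

   static DEFINE_SPINLOCK(lock_a);  /* hypothetical locks */
   static DEFINE_SPINLOCK(lock_b);

   static void thread1(void)
   {
           spin_lock(&lock_a);
           spin_lock(&lock_b);     /* blocks while thread2 holds lock_b... */
           /* ... */
           spin_unlock(&lock_b);
           spin_unlock(&lock_a);
   }

   static void thread2(void)       /* WRONG: opposite acquisition order */
   {
           spin_lock(&lock_b);
           spin_lock(&lock_a);     /* ...while thread2 blocks here: deadlock */
           /* ... */
           spin_unlock(&lock_a);
           spin_unlock(&lock_b);
   }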
The opening comment in :c:macro:`!mm/rmap.c` describes in detail the required
ordering of locks within memory management code:

.. code-block::

  inode->i_rwsem        (while writing or truncating, not reading or faulting)
    mm->mmap_lock
      mapping->invalidate_lock (in filemap_fault)
        folio_lock
          hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share, see hugetlbfs below)
            vma_start_write
              mapping->i_mmap_rwsem
                anon_vma->rwsem
                  mm->page_table_lock or pte_lock
                    swap_lock (in swap_duplicate, swap_info_get)
                      mmlist_lock (in mmput, drain_mmlist and others)
                      mapping->private_lock (in block_dirty_folio)
                        i_pages lock (widely used)
                          lruvec->lru_lock (in folio_lruvec_lock_irq)
                      inode->i_lock (in set_page_dirty's __mark_inode_dirty)
                      bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
                        sb_lock (within inode_lock in fs/fs-writeback.c)
                        i_pages lock (widely used, in set_page_dirty,
                                  in arch-dependent flush_dcache_mmap_lock,
                                  within bdi.wb->list_lock in __sync_single_inode)

There is also a file-system specific lock ordering comment located at the top of
:c:macro:`!mm/filemap.c`:

.. code-block::

  ->i_mmap_rwsem                (truncate_pagecache)
    ->private_lock              (__free_pte->block_dirty_folio)
      ->swap_lock               (exclusive_swap_page, others)
        ->i_pages lock

  ->i_rwsem
    ->invalidate_lock           (acquired by fs in truncate path)
      ->i_mmap_rwsem            (truncate->unmap_mapping_range)

  ->mmap_lock
    ->i_mmap_rwsem
      ->page_table_lock or pte_lock (various, mainly in memory.c)
        ->i_pages lock          (arch-dependent flush_dcache_mmap_lock)

  ->mmap_lock
    ->invalidate_lock           (filemap_fault)
      ->lock_page               (filemap_fault, access_process_vm)

  ->i_rwsem                     (generic_perform_write)
    ->mmap_lock                 (fault_in_readable->do_page_fault)

  bdi->wb.list_lock
    sb_lock                     (fs/fs-writeback.c)
    ->i_pages lock              (__sync_single_inode)

  ->i_mmap_rwsem
    ->anon_vma.lock             (vma_merge)

  ->anon_vma.lock
    ->page_table_lock or pte_lock (anon_vma_prepare and various)

  ->page_table_lock or pte_lock
    ->swap_lock                 (try_to_unmap_one)
    ->private_lock              (try_to_unmap_one)
    ->i_pages lock              (try_to_unmap_one)
    ->lruvec->lru_lock          (follow_page_mask->mark_page_accessed)
    ->lruvec->lru_lock          (check_pte_range->folio_isolate_lru)
    ->private_lock              (folio_remove_rmap_pte->set_page_dirty)
    ->i_pages lock              (folio_remove_rmap_pte->set_page_dirty)
    bdi.wb->list_lock           (folio_remove_rmap_pte->set_page_dirty)
    ->inode->i_lock             (folio_remove_rmap_pte->set_page_dirty)
    bdi.wb->list_lock           (zap_pte_range->set_page_dirty)
    ->inode->i_lock             (zap_pte_range->set_page_dirty)
    ->private_lock              (zap_pte_range->block_dirty_folio)

Please check the current state of these comments, which may have changed since
the time of writing of this document.

------------------------------
Locking Implementation Details
------------------------------

.. warning:: Locking rules for PTE-level page tables are very different from
             locking rules for page tables at other levels.

Page table locking details
--------------------------

In addition to the locks described in the terminology section above, we have
additional locks dedicated to page tables:

* **Higher level page table locks** - Higher level page tables, that is PGD, P4D
  and PUD each make use of the process address space granularity
  :c:member:`!mm->page_table_lock` lock when modified.

* **Fine-grained page table locks** - PMDs and PTEs each have fine-grained locks
  either kept within the folios describing the page tables or allocated
  separately and pointed at by the folios if :c:macro:`!ALLOC_SPLIT_PTLOCKS` is
  set. The PMD spin lock is obtained via :c:func:`!pmd_lock`, however PTEs are
  mapped into higher memory (if a 32-bit system) and carefully locked via
  :c:func:`!pte_offset_map_lock`.

These locks represent the minimum required to interact with each page table
level, but there are further requirements.

Importantly, note that on a **traversal** of page tables, sometimes no such
locks are taken. However, at the PTE level, at least concurrent page table
deletion must be prevented (using RCU) and the page table must be mapped into
high memory, see below.

Whether care is taken on reading the page table entries depends on the
architecture, see the section on atomicity below.

Locking rules
^^^^^^^^^^^^^

We establish basic locking rules when interacting with page tables:

* When changing a page table entry the page table lock for that page table
  **must** be held, except if you can safely assume nobody can access the page
  tables concurrently (such as on invocation of :c:func:`!free_pgtables`).
* Reads from and writes to page table entries must be *appropriately*
  atomic. See the section on atomicity below for details.
* Populating previously empty entries requires that the mmap or VMA locks are
  held (read or write), doing so with only rmap locks would be dangerous (see
  the warning below).
* As mentioned previously, zapping can be performed while simply keeping the VMA
  stable, that is holding any one of the mmap, VMA or rmap locks.

.. warning:: Populating previously empty entries is dangerous as, when unmapping
             VMAs, :c:func:`!vms_clear_ptes` has a window of time between
             zapping (via :c:func:`!unmap_vmas`) and freeing page tables (via
             :c:func:`!free_pgtables`), where the VMA is still visible in the
             rmap tree. :c:func:`!free_pgtables` assumes that the zap has
             already been performed and removes PTEs unconditionally (along with
             all other page tables in the freed range), so installing new PTE
             entries could leak memory and also cause other unexpected and
             dangerous behaviour.

There are additional rules applicable when moving page tables, which we discuss
in the section on this topic below.

PTE-level page tables are different from page tables at other levels, and there
are extra requirements for accessing them:

* On 32-bit architectures, they may be in high memory (meaning they need to be
  mapped into kernel memory to be accessible).
* When empty, they can be unlinked and RCU-freed while holding an mmap lock or
  rmap lock for reading in combination with the PTE and PMD page table locks.
  In particular, this happens in :c:func:`!retract_page_tables` when handling
  :c:macro:`!MADV_COLLAPSE`.

So accessing PTE-level page tables requires at least holding an RCU read lock;
but that only suffices for readers that can tolerate racing with concurrent
page table updates such that an empty PTE is observed (in a page table that
has actually already been detached and marked for RCU freeing) while another
new page table has been installed in the same location and filled with
entries. Writers normally need to take the PTE lock and revalidate that the
PMD entry still refers to the same PTE-level page table.

To access PTE-level page tables, a helper like :c:func:`!pte_offset_map_lock` or
:c:func:`!pte_offset_map` can be used depending on stability requirements.
These map the page table into kernel memory if required, take the RCU lock, and
depending on variant, may also look up or acquire the PTE lock.
See the comment on :c:func:`!__pte_offset_map_lock`.
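A minimal sketch of the locked variant in use (illustrative; assumes the caller
has already stabilised the VMA):

.. code-block:: c

   /* Sketch: read the PTE mapping addr under the PTE lock. The helper maps
    * the PTE page (kmap on 32-bit), takes the PTE spinlock and revalidates
    * that the PMD entry still points at this page table. */
   static bool my_pte_present(struct mm_struct *mm, pmd_t *pmd,
                              unsigned long addr)
   {
           spinlock_t *ptl;
           pte_t *pte;
           bool ret;

           pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
           if (!pte)       /* PTE table disappeared, e.g. THP collapse */
                   return false;
           ret = pte_present(ptep_get(pte));
           pte_unmap_unlock(pte, ptl);
           return ret;
   }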
Atomicity
^^^^^^^^^

Regardless of page table locks, the MMU hardware concurrently updates accessed
and dirty bits (perhaps more, depending on architecture). Additionally, page
table traversal operations run in parallel (though holding the VMA stable) and
functionality like GUP-fast locklessly traverses (that is, reads) page tables,
without even keeping the VMA stable at all.

When performing a page table traversal and keeping the VMA stable, whether a
read must be performed once and only once or not depends on the architecture
(for instance x86-64 does not require any special precautions).

If a write is being performed, or if a read informs whether a write takes place
(on an installation of a page table entry say, for instance in
:c:func:`!__pud_install`), special care must always be taken. In these cases we
can never assume that page table locks give us entirely exclusive access, and
must retrieve page table entries once and only once.

If we are reading page table entries, then we need only ensure that the compiler
does not rearrange our loads. This is achieved via the :c:func:`!pXXp_get`
functions - :c:func:`!pgdp_get`, :c:func:`!p4dp_get`, :c:func:`!pudp_get`,
:c:func:`!pmdp_get`, and :c:func:`!ptep_get`.

Each of these uses :c:func:`!READ_ONCE` to guarantee that the compiler reads
the page table entry only once.

However, if we wish to manipulate an existing page table entry and care about
the previously stored data, we must go further and use a hardware atomic
operation as, for example, in :c:func:`!ptep_get_and_clear`.
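In code, the two flavours described above look like this (a sketch using the
helpers just named):

.. code-block:: c

   /* A plain read: ptep_get() wraps READ_ONCE(), so the load happens
    * exactly once and cannot be torn or reordered by the compiler. */
   pte_t pte = ptep_get(ptep);

   /* A read-modify-write where the old value matters must be a single
    * hardware atomic, e.g. clearing an entry while capturing it: */
   pte_t old = ptep_get_and_clear(mm, addr, ptep);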
Equally, operations that do not rely on the VMA being held stable, such as
GUP-fast (see :c:func:`!gup_fast` and its various page table level handlers like
:c:func:`!gup_fast_pte_range`), must very carefully interact with page table
entries, using functions such as :c:func:`!ptep_get_lockless` and its
equivalents for higher level page table levels.

Writes to page table entries must also be appropriately atomic, as established
by the :c:func:`!set_pXX` functions - :c:func:`!set_pgd`, :c:func:`!set_p4d`,
:c:func:`!set_pud`, :c:func:`!set_pmd`, and :c:func:`!set_pte`.

Equally, functions which clear page table entries must be appropriately atomic,
as in the :c:func:`!pXX_clear` functions - :c:func:`!pgd_clear`,
:c:func:`!p4d_clear`, :c:func:`!pud_clear`, :c:func:`!pmd_clear`, and
:c:func:`!pte_clear`.

Page table installation
^^^^^^^^^^^^^^^^^^^^^^^

Page table installation is performed with the VMA held stable explicitly by an
mmap or VMA lock in read or write mode (see the warning in the locking rules
section for details as to why).

When allocating a P4D, PUD or PMD and setting the relevant entry in the above
PGD, P4D or PUD, the :c:member:`!mm->page_table_lock` must be held. This is
acquired in :c:func:`!__p4d_alloc`, :c:func:`!__pud_alloc` and
:c:func:`!__pmd_alloc` respectively.

.. note:: :c:func:`!__pmd_alloc` actually invokes :c:func:`!pud_lock` and
          :c:func:`!pud_lockptr` in turn, however at the time of writing it
          ultimately references the :c:member:`!mm->page_table_lock`.

Allocating a PTE will either use the :c:member:`!mm->page_table_lock` or, if
:c:macro:`!USE_SPLIT_PMD_PTLOCKS` is defined, a lock embedded in the PMD
physical page metadata in the form of a :c:struct:`!struct ptdesc`, acquired by
:c:func:`!pmd_ptdesc` called from :c:func:`!pmd_lock` and ultimately
:c:func:`!__pte_alloc`.

Finally, modifying the contents of the PTE requires special treatment, as the
PTE page table lock must be acquired whenever we want stable and exclusive
access to entries contained within a PTE, especially when we wish to modify
them.

This is performed via :c:func:`!pte_offset_map_lock` which carefully checks to
ensure that the PTE hasn't changed from under us, ultimately invoking
:c:func:`!pte_lockptr` to obtain a spin lock at PTE granularity contained within
the :c:struct:`!struct ptdesc` associated with the physical PTE page. The lock
must be released via :c:func:`!pte_unmap_unlock`.

.. note:: There are some variants on this, such as
          :c:func:`!pte_offset_map_rw_nolock` when we know we hold the PTE
          stable but for brevity we do not explore this. See the comment for
          :c:func:`!__pte_offset_map_lock` for more details.

When modifying data in ranges we typically only wish to allocate higher page
tables as necessary, using these locks to avoid races or overwriting anything,
and set/clear data at the PTE level as required (for instance when page faulting
or zapping).

A typical pattern taken when traversing page table entries to install a new
mapping is to optimistically determine whether the page table entry in the table
above is empty; if so, only then acquiring the page table lock and checking
again to see if it was allocated underneath us.

This allows for a traversal with page table locks only being taken when
required. An example of this is :c:func:`!__pud_alloc`, sketched below.
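A sketch loosely modelled on :c:func:`!__pud_alloc` (simplified; accounting and
memory barriers are omitted):

.. code-block:: c

   /* Optimistically allocate outside the lock, then re-check the entry
    * under mm->page_table_lock before installing the new PUD table. */
   pud_t *new = pud_alloc_one(mm, addr);

   if (!new)
           return -ENOMEM;

   spin_lock(&mm->page_table_lock);
   if (!p4d_present(*p4d))         /* still empty: install our table */
           p4d_populate(mm, p4d, new);
   else                            /* lost the race: drop our copy */
           pud_free(mm, new);
   spin_unlock(&mm->page_table_lock);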
At the leaf page table, that is the PTE, we can't entirely rely on this pattern
as we have separate PMD and PTE locks and a THP collapse for instance might have
eliminated the PMD entry as well as the PTE from under us.

This is why :c:func:`!__pte_offset_map_lock` locklessly retrieves the PMD entry
for the PTE, carefully checking it is as expected, before acquiring the
PTE-specific lock, and then *again* checking that the PMD entry is as expected.

If a THP collapse (or similar) were to occur then the lock on both pages would
be acquired, so we can ensure this is prevented while the PTE lock is held.

Installing entries this way ensures mutual exclusion on write.

Page table freeing
^^^^^^^^^^^^^^^^^^

Tearing down page tables themselves is something that requires significant
care. There must be no way that page tables designated for removal can be
traversed or referenced by concurrent tasks.

It is insufficient to simply hold an mmap write lock and VMA lock (which will
prevent racing faults, and rmap operations), as a file-backed mapping can be
truncated under the :c:struct:`!struct address_space->i_mmap_rwsem` alone.

As a result, no VMA which can be accessed via the reverse mapping (either
through the :c:struct:`!struct anon_vma->rb_root` or the
:c:member:`!struct address_space->i_mmap` interval trees) can have its page
tables torn down.

The operation is typically performed via :c:func:`!free_pgtables`, which assumes
either the mmap write lock has been taken (as specified by its
:c:member:`!mm_wr_locked` parameter), or that the VMA is already unreachable.

It carefully removes the VMA from all reverse mappings, however it's important
that no new ones overlap these or any route remain to permit access to addresses
within the range whose page tables are being torn down.

Additionally, it assumes that a zap has already been performed and steps have
been taken to ensure that no further page table entries can be installed between
the zap and the invocation of :c:func:`!free_pgtables`.

Since it is assumed that all such steps have been taken, page table entries are
cleared without page table locks (in the :c:func:`!pgd_clear`,
:c:func:`!p4d_clear`, :c:func:`!pud_clear`, and :c:func:`!pmd_clear`
functions).

.. note:: It is possible for leaf page tables to be torn down independent of
          the page tables above it as is done by
          :c:func:`!retract_page_tables`, which is performed under the i_mmap
          read lock, PMD, and PTE page table locks, without this level of care.

Page table moving
^^^^^^^^^^^^^^^^^

Some functions manipulate page table levels above PMD (that is PUD, P4D and PGD
page tables). Most notable of these is :c:func:`!mremap`, which is capable of
moving higher level page tables.

In these instances, it is required that **all** locks are taken, that is
the mmap lock, the VMA lock and the relevant rmap locks.

You can observe this in the :c:func:`!mremap` implementation in the functions
:c:func:`!take_rmap_locks` and :c:func:`!drop_rmap_locks` which perform the rmap
side of lock acquisition, invoked ultimately by :c:func:`!move_page_tables`.

VMA lock internals
------------------

Overview
^^^^^^^^

VMA read locking is entirely optimistic - if the lock is contended or a
competing write has started, then we do not obtain a read lock.

A VMA **read** lock is obtained by :c:func:`!lock_vma_under_rcu`, which first
calls :c:func:`!rcu_read_lock` to ensure that the VMA is looked up in an RCU
critical section, then attempts to VMA lock it via :c:func:`!vma_start_read`,
before releasing the RCU lock via :c:func:`!rcu_read_unlock`.

VMA read locks hold the read lock on the :c:member:`!vma->vm_lock` semaphore for
their duration and the caller of :c:func:`!lock_vma_under_rcu` must release it
via :c:func:`!vma_end_read`.
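Conceptually, and heavily simplified (the lookup helper below is a placeholder,
and the real function also revalidates the VMA after locking it):

.. code-block:: c

   rcu_read_lock();                            /* protects the VMA lookup */
   vma = find_vma_from_maple_tree(mm, addr);   /* placeholder lookup */
   if (vma && !vma_start_read(vma))            /* optimistic: may fail */
           vma = NULL;                         /* contended: caller falls back */
   rcu_read_unlock();
   /* on success vma is read-locked; release via vma_end_read(vma) */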
VMA **write** locks are acquired via :c:func:`!vma_start_write` in instances where a
|
||||||
|
VMA is about to be modified, unlike :c:func:`!vma_start_read` the lock is always
|
||||||
|
acquired. An mmap write lock **must** be held for the duration of the VMA write
|
||||||
|
lock, releasing or downgrading the mmap write lock also releases the VMA write
|
||||||
|
lock so there is no :c:func:`!vma_end_write` function.

Note that a semaphore write lock is not held across a VMA lock. Rather, a
sequence number is used for serialisation, and the write semaphore is only
acquired at the point of write lock to update this.

This ensures the semantics we require - VMA write locks provide exclusive write
access to the VMA.

Implementation details
^^^^^^^^^^^^^^^^^^^^^^

The VMA lock mechanism is designed to be a lightweight means of avoiding the use
of the heavily contended mmap lock. It is implemented using a combination of a
read/write semaphore and sequence numbers belonging to the containing
:c:struct:`!struct mm_struct` and the VMA.

Read locks are acquired via :c:func:`!vma_start_read`, which is an optimistic
operation, i.e. it tries to acquire a read lock but returns false if it is
unable to do so. At the end of the read operation, :c:func:`!vma_end_read` is
called to release the VMA read lock.

Invoking :c:func:`!vma_start_read` requires that :c:func:`!rcu_read_lock` has
been called first, establishing that we are in an RCU critical section upon VMA
read lock acquisition. Once acquired, the RCU lock can be released as it is only
required for lookup. This is abstracted by :c:func:`!lock_vma_under_rcu`, which
is the interface a user should use.

Writing requires the mmap to be write-locked and the VMA lock to be acquired via
:c:func:`!vma_start_write`; however, the write lock is released by the
termination or downgrade of the mmap write lock, so no :c:func:`!vma_end_write`
is required.

All this is achieved by the use of per-mm and per-VMA sequence counts, which are
used to reduce complexity, especially for operations which write-lock multiple
VMAs at once.

If the mm sequence count, :c:member:`!mm->mm_lock_seq`, is equal to the VMA
sequence count, :c:member:`!vma->vm_lock_seq`, then the VMA is write-locked. If
they differ, then it is not.
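Expressed as a predicate, the check reads as below. This is a simplified
sketch; the real fields are accessed through seqcount helpers, and the
authoritative logic lives in :c:func:`!vma_start_read` and
:c:func:`!vma_start_write`:

.. code-block:: c

	/* Simplified sketch - field access is via seqcount helpers in reality. */
	static bool vma_is_write_locked_sketch(struct vm_area_struct *vma)
	{
		return vma->vm_lock_seq == vma->vm_mm->mm_lock_seq.sequence;
	}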

Each time the mmap write lock is released in :c:func:`!mmap_write_unlock` or
:c:func:`!mmap_write_downgrade`, :c:func:`!vma_end_write_all` is invoked, which
also increments :c:member:`!mm->mm_lock_seq` via
:c:func:`!mm_lock_seqcount_end`.

This way, we ensure that, regardless of the VMA's sequence number, a write lock
is never incorrectly indicated and that when we release an mmap write lock we
efficiently release **all** VMA write locks contained within the mmap at the
same time.

Since the mmap write lock is exclusive against others who hold it, the automatic
release of any VMA locks on its release makes sense, as you would never want to
keep VMAs locked across entirely separate write operations. It also maintains
correct lock ordering.

Each time a VMA read lock is acquired, we acquire a read lock on the
:c:member:`!vma->vm_lock` read/write semaphore and hold it, while checking that
the sequence count of the VMA does not match that of the mm.

If it does, the read lock fails. If it does not, we hold the lock, excluding
writers, but permitting other readers, who will also obtain this lock under RCU.
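Putting the two pieces together, :c:func:`!vma_start_read` has roughly the
following shape (a condensed sketch of the description above; the real function
additionally handles sequence re-checks and memory ordering):

.. code-block:: c

	/* Condensed sketch of the optimistic read-lock attempt. */
	static bool vma_start_read_sketch(struct vm_area_struct *vma)
	{
		if (!down_read_trylock(&vma->vm_lock->lock))
			return false;	/* contended - caller falls back */

		/* A matching sequence count means a writer owns this VMA. */
		if (vma->vm_lock_seq == vma->vm_mm->mm_lock_seq.sequence) {
			up_read(&vma->vm_lock->lock);
			return false;
		}
		return true;
	}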

Importantly, maple tree operations performed in :c:func:`!lock_vma_under_rcu`
are also RCU safe, so the whole read lock operation is guaranteed to function
correctly.

On the write side, we acquire a write lock on the :c:member:`!vma->vm_lock`
read/write semaphore, before setting the VMA's sequence number under this lock,
also simultaneously holding the mmap write lock.

This way, if any read locks are in effect, :c:func:`!vma_start_write` will sleep
until these are finished and mutual exclusion is achieved.

After setting the VMA's sequence number, the lock is released, avoiding
complexity with a long-term held write lock.

This clever combination of a read/write semaphore and sequence count allows for
fast RCU-based per-VMA lock acquisition (especially on page fault, though
utilised elsewhere) with minimal complexity around lock ordering.

mmap write lock downgrading
---------------------------

When an mmap write lock is held, one has exclusive access to resources within the
mmap (with the usual caveats about requiring VMA write locks to avoid races with
tasks holding VMA read locks).

It is then possible to **downgrade** from a write lock to a read lock via
:c:func:`!mmap_write_downgrade` which, similar to :c:func:`!mmap_write_unlock`,
implicitly terminates all VMA write locks via :c:func:`!vma_end_write_all`, but
importantly does not relinquish the mmap lock while downgrading, therefore
keeping the locked virtual address space stable.

An interesting consequence of this is that downgraded locks are exclusive
against any other task possessing a downgraded lock (since a racing task would
have to acquire a write lock first to downgrade it, and the downgraded lock
prevents a new write lock from being obtained until the original lock is
released).
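A typical use is a long write phase followed by read-only work over a stable
address space, sketched below (the work itself is elided):

.. code-block:: c

	/* Sketch: downgrade after modification to let readers in sooner. */
	mmap_write_lock(mm);
	/* ... modify VMAs, each under vma_start_write() ... */
	mmap_write_downgrade(mm);	/* ends VMA write locks, keeps read lock */
	/* ... read-only work over a now-stable address space ... */
	mmap_read_unlock(mm);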

For clarity, we map read (R)/downgraded write (D)/write (W) locks against one
another, showing which locks exclude the others:

.. list-table:: Lock exclusivity
   :widths: 5 5 5 5
   :header-rows: 1
   :stub-columns: 1

   * -
     - R
     - D
     - W
   * - R
     - N
     - N
     - Y
   * - D
     - N
     - Y
     - Y
   * - W
     - Y
     - Y
     - Y

Here a Y indicates the locks in the matching row/column are mutually exclusive,
and an N indicates that they are not.

Stack expansion
---------------

Stack expansion throws up additional complexities in that we cannot permit there
to be racing page faults; as a result, we invoke :c:func:`!vma_start_write` to
prevent this in :c:func:`!expand_downwards` or :c:func:`!expand_upwards`.
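In sketch form (assuming the caller has already validated the expansion and
holds the locks :c:func:`!expand_downwards` requires):

.. code-block:: c

	/* Sketch of the guard described above. */
	vma_start_write(vma);	/* excludes racing page faults on this VMA */
	/* ... update vm_start/vm_pgoff to grow the stack downwards ... */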

@@ -22,65 +22,67 @@ definitions:
         doc: unused event
       -
         name: created
-        doc:
-          token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport
+        doc: >-
           A new MPTCP connection has been created. It is the good time to
           allocate memory and send ADD_ADDR if needed. Depending on the
           traffic-patterns it can take a long time until the
           MPTCP_EVENT_ESTABLISHED is sent.
+
+          Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport,
+          dport, server-side.
       -
         name: established
-        doc:
-          token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport
+        doc: >-
           A MPTCP connection is established (can start new subflows).
+
+          Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport,
+          dport, server-side.
       -
         name: closed
-        doc:
-          token
+        doc: >-
           A MPTCP connection has stopped.
+
+          Attribute: token.
       -
         name: announced
         value: 6
-        doc:
-          token, rem_id, family, daddr4 | daddr6 [, dport]
+        doc: >-
           A new address has been announced by the peer.
+
+          Attributes: token, rem_id, family, daddr4 | daddr6 [, dport].
       -
         name: removed
-        doc:
-          token, rem_id
+        doc: >-
           An address has been lost by the peer.
+
+          Attributes: token, rem_id.
       -
         name: sub-established
         value: 10
-        doc:
-          token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport,
-          dport, backup, if_idx [, error]
+        doc: >-
           A new subflow has been established. 'error' should not be set.
+
+          Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 |
+          daddr6, sport, dport, backup, if_idx [, error].
       -
         name: sub-closed
-        doc:
-          token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport,
-          dport, backup, if_idx [, error]
+        doc: >-
           A subflow has been closed. An error (copy of sk_err) could be set if an
           error has been detected for this subflow.
+
+          Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 |
+          daddr6, sport, dport, backup, if_idx [, error].
       -
         name: sub-priority
         value: 13
-        doc:
-          token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport,
-          dport, backup, if_idx [, error]
+        doc: >-
           The priority of a subflow has changed. 'error' should not be set.
+
+          Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 |
+          daddr6, sport, dport, backup, if_idx [, error].
       -
         name: listener-created
         value: 15
-        doc:
-          family, sport, saddr4 | saddr6
+        doc: >-
           A new PM listener is created.
+
+          Attributes: family, sport, saddr4 | saddr6.
       -
         name: listener-closed
-        doc:
-          family, sport, saddr4 | saddr6
+        doc: >-
           A PM listener is closed.
+
+          Attributes: family, sport, saddr4 | saddr6.

 attribute-sets:
   -

@@ -307,7 +309,7 @@ operations:
         - addr
     -
       name: flush-addrs
-      doc: flush addresses
+      doc: Flush addresses
       attribute-set: endpoint
       dont-validate: [ strict ]
       flags: [ uns-admin-perm ]

@@ -351,7 +353,7 @@ operations:
         - addr-remote
     -
       name: announce
-      doc: announce new sf
+      doc: Announce new address
       attribute-set: attr
       dont-validate: [ strict ]
       flags: [ uns-admin-perm ]

@@ -362,7 +364,7 @@ operations:
         - token
     -
       name: remove
-      doc: announce removal
+      doc: Announce removal
       attribute-set: attr
       dont-validate: [ strict ]
       flags: [ uns-admin-perm ]

@@ -373,7 +375,7 @@ operations:
         - loc-id
     -
       name: subflow-create
-      doc: todo
+      doc: Create subflow
       attribute-set: attr
       dont-validate: [ strict ]
       flags: [ uns-admin-perm ]

@@ -385,7 +387,7 @@ operations:
         - addr-remote
     -
       name: subflow-destroy
-      doc: todo
+      doc: Destroy subflow
       attribute-set: attr
       dont-validate: [ strict ]
       flags: [ uns-admin-perm ]

@@ -1914,6 +1914,9 @@ No flags are specified so far, the corresponding field must be set to zero.
   #define KVM_IRQ_ROUTING_HV_SINT 4
   #define KVM_IRQ_ROUTING_XEN_EVTCHN 5

+On s390, adding a KVM_IRQ_ROUTING_S390_ADAPTER is rejected on ucontrol VMs with
+error -EINVAL.
+
 flags:

 - KVM_MSI_VALID_DEVID: used along with KVM_IRQ_ROUTING_MSI routing entry

@@ -58,11 +58,15 @@ Groups:
     Enables async page faults for the guest. So in case of a major page fault
     the host is allowed to handle this async and continues the guest.

+    -EINVAL is returned when called on the FLIC of a ucontrol VM.
+
   KVM_DEV_FLIC_APF_DISABLE_WAIT
     Disables async page faults for the guest and waits until already pending
     async page faults are done. This is necessary to trigger a completion interrupt
     for every init interrupt before migrating the interrupt list.

+    -EINVAL is returned when called on the FLIC of a ucontrol VM.
+
   KVM_DEV_FLIC_ADAPTER_REGISTER
     Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter
     describing the adapter to register::

37
MAINTAINERS

@@ -949,7 +949,6 @@ AMAZON ETHERNET DRIVERS
 M:	Shay Agroskin <shayagr@amazon.com>
 M:	Arthur Kiyanovski <akiyano@amazon.com>
 R:	David Arinzon <darinzon@amazon.com>
-R:	Noam Dagan <ndagan@amazon.com>
 R:	Saeed Bishara <saeedb@amazon.com>
 L:	netdev@vger.kernel.org
 S:	Supported

@@ -1797,7 +1796,6 @@ F:	include/uapi/linux/if_arcnet.h

 ARM AND ARM64 SoC SUB-ARCHITECTURES (COMMON PARTS)
 M:	Arnd Bergmann <arnd@arndb.de>
-M:	Olof Johansson <olof@lixom.net>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	soc@lists.linux.dev
 S:	Maintained

@@ -2691,7 +2689,6 @@ N:	at91
 N:	atmel

 ARM/Microchip Sparx5 SoC support
-M:	Lars Povlsen <lars.povlsen@microchip.com>
 M:	Steen Hegelund <Steen.Hegelund@microchip.com>
 M:	Daniel Machon <daniel.machon@microchip.com>
 M:	UNGLinuxDriver@microchip.com

@@ -3608,6 +3605,7 @@ F:	drivers/phy/qualcomm/phy-ath79-usb.c

 ATHEROS ATH GENERIC UTILITIES
 M:	Kalle Valo <kvalo@kernel.org>
+M:	Jeff Johnson <jjohnson@kernel.org>
 L:	linux-wireless@vger.kernel.org
 S:	Supported
 F:	drivers/net/wireless/ath/*

@@ -4058,7 +4056,6 @@ F:	net/bluetooth/

 BONDING DRIVER
 M:	Jay Vosburgh <jv@jvosburgh.net>
-M:	Andy Gospodarek <andy@greyhouse.net>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/networking/bonding.rst

@@ -4131,7 +4128,6 @@ S:	Odd Fixes
 F:	drivers/net/ethernet/netronome/nfp/bpf/

 BPF JIT for POWERPC (32-BIT AND 64-BIT)
-M:	Michael Ellerman <mpe@ellerman.id.au>
 M:	Hari Bathini <hbathini@linux.ibm.com>
 M:	Christophe Leroy <christophe.leroy@csgroup.eu>
 R:	Naveen N Rao <naveen@kernel.org>

@@ -7355,7 +7351,7 @@ F:	drivers/gpu/drm/panel/panel-novatek-nt36672a.c
 DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS
 M:	Karol Herbst <kherbst@redhat.com>
 M:	Lyude Paul <lyude@redhat.com>
-M:	Danilo Krummrich <dakr@redhat.com>
+M:	Danilo Krummrich <dakr@kernel.org>
 L:	dri-devel@lists.freedesktop.org
 L:	nouveau@lists.freedesktop.org
 S:	Supported

@@ -8461,7 +8457,7 @@ F:	include/video/s1d13xxxfb.h
 EROFS FILE SYSTEM
 M:	Gao Xiang <xiang@kernel.org>
 M:	Chao Yu <chao@kernel.org>
-R:	Yue Hu <huyue2@coolpad.com>
+R:	Yue Hu <zbestahu@gmail.com>
 R:	Jeffle Xu <jefflexu@linux.alibaba.com>
 R:	Sandeep Dhavale <dhavale@google.com>
 L:	linux-erofs@lists.ozlabs.org

@@ -8932,7 +8928,7 @@ F:	include/linux/arm_ffa.h
 FIRMWARE LOADER (request_firmware)
 M:	Luis Chamberlain <mcgrof@kernel.org>
 M:	Russ Weight <russ.weight@linux.dev>
-M:	Danilo Krummrich <dakr@redhat.com>
+M:	Danilo Krummrich <dakr@kernel.org>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	Documentation/firmware_class/

@@ -12640,7 +12636,7 @@ F:	arch/mips/include/uapi/asm/kvm*
 F:	arch/mips/kvm/

 KERNEL VIRTUAL MACHINE FOR POWERPC (KVM/powerpc)
-M:	Michael Ellerman <mpe@ellerman.id.au>
+M:	Madhavan Srinivasan <maddy@linux.ibm.com>
 R:	Nicholas Piggin <npiggin@gmail.com>
 L:	linuxppc-dev@lists.ozlabs.org
 L:	kvm@vger.kernel.org

@@ -13219,11 +13215,11 @@ X:	drivers/macintosh/adb-iop.c
 X:	drivers/macintosh/via-macii.c

 LINUX FOR POWERPC (32-BIT AND 64-BIT)
+M:	Madhavan Srinivasan <maddy@linux.ibm.com>
 M:	Michael Ellerman <mpe@ellerman.id.au>
 R:	Nicholas Piggin <npiggin@gmail.com>
 R:	Christophe Leroy <christophe.leroy@csgroup.eu>
 R:	Naveen N Rao <naveen@kernel.org>
-M:	Madhavan Srinivasan <maddy@linux.ibm.com>
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Supported
 W:	https://github.com/linuxppc/wiki/wiki

@@ -14574,7 +14570,6 @@ F:	drivers/dma/mediatek/
 MEDIATEK ETHERNET DRIVER
 M:	Felix Fietkau <nbd@nbd.name>
 M:	Sean Wang <sean.wang@mediatek.com>
-M:	Mark Lee <Mark-MC.Lee@mediatek.com>
 M:	Lorenzo Bianconi <lorenzo@kernel.org>
 L:	netdev@vger.kernel.org
 S:	Maintained

@@ -14764,7 +14759,7 @@ F:	drivers/memory/mtk-smi.c
 F:	include/soc/mediatek/smi.h

 MEDIATEK SWITCH DRIVER
-M:	Arınç ÜNAL <arinc.unal@arinc9.com>
+M:	Chester A. Unal <chester.a.unal@arinc9.com>
 M:	Daniel Golle <daniel@makrotopia.org>
 M:	DENG Qingfang <dqfext@gmail.com>
 M:	Sean Wang <sean.wang@mediatek.com>

@@ -18471,7 +18466,7 @@ F:	Documentation/devicetree/bindings/pinctrl/mediatek,mt8183-pinctrl.yaml
 F:	drivers/pinctrl/mediatek/

 PIN CONTROLLER - MEDIATEK MIPS
-M:	Arınç ÜNAL <arinc.unal@arinc9.com>
+M:	Chester A. Unal <chester.a.unal@arinc9.com>
 M:	Sergio Paracuellos <sergio.paracuellos@gmail.com>
 L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 L:	linux-mips@vger.kernel.org

@@ -19515,7 +19510,7 @@ S:	Maintained
 F:	arch/mips/ralink

 RALINK MT7621 MIPS ARCHITECTURE
-M:	Arınç ÜNAL <arinc.unal@arinc9.com>
+M:	Chester A. Unal <chester.a.unal@arinc9.com>
 M:	Sergio Paracuellos <sergio.paracuellos@gmail.com>
 L:	linux-mips@vger.kernel.org
 S:	Maintained

@@ -20919,6 +20914,8 @@ F:	kernel/sched/
 SCHEDULER - SCHED_EXT
 R:	Tejun Heo <tj@kernel.org>
 R:	David Vernet <void@manifault.com>
+R:	Andrea Righi <arighi@nvidia.com>
+R:	Changwoo Min <changwoo@igalia.com>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 W:	https://github.com/sched-ext/scx

@@ -22513,11 +22510,8 @@ F:	Documentation/devicetree/bindings/phy/st,stm32mp25-combophy.yaml
 F:	drivers/phy/st/phy-stm32-combophy.c

 STMMAC ETHERNET DRIVER
-M:	Alexandre Torgue <alexandre.torgue@foss.st.com>
-M:	Jose Abreu <joabreu@synopsys.com>
 L:	netdev@vger.kernel.org
-S:	Supported
-W:	http://www.stlinux.com
+S:	Orphan
 F:	Documentation/networking/device_drivers/ethernet/stmicro/
 F:	drivers/net/ethernet/stmicro/stmmac/

@@ -22749,9 +22743,8 @@ S:	Supported
 F:	drivers/net/ethernet/synopsys/

 SYNOPSYS DESIGNWARE ETHERNET XPCS DRIVER
-M:	Jose Abreu <Jose.Abreu@synopsys.com>
 L:	netdev@vger.kernel.org
-S:	Supported
+S:	Orphan
 F:	drivers/net/pcs/pcs-xpcs.c
 F:	drivers/net/pcs/pcs-xpcs.h
 F:	include/linux/pcs/pcs-xpcs.h

@@ -23659,7 +23652,6 @@ F:	tools/testing/selftests/timers/

 TIPC NETWORK LAYER
 M:	Jon Maloy <jmaloy@redhat.com>
-M:	Ying Xue <ying.xue@windriver.com>
 L:	netdev@vger.kernel.org (core kernel code)
 L:	tipc-discussion@lists.sourceforge.net (user apps, general discussion)
 S:	Maintained

@@ -24265,7 +24257,8 @@ F:	Documentation/devicetree/bindings/usb/nxp,isp1760.yaml
 F:	drivers/usb/isp1760/*

 USB LAN78XX ETHERNET DRIVER
-M:	Woojung Huh <woojung.huh@microchip.com>
+M:	Thangaraj Samynathan <Thangaraj.S@microchip.com>
+M:	Rengarajan Sundararajan <Rengarajan.S@microchip.com>
 M:	UNGLinuxDriver@microchip.com
 L:	netdev@vger.kernel.org
 S:	Maintained

2
Makefile

@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc7
 NAME = Baby Opossum Posse

 # *DOCUMENTATION*

@@ -6,6 +6,7 @@
 config ARC
 	def_bool y
 	select ARC_TIMERS
+	select ARCH_HAS_CPU_CACHE_ALIASING
 	select ARCH_HAS_CACHE_LINE_SIZE
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DMA_PREP_COHERENT

@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ARC_CACHETYPE_H
+#define __ASM_ARC_CACHETYPE_H
+
+#define cpu_dcache_is_aliasing()	false
+#define cpu_icache_is_aliasing()	true
+
+#endif

@@ -87,7 +87,7 @@ usdhc1: mmc@402c0000 {
 	reg = <0x402c0000 0x4000>;
 	interrupts = <110>;
 	clocks = <&clks IMXRT1050_CLK_IPG_PDOF>,
-		 <&clks IMXRT1050_CLK_OSC>,
+		 <&clks IMXRT1050_CLK_AHB_PODF>,
 		 <&clks IMXRT1050_CLK_USDHC1>;
 	clock-names = "ipg", "ahb", "per";
 	bus-width = <4>;

@@ -323,6 +323,7 @@ CONFIG_SND_SOC_IMX_SGTL5000=y
 CONFIG_SND_SOC_FSL_ASOC_CARD=y
 CONFIG_SND_SOC_AC97_CODEC=y
 CONFIG_SND_SOC_CS42XX8_I2C=y
+CONFIG_SND_SOC_SPDIF=y
 CONFIG_SND_SOC_TLV320AIC3X_I2C=y
 CONFIG_SND_SOC_WM8960=y
 CONFIG_SND_SOC_WM8962=y

@@ -6,6 +6,7 @@ menuconfig ARCH_MXC
 	select CLKSRC_IMX_GPT
 	select GENERIC_IRQ_CHIP
 	select GPIOLIB
+	select PINCTRL
 	select PM_OPP if PM
 	select SOC_BUS
 	select SRAM

@@ -233,7 +233,7 @@ pci: pci@40000000 {
 	#interrupt-cells = <0x1>;
 	compatible = "pci-host-ecam-generic";
 	device_type = "pci";
-	bus-range = <0x0 0x1>;
+	bus-range = <0x0 0xff>;
 	reg = <0x0 0x40000000 0x0 0x10000000>;
 	ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>;
 	interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,

@@ -67,7 +67,7 @@ cpu0: cpu@0 {
 	l2_cache_l0: l2-cache-l0 {
 		compatible = "cache";
 		cache-size = <0x80000>;
-		cache-line-size = <128>;
+		cache-line-size = <64>;
 		cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
 		cache-level = <2>;
 		cache-unified;

@@ -91,7 +91,7 @@ cpu1: cpu@1 {
 	l2_cache_l1: l2-cache-l1 {
 		compatible = "cache";
 		cache-size = <0x80000>;
-		cache-line-size = <128>;
+		cache-line-size = <64>;
 		cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
 		cache-level = <2>;
 		cache-unified;

@@ -115,7 +115,7 @@ cpu2: cpu@2 {
 	l2_cache_l2: l2-cache-l2 {
 		compatible = "cache";
 		cache-size = <0x80000>;
-		cache-line-size = <128>;
+		cache-line-size = <64>;
 		cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
 		cache-level = <2>;
 		cache-unified;

@@ -139,7 +139,7 @@ cpu3: cpu@3 {
 	l2_cache_l3: l2-cache-l3 {
 		compatible = "cache";
 		cache-size = <0x80000>;
-		cache-line-size = <128>;
+		cache-line-size = <64>;
 		cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
 		cache-level = <2>;
 		cache-unified;

@@ -165,7 +165,7 @@ asrc0: asrc@59000000 {
 	};

 	esai0: esai@59010000 {
-		compatible = "fsl,imx8qm-esai";
+		compatible = "fsl,imx8qm-esai", "fsl,imx6ull-esai";
 		reg = <0x59010000 0x10000>;
 		interrupts = <GIC_SPI 409 IRQ_TYPE_LEVEL_HIGH>;
 		clocks = <&esai0_lpcg IMX_LPCG_CLK_4>,

@@ -134,7 +134,7 @@ sai5_lpcg: clock-controller@59490000 {
 	};

 	esai1: esai@59810000 {
-		compatible = "fsl,imx8qm-esai";
+		compatible = "fsl,imx8qm-esai", "fsl,imx6ull-esai";
 		reg = <0x59810000 0x10000>;
 		interrupts = <GIC_SPI 411 IRQ_TYPE_LEVEL_HIGH>;
 		clocks = <&esai1_lpcg IMX_LPCG_CLK_0>,
|
||||||
|
|
||||||
netcmix_blk_ctrl: syscon@4c810000 {
|
netcmix_blk_ctrl: syscon@4c810000 {
|
||||||
compatible = "nxp,imx95-netcmix-blk-ctrl", "syscon";
|
compatible = "nxp,imx95-netcmix-blk-ctrl", "syscon";
|
||||||
reg = <0x0 0x4c810000 0x0 0x10000>;
|
reg = <0x0 0x4c810000 0x0 0x8>;
|
||||||
#clock-cells = <1>;
|
#clock-cells = <1>;
|
||||||
clocks = <&scmi_clk IMX95_CLK_BUSNETCMIX>;
|
clocks = <&scmi_clk IMX95_CLK_BUSNETCMIX>;
|
||||||
assigned-clocks = <&scmi_clk IMX95_CLK_BUSNETCMIX>;
|
assigned-clocks = <&scmi_clk IMX95_CLK_BUSNETCMIX>;
|
||||||
|
|
|
||||||
|
|
@ -2440,6 +2440,7 @@ tpdm@4003000 {
|
||||||
|
|
||||||
qcom,cmb-element-bits = <32>;
|
qcom,cmb-element-bits = <32>;
|
||||||
qcom,cmb-msrs-num = <32>;
|
qcom,cmb-msrs-num = <32>;
|
||||||
|
status = "disabled";
|
||||||
|
|
||||||
out-ports {
|
out-ports {
|
||||||
port {
|
port {
|
||||||
|
|

@@ -6092,7 +6093,7 @@ pcie0_ep: pcie-ep@1c00000 {
 			      <0x0 0x40000000 0x0 0xf20>,
 			      <0x0 0x40000f20 0x0 0xa8>,
 			      <0x0 0x40001000 0x0 0x4000>,
-			      <0x0 0x40200000 0x0 0x100000>,
+			      <0x0 0x40200000 0x0 0x1fe00000>,
 			      <0x0 0x01c03000 0x0 0x1000>,
 			      <0x0 0x40005000 0x0 0x2000>;
 			reg-names = "parf", "dbi", "elbi", "atu", "addr_space",

@@ -6250,7 +6251,7 @@ pcie1_ep: pcie-ep@1c10000 {
 			      <0x0 0x60000000 0x0 0xf20>,
 			      <0x0 0x60000f20 0x0 0xa8>,
 			      <0x0 0x60001000 0x0 0x4000>,
-			      <0x0 0x60200000 0x0 0x100000>,
+			      <0x0 0x60200000 0x0 0x1fe00000>,
 			      <0x0 0x01c13000 0x0 0x1000>,
 			      <0x0 0x60005000 0x0 0x2000>;
 			reg-names = "parf", "dbi", "elbi", "atu", "addr_space",

@@ -773,6 +773,10 @@ &usb_1_ss0 {
 	status = "okay";
 };

+&usb_1_ss0_dwc3 {
+	dr_mode = "host";
+};
+
 &usb_1_ss0_dwc3_hs {
 	remote-endpoint = <&pmic_glink_ss0_hs_in>;
 };

@@ -801,6 +805,10 @@ &usb_1_ss1 {
 	status = "okay";
 };

+&usb_1_ss1_dwc3 {
+	dr_mode = "host";
+};
+
 &usb_1_ss1_dwc3_hs {
 	remote-endpoint = <&pmic_glink_ss1_hs_in>;
 };
|
||||||
status = "okay";
|
status = "okay";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
&usb_1_ss0_dwc3 {
|
||||||
|
dr_mode = "host";
|
||||||
|
};
|
||||||
|
|
||||||
&usb_1_ss0_dwc3_hs {
|
&usb_1_ss0_dwc3_hs {
|
||||||
remote-endpoint = <&pmic_glink_ss0_hs_in>;
|
remote-endpoint = <&pmic_glink_ss0_hs_in>;
|
||||||
};
|
};
|
||||||
|
|
@ -1225,6 +1229,10 @@ &usb_1_ss1 {
|
||||||
status = "okay";
|
status = "okay";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
&usb_1_ss1_dwc3 {
|
||||||
|
dr_mode = "host";
|
||||||
|
};
|
||||||
|
|
||||||
&usb_1_ss1_dwc3_hs {
|
&usb_1_ss1_dwc3_hs {
|
||||||
remote-endpoint = <&pmic_glink_ss1_hs_in>;
|
remote-endpoint = <&pmic_glink_ss1_hs_in>;
|
||||||
};
|
};
|
||||||
|
|
@ -1253,6 +1261,10 @@ &usb_1_ss2 {
|
||||||
status = "okay";
|
status = "okay";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
&usb_1_ss2_dwc3 {
|
||||||
|
dr_mode = "host";
|
||||||
|
};
|
||||||
|
|
||||||
&usb_1_ss2_dwc3_hs {
|
&usb_1_ss2_dwc3_hs {
|
||||||
remote-endpoint = <&pmic_glink_ss2_hs_in>;
|
remote-endpoint = <&pmic_glink_ss2_hs_in>;
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|

@@ -2924,7 +2924,7 @@ pcie6a: pci@1bf8000 {
 			#address-cells = <3>;
 			#size-cells = <2>;
 			ranges = <0x01000000 0x0 0x00000000 0x0 0x70200000 0x0 0x100000>,
-				 <0x02000000 0x0 0x70300000 0x0 0x70300000 0x0 0x1d00000>;
+				 <0x02000000 0x0 0x70300000 0x0 0x70300000 0x0 0x3d00000>;
 			bus-range = <0x00 0xff>;

 			dma-coherent;

@@ -4066,8 +4066,6 @@ usb_1_ss2_dwc3: usb@a000000 {

 				dma-coherent;

-				usb-role-switch;
-
 				ports {
 					#address-cells = <1>;
 					#size-cells = <0>;

@@ -4321,8 +4319,6 @@ usb_1_ss0_dwc3: usb@a600000 {

 				dma-coherent;

-				usb-role-switch;
-
 				ports {
 					#address-cells = <1>;
 					#size-cells = <0>;

@@ -4421,8 +4417,6 @@ usb_1_ss1_dwc3: usb@a800000 {

 				dma-coherent;

-				usb-role-switch;
-
 				ports {
 					#address-cells = <1>;
 					#size-cells = <0>;

@@ -333,6 +333,7 @@ power: power-controller {

 			power-domain@RK3328_PD_HEVC {
 				reg = <RK3328_PD_HEVC>;
+				clocks = <&cru SCLK_VENC_CORE>;
 				#power-domain-cells = <0>;
 			};
 			power-domain@RK3328_PD_VIDEO {

@@ -350,6 +350,7 @@ combphy0: phy@fe820000 {
 		assigned-clocks = <&pmucru CLK_PCIEPHY0_REF>;
 		assigned-clock-rates = <100000000>;
 		resets = <&cru SRST_PIPEPHY0>;
+		reset-names = "phy";
 		rockchip,pipe-grf = <&pipegrf>;
 		rockchip,pipe-phy-grf = <&pipe_phy_grf0>;
 		#phy-cells = <1>;

@@ -1681,6 +1681,7 @@ combphy1: phy@fe830000 {
 		assigned-clocks = <&pmucru CLK_PCIEPHY1_REF>;
 		assigned-clock-rates = <100000000>;
 		resets = <&cru SRST_PIPEPHY1>;
+		reset-names = "phy";
 		rockchip,pipe-grf = <&pipegrf>;
 		rockchip,pipe-phy-grf = <&pipe_phy_grf1>;
 		#phy-cells = <1>;

@@ -1697,6 +1698,7 @@ combphy2: phy@fe840000 {
 		assigned-clocks = <&pmucru CLK_PCIEPHY2_REF>;
 		assigned-clock-rates = <100000000>;
 		resets = <&cru SRST_PIPEPHY2>;
+		reset-names = "phy";
 		rockchip,pipe-grf = <&pipegrf>;
 		rockchip,pipe-phy-grf = <&pipe_phy_grf2>;
 		#phy-cells = <1>;

@@ -72,7 +72,7 @@ fan: pwm-fan {

 	rfkill {
 		compatible = "rfkill-gpio";
-		label = "rfkill-pcie-wlan";
+		label = "rfkill-m2-wlan";
 		radio-type = "wlan";
 		shutdown-gpios = <&gpio4 RK_PA2 GPIO_ACTIVE_HIGH>;
 	};

@@ -434,6 +434,7 @@ &sdhci {
 &sdmmc {
 	bus-width = <4>;
 	cap-sd-highspeed;
+	cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
 	disable-wp;
 	max-frequency = <150000000>;
 	no-mmc;

@@ -36,15 +36,8 @@
 #include <asm/traps.h>
 #include <asm/vdso.h>

-#ifdef CONFIG_ARM64_GCS
 #define GCS_SIGNAL_CAP(addr) (((unsigned long)addr) & GCS_CAP_ADDR_MASK)

-static bool gcs_signal_cap_valid(u64 addr, u64 val)
-{
-	return val == GCS_SIGNAL_CAP(addr);
-}
-#endif
-
 /*
  * Do a signal return; undo the signal stack. These are aligned to 128-bit.
  */

@@ -1062,8 +1055,7 @@ static int restore_sigframe(struct pt_regs *regs,
 #ifdef CONFIG_ARM64_GCS
 static int gcs_restore_signal(void)
 {
-	unsigned long __user *gcspr_el0;
-	u64 cap;
+	u64 gcspr_el0, cap;
 	int ret;

 	if (!system_supports_gcs())

@@ -1072,7 +1064,7 @@ static int gcs_restore_signal(void)
 	if (!(current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE))
 		return 0;

-	gcspr_el0 = (unsigned long __user *)read_sysreg_s(SYS_GCSPR_EL0);
+	gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);

 	/*
 	 * Ensure that any changes to the GCS done via GCS operations

@@ -1087,22 +1079,23 @@ static int gcs_restore_signal(void)
 	 * then faults will be generated on GCS operations - the main
 	 * concern is to protect GCS pages.
 	 */
-	ret = copy_from_user(&cap, gcspr_el0, sizeof(cap));
+	ret = copy_from_user(&cap, (unsigned long __user *)gcspr_el0,
+			     sizeof(cap));
 	if (ret)
 		return -EFAULT;

 	/*
 	 * Check that the cap is the actual GCS before replacing it.
 	 */
-	if (!gcs_signal_cap_valid((u64)gcspr_el0, cap))
+	if (cap != GCS_SIGNAL_CAP(gcspr_el0))
 		return -EINVAL;

 	/* Invalidate the token to prevent reuse */
-	put_user_gcs(0, (__user void*)gcspr_el0, &ret);
+	put_user_gcs(0, (unsigned long __user *)gcspr_el0, &ret);
 	if (ret != 0)
 		return -EFAULT;

-	write_sysreg_s(gcspr_el0 + 1, SYS_GCSPR_EL0);
+	write_sysreg_s(gcspr_el0 + 8, SYS_GCSPR_EL0);

 	return 0;
 }

@@ -1421,7 +1414,7 @@ static int get_sigframe(struct rt_sigframe_user_layout *user,

 static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig)
 {
-	unsigned long __user *gcspr_el0;
+	u64 gcspr_el0;
 	int ret = 0;

 	if (!system_supports_gcs())

@@ -1434,18 +1427,20 @@ static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig)
 	 * We are entering a signal handler, current register state is
 	 * active.
 	 */
-	gcspr_el0 = (unsigned long __user *)read_sysreg_s(SYS_GCSPR_EL0);
+	gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);

 	/*
 	 * Push a cap and the GCS entry for the trampoline onto the GCS.
 	 */
-	put_user_gcs((unsigned long)sigtramp, gcspr_el0 - 2, &ret);
-	put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 1), gcspr_el0 - 1, &ret);
+	put_user_gcs((unsigned long)sigtramp,
+		     (unsigned long __user *)(gcspr_el0 - 16), &ret);
+	put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 8),
+		     (unsigned long __user *)(gcspr_el0 - 8), &ret);
 	if (ret != 0)
 		return ret;

-	gcspr_el0 -= 2;
-	write_sysreg_s((unsigned long)gcspr_el0, SYS_GCSPR_EL0);
+	gcspr_el0 -= 16;
+	write_sysreg_s(gcspr_el0, SYS_GCSPR_EL0);

 	return 0;
 }

@@ -783,9 +783,6 @@ static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
 	if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr))
 		return -EBUSY;

-	if (__hyp_ack_skip_pgtable_check(tx))
-		return 0;
-
 	return __hyp_check_page_state_range(addr, size,
 					    PKVM_PAGE_SHARED_BORROWED);
 }

@@ -24,6 +24,7 @@ static DEFINE_MUTEX(arm_pmus_lock);

 static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);
+static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc);

 static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
 {

@@ -327,48 +328,25 @@ u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu)
 	return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
 }

-/**
- * kvm_pmu_enable_counter_mask - enable selected PMU counters
- * @vcpu: The vcpu pointer
- * @val: the value guest writes to PMCNTENSET register
- *
- * Call perf_event_enable to start counting the perf event
- */
-void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
+static void kvm_pmc_enable_perf_event(struct kvm_pmc *pmc)
 {
-	int i;
-
-	if (!kvm_vcpu_has_pmu(vcpu))
-		return;
-
-	if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) || !val)
-		return;
-
-	for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) {
-		struct kvm_pmc *pmc;
-
-		if (!(val & BIT(i)))
-			continue;
-
-		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
-
-		if (!pmc->perf_event) {
-			kvm_pmu_create_perf_event(pmc);
-		} else {
-			perf_event_enable(pmc->perf_event);
-			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
-				kvm_debug("fail to enable perf event\n");
-		}
+	if (!pmc->perf_event) {
+		kvm_pmu_create_perf_event(pmc);
+		return;
 	}
+
+	perf_event_enable(pmc->perf_event);
+	if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
+		kvm_debug("fail to enable perf event\n");
 }

-/**
- * kvm_pmu_disable_counter_mask - disable selected PMU counters
- * @vcpu: The vcpu pointer
- * @val: the value guest writes to PMCNTENCLR register
- *
- * Call perf_event_disable to stop counting the perf event
- */
-void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
+static void kvm_pmc_disable_perf_event(struct kvm_pmc *pmc)
+{
+	if (pmc->perf_event)
+		perf_event_disable(pmc->perf_event);
+}
+
+void kvm_pmu_reprogram_counter_mask(struct kvm_vcpu *vcpu, u64 val)
 {
 	int i;

@@ -376,16 +354,18 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
 		return;

 	for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) {
-		struct kvm_pmc *pmc;
+		struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

 		if (!(val & BIT(i)))
 			continue;

-		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
-
-		if (pmc->perf_event)
-			perf_event_disable(pmc->perf_event);
+		if (kvm_pmu_counter_is_enabled(pmc))
+			kvm_pmc_enable_perf_event(pmc);
+		else
+			kvm_pmc_disable_perf_event(pmc);
 	}
+
+	kvm_vcpu_pmu_restore_guest(vcpu);
 }

 /*

@@ -626,27 +606,28 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
 	if (!kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5))
 		val &= ~ARMV8_PMU_PMCR_LP;

+	/* Request a reload of the PMU to enable/disable affected counters */
+	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) ^ val) & ARMV8_PMU_PMCR_E)
+		kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
+
 	/* The reset bits don't indicate any state, and shouldn't be saved. */
 	__vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);

-	if (val & ARMV8_PMU_PMCR_E) {
-		kvm_pmu_enable_counter_mask(vcpu,
-		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
-	} else {
-		kvm_pmu_disable_counter_mask(vcpu,
-		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
-	}
-
 	if (val & ARMV8_PMU_PMCR_C)
 		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

 	if (val & ARMV8_PMU_PMCR_P) {
-		unsigned long mask = kvm_pmu_accessible_counter_mask(vcpu);
-		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
+		/*
+		 * Unlike other PMU sysregs, the controls in PMCR_EL0 always apply
+		 * to the 'guest' range of counters and never the 'hyp' range.
+		 */
+		unsigned long mask = kvm_pmu_implemented_counter_mask(vcpu) &
+				     ~kvm_pmu_hyp_counter_mask(vcpu) &
+				     ~BIT(ARMV8_PMU_CYCLE_IDX);
+
 		for_each_set_bit(i, &mask, 32)
 			kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
 	}
-	kvm_vcpu_pmu_restore_guest(vcpu);
 }

 static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)

@@ -910,11 +891,11 @@ void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu)
 {
 	u64 mask = kvm_pmu_implemented_counter_mask(vcpu);

-	kvm_pmu_handle_pmcr(vcpu, kvm_vcpu_read_pmcr(vcpu));
-
 	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask;
 	__vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask;
 	__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask;
+
+	kvm_pmu_reprogram_counter_mask(vcpu, mask);
 }

 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)

@@ -1208,16 +1208,14 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	mask = kvm_pmu_accessible_counter_mask(vcpu);
 	if (p->is_write) {
 		val = p->regval & mask;
-		if (r->Op2 & 0x1) {
+		if (r->Op2 & 0x1)
 			/* accessing PMCNTENSET_EL0 */
 			__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
-			kvm_pmu_enable_counter_mask(vcpu, val);
-			kvm_vcpu_pmu_restore_guest(vcpu);
-		} else {
+		else
 			/* accessing PMCNTENCLR_EL0 */
 			__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
-			kvm_pmu_disable_counter_mask(vcpu, val);
-		}
+
+		kvm_pmu_reprogram_counter_mask(vcpu, val);
 	} else {
 		p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
 	}

@@ -2450,6 +2448,26 @@ static unsigned int s1pie_el2_visibility(const struct kvm_vcpu *vcpu,
 	return __el2_visibility(vcpu, rd, s1pie_visibility);
 }

+static bool access_mdcr(struct kvm_vcpu *vcpu,
+			struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
+{
+	u64 old = __vcpu_sys_reg(vcpu, MDCR_EL2);
+
+	if (!access_rw(vcpu, p, r))
+		return false;
+
+	/*
+	 * Request a reload of the PMU to enable/disable the counters affected
+	 * by HPME.
+	 */
+	if ((old ^ __vcpu_sys_reg(vcpu, MDCR_EL2)) & MDCR_EL2_HPME)
+		kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
+
+	return true;
+}
+
 /*
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2

@@ -2983,7 +3001,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
 	EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
 	EL2_REG_VNCR(HCR_EL2, reset_hcr, 0),
-	EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
+	EL2_REG(MDCR_EL2, access_mdcr, reset_val, 0),
 	EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
 	EL2_REG_VNCR(HSTR_EL2, reset_val, 0),
 	EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0),
||||||
TIR_NAME := r19
|
TIR_NAME := r19
|
||||||
KBUILD_CFLAGS += -ffixed-$(TIR_NAME) -DTHREADINFO_REG=$(TIR_NAME) -D__linux__
|
KBUILD_CFLAGS += -ffixed-$(TIR_NAME) -DTHREADINFO_REG=$(TIR_NAME) -D__linux__
|
||||||
KBUILD_AFLAGS += -DTHREADINFO_REG=$(TIR_NAME)
|
KBUILD_AFLAGS += -DTHREADINFO_REG=$(TIR_NAME)
|
||||||
|
|
||||||
|
# Disable HexagonConstExtenders pass for LLVM versions prior to 19.1.0
|
||||||
|
# https://github.com/llvm/llvm-project/issues/99714
|
||||||
|
ifneq ($(call clang-min-version, 190100),y)
|
||||||
|
KBUILD_CFLAGS += -mllvm -hexagon-cext=false
|
||||||
|
endif
|
||||||
|
|
|
||||||
|
|
@ -143,11 +143,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
|
||||||
" DIV:\t\t%s\n"
|
" DIV:\t\t%s\n"
|
||||||
" BMX:\t\t%s\n"
|
" BMX:\t\t%s\n"
|
||||||
" CDX:\t\t%s\n",
|
" CDX:\t\t%s\n",
|
||||||
cpuinfo.has_mul ? "yes" : "no",
|
str_yes_no(cpuinfo.has_mul),
|
||||||
cpuinfo.has_mulx ? "yes" : "no",
|
str_yes_no(cpuinfo.has_mulx),
|
||||||
cpuinfo.has_div ? "yes" : "no",
|
str_yes_no(cpuinfo.has_div),
|
||||||
cpuinfo.has_bmx ? "yes" : "no",
|
str_yes_no(cpuinfo.has_bmx),
|
||||||
cpuinfo.has_cdx ? "yes" : "no");
|
str_yes_no(cpuinfo.has_cdx));
|
||||||
|
|
||||||
seq_printf(m,
|
seq_printf(m,
|
||||||
"Icache:\t\t%ukB, line length: %u\n",
|
"Icache:\t\t%ukB, line length: %u\n",
|
||||||
|
|
|
||||||
|
|

@@ -208,6 +208,7 @@ CONFIG_FB_ATY=y
 CONFIG_FB_ATY_CT=y
 CONFIG_FB_ATY_GX=y
 CONFIG_FB_3DFX=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y

@@ -716,6 +716,7 @@ CONFIG_FB_TRIDENT=m
 CONFIG_FB_SM501=m
 CONFIG_FB_IBM_GXT4500=y
 CONFIG_LCD_PLATFORM=m
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
 CONFIG_LOGO=y
@@ -34,6 +34,8 @@ enum vcpu_ftr {
 #define E500_TLB_BITMAP		(1 << 30)
 /* TLB1 entry is mapped by host TLB0 */
 #define E500_TLB_TLB0		(1 << 29)
+/* entry is writable on the host */
+#define E500_TLB_WRITABLE	(1 << 28)
 /* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */
 #define E500_TLB_MAS2_ATTR	(0x7f)
@@ -45,11 +45,14 @@ static inline unsigned int tlb1_max_shadow_size(void)
 	return host_tlb_params[1].entries - tlbcam_index - 1;
 }

-static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
+static inline u32 e500_shadow_mas3_attrib(u32 mas3, bool writable, int usermode)
 {
 	/* Mask off reserved bits. */
 	mas3 &= MAS3_ATTRIB_MASK;

+	if (!writable)
+		mas3 &= ~(MAS3_UW|MAS3_SW);
+
 #ifndef CONFIG_KVM_BOOKE_HV
 	if (!usermode) {
 		/* Guest is in supervisor mode,
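Note: the new `writable` parameter lets the shadow TLB entry drop guest write permission whenever the host mapping is read-only. The masking idiom in isolation (the MAS3 bit values below are illustrative, not the real register layout):

    #include <stdbool.h>
    #include <stdint.h>

    #define MAS3_UW 0x10   /* illustrative: user-write permission */
    #define MAS3_SW 0x04   /* illustrative: supervisor-write permission */

    /* Drop both write-permission bits when the backing host page is
     * mapped read-only, leaving read/execute permissions untouched. */
    static uint32_t apply_host_writability(uint32_t mas3, bool host_writable)
    {
            if (!host_writable)
                    mas3 &= ~(uint32_t)(MAS3_UW | MAS3_SW);
            return mas3;
    }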
@@ -242,17 +245,18 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
 	return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
 }

-static inline bool kvmppc_e500_ref_setup(struct tlbe_ref *ref,
+static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
 					 struct kvm_book3e_206_tlb_entry *gtlbe,
-					 kvm_pfn_t pfn, unsigned int wimg)
+					 kvm_pfn_t pfn, unsigned int wimg,
+					 bool writable)
 {
 	ref->pfn = pfn;
 	ref->flags = E500_TLB_VALID;
+	if (writable)
+		ref->flags |= E500_TLB_WRITABLE;

 	/* Use guest supplied MAS2_G and MAS2_E */
 	ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg;
-
-	return tlbe_is_writable(gtlbe);
 }

 static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
@@ -305,6 +309,7 @@ static void kvmppc_e500_setup_stlbe(
 {
 	kvm_pfn_t pfn = ref->pfn;
 	u32 pr = vcpu->arch.shared->msr & MSR_PR;
+	bool writable = !!(ref->flags & E500_TLB_WRITABLE);

 	BUG_ON(!(ref->flags & E500_TLB_VALID));

@@ -312,7 +317,7 @@ static void kvmppc_e500_setup_stlbe(
 	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
 	stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR);
 	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
-			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
+			e500_shadow_mas3_attrib(gtlbe->mas7_3, writable, pr);
 }

 static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -321,15 +326,14 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	struct tlbe_ref *ref)
 {
 	struct kvm_memory_slot *slot;
-	unsigned long pfn = 0; /* silence GCC warning */
+	unsigned int psize;
+	unsigned long pfn;
 	struct page *page = NULL;
 	unsigned long hva;
-	int pfnmap = 0;
 	int tsize = BOOK3E_PAGESZ_4K;
 	int ret = 0;
 	unsigned long mmu_seq;
 	struct kvm *kvm = vcpu_e500->vcpu.kvm;
-	unsigned long tsize_pages = 0;
 	pte_t *ptep;
 	unsigned int wimg = 0;
 	pgd_t *pgdir;
|
||||||
slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn);
|
slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn);
|
||||||
hva = gfn_to_hva_memslot(slot, gfn);
|
hva = gfn_to_hva_memslot(slot, gfn);
|
||||||
|
|
||||||
if (tlbsel == 1) {
|
pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, &writable, &page);
|
||||||
struct vm_area_struct *vma;
|
if (is_error_noslot_pfn(pfn)) {
|
||||||
mmap_read_lock(kvm->mm);
|
if (printk_ratelimit())
|
||||||
|
pr_err("%s: real page not found for gfn %lx\n",
|
||||||
|
__func__, (long)gfn);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
vma = find_vma(kvm->mm, hva);
|
spin_lock(&kvm->mmu_lock);
|
||||||
if (vma && hva >= vma->vm_start &&
|
if (mmu_invalidate_retry(kvm, mmu_seq)) {
|
||||||
(vma->vm_flags & VM_PFNMAP)) {
|
ret = -EAGAIN;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
pgdir = vcpu_e500->vcpu.arch.pgdir;
|
||||||
/*
|
/*
|
||||||
* This VMA is a physically contiguous region (e.g.
|
* We are just looking at the wimg bits, so we don't
|
||||||
* /dev/mem) that bypasses normal Linux page
|
* care much about the trans splitting bit.
|
||||||
* management. Find the overlap between the
|
* We are holding kvm->mmu_lock so a notifier invalidate
|
||||||
* vma and the memslot.
|
* can't run hence pfn won't change.
|
||||||
*/
|
*/
|
||||||
|
local_irq_save(flags);
|
||||||
|
ptep = find_linux_pte(pgdir, hva, NULL, &psize);
|
||||||
|
if (ptep) {
|
||||||
|
pte_t pte = READ_ONCE(*ptep);
|
||||||
|
|
||||||
|
if (pte_present(pte)) {
|
||||||
|
wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) &
|
||||||
|
MAS2_WIMGE_MASK;
|
||||||
|
} else {
|
||||||
|
local_irq_restore(flags);
|
||||||
|
pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n",
|
||||||
|
__func__, (long)gfn, pfn);
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
local_irq_restore(flags);
|
||||||
|
|
||||||
|
if (psize && tlbsel == 1) {
|
||||||
|
unsigned long psize_pages, tsize_pages;
|
||||||
unsigned long start, end;
|
unsigned long start, end;
|
||||||
unsigned long slot_start, slot_end;
|
unsigned long slot_start, slot_end;
|
||||||
|
|
||||||
pfnmap = 1;
|
psize_pages = 1UL << (psize - PAGE_SHIFT);
|
||||||
|
start = pfn & ~(psize_pages - 1);
|
||||||
start = vma->vm_pgoff;
|
end = start + psize_pages;
|
||||||
end = start +
|
|
||||||
vma_pages(vma);
|
|
||||||
|
|
||||||
pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);
|
|
||||||
|
|
||||||
slot_start = pfn - (gfn - slot->base_gfn);
|
slot_start = pfn - (gfn - slot->base_gfn);
|
||||||
slot_end = slot_start + slot->npages;
|
slot_end = slot_start + slot->npages;
|
||||||
|
|
@@ -387,6 +415,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 		tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
 			MAS1_TSIZE_SHIFT;

+		/*
+		 * Any page size that doesn't satisfy the host mapping
+		 * will fail the start and end tests.
+		 */
+		tsize = min(psize - PAGE_SHIFT + BOOK3E_PAGESZ_4K, tsize);
+
 		/*
 		 * e500 doesn't implement the lowest tsize bit,
 		 * or 1K pages.
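Note: in this code a Book3E tsize of t covers 2^(t+10) bytes (see the removed `tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT)` further down), so BOOK3E_PAGESZ_4K is 2 and the clamp converts the host mapping's log2 byte size `psize` into the same unit. A worked standalone example (values illustrative):

    #include <stdio.h>

    #define PAGE_SHIFT        12
    #define BOOK3E_PAGESZ_4K  2   /* tsize 2 -> 2^(2+10) = 4 KiB */

    int main(void)
    {
            int psize = 24;        /* host backs the gfn with a 2^24 = 16 MiB mapping */
            int guest_tsize = 16;  /* guest asked for 2^(16+10) = 64 MiB */

            /* Clamp to what the host mapping can actually satisfy. */
            int tsize = psize - PAGE_SHIFT + BOOK3E_PAGESZ_4K; /* 24 - 12 + 2 = 14 */
            if (guest_tsize < tsize)
                    tsize = guest_tsize;

            printf("shadow tsize %d -> %lu bytes\n", tsize, 1UL << (tsize + 10));
            return 0;   /* prints: shadow tsize 14 -> 16777216 bytes */
    }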
@@ -419,79 +453,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 			pfn &= ~(tsize_pages - 1);
 			break;
 		}
-	} else if (vma && hva >= vma->vm_start &&
-		   is_vm_hugetlb_page(vma)) {
-		unsigned long psize = vma_kernel_pagesize(vma);
-
-		tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
-			MAS1_TSIZE_SHIFT;
-
-		/*
-		 * Take the largest page size that satisfies both host
-		 * and guest mapping
-		 */
-		tsize = min(__ilog2(psize) - 10, tsize);
-
-		/*
-		 * e500 doesn't implement the lowest tsize bit,
-		 * or 1K pages.
-		 */
-		tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
 	}

-	mmap_read_unlock(kvm->mm);
-	}
-
-	if (likely(!pfnmap)) {
-		tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT);
-		pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, NULL, &page);
-		if (is_error_noslot_pfn(pfn)) {
-			if (printk_ratelimit())
-				pr_err("%s: real page not found for gfn %lx\n",
-				       __func__, (long)gfn);
-			return -EINVAL;
-		}
-
-		/* Align guest and physical address to page map boundaries */
-		pfn &= ~(tsize_pages - 1);
-		gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
-	}
-
-	spin_lock(&kvm->mmu_lock);
-	if (mmu_invalidate_retry(kvm, mmu_seq)) {
-		ret = -EAGAIN;
-		goto out;
-	}
-
-	pgdir = vcpu_e500->vcpu.arch.pgdir;
-	/*
-	 * We are just looking at the wimg bits, so we don't
-	 * care much about the trans splitting bit.
-	 * We are holding kvm->mmu_lock so a notifier invalidate
-	 * can't run hence pfn won't change.
-	 */
-	local_irq_save(flags);
-	ptep = find_linux_pte(pgdir, hva, NULL, NULL);
-	if (ptep) {
-		pte_t pte = READ_ONCE(*ptep);
-
-		if (pte_present(pte)) {
-			wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) &
-				MAS2_WIMGE_MASK;
-			local_irq_restore(flags);
-		} else {
-			local_irq_restore(flags);
-			pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n",
-					   __func__, (long)gfn, pfn);
-			ret = -EINVAL;
-			goto out;
-		}
-	}
-	writable = kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
+	kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg, writable);

 	kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
 				ref, gvaddr, stlbe);
+	writable = tlbe_is_writable(stlbe);

 	/* Clear i-cache for new pages */
 	kvmppc_mmu_flush_icache(pfn);
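Note: the rewritten mapping path follows KVM's usual fault-in discipline: snapshot the invalidation sequence count, resolve the pfn with no lock held via __kvm_faultin_pfn(), then recheck under mmu_lock and bail out with -EAGAIN if a notifier invalidation raced. A heavily simplified standalone model of that retry idiom (all names below are stand-ins, not the KVM API):

    #include <stdio.h>

    static unsigned long invalidate_seq;        /* bumped by "MMU notifiers" */

    static unsigned long faultin_pfn(unsigned long gfn)
    {
            return gfn + 1000;                  /* stand-in for the real lookup */
    }

    static int map_one(unsigned long gfn)
    {
            unsigned long mmu_seq = invalidate_seq; /* snapshot first */
            unsigned long pfn = faultin_pfn(gfn);   /* may sleep, no lock held */

            /* the real code takes mmu_lock here before rechecking */
            if (invalidate_seq != mmu_seq)
                    return -1;                  /* raced: caller re-faults (-EAGAIN) */
            printf("map gfn %lu -> pfn %lu\n", gfn, pfn);
            return 0;
    }

    int main(void)
    {
            return map_one(42) ? 1 : 0;
    }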
@@ -464,7 +464,43 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }

+/*
+ * During mmap() paste address, mapping VMA is saved in VAS window
+ * struct which is used to unmap during migration if the window is
+ * still open. But the user space can remove this mapping with
+ * munmap() before closing the window and the VMA address will
+ * be invalid. Set VAS window VMA to NULL in this function which
+ * is called before VMA free.
+ */
+static void vas_mmap_close(struct vm_area_struct *vma)
+{
+	struct file *fp = vma->vm_file;
+	struct coproc_instance *cp_inst = fp->private_data;
+	struct vas_window *txwin;
+
+	/* Should not happen */
+	if (!cp_inst || !cp_inst->txwin) {
+		pr_err("No attached VAS window for the paste address mmap\n");
+		return;
+	}
+
+	txwin = cp_inst->txwin;
+	/*
+	 * task_ref.vma is set in coproc_mmap() during mmap paste
+	 * address. So it has to be the same VMA that is getting freed.
+	 */
+	if (WARN_ON(txwin->task_ref.vma != vma)) {
+		pr_err("Invalid paste address mmaping\n");
+		return;
+	}
+
+	mutex_lock(&txwin->task_ref.mmap_mutex);
+	txwin->task_ref.vma = NULL;
+	mutex_unlock(&txwin->task_ref.mmap_mutex);
+}
+
 static const struct vm_operations_struct vas_vm_ops = {
+	.close = vas_mmap_close,
 	.fault = vas_mmap_fault,
 };
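Note: the fix hangs off the vm_operations_struct .close hook, which runs before a VMA is torn down (including plain munmap()), so the driver can clear its saved VMA pointer instead of keeping a dangling one. A minimal standalone model of the pattern (types and names are illustrative):

    #include <stdio.h>

    /* An object records a back-pointer to a mapping; the unmap path
     * clears that pointer before the mapping goes away, so later users
     * see NULL instead of a dangling reference. */
    struct mapping { struct window *owner; };
    struct window  { struct mapping *vma; };

    static void window_mmap_close(struct window *w, struct mapping *m)
    {
            if (w->vma != m)    /* must be the very mapping we recorded */
                    return;
            w->vma = NULL;      /* the real code does this under a mutex */
    }

    int main(void)
    {
            struct window w;
            struct mapping m = { .owner = &w };
            w.vma = &m;
            window_mmap_close(&w, &m);             /* munmap() path */
            printf("vma now %p\n", (void *)w.vma); /* (nil) */
            return 0;
    }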
@@ -122,6 +122,7 @@ struct kernel_mapping {

 extern struct kernel_mapping kernel_map;
 extern phys_addr_t phys_ram_base;
+extern unsigned long vmemmap_start_pfn;

 #define is_kernel_mapping(x) \
 	((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
@@ -87,7 +87,7 @@
  * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
  * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
  */
-#define vmemmap		((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))
+#define vmemmap		((struct page *)VMEMMAP_START - vmemmap_start_pfn)

 #define PCI_IO_SIZE      SZ_16M
 #define PCI_IO_END       VMEMMAP_START
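Note: pfn_to_page() is effectively `vmemmap + pfn`, so the bias subtracted from VMEMMAP_START must be the first pfn covered by the struct page array. Biasing by `phys_ram_base >> PAGE_SHIFT` goes wrong when DRAM does not start on a sparsemem section boundary; the new `vmemmap_start_pfn` is section-aligned. A toy version of the arithmetic (addresses and SECTION_SIZE_BITS are illustrative):

    #include <stdio.h>

    #define PAGE_SHIFT        12
    #define SECTION_SIZE_BITS 27   /* illustrative sparsemem section size: 128 MiB */

    struct page { int dummy; };

    int main(void)
    {
            unsigned long phys_ram_base = 0x80200000UL;  /* DRAM not section-aligned */
            /* round down to a section boundary, then convert to a pfn */
            unsigned long vmemmap_start_pfn =
                    (phys_ram_base & ~((1UL << SECTION_SIZE_BITS) - 1)) >> PAGE_SHIFT;

            struct page *vmemmap_start = (struct page *)0x7f0000000000UL;
            struct page *vmemmap = vmemmap_start - vmemmap_start_pfn;

            unsigned long pfn = phys_ram_base >> PAGE_SHIFT;
            /* pfn_to_page(pfn) == vmemmap + pfn: stays inside the mapped array
             * even for pfns between the section start and the DRAM start. */
            printf("struct page for first DRAM pfn: %p\n", (void *)(vmemmap + pfn));
            return 0;
    }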
@@ -159,6 +159,7 @@ struct riscv_pmu_snapshot_data {
 };

 #define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0)
+#define RISCV_PMU_PLAT_FW_EVENT_MASK GENMASK_ULL(61, 0)
 #define RISCV_PMU_RAW_EVENT_IDX 0x20000
 #define RISCV_PLAT_FW_EVENT 0xFFFF
@@ -3,8 +3,11 @@
 #ifndef __ASM_RISCV_SPINLOCK_H
 #define __ASM_RISCV_SPINLOCK_H

-#ifdef CONFIG_RISCV_COMBO_SPINLOCKS
+#ifdef CONFIG_QUEUED_SPINLOCKS
 #define _Q_PENDING_LOOPS	(1 << 9)
+#endif
+
+#ifdef CONFIG_RISCV_COMBO_SPINLOCKS

 #define __no_arch_spinlock_redefine
 #include <asm/ticket_spinlock.h>
@@ -23,21 +23,21 @@
 	REG_S	a0, TASK_TI_A0(tp)
 	csrr	a0, CSR_CAUSE
 	/* Exclude IRQs */
-	blt	a0, zero, _new_vmalloc_restore_context_a0
+	blt	a0, zero, .Lnew_vmalloc_restore_context_a0

 	REG_S	a1, TASK_TI_A1(tp)
 	/* Only check new_vmalloc if we are in page/protection fault */
 	li	a1, EXC_LOAD_PAGE_FAULT
-	beq	a0, a1, _new_vmalloc_kernel_address
+	beq	a0, a1, .Lnew_vmalloc_kernel_address
 	li	a1, EXC_STORE_PAGE_FAULT
-	beq	a0, a1, _new_vmalloc_kernel_address
+	beq	a0, a1, .Lnew_vmalloc_kernel_address
 	li	a1, EXC_INST_PAGE_FAULT
-	bne	a0, a1, _new_vmalloc_restore_context_a1
+	bne	a0, a1, .Lnew_vmalloc_restore_context_a1

-_new_vmalloc_kernel_address:
+.Lnew_vmalloc_kernel_address:
 	/* Is it a kernel address? */
 	csrr	a0, CSR_TVAL
-	bge	a0, zero, _new_vmalloc_restore_context_a1
+	bge	a0, zero, .Lnew_vmalloc_restore_context_a1

 	/* Check if a new vmalloc mapping appeared that could explain the trap */
 	REG_S	a2, TASK_TI_A2(tp)

@@ -69,7 +69,7 @@ _new_vmalloc_kernel_address:
 	/* Check the value of new_vmalloc for this cpu */
 	REG_L	a2, 0(a0)
 	and	a2, a2, a1
-	beq	a2, zero, _new_vmalloc_restore_context
+	beq	a2, zero, .Lnew_vmalloc_restore_context

 	/* Atomically reset the current cpu bit in new_vmalloc */
 	amoxor.d	a0, a1, (a0)

@@ -83,11 +83,11 @@ _new_vmalloc_kernel_address:
 	csrw	CSR_SCRATCH, x0
 	sret

-_new_vmalloc_restore_context:
+.Lnew_vmalloc_restore_context:
 	REG_L	a2, TASK_TI_A2(tp)
-_new_vmalloc_restore_context_a1:
+.Lnew_vmalloc_restore_context_a1:
 	REG_L	a1, TASK_TI_A1(tp)
-_new_vmalloc_restore_context_a0:
+.Lnew_vmalloc_restore_context_a0:
 	REG_L	a0, TASK_TI_A0(tp)
 .endm

@@ -278,6 +278,7 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
 #else
 	sret
 #endif
+SYM_INNER_LABEL(ret_from_exception_end, SYM_L_GLOBAL)
 SYM_CODE_END(ret_from_exception)
 ASM_NOKPROBE(ret_from_exception)
@@ -23,7 +23,7 @@ struct used_bucket {

 struct relocation_head {
 	struct hlist_node node;
-	struct list_head *rel_entry;
+	struct list_head rel_entry;
 	void *location;
 };

@@ -634,7 +634,7 @@ process_accumulated_relocations(struct module *me,
 		location = rel_head_iter->location;
 		list_for_each_entry_safe(rel_entry_iter,
 					 rel_entry_iter_tmp,
-					 rel_head_iter->rel_entry,
+					 &rel_head_iter->rel_entry,
 					 head) {
 			curr_type = rel_entry_iter->type;
 			reloc_handlers[curr_type].reloc_handler(

@@ -704,16 +704,7 @@ static int add_relocation_to_accumulate(struct module *me, int type,
 			return -ENOMEM;
 		}

-		rel_head->rel_entry =
-			kmalloc(sizeof(struct list_head), GFP_KERNEL);
-
-		if (!rel_head->rel_entry) {
-			kfree(entry);
-			kfree(rel_head);
-			return -ENOMEM;
-		}
-
-		INIT_LIST_HEAD(rel_head->rel_entry);
+		INIT_LIST_HEAD(&rel_head->rel_entry);
 		rel_head->location = location;
 		INIT_HLIST_NODE(&rel_head->node);
 		if (!current_head->first) {

@@ -722,7 +713,6 @@ static int add_relocation_to_accumulate(struct module *me, int type,

 		if (!bucket) {
 			kfree(entry);
-			kfree(rel_head->rel_entry);
 			kfree(rel_head);
 			return -ENOMEM;
 		}

@@ -735,7 +725,7 @@ static int add_relocation_to_accumulate(struct module *me, int type,
 	}

 	/* Add relocation to head of discovered rel_head */
-	list_add_tail(&entry->head, rel_head->rel_entry);
+	list_add_tail(&entry->head, &rel_head->rel_entry);

 	return 0;
 }
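Note: embedding the list_head in struct relocation_head, rather than kmalloc'ing one separately, deletes an allocation that could fail and a kfree() that was easy to miss on error paths. A standalone sketch of the embedded-list pattern (simplified re-implementation, not the kernel's list.h):

    #include <stdio.h>

    /* Embedded circular list head, in the style of the kernel's list.h:
     * the head lives inside the owning struct, so its lifetime is the
     * owner's lifetime and no separate allocation can fail or leak. */
    struct list_head { struct list_head *next, *prev; };

    static void init_list_head(struct list_head *h) { h->next = h->prev = h; }

    static void list_add_tail_(struct list_head *item, struct list_head *head)
    {
            item->prev = head->prev;
            item->next = head;
            head->prev->next = item;
            head->prev = item;
    }

    struct relocation_head  { struct list_head rel_entry; /* embedded now */ };
    struct relocation_entry { struct list_head head; int type; };

    int main(void)
    {
            struct relocation_head rh;
            struct relocation_entry e = { .type = 1 };

            init_list_head(&rh.rel_entry);
            list_add_tail_(&e.head, &rh.rel_entry);  /* note the & on the head */
            printf("list non-empty: %d\n", rh.rel_entry.next != &rh.rel_entry);
            return 0;
    }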
@@ -30,7 +30,7 @@ static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
 	p->ainsn.api.restore = (unsigned long)p->addr + len;

 	patch_text_nosync(p->ainsn.api.insn, &p->opcode, len);
-	patch_text_nosync(p->ainsn.api.insn + len, &insn, GET_INSN_LENGTH(insn));
+	patch_text_nosync((void *)p->ainsn.api.insn + len, &insn, GET_INSN_LENGTH(insn));
 }

 static void __kprobes arch_prepare_simulate(struct kprobe *p)
@@ -17,6 +17,7 @@
 #ifdef CONFIG_FRAME_POINTER

 extern asmlinkage void handle_exception(void);
+extern unsigned long ret_from_exception_end;

 static inline int fp_is_valid(unsigned long fp, unsigned long sp)
 {

@@ -71,7 +72,8 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
 			fp = frame->fp;
 			pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra,
 						   &frame->ra);
-			if (pc == (unsigned long)handle_exception) {
+			if (pc >= (unsigned long)handle_exception &&
+			    pc < (unsigned long)&ret_from_exception_end) {
 				if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
 					break;
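Note: the old test only recognized a frame whose pc equalled handle_exception exactly; exporting an end label turns it into a range test over the whole exception-entry region. The comparison in isolation (addresses illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    /* A pc "belongs to" a code region when start <= pc < end; comparing
     * against a single entry address misses everything past the first
     * instruction of the region. */
    static bool pc_in_region(unsigned long pc, unsigned long start, unsigned long end)
    {
            return pc >= start && pc < end;
    }

    int main(void)
    {
            unsigned long handle_exception = 0x80002000UL;        /* region start */
            unsigned long ret_from_exception_end = 0x80002200UL;  /* region end */

            printf("%d\n", pc_in_region(0x80002084UL, handle_exception,
                                        ret_from_exception_end));  /* 1: caught now */
            printf("%d\n", 0x80002084UL == handle_exception);      /* 0: old test missed */
            return 0;
    }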
@@ -35,7 +35,7 @@

 int show_unhandled_signals = 1;

-static DEFINE_SPINLOCK(die_lock);
+static DEFINE_RAW_SPINLOCK(die_lock);

 static int copy_code(struct pt_regs *regs, u16 *val, const u16 *insns)
 {

@@ -81,7 +81,7 @@ void die(struct pt_regs *regs, const char *str)

 	oops_enter();

-	spin_lock_irqsave(&die_lock, flags);
+	raw_spin_lock_irqsave(&die_lock, flags);
 	console_verbose();
 	bust_spinlocks(1);

@@ -100,7 +100,7 @@ void die(struct pt_regs *regs, const char *str)

 	bust_spinlocks(0);
 	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
-	spin_unlock_irqrestore(&die_lock, flags);
+	raw_spin_unlock_irqrestore(&die_lock, flags);
 	oops_exit();

 	if (in_interrupt())
@@ -33,6 +33,7 @@
 #include <asm/pgtable.h>
 #include <asm/sections.h>
 #include <asm/soc.h>
+#include <asm/sparsemem.h>
 #include <asm/tlbflush.h>

 #include "../kernel/head.h"

@@ -62,6 +63,13 @@ EXPORT_SYMBOL(pgtable_l5_enabled);
 phys_addr_t phys_ram_base __ro_after_init;
 EXPORT_SYMBOL(phys_ram_base);

+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#define VMEMMAP_ADDR_ALIGN	(1ULL << SECTION_SIZE_BITS)
+
+unsigned long vmemmap_start_pfn __ro_after_init;
+EXPORT_SYMBOL(vmemmap_start_pfn);
+#endif
+
 unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
 							__page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);

@@ -240,8 +248,12 @@ static void __init setup_bootmem(void)
 	 * Make sure we align the start of the memory on a PMD boundary so that
 	 * at worst, we map the linear mapping with PMD mappings.
 	 */
-	if (!IS_ENABLED(CONFIG_XIP_KERNEL))
+	if (!IS_ENABLED(CONFIG_XIP_KERNEL)) {
 		phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+		vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
+#endif
+	}

 	/*
 	 * In 64-bit, any use of __va/__pa before this point is wrong as we

@@ -1101,6 +1113,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);

 	phys_ram_base = CONFIG_PHYS_RAM_BASE;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
+#endif
 	kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
 	kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start);
@@ -234,6 +234,8 @@ static unsigned long get_vmem_size(unsigned long identity_size,
 	vsize = round_up(SZ_2G + max_mappable, rte_size) +
 		round_up(vmemmap_size, rte_size) +
 		FIXMAP_SIZE + MODULES_LEN + KASLR_LEN;
+	if (IS_ENABLED(CONFIG_KMSAN))
+		vsize += MODULES_LEN * 2;
 	return size_add(vsize, vmalloc_size);
 }
@@ -306,7 +306,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
 			pages++;
 		}
 	}
-	if (mode == POPULATE_DIRECT)
+	if (mode == POPULATE_IDENTITY)
 		update_page_count(PG_DIRECT_MAP_4K, pages);
 }

@@ -339,7 +339,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
 		}
 		pgtable_pte_populate(pmd, addr, next, mode);
 	}
-	if (mode == POPULATE_DIRECT)
+	if (mode == POPULATE_IDENTITY)
 		update_page_count(PG_DIRECT_MAP_1M, pages);
 }

@@ -372,7 +372,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
 		}
 		pgtable_pmd_populate(pud, addr, next, mode);
 	}
-	if (mode == POPULATE_DIRECT)
+	if (mode == POPULATE_IDENTITY)
 		update_page_count(PG_DIRECT_MAP_2G, pages);
 }
@@ -270,7 +270,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
 		if (len >= sizeof(_value))				\
 			return -E2BIG;					\
 		len = strscpy(_value, buf, sizeof(_value));		\
-		if (len < 0)						\
+		if ((ssize_t)len < 0)					\
 			return len;					\
 		strim(_value);						\
 		return len;						\
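Note: strscpy() returns a negative value on failure, but in this macro `len` has an unsigned type, so `len < 0` could never be true and errors were silently treated as success; the cast restores a signed comparison. A standalone demonstration of the trap:

    #include <stdio.h>
    #include <sys/types.h>

    int main(void)
    {
            size_t len = (size_t)-7;   /* a negative errno (-E2BIG) stored into a size_t */

            /* The comparison against 0 is always false for an unsigned type... */
            printf("unsigned check: %d\n", len < 0 ? 1 : 0);          /* 0 */
            /* ...casting back to a signed type recovers the intended test. */
            printf("signed check:   %d\n", (ssize_t)len < 0 ? 1 : 0); /* 1 */
            return 0;
    }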
@@ -2678,9 +2678,13 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		kvm_s390_clear_float_irqs(dev->kvm);
 		break;
 	case KVM_DEV_FLIC_APF_ENABLE:
+		if (kvm_is_ucontrol(dev->kvm))
+			return -EINVAL;
 		dev->kvm->arch.gmap->pfault_enabled = 1;
 		break;
 	case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+		if (kvm_is_ucontrol(dev->kvm))
+			return -EINVAL;
 		dev->kvm->arch.gmap->pfault_enabled = 0;
 		/*
 		 * Make sure no async faults are in transition when

@@ -2894,6 +2898,8 @@ int kvm_set_routing_entry(struct kvm *kvm,
 	switch (ue->type) {
 	/* we store the userspace addresses instead of the guest addresses */
 	case KVM_IRQ_ROUTING_S390_ADAPTER:
+		if (kvm_is_ucontrol(kvm))
+			return -EINVAL;
 		e->set = set_adapter_int;
 		uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
 		if (uaddr == -EFAULT)
@@ -854,7 +854,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
 		      gpa_t gpa)
 {
-	hpa_t hpa = (hpa_t) vsie_page->scb_o;
+	hpa_t hpa = virt_to_phys(vsie_page->scb_o);

 	if (hpa)
 		unpin_guest_page(vcpu->kvm, gpa, hpa);
@@ -429,6 +429,16 @@ static struct event_constraint intel_lnc_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };

+static struct extra_reg intel_lnc_extra_regs[] __read_mostly = {
+	INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0xfffffffffffull, RSP_0),
+	INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0xfffffffffffull, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+	INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE),
+	INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
+	INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0xf, FE),
+	INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
+	EVENT_EXTRA_END
+};
+
 EVENT_ATTR_STR(mem-loads,	mem_ld_nhm,	"event=0x0b,umask=0x10,ldlat=3");
 EVENT_ATTR_STR(mem-loads,	mem_ld_snb,	"event=0xcd,umask=0x1,ldlat=3");

@@ -6422,7 +6432,7 @@ static __always_inline void intel_pmu_init_lnc(struct pmu *pmu)
 	intel_pmu_init_glc(pmu);
 	hybrid(pmu, event_constraints) = intel_lnc_event_constraints;
 	hybrid(pmu, pebs_constraints) = intel_lnc_pebs_event_constraints;
-	hybrid(pmu, extra_regs) = intel_rwc_extra_regs;
+	hybrid(pmu, extra_regs) = intel_lnc_extra_regs;
 }

 static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
|
@ -2517,6 +2517,7 @@ void __init intel_ds_init(void)
|
||||||
x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
|
x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 6:
|
||||||
case 5:
|
case 5:
|
||||||
x86_pmu.pebs_ept = 1;
|
x86_pmu.pebs_ept = 1;
|
||||||
fallthrough;
|
fallthrough;
|
||||||
|
|
|
||||||
|
|
@@ -1910,6 +1910,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
 	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT,	&adl_uncore_init),
 	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X,	&gnr_uncore_init),
 	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT,	&gnr_uncore_init),
+	X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X,	&gnr_uncore_init),
 	{},
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
@@ -452,6 +452,7 @@
 #define X86_FEATURE_SME_COHERENT	(19*32+10) /* AMD hardware-enforced cache coherency */
 #define X86_FEATURE_DEBUG_SWAP		(19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
 #define X86_FEATURE_SVSM		(19*32+28) /* "svsm" SVSM present */
+#define X86_FEATURE_HV_INUSE_WR_ALLOWED	(19*32+30) /* Allow Write to in-use hypervisor-owned pages */

 /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
 #define X86_FEATURE_NO_NESTED_DATA_BP	(20*32+ 0) /* No Nested Data Breakpoints */
@@ -230,6 +230,8 @@ static inline unsigned long long l1tf_pfn_limit(void)
 	return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
 }

+void init_cpu_devs(void);
+void get_cpu_vendor(struct cpuinfo_x86 *c);
 extern void early_cpu_init(void);
 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
 extern void print_cpu_info(struct cpuinfo_x86 *);
@@ -65,4 +65,19 @@

 extern bool __static_call_fixup(void *tramp, u8 op, void *dest);

+extern void __static_call_update_early(void *tramp, void *func);
+
+#define static_call_update_early(name, _func)				\
+({									\
+	typeof(&STATIC_CALL_TRAMP(name)) __F = (_func);			\
+	if (static_call_initialized) {					\
+		__static_call_update(&STATIC_CALL_KEY(name),		\
+				     STATIC_CALL_TRAMP_ADDR(name), __F);\
+	} else {							\
+		WRITE_ONCE(STATIC_CALL_KEY(name).func, _func);		\
+		__static_call_update_early(STATIC_CALL_TRAMP_ADDR(name),\
+					   __F);			\
+	}								\
+})
+
 #endif /* _ASM_STATIC_CALL_H */
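Note: the early variant exists because __static_call_update() depends on text-patching machinery that is not ready while static_call_initialized is still false; until then the trampoline is rewritten directly (see the __static_call_update_early() body in a later hunk). A userspace model of the two-phase update, with a plain function pointer standing in for the trampoline (not the kernel API):

    #include <stdio.h>
    #include <stdbool.h>

    typedef long (*hypercall_fn)(long arg);

    static bool static_call_initialized;
    static hypercall_fn xen_hypercall_target;

    static long hc_hvm(long arg) { return arg + 1; }   /* boot-time default */
    static long hc_pv(long arg)  { return arg + 2; }   /* chosen once detected */

    static void static_call_update_early_(hypercall_fn f)
    {
            /* early boot: single CPU, no patching infrastructure needed,
             * so the "trampoline" target is simply overwritten */
            xen_hypercall_target = f;
    }

    int main(void)
    {
            xen_hypercall_target = hc_hvm;
            static_call_update_early_(hc_pv);           /* early switch */
            static_call_initialized = true;             /* later updates take the full path */
            printf("%ld\n", xen_hypercall_target(40));  /* 42 */
            return 0;
    }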
@@ -8,7 +8,7 @@
 #include <asm/special_insns.h>

 #ifdef CONFIG_X86_32
-static inline void iret_to_self(void)
+static __always_inline void iret_to_self(void)
 {
 	asm volatile (
 		"pushfl\n\t"

@@ -19,7 +19,7 @@ static inline void iret_to_self(void)
 		: ASM_CALL_CONSTRAINT : : "memory");
 }
 #else
-static inline void iret_to_self(void)
+static __always_inline void iret_to_self(void)
 {
 	unsigned int tmp;

@@ -55,7 +55,7 @@ static inline void iret_to_self(void)
  * Like all of Linux's memory ordering operations, this is a
  * compiler barrier as well.
  */
-static inline void sync_core(void)
+static __always_inline void sync_core(void)
 {
 	/*
 	 * The SERIALIZE instruction is the most straightforward way to
@@ -39,9 +39,11 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/pgtable.h>
+#include <linux/instrumentation.h>

 #include <trace/events/xen.h>

+#include <asm/alternative.h>
 #include <asm/page.h>
 #include <asm/smap.h>
 #include <asm/nospec-branch.h>

@@ -86,11 +88,20 @@ struct xen_dm_op_buf;
  * there aren't more than 5 arguments...)
  */

-extern struct { char _entry[32]; } hypercall_page[];
+void xen_hypercall_func(void);
+DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func);

-#define __HYPERCALL		"call hypercall_page+%c[offset]"
-#define __HYPERCALL_ENTRY(x)						\
-	[offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0]))
+#ifdef MODULE
+#define __ADDRESSABLE_xen_hypercall
+#else
+#define __ADDRESSABLE_xen_hypercall	__ADDRESSABLE_ASM_STR(__SCK__xen_hypercall)
+#endif
+
+#define __HYPERCALL				\
+	__ADDRESSABLE_xen_hypercall		\
+	"call __SCT__xen_hypercall"
+
+#define __HYPERCALL_ENTRY(x)	"a" (x)

 #ifdef CONFIG_X86_32
 #define __HYPERCALL_RETREG	"eax"

@@ -148,7 +159,7 @@ extern struct { char _entry[32]; } hypercall_page[];
 	__HYPERCALL_0ARG();						\
 	asm volatile (__HYPERCALL					\
 		      : __HYPERCALL_0PARAM				\
-		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name)	\
 		      : __HYPERCALL_CLOBBER0);				\
 	(type)__res;							\
 })

@@ -159,7 +170,7 @@ extern struct { char _entry[32]; } hypercall_page[];
 	__HYPERCALL_1ARG(a1);						\
 	asm volatile (__HYPERCALL					\
 		      : __HYPERCALL_1PARAM				\
-		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name)	\
 		      : __HYPERCALL_CLOBBER1);				\
 	(type)__res;							\
 })

@@ -170,7 +181,7 @@ extern struct { char _entry[32]; } hypercall_page[];
 	__HYPERCALL_2ARG(a1, a2);					\
 	asm volatile (__HYPERCALL					\
 		      : __HYPERCALL_2PARAM				\
-		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name)	\
 		      : __HYPERCALL_CLOBBER2);				\
 	(type)__res;							\
 })

@@ -181,7 +192,7 @@ extern struct { char _entry[32]; } hypercall_page[];
 	__HYPERCALL_3ARG(a1, a2, a3);					\
 	asm volatile (__HYPERCALL					\
 		      : __HYPERCALL_3PARAM				\
-		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name)	\
 		      : __HYPERCALL_CLOBBER3);				\
 	(type)__res;							\
 })

@@ -192,7 +203,7 @@ extern struct { char _entry[32]; } hypercall_page[];
 	__HYPERCALL_4ARG(a1, a2, a3, a4);				\
 	asm volatile (__HYPERCALL					\
 		      : __HYPERCALL_4PARAM				\
-		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name)	\
 		      : __HYPERCALL_CLOBBER4);				\
 	(type)__res;							\
 })

@@ -206,12 +217,9 @@ xen_single_call(unsigned int call,
 	__HYPERCALL_DECLS;
 	__HYPERCALL_5ARG(a1, a2, a3, a4, a5);

-	if (call >= PAGE_SIZE / sizeof(hypercall_page[0]))
-		return -EINVAL;
-
-	asm volatile(CALL_NOSPEC
+	asm volatile(__HYPERCALL
 		     : __HYPERCALL_5PARAM
-		     : [thunk_target] "a" (&hypercall_page[call])
+		     : __HYPERCALL_ENTRY(call)
 		     : __HYPERCALL_CLOBBER5);

 	return (long)__res;
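Note: dispatch changes from by-address to by-value: previously hypercall N jumped to the N-th 32-byte stub in hypercall_page (hence the `%c[offset]` constant math above), now N is passed in the accumulator register to one shared static-call trampoline. A tiny illustration of the two address computations (numbers illustrative):

    #include <stdio.h>

    /* Old scheme: each hypercall number selects a 32-byte stub inside a
     * dedicated page, so the target address encodes the number.
     * New scheme: the number travels in a register and one trampoline
     * serves every hypercall. */
    int main(void)
    {
            unsigned int nr = 17;                       /* illustrative hypercall number */
            unsigned long page = 0xffffffff82000000UL;  /* illustrative stub page address */

            printf("old: call %#lx   (hypercall_page + nr * 32)\n", page + nr * 32UL);
            printf("new: mov $%u, %%eax; call __SCT__xen_hypercall\n", nr);
            return 0;
    }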
||||||
if (dest >= (void *)relocate_kernel &&
|
if (dest >= (void *)relocate_kernel &&
|
||||||
dest < (void*)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE)
|
dest < (void*)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE)
|
||||||
return true;
|
return true;
|
||||||
#endif
|
|
||||||
#ifdef CONFIG_XEN
|
|
||||||
if (dest >= (void *)hypercall_page &&
|
|
||||||
dest < (void*)hypercall_page + PAGE_SIZE)
|
|
||||||
return true;
|
|
||||||
#endif
|
#endif
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -81,6 +81,34 @@ static void do_user_cp_fault(struct pt_regs *regs, unsigned long error_code)

 static __ro_after_init bool ibt_fatal = true;

+/*
+ * By definition, all missing-ENDBRANCH #CPs are a result of WFE && !ENDBR.
+ *
+ * For the kernel IBT no ENDBR selftest where #CPs are deliberately triggered,
+ * the WFE state of the interrupted context needs to be cleared to let execution
+ * continue. Otherwise when the CPU resumes from the instruction that just
+ * caused the previous #CP, another missing-ENDBRANCH #CP is raised and the CPU
+ * enters a dead loop.
+ *
+ * This is not a problem with IDT because it doesn't preserve WFE and IRET doesn't
+ * set WFE. But FRED provides space on the entry stack (in an expanded CS area)
+ * to save and restore the WFE state, thus the WFE state is no longer clobbered,
+ * so software must clear it.
+ */
+static void ibt_clear_fred_wfe(struct pt_regs *regs)
+{
+	/*
+	 * No need to do any FRED checks.
+	 *
+	 * For IDT event delivery, the high-order 48 bits of CS are pushed
+	 * as 0s into the stack, and later IRET ignores these bits.
+	 *
+	 * For FRED, a test to check if fred_cs.wfe is set would be dropped
+	 * by compilers.
+	 */
+	regs->fred_cs.wfe = 0;
+}
+
 static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	if ((error_code & CP_EC) != CP_ENDBR) {

@@ -90,6 +118,7 @@ static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code)

 	if (unlikely(regs->ip == (unsigned long)&ibt_selftest_noendbr)) {
 		regs->ax = 0;
+		ibt_clear_fred_wfe(regs);
 		return;
 	}

@@ -97,6 +126,7 @@ static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code)
 	if (!ibt_fatal) {
 		printk(KERN_DEFAULT CUT_HERE);
 		__warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL);
+		ibt_clear_fred_wfe(regs);
 		return;
 	}
 	BUG();
@@ -867,7 +867,7 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c)
 		tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]);
 }

-static void get_cpu_vendor(struct cpuinfo_x86 *c)
+void get_cpu_vendor(struct cpuinfo_x86 *c)
 {
 	char *v = c->x86_vendor_id;
 	int i;

@@ -1649,15 +1649,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	detect_nopl();
 }

-void __init early_cpu_init(void)
+void __init init_cpu_devs(void)
 {
 	const struct cpu_dev *const *cdev;
 	int count = 0;

-#ifdef CONFIG_PROCESSOR_SELECT
-	pr_info("KERNEL supported cpus:\n");
-#endif
-
 	for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
 		const struct cpu_dev *cpudev = *cdev;

@@ -1665,20 +1661,30 @@ void __init early_cpu_init(void)
 			break;
 		cpu_devs[count] = cpudev;
 		count++;
+	}
+}

+void __init early_cpu_init(void)
+{
 #ifdef CONFIG_PROCESSOR_SELECT
-		{
-			unsigned int j;
+	unsigned int i, j;

-			for (j = 0; j < 2; j++) {
-				if (!cpudev->c_ident[j])
-					continue;
-				pr_info("  %s %s\n", cpudev->c_vendor,
-					cpudev->c_ident[j]);
-			}
+	pr_info("KERNEL supported cpus:\n");
+#endif
+
+	init_cpu_devs();
+
+#ifdef CONFIG_PROCESSOR_SELECT
+	for (i = 0; i < X86_VENDOR_NUM && cpu_devs[i]; i++) {
+		for (j = 0; j < 2; j++) {
+			if (!cpu_devs[i]->c_ident[j])
+				continue;
+			pr_info("  %s %s\n", cpu_devs[i]->c_vendor,
+				cpu_devs[i]->c_ident[j]);
 		}
-#endif
 	}
+#endif
-	}
 	early_identify_cpu(&boot_cpu_data);
 }
@@ -223,6 +223,63 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs)
 	hyperv_cleanup();
 }
 #endif /* CONFIG_CRASH_DUMP */
+
+static u64 hv_ref_counter_at_suspend;
+static void (*old_save_sched_clock_state)(void);
+static void (*old_restore_sched_clock_state)(void);
+
+/*
+ * Hyper-V clock counter resets during hibernation. Save and restore clock
+ * offset during suspend/resume, while also considering the time passed
+ * before suspend. This is to make sure that sched_clock using hv tsc page
+ * based clocksource, proceeds from where it left off during suspend and
+ * it shows correct time for the timestamps of kernel messages after resume.
+ */
+static void save_hv_clock_tsc_state(void)
+{
+	hv_ref_counter_at_suspend = hv_read_reference_counter();
+}
+
+static void restore_hv_clock_tsc_state(void)
+{
+	/*
+	 * Adjust the offsets used by hv tsc clocksource to
+	 * account for the time spent before hibernation.
+	 * adjusted value = reference counter (time) at suspend
+	 *                - reference counter (time) now.
+	 */
+	hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter());
+}
+
+/*
+ * Functions to override save_sched_clock_state and restore_sched_clock_state
+ * functions of x86_platform. The Hyper-V clock counter is reset during
+ * suspend-resume and the offset used to measure time needs to be
+ * corrected, post resume.
+ */
+static void hv_save_sched_clock_state(void)
+{
+	old_save_sched_clock_state();
+	save_hv_clock_tsc_state();
+}
+
+static void hv_restore_sched_clock_state(void)
+{
+	restore_hv_clock_tsc_state();
+	old_restore_sched_clock_state();
+}
+
+static void __init x86_setup_ops_for_tsc_pg_clock(void)
+{
+	if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
+		return;
+
+	old_save_sched_clock_state = x86_platform.save_sched_clock_state;
+	x86_platform.save_sched_clock_state = hv_save_sched_clock_state;
+
+	old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
+	x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
+}
 #endif /* CONFIG_HYPERV */

 static uint32_t __init ms_hyperv_platform(void)

@@ -579,6 +636,7 @@ static void __init ms_hyperv_init_platform(void)

 	/* Register Hyper-V specific clocksource */
 	hv_init_clocksource();
+	x86_setup_ops_for_tsc_pg_clock();
 	hv_vtl_init_platform();
 #endif
 	/*
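Note: the delta passed to hv_adj_sched_clock_offset() is (reference time at suspend) - (reference time now); since the counter restarts near zero after hibernation, folding that delta into subsequent reads makes sched_clock resume from its pre-suspend value. A tiny numeric model:

    #include <stdio.h>
    #include <stdint.h>

    static int64_t offset;       /* applied to every clock read */
    static uint64_t hw_counter;  /* the "reference counter" */

    static uint64_t read_clock(void) { return hw_counter + (uint64_t)offset; }

    int main(void)
    {
            hw_counter = 5000;                   /* 5000 ticks before suspend */
            uint64_t at_suspend = read_clock();  /* 5000 */

            hw_counter = 7;                      /* counter reset by hibernation */
            offset += (int64_t)(at_suspend - read_clock()); /* 5000 - 7 = 4993 */

            printf("%llu\n", (unsigned long long)read_clock()); /* 5000 again */
            return 0;
    }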
@@ -190,7 +190,8 @@ int ssp_get(struct task_struct *target, const struct user_regset *regset,
 	struct fpu *fpu = &target->thread.fpu;
 	struct cet_user_state *cetregs;

-	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
+	    !ssp_active(target, regset))
 		return -ENODEV;

 	sync_fpstate(fpu);
@@ -172,6 +172,14 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
 }
 EXPORT_SYMBOL_GPL(arch_static_call_transform);

+noinstr void __static_call_update_early(void *tramp, void *func)
+{
+	BUG_ON(system_state != SYSTEM_BOOTING);
+	BUG_ON(static_call_initialized);
+	__text_gen_insn(tramp, JMP32_INSN_OPCODE, tramp, func, JMP32_INSN_SIZE);
+	sync_core();
+}
+
 #ifdef CONFIG_MITIGATION_RETHUNK
 /*
  * This is called by apply_returns() to fix up static call trampolines,
|
||||||
* linker will never mark as relocatable. (Using just ABSOLUTE() is not
|
* linker will never mark as relocatable. (Using just ABSOLUTE() is not
|
||||||
* sufficient for that).
|
* sufficient for that).
|
||||||
*/
|
*/
|
||||||
#ifdef CONFIG_XEN
|
|
||||||
#ifdef CONFIG_XEN_PV
|
#ifdef CONFIG_XEN_PV
|
||||||
xen_elfnote_entry_value =
|
xen_elfnote_entry_value =
|
||||||
ABSOLUTE(xen_elfnote_entry) + ABSOLUTE(startup_xen);
|
ABSOLUTE(xen_elfnote_entry) + ABSOLUTE(startup_xen);
|
||||||
#endif
|
#endif
|
||||||
xen_elfnote_hypercall_page_value =
|
|
||||||
ABSOLUTE(xen_elfnote_hypercall_page) + ABSOLUTE(hypercall_page);
|
|
||||||
#endif
|
|
||||||
#ifdef CONFIG_PVH
|
#ifdef CONFIG_PVH
|
||||||
xen_elfnote_phys32_entry_value =
|
xen_elfnote_phys32_entry_value =
|
||||||
ABSOLUTE(xen_elfnote_phys32_entry) + ABSOLUTE(pvh_start_xen - LOAD_OFFSET);
|
ABSOLUTE(xen_elfnote_phys32_entry) + ABSOLUTE(pvh_start_xen - LOAD_OFFSET);
|
||||||
|
|
|
||||||
|
|
@@ -3364,18 +3364,6 @@ static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu,
 	return true;
 }

-static bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
-{
-	if (fault->exec)
-		return is_executable_pte(spte);
-
-	if (fault->write)
-		return is_writable_pte(spte);
-
-	/* Fault was on Read access */
-	return spte & PT_PRESENT_MASK;
-}
-
 /*
  * Returns the last level spte pointer of the shadow page walk for the given
  * gpa, and sets *spte to the spte value. This spte may be non-preset. If no
@@ -461,6 +461,23 @@ static inline bool is_mmu_writable_spte(u64 spte)
 	return spte & shadow_mmu_writable_mask;
 }

+/*
+ * Returns true if the access indicated by @fault is allowed by the existing
+ * SPTE protections. Note, the caller is responsible for checking that the
+ * SPTE is a shadow-present, leaf SPTE (either before or after).
+ */
+static inline bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
+{
+	if (fault->exec)
+		return is_executable_pte(spte);
+
+	if (fault->write)
+		return is_writable_pte(spte);
+
+	/* Fault was on Read access */
+	return spte & PT_PRESENT_MASK;
+}
+
 /*
  * If the MMU-writable flag is cleared, i.e. the SPTE is write-protected for
  * write-tracking, remote TLBs must be flushed, even if the SPTE was read-only,
@@ -985,6 +985,11 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
 	if (fault->prefetch && is_shadow_present_pte(iter->old_spte))
 		return RET_PF_SPURIOUS;

+	if (is_shadow_present_pte(iter->old_spte) &&
+	    is_access_allowed(fault, iter->old_spte) &&
+	    is_last_spte(iter->old_spte, iter->level))
+		return RET_PF_SPURIOUS;
+
 	if (unlikely(!fault->slot))
 		new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
 	else
@@ -1199,6 +1199,12 @@ bool avic_hardware_setup(void)
 		return false;
 	}

+	if (cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
+	    !boot_cpu_has(X86_FEATURE_HV_INUSE_WR_ALLOWED)) {
+		pr_warn("AVIC disabled: missing HvInUseWrAllowed on SNP-enabled system\n");
+		return false;
+	}
+
 	if (boot_cpu_has(X86_FEATURE_AVIC)) {
 		pr_info("AVIC enabled\n");
 	} else if (force_avic) {
@@ -3201,15 +3201,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		if (data & ~supported_de_cfg)
 			return 1;
 
-		/*
-		 * Don't let the guest change the host-programmed value. The
-		 * MSR is very model specific, i.e. contains multiple bits that
-		 * are completely unknown to KVM, and the one bit known to KVM
-		 * is simply a reflection of hardware capabilities.
-		 */
-		if (!msr->host_initiated && data != svm->msr_decfg)
-			return 1;
-
 		svm->msr_decfg = data;
 		break;
 	}
@@ -2,7 +2,7 @@
 #ifndef __KVM_X86_VMX_POSTED_INTR_H
 #define __KVM_X86_VMX_POSTED_INTR_H
 
-#include <linux/find.h>
+#include <linux/bitmap.h>
 #include <asm/posted_intr.h>
 
 void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
@@ -9976,7 +9976,7 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
 {
 	u64 ret = vcpu->run->hypercall.ret;
 
-	if (!is_64_bit_mode(vcpu))
+	if (!is_64_bit_hypercall(vcpu))
 		ret = (u32)ret;
 	kvm_rax_write(vcpu, ret);
 	++vcpu->stat.hypercalls;
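The one-line fix above keys the truncation off the hypercall's ABI rather than the vCPU's mode at completion time, since the guest may have switched modes while userspace was handling the hypercall. A simplified model of the completion path (names here are illustrative, not the KVM API):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* What the completion handler effectively does with the return value. */
static uint64_t finish_hypercall(uint64_t ret, bool hypercall_was_64bit)
{
	/* A 32-bit caller only ever sees the low 32 bits of the result. */
	return hypercall_was_64bit ? ret : (uint32_t)ret;
}

int main(void)
{
	assert(finish_hypercall(0xdeadbeefcafeULL, false) == 0xbeefcafeUL);
	assert(finish_hypercall(0xdeadbeefcafeULL, true) == 0xdeadbeefcafeULL);
	return 0;
}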
@@ -12724,6 +12724,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm_hv_init_vm(kvm);
 	kvm_xen_init_vm(kvm);
 
+	if (ignore_msrs && !report_ignored_msrs) {
+		pr_warn_once("Running KVM with ignore_msrs=1 and report_ignored_msrs=0 is not a\n"
+			     "a supported configuration. Lying to the guest about the existence of MSRs\n"
+			     "may cause the guest operating system to hang or produce errors. If a guest\n"
+			     "does not run without ignore_msrs=1, please report it to kvm@vger.kernel.org.\n");
+	}
+
 	return 0;
 
 out_uninit_mmu:
@@ -2,6 +2,7 @@
 
 #include <linux/console.h>
 #include <linux/cpu.h>
+#include <linux/instrumentation.h>
 #include <linux/kexec.h>
 #include <linux/memblock.h>
 #include <linux/slab.h>
@@ -21,7 +22,8 @@
 
 #include "xen-ops.h"
 
-EXPORT_SYMBOL_GPL(hypercall_page);
+DEFINE_STATIC_CALL(xen_hypercall, xen_hypercall_hvm);
+EXPORT_STATIC_CALL_TRAMP(xen_hypercall);
 
 /*
  * Pointer to the xen_vcpu_info structure or
@@ -68,6 +70,67 @@ EXPORT_SYMBOL(xen_start_flags);
  */
 struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
 
+static __ref void xen_get_vendor(void)
+{
+	init_cpu_devs();
+	cpu_detect(&boot_cpu_data);
+	get_cpu_vendor(&boot_cpu_data);
+}
+
+void xen_hypercall_setfunc(void)
+{
+	if (static_call_query(xen_hypercall) != xen_hypercall_hvm)
+		return;
+
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+	     boot_cpu_data.x86_vendor == X86_VENDOR_HYGON))
+		static_call_update(xen_hypercall, xen_hypercall_amd);
+	else
+		static_call_update(xen_hypercall, xen_hypercall_intel);
+}
+
+/*
+ * Evaluate processor vendor in order to select the correct hypercall
+ * function for HVM/PVH guests.
+ * Might be called very early in boot before vendor has been set by
+ * early_cpu_init().
+ */
+noinstr void *__xen_hypercall_setfunc(void)
+{
+	void (*func)(void);
+
+	/*
+	 * Xen is supported only on CPUs with CPUID, so testing for
+	 * X86_FEATURE_CPUID is a test for early_cpu_init() having been
+	 * run.
+	 *
+	 * Note that __xen_hypercall_setfunc() is noinstr only due to a nasty
+	 * dependency chain: it is being called via the xen_hypercall static
+	 * call when running as a PVH or HVM guest. Hypercalls need to be
+	 * noinstr due to PV guests using hypercalls in noinstr code. So we
+	 * can safely tag the function body as "instrumentation ok", since
+	 * the PV guest requirement is not of interest here (xen_get_vendor()
+	 * calls noinstr functions, and static_call_update_early() might do
+	 * so, too).
+	 */
+	instrumentation_begin();
+
+	if (!boot_cpu_has(X86_FEATURE_CPUID))
+		xen_get_vendor();
+
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+	     boot_cpu_data.x86_vendor == X86_VENDOR_HYGON))
+		func = xen_hypercall_amd;
+	else
+		func = xen_hypercall_intel;
+
+	static_call_update_early(xen_hypercall, func);
+
+	instrumentation_end();
+
+	return func;
+}
+
 static int xen_cpu_up_online(unsigned int cpu)
 {
 	xen_init_lock_cpu(cpu);
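Taken together, these hunks replace the writable hypercall page with a static call: the call site boots bound to the generic xen_hypercall_hvm stub, which probes the CPU vendor once, patches the static call via static_call_update_early(), and forwards to the vendor-specific VMMCALL/VMCALL stub. A userspace sketch of the same one-time rebinding, using a plain function pointer in place of the kernel's static-call patching (all names below are illustrative):

#include <stdio.h>

static long hypercall_default(long nr);

/* The "static call" stand-in: starts at the generic detection stub. */
static long (*xen_hc)(long) = hypercall_default;

static long hypercall_amd(long nr)   { printf("vmmcall, nr=%ld\n", nr); return 0; }
static long hypercall_intel(long nr) { printf("vmcall, nr=%ld\n", nr);  return 0; }

/* Stand-in for the CPUID vendor probe done by xen_get_vendor(). */
static int cpu_is_amd_or_hygon(void) { return 1; }

static long hypercall_default(long nr)
{
	/* Rebind once, like static_call_update_early(), then forward. */
	xen_hc = cpu_is_amd_or_hygon() ? hypercall_amd : hypercall_intel;
	return xen_hc(nr);
}

int main(void)
{
	xen_hc(1);	/* first call probes the vendor and rebinds */
	xen_hc(2);	/* later calls go straight to the chosen stub */
	return 0;
}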
@@ -106,15 +106,8 @@ static void __init init_hvm_pv_info(void)
 	/* PVH set up hypercall page in xen_prepare_pvh(). */
 	if (xen_pvh_domain())
 		pv_info.name = "Xen PVH";
-	else {
-		u64 pfn;
-		uint32_t msr;
-
+	else
 		pv_info.name = "Xen HVM";
-		msr = cpuid_ebx(base + 2);
-		pfn = __pa(hypercall_page);
-		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
-	}
 
 	xen_setup_features();
@@ -300,6 +293,10 @@ static uint32_t __init xen_platform_hvm(void)
 	if (xen_pv_domain())
 		return 0;
 
+	/* Set correct hypercall function. */
+	if (xen_domain)
+		xen_hypercall_setfunc();
+
 	if (xen_pvh_domain() && nopv) {
 		/* Guest booting via the Xen-PVH boot entry goes here */
 		pr_info("\"nopv\" parameter is ignored in PVH guest\n");
@@ -1341,6 +1341,9 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
 
 	xen_domain_type = XEN_PV_DOMAIN;
 	xen_start_flags = xen_start_info->flags;
+	/* Interrupts are guaranteed to be off initially. */
+	early_boot_irqs_disabled = true;
+	static_call_update_early(xen_hypercall, xen_hypercall_pv);
 
 	xen_setup_features();
@@ -1431,7 +1434,6 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
 	WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
 
 	local_irq_disable();
-	early_boot_irqs_disabled = true;
 
 	xen_raw_console_write("mapping kernel into physical memory\n");
 	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
@@ -129,17 +129,10 @@ static void __init pvh_arch_setup(void)
 
 void __init xen_pvh_init(struct boot_params *boot_params)
 {
-	u32 msr;
-	u64 pfn;
-
 	xen_pvh = 1;
 	xen_domain_type = XEN_HVM_DOMAIN;
 	xen_start_flags = pvh_start_info.flags;
 
-	msr = cpuid_ebx(xen_cpuid_base() + 2);
-	pfn = __pa(hypercall_page);
-	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
-
 	x86_init.oem.arch_setup = pvh_arch_setup;
 	x86_init.oem.banner = xen_banner;
@@ -20,9 +20,32 @@
 
 #include <linux/init.h>
 #include <linux/linkage.h>
+#include <linux/objtool.h>
 #include <../entry/calling.h>
 
 .pushsection .noinstr.text, "ax"
+/*
+ * PV hypercall interface to the hypervisor.
+ *
+ * Called via inline asm(), so better preserve %rcx and %r11.
+ *
+ * Input:
+ *	%eax: hypercall number
+ *	%rdi, %rsi, %rdx, %r10, %r8: args 1..5 for the hypercall
+ * Output: %rax
+ */
+SYM_FUNC_START(xen_hypercall_pv)
+	ANNOTATE_NOENDBR
+	push %rcx
+	push %r11
+	UNWIND_HINT_SAVE
+	syscall
+	UNWIND_HINT_RESTORE
+	pop %r11
+	pop %rcx
+	RET
+SYM_FUNC_END(xen_hypercall_pv)
+
 /*
  * Disabling events is simply a matter of making the event mask
  * non-zero.
@@ -176,7 +199,6 @@ SYM_CODE_START(xen_early_idt_handler_array)
 SYM_CODE_END(xen_early_idt_handler_array)
 	__FINIT
 
-hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
 /*
  * Xen64 iret frame:
 *
@@ -186,17 +208,28 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
 * cs
 * rip <-- standard iret frame
 *
- * flags
+ * flags <-- xen_iret must push from here on
 *
- * rcx }
- * r11 }<-- pushed by hypercall page
- * rsp->rax }
+ * rcx
+ * r11
+ * rsp->rax
 */
+.macro xen_hypercall_iret
+	pushq $0	/* Flags */
+	push %rcx
+	push %r11
+	push %rax
+	mov $__HYPERVISOR_iret, %eax
+	syscall		/* Do the IRET. */
+#ifdef CONFIG_MITIGATION_SLS
+	int3
+#endif
+.endm
+
 SYM_CODE_START(xen_iret)
 	UNWIND_HINT_UNDEFINED
 	ANNOTATE_NOENDBR
-	pushq $0
-	jmp hypercall_iret
+	xen_hypercall_iret
 SYM_CODE_END(xen_iret)
 
 /*
@@ -301,8 +334,7 @@ SYM_CODE_START(xen_entry_SYSENTER_compat)
 	ENDBR
 	lea 16(%rsp), %rsp	/* strip %rcx, %r11 */
 	mov $-ENOSYS, %rax
-	pushq $0
-	jmp hypercall_iret
+	xen_hypercall_iret
 SYM_CODE_END(xen_entry_SYSENTER_compat)
 SYM_CODE_END(xen_entry_SYSCALL_compat)
@@ -6,9 +6,11 @@
 
 #include <linux/elfnote.h>
 #include <linux/init.h>
+#include <linux/instrumentation.h>
 
 #include <asm/boot.h>
 #include <asm/asm.h>
+#include <asm/frame.h>
 #include <asm/msr.h>
 #include <asm/page_types.h>
 #include <asm/percpu.h>
@@ -20,28 +22,6 @@
 #include <xen/interface/xen-mca.h>
 #include <asm/xen/interface.h>
 
-.pushsection .noinstr.text, "ax"
-	.balign PAGE_SIZE
-SYM_CODE_START(hypercall_page)
-	.rept (PAGE_SIZE / 32)
-		UNWIND_HINT_FUNC
-		ANNOTATE_NOENDBR
-		ANNOTATE_UNRET_SAFE
-		ret
-		/*
-		 * Xen will write the hypercall page, and sort out ENDBR.
-		 */
-		.skip 31, 0xcc
-	.endr
-
-#define HYPERCALL(n) \
-	.equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
-	.type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
-#include <asm/xen-hypercalls.h>
-#undef HYPERCALL
-SYM_CODE_END(hypercall_page)
-.popsection
-
 #ifdef CONFIG_XEN_PV
 	__INIT
 SYM_CODE_START(startup_xen)
@@ -87,6 +67,87 @@ SYM_CODE_END(xen_cpu_bringup_again)
 #endif
 #endif
 
+.pushsection .noinstr.text, "ax"
+/*
+ * Xen hypercall interface to the hypervisor.
+ *
+ * Input:
+ *	%eax: hypercall number
+ *   32-bit:
+ *	%ebx, %ecx, %edx, %esi, %edi: args 1..5 for the hypercall
+ *   64-bit:
+ *	%rdi, %rsi, %rdx, %r10, %r8: args 1..5 for the hypercall
+ * Output: %[er]ax
+ */
+SYM_FUNC_START(xen_hypercall_hvm)
+	ENDBR
+	FRAME_BEGIN
+	/* Save all relevant registers (caller save and arguments). */
+#ifdef CONFIG_X86_32
+	push %eax
+	push %ebx
+	push %ecx
+	push %edx
+	push %esi
+	push %edi
+#else
+	push %rax
+	push %rcx
+	push %rdx
+	push %rdi
+	push %rsi
+	push %r11
+	push %r10
+	push %r9
+	push %r8
+#ifdef CONFIG_FRAME_POINTER
+	pushq $0	/* Dummy push for stack alignment. */
+#endif
+#endif
+	/* Set the vendor specific function. */
+	call __xen_hypercall_setfunc
+	/* Set ZF = 1 if AMD, Restore saved registers. */
+#ifdef CONFIG_X86_32
+	lea xen_hypercall_amd, %ebx
+	cmp %eax, %ebx
+	pop %edi
+	pop %esi
+	pop %edx
+	pop %ecx
+	pop %ebx
+	pop %eax
+#else
+	lea xen_hypercall_amd(%rip), %rbx
+	cmp %rax, %rbx
+#ifdef CONFIG_FRAME_POINTER
+	pop %rax	/* Dummy pop. */
+#endif
+	pop %r8
+	pop %r9
+	pop %r10
+	pop %r11
+	pop %rsi
+	pop %rdi
+	pop %rdx
+	pop %rcx
+	pop %rax
+#endif
+	/* Use correct hypercall function. */
+	jz xen_hypercall_amd
+	jmp xen_hypercall_intel
+SYM_FUNC_END(xen_hypercall_hvm)
+
+SYM_FUNC_START(xen_hypercall_amd)
+	vmmcall
+	RET
+SYM_FUNC_END(xen_hypercall_amd)
+
+SYM_FUNC_START(xen_hypercall_intel)
+	vmcall
+	RET
+SYM_FUNC_END(xen_hypercall_intel)
+.popsection
+
 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
 ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
@@ -116,8 +177,6 @@ SYM_CODE_END(xen_cpu_bringup_again)
 #else
 # define FEATURES_DOM0 0
 #endif
-ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .globl xen_elfnote_hypercall_page;
-	xen_elfnote_hypercall_page: _ASM_PTR xen_elfnote_hypercall_page_value - .)
 ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES,
 	.long FEATURES_PV | FEATURES_PVH | FEATURES_DOM0)
 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
@@ -326,4 +326,13 @@ static inline void xen_smp_intr_free_pv(unsigned int cpu) {}
 static inline void xen_smp_count_cpus(void) { }
 #endif /* CONFIG_SMP */
 
+#ifdef CONFIG_XEN_PV
+void xen_hypercall_pv(void);
+#endif
+void xen_hypercall_hvm(void);
+void xen_hypercall_amd(void);
+void xen_hypercall_intel(void);
+void xen_hypercall_setfunc(void);
+void *__xen_hypercall_setfunc(void);
+
 #endif /* XEN_OPS_H */
@@ -155,8 +155,7 @@ int set_blocksize(struct file *file, int size)
 	struct inode *inode = file->f_mapping->host;
 	struct block_device *bdev = I_BDEV(inode);
 
-	/* Size must be a power of two, and between 512 and PAGE_SIZE */
-	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
+	if (blk_validate_block_size(size))
 		return -EINVAL;
 
 	/* Size cannot be smaller than the size supported by the device */
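blk_validate_block_size() encapsulates exactly the predicate the removed open-coded check tested: the block size must be a power of two between 512 bytes and PAGE_SIZE. An equivalent standalone check, with PAGE_SIZE assumed to be 4096 for the example:

#include <assert.h>
#include <stdbool.h>

#define PAGE_SIZE 4096	/* assumption for this standalone sketch */

static bool block_size_valid(unsigned long size)
{
	/* (size & (size - 1)) == 0 is the usual power-of-two test */
	return size >= 512 && size <= PAGE_SIZE && (size & (size - 1)) == 0;
}

int main(void)
{
	assert(block_size_valid(512) && block_size_valid(4096));
	assert(!block_size_valid(3072));	/* not a power of two */
	assert(!block_size_valid(256));		/* below the 512-byte floor */
	return 0;
}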
@@ -6844,16 +6844,24 @@ static struct bfq_queue *bfq_waker_bfqq(struct bfq_queue *bfqq)
 		if (new_bfqq == waker_bfqq) {
 			/*
 			 * If waker_bfqq is in the merge chain, and current
-			 * is the only procress.
+			 * is the only process, waker_bfqq can be freed.
 			 */
 			if (bfqq_process_refs(waker_bfqq) == 1)
 				return NULL;
-			break;
+
+			return waker_bfqq;
 		}
 
 		new_bfqq = new_bfqq->new_bfqq;
 	}
 
+	/*
+	 * If waker_bfqq is not in the merge chain, and it's procress reference
+	 * is 0, waker_bfqq can be freed.
+	 */
+	if (bfqq_process_refs(waker_bfqq) == 0)
+		return NULL;
+
 	return waker_bfqq;
 }
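Both exits now decide from the process-reference count whether the waker queue is about to be freed: inside the merge chain a single reference means only the chain holds it; outside the chain zero references mean no one does. A toy version of that guard (illustrative only, not the bfq data model):

#include <assert.h>
#include <stdbool.h>

static bool waker_still_usable(int process_refs, bool in_merge_chain)
{
	/* The merge chain itself accounts for one reference. */
	int min_refs = in_merge_chain ? 1 : 0;

	return process_refs > min_refs;
}

int main(void)
{
	assert(!waker_still_usable(1, true));	/* only the chain holds it */
	assert(!waker_still_usable(0, false));	/* no users at all */
	assert(waker_still_usable(2, true));
	return 0;
}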
@@ -275,13 +275,15 @@ void blk_mq_sysfs_unregister_hctxs(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i;
 
-	lockdep_assert_held(&q->sysfs_dir_lock);
+	mutex_lock(&q->sysfs_dir_lock);
 
 	if (!q->mq_sysfs_init_done)
-		return;
+		goto unlock;
 
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_unregister_hctx(hctx);
+
+unlock:
+	mutex_unlock(&q->sysfs_dir_lock);
 }
 
 int blk_mq_sysfs_register_hctxs(struct request_queue *q)
@@ -290,10 +292,9 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q)
 	unsigned long i;
 	int ret = 0;
 
-	lockdep_assert_held(&q->sysfs_dir_lock);
+	mutex_lock(&q->sysfs_dir_lock);
 
 	if (!q->mq_sysfs_init_done)
-		return ret;
+		goto unlock;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		ret = blk_mq_register_hctx(hctx);
@@ -301,5 +302,8 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q)
 			break;
 	}
 
+unlock:
+	mutex_unlock(&q->sysfs_dir_lock);
+
 	return ret;
 }
@@ -4412,6 +4412,15 @@ struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_mq_alloc_disk_for_queue);
 
+/*
+ * Only hctx removed from cpuhp list can be reused
+ */
+static bool blk_mq_hctx_is_reusable(struct blk_mq_hw_ctx *hctx)
+{
+	return hlist_unhashed(&hctx->cpuhp_online) &&
+		hlist_unhashed(&hctx->cpuhp_dead);
+}
+
 static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
 		struct blk_mq_tag_set *set, struct request_queue *q,
 		int hctx_idx, int node)
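hlist_unhashed() is true once a node has been unlinked from (or was never on) its list, so the new predicate only lets an hctx be reused after both of its CPU-hotplug callback nodes are gone. A standalone model with a minimal hlist node (simplified from the kernel's list types):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

struct hlist_node { struct hlist_node *next, **pprev; };

static bool hlist_unhashed(const struct hlist_node *n)
{
	return !n->pprev;	/* not linked into any list */
}

struct hctx { struct hlist_node cpuhp_online, cpuhp_dead; };

static bool hctx_is_reusable(const struct hctx *h)
{
	return hlist_unhashed(&h->cpuhp_online) && hlist_unhashed(&h->cpuhp_dead);
}

int main(void)
{
	struct hctx h = { { NULL, NULL }, { NULL, NULL } };
	assert(hctx_is_reusable(&h));

	struct hlist_node *head = &h.cpuhp_dead;
	h.cpuhp_dead.pprev = &head;	/* still on the cpuhp list */
	assert(!hctx_is_reusable(&h));
	return 0;
}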
@@ -4421,7 +4430,7 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
 	/* reuse dead hctx first */
 	spin_lock(&q->unused_hctx_lock);
 	list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) {
-		if (tmp->numa_node == node) {
+		if (tmp->numa_node == node && blk_mq_hctx_is_reusable(tmp)) {
 			hctx = tmp;
 			break;
 		}
@@ -4453,8 +4462,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 	unsigned long i, j;
 
 	/* protect against switching io scheduler */
-	lockdep_assert_held(&q->sysfs_lock);
+	mutex_lock(&q->sysfs_lock);
 
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		int old_node;
 		int node = blk_mq_get_hctx_node(set, i);
@@ -4487,6 +4495,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 
 	xa_for_each_start(&q->hctx_table, j, hctx, j)
 		blk_mq_exit_hctx(q, set, hctx, j);
+	mutex_unlock(&q->sysfs_lock);
 
 	/* unregister cpuhp callbacks for exited hctxs */
 	blk_mq_remove_hw_queues_cpuhp(q);
@@ -4518,14 +4527,10 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 	xa_init(&q->hctx_table);
 
-	mutex_lock(&q->sysfs_lock);
-
 	blk_mq_realloc_hw_ctxs(set, q);
 	if (!q->nr_hw_queues)
 		goto err_hctxs;
 
-	mutex_unlock(&q->sysfs_lock);
-
 	INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
 	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
@@ -4544,7 +4549,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	return 0;
 
 err_hctxs:
-	mutex_unlock(&q->sysfs_lock);
 	blk_mq_release(q);
 err_exit:
 	q->mq_ops = NULL;
@@ -4925,12 +4929,12 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
 		return false;
 
 	/* q->elevator needs protection from ->sysfs_lock */
-	lockdep_assert_held(&q->sysfs_lock);
+	mutex_lock(&q->sysfs_lock);
 
 	/* the check has to be done with holding sysfs_lock */
 	if (!q->elevator) {
 		kfree(qe);
-		goto out;
+		goto unlock;
 	}
 
 	INIT_LIST_HEAD(&qe->node);
|
@ -4940,7 +4944,9 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
|
||||||
__elevator_get(qe->type);
|
__elevator_get(qe->type);
|
||||||
list_add(&qe->node, head);
|
list_add(&qe->node, head);
|
||||||
elevator_disable(q);
|
elevator_disable(q);
|
||||||
out:
|
unlock:
|
||||||
|
mutex_unlock(&q->sysfs_lock);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -4969,9 +4975,11 @@ static void blk_mq_elv_switch_back(struct list_head *head,
|
||||||
list_del(&qe->node);
|
list_del(&qe->node);
|
||||||
kfree(qe);
|
kfree(qe);
|
||||||
|
|
||||||
|
mutex_lock(&q->sysfs_lock);
|
||||||
elevator_switch(q, t);
|
elevator_switch(q, t);
|
||||||
/* drop the reference acquired in blk_mq_elv_switch_none */
|
/* drop the reference acquired in blk_mq_elv_switch_none */
|
||||||
elevator_put(t);
|
elevator_put(t);
|
||||||
|
mutex_unlock(&q->sysfs_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
||||||
|
|
@@ -4991,11 +4999,8 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues)
 		return;
 
-	list_for_each_entry(q, &set->tag_list, tag_set_list) {
-		mutex_lock(&q->sysfs_dir_lock);
-		mutex_lock(&q->sysfs_lock);
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_freeze_queue(q);
-	}
 	/*
 	 * Switch IO scheduler to 'none', cleaning up the data associated
 	 * with the previous scheduler. We will switch back once we are done
@@ -5051,11 +5056,8 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_elv_switch_back(&head, q);
 
-	list_for_each_entry(q, &set->tag_list, tag_set_list) {
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_unfreeze_queue(q);
-		mutex_unlock(&q->sysfs_lock);
-		mutex_unlock(&q->sysfs_dir_lock);
-	}
 
 	/* Free the excess tags when nr_hw_queues shrink. */
 	for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++)
@@ -706,11 +706,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
 	if (entry->load_module)
 		entry->load_module(disk, page, length);
 
-	mutex_lock(&q->sysfs_lock);
 	blk_mq_freeze_queue(q);
+	mutex_lock(&q->sysfs_lock);
 	res = entry->store(disk, page, length);
-	blk_mq_unfreeze_queue(q);
 	mutex_unlock(&q->sysfs_lock);
+	blk_mq_unfreeze_queue(q);
 	return res;
 }
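The swap above gives the two operations one consistent nesting: freeze the queue first, then take sysfs_lock, releasing in reverse order. A toy illustration of the resulting lock hierarchy (the kernel's queue freeze is a percpu refcount rather than a mutex; this only models the ordering):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t freeze_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t sysfs_lock  = PTHREAD_MUTEX_INITIALIZER;

static void attr_store(const char *attr)
{
	pthread_mutex_lock(&freeze_lock);	/* blk_mq_freeze_queue() */
	pthread_mutex_lock(&sysfs_lock);
	printf("store %s\n", attr);
	pthread_mutex_unlock(&sysfs_lock);
	pthread_mutex_unlock(&freeze_lock);	/* blk_mq_unfreeze_queue() */
}

int main(void)
{
	/* Every path acquiring both locks uses this same order. */
	attr_store("scheduler");
	return 0;
}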
||||||
|
|
@ -409,7 +409,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
|
||||||
mutex_lock(&bo->lock);
|
mutex_lock(&bo->lock);
|
||||||
|
|
||||||
drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u",
|
drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u",
|
||||||
bo, bo->ctx->id, bo->vpu_addr, bo->base.base.size,
|
bo, bo->ctx ? bo->ctx->id : 0, bo->vpu_addr, bo->base.base.size,
|
||||||
bo->flags, kref_read(&bo->base.base.refcount));
|
bo->flags, kref_read(&bo->base.base.refcount));
|
||||||
|
|
||||||
if (bo->base.pages)
|
if (bo->base.pages)
|
||||||
|
|
|
||||||
|
|
@@ -612,18 +612,22 @@ int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev)
 	if (!ivpu_mmu_ensure_pgd(vdev, &vdev->rctx.pgtable)) {
 		ivpu_err(vdev, "Failed to allocate root page table for reserved context\n");
 		ret = -ENOMEM;
-		goto unlock;
+		goto err_ctx_fini;
 	}
 
 	ret = ivpu_mmu_cd_set(vdev, vdev->rctx.id, &vdev->rctx.pgtable);
 	if (ret) {
 		ivpu_err(vdev, "Failed to set context descriptor for reserved context\n");
-		goto unlock;
+		goto err_ctx_fini;
 	}
 
-unlock:
 	mutex_unlock(&vdev->rctx.lock);
 	return ret;
+
+err_ctx_fini:
+	mutex_unlock(&vdev->rctx.lock);
+	ivpu_mmu_context_fini(vdev, &vdev->rctx);
+	return ret;
 }
 
 void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev)
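The relabeled error path drops the lock and then unwinds the partially initialized reserved context instead of leaving it set up on failure. A generic sketch of that unwind shape, with hypothetical types and helpers rather than the ivpu API:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct ctx { pthread_mutex_t lock; bool initialized; };

static bool alloc_pgd(struct ctx *c) { (void)c; return false; }	/* simulate failure */
static void ctx_fini(struct ctx *c) { c->initialized = false; }

static int ctx_init(struct ctx *c)
{
	c->initialized = true;	/* partial init that must be undone on error */
	pthread_mutex_lock(&c->lock);
	if (!alloc_pgd(c))
		goto err_ctx_fini;
	pthread_mutex_unlock(&c->lock);
	return 0;

err_ctx_fini:
	pthread_mutex_unlock(&c->lock);	/* never run cleanup with the lock held */
	ctx_fini(c);
	return -1;
}

int main(void)
{
	struct ctx c = { PTHREAD_MUTEX_INITIALIZER, false };
	printf("init: %d, initialized: %d\n", ctx_init(&c), c.initialized);
	return 0;
}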
Some files were not shown because too many files have changed in this diff.