powerpc updates for 5.4

- Initial support for running on a system with an Ultravisor, which is software
    that runs below the hypervisor and protects guests against some attacks by
    the hypervisor.
 
  - Support for building the kernel to run as a "Secure Virtual Machine", ie. as
    a guest capable of running on a system with an Ultravisor.
 
  - Some changes to our DMA code on bare metal, to allow devices with medium
    sized DMA masks (> 32 && < 59 bits) to use more than 2GB of DMA space.
 
  - Support for firmware assisted crash dumps on bare metal (powernv).
 
  - Two series fixing bugs in and refactoring our PCI EEH code.
 
  - A large series refactoring our exception entry code to use gas macros, both
    to make it more readable and also enable some future optimisations.
 
 As well as many cleanups and other minor features & fixups.
 
 Thanks to:
   Adam Zerella, Alexey Kardashevskiy, Alistair Popple, Andrew Donnellan, Aneesh
   Kumar K.V, Anju T Sudhakar, Anshuman Khandual, Balbir Singh, Benjamin
   Herrenschmidt, Cédric Le Goater, Christophe JAILLET, Christophe Leroy,
   Christopher M. Riedl, Christoph Hellwig, Claudio Carvalho, Daniel Axtens,
   David Gibson, David Hildenbrand, Desnes A. Nunes do Rosario, Ganesh Goudar,
   Gautham R. Shenoy, Greg Kurz, Guerney Hunt, Gustavo Romero, Halil Pasic, Hari
   Bathini, Joakim Tjernlund, Jonathan Neuschafer, Jordan Niethe, Leonardo Bras,
   Lianbo Jiang, Madhavan Srinivasan, Mahesh Salgaonkar, Mahesh Salgaonkar,
   Masahiro Yamada, Maxiwell S. Garcia, Michael Anderson, Nathan Chancellor,
   Nathan Lynch, Naveen N. Rao, Nicholas Piggin, Oliver O'Halloran, Qian Cai, Ram
   Pai, Ravi Bangoria, Reza Arbab, Ryan Grimm, Sam Bobroff, Santosh Sivaraj,
   Segher Boessenkool, Sukadev Bhattiprolu, Thiago Bauermann, Thiago Jung
   Bauermann, Thomas Gleixner, Tom Lendacky, Vasant Hegde.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCAAxFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAl2EtEcTHG1wZUBlbGxl
 cm1hbi5pZC5hdQAKCRBR6+o8yOGlgPfsD/9uXyBXn3anI/H08+mk74k5gCsmMQpn
 D442CD/ByogZcccp23yBTlhawtCE03hcHnCLygn0Xgd8a4YvHts/RGHUe3fPHqlG
 bEyZ7jsLVz5ebNZQP7r4eGs2pSzCajwJy2N9HJ/C1ojf15rrfRxoVJtnyhE2wXpm
 DL+6o2K+nUCB3gTQ1Inr3DnWzoGOOUfNTOea2u+J+yfHwGRqOBYpevwqiwy5eelK
 aRjUJCqMTvrzra49MeFwjo0Nt3/Y8UNcwA+JlGdeR8bRuWhFrYmyBRiZEKPaujNO
 5EAfghBBlB0KQCqvF/tRM/c0OftHqK59AMobP9T7u9oOaBXeF/FpZX/iXjzNDPsN
 j9Oo2tKLTu/YVEXqBFuREGP+znANr1Wo4CFyOG8SbvYz0HFjR6XbtRJsS+0e8GWl
 kqX5/ZhYz3lBnKSNe9jgWOrh/J0KCSFigBTEWJT3xsn4YE8x8kK2l9KPqAIldWEP
 sKb2UjGS7v0NKq+NvShH88Q9AeQUEIjTcg/9aDDQDe6FaRQ7KiF8bUxSdwSPi+Fn
 j0lnF6i+1ATWZKuCr85veVi7C5qoe/+MqalnmP7MxULyzgXLLxUgN0SzEYO6QofK
 LQK/VaH2XVr5+M5YAb7K4/NX5gbM3s1bKrCiUy4EyHNvgG7gricYdbz6HgAjKpR7
 oP0rHfgmVYvF1g==
 =WlW+
 -----END PGP SIGNATURE-----

Merge tag 'powerpc-5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman:
 "This is a bit late, partly due to me travelling, and partly due to a
  power outage knocking out some of my test systems *while* I was
  travelling.

   - Initial support for running on a system with an Ultravisor, which
     is software that runs below the hypervisor and protects guests
     against some attacks by the hypervisor.

   - Support for building the kernel to run as a "Secure Virtual
     Machine", ie. as a guest capable of running on a system with an
     Ultravisor.

   - Some changes to our DMA code on bare metal, to allow devices with
     medium sized DMA masks (> 32 && < 59 bits) to use more than 2GB of
     DMA space.

   - Support for firmware assisted crash dumps on bare metal (powernv).

   - Two series fixing bugs in and refactoring our PCI EEH code.

   - A large series refactoring our exception entry code to use gas
     macros, both to make it more readable and also enable some future
     optimisations.

  As well as many cleanups and other minor features & fixups.

  Thanks to: Adam Zerella, Alexey Kardashevskiy, Alistair Popple, Andrew
  Donnellan, Aneesh Kumar K.V, Anju T Sudhakar, Anshuman Khandual,
  Balbir Singh, Benjamin Herrenschmidt, Cédric Le Goater, Christophe
  JAILLET, Christophe Leroy, Christopher M. Riedl, Christoph Hellwig,
  Claudio Carvalho, Daniel Axtens, David Gibson, David Hildenbrand,
  Desnes A. Nunes do Rosario, Ganesh Goudar, Gautham R. Shenoy, Greg
  Kurz, Guerney Hunt, Gustavo Romero, Halil Pasic, Hari Bathini, Joakim
  Tjernlund, Jonathan Neuschafer, Jordan Niethe, Leonardo Bras, Lianbo
  Jiang, Madhavan Srinivasan, Mahesh Salgaonkar, Mahesh Salgaonkar,
  Masahiro Yamada, Maxiwell S. Garcia, Michael Anderson, Nathan
  Chancellor, Nathan Lynch, Naveen N. Rao, Nicholas Piggin, Oliver
  O'Halloran, Qian Cai, Ram Pai, Ravi Bangoria, Reza Arbab, Ryan Grimm,
  Sam Bobroff, Santosh Sivaraj, Segher Boessenkool, Sukadev Bhattiprolu,
  Thiago Bauermann, Thiago Jung Bauermann, Thomas Gleixner, Tom
  Lendacky, Vasant Hegde"

* tag 'powerpc-5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (264 commits)
  powerpc/mm/mce: Keep irqs disabled during lockless page table walk
  powerpc: Use ftrace_graph_ret_addr() when unwinding
  powerpc/ftrace: Enable HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
  ftrace: Look up the address of return_to_handler() using helpers
  powerpc: dump kernel log before carrying out fadump or kdump
  docs: powerpc: Add missing documentation reference
  powerpc/xmon: Fix output of XIVE IPI
  powerpc/xmon: Improve output of XIVE interrupts
  powerpc/mm/radix: remove useless kernel messages
  powerpc/fadump: support holes in kernel boot memory area
  powerpc/fadump: remove RMA_START and RMA_END macros
  powerpc/fadump: update documentation about option to release opalcore
  powerpc/fadump: consider f/w load area
  powerpc/opalcore: provide an option to invalidate /sys/firmware/opal/core file
  powerpc/opalcore: export /sys/firmware/opal/core for analysing opal crashes
  powerpc/fadump: update documentation about CONFIG_PRESERVE_FA_DUMP
  powerpc/fadump: add support to preserve crash data on FADUMP disabled kernel
  powerpc/fadump: improve how crashed kernel's memory is reserved
  powerpc/fadump: consider reserved ranges while releasing memory
  powerpc/fadump: make crash memory ranges array allocation generic
  ...
This commit is contained in:
Linus Torvalds 2019-09-20 11:48:06 -07:00
commit 45824fc0da
247 changed files with 9763 additions and 5475 deletions

View File

@ -562,3 +562,13 @@ Description: Umwait control
or C0.2 state. The time is an unsigned 32-bit number.
Note that a value of zero means there is no limit.
Low order two bits must be zero.
What: /sys/devices/system/cpu/svm
Date: August 2019
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
Description: Secure Virtual Machine
If 1, it means the system is using the Protected Execution
Facility in POWER9 and newer processors. i.e., it is a Secure
Virtual Machine.

View File

@ -860,6 +860,10 @@
disable_radix [PPC]
Disable RADIX MMU mode on POWER9
disable_tlbie [PPC]
Disable TLBIE instruction. Currently does not work
with KVM, with HASH MMU, or with coherent accelerators.
disable_cpu_apicid= [X86,APIC,SMP]
Format: <int>
The number of initial APIC ID for the
@ -4641,6 +4645,11 @@
/sys/power/pm_test). Only available when CONFIG_PM_DEBUG
is set. Default value is 5.
svm= [PPC]
Format: { on | off | y | n | 1 | 0 }
This parameter controls use of the Protected
Execution Facility on pSeries.
swapaccount=[0|1]
[KNL] Enable accounting of swap in memory resource
controller if no parameter or 1 is given or disable
@ -5326,3 +5335,22 @@
A hex value specifying bitmask with supplemental xhci
host controller quirks. Meaning of each bit can be
consulted in header drivers/usb/host/xhci.h.
xmon [PPC]
Format: { early | on | rw | ro | off }
Controls if xmon debugger is enabled. Default is off.
Passing only "xmon" is equivalent to "xmon=early".
early Call xmon as early as possible on boot; xmon
debugger is called from setup_arch().
on xmon debugger hooks will be installed so xmon
is only called on a kernel crash. Default mode,
i.e. either "ro" or "rw" mode, is controlled
with CONFIG_XMON_DEFAULT_RO_MODE.
rw xmon debugger hooks will be installed so xmon
is called only on a kernel crash, mode is write,
meaning SPR registers, memory and, other data
can be written using xmon commands.
ro same as "rw" option above but SPR registers,
memory, and other data can't be written using
xmon commands.
off xmon is disabled.

View File

@ -0,0 +1,41 @@
==========================
ELF Note PowerPC Namespace
==========================
The PowerPC namespace in an ELF Note of the kernel binary is used to store
capabilities and information which can be used by a bootloader or userland.
Types and Descriptors
---------------------
The types to be used with the "PowerPC" namesapce are defined in [#f1]_.
1) PPC_ELFNOTE_CAPABILITIES
Define the capabilities supported/required by the kernel. This type uses a
bitmap as "descriptor" field. Each bit is described below:
- Ultravisor-capable bit (PowerNV only).
.. code-block:: c
#define PPCCAP_ULTRAVISOR_BIT (1 << 0)
Indicate that the powerpc kernel binary knows how to run in an
ultravisor-enabled system.
In an ultravisor-enabled system, some machine resources are now controlled
by the ultravisor. If the kernel is not ultravisor-capable, but it ends up
being run on a machine with ultravisor, the kernel will probably crash
trying to access ultravisor resources. For instance, it may crash in early
boot trying to set the partition table entry 0.
In an ultravisor-enabled system, a bootloader could warn the user or prevent
the kernel from being run if the PowerPC ultravisor capability doesn't exist
or the Ultravisor-capable bit is not set.
References
----------
.. [#f1] arch/powerpc/include/asm/elfnote.h

View File

@ -9,18 +9,18 @@ a crashed system, and to do so from a fully-reset system, and
to minimize the total elapsed time until the system is back
in production use.
- Firmware assisted dump (fadump) infrastructure is intended to replace
- Firmware-Assisted Dump (FADump) infrastructure is intended to replace
the existing phyp assisted dump.
- Fadump uses the same firmware interfaces and memory reservation model
as phyp assisted dump.
- Unlike phyp dump, fadump exports the memory dump through /proc/vmcore
- Unlike phyp dump, FADump exports the memory dump through /proc/vmcore
in the ELF format in the same way as kdump. This helps us reuse the
kdump infrastructure for dump capture and filtering.
- Unlike phyp dump, userspace tool does not need to refer any sysfs
interface while reading /proc/vmcore.
- Unlike phyp dump, fadump allows user to release all the memory reserved
- Unlike phyp dump, FADump allows user to release all the memory reserved
for dump, with a single operation of echo 1 > /sys/kernel/fadump_release_mem.
- Once enabled through kernel boot parameter, fadump can be
- Once enabled through kernel boot parameter, FADump can be
started/stopped through /sys/kernel/fadump_registered interface (see
sysfs files section below) and can be easily integrated with kdump
service start/stop init scripts.
@ -34,7 +34,7 @@ dump offers several strong, practical advantages:
in a clean, consistent state.
- Once the dump is copied out, the memory that held the dump
is immediately available to the running kernel. And therefore,
unlike kdump, fadump doesn't need a 2nd reboot to get back
unlike kdump, FADump doesn't need a 2nd reboot to get back
the system to the production configuration.
The above can only be accomplished by coordination with,
@ -46,10 +46,9 @@ as follows:
These registered sections of memory are reserved by the first
kernel during early boot.
- When a system crashes, the Power firmware will save
the low memory (boot memory of size larger of 5% of system RAM
or 256MB) of RAM to the previous registered region. It will
also save system registers, and hardware PTE's.
- When system crashes, the Power firmware will copy the registered
low memory regions (boot memory) from source to destination area.
It will also save hardware PTE's.
NOTE:
The term 'boot memory' means size of the low memory chunk
@ -61,9 +60,9 @@ as follows:
the default calculated size. Use this option if default
boot memory size is not sufficient for second kernel to
boot successfully. For syntax of crashkernel= parameter,
refer to Documentation/admin-guide/kdump/kdump.rst. If any offset is
provided in crashkernel= parameter, it will be ignored
as fadump uses a predefined offset to reserve memory
refer to Documentation/admin-guide/kdump/kdump.rst. If any
offset is provided in crashkernel= parameter, it will be
ignored as FADump uses a predefined offset to reserve memory
for boot memory dump preservation in case of a crash.
- After the low memory (boot memory) area has been saved, the
@ -71,13 +70,15 @@ as follows:
*not* clear the RAM. It will then launch the bootloader, as
normal.
- The freshly booted kernel will notice that there is a new
node (ibm,dump-kernel) in the device tree, indicating that
- The freshly booted kernel will notice that there is a new node
(rtas/ibm,kernel-dump on pSeries or ibm,opal/dump/mpipl-boot
on OPAL platform) in the device tree, indicating that
there is crash data available from a previous boot. During
the early boot OS will reserve rest of the memory above
boot memory size effectively booting with restricted memory
size. This will make sure that the second kernel will not
touch any of the dump memory area.
size. This will make sure that this kernel (also, referred
to as second kernel or capture kernel) will not touch any
of the dump memory area.
- User-space tools will read /proc/vmcore to obtain the contents
of memory, which holds the previous crashed kernel dump in ELF
@ -94,8 +95,30 @@ as follows:
# echo 1 > /sys/kernel/fadump_release_mem
Please note that the firmware-assisted dump feature
is only available on Power6 and above systems with recent
firmware versions.
is only available on POWER6 and above systems on pSeries
(PowerVM) platform and POWER9 and above systems with OP940
or later firmware versions on PowerNV (OPAL) platform.
Note that, OPAL firmware exports ibm,opal/dump node when
FADump is supported on PowerNV platform.
On OPAL based machines, system first boots into an intermittent
kernel (referred to as petitboot kernel) before booting into the
capture kernel. This kernel would have minimal kernel and/or
userspace support to process crash data. Such kernel needs to
preserve previously crash'ed kernel's memory for the subsequent
capture kernel boot to process this crash data. Kernel config
option CONFIG_PRESERVE_FA_DUMP has to be enabled on such kernel
to ensure that crash data is preserved to process later.
-- On OPAL based machines (PowerNV), if the kernel is build with
CONFIG_OPAL_CORE=y, OPAL memory at the time of crash is also
exported as /sys/firmware/opal/core file. This procfs file is
helpful in debugging OPAL crashes with GDB. The kernel memory
used for exporting this procfs file can be released by echo'ing
'1' to /sys/kernel/fadump_release_opalcore node.
e.g.
# echo 1 > /sys/kernel/fadump_release_opalcore
Implementation details:
-----------------------
@ -110,72 +133,95 @@ that are run. If there is dump data, then the
/sys/kernel/fadump_release_mem file is created, and the reserved
memory is held.
If there is no waiting dump data, then only the memory required
to hold CPU state, HPTE region, boot memory dump and elfcore
header, is usually reserved at an offset greater than boot memory
size (see Fig. 1). This area is *not* released: this region will
be kept permanently reserved, so that it can act as a receptacle
for a copy of the boot memory content in addition to CPU state
and HPTE region, in the case a crash does occur. Since this reserved
memory area is used only after the system crash, there is no point in
blocking this significant chunk of memory from production kernel.
Hence, the implementation uses the Linux kernel's Contiguous Memory
Allocator (CMA) for memory reservation if CMA is configured for kernel.
With CMA reservation this memory will be available for applications to
use it, while kernel is prevented from using it. With this fadump will
still be able to capture all of the kernel memory and most of the user
space memory except the user pages that were present in CMA region::
If there is no waiting dump data, then only the memory required to
hold CPU state, HPTE region, boot memory dump, FADump header and
elfcore header, is usually reserved at an offset greater than boot
memory size (see Fig. 1). This area is *not* released: this region
will be kept permanently reserved, so that it can act as a receptacle
for a copy of the boot memory content in addition to CPU state and
HPTE region, in the case a crash does occur.
Since this reserved memory area is used only after the system crash,
there is no point in blocking this significant chunk of memory from
production kernel. Hence, the implementation uses the Linux kernel's
Contiguous Memory Allocator (CMA) for memory reservation if CMA is
configured for kernel. With CMA reservation this memory will be
available for applications to use it, while kernel is prevented from
using it. With this FADump will still be able to capture all of the
kernel memory and most of the user space memory except the user pages
that were present in CMA region::
o Memory Reservation during first kernel
Low memory Top of memory
0 boot memory size |
| | |<--Reserved dump area -->| |
V V | Permanent Reservation | V
+-----------+----------/ /---+---+----+-----------+----+------+
| | |CPU|HPTE| DUMP |ELF | |
+-----------+----------/ /---+---+----+-----------+----+------+
| ^
| |
\ /
-------------------------------------------
Boot memory content gets transferred to
reserved area by firmware at the time of
crash
Low memory Top of memory
0 boot memory size |<--- Reserved dump area --->| |
| | | Permanent Reservation | |
V V | | V
+-----------+-----/ /---+---+----+-------+-----+-----+----+--+
| | |///|////| DUMP | HDR | ELF |////| |
+-----------+-----/ /---+---+----+-------+-----+-----+----+--+
| ^ ^ ^ ^ ^
| | | | | |
\ CPU HPTE / | |
------------------------------ | |
Boot memory content gets transferred | |
to reserved area by firmware at the | |
time of crash. | |
FADump Header |
(meta area) |
|
|
Metadata: This area holds a metadata struture whose
address is registered with f/w and retrieved in the
second kernel after crash, on platforms that support
tags (OPAL). Having such structure with info needed
to process the crashdump eases dump capture process.
Fig. 1
o Memory Reservation during second kernel after crash
Low memory Top of memory
0 boot memory size |
| |<------------- Reserved dump area ----------- -->|
V V V
+-----------+----------/ /---+---+----+-----------+----+------+
| | |CPU|HPTE| DUMP |ELF | |
+-----------+----------/ /---+---+----+-----------+----+------+
| |
V V
Used by second /proc/vmcore
Low memory Top of memory
0 boot memory size |
| |<------------ Crash preserved area ------------>|
V V |<--- Reserved dump area --->| |
+-----------+-----/ /---+---+----+-------+-----+-----+----+--+
| | |///|////| DUMP | HDR | ELF |////| |
+-----------+-----/ /---+---+----+-------+-----+-----+----+--+
| |
V V
Used by second /proc/vmcore
kernel to boot
+---+
|///| -> Regions (CPU, HPTE & Metadata) marked like this in the above
+---+ figures are not always present. For example, OPAL platform
does not have CPU & HPTE regions while Metadata region is
not supported on pSeries currently.
Fig. 2
Currently the dump will be copied from /proc/vmcore to a
a new file upon user intervention. The dump data available through
/proc/vmcore will be in ELF format. Hence the existing kdump
infrastructure (kdump scripts) to save the dump works fine with
minor modifications.
Currently the dump will be copied from /proc/vmcore to a new file upon
user intervention. The dump data available through /proc/vmcore will be
in ELF format. Hence the existing kdump infrastructure (kdump scripts)
to save the dump works fine with minor modifications. KDump scripts on
major Distro releases have already been modified to work seemlessly (no
user intervention in saving the dump) when FADump is used, instead of
KDump, as dump mechanism.
The tools to examine the dump will be same as the ones
used for kdump.
How to enable firmware-assisted dump (fadump):
How to enable firmware-assisted dump (FADump):
----------------------------------------------
1. Set config option CONFIG_FA_DUMP=y and build kernel.
2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
By default, fadump reserved memory will be initialized as CMA area.
By default, FADump reserved memory will be initialized as CMA area.
Alternatively, user can boot linux kernel with 'fadump=nocma' to
prevent fadump to use CMA.
prevent FADump to use CMA.
3. Optionally, user can also set 'crashkernel=' kernel cmdline
to specify size of the memory to reserve for boot memory dump
preservation.
@ -201,29 +247,29 @@ the control files and debugfs file to display memory reserved region.
Here is the list of files under kernel sysfs:
/sys/kernel/fadump_enabled
This is used to display the fadump status.
This is used to display the FADump status.
- 0 = fadump is disabled
- 1 = fadump is enabled
- 0 = FADump is disabled
- 1 = FADump is enabled
This interface can be used by kdump init scripts to identify if
fadump is enabled in the kernel and act accordingly.
FADump is enabled in the kernel and act accordingly.
/sys/kernel/fadump_registered
This is used to display the fadump registration status as well
as to control (start/stop) the fadump registration.
This is used to display the FADump registration status as well
as to control (start/stop) the FADump registration.
- 0 = fadump is not registered.
- 1 = fadump is registered and ready to handle system crash.
- 0 = FADump is not registered.
- 1 = FADump is registered and ready to handle system crash.
To register fadump echo 1 > /sys/kernel/fadump_registered and
To register FADump echo 1 > /sys/kernel/fadump_registered and
echo 0 > /sys/kernel/fadump_registered for un-register and stop the
fadump. Once the fadump is un-registered, the system crash will not
FADump. Once the FADump is un-registered, the system crash will not
be handled and vmcore will not be captured. This interface can be
easily integrated with kdump service start/stop.
/sys/kernel/fadump_release_mem
This file is available only when fadump is active during
This file is available only when FADump is active during
second kernel. This is used to release the reserved memory
region that are held for saving crash dump. To release the
reserved memory echo 1 to it::
@ -237,25 +283,38 @@ Here is the list of files under kernel sysfs:
enhanced to use this interface to release the memory reserved for
dump and continue without 2nd reboot.
/sys/kernel/fadump_release_opalcore
This file is available only on OPAL based machines when FADump is
active during capture kernel. This is used to release the memory
used by the kernel to export /sys/firmware/opal/core file. To
release this memory, echo '1' to it:
echo 1 > /sys/kernel/fadump_release_opalcore
Here is the list of files under powerpc debugfs:
(Assuming debugfs is mounted on /sys/kernel/debug directory.)
/sys/kernel/debug/powerpc/fadump_region
This file shows the reserved memory regions if fadump is
This file shows the reserved memory regions if FADump is
enabled otherwise this file is empty. The output format
is::
<region>: [<start>-<end>] <reserved-size> bytes, Dumped: <dump-size>
and for kernel DUMP region is:
DUMP: Src: <src-addr>, Dest: <dest-addr>, Size: <size>, Dumped: # bytes
e.g.
Contents when fadump is registered during first kernel::
Contents when FADump is registered during first kernel::
# cat /sys/kernel/debug/powerpc/fadump_region
CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x0
HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0
DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0
Contents when fadump is active during second kernel::
Contents when FADump is active during second kernel::
# cat /sys/kernel/debug/powerpc/fadump_region
CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020
@ -263,6 +322,7 @@ Here is the list of files under powerpc debugfs:
DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x10000000
: [0x00000010000000-0x0000006ffaffff] 0x5ffb0000 bytes, Dumped: 0x5ffb0000
NOTE:
Please refer to Documentation/filesystems/debugfs.txt on
how to mount the debugfs filesystem.
@ -273,7 +333,7 @@ TODO:
- Need to come up with the better approach to find out more
accurate boot memory size that is required for a kernel to
boot successfully when booted with restricted memory.
- The fadump implementation introduces a fadump crash info structure
- The FADump implementation introduces a FADump crash info structure
in the scratch area before the ELF core header. The idea of introducing
this structure is to pass some important crash info data to the second
kernel which will help second kernel to populate ELF core header with

View File

@ -15,6 +15,7 @@ powerpc
dawr-power9
dscr
eeh-pci-error-recovery
elfnote
firmware-assisted-dump
hvcs
isa-versions
@ -25,6 +26,7 @@ powerpc
qe_firmware
syscall64-abi
transactional_memory
ultravisor
.. only:: subproject and html

File diff suppressed because it is too large Load Diff

View File

@ -946,6 +946,9 @@ config RELR
well as compatible NM and OBJCOPY utilities (llvm-nm and llvm-objcopy
are compatible).
config ARCH_HAS_MEM_ENCRYPT
bool
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"

View File

@ -128,14 +128,15 @@ config PPC
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
select ARCH_HAS_MMIOWB if PPC64
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_PMEM_API if PPC64
select ARCH_HAS_PMEM_API
select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_UACCESS_MCSAFE if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
@ -183,6 +184,7 @@ config PPC
select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_CONTEXT_TRACKING if PPC64
select HAVE_COPY_THREAD_TLS
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DYNAMIC_FTRACE
@ -568,7 +570,7 @@ config CRASH_DUMP
config FA_DUMP
bool "Firmware-assisted dump"
depends on PPC64 && PPC_RTAS
depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
select CRASH_CORE
select CRASH_DUMP
help
@ -579,7 +581,26 @@ config FA_DUMP
is meant to be a kdump replacement offering robustness and
speed not possible without system firmware assistance.
If unsure, say "N"
If unsure, say "y". Only special kernels like petitboot may
need to say "N" here.
config PRESERVE_FA_DUMP
bool "Preserve Firmware-assisted dump"
depends on PPC64 && PPC_POWERNV && !FA_DUMP
help
On a kernel with FA_DUMP disabled, this option helps to preserve
crash data from a previously crash'ed kernel. Useful when the next
memory preserving kernel boot would process this crash data.
Petitboot kernel is the typical usecase for this option.
config OPAL_CORE
bool "Export OPAL memory as /sys/firmware/opal/core"
depends on PPC64 && PPC_POWERNV
help
This option uses the MPIPL support in firmware to provide an
ELF core of OPAL memory after a crash. The ELF core is exported
as /sys/firmware/opal/core file which is helpful in debugging
OPAL crashes using GDB.
config IRQ_ALL_CPUS
bool "Distribute interrupts on all CPUs by default"
@ -1140,18 +1161,6 @@ config TASK_SIZE
default "0x80000000" if PPC_8xx
default "0xc0000000"
config CONSISTENT_SIZE_BOOL
bool "Set custom consistent memory pool size"
depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
help
This option allows you to set the size of the
consistent memory pool. This pool of virtual memory
is used to make consistent memory allocations.
config CONSISTENT_SIZE
hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL
default "0x00200000" if NOT_COHERENT_CACHE
config PIN_TLB
bool "Pinned Kernel TLBs (860 ONLY)"
depends on ADVANCED_OPTIONS && PPC_8xx && \

View File

@ -110,7 +110,6 @@ ifeq ($(HAS_BIARCH),y)
KBUILD_CFLAGS += -m$(BITS)
KBUILD_AFLAGS += -m$(BITS) -Wl,-a$(BITS)
KBUILD_LDFLAGS += -m elf$(BITS)$(LDEMULATION)
KBUILD_ARFLAGS += --target=elf$(BITS)-$(GNUTARGET)
endif
cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard=tls

View File

@ -146,6 +146,46 @@ static struct addr_range prep_initrd(struct addr_range vmlinux, void *chosen,
return (struct addr_range){(void *)initrd_addr, initrd_size};
}
#ifdef __powerpc64__
static void prep_esm_blob(struct addr_range vmlinux, void *chosen)
{
unsigned long esm_blob_addr, esm_blob_size;
/* Do we have an ESM (Enter Secure Mode) blob? */
if (_esm_blob_end <= _esm_blob_start)
return;
printf("Attached ESM blob at 0x%p-0x%p\n\r",
_esm_blob_start, _esm_blob_end);
esm_blob_addr = (unsigned long)_esm_blob_start;
esm_blob_size = _esm_blob_end - _esm_blob_start;
/*
* If the ESM blob is too low it will be clobbered when the
* kernel relocates to its final location. In this case,
* allocate a safer place and move it.
*/
if (esm_blob_addr < vmlinux.size) {
void *old_addr = (void *)esm_blob_addr;
printf("Allocating 0x%lx bytes for esm_blob ...\n\r",
esm_blob_size);
esm_blob_addr = (unsigned long)malloc(esm_blob_size);
if (!esm_blob_addr)
fatal("Can't allocate memory for ESM blob !\n\r");
printf("Relocating ESM blob 0x%lx <- 0x%p (0x%lx bytes)\n\r",
esm_blob_addr, old_addr, esm_blob_size);
memmove((void *)esm_blob_addr, old_addr, esm_blob_size);
}
/* Tell the kernel ESM blob address via device tree. */
setprop_val(chosen, "linux,esm-blob-start", (u32)(esm_blob_addr));
setprop_val(chosen, "linux,esm-blob-end", (u32)(esm_blob_addr + esm_blob_size));
}
#else
static inline void prep_esm_blob(struct addr_range vmlinux, void *chosen) { }
#endif
/* A buffer that may be edited by tools operating on a zImage binary so as to
* edit the command line passed to vmlinux (by setting /chosen/bootargs).
* The buffer is put in it's own section so that tools may locate it easier.
@ -214,6 +254,7 @@ void start(void)
vmlinux = prep_kernel();
initrd = prep_initrd(vmlinux, chosen,
loader_info.initrd_addr, loader_info.initrd_size);
prep_esm_blob(vmlinux, chosen);
prep_cmdline(chosen);
printf("Finalizing device tree...");

View File

@ -251,6 +251,8 @@ extern char _initrd_start[];
extern char _initrd_end[];
extern char _dtb_start[];
extern char _dtb_end[];
extern char _esm_blob_start[];
extern char _esm_blob_end[];
static inline __attribute__((const))
int __ilog2_u32(u32 n)

View File

@ -13,6 +13,7 @@
# -i initrd specify initrd file
# -d devtree specify device-tree blob
# -s tree.dts specify device-tree source file (needs dtc installed)
# -e esm_blob specify ESM blob for secure images
# -c cache $kernel.strip.gz (use if present & newer, else make)
# -C prefix specify command prefix for cross-building tools
# (strip, objcopy, ld)
@ -37,6 +38,7 @@ platform=of
initrd=
dtb=
dts=
esm_blob=
cacheit=
binary=
compression=.gz
@ -60,9 +62,9 @@ tmpdir=.
usage() {
echo 'Usage: wrapper [-o output] [-p platform] [-i initrd]' >&2
echo ' [-d devtree] [-s tree.dts] [-c] [-C cross-prefix]' >&2
echo ' [-D datadir] [-W workingdir] [-Z (gz|xz|none)]' >&2
echo ' [--no-compression] [vmlinux]' >&2
echo ' [-d devtree] [-s tree.dts] [-e esm_blob]' >&2
echo ' [-c] [-C cross-prefix] [-D datadir] [-W workingdir]' >&2
echo ' [-Z (gz|xz|none)] [--no-compression] [vmlinux]' >&2
exit 1
}
@ -105,6 +107,11 @@ while [ "$#" -gt 0 ]; do
[ "$#" -gt 0 ] || usage
dtb="$1"
;;
-e)
shift
[ "$#" -gt 0 ] || usage
esm_blob="$1"
;;
-s)
shift
[ "$#" -gt 0 ] || usage
@ -218,9 +225,16 @@ objflags=-S
tmp=$tmpdir/zImage.$$.o
ksection=.kernel:vmlinux.strip
isection=.kernel:initrd
esection=.kernel:esm_blob
link_address='0x400000'
make_space=y
if [ -n "$esm_blob" -a "$platform" != "pseries" ]; then
echo "ESM blob not support on non-pseries platforms" >&2
exit 1
fi
case "$platform" in
of)
platformo="$object/of.o $object/epapr.o"
@ -477,6 +491,10 @@ if [ -n "$dtb" ]; then
fi
fi
if [ -n "$esm_blob" ]; then
addsec $tmp "$esm_blob" $esection
fi
if [ "$platform" != "miboot" ]; then
if [ -n "$link_address" ] ; then
text_start="-Ttext $link_address"

View File

@ -68,6 +68,14 @@ SECTIONS
_initrd_end = .;
}
. = ALIGN(4096);
.kernel:esm_blob :
{
_esm_blob_start = .;
*(.kernel:esm_blob)
_esm_blob_end = .;
}
#ifdef CONFIG_PPC64_BOOT_WRAPPER
. = ALIGN(256);
.got :

View File

@ -20,7 +20,6 @@ CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_PMAC=y
CONFIG_PPC601_SYNC_FIX=y
CONFIG_GEN_RTC=y
CONFIG_HIGHMEM=y
CONFIG_BINFMT_MISC=m

View File

@ -38,7 +38,7 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_SCOM_DEBUGFS=y
# CONFIG_SCOM_DEBUGFS is not set
CONFIG_OPAL_PRD=y
CONFIG_PPC_MEMTRACE=y
# CONFIG_PPC_PSERIES is not set

View File

@ -84,4 +84,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
CONFIG_PPC4xx_OCM=y

View File

@ -29,6 +29,7 @@ CONFIG_DTL=y
CONFIG_SCANLOG=m
CONFIG_PPC_SMLPAR=y
CONFIG_IBMEBUS=y
CONFIG_PPC_SVM=y
CONFIG_PPC_MAPLE=y
CONFIG_PPC_PASEMI=y
CONFIG_PPC_PASEMI_IOMMU=y

View File

@ -42,6 +42,7 @@ CONFIG_DTL=y
CONFIG_SCANLOG=m
CONFIG_PPC_SMLPAR=y
CONFIG_IBMEBUS=y
CONFIG_PPC_SVM=y
# CONFIG_PPC_PMAC is not set
CONFIG_RTAS_FLASH=m
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y

View File

@ -213,6 +213,7 @@ CONFIG_IPMI_WATCHDOG=y
CONFIG_HW_RANDOM=y
CONFIG_TCG_TPM=y
CONFIG_TCG_TIS_I2C_NUVOTON=y
# CONFIG_DEVPORT is not set
CONFIG_I2C=y
# CONFIG_I2C_COMPAT is not set
CONFIG_I2C_CHARDEV=y

View File

@ -15,6 +15,7 @@
#include <asm/epapr_hcalls.h>
#include <asm/dcr.h>
#include <asm/mmu_context.h>
#include <asm/ultravisor-api.h>
#include <uapi/asm/ucontext.h>
@ -34,6 +35,16 @@ extern struct static_key hcall_tracepoint_key;
void __trace_hcall_entry(unsigned long opcode, unsigned long *args);
void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf);
/* Ultravisor */
#if defined(CONFIG_PPC_POWERNV) || defined(CONFIG_PPC_SVM)
long ucall_norets(unsigned long opcode, ...);
#else
static inline long ucall_norets(unsigned long opcode, ...)
{
return U_NOT_AVAILABLE;
}
#endif
/* OPAL */
int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
int64_t a4, int64_t a5, int64_t a6, int64_t a7,
@ -123,7 +134,8 @@ extern int __ucmpdi2(u64, u64);
/* tracing */
void _mcount(void);
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
unsigned long sp);
void pnv_power9_force_smt4_catch(void);
void pnv_power9_force_smt4_release(void);

View File

@ -148,23 +148,21 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
*/
#include <asm/fixmap.h>
#ifdef CONFIG_HIGHMEM
#define KVIRT_TOP PKMAP_BASE
#else
#define KVIRT_TOP FIXADDR_START
#endif
/*
* ioremap_bot starts at that address. Early ioremaps move down from there,
* until mem_init() at which point this becomes the top of the vmalloc
* and ioremap space
*/
#ifdef CONFIG_NOT_COHERENT_CACHE
#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
#ifdef CONFIG_HIGHMEM
#define IOREMAP_TOP PKMAP_BASE
#else
#define IOREMAP_TOP KVIRT_TOP
#define IOREMAP_TOP FIXADDR_START
#endif
/* PPC32 shares vmalloc area with ioremap */
#define IOREMAP_START VMALLOC_START
#define IOREMAP_END VMALLOC_END
/*
* Just any arbitrary offset to the start of the vmalloc VM area: the
* current 16MB value just means that there will be a 64MB "hole" after the
@ -201,8 +199,6 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
#include <linux/sched.h>
#include <linux/threads.h>
extern unsigned long ioremap_bot;
/* Bits to mask out from a PGD to get to the PUD page */
#define PGD_MASKED_BITS 0

View File

@ -206,7 +206,6 @@ extern int mmu_io_psize;
void mmu_early_init_devtree(void);
void hash__early_init_devtree(void);
void radix__early_init_devtree(void);
extern void radix_init_native(void);
extern void hash__early_init_mmu(void);
extern void radix__early_init_mmu(void);
static inline void early_init_mmu(void)
@ -238,9 +237,6 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
first_memblock_size);
}
extern int (*register_process_table)(unsigned long base, unsigned long page_size,
unsigned long tbl_size);
#ifdef CONFIG_PPC_PSERIES
extern void radix_init_pseries(void);
#else

View File

@ -289,7 +289,6 @@ extern unsigned long __kernel_io_end;
#define KERN_IO_END __kernel_io_end
extern struct page *vmemmap;
extern unsigned long ioremap_bot;
extern unsigned long pci_io_base;
#endif /* __ASSEMBLY__ */
@ -317,6 +316,7 @@ extern unsigned long pci_io_base;
#define PHB_IO_BASE (ISA_IO_END)
#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
#define IOREMAP_BASE (PHB_IO_END)
#define IOREMAP_START (ioremap_bot)
#define IOREMAP_END (KERN_IO_END)
/* Advertise special mapping type for AGP */
@ -608,8 +608,10 @@ static inline bool pte_access_permitted(pte_t pte, bool write)
*/
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
{
return __pte((((pte_basic_t)(pfn) << PAGE_SHIFT) & PTE_RPN_MASK) |
pgprot_val(pgprot));
VM_BUG_ON(pfn >> (64 - PAGE_SHIFT));
VM_BUG_ON((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK);
return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot));
}
static inline unsigned long pte_pfn(pte_t pte)

View File

@ -266,9 +266,6 @@ extern void radix__vmemmap_remove_mapping(unsigned long start,
extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
pgprot_t flags, unsigned int psz);
extern int radix__ioremap_range(unsigned long ea, phys_addr_t pa,
unsigned long size, pgprot_t prot, int nid);
static inline unsigned long radix__get_tree_size(void)
{
unsigned long rts_field;

View File

@ -17,8 +17,8 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid,
unsigned long addr,
unsigned long page_size);
extern void radix__flush_pwc_lpid(unsigned int lpid);
extern void radix__flush_tlb_lpid(unsigned int lpid);
extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
extern void radix__flush_all_lpid(unsigned int lpid);
extern void radix__flush_all_lpid_guest(unsigned int lpid);
#else
static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); };
static inline void radix__flush_tlb_lpid_page(unsigned int lpid,
@ -31,11 +31,7 @@ static inline void radix__flush_pwc_lpid(unsigned int lpid)
{
WARN_ON(1);
}
static inline void radix__flush_tlb_lpid(unsigned int lpid)
{
WARN_ON(1);
}
static inline void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
static inline void radix__flush_all_lpid(unsigned int lpid)
{
WARN_ON(1);
}
@ -73,6 +69,4 @@ extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr);
extern void radix__flush_tlb_all(void);
extern void radix__local_flush_tlb_lpid(unsigned int lpid);
#endif

View File

@ -162,4 +162,13 @@ static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long addre
radix__flush_tlb_pwc(tlb, address);
}
extern bool tlbie_capable;
extern bool tlbie_enabled;
static inline bool cputlb_use_tlbie(void)
{
return tlbie_enabled;
}
#endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */

View File

@ -26,5 +26,16 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot);
#define __HAVE_PHYS_MEM_ACCESS_PROT
/*
* This gets called at the end of handling a page fault, when
* the kernel has put a new PTE into the page table for the process.
* We use it to ensure coherency between the i-cache and d-cache
* for the page which has just been mapped in.
* On machines which use an MMU hash table, we use this to put a
* corresponding HPTE into the hash table ahead of time, instead of
* waiting for the inevitable extra hash-table miss exception.
*/
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
#endif /* __ASSEMBLY__ */
#endif

View File

@ -5,14 +5,6 @@
#include <asm/asm-compat.h>
/*
* Define an illegal instr to trap on the bug.
* We don't use 0 because that marks the end of a function
* in the ELF ABI. That's "Boo Boo" in case you wonder...
*/
#define BUG_OPCODE .long 0x00b00b00 /* For asm */
#define BUG_ILLEGAL_INSTR "0x00b00b00" /* For BUG macro */
#ifdef CONFIG_BUG
#ifdef __ASSEMBLY__

View File

@ -145,12 +145,10 @@ static inline void cpu_feature_keys_init(void) { }
/* Definitions for features that only exist on 32-bit chips */
#ifdef CONFIG_PPC32
#define CPU_FTR_601 ASM_CONST(0x00001000)
#define CPU_FTR_L2CR ASM_CONST(0x00002000)
#define CPU_FTR_SPEC7450 ASM_CONST(0x00004000)
#define CPU_FTR_TAU ASM_CONST(0x00008000)
#define CPU_FTR_CAN_DOZE ASM_CONST(0x00010000)
#define CPU_FTR_USE_RTC ASM_CONST(0x00020000)
#define CPU_FTR_L3CR ASM_CONST(0x00040000)
#define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x00080000)
#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x00100000)
@ -160,14 +158,12 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_NEED_COHERENT ASM_CONST(0x01000000)
#define CPU_FTR_NO_BTIC ASM_CONST(0x02000000)
#define CPU_FTR_PPC_LE ASM_CONST(0x04000000)
#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x08000000)
#define CPU_FTR_SPE ASM_CONST(0x10000000)
#define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x20000000)
#define CPU_FTR_INDEXED_DCR ASM_CONST(0x40000000)
#else /* CONFIG_PPC32 */
/* Define these to 0 for the sake of tests in common code */
#define CPU_FTR_601 (0)
#define CPU_FTR_PPC_LE (0)
#endif
@ -294,8 +290,8 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_MAYBE_CAN_NAP 0
#endif
#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_USE_RTC)
#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | \
CPU_FTR_COHERENT_ICACHE)
#define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE | CPU_FTR_NOEXECUTE)
#define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_PPC_LE)
@ -386,7 +382,7 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTRS_47X (CPU_FTRS_440x6)
#define CPU_FTRS_E200 (CPU_FTR_SPE_COMP | \
CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \
CPU_FTR_NOEXECUTE | \
CPU_FTR_DEBUG_LVL_EXC)
#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
@ -498,7 +494,9 @@ static inline void cpu_feature_keys_init(void) { }
#else
enum {
CPU_FTRS_POSSIBLE =
#ifdef CONFIG_PPC_BOOK3S_32
#ifdef CONFIG_PPC_BOOK3S_601
CPU_FTRS_PPC601 |
#elif defined(CONFIG_PPC_BOOK3S_32)
CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU |
CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 |
CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX |
@ -574,8 +572,10 @@ enum {
#else
enum {
CPU_FTRS_ALWAYS =
#ifdef CONFIG_PPC_BOOK3S_32
CPU_FTRS_PPC601 & CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
#ifdef CONFIG_PPC_BOOK3S_601
CPU_FTRS_PPC601 &
#elif defined(CONFIG_PPC_BOOK3S_32)
CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 &
CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX &
CPU_FTRS_7400_NOTAU & CPU_FTRS_7400 & CPU_FTRS_7450_20 &

View File

@ -16,7 +16,8 @@ static inline struct task_struct *get_current(void)
{
struct task_struct *task;
__asm__ __volatile__("ld %0,%1(13)"
/* get_current can be cached by the compiler, so no volatile */
asm ("ld %0,%1(13)"
: "=r" (task)
: "i" (offsetof(struct paca_struct, __current)));

View File

@ -88,6 +88,19 @@ struct eeh_pe {
struct list_head child_list; /* List of PEs below this PE */
struct list_head child; /* Memb. child_list/eeh_phb_pe */
struct list_head edevs; /* List of eeh_dev in this PE */
#ifdef CONFIG_STACKTRACE
/*
* Saved stack trace. When we find a PE freeze in eeh_dev_check_failure
* the stack trace is saved here so we can print it in the recovery
* thread if it turns out to due to a real problem rather than
* a hot-remove.
*
* A max of 64 entries might be overkill, but it also might not be.
*/
unsigned long stack_trace[64];
int trace_entries;
#endif /* CONFIG_STACKTRACE */
};
#define eeh_pe_for_each_dev(pe, edev, tmp) \
@ -121,6 +134,8 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe)
struct eeh_dev {
int mode; /* EEH mode */
int class_code; /* Class code of the device */
int bdfn; /* bdfn of device (for cfg ops) */
struct pci_controller *controller;
int pe_config_addr; /* PE config address */
u32 config_space[16]; /* Saved PCI config space */
int pcix_cap; /* Saved PCIx capability */
@ -136,6 +151,17 @@ struct eeh_dev {
struct pci_dev *physfn; /* Associated SRIOV PF */
};
/* "fmt" must be a simple literal string */
#define EEH_EDEV_PRINT(level, edev, fmt, ...) \
pr_##level("PCI %04x:%02x:%02x.%x#%04x: EEH: " fmt, \
(edev)->controller->global_number, PCI_BUSNO((edev)->bdfn), \
PCI_SLOT((edev)->bdfn), PCI_FUNC((edev)->bdfn), \
((edev)->pe ? (edev)->pe_config_addr : 0xffff), ##__VA_ARGS__)
#define eeh_edev_dbg(edev, fmt, ...) EEH_EDEV_PRINT(debug, (edev), fmt, ##__VA_ARGS__)
#define eeh_edev_info(edev, fmt, ...) EEH_EDEV_PRINT(info, (edev), fmt, ##__VA_ARGS__)
#define eeh_edev_warn(edev, fmt, ...) EEH_EDEV_PRINT(warn, (edev), fmt, ##__VA_ARGS__)
#define eeh_edev_err(edev, fmt, ...) EEH_EDEV_PRINT(err, (edev), fmt, ##__VA_ARGS__)
static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev)
{
return edev ? edev->pdn : NULL;
@ -247,7 +273,7 @@ static inline bool eeh_state_active(int state)
== (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
}
typedef void *(*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
typedef void (*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag);
void eeh_set_pe_aux_size(int size);
int eeh_phb_pe_create(struct pci_controller *phb);
@ -261,20 +287,20 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
void eeh_pe_update_time_stamp(struct eeh_pe *pe);
void *eeh_pe_traverse(struct eeh_pe *root,
eeh_pe_traverse_func fn, void *flag);
void *eeh_pe_dev_traverse(struct eeh_pe *root,
eeh_edev_traverse_func fn, void *flag);
void eeh_pe_dev_traverse(struct eeh_pe *root,
eeh_edev_traverse_func fn, void *flag);
void eeh_pe_restore_bars(struct eeh_pe *pe);
const char *eeh_pe_loc_get(struct eeh_pe *pe);
struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
struct eeh_dev *eeh_dev_init(struct pci_dn *pdn);
void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
void eeh_probe_devices(void);
void eeh_show_enabled(void);
int __init eeh_ops_register(struct eeh_ops *ops);
int __exit eeh_ops_unregister(const char *name);
int eeh_check_failure(const volatile void __iomem *token);
int eeh_dev_check_failure(struct eeh_dev *edev);
void eeh_addr_cache_build(void);
void eeh_addr_cache_init(void);
void eeh_add_device_early(struct pci_dn *);
void eeh_add_device_tree_early(struct pci_dn *);
void eeh_add_device_late(struct pci_dev *);
@ -316,7 +342,7 @@ static inline bool eeh_enabled(void)
return false;
}
static inline void eeh_probe_devices(void) { }
static inline void eeh_show_enabled(void) { }
static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
{
@ -332,7 +358,7 @@ static inline int eeh_check_failure(const volatile void __iomem *token)
#define eeh_dev_check_failure(x) (0)
static inline void eeh_addr_cache_build(void) { }
static inline void eeh_addr_cache_init(void) { }
static inline void eeh_add_device_early(struct pci_dn *pdn) { }

View File

@ -0,0 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* PowerPC ELF notes.
*
* Copyright 2019, IBM Corporation
*/
#ifndef __ASM_POWERPC_ELFNOTE_H__
#define __ASM_POWERPC_ELFNOTE_H__
/*
* These note types should live in a SHT_NOTE segment and have
* "PowerPC" in the name field.
*/
/*
* The capabilities supported/required by this kernel (bitmap).
*
* This type uses a bitmap as "desc" field. Each bit is described
* in arch/powerpc/kernel/note.S
*/
#define PPC_ELFNOTE_CAPABILITIES 1
#endif /* __ASM_POWERPC_ELFNOTE_H__ */

View File

@ -0,0 +1,169 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Firmware-Assisted Dump internal code.
*
* Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
* Copyright 2019, Hari Bathini, IBM Corporation.
*/
#ifndef _ASM_POWERPC_FADUMP_INTERNAL_H
#define _ASM_POWERPC_FADUMP_INTERNAL_H
/* Maximum number of memory regions kernel supports */
#define FADUMP_MAX_MEM_REGS 128
#ifndef CONFIG_PRESERVE_FA_DUMP
/* The upper limit percentage for user specified boot memory size (25%) */
#define MAX_BOOT_MEM_RATIO 4
#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
/* Alignment per CMA requirement. */
#define FADUMP_CMA_ALIGNMENT (PAGE_SIZE << \
max_t(unsigned long, MAX_ORDER - 1, \
pageblock_order))
/* FAD commands */
#define FADUMP_REGISTER 1
#define FADUMP_UNREGISTER 2
#define FADUMP_INVALIDATE 3
/*
* Copy the ascii values for first 8 characters from a string into u64
* variable at their respective indexes.
* e.g.
* The string "FADMPINF" will be converted into 0x4641444d50494e46
*/
static inline u64 fadump_str_to_u64(const char *str)
{
u64 val = 0;
int i;
for (i = 0; i < sizeof(val); i++)
val = (*str) ? (val << 8) | *str++ : val << 8;
return val;
}
#define FADUMP_CPU_UNKNOWN (~((u32)0))
#define FADUMP_CRASH_INFO_MAGIC fadump_str_to_u64("FADMPINF")
/* fadump crash info structure */
struct fadump_crash_info_header {
u64 magic_number;
u64 elfcorehdr_addr;
u32 crashing_cpu;
struct pt_regs regs;
struct cpumask online_mask;
};
struct fadump_memory_range {
u64 base;
u64 size;
};
/* fadump memory ranges info */
struct fadump_mrange_info {
char name[16];
struct fadump_memory_range *mem_ranges;
u32 mem_ranges_sz;
u32 mem_range_cnt;
u32 max_mem_ranges;
};
/* Platform specific callback functions */
struct fadump_ops;
/* Firmware-assisted dump configuration details. */
struct fw_dump {
unsigned long reserve_dump_area_start;
unsigned long reserve_dump_area_size;
/* cmd line option during boot */
unsigned long reserve_bootvar;
unsigned long cpu_state_data_size;
u64 cpu_state_dest_vaddr;
u32 cpu_state_data_version;
u32 cpu_state_entry_size;
unsigned long hpte_region_size;
unsigned long boot_memory_size;
u64 boot_mem_dest_addr;
u64 boot_mem_addr[FADUMP_MAX_MEM_REGS];
u64 boot_mem_sz[FADUMP_MAX_MEM_REGS];
u64 boot_mem_top;
u64 boot_mem_regs_cnt;
unsigned long fadumphdr_addr;
unsigned long cpu_notes_buf_vaddr;
unsigned long cpu_notes_buf_size;
/*
* Maximum size supported by firmware to copy from source to
* destination address per entry.
*/
u64 max_copy_size;
u64 kernel_metadata;
int ibm_configure_kernel_dump;
unsigned long fadump_enabled:1;
unsigned long fadump_supported:1;
unsigned long dump_active:1;
unsigned long dump_registered:1;
unsigned long nocma:1;
struct fadump_ops *ops;
};
struct fadump_ops {
u64 (*fadump_init_mem_struct)(struct fw_dump *fadump_conf);
u64 (*fadump_get_metadata_size)(void);
int (*fadump_setup_metadata)(struct fw_dump *fadump_conf);
u64 (*fadump_get_bootmem_min)(void);
int (*fadump_register)(struct fw_dump *fadump_conf);
int (*fadump_unregister)(struct fw_dump *fadump_conf);
int (*fadump_invalidate)(struct fw_dump *fadump_conf);
void (*fadump_cleanup)(struct fw_dump *fadump_conf);
int (*fadump_process)(struct fw_dump *fadump_conf);
void (*fadump_region_show)(struct fw_dump *fadump_conf,
struct seq_file *m);
void (*fadump_trigger)(struct fadump_crash_info_header *fdh,
const char *msg);
};
/* Helper functions */
s32 fadump_setup_cpu_notes_buf(u32 num_cpus);
void fadump_free_cpu_notes_buf(void);
u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs);
void fadump_update_elfcore_header(char *bufp);
bool is_fadump_boot_mem_contiguous(void);
bool is_fadump_reserved_mem_contiguous(void);
#else /* !CONFIG_PRESERVE_FA_DUMP */
/* Firmware-assisted dump configuration details. */
struct fw_dump {
u64 boot_mem_top;
u64 dump_active;
};
#endif /* CONFIG_PRESERVE_FA_DUMP */
#ifdef CONFIG_PPC_PSERIES
extern void rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node);
#else
static inline void
rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { }
#endif
#ifdef CONFIG_PPC_POWERNV
extern void opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node);
#else
static inline void
opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { }
#endif
#endif /* _ASM_POWERPC_FADUMP_INTERNAL_H */

View File

@ -6,196 +6,14 @@
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
#ifndef __PPC64_FA_DUMP_H__
#define __PPC64_FA_DUMP_H__
#ifndef _ASM_POWERPC_FADUMP_H
#define _ASM_POWERPC_FADUMP_H
#ifdef CONFIG_FA_DUMP
/*
* The RMA region will be saved for later dumping when kernel crashes.
* RMA is Real Mode Area, the first block of logical memory address owned
* by logical partition, containing the storage that may be accessed with
* translate off.
*/
#define RMA_START 0x0
#define RMA_END (ppc64_rma_size)
/*
* On some Power systems where RMO is 128MB, it still requires minimum of
* 256MB for kernel to boot successfully. When kdump infrastructure is
* configured to save vmcore over network, we run into OOM issue while
* loading modules related to network setup. Hence we need aditional 64M
* of memory to avoid OOM issue.
*/
#define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
+ (0x1UL << 26))
/* The upper limit percentage for user specified boot memory size (25%) */
#define MAX_BOOT_MEM_RATIO 4
#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
/* Alignement per CMA requirement. */
#define FADUMP_CMA_ALIGNMENT (PAGE_SIZE << \
max_t(unsigned long, MAX_ORDER - 1, pageblock_order))
/* Firmware provided dump sections */
#define FADUMP_CPU_STATE_DATA 0x0001
#define FADUMP_HPTE_REGION 0x0002
#define FADUMP_REAL_MODE_REGION 0x0011
/* Dump request flag */
#define FADUMP_REQUEST_FLAG 0x00000001
/* FAD commands */
#define FADUMP_REGISTER 1
#define FADUMP_UNREGISTER 2
#define FADUMP_INVALIDATE 3
/* Dump status flag */
#define FADUMP_ERROR_FLAG 0x2000
#define FADUMP_CPU_ID_MASK ((1UL << 32) - 1)
#define CPU_UNKNOWN (~((u32)0))
/* Utility macros */
#define SKIP_TO_NEXT_CPU(reg_entry) \
({ \
while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) \
reg_entry++; \
reg_entry++; \
})
extern int crashing_cpu;
/* Kernel Dump section info */
struct fadump_section {
__be32 request_flag;
__be16 source_data_type;
__be16 error_flags;
__be64 source_address;
__be64 source_len;
__be64 bytes_dumped;
__be64 destination_address;
};
/* ibm,configure-kernel-dump header. */
struct fadump_section_header {
__be32 dump_format_version;
__be16 dump_num_sections;
__be16 dump_status_flag;
__be32 offset_first_dump_section;
/* Fields for disk dump option. */
__be32 dd_block_size;
__be64 dd_block_offset;
__be64 dd_num_blocks;
__be32 dd_offset_disk_path;
/* Maximum time allowed to prevent an automatic dump-reboot. */
__be32 max_time_auto;
};
/*
* Firmware Assisted dump memory structure. This structure is required for
* registering future kernel dump with power firmware through rtas call.
*
* No disk dump option. Hence disk dump path string section is not included.
*/
struct fadump_mem_struct {
struct fadump_section_header header;
/* Kernel dump sections */
struct fadump_section cpu_state_data;
struct fadump_section hpte_region;
struct fadump_section rmr_region;
};
/* Firmware-assisted dump configuration details. */
struct fw_dump {
unsigned long cpu_state_data_size;
unsigned long hpte_region_size;
unsigned long boot_memory_size;
unsigned long reserve_dump_area_start;
unsigned long reserve_dump_area_size;
/* cmd line option during boot */
unsigned long reserve_bootvar;
unsigned long fadumphdr_addr;
unsigned long cpu_notes_buf;
unsigned long cpu_notes_buf_size;
int ibm_configure_kernel_dump;
unsigned long fadump_enabled:1;
unsigned long fadump_supported:1;
unsigned long dump_active:1;
unsigned long dump_registered:1;
unsigned long nocma:1;
};
/*
* Copy the ascii values for first 8 characters from a string into u64
* variable at their respective indexes.
* e.g.
* The string "FADMPINF" will be converted into 0x4641444d50494e46
*/
static inline u64 str_to_u64(const char *str)
{
u64 val = 0;
int i;
for (i = 0; i < sizeof(val); i++)
val = (*str) ? (val << 8) | *str++ : val << 8;
return val;
}
#define STR_TO_HEX(x) str_to_u64(x)
#define REG_ID(x) str_to_u64(x)
#define FADUMP_CRASH_INFO_MAGIC STR_TO_HEX("FADMPINF")
#define REGSAVE_AREA_MAGIC STR_TO_HEX("REGSAVE")
/* The firmware-assisted dump format.
*
* The register save area is an area in the partition's memory used to preserve
* the register contents (CPU state data) for the active CPUs during a firmware
* assisted dump. The dump format contains register save area header followed
* by register entries. Each list of registers for a CPU starts with
* "CPUSTRT" and ends with "CPUEND".
*/
/* Register save area header. */
struct fadump_reg_save_area_header {
__be64 magic_number;
__be32 version;
__be32 num_cpu_offset;
};
/* Register entry. */
struct fadump_reg_entry {
__be64 reg_id;
__be64 reg_value;
};
/* fadump crash info structure */
struct fadump_crash_info_header {
u64 magic_number;
u64 elfcorehdr_addr;
u32 crashing_cpu;
struct pt_regs regs;
struct cpumask online_mask;
};
struct fad_crash_memory_ranges {
unsigned long long base;
unsigned long long size;
};
extern int is_fadump_memory_area(u64 addr, ulong size);
extern int early_init_dt_scan_fw_dump(unsigned long node,
const char *uname, int depth, void *data);
extern int fadump_reserve_mem(void);
extern int setup_fadump(void);
extern int is_fadump_active(void);
extern int should_fadump_crash(void);
@ -207,5 +25,11 @@ static inline int is_fadump_active(void) { return 0; }
static inline int should_fadump_crash(void) { return 0; }
static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
static inline void fadump_cleanup(void) { }
#endif /* !CONFIG_FA_DUMP */
#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
int depth, void *data);
extern int fadump_reserve_mem(void);
#endif
#endif
#endif /* _ASM_POWERPC_FADUMP_H */

View File

@ -50,6 +50,7 @@
#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000)
#define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0000001000000000)
#define FW_FEATURE_PAPR_SCM ASM_CONST(0x0000002000000000)
#define FW_FEATURE_ULTRAVISOR ASM_CONST(0x0000004000000000)
#ifndef __ASSEMBLY__
@ -68,9 +69,9 @@ enum {
FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
FW_FEATURE_PAPR_SCM,
FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR,
FW_FEATURE_POWERNV_ALWAYS = 0,
FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,

View File

@ -8,6 +8,8 @@
#define MCOUNT_ADDR ((unsigned long)(_mcount))
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
#ifdef __ASSEMBLY__
/* Based off of objdump optput from glibc */

View File

@ -60,8 +60,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
pagefault_enable();
if (!ret)
*oval = oldval;
*oval = oldval;
prevent_write_to_user(uaddr, sizeof(*uaddr));
return ret;

View File

@ -169,47 +169,6 @@ name:
#define ABS_ADDR(label) (label - fs_label + fs_start)
#define EXC_REAL_BEGIN(name, start, size) \
FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
#define EXC_REAL_END(name, start, size) \
FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
#define EXC_VIRT_BEGIN(name, start, size) \
FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
#define EXC_VIRT_END(name, start, size) \
FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
#define EXC_COMMON_BEGIN(name) \
USE_TEXT_SECTION(); \
.balign IFETCH_ALIGN_BYTES; \
.global name; \
_ASM_NOKPROBE_SYMBOL(name); \
DEFINE_FIXED_SYMBOL(name); \
name:
#define TRAMP_REAL_BEGIN(name) \
FIXED_SECTION_ENTRY_BEGIN(real_trampolines, name)
#define TRAMP_VIRT_BEGIN(name) \
FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#define TRAMP_KVM_BEGIN(name) \
TRAMP_VIRT_BEGIN(name)
#else
#define TRAMP_KVM_BEGIN(name)
#endif
#define EXC_REAL_NONE(start, size) \
FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
#define EXC_VIRT_NONE(start, size) \
FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \
FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size)
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_HEAD_64_H */

View File

@ -31,9 +31,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
return 0;
}
void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
pte_t pte);
#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor,

View File

@ -8,6 +8,7 @@
#ifndef _IO_WORKAROUNDS_H
#define _IO_WORKAROUNDS_H
#ifdef CONFIG_PPC_IO_WORKAROUNDS
#include <linux/io.h>
#include <asm/pci-bridge.h>
@ -32,4 +33,23 @@ extern int spiderpci_iowa_init(struct iowa_bus *, void *);
#define SPIDER_PCI_DUMMY_READ 0x0810
#define SPIDER_PCI_DUMMY_READ_BASE 0x0814
#endif
#if defined(CONFIG_PPC_IO_WORKAROUNDS) && defined(CONFIG_PPC_INDIRECT_MMIO)
extern bool io_workaround_inited;
static inline bool iowa_is_active(void)
{
return unlikely(io_workaround_inited);
}
#else
static inline bool iowa_is_active(void)
{
return false;
}
#endif
void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
pgprot_t prot, void *caller);
#endif /* _IO_WORKAROUNDS_H */

View File

@ -705,16 +705,9 @@ static inline void iosync(void)
* create hand-made mappings for use only by the PCI code and cannot
* currently be hooked. Must be page aligned.
*
* * __ioremap is the low level implementation used by ioremap and
* ioremap_prot and cannot be hooked (but can be used by a hook on one
* of the previous ones)
*
* * __ioremap_caller is the same as above but takes an explicit caller
* reference rather than using __builtin_return_address(0)
*
* * __iounmap, is the low level implementation used by iounmap and cannot
* be hooked (but can be used by a hook on iounmap)
*
*/
extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
@ -729,13 +722,14 @@ void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
extern void iounmap(volatile void __iomem *addr);
extern void __iomem *__ioremap(phys_addr_t, unsigned long size,
unsigned long flags);
int early_ioremap_range(unsigned long ea, phys_addr_t pa,
unsigned long size, pgprot_t prot);
void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
pgprot_t prot, void *caller);
extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size,
pgprot_t prot, void *caller);
extern void __iounmap(volatile void __iomem *addr);
extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea,
unsigned long size, pgprot_t prot);
extern void __iounmap_at(void *ea, unsigned long size);

View File

@ -48,15 +48,16 @@ struct iommu_table_ops {
* returns old TCE and DMA direction mask.
* @tce is a physical address.
*/
int (*exchange)(struct iommu_table *tbl,
int (*xchg_no_kill)(struct iommu_table *tbl,
long index,
unsigned long *hpa,
enum dma_data_direction *direction);
/* Real mode */
int (*exchange_rm)(struct iommu_table *tbl,
long index,
unsigned long *hpa,
enum dma_data_direction *direction);
enum dma_data_direction *direction,
bool realmode);
void (*tce_kill)(struct iommu_table *tbl,
unsigned long index,
unsigned long pages,
bool realmode);
__be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc);
#endif
@ -111,6 +112,8 @@ struct iommu_table {
struct iommu_table_ops *it_ops;
struct kref it_kref;
int it_nid;
unsigned long it_reserved_start; /* Start of not-DMA-able (MMIO) area */
unsigned long it_reserved_end;
};
#define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \
@ -149,8 +152,9 @@ extern int iommu_tce_table_put(struct iommu_table *tbl);
/* Initializes an iommu_table based in values set in the passed-in
* structure
*/
extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
int nid);
extern struct iommu_table *iommu_init_table(struct iommu_table *tbl,
int nid, unsigned long res_start, unsigned long res_end);
#define IOMMU_TABLE_GROUP_MAX_TABLES 2
struct iommu_table_group;
@ -206,6 +210,12 @@ extern void iommu_del_device(struct device *dev);
extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction);
extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction);
extern void iommu_tce_kill(struct iommu_table *tbl,
unsigned long entry, unsigned long pages);
#else
static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,

View File

@ -297,6 +297,7 @@ struct kvm_arch {
cpumask_t cpu_in_guest;
u8 radix;
u8 fwnmi_enabled;
u8 secure_guest;
bool threads_indep;
bool nested_enable;
pgd_t *pgtable;

View File

@ -3,9 +3,6 @@
#define _ASM_POWERPC_MACHDEP_H
#ifdef __KERNEL__
/*
*/
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/dma-mapping.h>
@ -31,10 +28,6 @@ struct pci_host_bridge;
struct machdep_calls {
char *name;
#ifdef CONFIG_PPC64
void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size,
pgprot_t prot, void *caller);
void (*iounmap)(volatile void __iomem *token);
#ifdef CONFIG_PM
void (*iommu_save)(void);
void (*iommu_restore)(void);

View File

@ -30,6 +30,10 @@ enum MCE_Disposition {
enum MCE_Initiator {
MCE_INITIATOR_UNKNOWN = 0,
MCE_INITIATOR_CPU = 1,
MCE_INITIATOR_PCI = 2,
MCE_INITIATOR_ISA = 3,
MCE_INITIATOR_MEMORY= 4,
MCE_INITIATOR_POWERMGM = 5,
};
enum MCE_ErrorType {
@ -41,6 +45,8 @@ enum MCE_ErrorType {
MCE_ERROR_TYPE_USER = 5,
MCE_ERROR_TYPE_RA = 6,
MCE_ERROR_TYPE_LINK = 7,
MCE_ERROR_TYPE_DCACHE = 8,
MCE_ERROR_TYPE_ICACHE = 9,
};
enum MCE_ErrorClass {
@ -122,7 +128,8 @@ struct machine_check_event {
enum MCE_UeErrorType ue_error_type:8;
u8 effective_address_provided;
u8 physical_address_provided;
u8 reserved_1[5];
u8 ignore_event;
u8 reserved_1[4];
u64 effective_address;
u64 physical_address;
u8 reserved_2[8];
@ -193,6 +200,7 @@ struct mce_error_info {
enum MCE_Initiator initiator:8;
enum MCE_ErrorClass error_class:8;
bool sync_error;
bool ignore_event;
};
#define MAX_MC_EVT 100

View File

@ -0,0 +1,26 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* SVM helper functions
*
* Copyright 2018 IBM Corporation
*/
#ifndef _ASM_POWERPC_MEM_ENCRYPT_H
#define _ASM_POWERPC_MEM_ENCRYPT_H
#include <asm/svm.h>
static inline bool mem_encrypt_active(void)
{
return is_secure_guest();
}
static inline bool force_dma_unencrypted(struct device *dev)
{
return is_secure_guest();
}
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
#endif /* _ASM_POWERPC_MEM_ENCRYPT_H */

View File

@ -257,7 +257,7 @@ extern void radix__mmu_cleanup_all(void);
/* Functions for creating and updating partition table on POWER9 */
extern void mmu_partition_table_init(void);
extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
unsigned long dw1);
unsigned long dw1, bool flush);
#endif /* CONFIG_PPC64 */
struct mm_struct;

View File

@ -11,8 +11,6 @@
#include <asm/mmu.h> /* For sub-arch specific PPC_PIN_SIZE */
#include <asm/asm-405.h>
extern unsigned long ioremap_bot;
#ifdef CONFIG_44x
extern int icache_44x_need_flush;
#endif
@ -78,23 +76,21 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
*/
#include <asm/fixmap.h>
#ifdef CONFIG_HIGHMEM
#define KVIRT_TOP PKMAP_BASE
#else
#define KVIRT_TOP FIXADDR_START
#endif
/*
* ioremap_bot starts at that address. Early ioremaps move down from there,
* until mem_init() at which point this becomes the top of the vmalloc
* and ioremap space
*/
#ifdef CONFIG_NOT_COHERENT_CACHE
#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
#ifdef CONFIG_HIGHMEM
#define IOREMAP_TOP PKMAP_BASE
#else
#define IOREMAP_TOP KVIRT_TOP
#define IOREMAP_TOP FIXADDR_START
#endif
/* PPC32 shares vmalloc area with ioremap */
#define IOREMAP_START VMALLOC_START
#define IOREMAP_END VMALLOC_END
/*
* Just any arbitrary offset to the start of the vmalloc VM area: the
* current 16MB value just means that there will be a 64MB "hole" after the

View File

@ -53,6 +53,7 @@
#define PHB_IO_BASE (ISA_IO_END)
#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
#define IOREMAP_BASE (PHB_IO_END)
#define IOREMAP_START (ioremap_bot)
#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE)

View File

@ -293,5 +293,18 @@ static inline int pgd_huge(pgd_t pgd)
#define is_hugepd(hpd) (hugepd_ok(hpd))
#endif
/*
* This gets called at the end of handling a page fault, when
* the kernel has put a new PTE into the page table for the process.
* We use it to ensure coherency between the i-cache and d-cache
* for the page which has just been mapped in.
*/
#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_HUGETLB_PAGE)
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
#else
static inline
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) {}
#endif
#endif /* __ASSEMBLY__ */
#endif

View File

@ -208,7 +208,10 @@
#define OPAL_HANDLE_HMI2 166
#define OPAL_NX_COPROC_INIT 167
#define OPAL_XIVE_GET_VP_STATE 170
#define OPAL_LAST 170
#define OPAL_MPIPL_UPDATE 173
#define OPAL_MPIPL_REGISTER_TAG 174
#define OPAL_MPIPL_QUERY_TAG 175
#define OPAL_LAST 175
#define QUIESCE_HOLD 1 /* Spin all calls at entry */
#define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */
@ -453,6 +456,7 @@ enum opal_msg_type {
OPAL_MSG_DPO = 5,
OPAL_MSG_PRD = 6,
OPAL_MSG_OCC = 7,
OPAL_MSG_PRD2 = 8,
OPAL_MSG_TYPE_MAX,
};
@ -1059,6 +1063,7 @@ enum {
OPAL_REBOOT_NORMAL = 0,
OPAL_REBOOT_PLATFORM_ERROR = 1,
OPAL_REBOOT_FULL_IPL = 2,
OPAL_REBOOT_MPIPL = 3,
};
/* Argument to OPAL_PCI_TCE_KILL */
@ -1135,6 +1140,44 @@ enum {
#define OPAL_PCI_P2P_LOAD 0x2
#define OPAL_PCI_P2P_STORE 0x4
/* MPIPL update operations */
enum opal_mpipl_ops {
OPAL_MPIPL_ADD_RANGE = 0,
OPAL_MPIPL_REMOVE_RANGE = 1,
OPAL_MPIPL_REMOVE_ALL = 2,
OPAL_MPIPL_FREE_PRESERVED_MEMORY = 3,
};
/* Tag will point to various metadata area. Kernel will
* use tag to get metadata value.
*/
enum opal_mpipl_tags {
OPAL_MPIPL_TAG_CPU = 0,
OPAL_MPIPL_TAG_OPAL = 1,
OPAL_MPIPL_TAG_KERNEL = 2,
OPAL_MPIPL_TAG_BOOT_MEM = 3,
};
/* Preserved memory details */
struct opal_mpipl_region {
__be64 src;
__be64 dest;
__be64 size;
};
/* Structure version */
#define OPAL_MPIPL_VERSION 0x01
struct opal_mpipl_fadump {
u8 version;
u8 reserved[7];
__be32 crashing_pir; /* OPAL crashing CPU PIR */
__be32 cpu_data_version;
__be32 cpu_data_size;
__be32 region_cnt;
struct opal_mpipl_region region[];
} __packed;
#endif /* __ASSEMBLY__ */
#endif /* __OPAL_API_H */

View File

@ -39,6 +39,7 @@ int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
uint64_t PE_handle);
int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
uint64_t rate_phys, uint32_t size);
int64_t opal_console_write(int64_t term_number, __be64 *length,
const uint8_t *buffer);
int64_t opal_console_read(int64_t term_number, __be64 *length,
@ -272,7 +273,7 @@ int64_t opal_xive_get_vp_info(uint64_t vp,
int64_t opal_xive_set_vp_info(uint64_t vp,
uint64_t flags,
uint64_t report_cl_pair);
int64_t opal_xive_allocate_irq(uint32_t chip_id);
int64_t opal_xive_allocate_irq_raw(uint32_t chip_id);
int64_t opal_xive_free_irq(uint32_t girq);
int64_t opal_xive_sync(uint32_t type, uint32_t id);
int64_t opal_xive_dump(uint32_t type, uint32_t id);
@ -297,6 +298,10 @@ int opal_sensor_group_clear(u32 group_hndl, int token);
int opal_sensor_group_enable(u32 group_hndl, int token, bool enable);
int opal_nx_coproc_init(uint32_t chip_id, uint32_t ct);
s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size);
s64 opal_mpipl_register_tag(enum opal_mpipl_tags tag, u64 addr);
s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, u64 *addr);
s64 opal_signal_system_reset(s32 cpu);
s64 opal_quiesce(u64 shutdown_type, s32 cpu);

View File

@ -215,9 +215,19 @@ static inline bool pfn_valid(unsigned long pfn)
/*
* gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET
* with -mcmodel=medium, so we use & and | instead of - and + on 64-bit.
* This also results in better code generation.
*/
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET))
#define __pa(x) ((unsigned long)(x) & 0x0fffffffffffffffUL)
#define __va(x) \
({ \
VIRTUAL_BUG_ON((unsigned long)(x) >= PAGE_OFFSET); \
(void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); \
})
#define __pa(x) \
({ \
VIRTUAL_BUG_ON((unsigned long)(x) < PAGE_OFFSET); \
(unsigned long)(x) & 0x0fffffffffffffffUL; \
})
#else /* 32-bit, non book E */
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START))

View File

@ -40,6 +40,8 @@ typedef unsigned long long pte_basic_t;
typedef unsigned long pte_basic_t;
#endif
#include <asm/bug.h>
/*
* Clear page using the dcbz instruction, which doesn't cause any
* memory traffic (except to write out any cache lines which get
@ -49,6 +51,8 @@ static inline void clear_page(void *addr)
{
unsigned int i;
WARN_ON((unsigned long)addr & (L1_CACHE_BYTES - 1));
for (i = 0; i < PAGE_SIZE / L1_CACHE_BYTES; i++, addr += L1_CACHE_BYTES)
dcbz(addr);
}

View File

@ -183,6 +183,7 @@ struct iommu_table;
struct pci_dn {
int flags;
#define PCI_DN_FLAG_IOV_VF 0x01
#define PCI_DN_FLAG_DEAD 0x02 /* Device has been hot-removed */
int busno; /* pci bus number */
int devfn; /* pci device and function number */

View File

@ -68,6 +68,8 @@ extern pgd_t swapper_pg_dir[];
extern void paging_init(void);
extern unsigned long ioremap_bot;
/*
* kern_addr_valid is intended to indicate whether an address is a valid
* kernel address. Most 32-bit archs define it as always true (like this)
@ -77,18 +79,6 @@ extern void paging_init(void);
#include <asm-generic/pgtable.h>
/*
* This gets called at the end of handling a page fault, when
* the kernel has put a new PTE into the page table for the process.
* We use it to ensure coherency between the i-cache and d-cache
* for the page which has just been mapped in.
* On machines which use an MMU hash table, we use this to put a
* corresponding HPTE into the hash table ahead of time, instead of
* waiting for the inevitable extra hash-table miss exception.
*/
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_large(pmd) 0
#endif

View File

@ -340,6 +340,12 @@ static inline long plpar_set_ciabr(unsigned long ciabr)
{
return 0;
}
static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex,
unsigned long *ptes)
{
return 0;
}
#endif /* CONFIG_PPC_PSERIES */
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */

View File

@ -62,11 +62,6 @@ void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
void eeh_sysfs_add_device(struct pci_dev *pdev);
void eeh_sysfs_remove_device(struct pci_dev *pdev);
static inline const char *eeh_pci_name(struct pci_dev *pdev)
{
return pdev ? pci_name(pdev) : "<null>";
}
static inline const char *eeh_driver_name(struct pci_dev *pdev)
{
return (pdev && pdev->driver) ? pdev->driver->name : "<null>";
@ -74,6 +69,8 @@ static inline const char *eeh_driver_name(struct pci_dev *pdev)
#endif /* CONFIG_EEH */
#define PCI_BUSNO(bdfn) ((bdfn >> 8) & 0xff)
#else /* CONFIG_PCI */
static inline void init_pci_config_tokens(void) { }
#endif /* !CONFIG_PCI */

View File

@ -1,31 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerPC 4xx OCM memory allocation support
*
* (C) Copyright 2009, Applied Micro Circuits Corporation
* Victor Gallardo (vgallardo@amcc.com)
*
* See file CREDITS for list of people who contributed to this
* project.
*/
#ifndef __ASM_POWERPC_PPC4XX_OCM_H__
#define __ASM_POWERPC_PPC4XX_OCM_H__
#define PPC4XX_OCM_NON_CACHED 0
#define PPC4XX_OCM_CACHED 1
#if defined(CONFIG_PPC4xx_OCM)
void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
int flags, const char *owner);
void ppc4xx_ocm_free(const void *virt);
#else
#define ppc4xx_ocm_alloc(phys, size, align, flags, owner) NULL
#define ppc4xx_ocm_free(addr) ((void)0)
#endif /* CONFIG_PPC4xx_OCM */
#endif /* __ASM_POWERPC_PPC4XX_OCM_H__ */

View File

@ -311,18 +311,48 @@ n:
addis reg,reg,(name - 0b)@ha; \
addi reg,reg,(name - 0b)@l;
#ifdef __powerpc64__
#ifdef HAVE_AS_ATHIGH
#if defined(__powerpc64__) && defined(HAVE_AS_ATHIGH)
#define __AS_ATHIGH high
#else
#define __AS_ATHIGH h
#endif
#define LOAD_REG_IMMEDIATE(reg,expr) \
lis reg,(expr)@highest; \
ori reg,reg,(expr)@higher; \
rldicr reg,reg,32,31; \
oris reg,reg,(expr)@__AS_ATHIGH; \
ori reg,reg,(expr)@l;
.macro __LOAD_REG_IMMEDIATE_32 r, x
.if (\x) >= 0x8000 || (\x) < -0x8000
lis \r, (\x)@__AS_ATHIGH
.if (\x) & 0xffff != 0
ori \r, \r, (\x)@l
.endif
.else
li \r, (\x)@l
.endif
.endm
.macro __LOAD_REG_IMMEDIATE r, x
.if (\x) >= 0x80000000 || (\x) < -0x80000000
__LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
sldi \r, \r, 32
.if (\x) & 0xffff0000 != 0
oris \r, \r, (\x)@__AS_ATHIGH
.endif
.if (\x) & 0xffff != 0
ori \r, \r, (\x)@l
.endif
.else
__LOAD_REG_IMMEDIATE_32 \r, \x
.endif
.endm
#ifdef __powerpc64__
#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr
#define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr) \
lis tmp, (expr)@highest; \
lis reg, (expr)@__AS_ATHIGH; \
ori tmp, tmp, (expr)@higher; \
ori reg, reg, (expr)@l; \
rldimi reg, tmp, 32, 0
#define LOAD_REG_ADDR(reg,name) \
ld reg,name@got(r2)
@ -335,11 +365,13 @@ n:
#else /* 32-bit */
#define LOAD_REG_IMMEDIATE(reg,expr) \
#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE_32 reg, expr
#define LOAD_REG_IMMEDIATE_SYM(reg,expr) \
lis reg,(expr)@ha; \
addi reg,reg,(expr)@l;
#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE(reg, name)
#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE_SYM(reg, name)
#define LOAD_REG_ADDRBASE(reg, name) lis reg,name@ha
#define ADDROFF(name) name@l
@ -351,19 +383,9 @@ n:
/* various errata or part fixups */
#ifdef CONFIG_PPC601_SYNC_FIX
#define SYNC \
BEGIN_FTR_SECTION \
sync; \
isync; \
END_FTR_SECTION_IFSET(CPU_FTR_601)
#define SYNC_601 \
BEGIN_FTR_SECTION \
sync; \
END_FTR_SECTION_IFSET(CPU_FTR_601)
#define ISYNC_601 \
BEGIN_FTR_SECTION \
isync; \
END_FTR_SECTION_IFSET(CPU_FTR_601)
#define SYNC sync; isync
#define SYNC_601 sync
#define ISYNC_601 isync
#else
#define SYNC
#define SYNC_601
@ -389,15 +411,11 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
#define MFTBU(dest) mfspr dest, SPRN_TBRU
#endif
#ifndef CONFIG_SMP
#define TLBSYNC
#else /* CONFIG_SMP */
/* tlbsync is not implemented on 601 */
#define TLBSYNC \
BEGIN_FTR_SECTION \
tlbsync; \
sync; \
END_FTR_SECTION_IFCLR(CPU_FTR_601)
#if !defined(CONFIG_SMP) || defined(CONFIG_PPC_BOOK3S_601)
#define TLBSYNC
#else
#define TLBSYNC tlbsync; sync
#endif
#ifdef CONFIG_PPC64

View File

@ -203,7 +203,11 @@ do { \
#endif /* __powerpc64__ */
#define arch_has_single_step() (1)
#define arch_has_block_step() (!cpu_has_feature(CPU_FTR_601))
#ifndef CONFIG_BOOK3S_601
#define arch_has_block_step() (true)
#else
#define arch_has_block_step() (false)
#endif
#define ARCH_HAS_USER_SINGLE_STEP_REPORT
/*

View File

@ -38,6 +38,7 @@
#define MSR_TM_LG 32 /* Trans Mem Available */
#define MSR_VEC_LG 25 /* Enable AltiVec */
#define MSR_VSX_LG 23 /* Enable VSX */
#define MSR_S_LG 22 /* Secure state */
#define MSR_POW_LG 18 /* Enable Power Management */
#define MSR_WE_LG 18 /* Wait State Enable */
#define MSR_TGPR_LG 17 /* TLB Update registers in use */
@ -71,11 +72,13 @@
#define MSR_SF __MASK(MSR_SF_LG) /* Enable 64 bit mode */
#define MSR_ISF __MASK(MSR_ISF_LG) /* Interrupt 64b mode valid on 630 */
#define MSR_HV __MASK(MSR_HV_LG) /* Hypervisor state */
#define MSR_S __MASK(MSR_S_LG) /* Secure state */
#else
/* so tests for these bits fail on 32-bit */
#define MSR_SF 0
#define MSR_ISF 0
#define MSR_HV 0
#define MSR_S 0
#endif
/*

View File

@ -1,154 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright 2010 Benjamin Herrenschmidt, IBM Corp
* <benh@kernel.crashing.org>
* and David Gibson, IBM Corporation.
*/
#ifndef _ASM_POWERPC_SCOM_H
#define _ASM_POWERPC_SCOM_H
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
#ifdef CONFIG_PPC_SCOM
/*
* The SCOM bus is a sideband bus used for accessing various internal
* registers of the processor or the chipset. The implementation details
* differ between processors and platforms, and the access method as
* well.
*
* This API allows to "map" ranges of SCOM register numbers associated
* with a given SCOM controller. The later must be represented by a
* device node, though some implementations might support NULL if there
* is no possible ambiguity
*
* Then, scom_read/scom_write can be used to accesses registers inside
* that range. The argument passed is a register number relative to
* the beginning of the range mapped.
*/
typedef void *scom_map_t;
/* Value for an invalid SCOM map */
#define SCOM_MAP_INVALID (NULL)
/* The scom_controller data structure is what the platform passes
* to the core code in scom_init, it provides the actual implementation
* of all the SCOM functions
*/
struct scom_controller {
scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count);
void (*unmap)(scom_map_t map);
int (*read)(scom_map_t map, u64 reg, u64 *value);
int (*write)(scom_map_t map, u64 reg, u64 value);
};
extern const struct scom_controller *scom_controller;
/**
* scom_init - Initialize the SCOM backend, called by the platform
* @controller: The platform SCOM controller
*/
static inline void scom_init(const struct scom_controller *controller)
{
scom_controller = controller;
}
/**
* scom_map_ok - Test is a SCOM mapping is successful
* @map: The result of scom_map to test
*/
static inline int scom_map_ok(scom_map_t map)
{
return map != SCOM_MAP_INVALID;
}
/**
* scom_map - Map a block of SCOM registers
* @ctrl_dev: Device node of the SCOM controller
* some implementations allow NULL here
* @reg: first SCOM register to map
* @count: Number of SCOM registers to map
*/
static inline scom_map_t scom_map(struct device_node *ctrl_dev,
u64 reg, u64 count)
{
return scom_controller->map(ctrl_dev, reg, count);
}
/**
* scom_find_parent - Find the SCOM controller for a device
* @dev: OF node of the device
*
* This is not meant for general usage, but in combination with
* scom_map() allows to map registers not represented by the
* device own scom-reg property. Useful for applying HW workarounds
* on things not properly represented in the device-tree for example.
*/
struct device_node *scom_find_parent(struct device_node *dev);
/**
* scom_map_device - Map a device's block of SCOM registers
* @dev: OF node of the device
* @index: Register bank index (index in "scom-reg" property)
*
* This function will use the device-tree binding for SCOM which
* is to follow "scom-parent" properties until it finds a node with
* a "scom-controller" property to find the controller. It will then
* use the "scom-reg" property which is made of reg/count pairs,
* each of them having a size defined by the controller's #scom-cells
* property
*/
extern scom_map_t scom_map_device(struct device_node *dev, int index);
/**
* scom_unmap - Unmap a block of SCOM registers
* @map: Result of scom_map is to be unmapped
*/
static inline void scom_unmap(scom_map_t map)
{
if (scom_map_ok(map))
scom_controller->unmap(map);
}
/**
* scom_read - Read a SCOM register
* @map: Result of scom_map
* @reg: Register index within that map
* @value: Updated with the value read
*
* Returns 0 (success) or a negative error code
*/
static inline int scom_read(scom_map_t map, u64 reg, u64 *value)
{
int rc;
rc = scom_controller->read(map, reg, value);
if (rc)
*value = 0xfffffffffffffffful;
return rc;
}
/**
* scom_write - Write to a SCOM register
* @map: Result of scom_map
* @reg: Register index within that map
* @value: Value to write
*
* Returns 0 (success) or a negative error code
*/
static inline int scom_write(scom_map_t map, u64 reg, u64 value)
{
return scom_controller->write(map, reg, value);
}
#endif /* CONFIG_PPC_SCOM */
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_SCOM_H */

View File

@ -61,17 +61,6 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end)
(unsigned long)_stext < end;
}
static inline int overlaps_kvm_tmp(unsigned long start, unsigned long end)
{
#ifdef CONFIG_KVM_GUEST
extern char kvm_tmp[];
return start < (unsigned long)kvm_tmp &&
(unsigned long)&kvm_tmp[1024 * 1024] < end;
#else
return 0;
#endif
}
#ifdef PPC64_ELF_ABI_v1
#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1

View File

@ -7,7 +7,7 @@
#define JMP_BUF_LEN 23
extern long setjmp(long *);
extern void longjmp(long *, long);
extern long setjmp(long *) __attribute__((returns_twice));
extern void longjmp(long *, long) __attribute__((noreturn));
#endif /* _ASM_POWERPC_SETJMP_H */

View File

@ -101,15 +101,43 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
#if defined(CONFIG_PPC_SPLPAR)
/* We only yield to the hypervisor if we are in shared processor mode */
#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
extern void __spin_yield(arch_spinlock_t *lock);
extern void __rw_yield(arch_rwlock_t *lock);
void splpar_spin_yield(arch_spinlock_t *lock);
void splpar_rw_yield(arch_rwlock_t *lock);
#else /* SPLPAR */
#define __spin_yield(x) barrier()
#define __rw_yield(x) barrier()
#define SHARED_PROCESSOR 0
static inline void splpar_spin_yield(arch_spinlock_t *lock) {};
static inline void splpar_rw_yield(arch_rwlock_t *lock) {};
#endif
static inline bool is_shared_processor(void)
{
/*
* LPPACA is only available on Pseries so guard anything LPPACA related to
* allow other platforms (which include this common header) to compile.
*/
#ifdef CONFIG_PPC_PSERIES
return (IS_ENABLED(CONFIG_PPC_SPLPAR) &&
lppaca_shared_proc(local_paca->lppaca_ptr));
#else
return false;
#endif
}
static inline void spin_yield(arch_spinlock_t *lock)
{
if (is_shared_processor())
splpar_spin_yield(lock);
else
barrier();
}
static inline void rw_yield(arch_rwlock_t *lock)
{
if (is_shared_processor())
splpar_rw_yield(lock);
else
barrier();
}
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
while (1) {
@ -117,8 +145,8 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
break;
do {
HMT_low();
if (SHARED_PROCESSOR)
__spin_yield(lock);
if (is_shared_processor())
splpar_spin_yield(lock);
} while (unlikely(lock->slock != 0));
HMT_medium();
}
@ -136,8 +164,8 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
local_irq_restore(flags);
do {
HMT_low();
if (SHARED_PROCESSOR)
__spin_yield(lock);
if (is_shared_processor())
splpar_spin_yield(lock);
} while (unlikely(lock->slock != 0));
HMT_medium();
local_irq_restore(flags_dis);
@ -226,8 +254,8 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
break;
do {
HMT_low();
if (SHARED_PROCESSOR)
__rw_yield(rw);
if (is_shared_processor())
splpar_rw_yield(rw);
} while (unlikely(rw->lock < 0));
HMT_medium();
}
@ -240,8 +268,8 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
break;
do {
HMT_low();
if (SHARED_PROCESSOR)
__rw_yield(rw);
if (is_shared_processor())
splpar_rw_yield(rw);
} while (unlikely(rw->lock != 0));
HMT_medium();
}
@ -281,9 +309,9 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
rw->lock = 0;
}
#define arch_spin_relax(lock) __spin_yield(lock)
#define arch_read_relax(lock) __rw_yield(lock)
#define arch_write_relax(lock) __rw_yield(lock)
#define arch_spin_relax(lock) spin_yield(lock)
#define arch_read_relax(lock) rw_yield(lock)
#define arch_write_relax(lock) rw_yield(lock)
/* See include/linux/spinlock.h */
#define smp_mb__after_spinlock() smp_mb()

View File

@ -53,7 +53,9 @@ void *__memmove(void *to, const void *from, __kernel_size_t n);
#ifndef CONFIG_KASAN
#define __HAVE_ARCH_MEMSET32
#define __HAVE_ARCH_MEMSET64
#define __HAVE_ARCH_MEMCPY_MCSAFE
extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);

View File

@ -0,0 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* SVM helper functions
*
* Copyright 2018 Anshuman Khandual, IBM Corporation.
*/
#ifndef _ASM_POWERPC_SVM_H
#define _ASM_POWERPC_SVM_H
#ifdef CONFIG_PPC_SVM
static inline bool is_secure_guest(void)
{
return mfmsr() & MSR_S;
}
void dtl_cache_ctor(void *addr);
#define get_dtl_cache_ctor() (is_secure_guest() ? dtl_cache_ctor : NULL)
#else /* CONFIG_PPC_SVM */
static inline bool is_secure_guest(void)
{
return false;
}
#define get_dtl_cache_ctor() NULL
#endif /* CONFIG_PPC_SVM */
#endif /* _ASM_POWERPC_SVM_H */

View File

@ -41,11 +41,7 @@ struct div_result {
/* Accessor functions for the timebase (RTC on 601) registers. */
/* If one day CONFIG_POWER is added just define __USE_RTC as 1 */
#ifdef CONFIG_PPC_BOOK3S_32
#define __USE_RTC() (cpu_has_feature(CPU_FTR_USE_RTC))
#else
#define __USE_RTC() 0
#endif
#define __USE_RTC() (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
#ifdef CONFIG_PPC64

View File

@ -17,38 +17,10 @@ typedef unsigned long cycles_t;
static inline cycles_t get_cycles(void)
{
#ifdef __powerpc64__
if (IS_ENABLED(CONFIG_BOOK3S_601))
return 0;
return mftb();
#else
cycles_t ret;
/*
* For the "cycle" counter we use the timebase lower half.
* Currently only used on SMP.
*/
ret = 0;
__asm__ __volatile__(
#ifdef CONFIG_PPC_8xx
"97: mftb %0\n"
#else
"97: mfspr %0, %2\n"
#endif
"99:\n"
".section __ftr_fixup,\"a\"\n"
".align 2\n"
"98:\n"
" .long %1\n"
" .long 0\n"
" .long 97b-98b\n"
" .long 99b-98b\n"
" .long 0\n"
" .long 0\n"
".previous"
: "=r" (ret) : "i" (CPU_FTR_601), "i" (SPRN_TBRL));
return ret;
#endif
}
#endif /* __KERNEL__ */

View File

@ -387,6 +387,20 @@ static inline unsigned long raw_copy_to_user(void __user *to,
return ret;
}
static __always_inline unsigned long __must_check
copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
{
if (likely(check_copy_size(from, n, true))) {
if (access_ok(to, n)) {
allow_write_to_user(to, n);
n = memcpy_mcsafe((void *)to, from, n);
prevent_write_to_user(to, n);
}
}
return n;
}
extern unsigned long __clear_user(void __user *addr, unsigned long size);
static inline unsigned long clear_user(void __user *addr, unsigned long size)

View File

@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Ultravisor API.
*
* Copyright 2019, IBM Corporation.
*
*/
#ifndef _ASM_POWERPC_ULTRAVISOR_API_H
#define _ASM_POWERPC_ULTRAVISOR_API_H
#include <asm/hvcall.h>
/* Return codes */
#define U_BUSY H_BUSY
#define U_FUNCTION H_FUNCTION
#define U_NOT_AVAILABLE H_NOT_AVAILABLE
#define U_P2 H_P2
#define U_P3 H_P3
#define U_P4 H_P4
#define U_P5 H_P5
#define U_PARAMETER H_PARAMETER
#define U_PERMISSION H_PERMISSION
#define U_SUCCESS H_SUCCESS
/* opcodes */
#define UV_WRITE_PATE 0xF104
#define UV_RETURN 0xF11C
#define UV_ESM 0xF110
#define UV_SHARE_PAGE 0xF130
#define UV_UNSHARE_PAGE 0xF134
#define UV_UNSHARE_ALL_PAGES 0xF140
#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */

View File

@ -0,0 +1,49 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Ultravisor definitions
*
* Copyright 2019, IBM Corporation.
*
*/
#ifndef _ASM_POWERPC_ULTRAVISOR_H
#define _ASM_POWERPC_ULTRAVISOR_H
#include <asm/asm-prototypes.h>
#include <asm/ultravisor-api.h>
#include <asm/firmware.h>
int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
int depth, void *data);
/*
* In ultravisor enabled systems, PTCR becomes ultravisor privileged only for
* writing and an attempt to write to it will cause a Hypervisor Emulation
* Assistance interrupt.
*/
static inline void set_ptcr_when_no_uv(u64 val)
{
if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
mtspr(SPRN_PTCR, val);
}
static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
{
return ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1);
}
static inline int uv_share_page(u64 pfn, u64 npages)
{
return ucall_norets(UV_SHARE_PAGE, pfn, npages);
}
static inline int uv_unshare_page(u64 pfn, u64 npages)
{
return ucall_norets(UV_UNSHARE_PAGE, pfn, npages);
}
static inline int uv_unshare_all_pages(void)
{
return ucall_norets(UV_UNSHARE_ALL_PAGES);
}
#endif /* _ASM_POWERPC_ULTRAVISOR_H */

View File

@ -99,6 +99,7 @@ extern void xive_flush_interrupt(void);
/* xmon hook */
extern void xmon_xive_do_dump(int cpu);
extern int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d);
/* APIs used by KVM */
extern u32 xive_native_default_eq_shift(void);

View File

@ -1 +1,2 @@
prom_init_check
vmlinux.lds

View File

@ -52,7 +52,7 @@ obj-y := cputable.o ptrace.o syscalls.o \
of_platform.o prom_parse.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
paca.o nvram_64.o firmware.o
paca.o nvram_64.o firmware.o note.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
@ -78,7 +78,9 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_FA_DUMP) += fadump.o
ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),)
obj-y += fadump.o
endif
ifdef CONFIG_PPC32
obj-$(CONFIG_E500) += idle_e500.o
endif
@ -155,6 +157,9 @@ endif
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),)
obj-y += ucall.o
endif
# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
@ -184,15 +189,13 @@ extra-$(CONFIG_ALTIVEC) += vector.o
extra-$(CONFIG_PPC64) += entry_64.o
extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
$(obj)/built-in.a: prom_init_check
extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check
quiet_cmd_prom_init_check = CALL $<
cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o"
quiet_cmd_prom_init_check = PROMCHK $@
cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" $(obj)/prom_init.o; touch $@
PHONY += prom_init_check
prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
$(call cmd,prom_init_check)
endif
$(obj)/prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o FORCE
$(call if_changed,prom_init_check)
targets += prom_init_check
clean-files := vmlinux.lds

View File

@ -506,6 +506,7 @@ int main(void)
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);

View File

@ -569,7 +569,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC32
#ifdef CONFIG_PPC_BOOK3S_32
#ifdef CONFIG_PPC_BOOK3S_601
{ /* 601 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00010000,
@ -583,6 +583,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc601",
},
#endif /* CONFIG_PPC_BOOK3S_601 */
#ifdef CONFIG_PPC_BOOK3S_6xx
{ /* 603 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00030000,
@ -1212,7 +1214,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc603",
},
#endif /* CONFIG_PPC_BOOK3S_32 */
#endif /* CONFIG_PPC_BOOK3S_6xx */
#ifdef CONFIG_PPC_8xx
{ /* 8xx */
.pvr_mask = 0xffff0000,

View File

@ -122,18 +122,17 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
{
struct iommu_table *tbl = get_iommu_table_base(dev);
if (!tbl) {
dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx"
", table unavailable\n", mask);
return 0;
}
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
dev->archdata.iommu_bypass = true;
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
return 1;
}
if (!tbl) {
dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
return 0;
}
if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",

View File

@ -150,6 +150,16 @@ static int __init eeh_setup(char *str)
}
__setup("eeh=", eeh_setup);
void eeh_show_enabled(void)
{
if (eeh_has_flag(EEH_FORCE_DISABLED))
pr_info("EEH: Recovery disabled by kernel parameter.\n");
else if (eeh_has_flag(EEH_ENABLED))
pr_info("EEH: Capable adapter found: recovery enabled.\n");
else
pr_info("EEH: No capable adapters found: recovery disabled.\n");
}
/*
* This routine captures assorted PCI configuration space data
* for the indicated PCI device, and puts them into a buffer
@ -410,11 +420,9 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
eeh_pe_mark_isolated(phb_pe);
eeh_serialize_unlock(flags);
pr_err("EEH: PHB#%x failure detected, location: %s\n",
pr_debug("EEH: PHB#%x failure detected, location: %s\n",
phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
dump_stack();
eeh_send_failure_event(phb_pe);
return 1;
out:
eeh_serialize_unlock(flags);
@ -441,7 +449,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
struct eeh_pe *pe, *parent_pe, *phb_pe;
struct eeh_pe *pe, *parent_pe;
int rc = 0;
const char *location = NULL;
@ -460,8 +468,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
/* Access to IO BARs might get this far and still not want checking. */
if (!pe) {
eeh_stats.ignored_check++;
pr_debug("EEH: Ignored check for %s\n",
eeh_pci_name(dev));
eeh_edev_dbg(edev, "Ignored check\n");
return 0;
}
@ -501,12 +508,11 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
if (dn)
location = of_get_property(dn, "ibm,loc-code",
NULL);
printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
"location=%s driver=%s pci addr=%s\n",
eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n",
pe->check_count,
location ? location : "unknown",
eeh_driver_name(dev), eeh_pci_name(dev));
printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
eeh_edev_err(edev, "Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
dump_stack();
}
@ -573,13 +579,8 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* a stack trace will help the device-driver authors figure
* out what happened. So print that out.
*/
phb_pe = eeh_phb_pe_get(pe->phb);
pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
pe->phb->global_number, pe->addr);
pr_err("EEH: PE location: %s, PHB location: %s\n",
eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
dump_stack();
pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
__func__, pe->phb->global_number, pe->addr);
eeh_send_failure_event(pe);
return 1;
@ -697,7 +698,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
return rc;
}
static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
static void eeh_disable_and_save_dev_state(struct eeh_dev *edev,
void *userdata)
{
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
@ -708,7 +709,7 @@ static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
* state for the specified device
*/
if (!pdev || pdev == dev)
return NULL;
return;
/* Ensure we have D0 power state */
pci_set_power_state(pdev, PCI_D0);
@ -721,18 +722,16 @@ static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
* interrupt from the device
*/
pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
return NULL;
}
static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
struct pci_dev *dev = userdata;
if (!pdev)
return NULL;
return;
/* Apply customization from firmware */
if (pdn && eeh_ops->restore_config)
@ -741,8 +740,6 @@ static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
/* The caller should restore state for the specified device */
if (pdev != dev)
pci_restore_state(pdev);
return NULL;
}
int eeh_restore_vf_config(struct pci_dn *pdn)
@ -868,7 +865,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
* the indicated device and its children so that the bunch of the
* devices could be reset properly.
*/
static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
{
struct pci_dev *dev;
unsigned int *freset = (unsigned int *)flag;
@ -876,8 +873,6 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
dev = eeh_dev_to_pci_dev(edev);
if (dev)
*freset |= dev->needs_freset;
return NULL;
}
static void eeh_pe_refreeze_passed(struct eeh_pe *root)
@ -1063,23 +1058,6 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
void eeh_probe_devices(void)
{
struct pci_controller *hose, *tmp;
struct pci_dn *pdn;
/* Enable EEH for all adapters */
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
pdn = hose->pci_data;
traverse_pci_dn(pdn, eeh_ops->probe, NULL);
}
if (eeh_enabled())
pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
else
pr_info("EEH: No capable adapters found\n");
}
/**
* eeh_init - EEH initialization
*
@ -1120,6 +1098,8 @@ static int eeh_init(void)
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
eeh_dev_phb_init_dynamic(hose);
eeh_addr_cache_init();
/* Initialize EEH event */
return eeh_event_init();
}
@ -1190,15 +1170,14 @@ void eeh_add_device_late(struct pci_dev *dev)
struct pci_dn *pdn;
struct eeh_dev *edev;
if (!dev || !eeh_enabled())
if (!dev)
return;
pr_debug("EEH: Adding device %s\n", pci_name(dev));
pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
edev = pdn_to_eeh_dev(pdn);
eeh_edev_dbg(edev, "Adding device\n");
if (edev->pdev == dev) {
pr_debug("EEH: Already referenced !\n");
eeh_edev_dbg(edev, "Device already referenced!\n");
return;
}
@ -1246,6 +1225,8 @@ void eeh_add_device_tree_late(struct pci_bus *bus)
{
struct pci_dev *dev;
if (eeh_has_flag(EEH_FORCE_DISABLED))
return;
list_for_each_entry(dev, &bus->devices, bus_list) {
eeh_add_device_late(dev);
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
@ -1299,10 +1280,10 @@ void eeh_remove_device(struct pci_dev *dev)
edev = pci_dev_to_eeh_dev(dev);
/* Unregister the device with the EEH/PCI address search system */
pr_debug("EEH: Removing device %s\n", pci_name(dev));
dev_dbg(&dev->dev, "EEH: Removing device\n");
if (!edev || !edev->pdev || !edev->pe) {
pr_debug("EEH: Not referenced !\n");
dev_dbg(&dev->dev, "EEH: Device not referenced!\n");
return;
}
@ -1890,6 +1871,198 @@ static const struct file_operations eeh_force_recover_fops = {
.llseek = no_llseek,
.write = eeh_force_recover_write,
};
static ssize_t eeh_debugfs_dev_usage(struct file *filp,
char __user *user_buf,
size_t count, loff_t *ppos)
{
static const char usage[] = "input format: <domain>:<bus>:<dev>.<fn>\n";
return simple_read_from_buffer(user_buf, count, ppos,
usage, sizeof(usage) - 1);
}
static ssize_t eeh_dev_check_write(struct file *filp,
const char __user *user_buf,
size_t count, loff_t *ppos)
{
uint32_t domain, bus, dev, fn;
struct pci_dev *pdev;
struct eeh_dev *edev;
char buf[20];
int ret;
memset(buf, 0, sizeof(buf));
ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
if (!ret)
return -EFAULT;
ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
if (ret != 4) {
pr_err("%s: expected 4 args, got %d\n", __func__, ret);
return -EINVAL;
}
pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
if (!pdev)
return -ENODEV;
edev = pci_dev_to_eeh_dev(pdev);
if (!edev) {
pci_err(pdev, "No eeh_dev for this device!\n");
pci_dev_put(pdev);
return -ENODEV;
}
ret = eeh_dev_check_failure(edev);
pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n",
domain, bus, dev, fn, ret);
pci_dev_put(pdev);
return count;
}
static const struct file_operations eeh_dev_check_fops = {
.open = simple_open,
.llseek = no_llseek,
.write = eeh_dev_check_write,
.read = eeh_debugfs_dev_usage,
};
static int eeh_debugfs_break_device(struct pci_dev *pdev)
{
struct resource *bar = NULL;
void __iomem *mapped;
u16 old, bit;
int i, pos;
/* Do we have an MMIO BAR to disable? */
for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
struct resource *r = &pdev->resource[i];
if (!r->flags || !r->start)
continue;
if (r->flags & IORESOURCE_IO)
continue;
if (r->flags & IORESOURCE_UNSET)
continue;
bar = r;
break;
}
if (!bar) {
pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
return -ENXIO;
}
pci_err(pdev, "Going to break: %pR\n", bar);
if (pdev->is_virtfn) {
#ifndef CONFIG_IOV
return -ENXIO;
#else
/*
* VFs don't have a per-function COMMAND register, so the best
* we can do is clear the Memory Space Enable bit in the PF's
* SRIOV control reg.
*
* Unfortunately, this requires that we have a PF (i.e doesn't
* work for a passed-through VF) and it has the potential side
* effect of also causing an EEH on every other VF under the
* PF. Oh well.
*/
pdev = pdev->physfn;
if (!pdev)
return -ENXIO; /* passed through VFs have no PF */
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
pos += PCI_SRIOV_CTRL;
bit = PCI_SRIOV_CTRL_MSE;
#endif /* !CONFIG_IOV */
} else {
bit = PCI_COMMAND_MEMORY;
pos = PCI_COMMAND;
}
/*
* Process here is:
*
* 1. Disable Memory space.
*
* 2. Perform an MMIO to the device. This should result in an error
* (CA / UR) being raised by the device which results in an EEH
* PE freeze. Using the in_8() accessor skips the eeh detection hook
* so the freeze hook so the EEH Detection machinery won't be
* triggered here. This is to match the usual behaviour of EEH
* where the HW will asyncronously freeze a PE and it's up to
* the kernel to notice and deal with it.
*
* 3. Turn Memory space back on. This is more important for VFs
* since recovery will probably fail if we don't. For normal
* the COMMAND register is reset as a part of re-initialising
* the device.
*
* Breaking stuff is the point so who cares if it's racy ;)
*/
pci_read_config_word(pdev, pos, &old);
mapped = ioremap(bar->start, PAGE_SIZE);
if (!mapped) {
pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
return -ENXIO;
}
pci_write_config_word(pdev, pos, old & ~bit);
in_8(mapped);
pci_write_config_word(pdev, pos, old);
iounmap(mapped);
return 0;
}
static ssize_t eeh_dev_break_write(struct file *filp,
const char __user *user_buf,
size_t count, loff_t *ppos)
{
uint32_t domain, bus, dev, fn;
struct pci_dev *pdev;
char buf[20];
int ret;
memset(buf, 0, sizeof(buf));
ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
if (!ret)
return -EFAULT;
ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
if (ret != 4) {
pr_err("%s: expected 4 args, got %d\n", __func__, ret);
return -EINVAL;
}
pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
if (!pdev)
return -ENODEV;
ret = eeh_debugfs_break_device(pdev);
pci_dev_put(pdev);
if (ret < 0)
return ret;
return count;
}
static const struct file_operations eeh_dev_break_fops = {
.open = simple_open,
.llseek = no_llseek,
.write = eeh_dev_break_write,
.read = eeh_debugfs_dev_usage,
};
#endif
static int __init eeh_init_proc(void)
@ -1905,6 +2078,12 @@ static int __init eeh_init_proc(void)
debugfs_create_bool("eeh_disable_recovery", 0600,
powerpc_debugfs_root,
&eeh_debugfs_no_recover);
debugfs_create_file_unsafe("eeh_dev_check", 0600,
powerpc_debugfs_root, NULL,
&eeh_dev_check_fops);
debugfs_create_file_unsafe("eeh_dev_break", 0600,
powerpc_debugfs_root, NULL,
&eeh_dev_break_fops);
debugfs_create_file_unsafe("eeh_force_recover", 0600,
powerpc_debugfs_root, NULL,
&eeh_force_recover_fops);

View File

@ -148,8 +148,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
piar->pcidev = dev;
piar->flags = flags;
pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n",
&alo, &ahi, pci_name(dev));
eeh_edev_dbg(piar->edev, "PIAR: insert range=[%pap:%pap]\n",
&alo, &ahi);
rb_link_node(&piar->rb_node, parent, p);
rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
@ -229,8 +229,8 @@ restart:
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
if (piar->pcidev == dev) {
pr_debug("PIAR: remove range=[%pap:%pap] dev=%s\n",
&piar->addr_lo, &piar->addr_hi, pci_name(dev));
eeh_edev_dbg(piar->edev, "PIAR: remove range=[%pap:%pap]\n",
&piar->addr_lo, &piar->addr_hi);
rb_erase(n, &pci_io_addr_cache_root.rb_root);
kfree(piar);
goto restart;
@ -258,37 +258,14 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
}
/**
* eeh_addr_cache_build - Build a cache of I/O addresses
* eeh_addr_cache_init - Initialize a cache of I/O addresses
*
* Build a cache of pci i/o addresses. This cache will be used to
* Initialize a cache of pci i/o addresses. This cache will be used to
* find the pci device that corresponds to a given address.
* This routine scans all pci busses to build the cache.
* Must be run late in boot process, after the pci controllers
* have been scanned for devices (after all device resources are known).
*/
void eeh_addr_cache_build(void)
void eeh_addr_cache_init(void)
{
struct pci_dn *pdn;
struct eeh_dev *edev;
struct pci_dev *dev = NULL;
spin_lock_init(&pci_io_addr_cache_root.piar_lock);
for_each_pci_dev(dev) {
pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
if (!pdn)
continue;
edev = pdn_to_eeh_dev(pdn);
if (!edev)
continue;
dev->dev.archdata.edev = edev;
edev->pdev = dev;
eeh_addr_cache_insert_dev(dev);
eeh_sysfs_add_device(dev);
}
}
static int eeh_addr_cache_show(struct seq_file *s, void *v)

View File

@ -47,6 +47,8 @@ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
/* Associate EEH device with OF node */
pdn->edev = edev;
edev->pdn = pdn;
edev->bdfn = (pdn->busno << 8) | pdn->devfn;
edev->controller = pdn->phb;
return edev;
}

View File

@ -27,6 +27,7 @@
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/ppc-pci.h>
@ -81,23 +82,6 @@ static const char *pci_ers_result_name(enum pci_ers_result result)
}
};
static __printf(2, 3) void eeh_edev_info(const struct eeh_dev *edev,
const char *fmt, ...)
{
struct va_format vaf;
va_list args;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
printk(KERN_INFO "EEH: PE#%x (PCI %s): %pV\n", edev->pe_config_addr,
edev->pdev ? dev_name(&edev->pdev->dev) : "none", &vaf);
va_end(args);
}
static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
enum pci_ers_result new)
{
@ -113,8 +97,16 @@ static bool eeh_dev_removed(struct eeh_dev *edev)
static bool eeh_edev_actionable(struct eeh_dev *edev)
{
return (edev->pdev && !eeh_dev_removed(edev) &&
!eeh_pe_passed(edev->pe));
if (!edev->pdev)
return false;
if (edev->pdev->error_state == pci_channel_io_perm_failure)
return false;
if (eeh_dev_removed(edev))
return false;
if (eeh_pe_passed(edev->pe))
return false;
return true;
}
/**
@ -214,12 +206,12 @@ static void eeh_enable_irq(struct eeh_dev *edev)
}
}
static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
return NULL;
return;
/*
* We cannot access the config space on some adapters.
@ -229,14 +221,13 @@ static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
* device is created.
*/
if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
return NULL;
return;
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
return NULL;
return;
pci_save_state(pdev);
return NULL;
}
static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
@ -274,20 +265,27 @@ static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
}
typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
struct pci_dev *,
struct pci_driver *);
static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
enum pci_ers_result *result)
{
struct pci_dev *pdev;
struct pci_driver *driver;
enum pci_ers_result new_result;
if (!edev->pdev) {
pci_lock_rescan_remove();
pdev = edev->pdev;
if (pdev)
get_device(&pdev->dev);
pci_unlock_rescan_remove();
if (!pdev) {
eeh_edev_info(edev, "no device");
return;
}
device_lock(&edev->pdev->dev);
device_lock(&pdev->dev);
if (eeh_edev_actionable(edev)) {
driver = eeh_pcid_get(edev->pdev);
driver = eeh_pcid_get(pdev);
if (!driver)
eeh_edev_info(edev, "no driver");
@ -296,7 +294,7 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
else if (edev->mode & EEH_DEV_NO_HANDLER)
eeh_edev_info(edev, "driver bound too late");
else {
new_result = fn(edev, driver);
new_result = fn(edev, pdev, driver);
eeh_edev_info(edev, "%s driver reports: '%s'",
driver->name,
pci_ers_result_name(new_result));
@ -305,12 +303,15 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
new_result);
}
if (driver)
eeh_pcid_put(edev->pdev);
eeh_pcid_put(pdev);
} else {
eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!edev->pdev,
eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!pdev,
!eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
}
device_unlock(&edev->pdev->dev);
device_unlock(&pdev->dev);
if (edev->pdev != pdev)
eeh_edev_warn(edev, "Device changed during processing!\n");
put_device(&pdev->dev);
}
static void eeh_pe_report(const char *name, struct eeh_pe *root,
@ -337,20 +338,20 @@ static void eeh_pe_report(const char *name, struct eeh_pe *root,
* Report an EEH error to each device driver.
*/
static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
struct pci_dev *dev = edev->pdev;
if (!driver->err_handler->error_detected)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
driver->name);
rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen);
edev->in_error = true;
pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE);
return rc;
}
@ -363,12 +364,13 @@ static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
* are now enabled.
*/
static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->mmio_enabled)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
return driver->err_handler->mmio_enabled(edev->pdev);
return driver->err_handler->mmio_enabled(pdev);
}
/**
@ -382,20 +384,21 @@ static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
* driver can work again while the device is recovered.
*/
static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->slot_reset || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
return driver->err_handler->slot_reset(edev->pdev);
return driver->err_handler->slot_reset(pdev);
}
static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
return NULL;
return;
/*
* The content in the config space isn't saved because
@ -407,15 +410,14 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
if (list_is_last(&edev->entry, &edev->pe->edevs))
eeh_pe_restore_bars(edev->pe);
return NULL;
return;
}
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
return NULL;
return;
pci_restore_state(pdev);
return NULL;
}
/**
@ -428,13 +430,14 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
* to make the recovered device work again.
*/
static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->resume || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
driver->err_handler->resume(edev->pdev);
driver->err_handler->resume(pdev);
pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
#ifdef CONFIG_PCI_IOV
@ -453,6 +456,7 @@ static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
* dead, and that no further recovery attempts will be made on it.
*/
static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
@ -462,10 +466,10 @@ static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
driver->name);
rc = driver->err_handler->error_detected(edev->pdev,
rc = driver->err_handler->error_detected(pdev,
pci_channel_io_perm_failure);
pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_DISCONNECT);
pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
return rc;
}
@ -473,12 +477,9 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
{
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
if (!(edev->physfn)) {
pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
__func__, pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
eeh_edev_warn(edev, "Not for VF\n");
return NULL;
}
@ -492,12 +493,12 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
}
#ifdef CONFIG_PCI_IOV
pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index);
#endif
return NULL;
}
static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
{
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
@ -512,7 +513,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
*/
if (!eeh_edev_actionable(edev) ||
(dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
return NULL;
return;
if (rmv_data) {
driver = eeh_pcid_get(dev);
@ -521,7 +522,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
driver->err_handler->error_detected &&
driver->err_handler->slot_reset) {
eeh_pcid_put(dev);
return NULL;
return;
}
eeh_pcid_put(dev);
}
@ -554,8 +555,6 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
pci_stop_and_remove_bus_device(dev);
pci_unlock_rescan_remove();
}
return NULL;
}
static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
@ -744,6 +743,99 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
*/
#define MAX_WAIT_FOR_RECOVERY 300
/* Walks the PE tree after processing an event to remove any stale PEs.
*
* NB: This needs to be recursive to ensure the leaf PEs get removed
* before their parents do. Although this is possible to do recursively
* we don't since this is easier to read and we need to garantee
* the leaf nodes will be handled first.
*/
static void eeh_pe_cleanup(struct eeh_pe *pe)
{
struct eeh_pe *child_pe, *tmp;
list_for_each_entry_safe(child_pe, tmp, &pe->child_list, child)
eeh_pe_cleanup(child_pe);
if (pe->state & EEH_PE_KEEP)
return;
if (!(pe->state & EEH_PE_INVALID))
return;
if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) {
list_del(&pe->child);
kfree(pe);
}
}
/**
* eeh_check_slot_presence - Check if a device is still present in a slot
* @pdev: pci_dev to check
*
* This function may return a false positive if we can't determine the slot's
* presence state. This might happen for for PCIe slots if the PE containing
* the upstream bridge is also frozen, or the bridge is part of the same PE
* as the device.
*
* This shouldn't happen often, but you might see it if you hotplug a PCIe
* switch.
*/
static bool eeh_slot_presence_check(struct pci_dev *pdev)
{
const struct hotplug_slot_ops *ops;
struct pci_slot *slot;
u8 state;
int rc;
if (!pdev)
return false;
if (pdev->error_state == pci_channel_io_perm_failure)
return false;
slot = pdev->slot;
if (!slot || !slot->hotplug)
return true;
ops = slot->hotplug->ops;
if (!ops || !ops->get_adapter_status)
return true;
/* set the attention indicator while we've got the slot ops */
if (ops->set_attention_status)
ops->set_attention_status(slot->hotplug, 1);
rc = ops->get_adapter_status(slot->hotplug, &state);
if (rc)
return true;
return !!state;
}
static void eeh_clear_slot_attention(struct pci_dev *pdev)
{
const struct hotplug_slot_ops *ops;
struct pci_slot *slot;
if (!pdev)
return;
if (pdev->error_state == pci_channel_io_perm_failure)
return;
slot = pdev->slot;
if (!slot || !slot->hotplug)
return;
ops = slot->hotplug->ops;
if (!ops || !ops->set_attention_status)
return;
ops->set_attention_status(slot->hotplug, 0);
}
/**
* eeh_handle_normal_event - Handle EEH events on a specific PE
* @pe: EEH PE - which should not be used after we return, as it may
@ -774,6 +866,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
struct eeh_rmv_data rmv_data =
{LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
int devices = 0;
bus = eeh_pe_bus_get(pe);
if (!bus) {
@ -782,7 +875,59 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
/*
* When devices are hot-removed we might get an EEH due to
* a driver attempting to touch the MMIO space of a removed
* device. In this case we don't have a device to recover
* so suppress the event if we can't find any present devices.
*
* The hotplug driver should take care of tearing down the
* device itself.
*/
eeh_for_each_pe(pe, tmp_pe)
eeh_pe_for_each_dev(tmp_pe, edev, tmp)
if (eeh_slot_presence_check(edev->pdev))
devices++;
if (!devices) {
pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
pe->phb->global_number, pe->addr);
goto out; /* nothing to recover */
}
/* Log the event */
if (pe->type & EEH_PE_PHB) {
pr_err("EEH: PHB#%x failure detected, location: %s\n",
pe->phb->global_number, eeh_pe_loc_get(pe));
} else {
struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
pe->phb->global_number, pe->addr);
pr_err("EEH: PE location: %s, PHB location: %s\n",
eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
}
#ifdef CONFIG_STACKTRACE
/*
* Print the saved stack trace now that we've verified there's
* something to recover.
*/
if (pe->trace_entries) {
void **ptrs = (void **) pe->stack_trace;
int i;
pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
pe->phb->global_number, pe->addr);
/* FIXME: Use the same format as dump_stack() */
pr_err("EEH: Call Trace:\n");
for (i = 0; i < pe->trace_entries; i++)
pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]);
pe->trace_entries = 0;
}
#endif /* CONFIG_STACKTRACE */
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
@ -793,6 +938,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
result = PCI_ERS_RESULT_DISCONNECT;
}
eeh_for_each_pe(pe, tmp_pe)
eeh_pe_for_each_dev(tmp_pe, edev, tmp)
edev->mode &= ~EEH_DEV_NO_HANDLER;
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
* to accomplish the reset. Each child gets a report of the
@ -969,6 +1118,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
}
out:
/*
* Clean up any PEs without devices. While marked as EEH_PE_RECOVERYING
* we don't want to modify the PE tree structure so we do it here.
*/
eeh_pe_cleanup(pe);
/* clear the slot attention LED for all recovered devices */
eeh_for_each_pe(pe, tmp_pe)
eeh_pe_for_each_dev(tmp_pe, edev, tmp)
eeh_clear_slot_attention(edev->pdev);
eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
}
@ -981,7 +1143,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
*/
void eeh_handle_special_event(void)
{
struct eeh_pe *pe, *phb_pe;
struct eeh_pe *pe, *phb_pe, *tmp_pe;
struct eeh_dev *edev, *tmp_edev;
struct pci_bus *bus;
struct pci_controller *hose;
unsigned long flags;
@ -1040,6 +1203,7 @@ void eeh_handle_special_event(void)
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
eeh_handle_normal_event(pe);
} else {
pci_lock_rescan_remove();
@ -1050,6 +1214,10 @@ void eeh_handle_special_event(void)
(phb_pe->state & EEH_PE_RECOVERING))
continue;
eeh_for_each_pe(pe, tmp_pe)
eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
edev->mode &= ~EEH_DEV_NO_HANDLER;
/* Notify all devices to be down */
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_set_channel_state(pe, pci_channel_io_perm_failure);

View File

@ -40,7 +40,6 @@ static int eeh_event_handler(void * dummy)
{
unsigned long flags;
struct eeh_event *event;
struct eeh_pe *pe;
while (!kthread_should_stop()) {
if (wait_for_completion_interruptible(&eeh_eventlist_event))
@ -59,19 +58,10 @@ static int eeh_event_handler(void * dummy)
continue;
/* We might have event without binding PE */
pe = event->pe;
if (pe) {
if (pe->type & EEH_PE_PHB)
pr_info("EEH: Detected error on PHB#%x\n",
pe->phb->global_number);
else
pr_info("EEH: Detected PCI bus error on "
"PHB#%x-PE#%x\n",
pe->phb->global_number, pe->addr);
eeh_handle_normal_event(pe);
} else {
if (event->pe)
eeh_handle_normal_event(event->pe);
else
eeh_handle_special_event();
}
kfree(event);
}
@ -121,6 +111,24 @@ int __eeh_send_failure_event(struct eeh_pe *pe)
}
event->pe = pe;
/*
* Mark the PE as recovering before inserting it in the queue.
* This prevents the PE from being free()ed by a hotplug driver
* while the PE is sitting in the event queue.
*/
if (pe) {
#ifdef CONFIG_STACKTRACE
/*
* Save the current stack trace so we can dump it from the
* event handler thread.
*/
pe->trace_entries = stack_trace_save(pe->stack_trace,
ARRAY_SIZE(pe->stack_trace), 0);
#endif /* CONFIG_STACKTRACE */
eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
}
/* We may or may not be called in an interrupt context */
spin_lock_irqsave(&eeh_eventlist_lock, flags);
list_add(&event->list, &eeh_eventlist);

View File

@ -231,29 +231,22 @@ void *eeh_pe_traverse(struct eeh_pe *root,
* The function is used to traverse the devices of the specified
* PE and its child PEs.
*/
void *eeh_pe_dev_traverse(struct eeh_pe *root,
void eeh_pe_dev_traverse(struct eeh_pe *root,
eeh_edev_traverse_func fn, void *flag)
{
struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;
void *ret;
if (!root) {
pr_warn("%s: Invalid PE %p\n",
__func__, root);
return NULL;
return;
}
/* Traverse root PE */
eeh_for_each_pe(root, pe) {
eeh_pe_for_each_dev(pe, edev, tmp) {
ret = fn(edev, flag);
if (ret)
return ret;
}
}
return NULL;
eeh_for_each_pe(root, pe)
eeh_pe_for_each_dev(pe, edev, tmp)
fn(edev, flag);
}
/**
@ -379,8 +372,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Check if the PE number is valid */
if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n",
__func__, config_addr, pdn->phb->global_number);
eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n");
return -EINVAL;
}
@ -391,42 +383,34 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* components.
*/
pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
if (pe && !(pe->type & EEH_PE_INVALID)) {
/* Mark the PE as type of PCI bus */
pe->type = EEH_PE_BUS;
edev->pe = pe;
if (pe) {
if (pe->type & EEH_PE_INVALID) {
list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
/*
* We're running to here because of PCI hotplug caused by
* EEH recovery. We need clear EEH_PE_INVALID until the top.
*/
parent = pe;
while (parent) {
if (!(parent->type & EEH_PE_INVALID))
break;
parent->type &= ~EEH_PE_INVALID;
parent = parent->parent;
}
/* Put the edev to PE */
list_add_tail(&edev->entry, &pe->edevs);
pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
pdn->phb->global_number,
pdn->busno,
PCI_SLOT(pdn->devfn),
PCI_FUNC(pdn->devfn),
pe->addr);
return 0;
} else if (pe && (pe->type & EEH_PE_INVALID)) {
list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
/*
* We're running to here because of PCI hotplug caused by
* EEH recovery. We need clear EEH_PE_INVALID until the top.
*/
parent = pe;
while (parent) {
if (!(parent->type & EEH_PE_INVALID))
break;
parent->type &= ~EEH_PE_INVALID;
parent = parent->parent;
eeh_edev_dbg(edev,
"Added to device PE (parent: PE#%x)\n",
pe->parent->addr);
} else {
/* Mark the PE as type of PCI bus */
pe->type = EEH_PE_BUS;
edev->pe = pe;
/* Put the edev to PE */
list_add_tail(&edev->entry, &pe->edevs);
eeh_edev_dbg(edev, "Added to bus PE\n");
}
pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device "
"PE#%x, Parent PE#%x\n",
pdn->phb->global_number,
pdn->busno,
PCI_SLOT(pdn->devfn),
PCI_FUNC(pdn->devfn),
pe->addr, pe->parent->addr);
return 0;
}
@ -468,13 +452,8 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
list_add_tail(&pe->child, &parent->child_list);
list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
"Device PE#%x, Parent PE#%x\n",
pdn->phb->global_number,
pdn->busno,
PCI_SLOT(pdn->devfn),
PCI_FUNC(pdn->devfn),
pe->addr, pe->parent->addr);
eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n",
pe->parent->addr);
return 0;
}
@ -491,16 +470,12 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent, *child;
bool keep, recover;
int cnt;
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
pe = eeh_dev_to_pe(edev);
if (!pe) {
pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
__func__, pdn->phb->global_number,
pdn->busno,
PCI_SLOT(pdn->devfn),
PCI_FUNC(pdn->devfn));
eeh_edev_dbg(edev, "No PE found for device.\n");
return -EEXIST;
}
@ -516,10 +491,21 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
*/
while (1) {
parent = pe->parent;
/* PHB PEs should never be removed */
if (pe->type & EEH_PE_PHB)
break;
if (!(pe->state & EEH_PE_KEEP)) {
/*
* XXX: KEEP is set while resetting a PE. I don't think it's
* ever set without RECOVERING also being set. I could
* be wrong though so catch that with a WARN.
*/
keep = !!(pe->state & EEH_PE_KEEP);
recover = !!(pe->state & EEH_PE_RECOVERING);
WARN_ON(keep && !recover);
if (!keep && !recover) {
if (list_empty(&pe->edevs) &&
list_empty(&pe->child_list)) {
list_del(&pe->child);
@ -528,6 +514,15 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
break;
}
} else {
/*
* Mark the PE as invalid. At the end of the recovery
* process any invalid PEs will be garbage collected.
*
* We need to delay the free()ing of them since we can
* remove edev's while traversing the PE tree which
* might trigger the removal of a PE and we can't
* deal with that (yet).
*/
if (list_empty(&pe->edevs)) {
cnt = 0;
list_for_each_entry(child, &pe->child_list, child) {
@ -623,13 +618,11 @@ void eeh_pe_mark_isolated(struct eeh_pe *root)
}
EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
static void __eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
{
int mode = *((int *)flag);
edev->mode |= mode;
return NULL;
}
/**
@ -717,17 +710,13 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
return;
pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
__func__, pdn->phb->global_number,
pdn->busno,
PCI_SLOT(pdn->devfn),
PCI_FUNC(pdn->devfn));
eeh_edev_dbg(edev, "Checking PCIe link...\n");
/* Check slot status */
cap = edev->pcie_cap;
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val);
if (!(val & PCI_EXP_SLTSTA_PDS)) {
pr_debug(" No card in the slot (0x%04x) !\n", val);
eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val);
return;
}
@ -736,7 +725,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
if (val & PCI_EXP_SLTCAP_PCP) {
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val);
if (val & PCI_EXP_SLTCTL_PCC) {
pr_debug(" In power-off state, power it on ...\n");
eeh_edev_dbg(edev, "In power-off state, power it on ...\n");
val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val);
@ -752,7 +741,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
/* Check link */
eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val);
if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
pr_debug(" No link reporting capability (0x%08x) \n", val);
eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val);
msleep(1000);
return;
}
@ -769,10 +758,10 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
}
if (val & PCI_EXP_LNKSTA_DLLLA)
pr_debug(" Link up (%s)\n",
eeh_edev_dbg(edev, "Link up (%s)\n",
(val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
else
pr_debug(" Link not ready (0x%04x)\n", val);
eeh_edev_dbg(edev, "Link not ready (0x%04x)\n", val);
}
#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
@ -852,7 +841,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev)
* the expansion ROM base address, the latency timer, and etc.
* from the saved values in the device node.
*/
static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
@ -864,8 +853,6 @@ static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
if (eeh_ops->restore_config && pdn)
eeh_ops->restore_config(pdn);
return NULL;
}
/**

View File

@ -230,7 +230,7 @@ transfer_to_handler_cont:
*/
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
LOAD_MSR_KERNEL(r0, MSR_KERNEL)
LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r0
SYNC
@ -304,7 +304,7 @@ stack_ovf:
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
@ -324,7 +324,7 @@ trace_syscall_entry_irq_off:
bl trace_hardirqs_on
/* Now enable for real */
LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
mtmsr r10
REST_GPR(0, r1)
@ -394,7 +394,7 @@ ret_from_syscall:
#endif
mr r6,r3
/* disable interrupts so current_thread_info()->flags can't change */
LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
SYNC
MTMSRD(r10)
@ -777,11 +777,19 @@ fast_exception_return:
1: lis r3,exc_exit_restart_end@ha
addi r3,r3,exc_exit_restart_end@l
cmplw r12,r3
#if CONFIG_PPC_BOOK3S_601
bge 2b
#else
bge 3f
#endif
lis r4,exc_exit_restart@ha
addi r4,r4,exc_exit_restart@l
cmplw r12,r4
#if CONFIG_PPC_BOOK3S_601
blt 2b
#else
blt 3f
#endif
lis r3,fee_restarts@ha
tophys(r3,r3)
lwz r5,fee_restarts@l(r3)
@ -800,9 +808,6 @@ fee_restarts:
/* aargh, we don't know which trap this is */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
3:
BEGIN_FTR_SECTION
b 2b
END_FTR_SECTION_IFSET(CPU_FTR_601)
li r10,-1
stw r10,_TRAP(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
@ -824,7 +829,7 @@ ret_from_except:
* can't change between when we test it and when we return
* from the interrupt. */
/* Note: We don't bother telling lockdep about it */
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC /* Some chip revs have problems here... */
MTMSRD(r10) /* disable interrupts */
@ -991,7 +996,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
* can restart the exception exit path at the label
* exc_exit_restart below. -- paulus
*/
LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI)
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
SYNC
MTMSRD(r10) /* clear the RI bit */
.globl exc_exit_restart
@ -1066,7 +1071,7 @@ exc_exit_restart_end:
REST_NVGPRS(r1); \
lwz r3,_MSR(r1); \
andi. r3,r3,MSR_PR; \
LOAD_MSR_KERNEL(r10,MSR_KERNEL); \
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \
bne user_exc_return; \
lwz r0,GPR0(r1); \
lwz r2,GPR2(r1); \
@ -1236,7 +1241,7 @@ recheck:
* neither. Those disable/enable cycles used to peek at
* TI_FLAGS aren't advertised.
*/
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC
MTMSRD(r10) /* disable interrupts */
lwz r9,TI_FLAGS(r2)
@ -1270,11 +1275,19 @@ nonrecoverable:
lis r10,exc_exit_restart_end@ha
addi r10,r10,exc_exit_restart_end@l
cmplw r12,r10
#ifdef CONFIG_PPC_BOOK3S_601
bgelr
#else
bge 3f
#endif
lis r11,exc_exit_restart@ha
addi r11,r11,exc_exit_restart@l
cmplw r12,r11
#ifdef CONFIG_PPC_BOOK3S_601
bltlr
#else
blt 3f
#endif
lis r10,ee_restarts@ha
lwz r12,ee_restarts@l(r10)
addi r12,r12,1
@ -1283,9 +1296,6 @@ nonrecoverable:
blr
3: /* OK, we can't recover, kill this process */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
BEGIN_FTR_SECTION
blr
END_FTR_SECTION_IFSET(CPU_FTR_601)
lwz r3,_TRAP(r1)
andi. r0,r3,1
beq 5f
@ -1329,7 +1339,7 @@ _GLOBAL(enter_rtas)
lwz r4,RTASBASE(r4)
mfmsr r9
stw r9,8(r1)
LOAD_MSR_KERNEL(r0,MSR_KERNEL)
LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
SYNC /* disable interrupts so SRR0/1 */
MTMSRD(r0) /* don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)

View File

@ -69,24 +69,20 @@ BEGIN_FTR_SECTION
bne .Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
andi. r10,r12,MSR_PR
mr r10,r1
addi r1,r1,-INT_FRAME_SIZE
beq- 1f
ld r1,PACAKSAVE(r13)
1: std r10,0(r1)
std r10,0(r1)
std r11,_NIP(r1)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
beq 2f /* if from kernel mode */
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
2: std r2,GPR2(r1)
std r2,GPR2(r1)
std r3,GPR3(r1)
mfcr r2
std r4,GPR4(r1)
@ -122,14 +118,13 @@ END_BTB_FLUSH_SECTION
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
beq 33f
/* if from user, see if there are any DTL entries to process */
/* see if there are any DTL entries to process */
ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
ld r11,PACA_DTL_RIDX(r13) /* get log read index */
addi r10,r10,LPPACA_DTLIDX
LDX_BE r10,0,r10 /* get log write index */
cmpd cr1,r11,r10
beq+ cr1,33f
cmpd r11,r10
beq+ 33f
bl accumulate_stolen_time
REST_GPR(0,r1)
REST_4GPRS(3,r1)
@ -203,6 +198,7 @@ system_call: /* label this so stack traces look sane */
mtctr r12
bctrl /* Call handler */
/* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
.Lsyscall_exit:
std r3,RESULT(r1)
@ -216,11 +212,6 @@ system_call: /* label this so stack traces look sane */
ld r12, PACA_THREAD_INFO(r13)
ld r8,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S
/* No MSR:RI on BookE */
andi. r10,r8,MSR_RI
beq- .Lunrecov_restore
#endif
/*
* This is a few instructions into the actual syscall exit path (which actually

View File

@ -750,12 +750,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
#else
LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
LOAD_REG_IMMEDIATE(r15,__end_interrupts)
#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
#else
LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
cmpld cr0, r10, r14
LOAD_REG_IMMEDIATE_SYM(r14, r15, __end_interrupts)
cmpld cr1, r10, r14
#endif
blt+ cr0,1f
bge+ cr1,1f
@ -820,12 +822,14 @@ kernel_dbg_exc:
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
#else
LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
LOAD_REG_IMMEDIATE(r15,__end_interrupts)
#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
#else
LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
cmpld cr0, r10, r14
LOAD_REG_IMMEDIATE_SYM(r14, r15,__end_interrupts)
cmpld cr1, r10, r14
#endif
blt+ cr0,1f
bge+ cr1,1f
@ -1449,7 +1453,7 @@ a2_tlbinit_code_start:
a2_tlbinit_after_linear_map:
/* Now we branch the new virtual address mapped by this entry */
LOAD_REG_IMMEDIATE(r3,1f)
LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
mtctr r3
bctr

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -34,7 +34,16 @@
#include "head_32.h"
/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
/* 601 only have IBAT */
#ifdef CONFIG_PPC_BOOK3S_601
#define LOAD_BAT(n, reg, RA, RB) \
li RA,0; \
mtspr SPRN_IBAT##n##U,RA; \
lwz RA,(n*16)+0(reg); \
lwz RB,(n*16)+4(reg); \
mtspr SPRN_IBAT##n##U,RA; \
mtspr SPRN_IBAT##n##L,RB
#else
#define LOAD_BAT(n, reg, RA, RB) \
/* see the comment for clear_bats() -- Cort */ \
li RA,0; \
@ -44,12 +53,11 @@
lwz RB,(n*16)+4(reg); \
mtspr SPRN_IBAT##n##U,RA; \
mtspr SPRN_IBAT##n##L,RB; \
beq 1f; \
lwz RA,(n*16)+8(reg); \
lwz RB,(n*16)+12(reg); \
mtspr SPRN_DBAT##n##U,RA; \
mtspr SPRN_DBAT##n##L,RB; \
1:
mtspr SPRN_DBAT##n##L,RB
#endif
__HEAD
.stabs "arch/powerpc/kernel/",N_SO,0,0,0f
@ -557,9 +565,9 @@ DataStoreTLBMiss:
cmplw 0,r1,r3
mfspr r2, SPRN_SPRG_PGDIR
#ifdef CONFIG_SWAP
li r1, _PAGE_RW | _PAGE_PRESENT | _PAGE_ACCESSED
li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
#else
li r1, _PAGE_RW | _PAGE_PRESENT
li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
#endif
bge- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
@ -820,9 +828,6 @@ load_up_mmu:
/* Load the BAT registers with the values set up by MMU_init.
MMU_init takes care of whether we're on a 601 or not. */
mfpvr r3
srwi r3,r3,16
cmpwi r3,1
lis r3,BATS@ha
addi r3,r3,BATS@l
tophys(r3,r3)
@ -897,9 +902,11 @@ start_here:
bl machine_init
bl __save_cpu_setup
bl MMU_init
#ifdef CONFIG_KASAN
BEGIN_MMU_FTR_SECTION
bl MMU_init_hw_patch
END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
#endif
/*
* Go back to running unmapped so we can load up new values
@ -996,11 +1003,8 @@ EXPORT_SYMBOL(switch_mmu_context)
*/
clear_bats:
li r10,0
mfspr r9,SPRN_PVR
rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
cmpwi r9, 1
beq 1f
#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT0U,r10
mtspr SPRN_DBAT0L,r10
mtspr SPRN_DBAT1U,r10
@ -1009,7 +1013,7 @@ clear_bats:
mtspr SPRN_DBAT2L,r10
mtspr SPRN_DBAT3U,r10
mtspr SPRN_DBAT3L,r10
1:
#endif
mtspr SPRN_IBAT0U,r10
mtspr SPRN_IBAT0L,r10
mtspr SPRN_IBAT1U,r10
@ -1104,10 +1108,7 @@ mmu_off:
*/
initial_bats:
lis r11,PAGE_OFFSET@h
mfspr r9,SPRN_PVR
rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
cmpwi 0,r9,1
bne 4f
#ifdef CONFIG_PPC_BOOK3S_601
ori r11,r11,4 /* set up BAT registers for 601 */
li r8,0x7f /* valid, block length = 8MB */
mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
@ -1120,10 +1121,8 @@ initial_bats:
addis r8,r8,0x800000@h
mtspr SPRN_IBAT2U,r11
mtspr SPRN_IBAT2L,r8
isync
blr
4: tophys(r8,r11)
#else
tophys(r8,r11)
#ifdef CONFIG_SMP
ori r8,r8,0x12 /* R/W access, M=1 */
#else
@ -1135,10 +1134,10 @@ initial_bats:
mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
mtspr SPRN_IBAT0L,r8
mtspr SPRN_IBAT0U,r11
#endif
isync
blr
#ifdef CONFIG_BOOTX_TEXT
setup_disp_bat:
/*
@ -1153,15 +1152,13 @@ setup_disp_bat:
beqlr
lwz r11,0(r8)
lwz r8,4(r8)
mfspr r9,SPRN_PVR
rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
cmpwi 0,r9,1
beq 1f
#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT3L,r8
mtspr SPRN_DBAT3U,r11
blr
1: mtspr SPRN_IBAT3L,r8
#else
mtspr SPRN_IBAT3L,r8
mtspr SPRN_IBAT3U,r11
#endif
blr
#endif /* CONFIG_BOOTX_TEXT */

View File

@ -4,19 +4,6 @@
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
/*
* MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
*/
.macro __LOAD_MSR_KERNEL r, x
.if \x >= 0x8000
lis \r, (\x)@h
ori \r, \r, (\x)@l
.else
li \r, (\x)
.endif
.endm
#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x
/*
* Exception entry code. This code runs with address translation
* turned off, i.e. using physical addresses.
@ -92,7 +79,7 @@
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#else
LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
MTMSRD(r10) /* (except for mach check in rtas) */
#endif
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
@ -140,10 +127,10 @@
* otherwise we might risk taking an interrupt before we tell lockdep
* they are enabled.
*/
LOAD_MSR_KERNEL(r10, MSR_KERNEL)
LOAD_REG_IMMEDIATE(r10, MSR_KERNEL)
rlwimi r10, r9, 0, MSR_EE
#else
LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
#endif
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
@ -187,7 +174,7 @@ label:
#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
li r10,trap; \
stw r10,_TRAP(r11); \
LOAD_MSR_KERNEL(r10, msr); \
LOAD_REG_IMMEDIATE(r10, msr); \
bl tfer; \
.long hdlr; \
.long ret

View File

@ -182,7 +182,8 @@ __secondary_hold:
isync
bctr
#else
BUG_OPCODE
0: trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
#endif
CLOSE_FIXED_SECTION(first_256B)
@ -635,7 +636,7 @@ __after_prom_start:
sub r5,r5,r11
#else
/* just copy interrupts */
LOAD_REG_IMMEDIATE(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
LOAD_REG_IMMEDIATE_SYM(r5, r11, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
#endif
b 5f
3:
@ -998,7 +999,8 @@ start_here_common:
bl start_kernel
/* Not reached */
BUG_OPCODE
trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
/*
* We put a few things here that have to be page-aligned.

View File

@ -15,6 +15,7 @@
*/
#include <linux/init.h>
#include <linux/magic.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
@ -574,8 +575,6 @@ InstructionBreakpoint:
* by decoding the registers used by the dcbx instruction and adding them.
* DAR is set to the calculated address.
*/
/* define if you don't want to use self modifying code */
#define NO_SELF_MODIFYING_CODE
FixupDAR:/* Entry point for dcbx workaround. */
mtspr SPRN_M_TW, r10
/* fetch instruction from memory. */
@ -639,27 +638,6 @@ FixupDAR:/* Entry point for dcbx workaround. */
rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
mtspr SPRN_DSISR, r10
142: /* continue, it was a dcbx, dcbi instruction. */
#ifndef NO_SELF_MODIFYING_CODE
andis. r10,r11,0x1f /* test if reg RA is r0 */
li r10,modified_instr@l
dcbtst r0,r10 /* touch for store */
rlwinm r11,r11,0,0,20 /* Zero lower 10 bits */
oris r11,r11,640 /* Transform instr. to a "add r10,RA,RB" */
ori r11,r11,532
stw r11,0(r10) /* store add/and instruction */
dcbf 0,r10 /* flush new instr. to memory. */
icbi 0,r10 /* invalidate instr. cache line */
mfspr r11, SPRN_SPRG_SCRATCH1 /* restore r11 */
mfspr r10, SPRN_SPRG_SCRATCH0 /* restore r10 */
isync /* Wait until new instr is loaded from memory */
modified_instr:
.space 4 /* this is where the add instr. is stored */
bne+ 143f
subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */
143: mtdar r10 /* store faulting EA in DAR */
mfspr r10,SPRN_M_TW
b DARFixed /* Go back to normal TLB handling */
#else
mfctr r10
mtdar r10 /* save ctr reg in DAR */
rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB */
@ -723,7 +701,6 @@ modified_instr:
add r10, r10, r11 /* add it */
mfctr r11 /* restore r11 */
b 151b
#endif
/*
* This is where the main kernel code starts.
@ -741,6 +718,9 @@ start_here:
/* stack */
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
lis r0, STACK_END_MAGIC@h
ori r0, r0, STACK_END_MAGIC@l
stw r0, 0(r1)
li r0,0
stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)

View File

@ -195,18 +195,63 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
tsk->thread.last_hit_ubp = NULL;
}
static bool is_larx_stcx_instr(struct pt_regs *regs, unsigned int instr)
{
int ret, type;
struct instruction_op op;
ret = analyse_instr(&op, regs, instr);
type = GETTYPE(op.type);
return (!ret && (type == LARX || type == STCX));
}
/*
* Handle debug exception notifications.
*/
static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
unsigned long addr)
{
unsigned int instr = 0;
if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
goto fail;
if (is_larx_stcx_instr(regs, instr)) {
printk_ratelimited("Breakpoint hit on instruction that can't be emulated."
" Breakpoint at 0x%lx will be disabled.\n", addr);
goto disable;
}
/* Do not emulate user-space instructions, instead single-step them */
if (user_mode(regs)) {
current->thread.last_hit_ubp = bp;
regs->msr |= MSR_SE;
return false;
}
if (!emulate_step(regs, instr))
goto fail;
return true;
fail:
/*
* We've failed in reliably handling the hw-breakpoint. Unregister
* it and throw a warning message to let the user know about it.
*/
WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
"0x%lx will be disabled.", addr);
disable:
perf_event_disable_inatomic(bp);
return false;
}
int hw_breakpoint_handler(struct die_args *args)
{
int rc = NOTIFY_STOP;
struct perf_event *bp;
struct pt_regs *regs = args->regs;
#ifndef CONFIG_PPC_8xx
int stepped = 1;
unsigned int instr;
#endif
struct arch_hw_breakpoint *info;
unsigned long dar = regs->dar;
@ -251,31 +296,9 @@ int hw_breakpoint_handler(struct die_args *args)
(dar - bp->attr.bp_addr < bp->attr.bp_len)))
info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
#ifndef CONFIG_PPC_8xx
/* Do not emulate user-space instructions, instead single-step them */
if (user_mode(regs)) {
current->thread.last_hit_ubp = bp;
regs->msr |= MSR_SE;
if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info->address))
goto out;
}
stepped = 0;
instr = 0;
if (!__get_user_inatomic(instr, (unsigned int *) regs->nip))
stepped = emulate_step(regs, instr);
/*
* emulate_step() could not execute it. We've failed in reliably
* handling the hw-breakpoint. Unregister it and throw a warning
* message to let the user know about it.
*/
if (!stepped) {
WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
"0x%lx will be disabled.", info->address);
perf_event_disable_inatomic(bp);
goto out;
}
#endif
/*
* As a policy, the callback is invoked in a 'trigger-after-execute'
* fashion

View File

@ -149,8 +149,8 @@ static const struct ppc_pci_io iowa_pci_io = {
};
#ifdef CONFIG_PPC_INDIRECT_MMIO
static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
pgprot_t prot, void *caller)
void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
pgprot_t prot, void *caller)
{
struct iowa_bus *bus;
void __iomem *res = __ioremap_caller(addr, size, prot, caller);
@ -163,20 +163,17 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
}
return res;
}
#else /* CONFIG_PPC_INDIRECT_MMIO */
#define iowa_ioremap NULL
#endif /* !CONFIG_PPC_INDIRECT_MMIO */
bool io_workaround_inited;
/* Enable IO workaround */
static void io_workaround_init(void)
{
static int io_workaround_inited;
if (io_workaround_inited)
return;
ppc_pci_io = iowa_pci_io;
ppc_md.ioremap = iowa_ioremap;
io_workaround_inited = 1;
io_workaround_inited = true;
}
/* Register new bus to support workaround */

View File

@ -633,11 +633,54 @@ static void iommu_table_clear(struct iommu_table *tbl)
#endif
}
static void iommu_table_reserve_pages(struct iommu_table *tbl,
unsigned long res_start, unsigned long res_end)
{
int i;
WARN_ON_ONCE(res_end < res_start);
/*
* Reserve page 0 so it will not be used for any mappings.
* This avoids buggy drivers that consider page 0 to be invalid
* to crash the machine or even lose data.
*/
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
tbl->it_reserved_start = res_start;
tbl->it_reserved_end = res_end;
/* Check if res_start..res_end isn't empty and overlaps the table */
if (res_start && res_end &&
(tbl->it_offset + tbl->it_size < res_start ||
res_end < tbl->it_offset))
return;
for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
set_bit(i - tbl->it_offset, tbl->it_map);
}
static void iommu_table_release_pages(struct iommu_table *tbl)
{
int i;
/*
* In case we have reserved the first bit, we should not emit
* the warning below.
*/
if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);
for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
clear_bit(i - tbl->it_offset, tbl->it_map);
}
/*
* Build a iommu_table structure. This contains a bit map which
* is used to manage allocation of the tce space.
*/
struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
unsigned long res_start, unsigned long res_end)
{
unsigned long sz;
static int welcomed = 0;
@ -656,13 +699,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
tbl->it_map = page_address(page);
memset(tbl->it_map, 0, sz);
/*
* Reserve page 0 so it will not be used for any mappings.
* This avoids buggy drivers that consider page 0 to be invalid
* to crash the machine or even lose data.
*/
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
iommu_table_reserve_pages(tbl, res_start, res_end);
/* We only split the IOMMU table if we have 1GB or more of space */
if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
@ -714,12 +751,7 @@ static void iommu_table_free(struct kref *kref)
return;
}
/*
* In case we have reserved the first bit, we should not emit
* the warning below.
*/
if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);
iommu_table_release_pages(tbl);
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
@ -981,29 +1013,32 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
{
long ret;
unsigned long size = 0;
ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL)) &&
!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
&size))
SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
/* if (unlikely(ret))
pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
__func__, hwaddr, entry << tbl->it_page_shift,
hwaddr, ret); */
return ret;
}
EXPORT_SYMBOL_GPL(iommu_tce_xchg);
EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill);
void iommu_tce_kill(struct iommu_table *tbl,
unsigned long entry, unsigned long pages)
{
if (tbl->it_ops->tce_kill)
tbl->it_ops->tce_kill(tbl, entry, pages, false);
}
EXPORT_SYMBOL_GPL(iommu_tce_kill);
int iommu_take_ownership(struct iommu_table *tbl)
{
@ -1017,22 +1052,21 @@ int iommu_take_ownership(struct iommu_table *tbl)
* requires exchange() callback defined so if it is not
* implemented, we disallow taking ownership over the table.
*/
if (!tbl->it_ops->exchange)
if (!tbl->it_ops->xchg_no_kill)
return -EINVAL;
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
spin_lock(&tbl->pools[i].lock);
if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);
iommu_table_release_pages(tbl);
if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
pr_err("iommu_tce: it_map is not empty");
ret = -EBUSY;
/* Restore bit#0 set by iommu_init_table() */
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
/* Undo iommu_table_release_pages, i.e. restore bit#0, etc */
iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
tbl->it_reserved_end);
} else {
memset(tbl->it_map, 0xff, sz);
}
@ -1055,9 +1089,8 @@ void iommu_release_ownership(struct iommu_table *tbl)
memset(tbl->it_map, 0, sz);
/* Restore bit#0 set by iommu_init_table() */
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
tbl->it_reserved_end);
for (i = 0; i < tbl->nr_pools; i++)
spin_unlock(&tbl->pools[i].lock);

View File

@ -64,16 +64,17 @@
#define KVM_INST_MTSRIN 0x7c0001e4
static bool kvm_patching_worked = true;
char kvm_tmp[1024 * 1024];
extern char kvm_tmp[];
extern char kvm_tmp_end[];
static int kvm_tmp_index;
static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
static void __init kvm_patch_ins(u32 *inst, u32 new_inst)
{
*inst = new_inst;
flush_icache_range((ulong)inst, (ulong)inst + 4);
}
static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
static void __init kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@ -82,7 +83,7 @@ static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
#endif
}
static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
static void __init kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@ -91,12 +92,12 @@ static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
#endif
}
static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
static void __init kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff));
}
static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
static void __init kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc));
@ -105,17 +106,17 @@ static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
#endif
}
static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
static void __init kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc));
}
static void kvm_patch_ins_nop(u32 *inst)
static void __init kvm_patch_ins_nop(u32 *inst)
{
kvm_patch_ins(inst, KVM_INST_NOP);
}
static void kvm_patch_ins_b(u32 *inst, int addr)
static void __init kvm_patch_ins_b(u32 *inst, int addr)
{
#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S)
/* On relocatable kernels interrupts handlers and our code
@ -128,11 +129,11 @@ static void kvm_patch_ins_b(u32 *inst, int addr)
kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK));
}
static u32 *kvm_alloc(int len)
static u32 * __init kvm_alloc(int len)
{
u32 *p;
if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) {
if ((kvm_tmp_index + len) > (kvm_tmp_end - kvm_tmp)) {
printk(KERN_ERR "KVM: No more space (%d + %d)\n",
kvm_tmp_index, len);
kvm_patching_worked = false;
@ -151,7 +152,7 @@ extern u32 kvm_emulate_mtmsrd_orig_ins_offs;
extern u32 kvm_emulate_mtmsrd_len;
extern u32 kvm_emulate_mtmsrd[];
static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
static void __init kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@ -204,7 +205,7 @@ extern u32 kvm_emulate_mtmsr_orig_ins_offs;
extern u32 kvm_emulate_mtmsr_len;
extern u32 kvm_emulate_mtmsr[];
static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
static void __init kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@ -265,7 +266,7 @@ extern u32 kvm_emulate_wrtee_orig_ins_offs;
extern u32 kvm_emulate_wrtee_len;
extern u32 kvm_emulate_wrtee[];
static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
static void __init kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
{
u32 *p;
int distance_start;
@ -322,7 +323,7 @@ extern u32 kvm_emulate_wrteei_0_branch_offs;
extern u32 kvm_emulate_wrteei_0_len;
extern u32 kvm_emulate_wrteei_0[];
static void kvm_patch_ins_wrteei_0(u32 *inst)
static void __init kvm_patch_ins_wrteei_0(u32 *inst)
{
u32 *p;
int distance_start;
@ -363,7 +364,7 @@ extern u32 kvm_emulate_mtsrin_orig_ins_offs;
extern u32 kvm_emulate_mtsrin_len;
extern u32 kvm_emulate_mtsrin[];
static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
static void __init kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
{
u32 *p;
int distance_start;
@ -399,7 +400,7 @@ static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
#endif
static void kvm_map_magic_page(void *data)
static void __init kvm_map_magic_page(void *data)
{
u32 *features = data;
@ -414,7 +415,7 @@ static void kvm_map_magic_page(void *data)
*features = out[0];
}
static void kvm_check_ins(u32 *inst, u32 features)
static void __init kvm_check_ins(u32 *inst, u32 features)
{
u32 _inst = *inst;
u32 inst_no_rt = _inst & ~KVM_MASK_RT;
@ -658,7 +659,7 @@ static void kvm_check_ins(u32 *inst, u32 features)
extern u32 kvm_template_start[];
extern u32 kvm_template_end[];
static void kvm_use_magic_page(void)
static void __init kvm_use_magic_page(void)
{
u32 *p;
u32 *start, *end;
@ -699,25 +700,13 @@ static void kvm_use_magic_page(void)
kvm_patching_worked ? "worked" : "failed");
}
static __init void kvm_free_tmp(void)
{
/*
* Inform kmemleak about the hole in the .bss section since the
* corresponding pages will be unmapped with DEBUG_PAGEALLOC=y.
*/
kmemleak_free_part(&kvm_tmp[kvm_tmp_index],
ARRAY_SIZE(kvm_tmp) - kvm_tmp_index);
free_reserved_area(&kvm_tmp[kvm_tmp_index],
&kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
}
static int __init kvm_guest_init(void)
{
if (!kvm_para_available())
goto free_tmp;
return 0;
if (!epapr_paravirt_enabled)
goto free_tmp;
return 0;
if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
kvm_use_magic_page();
@ -727,9 +716,6 @@ static int __init kvm_guest_init(void)
powersave_nap = 1;
#endif
free_tmp:
kvm_free_tmp();
return 0;
}

View File

@ -192,6 +192,8 @@ kvm_emulate_mtmsr_orig_ins_offs:
kvm_emulate_mtmsr_len:
.long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
#ifdef CONFIG_BOOKE
/* also used for wrteei 1 */
.global kvm_emulate_wrtee
kvm_emulate_wrtee:
@ -285,6 +287,10 @@ kvm_emulate_wrteei_0_branch_offs:
kvm_emulate_wrteei_0_len:
.long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4
#endif /* CONFIG_BOOKE */
#ifdef CONFIG_PPC_BOOK3S_32
.global kvm_emulate_mtsrin
kvm_emulate_mtsrin:
@ -334,5 +340,15 @@ kvm_emulate_mtsrin_orig_ins_offs:
kvm_emulate_mtsrin_len:
.long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
#endif /* CONFIG_PPC_BOOK3S_32 */
.balign 4
.global kvm_tmp
kvm_tmp:
.space (64 * 1024)
.global kvm_tmp_end
kvm_tmp_end:
.global kvm_template_end
kvm_template_end:

View File

@ -29,6 +29,8 @@
#include <asm/smp.h>
#include <asm/hw_breakpoint.h>
#include <asm/asm-prototypes.h>
#include <asm/svm.h>
#include <asm/ultravisor.h>
int default_machine_kexec_prepare(struct kimage *image)
{
@ -327,6 +329,13 @@ void default_machine_kexec(struct kimage *image)
#ifdef CONFIG_PPC_PSERIES
kexec_paca.lppaca_ptr = NULL;
#endif
if (is_secure_guest() && !(image->preserve_context ||
image->type == KEXEC_TYPE_CRASH)) {
uv_unshare_all_pages();
printk("kexec: Unshared all shared pages.\n");
}
paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
setup_paca(&kexec_paca);

Some files were not shown because too many files have changed in this diff Show More