linux/arch/powerpc/platforms/pseries/phyp_dump.c

514 lines
15 KiB
C
Raw Normal View History

/*
* Hypervisor-assisted dump
*
* Linas Vepstas, Manish Ahuja 2008
* Copyright 2008 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. 
This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 09:04:11 +01:00
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/kobject.h>
#include <linux/mm.h>
#include <linux/of.h>
#include <linux/pfn.h>
#include <linux/swap.h>
#include <linux/sysfs.h>
#include <asm/page.h>
#include <asm/phyp_dump.h>
#include <asm/machdep.h>
#include <asm/prom.h>
#include <asm/rtas.h>
/*
 * Variables, used to communicate data between early boot and late boot.
 * phyp_dump_vars is the backing storage; all code in this file reaches it
 * through the non-static phyp_dump_info pointer.
 */
static struct phyp_dump phyp_dump_vars;
struct phyp_dump *phyp_dump_info = &phyp_dump_vars;
/* RTAS token for "ibm,configure-kernel-dump", looked up in phyp_dump_setup() */
static int ibm_configure_kernel_dump;
/* ------------------------------------------------- */
/* RTAS interfaces to declare the dump regions */
/*
 * One section entry of the dump header.  init_dump_header() fills in
 * dump_flags, source_type, source_address, source_length and
 * destination_address; error_flags and length_copied are never written
 * by this file, only printed or reported.
 */
struct dump_section {
/* set to DUMP_REQUEST_FLAG by init_dump_header() */
u32 dump_flags;
/* one of the DUMP_SOURCE_* values */
u16 source_type;
u16 error_flags;
u64 source_address;
u64 source_length;
u64 length_copied;
/* offset/address of this section inside the dump save area */
u64 destination_address;
};
/*
 * Dump header.  This is the buffer handed to the
 * "ibm,configure-kernel-dump" RTAS call by register_dump_area() and
 * invalidate_last_dump(), and the same layout is used to interpret the
 * /rtas "ibm,kernel-dump" property in phyp_dump_setup().
 */
struct phyp_dump_header {
/* DUMP_HEADER_VERSION */
u32 version;
/* NUM_DUMP_SECTIONS */
u16 num_of_sections;
/* tested against DUMP_ERROR_FLAG in phyp_dump_setup() */
u16 status;
/* byte offset of cpu_data within this structure */
u32 first_offset_section;
/* the dump-disk fields below are all set to 0 by init_dump_header() */
u32 dump_disk_section;
u64 block_num_dd;
u64 num_of_blocks_dd;
u32 offset_dd;
/* 0 means disabled */
u32 maxtime_to_auto;
/* No dump disk path string used */
struct dump_section cpu_data;
struct dump_section hpte_data;
struct dump_section kernel_data;
};
/* The dump header *must be* in low memory, so .bss it */
static struct phyp_dump_header phdr;
/* Number of struct dump_section entries in the header (cpu, hpte, kernel) */
#define NUM_DUMP_SECTIONS 3
#define DUMP_HEADER_VERSION 0x1
/* Value stored in dump_section.dump_flags for every section */
#define DUMP_REQUEST_FLAG 0x1
/* Values stored in dump_section.source_type */
#define DUMP_SOURCE_CPU 0x0001
#define DUMP_SOURCE_HPTE 0x0002
#define DUMP_SOURCE_RMO 0x0011
/*
 * Bits of phyp_dump_header.status.  Only DUMP_ERROR_FLAG is tested in
 * this file (phyp_dump_setup()); the other two are not referenced here.
 */
#define DUMP_ERROR_FLAG 0x2000
#define DUMP_TRIGGERED 0x4000
#define DUMP_PERFORMED 0x8000
/**
* init_dump_header() - initialize the header declaring a dump
* Returns: length of dump save area.
*
* When the hypervisor saves crashed state, it needs to put
* it somewhere. The dump header tells the hypervisor where
* the data can be saved.
*/
static unsigned long init_dump_header(struct phyp_dump_header *ph)
{
unsigned long addr_offset = 0;
/* Set up the dump header */
ph->version = DUMP_HEADER_VERSION;
ph->num_of_sections = NUM_DUMP_SECTIONS;
ph->status = 0;
ph->first_offset_section =
(u32)offsetof(struct phyp_dump_header, cpu_data);
ph->dump_disk_section = 0;
ph->block_num_dd = 0;
ph->num_of_blocks_dd = 0;
ph->offset_dd = 0;
ph->maxtime_to_auto = 0; /* disabled */
/* The first two sections are mandatory */
ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG;
ph->cpu_data.source_type = DUMP_SOURCE_CPU;
ph->cpu_data.source_address = 0;
ph->cpu_data.source_length = phyp_dump_info->cpu_state_size;
ph->cpu_data.destination_address = addr_offset;
addr_offset += phyp_dump_info->cpu_state_size;
ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG;
ph->hpte_data.source_type = DUMP_SOURCE_HPTE;
ph->hpte_data.source_address = 0;
ph->hpte_data.source_length = phyp_dump_info->hpte_region_size;
ph->hpte_data.destination_address = addr_offset;
addr_offset += phyp_dump_info->hpte_region_size;
/* This section describes the low kernel region */
ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG;
ph->kernel_data.source_type = DUMP_SOURCE_RMO;
ph->kernel_data.source_address = PHYP_DUMP_RMR_START;
ph->kernel_data.source_length = PHYP_DUMP_RMR_END;
ph->kernel_data.destination_address = addr_offset;
addr_offset += ph->kernel_data.source_length;
return addr_offset;
}
/*
 * Debug helper: dump every field of a dump header to the kernel log.
 * Compiles to an empty function unless DEBUG is defined.  A NULL
 * header is silently ignored.
 */
static void print_dump_header(const struct phyp_dump_header *ph)
{
#ifdef DEBUG
	const struct dump_section *cpu, *hpte, *srsd;

	if (!ph)
		return;

	cpu = &ph->cpu_data;
	hpte = &ph->hpte_data;
	srsd = &ph->kernel_data;

	printk(KERN_INFO "dump header:\n");
	/* setup some ph->sections required */
	printk(KERN_INFO "version = %d\n", ph->version);
	printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);
	printk(KERN_INFO "Status = 0x%x\n", ph->status);

	/* No ph->disk, so all should be set to 0 */
	printk(KERN_INFO "Offset to first section 0x%x\n",
		ph->first_offset_section);
	printk(KERN_INFO "dump disk sections should be zero\n");
	printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section);
	printk(KERN_INFO "block num = %lld\n", ph->block_num_dd);
	printk(KERN_INFO "number of blocks = %lld\n", ph->num_of_blocks_dd);
	printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd);
	printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto);

	/*set cpu state and hpte states as well scratch pad area */
	printk(KERN_INFO " CPU AREA\n");
	printk(KERN_INFO "cpu dump_flags =%d\n", cpu->dump_flags);
	printk(KERN_INFO "cpu source_type =%d\n", cpu->source_type);
	printk(KERN_INFO "cpu error_flags =%d\n", cpu->error_flags);
	printk(KERN_INFO "cpu source_address =%llx\n",
		cpu->source_address);
	printk(KERN_INFO "cpu source_length =%llx\n",
		cpu->source_length);
	printk(KERN_INFO "cpu length_copied =%llx\n",
		cpu->length_copied);

	printk(KERN_INFO " HPTE AREA\n");
	printk(KERN_INFO "HPTE dump_flags =%d\n", hpte->dump_flags);
	printk(KERN_INFO "HPTE source_type =%d\n", hpte->source_type);
	printk(KERN_INFO "HPTE error_flags =%d\n", hpte->error_flags);
	printk(KERN_INFO "HPTE source_address =%llx\n",
		hpte->source_address);
	printk(KERN_INFO "HPTE source_length =%llx\n",
		hpte->source_length);
	printk(KERN_INFO "HPTE length_copied =%llx\n",
		hpte->length_copied);

	printk(KERN_INFO " SRSD AREA\n");
	printk(KERN_INFO "SRSD dump_flags =%d\n", srsd->dump_flags);
	printk(KERN_INFO "SRSD source_type =%d\n", srsd->source_type);
	printk(KERN_INFO "SRSD error_flags =%d\n", srsd->error_flags);
	printk(KERN_INFO "SRSD source_address =%llx\n",
		srsd->source_address);
	printk(KERN_INFO "SRSD source_length =%llx\n",
		srsd->source_length);
	printk(KERN_INFO "SRSD length_copied =%llx\n",
		srsd->length_copied);
#endif
}
/*
 * sysfs "show" handler for the phyp_dump_active file.  The file exists
 * so that kdump can tell that phyp-dump is in use; its content is the
 * phyp_dump_at_boot value printed in hex.
 */
static ssize_t show_phyp_dump_active(struct kobject *kobj,
			struct kobj_attribute *attr, char *buf)
{
	ssize_t written;

	written = sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot);

	return written;
}
/*
 * sysfs attribute "phyp_dump_active"; created under kernel_kobj by
 * register_dump_area() once registration with RTAS succeeded.
 * NOTE(review): mode 0600 grants owner write permission although no
 * store handler is supplied (NULL) - confirm whether 0400 was intended.
 */
static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600,
show_phyp_dump_active,
NULL);
static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr)
{
int rc;
/* Add addr value if not initialized before */
if (ph->cpu_data.destination_address == 0) {
ph->cpu_data.destination_address += addr;
ph->hpte_data.destination_address += addr;
ph->kernel_data.destination_address += addr;
}
/* ToDo Invalidate kdump and free memory range. */
do {
rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
1, ph, sizeof(struct phyp_dump_header));
} while (rtas_busy_delay(rc));
if (rc) {
printk(KERN_ERR "phyp-dump: unexpected error (%d) on "
"register\n", rc);
print_dump_header(ph);
return;
}
rc = sysfs_create_file(kernel_kobj, &pdl.attr);
if (rc)
printk(KERN_ERR "phyp-dump: unable to create sysfs"
" file (%d)\n", rc);
}
static
void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)
{
int rc;
/* Add addr value if not initialized before */
if (ph->cpu_data.destination_address == 0) {
ph->cpu_data.destination_address += addr;
ph->hpte_data.destination_address += addr;
ph->kernel_data.destination_address += addr;
}
do {
rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
2, ph, sizeof(struct phyp_dump_header));
} while (rtas_busy_delay(rc));
if (rc) {
printk(KERN_ERR "phyp-dump: unexpected error (%d) "
"on invalidate\n", rc);
print_dump_header(ph);
}
}
/* ------------------------------------------------- */
/**
 * release_memory_range -- release memory previously memblock_reserved
 * @start_pfn: starting physical frame number
 * @nr_pages: number of pages to free.
 *
 * This routine will release memory that had been previously
 * memblock_reserved in early boot. The released memory becomes
 * available for general use.
 *
 * Only pages that still carry the Reserved flag are released, so a page
 * that was already handed back is skipped.  Exactly @nr_pages frames,
 * [@start_pfn, @start_pfn + @nr_pages), are examined; @nr_pages == 0
 * is a no-op.
 */
static void release_memory_range(unsigned long start_pfn,
			unsigned long nr_pages)
{
	struct page *rpage;
	unsigned long end_pfn;
	unsigned long pfn;

	end_pfn = start_pfn + nr_pages;

	/*
	 * end_pfn is one past the last frame of the range, so the bound
	 * must be exclusive; an inclusive bound would free one extra page
	 * that the caller never asked to release.
	 */
	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		rpage = pfn_to_page(pfn);
		if (PageReserved(rpage)) {
			ClearPageReserved(rpage);
			init_page_count(rpage);
			__free_page(rpage);
			totalram_pages++;
		}
	}
}
/**
* track_freed_range -- Counts the range being freed.
* Once the counter goes to zero, it re-registers dump for
* future use.
*/
static void
track_freed_range(unsigned long addr, unsigned long length)
{
static unsigned long scratch_area_size, reserved_area_size;
if (addr < phyp_dump_info->init_reserve_start)
return;
if ((addr >= phyp_dump_info->init_reserve_start) &&
(addr <= phyp_dump_info->init_reserve_start +
phyp_dump_info->init_reserve_size))
reserved_area_size += length;
if ((addr >= phyp_dump_info->reserved_scratch_addr) &&
(addr <= phyp_dump_info->reserved_scratch_addr +
phyp_dump_info->reserved_scratch_size))
scratch_area_size += length;
if ((reserved_area_size == phyp_dump_info->init_reserve_size) &&
(scratch_area_size == phyp_dump_info->reserved_scratch_size)) {
invalidate_last_dump(&phdr,
phyp_dump_info->reserved_scratch_addr);
register_dump_area(&phdr,
phyp_dump_info->reserved_scratch_addr);
}
}
/* ------------------------------------------------- */
/**
 * sysfs_release_region -- sysfs interface to release memory range.
 *
 * Usage:
 * "echo <start addr> <length> > /sys/kernel/release_region"
 *
 * Example:
 * "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
 *
 * will release 256MB starting at 1GB.
 *
 * Both values are parsed as hexadecimal.  The request is clamped to the
 * area reserved for phyp-dump; a start address at or beyond the end of
 * that area is rejected.
 *
 * Returns @count on success, -EINVAL if the input cannot be parsed or
 * the start address lies past the reserved area.
 */
static ssize_t store_release_region(struct kobject *kobj,
				struct kobj_attribute *attr,
				const char *buf, size_t count)
{
	unsigned long start_addr, length, end_addr;
	unsigned long start_pfn, nr_pages;

	if (sscanf(buf, "%lx %lx", &start_addr, &length) != 2)
		return -EINVAL;

	end_addr = phyp_dump_info->init_reserve_start +
			phyp_dump_info->init_reserve_size;

	/*
	 * A start address at or past the end of the reserved area has
	 * nothing to release.  Without this check the clamp below would
	 * compute end_addr - start_addr with start_addr > end_addr, which
	 * wraps around to a huge length and frees unrelated memory.
	 */
	if (start_addr >= end_addr)
		return -EINVAL;

	/* Accounting uses the values exactly as written by the user */
	track_freed_range(start_addr, length);

	/* Range-check - don't free any reserved memory that
	 * wasn't reserved for phyp-dump */
	if (start_addr < phyp_dump_info->init_reserve_start)
		start_addr = phyp_dump_info->init_reserve_start;

	/*
	 * Compare against the space left rather than start_addr + length,
	 * which could overflow for a large user-supplied length.
	 */
	if (length > end_addr - start_addr)
		length = end_addr - start_addr;

	/* Release the region of memory passed in by user */
	start_pfn = PFN_DOWN(start_addr);
	nr_pages = PFN_DOWN(length);

	/* Less than one full page left after clamping: nothing to free */
	if (nr_pages)
		release_memory_range(start_pfn, nr_pages);

	return count;
}
static ssize_t show_release_region(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
u64 second_addr_range;
/* total reserved size - start of scratch area */
second_addr_range = phyp_dump_info->init_reserve_size -
phyp_dump_info->reserved_scratch_size;
return sprintf(buf, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:"
" DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n",
phdr.cpu_data.destination_address,
phdr.cpu_data.length_copied,
phdr.hpte_data.destination_address,
phdr.hpte_data.length_copied,
phdr.kernel_data.destination_address,
phdr.kernel_data.length_copied,
phyp_dump_info->init_reserve_start,
second_addr_range);
}
/*
 * sysfs attribute "release_region" (mode 0600): reading reports the dump
 * section layout, writing "<start> <length>" releases reserved memory.
 * Created under kernel_kobj by phyp_dump_setup() when a valid dump is
 * waiting.
 */
static struct kobj_attribute rr = __ATTR(release_region, 0600,
show_release_region,
store_release_region);
/*
 * Late-boot setup of hypervisor-assisted dump.
 *
 * Does nothing when no memory was reserved in early boot, and returns
 * -ENOSYS when phyp dump is not configured.  Otherwise:
 *  - no "ibm,kernel-dump" property under /rtas: register a fresh dump area;
 *  - property present but its status has DUMP_ERROR_FLAG set: invalidate
 *    the old dump and register again;
 *  - valid dump waiting: record where the scratch area is and create the
 *    release_region sysfs file so the memory can be released later.
 *
 * Returns 0 in all cases except the -ENOSYS one.
 */
static int __init phyp_dump_setup(void)
{
	const struct phyp_dump_header *old_hdr = NULL;
	struct device_node *rtas_node;
	unsigned long area_start;
	unsigned long area_len;
	int prop_len = 0;
	int err;

	/* If no memory was reserved in early boot, there is nothing to do */
	if (phyp_dump_info->init_reserve_size == 0)
		return 0;

	/* Return if phyp dump not supported */
	if (!phyp_dump_info->phyp_dump_configured)
		return -ENOSYS;

	/* Is there dump data waiting for us? If there isn't,
	 * then register a new dump area, and release all of
	 * the rest of the reserved ram.
	 *
	 * The /rtas/ibm,kernel-dump rtas node is present only
	 * if there is dump data waiting for us.
	 */
	rtas_node = of_find_node_by_path("/rtas");
	if (rtas_node) {
		old_hdr = of_get_property(rtas_node, "ibm,kernel-dump",
						&prop_len);
		of_node_put(rtas_node);
	}

	ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump");

	print_dump_header(old_hdr);
	area_len = init_dump_header(&phdr);

	/* align down */
	area_start = phyp_dump_info->init_reserve_start & PAGE_MASK;

	/*
	 * No previous dump, or a previous dump flagged as invalid: in both
	 * cases (re-)register the dump area; an invalid old dump is
	 * invalidated first.
	 */
	if (!old_hdr || (old_hdr->status & DUMP_ERROR_FLAG)) {
		if (old_hdr)
			invalidate_last_dump(&phdr, area_start);
		register_dump_area(&phdr, area_start);
		return 0;
	}

	/* A valid dump is waiting: remember where its scratch area lives */
	phyp_dump_info->reserved_scratch_addr =
		old_hdr->cpu_data.destination_address;
	phyp_dump_info->reserved_scratch_size =
		old_hdr->cpu_data.source_length +
		old_hdr->hpte_data.source_length +
		old_hdr->kernel_data.source_length;

	/* Should we create a dump_subsys, analogous to s390/ipl.c ? */
	err = sysfs_create_file(kernel_kobj, &rr.attr);
	if (err)
		printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n",
			err);

	/* ToDo: re-register the dump area, for next time. */
	return 0;
}
machine_subsys_initcall(pseries, phyp_dump_setup);
/*
 * Flat device tree scan callback: pick up the phyp-dump properties of
 * the depth-1 "rtas" node.
 *
 * Sets phyp_dump_configured when "ibm,configure-kernel-dump" is present,
 * phyp_dump_is_active when "ibm,dump-kernel" is present, and reads the
 * CPU state and HPTE region sizes from "ibm,configure-kernel-dump-sizes".
 * That property is read as 32-bit cells: cell 0 is an id (1 = CPU state)
 * followed by a size taken from cells 1-2, cell 3 is an id (2 = HPTE
 * region) followed by a size taken from cells 4-5.
 *
 * Note that both flags are cleared on every invocation, before the node
 * is even matched.
 *
 * Returns 1 once the sizes property has been processed, 0 otherwise
 * (presumably 0 lets the caller continue scanning - verify against the
 * flat DT scan helper).
 */
int __init early_init_dt_scan_phyp_dump(unsigned long node,
		const char *uname, int depth, void *data)
{
	const unsigned int *cells;

	phyp_dump_info->phyp_dump_configured = 0;
	phyp_dump_info->phyp_dump_is_active = 0;

	/* Only the top-level "rtas" node is of interest */
	if (!(depth == 1 && strcmp(uname, "rtas") == 0))
		return 0;

	if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL))
		phyp_dump_info->phyp_dump_configured = 1;

	if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL))
		phyp_dump_info->phyp_dump_is_active = 1;

	cells = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
					NULL);
	if (cells == NULL)
		return 0;

	if (cells[0] == 1)
		phyp_dump_info->cpu_state_size = *((unsigned long *)&cells[1]);

	if (cells[3] == 2)
		phyp_dump_info->hpte_region_size =
				*((unsigned long *)&cells[4]);

	return 1;
}
/*
 * Look for phyp_dump= cmdline option.
 *
 * Merely mentioning the option turns phyp_dump_at_boot on; only a value
 * beginning with '0' turns it off again.  Any other value (including
 * none at all) leaves it on.
 */
static int __init early_phyp_dump_enabled(char *p)
{
	phyp_dump_info->phyp_dump_at_boot = 1;

	if (p && p[0] == '0')
		phyp_dump_info->phyp_dump_at_boot = 0;

	return 0;
}
early_param("phyp_dump", early_phyp_dump_enabled);
/*
 * Look for phyp_dump_reserve_size= cmdline option.
 *
 * The value is parsed with memparse() and stored in reserve_bootvar;
 * without a value nothing is changed.
 */
static int __init early_phyp_dump_reserve_size(char *p)
{
	if (!p)
		return 0;

	phyp_dump_info->reserve_bootvar = memparse(p, &p);
	return 0;
}
early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size);