eac7a7791b
The setup_data links are appended to the compressed kernel image. Since
the kernel image is typically loaded at 0x100000, setup_data lives at
`0x100000 + compressed_size`, which does not get relocated during the
kernel's boot process.

The kernel typically decompresses the image starting at address
0x1000000 (note: there's one more zero there than the compressed image
above). This usually is fine for most kernels.

However, if the compressed image is actually quite large, then
setup_data will live at a `0x100000 + compressed_size` that extends into
the decompressed zone at 0x1000000. In other words, if compressed_size
is larger than `0x1000000 - 0x100000`, then the decompression step will
clobber setup_data, resulting in crashes.

Visually, what happens now is that QEMU appends setup_data to the kernel
image:

          kernel image            setup_data
   |--------------------------||----------------|
0x100000                  0x100000+l1     0x100000+l1+l2

The problem is that this decompresses to 0x1000000 (one more zero). So
if l1 is > (0x1000000-0x100000), then this winds up looking like:

          kernel image            setup_data
   |--------------------------||----------------|
0x100000                  0x100000+l1     0x100000+l1+l2

                                 d e c o m p r e s s e d   k e r n e l
                     |-------------------------------------------------------------|
                0x1000000                                                     0x1000000+l3

The decompressed kernel seemingly overwriting the compressed kernel
image isn't a problem, because that gets relocated to a higher address
early on in the boot process, at the end of startup_64. setup_data,
however, stays in the same place, since those links are self referential
and nothing fixes them up. So the decompressed kernel clobbers it.

Fix this by appending setup_data to the cmdline blob rather than the
kernel image blob, which remains at a lower address that won't get
clobbered.
This could have been done by overwriting the initrd blob instead, but that poses big difficulties, such as no longer being able to use memory mapped files for initrd, hurting performance, and, more importantly, the initrd address calculation is hard coded in qboot, and it always grows down rather than up, which means lots of brittle semantics would have to be changed around, incurring more complexity. In contrast, using cmdline is simple and doesn't interfere with anything. The microvm machine has a gross hack where it fiddles with fw_cfg data after the fact. So this hack is updated to account for this appending, by reserving some bytes. Fixup-by: Michael S. Tsirkin <mst@redhat.com> Cc: x86@kernel.org Cc: Philippe Mathieu-Daudé <philmd@linaro.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Eric Biggers <ebiggers@kernel.org> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> Message-Id: <20221230220725.618763-1-Jason@zx2c4.com> Message-ID: <20230128061015-mutt-send-email-mst@kernel.org> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Tested-by: Eric Biggers <ebiggers@google.com> Tested-by: Mathias Krause <minipli@grsecurity.net>
113 lines
3.3 KiB
C
113 lines
3.3 KiB
C
/*
|
|
* Copyright (c) 2018 Intel Corporation
|
|
* Copyright (c) 2019 Red Hat, Inc.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms and conditions of the GNU General Public License,
|
|
* version 2 or later, as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
* more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along with
|
|
* this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#ifndef HW_I386_MICROVM_H
|
|
#define HW_I386_MICROVM_H
|
|
|
|
#include "exec/hwaddr.h"
|
|
#include "qemu/notify.h"
|
|
|
|
#include "hw/boards.h"
|
|
#include "hw/i386/x86.h"
|
|
#include "hw/acpi/acpi_dev_interface.h"
|
|
#include "hw/pci-host/gpex.h"
|
|
#include "qom/object.h"
|
|
|
|
/*
 *  IRQ    | pc         | microvm (acpi=on)
 * --------+------------+------------------
 *   0     |  pit       |
 *   1     |  kbd       |
 *   2     |  cascade   |
 *   3     |  serial 1  |
 *   4     |  serial 0  | serial
 *   5     |  -         |
 *   6     |  floppy    |
 *   7     |  parallel  |
 *   8     |  rtc       | rtc (rtc=on)
 *   9     |  acpi      | acpi (ged)
 *  10     |  pci lnk   | xhci (usb=on)
 *  11     |  pci lnk   |
 *  12     |  ps2       | pcie
 *  13     |  fpu       | pcie
 *  14     |  ide 0     | pcie
 *  15     |  ide 1     | pcie
 *  16-23  |  pci gsi   | virtio
 */
|
|
|
|
/* Platform virtio definitions */

/* Base address used for the virtio-mmio region. */
#define VIRTIO_MMIO_BASE      0xfeb00000

/*
 * Maximum length of a single virtio command-line fragment.
 * NOTE(review): presumably one fragment per virtio-mmio transport; confirm
 * against the user in microvm.c.
 */
#define VIRTIO_CMDLINE_MAXLEN 64

/*
 * Total space reserved for all fragments: 16 slots of
 * VIRTIO_CMDLINE_MAXLEN bytes, each with one extra byte
 * (NOTE(review): presumably a separator or terminator; confirm).
 * Evaluates to (64 + 1) * 16 = 1040.
 */
#define VIRTIO_CMDLINE_TOTAL_MAX_LEN ((VIRTIO_CMDLINE_MAXLEN + 1) * 16)
|
|
|
|
/*
 * GED MMIO layout: a base address plus two sub-regions at fixed offsets
 * (+0x100 and +0x200) from it, and the IRQ line number. The IRQ table in
 * this header lists IRQ 9 as "acpi (ged)" for microvm.
 */
#define GED_MMIO_BASE         0xfea00000
#define GED_MMIO_BASE_MEMHP   (GED_MMIO_BASE + 0x100)
#define GED_MMIO_BASE_REGS    (GED_MMIO_BASE + 0x200)
#define GED_MMIO_IRQ          9
|
|
|
|
/*
 * xhci controller placement: MMIO base address and IRQ line. The IRQ table
 * in this header lists IRQ 10 as "xhci (usb=on)" for microvm.
 */
#define MICROVM_XHCI_BASE     0xfe900000
#define MICROVM_XHCI_IRQ      10
|
|
|
|
/*
 * PCIe address windows. With these values the MMIO window
 * [0xc0000000, 0xe0000000) ends exactly where the ECAM window
 * [0xe0000000, 0xf0000000) begins.
 */
#define PCIE_MMIO_BASE        0xc0000000
#define PCIE_MMIO_SIZE        0x20000000
#define PCIE_ECAM_BASE        0xe0000000
#define PCIE_ECAM_SIZE        0x10000000
|
|
|
|
/*
 * Machine type options: string names of the microvm machine options.
 * Each name has a like-named field in the "Machine type options" group of
 * struct MicrovmMachineState.
 */
#define MICROVM_MACHINE_RTC                 "rtc"
#define MICROVM_MACHINE_PCIE                "pcie"
#define MICROVM_MACHINE_IOAPIC2             "ioapic2"
#define MICROVM_MACHINE_ISA_SERIAL          "isa-serial"
#define MICROVM_MACHINE_OPTION_ROMS         "x-option-roms"
#define MICROVM_MACHINE_AUTO_KERNEL_CMDLINE "auto-kernel-cmdline"
|
|
|
|
struct MicrovmMachineClass {
|
|
X86MachineClass parent;
|
|
HotplugHandler *(*orig_hotplug_handler)(MachineState *machine,
|
|
DeviceState *dev);
|
|
};
|
|
|
|
struct MicrovmMachineState {
|
|
X86MachineState parent;
|
|
|
|
/* Machine type options */
|
|
OnOffAuto rtc;
|
|
OnOffAuto pcie;
|
|
OnOffAuto ioapic2;
|
|
bool isa_serial;
|
|
bool option_roms;
|
|
bool auto_kernel_cmdline;
|
|
|
|
/* Machine state */
|
|
uint32_t pcie_irq_base;
|
|
uint32_t virtio_irq_base;
|
|
uint32_t virtio_num_transports;
|
|
bool kernel_cmdline_fixed;
|
|
Notifier machine_done;
|
|
Notifier powerdown_req;
|
|
struct GPEXConfig gpex;
|
|
|
|
/* device tree */
|
|
void *fdt;
|
|
uint32_t ioapic_phandle[2];
|
|
};
|
|
|
|
#define TYPE_MICROVM_MACHINE MACHINE_TYPE_NAME("microvm")
|
|
OBJECT_DECLARE_TYPE(MicrovmMachineState, MicrovmMachineClass, MICROVM_MACHINE)
|
|
|
|
#endif
|