efi: Capsule update support

The EFI capsule mechanism allows data blobs to be passed to the EFI
firmware. By setting the EFI_CAPSULE_POPULATE_SYSTEM_TABLE and the
EFI_CAPSULE_PERSIST_ACROSS_RESET flags, the firmware will place a
pointer to our data blob in the EFI System Table on the next boot. We
can get access to the array of EFI capsules when parsing the
configuration tables the next time we boot.

We can utilise this facility to save crash dumps, call traces, even file
systems in a region of memory and have them preserved by the firmware
across a reboot.

Once a capsule has been passed to the firmware, the next reboot will
always be performed using the ResetSystem() EFI runtime service, which
may involve overriding the reboot type specified by reboot=. This
ensures the reset value returned by QueryCapsuleCapabilities() is used
to reset the system, which is required for the capsule to be processed.

Cc: Andi Kleen <ak@linux.intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Seiji Aguchi <seiji.aguchi@hds.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 89e7c73..64e4081 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -486,6 +486,13 @@
 	mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0;
 	*((unsigned short *)__va(0x472)) = mode;
 
+	/*
+	 * If an EFI capsule has been registered with the firmware then
+	 * override the reboot= parameter.
+	 */
+	if (efi_capsule_pending(NULL))
+		reboot_type = BOOT_EFI;
+
 	for (;;) {
 		/* Could also try the reset bit in the Hammer NB */
 		switch (reboot_type) {
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 90c8d3c..2ec51b1 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -273,7 +273,6 @@
 				  efi_char16_t *data)
 {
 	unsigned long flags;
-	bool nmi = in_nmi();
 
 	spin_lock_irqsave(&efi_runtime_lock, flags);
 	efi_call_virt4(reset_system, reset_type, status,
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 6aecbc8..05ed9c3 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -36,6 +36,42 @@
 	  backend for pstore by default. This setting can be overridden
 	  using the efivars module's pstore_disable parameter.
 
+config EFI_CAPSULE_PSTORE
+	bool "EFI capsule pstore backend"
+	depends on EFI && PSTORE
+	help
+	  The EFI capsule mechanism can be used to store crash dumps and
+	  function tracing data by passing the data to the firmware, which
+	  will be preserved across a reboot.
+
+	  It should be noted that enabling this option will pass a capsule
+	  to the firmware on every boot. Some firmware will not allow a
+	  user to enter the BIOS setup when a capsule has been registered
+	  on the previous boot.
+
+	  Many EFI machines have buggy implementations of the UpdateCapsule()
+	  runtime service. This option will enable code that may not function
+	  correctly with your firmware.
+
+	  If unsure, say N.
+
+config EFI_CAPSULE_BLK_DEV
+	tristate "EFI capsule block device"
+	depends on EFI
+	help
+	  This option uses the EFI capsule mechanism to reserve regions
+	  for use as block devices, which allows you to create file systems
+	  on them, read/write files, etc. The major advantage is that any
+	  data you create on the block device will be preserved across a
+	  reboot.
+
+	  If unsure, say N.
+
+config EFI_CAPSULE_BLK_DEV_SIZE
+	int "Default EFI capsule size (Mbytes)"
+	depends on EFI_CAPSULE_BLK_DEV
+	default "8"
+
 endmenu
 
 config UEFI_CPER
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 4759112..d7c3588 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -1,7 +1,9 @@
 #
 # Makefile for linux kernel
 #
-obj-$(CONFIG_EFI)			+= efi.o vars.o reboot.o
+obj-$(CONFIG_EFI)			+= efi.o vars.o reboot.o capsule.o
 obj-$(CONFIG_EFI_VARS)			+= efivars.o
 obj-$(CONFIG_EFI_VARS_PSTORE)		+= efi-pstore.o
 obj-$(CONFIG_UEFI_CPER)			+= cper.o
+obj-$(CONFIG_EFI_CAPSULE_PSTORE)	+= efi-capsule-pstore.o
+obj-$(CONFIG_EFI_CAPSULE_BLK_DEV)	+= efi-capsule-blk.o
diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c
new file mode 100644
index 0000000..ac6ad47
--- /dev/null
+++ b/drivers/firmware/efi/capsule.c
@@ -0,0 +1,455 @@
+/*
+ * EFI capsule support.
+ *
+ * Copyright 2013 Intel Corporation <matt.fleming@intel.com>
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ */
+
+#define pr_fmt(fmt) "efi-capsule: " fmt
+
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/highmem.h>
+#include <linux/efi.h>
+
+typedef struct {
+	u64 length;
+	u64 data;
+} efi_capsule_block_desc_t;
+
+static bool capsule_pending;
+static int efi_reset_type = -1;
+
+/*
+ * capsule_mutex serialises access to both 'capsule_pending' and
+ * 'efi_reset_type'.
+ *
+ * This mutex must be held across calls to efi_capsule_supported() and
+ * efi_update_capsule() so that the operation is atomic. This ensures
+ * that efi_update_capsule() isn't called with a capsule that requires a
+ * different reset type to the registered 'efi_reset_type'.
+ */
+static DEFINE_MUTEX(capsule_mutex);
+
+static int efi_update_capsule(efi_capsule_header_t *capsule,
+			      struct page **pages, size_t size, int reset);
+
+/**
+ * efi_capsule_pending - has a capsule been passed to the firmware?
+ * @reset_type: store the type of EFI reset if capsule is pending
+ *
+ * To ensure that the registered capsule is processed correctly by the
+ * firmware we need to perform a specific type of reset. If a capsule is
+ * pending return the reset type in @reset_type.
+ */
+bool efi_capsule_pending(int *reset_type)
+{
+	bool rv = false;
+
+	mutex_lock(&capsule_mutex);
+	if (!capsule_pending)
+		goto out;
+
+	if (reset_type)
+		*reset_type = efi_reset_type;
+	rv = true;
+
+out:
+	mutex_unlock(&capsule_mutex);
+	return rv;
+}
+
+/**
+ * efi_capsule_supported - does the firmware support the capsule?
+ * @guid: vendor guid of capsule
+ * @flags: capsule flags
+ * @size: size of capsule data
+ * @reset: the reset type required for this capsule
+ *
+ * Check whether a capsule with @flags is supported and that @size
+ * doesn't exceed the maximum size for a capsule.
+ */
+int efi_capsule_supported(efi_guid_t guid, u32 flags, size_t size, int *reset)
+{
+	efi_capsule_header_t *capsule;
+	efi_status_t status;
+	u64 max_size;
+	int rv = 0;
+
+	capsule = kmalloc(sizeof(*capsule), GFP_KERNEL);
+	if (!capsule)
+		return -ENOMEM;
+
+	capsule->headersize = capsule->imagesize = sizeof(*capsule);
+	memcpy(&capsule->guid, &guid, sizeof(efi_guid_t));
+	capsule->flags = flags;
+
+	status = efi.query_capsule_caps(&capsule, 1, &max_size, reset);
+	if (status != EFI_SUCCESS) {
+		rv = efi_status_to_err(status);
+		goto out;
+	}
+
+	if (size > max_size)
+		rv = -ENOSPC;
+out:
+	kfree(capsule);
+	return rv;
+}
+
+/**
+ * efi_capsule_update - send a capsule to the firmware
+ * @capsule: capsule to send to the firmware
+ * @pages: array of pages containing the capsule data
+ */
+int efi_capsule_update(efi_capsule_header_t *capsule, struct page **pages)
+{
+	efi_guid_t guid = capsule->guid;
+	size_t size = capsule->imagesize;
+	u32 flags = capsule->flags;
+	int rv, reset_type;
+
+	mutex_lock(&capsule_mutex);
+	rv = efi_capsule_supported(guid, flags, size, &reset_type);
+	if (rv)
+		goto out;
+
+	if (efi_reset_type >= 0 && efi_reset_type != reset_type) {
+		pr_err("Incompatible capsule reset type %d\n", reset_type);
+		rv = -EINVAL;
+		goto out;
+	}
+
+	rv = efi_update_capsule(capsule, pages, size, reset_type);
+out:
+	mutex_unlock(&capsule_mutex);
+	return rv;
+}
+EXPORT_SYMBOL_GPL(efi_capsule_update);
+
+/**
+ * efi_capsule_build - alloc capsule and send to firmware
+ * @guid: guid of the capsule
+ * @size: size in bytes of the capsule data
+ *
+ * This is a helper function for allocating enough zeroed room for user
+ * data + the size of an EFI capsule header, and passing that capsule to
+ * the firmware.
+ *
+ * We also atomically update the EFI reset type.
+ *
+ * Returns a pointer to the capsule on success, an ERR_PTR() value on
+ * error. If an error is returned we guarantee that the capsule has not
+ * been passed to the firmware.
+ */
+efi_capsule_header_t *efi_capsule_build(efi_guid_t guid, size_t size)
+{
+	efi_capsule_header_t *capsule = NULL;
+	unsigned int nr_pages = 0;
+	size_t capsule_size;
+	struct page **pages;
+	int i, rv = -ENOMEM;
+	u32 flags = EFI_CAPSULE_PERSIST_ACROSS_RESET |
+		EFI_CAPSULE_POPULATE_SYSTEM_TABLE;
+
+	capsule_size = size + sizeof(*capsule);
+
+	nr_pages = ALIGN(capsule_size, PAGE_SIZE) >> PAGE_SHIFT;
+	pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < nr_pages; i++) {
+		/*
+		 * Zero the pages: the contents survive a reboot and are
+		 * parsed on the next boot, so they must not hold stale data.
+		 */
+		pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!pages[i])
+			goto fail;
+	}
+
+	capsule = vmap(pages, nr_pages, 0, PAGE_KERNEL);
+	if (!capsule)
+		goto fail;
+
+	/*
+	 * Setup the EFI capsule header.
+	 */
+	memcpy(&capsule->guid, &guid, sizeof(guid));
+
+	capsule->headersize = sizeof(*capsule);
+	capsule->imagesize = capsule_size;
+	capsule->flags = flags;
+
+	rv = efi_capsule_update(capsule, pages);
+	if (rv)
+		goto fail;
+out:
+	kfree(pages);
+	return capsule;
+
+fail:
+	vunmap(capsule);
+	for (i = 0; i < nr_pages; i++) {
+		if (!pages[i])
+			break;
+
+		__free_page(pages[i]);
+	}
+	capsule = ERR_PTR(rv);
+	goto out;
+}
+EXPORT_SYMBOL_GPL(efi_capsule_build);
+
+static efi_capsule_header_t *
+__map_capsule(efi_capsule_header_t *phys, efi_guid_t guid)
+{
+	efi_capsule_header_t *virt;
+	size_t size;
+	void *err = ERR_PTR(-ENOMEM);
+
+	virt = ioremap((resource_size_t)phys, sizeof(*virt));
+	if (!virt) {
+		pr_err("failed to ioremap capsule\n");
+		goto fail;
+	}
+
+	size = virt->imagesize;
+	iounmap(virt);
+
+	virt = ioremap((resource_size_t)phys, size);
+	if (!virt) {
+		pr_err("failed to ioremap header + data\n");
+		goto fail;
+	}
+
+	/*
+	 * This *really* shouldn't happen, since the firmware groups all
+	 * capsules with the same guid together.
+	 */
+	if (unlikely(efi_guidcmp(virt->guid, guid))) {
+		pr_err("guid mismatch inside capsule\n");
+		iounmap(virt);
+		err = ERR_PTR(-EINVAL);
+		goto fail;
+	}
+
+	return virt;
+fail:
+	return err;
+}
+
+static efi_guid_t mappings[] = {
+	LINUX_EFI_CRASH_GUID,
+	LINUX_EFI_BLK_DEV_GUID,
+};
+
+/**
+ * efi_capsule_lookup - search capsule array for entries.
+ * @guid: the guid to search for.
+ * @nr_found: the number of entries found.
+ *
+ * Map each capsule header into the kernel's virtual address space and
+ * inspect the guid. Build an array of capsule headers with every
+ * capsule that is found with @guid. If a match is found the capsule
+ * remains mapped, otherwise it is unmapped.
+ *
+ * This function searches the capsule array built at efi_config_init()
+ * time for capsules matching @guid. If we find a matching array
+ * we remap each capsule into the kernel's virtual address space and
+ * return a new array of virtually mapped capsule pointers.
+ *
+ * Returns an array of capsule headers, each element of which has the
+ * guid @guid. The number of elements in the array is stored in
+ * @nr_found. Returns %NULL and stores zero in @nr_found if no capsules
+ * were found.
+ *
+ * If capsules were found but an error condition was encountered when
+ * retrieving them, an ERR_PTR() value is returned instead of NULL.
+ */
+efi_capsule_header_t **efi_capsule_lookup(efi_guid_t guid, uint32_t *nr_found)
+{
+	efi_capsule_header_t **capsules = NULL;
+	efi_capsule_header_t **cap_array;
+	uint32_t nr_capsules;
+	int i, j;
+
+	*nr_found = 0;
+
+	for (i = 0; i < EFI_LINUX_CAPSULES_NR; i++) {
+		unsigned long addr;
+		void *cap_header;
+		size_t size;
+
+		if (efi_guidcmp(mappings[i], guid))
+			continue;
+
+		if (!efi.capsules[i])
+			continue;
+
+		addr = efi.capsules[i];
+		cap_header = ioremap(addr, sizeof(nr_capsules));
+		if (!cap_header)
+			return ERR_PTR(-ENOMEM);
+
+		/*
+		 * The array of capsules is prefixed with the number of
+		 * capsule entries in the array for this guid.
+		 */
+		nr_capsules = *(uint32_t *)cap_header;
+		iounmap(cap_header);
+
+		/*
+		 * This shouldn't happen. If it does, it likely indicates
+		 * buggy firmware since it is the firmware that writes
+		 * this value.
+		 */
+		if (!nr_capsules) {
+			pr_err("capsule contains no entries\n");
+			continue;
+		}
+
+		capsules = kmalloc_array(nr_capsules, sizeof(*capsules),
+					 GFP_KERNEL);
+		if (!capsules)
+			goto fail;
+
+		/* The pointer array is pointer-aligned: one slot in */
+		size = ((size_t)nr_capsules + 1) * sizeof(*cap_array);
+		cap_header = ioremap(addr, size);
+		if (!cap_header)
+			goto fail;
+		cap_array = cap_header + sizeof(*cap_array);
+
+		for (j = 0; j < nr_capsules; j++) {
+			efi_capsule_header_t *c;
+
+			c = __map_capsule(cap_array[j], guid);
+			if (IS_ERR(c)) {
+				pr_err("Failed to map capsule %d guid %pUl\n",
+				       j, guid.b);
+				break;
+			}
+			capsules[j] = c;
+			*nr_found += 1;
+		}
+
+		iounmap(cap_header);
+
+		/*
+		 * Exit early. We know there are no other matches in the
+		 * capsule list because they're grouped by guid.
+		 */
+		break;
+	}
+
+	return capsules;
+
+fail:
+	kfree(capsules);
+	return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL_GPL(efi_capsule_lookup);
+
+#define BLOCKS_PER_PAGE	(PAGE_SIZE / sizeof(efi_capsule_block_desc_t))
+
+/*
+ * How many pages of block descriptors do we need to map 'nr_pages'?
+ *
+ * Every list of block descriptors in a page must end with a
+ * continuation pointer. The last continuation pointer of the last page
+ * must be zero to mark the end of the chain.
+ */
+static inline unsigned int num_block_pages(unsigned int nr_pages)
+{
+	return DIV_ROUND_UP(nr_pages, BLOCKS_PER_PAGE - 1);
+}
+
+/**
+ * efi_update_capsule - pass a single capsule to the firmware.
+ * @capsule: capsule to send to the firmware.
+ * @pages: an array of capsule data.
+ * @size: total size of capsule data + headers in @capsule.
+ * @reset: the reset type required for @capsule
+ *
+ * Map @capsule with EFI capsule block descriptors in PAGE_SIZE chunks.
+ * @size needn't necessarily be a multiple of PAGE_SIZE - we can handle
+ * a trailing chunk that is smaller than PAGE_SIZE.
+ *
+ * @capsule MUST be virtually contiguous.
+ *
+ * Return 0 on success.
+ */
+static int efi_update_capsule(efi_capsule_header_t *capsule,
+			      struct page **pages, size_t size, int reset)
+{
+	efi_capsule_block_desc_t *block = NULL;
+	struct page **block_pgs;
+	efi_status_t status;
+	unsigned int nr_data_pgs, nr_block_pgs;
+	int i, j, err = -ENOMEM;
+
+	nr_data_pgs = DIV_ROUND_UP(size, PAGE_SIZE);
+	nr_block_pgs = num_block_pages(nr_data_pgs);
+
+	block_pgs = kzalloc(nr_block_pgs * sizeof(*block_pgs), GFP_KERNEL);
+	if (!block_pgs)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_block_pgs; i++) {
+		block_pgs[i] = alloc_page(GFP_KERNEL);
+		if (!block_pgs[i])
+			goto fail;
+	}
+
+	for (i = 0; i < nr_block_pgs; i++) {
+		block = kmap(block_pgs[i]);
+		if (!block)
+			goto fail;
+
+		for (j = 0; j < BLOCKS_PER_PAGE - 1 && nr_data_pgs > 0; j++) {
+			u64 sz = min_t(u64, size, PAGE_SIZE);
+
+			block[j].length = sz;
+			block[j].data = page_to_phys(*pages++);
+
+			size -= sz;
+			nr_data_pgs--;
+		}
+
+		/* Continuation pointer */
+		block[j].length = 0;
+
+		if (i + 1 == nr_block_pgs)
+			block[j].data = 0;
+		else
+			block[j].data = page_to_phys(block_pgs[i + 1]);
+
+		kunmap(block_pgs[i]);
+	}
+
+	status = efi.update_capsule(&capsule, 1, page_to_phys(block_pgs[0]));
+	if (status != EFI_SUCCESS) {
+		pr_err("update_capsule fail: 0x%lx\n", status);
+		err = efi_status_to_err(status);
+		goto fail;
+	}
+
+	capsule_pending = true;
+	efi_reset_type = reset;
+
+	kfree(block_pgs);
+	return 0;
+
+fail:
+	for (i = 0; i < nr_block_pgs; i++) {
+		if (block_pgs[i])
+			__free_page(block_pgs[i]);
+	}
+
+	kfree(block_pgs);
+	return err;
+}
diff --git a/drivers/firmware/efi/efi-capsule-blk.c b/drivers/firmware/efi/efi-capsule-blk.c
new file mode 100644
index 0000000..b08a240
--- /dev/null
+++ b/drivers/firmware/efi/efi-capsule-blk.c
@@ -0,0 +1,250 @@
+/*
+ * EFI capsule block device driver.
+ *
+ * Copyright 2014 Intel Corporation <matt.fleming@intel.com>
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ */
+
+#include <linux/efi.h>
+#include <linux/genhd.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/moduleparam.h>
+#include <linux/module.h>
+
+#define EFI_CAPSULE_BLKDEV_NAME		"efi-capsule"
+#define EFI_CAPSULE_NR_MINORS		16
+#define EFI_CAPSULE_BLKDEV_SHIFT	9
+#define EFI_CAPSULE_BLKDEV_SIZE		(1 << EFI_CAPSULE_BLKDEV_SHIFT)
+
+static unsigned int cap_size = CONFIG_EFI_CAPSULE_BLK_DEV_SIZE;
+module_param(cap_size, uint, S_IRUGO);
+MODULE_PARM_DESC(cap_size, "Size of each RAM disk in Mbytes.");
+
+MODULE_LICENSE("GPL");
+
+/*
+ * This is a trivial little wrapper because it's irritating to have to
+ * add the size of the header to a efi_capsule_header_t * to figure out
+ * the virtual address mapping of the capsule data.
+ */
+static inline unsigned long
+capsule_data_addr(efi_capsule_header_t *capsule)
+{
+	return (unsigned long)capsule + capsule->headersize;
+}
+
+static inline unsigned long
+capsule_data_size(efi_capsule_header_t *capsule)
+{
+	return capsule->imagesize - capsule->headersize;
+}
+
+static void
+efi_capsule_blkdev_make_request(struct request_queue *queue, struct bio *bio)
+{
+	efi_capsule_header_t *capsule = bio->bi_bdev->bd_disk->private_data;
+	unsigned long phys_addr;
+	unsigned short index;
+	struct bio_vec *vec;
+	void *virt_addr;
+	sector_t sector;
+
+	sector = bio->bi_sector << EFI_CAPSULE_BLKDEV_SHIFT;
+	phys_addr = capsule_data_addr(capsule) + sector;
+
+	if (bio_end_sector(bio) > get_capacity(bio->bi_bdev->bd_disk)) {
+		bio_io_error(bio);
+		return;
+	}
+
+	bio_for_each_segment(vec, bio, index) {
+		virt_addr = page_address(vec->bv_page) + vec->bv_offset;
+		if (bio_data_dir(bio) == READ)
+			memcpy(virt_addr, (void *)phys_addr, vec->bv_len);
+		else
+			memcpy((void *)phys_addr, virt_addr, vec->bv_len);
+
+		phys_addr += vec->bv_len;
+	}
+
+	bio_endio(bio, 0);
+}
+
+/**
+ * efi_capsule_blkdev_direct_access - direct_access() method for blkdev
+ * Stores the capsule data address for @sector in @addr and its pfn in @pfn.
+ */
+static int
+efi_capsule_blkdev_direct_access(struct block_device *blkdev, sector_t sector,
+				 void **addr, unsigned long *pfn)
+{
+	efi_capsule_header_t *capsule = blkdev->bd_disk->private_data;
+	loff_t offset = sector;
+
+	if (blkdev->bd_part != NULL)
+		offset += blkdev->bd_part->start_sect;
+	offset <<= EFI_CAPSULE_BLKDEV_SHIFT;
+	if (offset >= capsule_data_size(capsule))
+		return -ERANGE;
+
+	/*
+	 * Translate the data address itself, not the caller's out-pointer.
+	 * NOTE(review): capsule is vmap/ioremap'd - confirm virt_to_phys().
+	 */
+	*addr = (void *)(capsule_data_addr(capsule) + offset);
+	*pfn = virt_to_phys(*addr) >> PAGE_SHIFT;
+	return 0;
+}
+
+static const struct block_device_operations efi_capsule_blkdev_ops = {
+	.owner		= THIS_MODULE,
+	.direct_access	= efi_capsule_blkdev_direct_access,
+};
+
+/*
+ * Create and register a gendisk exposing the data of @capsule. Returns 0 on
+ * success or a negative errno; a disk that was never added is released
+ * with put_disk(), since del_gendisk() is only valid after add_disk().
+ */
+static int __new_blkdev(int major, efi_capsule_header_t *capsule)
+{
+	struct gendisk *disk;
+	sector_t size;
+
+	disk = alloc_disk(EFI_CAPSULE_NR_MINORS);
+	if (!disk) {
+		pr_err("Unable to alloc disk\n");
+		return -ENODEV;
+	}
+
+	disk->major = major;
+	disk->first_minor = 0;
+	disk->fops = &efi_capsule_blkdev_ops;
+	disk->private_data = capsule;
+	sprintf(disk->disk_name, "%s", EFI_CAPSULE_BLKDEV_NAME);
+
+	disk->queue = blk_alloc_queue(GFP_KERNEL);
+	if (!disk->queue) {
+		pr_err("Unable to alloc blk queue\n");
+		put_disk(disk);
+		return -ENOMEM;
+	}
+
+	size = capsule_data_size(capsule) >> EFI_CAPSULE_BLKDEV_SHIFT;
+	set_capacity(disk, size);
+	blk_queue_make_request(disk->queue, efi_capsule_blkdev_make_request);
+	blk_queue_logical_block_size(disk->queue, EFI_CAPSULE_BLKDEV_SIZE);
+	add_disk(disk);
+	return 0;
+}
+
+/*
+ * Pass a capsule we picked from the EFI System Table back to the
+ * firmware via efi_update_capsule(). This is how we implement
+ * persistence.
+ *
+ * This function must only be passed a capsule returned from
+ * efi_capsule_lookup().
+ *
+ * Per the UEFI spec, the capsule pages are guaranteed to be physically
+ * contiguous - the firmware took care of that.
+ */
+static int send_to_fw(efi_capsule_header_t *capsule)
+{
+	unsigned long pfn;
+	unsigned int nr_pages;
+	struct vm_struct *vm;
+	struct page **pages;
+	int i, rv;
+
+	vm = find_vm_area(capsule);
+	if (!vm)
+		return -EINVAL;
+
+	/*
+	 * We only expect to be called with an ioremap'd capsule because
+	 * we make assumptions about the physical addresses the capsule
+	 * occupies being contiguous.
+	 */
+	if (!(vm->flags & VM_IOREMAP)) {
+		pr_err("Cannot persist non-ioremap'd capsule\n");
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	nr_pages = ALIGN(capsule->imagesize, PAGE_SIZE) >> PAGE_SHIFT;
+	pages = kmalloc(nr_pages * sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	pfn = vm->phys_addr >> PAGE_SHIFT;
+	for (i = 0; i < nr_pages; i++)
+		pages[i] = pfn_to_page(pfn++);
+
+	rv = efi_capsule_update(capsule, pages);
+
+	kfree(pages);
+	return rv;
+}
+
+/*
+ * Register a new block device for exposing EFI capsules as a RAM
+ * device.
+ */
+static int efi_capsule_blkdev_init(void)
+{
+	efi_capsule_header_t **capsules;
+	uint32_t nr_caps = 0;
+	int major;
+	int i, rv;
+
+	rv = register_blkdev(0, EFI_CAPSULE_BLKDEV_NAME);
+	if (rv < 0) {
+		pr_err("Unable to register blkdev\n");
+		return rv;
+	}
+
+	major = rv;
+
+	/*
+	 * Lookup any capsules that were passed from a previous boot.
+	 */
+	capsules = efi_capsule_lookup(LINUX_EFI_BLK_DEV_GUID, &nr_caps);
+	if (IS_ERR(capsules)) {
+		pr_err("Couldn't lookup LINUX_EFI_BLK_DEV capsules\n");
+		rv = PTR_ERR(capsules);
+		goto fail;
+	}
+
+	if (nr_caps > 0) {
+		for (i = 0; i < nr_caps; i++) {
+			rv = send_to_fw(capsules[i]);
+			if (rv)
+				goto fail;
+
+			__new_blkdev(major, capsules[i]);
+		}
+	} else {
+		efi_capsule_header_t *capsule;
+		efi_guid_t guid;
+
+		guid = LINUX_EFI_BLK_DEV_GUID;
+		capsule = efi_capsule_build(guid, cap_size << 20);
+		if (IS_ERR(capsule)) {
+			rv = PTR_ERR(capsule);
+			pr_err("could not build capsule\n");
+			goto fail;
+		}
+
+		__new_blkdev(major, capsule);
+	}
+
+	return 0;
+fail:
+	unregister_blkdev(major, EFI_CAPSULE_BLKDEV_NAME);
+	return rv;
+}
+module_init(efi_capsule_blkdev_init);
diff --git a/drivers/firmware/efi/efi-capsule-pstore.c b/drivers/firmware/efi/efi-capsule-pstore.c
new file mode 100644
index 0000000..e322508
--- /dev/null
+++ b/drivers/firmware/efi/efi-capsule-pstore.c
@@ -0,0 +1,396 @@
+/*
+ * EFI capsule pstore backend.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/pstore.h>
+#include <linux/efi.h>
+
+struct efi_capsule_pstore_buf {
+	void *data;
+	size_t size;
+	atomic_long_t offset;
+};
+
+struct efi_capsule_pstore {
+	/* Previous records */
+	efi_capsule_header_t **hdrs;
+	uint32_t nr_hdrs;
+	uint32_t hdr_index;	/* Index of current header in 'hdrs' */
+	off_t hdr_offset;	/* Offset into current header */
+
+	/* New records */
+	struct efi_capsule_pstore_buf console;
+	struct efi_capsule_pstore_buf ftrace;
+	struct efi_capsule_pstore_buf dmesg;
+};
+
+struct efi_capsule_pstore_record {
+	u64 timestamp;
+	u64 id;
+	enum pstore_type_id type;
+	size_t size;
+	int count;
+	bool inuse;
+	char data[];
+} __packed;
+
+static struct pstore_info efi_capsule_info;
+
+static efi_capsule_header_t *
+efi_setup_pstore_buffer(struct efi_capsule_pstore_buf *buf,
+			size_t size, enum pstore_type_id type)
+{
+	struct efi_capsule_pstore_record *rec;
+	efi_capsule_header_t *capsule;
+
+	capsule = efi_capsule_build(LINUX_EFI_CRASH_GUID, size);
+	if (IS_ERR(capsule))
+		return capsule;
+
+	rec = (void *)capsule + capsule->headersize;
+	rec->size = size - offsetof(typeof(*rec), data);
+	rec->type = type;
+
+	rec->inuse = false;
+
+	buf->size = rec->size;
+	atomic_long_set(&buf->offset, 0);
+	buf->data = rec->data;
+
+	return capsule;
+}
+
+/*
+ * We may not be in a position to allocate memory at the time of a
+ * crash, so pre-allocate some space now and register it with the
+ * firmware via efi_capsule_update().
+ *
+ * Also, iterate through the array of capsules pointed to from the EFI
+ * system table and take note of any LINUX_EFI_CRASH_GUID
+ * capsules. They will be parsed by efi_capsule_pstore_read().
+ */
+static int efi_capsule_pstore_setup(void)
+{
+	struct efi_capsule_pstore *pctx = NULL;
+	struct efi_capsule_pstore_buf *buf;
+	efi_capsule_header_t *capsule;
+	void *crash_buf = NULL;
+	size_t size, crash_size;
+	int rv;
+
+	pctx = kzalloc(sizeof(*pctx), GFP_KERNEL);
+	if (!pctx)
+		return -ENOMEM;
+
+	size = 65536;
+	capsule = efi_capsule_build(LINUX_EFI_CRASH_GUID, size);
+	if (IS_ERR(capsule)) {
+		rv = PTR_ERR(capsule);
+		goto fail;
+	}
+
+	pctx->dmesg.data = (void *)capsule + capsule->headersize;
+	atomic_long_set(&pctx->dmesg.offset, 0);
+	pctx->dmesg.size = size;
+
+	buf = &pctx->console;
+	capsule = efi_setup_pstore_buffer(buf, size, PSTORE_TYPE_CONSOLE);
+	if (IS_ERR(capsule)) {
+		rv = PTR_ERR(capsule);
+		goto fail;
+	}
+
+	buf = &pctx->ftrace;
+	capsule = efi_setup_pstore_buffer(buf, size, PSTORE_TYPE_FTRACE);
+	if (IS_ERR(capsule)) {
+		rv = PTR_ERR(capsule);
+		goto fail;
+	}
+
+	crash_size = 4096;
+	crash_buf = kmalloc(crash_size, GFP_KERNEL);
+	if (!crash_buf) {
+		rv = -ENOMEM;
+		goto fail;
+	}
+
+	/*
+	 * Register the capsule backend with pstore.
+	 */
+	spin_lock_init(&efi_capsule_info.buf_lock);
+
+	efi_capsule_info.buf = crash_buf;
+	efi_capsule_info.bufsize = crash_size;
+	efi_capsule_info.data = pctx;
+
+	rv = pstore_register(&efi_capsule_info);
+	if (rv) {
+		pr_err("pstore registration failed: %d\n", rv);
+		goto fail;
+	}
+
+	return rv;
+
+fail:
+	kfree(crash_buf);
+	kfree(pctx);
+	return rv;
+}
+
+static int efi_capsule_pstore_open(struct pstore_info *psi)
+{
+	struct efi_capsule_pstore *pctx = psi->data;
+	efi_capsule_header_t **capsules;
+	int rv = 0;
+
+	/*
+	 * Read any pstore entries that were passed across a reboot.
+	 */
+	capsules = efi_capsule_lookup(LINUX_EFI_CRASH_GUID, &pctx->nr_hdrs);
+	if (IS_ERR(capsules)) {
+		rv = PTR_ERR(capsules);
+		capsules = NULL;
+	}
+
+	pctx->hdrs = capsules;
+	return rv;
+}
+
+static int efi_capsule_pstore_close(struct pstore_info *psi)
+{
+	struct efi_capsule_pstore *pctx = psi->data;
+	int i;
+
+	for (i = 0; i < pctx->nr_hdrs; i++)
+		iounmap(pctx->hdrs[i]);
+	kfree(pctx->hdrs);
+	/* Reset the whole read cursor so a later open starts from scratch */
+	pctx->hdrs = NULL;
+	pctx->nr_hdrs = pctx->hdr_index = 0;
+	pctx->hdr_offset = 0;
+	return 0;
+}
+
+/*
+ * Return the next pstore record that was passed to us across a reboot
+ * in an EFI capsule.
+ *
+ * This is expected to be called under the pstore
+ * read_mutex. Therefore, no serialisation is done here.
+ */
+static struct efi_capsule_pstore_record *
+get_pstore_read_record(struct efi_capsule_pstore *pctx)
+{
+	struct efi_capsule_pstore_record *rec;
+	efi_capsule_header_t *hdr;
+	off_t remaining;
+
+next:
+	if (pctx->hdr_index == pctx->nr_hdrs)
+		return NULL;
+
+	hdr = pctx->hdrs[pctx->hdr_index];
+	rec = (void *)hdr + hdr->headersize + pctx->hdr_offset;
+
+	remaining = hdr->imagesize - hdr->headersize -
+		pctx->hdr_offset - offsetof(typeof(*rec), data);
+
+	/*
+	 * A single EFI capsule may contain multiple pstore records, but
+	 * there is no guarantee it will be filled completely, so we
+	 * need to handle partial records.
+	 *
+	 * If there are no more entries in this capsule try the next.
+	 */
+	if (!rec->inuse) {
+		pctx->hdr_index++;
+		pctx->hdr_offset = 0;
+		goto next;
+	}
+
+	/*
+	 * If we've finished parsing all records in this capsule, move
+	 * onto the next. Otherwise, increment the offset into the
+	 * current capsule (pctx->hdr_offset).
+	 */
+	if (rec->size == remaining) {
+		pctx->hdr_index++;
+		pctx->hdr_offset = 0;
+	} else
+		pctx->hdr_offset += rec->size + offsetof(typeof(*rec), data);
+
+	return rec;
+}
+
+static ssize_t efi_capsule_pstore_read(u64 *id, enum pstore_type_id *type,
+				       int *count, struct timespec *time,
+				       char **buf, struct pstore_info *psi)
+{
+	struct efi_capsule_pstore_record *rec;
+	struct efi_capsule_pstore *pctx = psi->data;
+	ssize_t size;
+
+	rec = get_pstore_read_record(pctx);
+	if (!rec)
+		return 0;
+
+	*type = rec->type;
+	time->tv_sec = rec->timestamp;
+	time->tv_nsec = 0;
+	size = rec->size;
+	*id = rec->id;
+	*count = rec->count;
+
+	*buf = kmalloc(size, GFP_KERNEL);
+	if (!*buf)
+		return -ENOMEM;
+
+	memcpy(*buf, rec->data, size);
+
+	return size;
+}
+
+/*
+ * Reserve a record (header plus @size payload bytes) in @pbuf, or return
+ * NULL if it does not fit. Called with ->buf_lock held: no serialisation.
+ */
+static notrace struct efi_capsule_pstore_record *
+get_pstore_write_record(struct efi_capsule_pstore_buf *pbuf, size_t size)
+{
+	struct efi_capsule_pstore_record *rec;
+	size_t need = offsetof(typeof(*rec), data) + size;
+	long offset = atomic_long_read(&pbuf->offset);
+
+	if (offset + need > pbuf->size)
+		return NULL;
+
+	rec = pbuf->data + offset;
+	atomic_long_add(need, &pbuf->offset);
+	rec->inuse = true;
+
+	return rec;
+}
+
+static int notrace
+efi_capsule_pstore_write(enum pstore_type_id type,
+			 enum kmsg_dump_reason reason, u64 *id,
+			 unsigned int part, int count, size_t hsize,
+			 size_t size, struct pstore_info *psi)
+{
+	struct efi_capsule_pstore_record *rec;
+	struct efi_capsule_pstore *pctx = psi->data;
+
+	if (!size)
+		return -EINVAL;
+
+	rec = get_pstore_write_record(&pctx->dmesg, size);
+	if (!rec)
+		return -ENOSPC;
+
+	rec->type = type;
+	rec->timestamp = get_seconds();
+	rec->size = size;
+	*id = rec->id = part;
+	rec->count = count;
+	memcpy(rec->data, psi->buf, size);
+
+	return 0;
+}
+
+static inline void buf_inuse(struct efi_capsule_pstore_buf *pbuf)
+{
+	struct efi_capsule_pstore_record *rec;
+
+	rec = pbuf->data - sizeof(*rec);
+	rec->inuse = true;
+}
+
+/*
+ * Reserve @size bytes in @pbuf with a cmpxchg retry loop, wrapping to the
+ * start when the region would overrun. Returns NULL if @size cannot fit.
+ */
+static notrace void *
+get_pstore_buf(struct efi_capsule_pstore_buf *pbuf, size_t size)
+{
+	long curr, start, next;
+
+	if (size > pbuf->size)
+		return NULL;
+
+	buf_inuse(pbuf);
+
+	do {
+		curr = atomic_long_read(&pbuf->offset);
+		start = curr;
+		next = curr + size;
+
+		/* Wrap to the start if the region would overrun the buffer */
+		if (next > pbuf->size) {
+			start = 0;
+			next = size;
+		}
+		/* cmpxchg returns the old value: it succeeded iff that is curr */
+	} while (atomic_long_cmpxchg(&pbuf->offset, curr, next) != curr);
+
+	return pbuf->data + start;
+}
+
+static int notrace
+efi_capsule_pstore_write_buf(enum pstore_type_id type,
+			     enum kmsg_dump_reason reason,
+			     u64 *id, unsigned int part,
+			     const char *buf, size_t hsize,
+			     size_t size, struct pstore_info *psi)
+{
+	struct efi_capsule_pstore *pctx = psi->data;
+	void *dst;
+
+	if (type == PSTORE_TYPE_FTRACE)
+		dst = get_pstore_buf(&pctx->ftrace, size);
+	else if (type == PSTORE_TYPE_CONSOLE)
+		dst = get_pstore_buf(&pctx->console, size);
+	else
+		return -EINVAL;
+
+	if (!dst)
+		return -ENOSPC;
+
+	memcpy(dst, buf, size);
+	return 0;
+}
+
+
+static struct pstore_info efi_capsule_info = {
+	.owner     = THIS_MODULE,
+	.name      = "capsule",
+	.open      = efi_capsule_pstore_open,
+	.close     = efi_capsule_pstore_close,
+	.read      = efi_capsule_pstore_read,
+	.write     = efi_capsule_pstore_write,
+	.write_buf = efi_capsule_pstore_write_buf,
+};
+
+
+/*
+ * efi_capsule_pstore_init - initialise the EFI capsule pstore backend
+ */
+static __init int efi_capsule_pstore_init(void)
+{
+	int rv, reset;
+	u32 flags = EFI_CAPSULE_PERSIST_ACROSS_RESET |
+		EFI_CAPSULE_POPULATE_SYSTEM_TABLE;
+
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
+		return -ENODEV;
+
+	/* Bail out early if the firmware rejects this kind of capsule */
+	rv = efi_capsule_supported(LINUX_EFI_CRASH_GUID, flags, 0, &reset);
+	if (rv)
+		return rv;
+
+	/* Propagate setup failures rather than always reporting success */
+	return efi_capsule_pstore_setup();
+}
+device_initcall(efi_capsule_pstore_init);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 772f559..874afd8 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -21,6 +21,7 @@
 #include <linux/device.h>
 #include <linux/efi.h>
 #include <linux/io.h>
+#include <linux/slab.h>
 
 struct efi __read_mostly efi = {
 	.mps        = EFI_INVALID_TABLE_ADDR,
@@ -208,24 +209,60 @@
 	u8 str[EFI_VARIABLE_GUID_LEN + 1];
 	int i;
 
-	if (table_types) {
-		efi_guid_unparse(guid, str);
+	if (!table_types)
+		return 0;
 
-		for (i = 0; efi_guidcmp(table_types[i].guid, NULL_GUID); i++) {
-			efi_guid_unparse(&table_types[i].guid, str);
+	efi_guid_unparse(guid, str);
 
-			if (!efi_guidcmp(*guid, table_types[i].guid)) {
-				*(table_types[i].ptr) = table;
-				pr_cont(" %s=0x%lx ",
-					table_types[i].name, table);
-				return 1;
-			}
+	for (i = 0; efi_guidcmp(table_types[i].guid, NULL_GUID); i++) {
+		efi_guid_unparse(&table_types[i].guid, str);
+
+		if (!efi_guidcmp(*guid, table_types[i].guid)) {
+			*(table_types[i].ptr) = table;
+			pr_cont(" %s=0x%lx ",
+				table_types[i].name, table);
+			return 1;
 		}
 	}
 
 	return 0;
 }
 
+static efi_guid_t capsule_guids[] = {	/* index i <-> efi.capsules[i] */
+	LINUX_EFI_CRASH_GUID,
+	LINUX_EFI_BLK_DEV_GUID,
+	NULL_GUID,	/* terminator; not counted in EFI_LINUX_CAPSULES_NR */
+};
+
+/*
+ * Try to match 'guid' against the architecture independent config tables,
+ * then against the Linux capsule guids. Returns 1 on a match, 0 otherwise.
+ */
+static __init int match_config_tables(efi_guid_t *guid, unsigned long table)
+{
+	int slot = 0;
+
+	/* The common EFI tables are tried first. */
+	if (match_config_table(guid, table, common_tables))
+		return 1;
+
+	/*
+	 * Linux-specific capsule guids denote blobs we created on a previous
+	 * boot and got back via the EFI system table. On a match, 'table' is
+	 * stored in efi.capsules[] at the guid's capsule_guids[] index.
+	 */
+	while (efi_guidcmp(capsule_guids[slot], NULL_GUID)) {
+		if (!efi_guidcmp(*guid, capsule_guids[slot])) {
+			efi.capsules[slot] = table;
+			return 1;
+		}
+
+		slot++;
+	}
+
+	return 0;
+}
+
 int __init efi_config_init(efi_config_table_type_t *arch_tables)
 {
 	void *config_tables, *tablep;
@@ -271,8 +308,8 @@
 			table = ((efi_config_table_32_t *)tablep)->table;
 		}
 
-		if (!match_config_table(&guid, table, common_tables))
-			match_config_table(&guid, table, arch_tables);
+		if (!match_config_table(&guid, table, arch_tables))
+			match_config_tables(&guid, table);
 
 		tablep += sz;
 	}
diff --git a/drivers/firmware/efi/reboot.c b/drivers/firmware/efi/reboot.c
index f9f34eb..d6ea42a 100644
--- a/drivers/firmware/efi/reboot.c
+++ b/drivers/firmware/efi/reboot.c
@@ -10,6 +10,9 @@
 
 void efi_reboot(int mode)
 {
+	const char *str[] = { "cold", "warm", "shutdown", "platform" };
+	int cap_reset_mode;	/* reset type a pending capsule requires */
+
 	switch (mode) {
 	case EFI_RESET_COLD:
 	case EFI_RESET_WARM:
@@ -21,5 +24,14 @@
 		return;
 	}
 
+	if (efi_capsule_pending(&cap_reset_mode)) {
+		if (mode != cap_reset_mode)
+			printk(KERN_WARNING "efi: %s reset requested but "
+			       "pending capsule update requires %s reset... "
+			       "Performing %s reset\n", str[mode],
+			       str[cap_reset_mode], str[cap_reset_mode]);
+		mode = cap_reset_mode;	/* the capsule's reset type wins */
+	}
+
 	efi.reset_system(mode, EFI_SUCCESS, 0, NULL);
 }
diff --git a/include/linux/efi.h b/include/linux/efi.h
index f4f1b01..a1dbaf4 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -117,6 +117,13 @@
 } efi_capsule_header_t;
 
 /*
+ * EFI capsule flags (e.g. the 'flags' argument of efi_capsule_supported())
+ */
+#define EFI_CAPSULE_PERSIST_ACROSS_RESET	0x00010000
+#define EFI_CAPSULE_POPULATE_SYSTEM_TABLE	0x00020000
+#define EFI_CAPSULE_INITIATE_RESET		0x00040000
+
+/*
  * Allocation types for calls to boottime->allocate_pages.
  */
 #define EFI_ALLOCATE_ANY_PAGES		0
@@ -395,6 +402,9 @@
 #define EFI_FILE_SYSTEM_GUID \
     EFI_GUID(  0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b )
 
+#define LINUX_EFI_BLK_DEV_GUID \
+    EFI_GUID(  0xe7db9873, 0x6ac3, 0x4897, 0x53, 0xa2, 0xac, 0x27, 0xa2, 0xb3, 0x6b, 0x42 )
+
 typedef struct {
 	efi_guid_t guid;
 	u64 table;
@@ -543,6 +553,17 @@
 #define EFI_INVALID_TABLE_ADDR		(~0UL)
 
 /*
+ * This is the in-kernel representation of an EFI capsule passed
+ * via the EFI System Table. It has no analogue in the EFI spec.
+ */
+struct efi_capsule {
+	unsigned long addr;	/* presumably the capsule address - TODO confirm */
+	efi_guid_t guid;	/* presumably the capsule's guid - TODO confirm */
+};
+
+#define EFI_LINUX_CAPSULES_NR	2	/* size of efi.capsules[] */
+
+/*
  * All runtime access to EFI goes through this structure:
  */
 extern struct efi {
@@ -571,6 +592,12 @@
 	efi_reset_system_t *reset_system;
 	efi_set_virtual_address_map_t *set_virtual_address_map;
 	struct efi_memory_map *memmap;
+
+	/*
+	 * Collect pointers to EFI capsules that were passed via the
+	 * EFI System Table on boot.
+	 */
+	unsigned long capsules[EFI_LINUX_CAPSULES_NR];
 } efi;
 
 static inline int
@@ -908,5 +935,16 @@
 #define EFIVARS_DATA_SIZE_MAX 1024
 
 #endif /* CONFIG_EFI_VARS */
+extern efi_capsule_header_t *
+efi_capsule_build(efi_guid_t guid, size_t size);
+extern efi_capsule_header_t **
+efi_capsule_lookup(efi_guid_t guid, uint32_t *nr_found);
 
+extern bool efi_capsule_pending(int *reset_type);
+
+extern int efi_capsule_supported(efi_guid_t guid, u32 flags,
+				 size_t size, int *reset);
+
+extern int efi_capsule_update(efi_capsule_header_t *capsule,
+			      struct page **pages);
 #endif /* _LINUX_EFI_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0fdf968..c239fde 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1393,6 +1393,7 @@
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(find_vm_area);
 
 /**
  *	remove_vm_area  -  find and remove a continuous kernel virtual area