mcfx's blog

题解、Writeup、游记和碎碎念

DEFCON CTF 2023 Qualifier Writeup

In the DEFCON CTF Qualifier this year, I mainly contributed in challenge opacity and blackbox. This post contains the writeup for them.

Contents

opacity

In this challenge, we are given two ELFs init_drm and run_prog. We can use ./init_drm <program.license> ./run_prog <program.bin> to run some programs.

After some basic tests, we can find that a program only runs with correct license. The given triangle.bin will print a triangle, and password_for_flag.bin might give us the flag.

init_drm

First, let's check init_drm.

It does only one things: patch qemu with some code and the license file.

But what are these patched values? Let's take a look in this specific qemu-aarch64-static.

Here is a comparison of them, the left is before patch, the right is after patch. The unk_88D390 points to the patched license.

In order to identify this part in the giant function, we could check the debugger symbols. However, I didn't do so.

In the later part, we can find such a string, and it's easy to find the corresponding function at https://github.com/qemu/qemu/blob/f9baca549e44791be0dd98de15add3d8452a8af0/linux-user/syscall.c#L10775.

As a result, we know that the patched qemu will write the license to PAC keys when reset, instead of random keys.

run_prog

The hard reverse process is mainly done by my teammates. Here are some key points:

We can find the PAC authentication code in https://github.com/qemu/qemu/blob/f9baca549e44791be0dd98de15add3d8452a8af0/target/arm/pauth_helper.c, and calculate the truth table for all gates:

#include<stdio.h>
#include<stdint.h>
#include<assert.h>
#include<string.h>
#include<stdlib.h>
#define MAKE_64BIT_MASK(shift, length) \
    (((~0ULL) >> (64 - (length))) << (shift))

static inline uint64_t deposit64(uint64_t value, int start, int length,
                                 uint64_t fieldval)
{
    uint64_t mask;
    assert(start >= 0 && length > 0 && length <= 64 - start);
    mask = (~0ULL >> (64 - length)) << start;
    return (value & ~mask) | ((fieldval << start) & mask);
}

static inline int64_t sextract64(uint64_t value, int start, int length)
{
    assert(start >= 0 && length > 0 && length <= 64 - start);
    /* Note that this implementation relies on right shift of signed
     * integers being an arithmetic shift.
     */
    return ((int64_t)(value << (64 - length - start))) >> (64 - length);
}

static inline uint32_t extract32(uint32_t value, int start, int length)
{
    assert(start >= 0 && length > 0 && length <= 32 - start);
    return (value >> start) & (~0U >> (32 - length));
}

static int rot_cell(int cell, int n)
{
    /* 4-bit rotate left by n.  */
    cell |= cell << 4;
    return extract32(cell, 4 - n, 4);
}


static inline uint64_t extract64(uint64_t value, int start, int length)
{
    assert(start >= 0 && length > 0 && length <= 64 - start);
    return (value >> start) & (~0ULL >> (64 - length));
}

static uint64_t pac_cell_shuffle(uint64_t i)
{
    uint64_t o = 0;

    o |= extract64(i, 52, 4);
    o |= extract64(i, 24, 4) << 4;
    o |= extract64(i, 44, 4) << 8;
    o |= extract64(i,  0, 4) << 12;

    o |= extract64(i, 28, 4) << 16;
    o |= extract64(i, 48, 4) << 20;
    o |= extract64(i,  4, 4) << 24;
    o |= extract64(i, 40, 4) << 28;

    o |= extract64(i, 32, 4) << 32;
    o |= extract64(i, 12, 4) << 36;
    o |= extract64(i, 56, 4) << 40;
    o |= extract64(i, 20, 4) << 44;

    o |= extract64(i,  8, 4) << 48;
    o |= extract64(i, 36, 4) << 52;
    o |= extract64(i, 16, 4) << 56;
    o |= extract64(i, 60, 4) << 60;

    return o;
}


static uint64_t pac_cell_inv_shuffle(uint64_t i)
{
    uint64_t o = 0;

    o |= extract64(i, 12, 4);
    o |= extract64(i, 24, 4) << 4;
    o |= extract64(i, 48, 4) << 8;
    o |= extract64(i, 36, 4) << 12;

    o |= extract64(i, 56, 4) << 16;
    o |= extract64(i, 44, 4) << 20;
    o |= extract64(i,  4, 4) << 24;
    o |= extract64(i, 16, 4) << 28;

    o |= i & MAKE_64BIT_MASK(32, 4);
    o |= extract64(i, 52, 4) << 36;
    o |= extract64(i, 28, 4) << 40;
    o |= extract64(i,  8, 4) << 44;

    o |= extract64(i, 20, 4) << 48;
    o |= extract64(i,  0, 4) << 52;
    o |= extract64(i, 40, 4) << 56;
    o |= i & MAKE_64BIT_MASK(60, 4);

    return o;
}

static uint64_t pac_sub(uint64_t i)
{
    static const uint8_t sub[16] = {
        0xb, 0x6, 0x8, 0xf, 0xc, 0x0, 0x9, 0xe,
        0x3, 0x7, 0x4, 0x5, 0xd, 0x2, 0x1, 0xa,
    };
    uint64_t o = 0;
    int b;

    for (b = 0; b < 64; b += 4) {
        o |= (uint64_t)sub[(i >> b) & 0xf] << b;
    }
    return o;
}

static uint64_t pac_inv_sub(uint64_t i)
{
    static const uint8_t inv_sub[16] = {
        0x5, 0xe, 0xd, 0x8, 0xa, 0xb, 0x1, 0x9,
        0x2, 0x6, 0xf, 0x0, 0x4, 0xc, 0x7, 0x3,
    };
    uint64_t o = 0;
    int b;

    for (b = 0; b < 64; b += 4) {
        o |= (uint64_t)inv_sub[(i >> b) & 0xf] << b;
    }
    return o;
}


static uint64_t pac_mult(uint64_t i)
{
    uint64_t o = 0;
    int b;

    for (b = 0; b < 4 * 4; b += 4) {
        int i0, i4, i8, ic, t0, t1, t2, t3;

        i0 = extract64(i, b, 4);
        i4 = extract64(i, b + 4 * 4, 4);
        i8 = extract64(i, b + 8 * 4, 4);
        ic = extract64(i, b + 12 * 4, 4);

        t0 = rot_cell(i8, 1) ^ rot_cell(i4, 2) ^ rot_cell(i0, 1);
        t1 = rot_cell(ic, 1) ^ rot_cell(i4, 1) ^ rot_cell(i0, 2);
        t2 = rot_cell(ic, 2) ^ rot_cell(i8, 1) ^ rot_cell(i0, 1);
        t3 = rot_cell(ic, 1) ^ rot_cell(i8, 2) ^ rot_cell(i4, 1);

        o |= (uint64_t)t3 << b;
        o |= (uint64_t)t2 << (b + 4 * 4);
        o |= (uint64_t)t1 << (b + 8 * 4);
        o |= (uint64_t)t0 << (b + 12 * 4);
    }
    return o;
}

static uint64_t tweak_cell_rot(uint64_t cell)
{
    return (cell >> 1) | (((cell ^ (cell >> 1)) & 1) << 3);
}

static uint64_t tweak_shuffle(uint64_t i)
{
    uint64_t o = 0;

    o |= extract64(i, 16, 4) << 0;
    o |= extract64(i, 20, 4) << 4;
    o |= tweak_cell_rot(extract64(i, 24, 4)) << 8;
    o |= extract64(i, 28, 4) << 12;

    o |= tweak_cell_rot(extract64(i, 44, 4)) << 16;
    o |= extract64(i,  8, 4) << 20;
    o |= extract64(i, 12, 4) << 24;
    o |= tweak_cell_rot(extract64(i, 32, 4)) << 28;

    o |= extract64(i, 48, 4) << 32;
    o |= extract64(i, 52, 4) << 36;
    o |= extract64(i, 56, 4) << 40;
    o |= tweak_cell_rot(extract64(i, 60, 4)) << 44;

    o |= tweak_cell_rot(extract64(i,  0, 4)) << 48;
    o |= extract64(i,  4, 4) << 52;
    o |= tweak_cell_rot(extract64(i, 40, 4)) << 56;
    o |= tweak_cell_rot(extract64(i, 36, 4)) << 60;

    return o;
}

static uint64_t tweak_cell_inv_rot(uint64_t cell)
{
    return ((cell << 1) & 0xf) | ((cell & 1) ^ (cell >> 3));
}

static uint64_t tweak_inv_shuffle(uint64_t i)
{
    uint64_t o = 0;

    o |= tweak_cell_inv_rot(extract64(i, 48, 4));
    o |= extract64(i, 52, 4) << 4;
    o |= extract64(i, 20, 4) << 8;
    o |= extract64(i, 24, 4) << 12;

    o |= extract64(i,  0, 4) << 16;
    o |= extract64(i,  4, 4) << 20;
    o |= tweak_cell_inv_rot(extract64(i,  8, 4)) << 24;
    o |= extract64(i, 12, 4) << 28;

    o |= tweak_cell_inv_rot(extract64(i, 28, 4)) << 32;
    o |= tweak_cell_inv_rot(extract64(i, 60, 4)) << 36;
    o |= tweak_cell_inv_rot(extract64(i, 56, 4)) << 40;
    o |= tweak_cell_inv_rot(extract64(i, 16, 4)) << 44;

    o |= extract64(i, 32, 4) << 48;
    o |= extract64(i, 36, 4) << 52;
    o |= extract64(i, 40, 4) << 56;
    o |= tweak_cell_inv_rot(extract64(i, 44, 4)) << 60;

    return o;
}


static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier,
                                             uint64_t key0, uint64_t key1)
{
    static const uint64_t RC[5] = {
        0x0000000000000000ull,
        0x13198A2E03707344ull,
        0xA4093822299F31D0ull,
        0x082EFA98EC4E6C89ull,
        0x452821E638D01377ull,
    };
    const uint64_t alpha = 0xC0AC29B7C97C50DDull;
    /*
     * Note that in the ARM pseudocode, key0 contains bits <127:64>
     * and key1 contains bits <63:0> of the 128-bit key.
     */
    uint64_t workingval, runningmod, roundkey, modk0;
    int i;

    modk0 = (key0 << 63) | ((key0 >> 1) ^ (key0 >> 63));
    runningmod = modifier;
    workingval = data ^ key0;

    for (i = 0; i <= 4; ++i) {
        roundkey = key1 ^ runningmod;
        workingval ^= roundkey;
        workingval ^= RC[i];
        if (i > 0) {
            workingval = pac_cell_shuffle(workingval);
            workingval = pac_mult(workingval);
        }
        workingval = pac_sub(workingval);
        runningmod = tweak_shuffle(runningmod);
    }
    roundkey = modk0 ^ runningmod;
    workingval ^= roundkey;
    workingval = pac_cell_shuffle(workingval);
    workingval = pac_mult(workingval);
    workingval = pac_sub(workingval);
    workingval = pac_cell_shuffle(workingval);
    workingval = pac_mult(workingval);
    workingval ^= key1;
    workingval = pac_cell_inv_shuffle(workingval);
    workingval = pac_inv_sub(workingval);
    workingval = pac_mult(workingval);
    workingval = pac_cell_inv_shuffle(workingval);
    workingval ^= key0;
    workingval ^= runningmod;
    for (i = 0; i <= 4; ++i) {
        workingval = pac_inv_sub(workingval);
        if (i < 4) {
            workingval = pac_mult(workingval);
            workingval = pac_cell_inv_shuffle(workingval);
        }
        runningmod = tweak_inv_shuffle(runningmod);
        roundkey = key1 ^ runningmod;
        workingval ^= RC[4 - i];
        workingval ^= roundkey;
        workingval ^= alpha;
    }
    workingval ^= modk0;

    return workingval;
}

uint64_t test(uint32_t a,uint32_t b)
{
    return pauth_computepac_architected(a,b,0xcf1b86873198a7bdull,0xd6c912742cd0b7f9ull)>>48&0x7f;
}

uint32_t gates[665][7];

int main(int argc, char** argv) {
    FILE*f=fopen("dist/data/triangle.bin","rb");
    fseek(f,0xb0,SEEK_SET);
    fread(gates,1,sizeof gates,f);
    assert(ftell(f)==18796);
    fclose(f);
    freopen("table.txt","w",stdout);
    for(int i=0;i<665;i++)
    {
        if((gates[i][0]&0xff)==0)
        {
            int x=(gates[i][1]-0x4040000)/28,y=(gates[i][2]-0x4040000)/28;
            uint64_t x1=gates[x][3],x0=gates[x][4],y1=gates[y][3],y0=gates[y][4],
                tweak=gates[i][0]>>8&255,check=gates[i][0]>>16;
            printf("%d %d %d %d %d\n",
                i,
                check==test(x0,y0+tweak),
                check==test(x0,y1+tweak),
                check==test(x1,y0+tweak),
                check==test(x1,y1+tweak));
        }
        else
        {
            assert(gates[i][1]==0||gates[i][2]==0);
        }
    }
}

password_for_flag

With the reversing done, we can try to find the flag.

In the given files, there are no license for password_for_flag.bin, so we can't directly execute it.

However, we can notice that the structure of password_for_flag.bin is exactly the same as triangle.bin, so we transplants the PAC values from triangle to password_for_flag.bin, and it seems worked.

Finally, I wrote a symbolic simulator using z3, and it produced some passwords. My teammates have different solutions, such as reversing the gates manually.

import sys, struct
import z3


def gate(x):
    if x == 0:
        return -1
    else:
        assert (x - 0x4040000) % 28 == 0
        return (x - 0x4040000) // 28


s = open('password_for_flag.bin', 'rb').read()
h = s[:0xb0]

memory = h[0x90:]
ha = struct.unpack('i' * 36, h[:0x90])
iter_count = ha[0]
ha = list(map(lambda x: (x - 0x4040000) // 28, ha))
is_out = ha[2]
out_regs = ha[3:11]
is_in = ha[11]
in_regs = ha[12:20]
is_exit = ha[20]
is_error = ha[21]
is_giveflag = ha[22]
mem_read_ptr = ha[23:28]
mem_read_val = ha[28:36]

s = s[0xb0:]
gates = []
while s:
    tp, tweak, check, lgate, rgate, lconst, rconst, oconst, dep = struct.unpack('<BBHIIIIII', s[:28])
    gates.append((tp & 7, tweak, check, gate(lgate), gate(rgate), lconst, rconst, oconst, dep))
    assert dep == 0
    s = s[28:]

val = [-1] * len(gates)
xo = [0] * len(gates)

for i, (tp, tweak, check, lgate, rgate, lconst, rconst, oconst, dep) in enumerate(gates):
    assert dep == 0
    if tp == 5 or tp == 3:
        assert oconst == lconst or oconst == rconst
        val[i] = int(oconst == lconst)
    elif tp == 0:
        assert gates[lgate][0] != 4
        assert gates[rgate][0] != 4
    elif tp == 4:
        xo[i] = int(gates[lgate][5] != lconst)
        pass
    else:
        assert False

tt = [None] * len(gates)
for line in open('table.txt').readlines():
    i, *o = map(int, line.split())
    tt[i] = o

q = []
c = [0] * len(gates)
for i in range(len(gates)):
    if gates[i][0] == 0:
        c[gates[i][3]] += 1
        c[gates[i][4]] += 1
for i in range(len(gates)):
    if gates[i][0] == 0 and c[i] == 0:
        q.append(i)
i = 0
while i < len(q):
    x = q[i]
    i += 1
    c[gates[x][3]] -= 1
    c[gates[x][4]] -= 1
    if gates[gates[x][3]][0] == 0 and c[gates[x][3]] == 0:
        q.append(gates[x][3])
    if gates[gates[x][4]][0] == 0 and c[gates[x][4]] == 0:
        q.append(gates[x][4])
for i in range(len(gates)):
    assert c[i] == 0
q = q[::-1]


def read_val(val, arr):
    r = 0
    for i in range(len(arr)):
        r |= val[arr[i]] << i
    return r


def write_val(val, arr, v):
    for i in range(len(arr)):
        val[arr[i]] = v >> i & 1


for i in range(len(gates)):
    if gates[i][0] == 0:
        if tt[i][0]:
            tt[i] = lambda x, y: (1 ^ x) & (1 ^ y)
        elif tt[i][1]:
            tt[i] = lambda x, y: (1 ^ x) & y
        elif tt[i][2]:
            tt[i] = lambda x, y: x & (1 ^ y)
        elif tt[i][3]:
            tt[i] = lambda x, y: x & y

val = [z3.BitVecVal(x, 8)for x in val]
flag = [z3.BitVec('x' + str(i), 8)for i in range(21)]
in_p = 0
some_flag = 0
solver = z3.Solver()
for x in flag:
    solver.add(x != 0)


def get_unique_value(x):
    solver.push()
    assert solver.check() == z3.sat
    m = solver.model()
    t = m.eval(x).as_long()
    solver.add(x != t)
    assert solver.check() == z3.unsat
    solver.pop()
    return t


for it in range(iter_count):
    print(it)
    for i in q:
        l, r = gates[i][3:5]
        val[i] = 1 ^ tt[i](val[l], val[r])
    for i in range(len(gates)):
        if gates[i][0] == 4 and gates[i][3] != -1:
            val[i] = xo[i] ^ val[gates[i][3]]
    for i in range(len(gates)):
        if (gates[i][0] == 3 or gates[i][0] == 5) and gates[i][3] != -1:
            val[i] = val[gates[i][3]]
    for i in range(len(gates)):
        val[i] = z3.simplify(val[i])
    memptr = z3.simplify(read_val(val, mem_read_ptr))
    if it + 1 < iter_count:
        solver.add(val[is_exit] == 0)
    if get_unique_value(val[is_in]):
        write_val(val, in_regs, flag[in_p])
        in_p += 1

    solver.push()
    solver.check()
    m = solver.model()
    t = m.eval(memptr).as_long()
    v = z3.BitVecVal(memory[t], 8)
    solver.add(memptr != t)
    while solver.check() == z3.sat:
        m = solver.model()
        t = m.eval(memptr).as_long()
        v = z3.If(memptr == t, z3.BitVecVal(memory[t], 8), v)
        solver.add(memptr != t)
    write_val(val, mem_read_val, v)
    solver.pop()

    some_flag |= val[is_giveflag]

s2 = z3.Solver()
for x in flag:
    s2.add(x >= 32)
    s2.add(z3.ULT(x, 128))
s2.add(some_flag == 1)
if s2.check() == z3.sat:
    m = s2.model()
    print(bytes([m.eval(x).as_long()for x in flag]))

blackbox

We can send some code to execute in a VM, but we don't know anything about it.

Here's a timeline for solving it:

日期: 2023-06-02

标签: CTF Writeup