In this crypto challenge, we are given a binary which implements some sort of cipher. It will give us the ciphertext for any plaintext block that we send and we have to decrypt a ciphertext encrypted using CBC mode in order to get the flag.

Reverse Engineering

Aplet123 reverse-engineered the binary and manually decompiled it to Python:

def xor(a, b):
    for i in range(4):
        a[i] ^= b[i]

sbox = [32, 1, 82, 147, 4, 165, 198, 95, 56, 9, 138, 59, 12, 93, 142, 55, 8, 33, 42, 203, 100, 77, 230, 103, 136, 41, 162, 131, 44, 101, 46, 7, 176, 25, 114, 3, 28, 37, 30, 79, 0, 113, 98, 171, 20, 125, 118, 111, 40, 169, 26, 91, 196, 69, 150, 15, 48, 145, 18, 99, 156, 117, 166, 71, 16, 97, 90, 19, 140, 29, 78, 63, 160, 185, 74, 51, 148, 45, 126, 151, 88, 225, 170, 11, 84, 53, 22, 87, 224, 153, 218, 43, 52, 197, 94, 47, 168, 161, 34, 227, 76, 141, 174, 23, 128, 49, 154, 67, 60, 205, 70, 31, 120, 249, 66, 155, 236, 181, 62, 143, 152, 57, 2, 219, 36, 157, 54, 223, 112, 241, 146, 179, 68, 21, 6, 39, 72, 209, 122, 187, 252, 5, 214, 199, 144, 129, 130, 107, 180, 237, 246, 119, 24, 105, 106, 83, 220, 13, 158, 239, 104, 73, 250, 115, 164, 85, 110, 135, 208, 89, 50, 139, 108, 173, 14, 215, 184, 81, 234, 35, 92, 149, 238, 127, 64, 17, 178, 243, 228, 253, 86, 159, 240, 233, 226, 235, 116, 245, 102, 183, 232, 177, 194, 251, 124, 61, 190, 191, 216, 137, 58, 123, 132, 229, 134, 247, 192, 65, 210, 163, 172, 189, 38, 231, 200, 193, 10, 195, 244, 133, 222, 167, 96, 201, 186, 211, 204, 213, 182, 207, 80, 217, 202, 27, 188, 109, 206, 255, 248, 121, 242, 75, 212, 221, 254, 175]
def sub(a):
    for i in range(4):
        a[i] = sbox[a[i]]

permutation = [0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29, 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31]
def shuffle_bits(a):
    n = int.from_bytes(a, "big")
    shuf = 0
    for i in range(32):
        shuf |= ((n >> i) & 1) << permutation[i]
    r = int.to_bytes(shuf, 4, "big")
    for i in range(4):
        a[i] = r[i]

def encrypt_block(ct):
    for i in range(3):
        xor(ct, key[i])
        sub(ct)
        shuffle_bits(ct)
    xor(ct, key[3])
    sub(ct)
    xor(ct, key[4])
    return ct


# the actual program uses /dev/urandom but I want fixed randomness for now
keyfile = open("./key", "rb")
# 5 rows of 4 ints each
key = [[keyfile.read(1)[0] for _ in range(4)] for _ in range(5)]
while True:
    cmd = int(input("cmd = "))
    if cmd == 3:
        print("Goodbye")
        break
    elif cmd == 2:
        iv = keyfile.read(4)
        pt = keyfile.read(32)
        ct = [bytearray([0] * 4) for _ in range(8)] # 8 blocks of 4 bytes
        xor(ct[0], pt[0:4])
        xor(ct[0], iv)
        encrypt_block(ct[0])
        for i in range(1, 8):
            xor(ct[i], pt[i * 4:i * 4 + 4])
            xor(ct[i], ct[i - 1])
            encrypt_block(ct[i])
        print("iv = " + iv.hex())
        print("ct = " + "".join(x.hex() for x in ct))
        inp = bytes.fromhex(input("pt = "))
        assert len(inp) == 32
        if inp != pt:
            print("Incorrect plaintext")
        else:
            print("flag{placeholder_flag}")
    elif cmd == 1:
        pt = bytearray.fromhex(input("pt = "))
        assert len(pt) == 4
        ct = encrypt_block(pt)
        print("ct = " + ct.hex())
    else:
        print("Invalid command")
        break

The code implements a substitution-permutation network block cipher with four rounds and a block size of 32 bits. The key is 160 bits long and it’s just the five round keys concatenated together.

Cryptanalysis

I had no experience with attacking block ciphers, but I had previously heard about linear and differential cryptanalysis. After some searching, I found a paper with a tutorial on using these techniques to attack a substitution-permutation network, and I learned differential and linear cryptanalysis just for this challenge. I initially tried differential cryptanalysis and I analyzed the sbox with this script:

#!/usr/bin/env python3

import matplotlib.pyplot as plt
import numpy as np

sbox = [32, 1, 82, 147, 4, 165, 198, 95, 56, 9, 138, 59, 12, 93, 142, 55, 8, 33, 42, 203, 100, 77, 230, 103, 136, 41, 162, 131, 44, 101, 46, 7, 176, 25, 114, 3, 28, 37, 30, 79, 0, 113, 98, 171, 20, 125, 118, 111, 40, 169, 26, 91, 196, 69, 150, 15, 48, 145, 18, 99, 156, 117, 166, 71, 16, 97, 90, 19, 140, 29, 78, 63, 160, 185, 74, 51, 148, 45, 126, 151, 88, 225, 170, 11, 84, 53, 22, 87, 224, 153, 218, 43, 52, 197, 94, 47, 168, 161, 34, 227, 76, 141, 174, 23, 128, 49, 154, 67, 60, 205, 70, 31, 120, 249, 66, 155, 236, 181, 62, 143, 152, 57, 2, 219, 36, 157, 54, 223, 112, 241, 146, 179, 68, 21, 6, 39, 72, 209, 122, 187, 252, 5, 214, 199, 144, 129, 130, 107, 180, 237, 246, 119, 24, 105, 106, 83, 220, 13, 158, 239, 104, 73, 250, 115, 164, 85, 110, 135, 208, 89, 50, 139, 108, 173, 14, 215, 184, 81, 234, 35, 92, 149, 238, 127, 64, 17, 178, 243, 228, 253, 86, 159, 240, 233, 226, 235, 116, 245, 102, 183, 232, 177, 194, 251, 124, 61, 190, 191, 216, 137, 58, 123, 132, 229, 134, 247, 192, 65, 210, 163, 172, 189, 38, 231, 200, 193, 10, 195, 244, 133, 222, 167, 96, 201, 186, 211, 204, 213, 182, 207, 80, 217, 202, 27, 188, 109, 206, 255, 248, 121, 242, 75, 212, 221, 254, 175]  # fmt: skip

probs = np.zeros((256, 256), dtype=np.int32)
for dx in range(256):
    for x1 in range(256):
        x2 = x1 ^ dx
        y1, y2 = sbox[x1], sbox[x2]
        dy = y1 ^ y2
        probs[dx][dy] += 1

probs = probs / 256

l = [(probs[i, j], (i, j)) for i in range(256) for j in range(256)]

l.sort(reverse=True)

for t in l[:100]:
    print(f"{t[0]} ({t[1][0]:08b}, {t[1][1]:08b})")

plt.imshow(probs, vmin=0, vmax=0.1)

plt.show()

The script computes the probability of all differential pairs, visualizes them, and prints the pairs with the highest probabilities. The visualization looks like this:

visualization produced by the script

There is a clear diagonal pattern which is suspicious. On closer examination, I saw that the all of the nonzero pixels were on diagonal lines that were 8 pixels apart. This made me realize that the sbox does not change the last three bits.

I decided to switch to linear cryptanalysis, since I thought that could better take advantage of this flaw in the sbox and I modified the analysis script to find linear approximations with high bias:

for mx in range(256):
    for my in range(256):
        for x in range(256):
            y = sbox[x]
            if not ((x & mx).bit_count() + (y & my).bit_count()) & 1:
                probs[mx][my] += 1

probs = np.abs(probs / 256 - 0.5)

The output looks like this after some editing:

0.5        (00000111, 00000111)
0.5        (00000110, 00000110)
0.5        (00000101, 00000101)
0.5        (00000100, 00000100)
0.5        (00000011, 00000011)
0.5        (00000010, 00000010)
0.5        (00000001, 00000001)
0.5        (00000000, 00000000)
0.140625   (10000111, 10000111)
0.140625   (10000110, 10000110)
0.140625   (10000101, 10000101)
0.140625   (10000100, 10000100)
0.140625   (10000011, 10000011)
0.140625   (10000010, 10000010)
0.140625   (10000001, 10000001)
0.140625   (10000000, 10000000)
0.1328125  (10000111, 01000111)
0.1328125  (10000110, 01000110)
0.1328125  (10000101, 01000101)
0.1328125  (10000100, 01000100)
0.1328125  (10000011, 01000011)
0.1328125  (10000010, 01000010)
0.1328125  (10000001, 01000001)
0.1328125  (10000000, 01000000)
0.1171875  (01001111, 10011010)
0.1171875  (01001110, 10011011)
0.1171875  (01001101, 10011000)
0.1171875  (01001100, 10011001)
0.1171875  (01001011, 10011110)
0.1171875  (01001010, 10011111)
0.1171875  (01001001, 10011100)
0.1171875  (01001000, 10011101)
0.1171875  (00011111, 11111010)
0.1171875  (00011110, 11111011)
0.1171875  (00011101, 11111000)
0.1171875  (00011100, 11111001)
0.1171875  (00011011, 11111110)
0.1171875  (00011010, 11111111)
0.1171875  (00011001, 11111100)
0.1171875  (00011000, 11111101)
0.109375   (01101111, 10010000)
0.109375   (01101110, 10010001)
0.109375   (01101101, 10010010)
0.109375   (01101100, 10010011)
0.109375   (01101011, 10010100)
0.109375   (01101010, 10010101)
0.109375   (01101001, 10010110)
0.109375   (01101000, 10010111)
0.109375   (01100111, 11101011)
0.109375   (01100110, 11101010)
0.109375   (01100101, 11101001)
0.109375   (01100100, 11101000)
0.109375   (01100011, 11101111)
0.109375   (01100010, 11101110)
0.109375   (01100001, 11101101)
0.109375   (01100000, 11101100)
0.109375   (00110111, 00010111)
0.109375   (00110110, 00010110)
0.109375   (00110101, 00010101)
0.109375   (00110100, 00010100)
0.109375   (00110011, 00010011)
0.109375   (00110010, 00010010)
0.109375   (00110001, 00010001)
0.109375   (00110000, 00010000)
0.109375   (00100111, 11001100)
0.109375   (00100110, 11001101)
0.109375   (00100101, 11001110)
0.109375   (00100100, 11001111)
0.109375   (00100011, 11001000)
0.109375   (00100010, 11001001)
0.109375   (00100001, 11001010)
0.109375   (00100000, 11001011)
0.1015625  (11110111, 10110101)
0.1015625  (11110110, 10110100)
0.1015625  (11110101, 10110111)
0.1015625  (11110100, 10110110)
0.1015625  (11110011, 10110001)
0.1015625  (11110010, 10110000)
0.1015625  (11110001, 10110011)
0.1015625  (11110000, 10110010)
0.1015625  (11100111, 00010110)
0.1015625  (11100110, 00010111)
0.1015625  (11100101, 00010100)
0.1015625  (11100100, 00010101)
0.1015625  (11100011, 00010010)
0.1015625  (11100010, 00010011)
0.1015625  (11100001, 00010000)
0.1015625  (11100000, 00010001)
0.1015625  (11010111, 11010111)
0.1015625  (11010110, 11010110)
0.1015625  (11010101, 11010101)
0.1015625  (11010100, 11010100)
0.1015625  (11010011, 11010011)
0.1015625  (11010010, 11010010)
0.1015625  (11010001, 11010001)
0.1015625  (11010000, 11010000)
0.1015625  (11000111, 11110100)
0.1015625  (11000110, 11110101)
0.1015625  (11000101, 11110110)
0.1015625  (11000100, 11110111)
0.1015625  (11000011, 11110000)
0.1015625  (11000010, 11110001)
0.1015625  (11000001, 11110010)
0.1015625  (11000000, 11110011)
0.1015625  (10110111, 10100100)
0.1015625  (10110111, 10000011)
0.1015625  (10110110, 10100101)
0.1015625  (10110110, 10000010)
0.1015625  (10110101, 10100110)
0.1015625  (10110101, 10000001)
0.1015625  (10110100, 10100111)
0.1015625  (10110100, 10000000)
0.1015625  (10110011, 10100000)
0.1015625  (10110011, 10000111)
0.1015625  (10110010, 10100001)
0.1015625  (10110010, 10000110)
0.1015625  (10110001, 10100010)
0.1015625  (10110001, 10000101)
0.1015625  (10110000, 10100011)
0.1015625  (10110000, 10000100)
0.1015625  (10101111, 11001110)
0.1015625  (10101110, 11001111)
0.1015625  (10101101, 11001100)
0.1015625  (10101100, 11001101)
0.1015625  (10101011, 11001010)
0.1015625  (10101010, 11001011)
0.1015625  (10101001, 11001000)
0.1015625  (10101000, 11001001)
0.1015625  (10010111, 00001011)
0.1015625  (10010110, 00001010)
0.1015625  (10010101, 00001001)
0.1015625  (10010100, 00001000)
0.1015625  (10010011, 00001111)
0.1015625  (10010010, 00001110)
0.1015625  (10010001, 00001101)
0.1015625  (10010000, 00001100)
0.1015625  (10000111, 01110100)
0.1015625  (10000110, 01110101)
0.1015625  (10000101, 01110110)
0.1015625  (10000100, 01110111)
0.1015625  (10000011, 01110000)
0.1015625  (10000010, 01110001)
0.1015625  (10000001, 01110010)
0.1015625  (10000000, 01110011)
0.1015625  (01010111, 10110011)
0.1015625  (01010110, 10110010)
0.1015625  (01010101, 10110001)
0.1015625  (01010100, 10110000)
0.1015625  (01010011, 10110111)
0.1015625  (01010010, 10110110)
0.1015625  (01010001, 10110101)
0.1015625  (01010000, 10110100)
0.1015625  (01000111, 10000111)
0.1015625  (01000110, 10000110)
0.1015625  (01000101, 10000101)
0.1015625  (01000100, 10000100)
0.1015625  (01000011, 10000011)
0.1015625  (01000010, 10000010)
0.1015625  (01000001, 10000001)
0.1015625  (01000000, 10000000)
0.1015625  (00101111, 00001111)
0.1015625  (00101111, 00001110)
0.1015625  (00101110, 00001111)
0.1015625  (00101110, 00001110)
0.1015625  (00101101, 00001101)
0.1015625  (00101101, 00001100)
0.1015625  (00101100, 00001101)
0.1015625  (00101100, 00001100)
0.1015625  (00101011, 00001011)
0.1015625  (00101011, 00001010)
0.1015625  (00101010, 00001011)
0.1015625  (00101010, 00001010)
0.1015625  (00101001, 00001001)
0.1015625  (00101001, 00001000)
0.1015625  (00101000, 00001001)
0.1015625  (00101000, 00001000)
0.1015625  (00001111, 11010011)
0.1015625  (00001110, 11010010)
0.1015625  (00001101, 11010001)
0.1015625  (00001100, 11010000)
0.1015625  (00001011, 11010111)
0.1015625  (00001010, 11010110)
0.1015625  (00001001, 11010101)
0.1015625  (00001000, 11010100)

There are several approximations that involve few bits and have a bias of at least 0.1. If we can concatenate some of them together, we would be able to construct an approximation for the whole cipher with a bias that can be detected with some thousands of known plaintexts. After hours of staring at my hand-drawn diagram of the cipher, I was able to construct approximations involving the plaintext bits and each of the input bytes for the last layer of sboxes. This lets us recover the last round key, since we can guess each of the bytes and the approximations will only have high bias if our guess is correct. I wrote my code in C++ and made Python bindings with pybind since I thought that I might have to guess multiple bytes at a time, but that turned out to not be necessary.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <utility>
#include <vector>

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

namespace {
using u8 = uint8_t;
using u32 = uint32_t;
using Key = std::array<u32, 5>;

constexpr u8 sbox[256] = {
    32,  1,   82,  147, 4,   165, 198, 95,  56,  9,   138, 59,  12,  93,  142,
    55,  8,   33,  42,  203, 100, 77,  230, 103, 136, 41,  162, 131, 44,  101,
    46,  7,   176, 25,  114, 3,   28,  37,  30,  79,  0,   113, 98,  171, 20,
    125, 118, 111, 40,  169, 26,  91,  196, 69,  150, 15,  48,  145, 18,  99,
    156, 117, 166, 71,  16,  97,  90,  19,  140, 29,  78,  63,  160, 185, 74,
    51,  148, 45,  126, 151, 88,  225, 170, 11,  84,  53,  22,  87,  224, 153,
    218, 43,  52,  197, 94,  47,  168, 161, 34,  227, 76,  141, 174, 23,  128,
    49,  154, 67,  60,  205, 70,  31,  120, 249, 66,  155, 236, 181, 62,  143,
    152, 57,  2,   219, 36,  157, 54,  223, 112, 241, 146, 179, 68,  21,  6,
    39,  72,  209, 122, 187, 252, 5,   214, 199, 144, 129, 130, 107, 180, 237,
    246, 119, 24,  105, 106, 83,  220, 13,  158, 239, 104, 73,  250, 115, 164,
    85,  110, 135, 208, 89,  50,  139, 108, 173, 14,  215, 184, 81,  234, 35,
    92,  149, 238, 127, 64,  17,  178, 243, 228, 253, 86,  159, 240, 233, 226,
    235, 116, 245, 102, 183, 232, 177, 194, 251, 124, 61,  190, 191, 216, 137,
    58,  123, 132, 229, 134, 247, 192, 65,  210, 163, 172, 189, 38,  231, 200,
    193, 10,  195, 244, 133, 222, 167, 96,  201, 186, 211, 204, 213, 182, 207,
    80,  217, 202, 27,  188, 109, 206, 255, 248, 121, 242, 75,  212, 221, 254,
    175};

constexpr u8 inv_sbox[256] = {
    40,  1,   122, 35,  4,   141, 134, 31,  16,  9,   226, 83,  12,  157, 174,
    55,  64,  185, 58,  67,  44,  133, 86,  103, 152, 33,  50,  243, 36,  69,
    38,  111, 0,   17,  98,  179, 124, 37,  222, 135, 48,  25,  18,  91,  28,
    77,  30,  95,  56,  105, 170, 75,  92,  85,  126, 15,  8,   121, 210, 11,
    108, 205, 118, 71,  184, 217, 114, 107, 132, 53,  110, 63,  136, 161, 74,
    251, 100, 21,  70,  39,  240, 177, 2,   155, 84,  165, 190, 87,  80,  169,
    66,  51,  180, 13,  94,  7,   232, 65,  42,  59,  20,  29,  198, 23,  160,
    153, 154, 147, 172, 245, 166, 47,  128, 41,  34,  163, 196, 61,  46,  151,
    112, 249, 138, 211, 204, 45,  78,  183, 104, 145, 146, 27,  212, 229, 214,
    167, 24,  209, 10,  171, 68,  101, 14,  119, 144, 57,  130, 3,   76,  181,
    54,  79,  120, 89,  106, 115, 60,  125, 158, 191, 72,  97,  26,  219, 164,
    5,   62,  231, 96,  49,  82,  43,  220, 173, 102, 255, 32,  201, 186, 131,
    148, 117, 238, 199, 176, 73,  234, 139, 244, 221, 206, 207, 216, 225, 202,
    227, 52,  93,  6,   143, 224, 233, 242, 19,  236, 109, 246, 239, 168, 137,
    218, 235, 252, 237, 142, 175, 208, 241, 90,  123, 156, 253, 230, 127, 88,
    81,  194, 99,  188, 213, 22,  223, 200, 193, 178, 195, 116, 149, 182, 159,
    192, 129, 250, 187, 228, 197, 150, 215, 248, 113, 162, 203, 140, 189, 254,
    247};

u32 sub(const u32 x) {
  u32 y = 0;
  for (auto i = 0; i < 32; i += 8) {
    y |= static_cast<u32>(sbox[x >> i & 0xff]) << i;
  }
  return y;
}

u32 inv_sub(const u32 x) {
  u32 y = 0;
  for (auto i = 0; i < 32; i += 8) {
    y |= static_cast<u32>(inv_sbox[x >> i & 0xff]) << i;
  }
  return y;
}

u32 shuffle_bits(const u32 x) {
  u32 y = 0;
  for (auto i = 0; i < 4; ++i) {
    for (auto j = 0; j < 8; ++j) {
      y |= (x >> (i * 8 + j) & 1) << (j * 4 + i);
    }
  }
  return y;
}

u32 inv_shuffle_bits(const u32 x) {
  u32 y = 0;
  for (auto i = 0; i < 8; ++i) {
    for (auto j = 0; j < 4; ++j) {
      y |= (x >> (i * 4 + j) & 1) << (j * 8 + i);
    }
  }
  return y;
}

u8 unshuffle_byte(const u32 x, const int byte_index) {
  u8 y = 0;
  for (auto i = 0; i < 8; ++i) {
    y |= (x >> (i * 4 + byte_index) & 1) << i;
  }
  return y;
}

using Data = std::vector<std::pair<u32, u32>>;
using Check = bool (*)(u32, u32, u8);

double compute_bias(const Data &data, const Check check, const u8 k) {
  std::size_t count = 0;
  for (const auto &d : data) {
    if (check(d.first, d.second, k)) {
      ++count;
    }
  }
  const auto prob = static_cast<double>(count) / data.size();
  return std::abs(prob - 0.5);
}

u8 recover_byte(const Data &data, const Check check) {
  double max_bias = 0;
  u8 best_k = 0;
  for (u8 k = 0;; ++k) {
    const auto b = compute_bias(data, check, k);
    if (b > max_bias) {
      max_bias = b;
      best_k = k;
    }
    if (k == 255) {
      break;
    }
  }
  return best_k;
}

bool check4_0(const u32 pt, const u32 ct, const u8 k) {
  const bool p2 = pt >> 2 & 1;
  const auto ub3_0 = inv_sbox[(ct & 0xff) ^ k];
  const bool u3_4 = ub3_0 >> 4 & 1;
  return p2 != u3_4;
}

bool check4_1(const u32 pt, const u32 ct, const u8 k) {
  const bool p8 = pt >> 8 & 1, p15 = pt >> 15 & 1;
  const auto ub3_1 = inv_sbox[(ct >> 8 & 0xff) ^ k];
  const bool u3_8 = ub3_1 & 1, u3_12 = ub3_1 >> 4 & 1;
  return (p8 != p15) != (u3_8 != u3_12);
}

bool check4_2(const u32 pt, const u32 ct, const u8 k) {
  const bool p0 = pt & 1, p2 = pt >> 2 & 1, p15 = pt >> 15 & 1;
  const auto ub3_2 = inv_sbox[(ct >> 16 & 0xff) ^ k];
  const bool u3_16 = ub3_2 & 1, u3_20 = ub3_2 >> 4 & 1, u3_24 = ct >> 24 & 1;
  return ((p0 != p2) != p15) != ((u3_16 != u3_20) != u3_24);
}

bool check4_3(const u32 pt, const u32 ct, const u8 k) {
  const bool p15 = pt >> 15 & 1;
  const auto ub3_3 = inv_sbox[(ct >> 24 & 0xff) ^ k];
  const bool u3_28 = ub3_3 >> 4 & 1;
  return p15 != u3_28;
}

u32 recover4(const Data &data) {
  return static_cast<u32>(recover_byte(data, check4_0)) |
         static_cast<u32>(recover_byte(data, check4_1)) << 8 |
         static_cast<u32>(recover_byte(data, check4_2)) << 16 |
         static_cast<u32>(recover_byte(data, check4_3)) << 24;
}

I used 32-bit unsigned ints to store blocks, and std::vector<std::pair<u32, u32>> to store pairs of known plaintexts and corresponding ciphertexts. Once I recovered the last round key, I can decrypt the last round:

const auto k4 = recover4(data);
for (auto &p : data) {
  p.second = inv_sub(p.second ^ k4);
}

Then I just have to repeat this for the other rounds:

bool check3_0(const u32 pt, const u32 ct, const u8 k) {
  const bool p8 = pt >> 8 & 1;
  const auto ub2_0 = inv_sbox[unshuffle_byte(ct, 0) ^ k];
  const bool u2_4 = ub2_0 >> 4;
  return p8 != u2_4;
}

bool check3_1(const u32 pt, const u32 ct, const u8 k) {
  const bool p1 = pt >> 1 & 1, p25 = pt >> 25 & 1;
  const auto ub2_1 = inv_sbox[unshuffle_byte(ct, 1) ^ k];
  const bool u2_8 = ub2_1 & 1, u2_12 = ub2_1 >> 4 & 1;
  return (p1 != p25) != (u2_8 != u2_12);
}

bool check3_2(const u32 pt, const u32 ct, const u8 k) {
  const bool p0 = pt & 1, p8 = pt >> 8 & 1, p25 = pt >> 25 & 1;
  const auto ub2_2 = inv_sbox[unshuffle_byte(ct, 2) ^ k];
  const bool u2_16 = ub2_2 & 1, u2_20 = ub2_2 >> 4 & 1, u2_24 = ct >> 3 & 1;
  return ((p0 != p8) != p25) != ((u2_16 != u2_20) != u2_24);
}

bool check3_3(const u32 pt, const u32 ct, const u8 k) {
  const bool p25 = pt >> 25 & 1;
  const auto ub2_3 = inv_sbox[unshuffle_byte(ct, 3) ^ k];
  const bool u2_28 = ub2_3 >> 4 & 1;
  return p25 != u2_28;
}

u32 recover3(const Data &data) {
  return shuffle_bits(static_cast<u32>(recover_byte(data, check3_0)) |
                      static_cast<u32>(recover_byte(data, check3_1)) << 8 |
                      static_cast<u32>(recover_byte(data, check3_2)) << 16 |
                      static_cast<u32>(recover_byte(data, check3_3)) << 24);
}

bool check2_0(const u32 pt, const u32 ct, const u8 k) {
  const bool p1 = pt >> 1 & 1;
  const auto ub1_0 = inv_sbox[unshuffle_byte(ct, 0) ^ k];
  const bool u1_4 = ub1_0 >> 4 & 1;
  return p1 != u1_4;
}

bool check2_1(const u32 pt, const u32 ct, const u8 k) {
  const bool p4 = pt >> 4 & 1, p7 = pt >> 7 & 1;
  const auto ub1_1 = inv_sbox[unshuffle_byte(ct, 1) ^ k];
  const bool u1_8 = ub1_1 & 1, u1_12 = ub1_1 >> 4 & 1;
  return (p4 != p7) != (u1_8 != u1_12);
}

bool check2_2(const u32 pt, const u32 ct, const u8 k) {
  const bool p0 = pt & 1, p1 = pt >> 1 & 1, p7 = pt >> 7 & 1;
  const auto ub1_2 = inv_sbox[unshuffle_byte(ct, 2) ^ k];
  const bool u1_16 = ub1_2 & 1, u1_20 = ub1_2 >> 4 & 1, u1_24 = ct >> 3 & 1;
  return ((p0 != p1) != p7) != ((u1_16 != u1_20) != u1_24);
}

bool check2_3(const u32 pt, const u32 ct, const u8 k) {
  const bool p31 = pt >> 31 & 1;
  const auto ub1_3 = inv_sbox[unshuffle_byte(ct, 3) ^ k];
  const bool u1_31 = ub1_3 >> 7 & 1;
  return p31 != u1_31;
}

u32 recover2(const Data &data) {
  return shuffle_bits(static_cast<u32>(recover_byte(data, check2_0)) |
                      static_cast<u32>(recover_byte(data, check2_1)) << 8 |
                      static_cast<u32>(recover_byte(data, check2_2)) << 16 |
                      static_cast<u32>(recover_byte(data, check2_3)) << 24);
}

u8 recover_byte0(const u32 pt, const u32 ct, const int byte_index,
                 const u8 kb1) {
  u8 pb = pt >> (byte_index * 8) & 0xff;
  auto ub0 = inv_sbox[unshuffle_byte(ct, byte_index) ^ kb1];
  return pb ^ ub0;
}

std::optional<u8> check1(const Data &data, const int byte_index, const u8 kb1) {
  const auto kb0 =
      recover_byte0(data.front().first, data.front().second, byte_index, kb1);
  if (!std::all_of(data.begin() + 1, data.end(), [&](const auto &p) {
        return recover_byte0(p.first, p.second, byte_index, kb1) == kb0;
      })) {
    return {};
  }
  return kb0;
}

std::pair<u8, u8> recover_byte0_1(const Data &data, const int byte_index) {
  for (u8 kb1 = 0;; ++kb1) {
    if (const auto result = check1(data, byte_index, kb1)) {
      return {*result, kb1};
    }
    if (kb1 == 255) {
      break;
    }
  }
  std::ostringstream ss;
  ss << "failed to recover byte " << byte_index << " of the first round key";
  throw std::invalid_argument(ss.str());
}

std::pair<u32, u32> recover0_1(const Data &data) {
  u32 k0 = 0, unshuffled_k1 = 0;
  for (auto i = 0; i < 4; ++i) {
    auto result = recover_byte0_1(data, i);
    k0 |= static_cast<u32>(result.first) << (i * 8);
    unshuffled_k1 |= static_cast<u32>(result.second) << (i * 8);
  }
  return {k0, shuffle_bits(unshuffled_k1)};
}

Key recover_key(Data data) {
  if (data.empty()) {
    throw std::invalid_argument("data is empty");
  }
  const auto k4 = recover4(data);
  for (auto &p : data) {
    p.second = inv_sub(p.second ^ k4);
  }
  const auto k3 = recover3(data);
  for (auto &p : data) {
    p.second = inv_sub(inv_shuffle_bits(p.second ^ k3));
  }
  const auto k2 = recover2(data);
  for (auto &p : data) {
    p.second = inv_sub(inv_shuffle_bits(p.second ^ k2));
  }
  const auto k0_1 = recover0_1(data);
  return {k0_1.first, k0_1.second, k2, k3, k4};
}

After recovering the key, we can decrypt the challenge ciphertext and obtain the flag. I found that the attack requires around 30,000 known plaintexts to work reliably. Here’s the full C++ code:

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <utility>
#include <vector>

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

namespace {
using u8 = uint8_t;
using u32 = uint32_t;
using Key = std::array<u32, 5>;

constexpr u8 sbox[256] = {
    32,  1,   82,  147, 4,   165, 198, 95,  56,  9,   138, 59,  12,  93,  142,
    55,  8,   33,  42,  203, 100, 77,  230, 103, 136, 41,  162, 131, 44,  101,
    46,  7,   176, 25,  114, 3,   28,  37,  30,  79,  0,   113, 98,  171, 20,
    125, 118, 111, 40,  169, 26,  91,  196, 69,  150, 15,  48,  145, 18,  99,
    156, 117, 166, 71,  16,  97,  90,  19,  140, 29,  78,  63,  160, 185, 74,
    51,  148, 45,  126, 151, 88,  225, 170, 11,  84,  53,  22,  87,  224, 153,
    218, 43,  52,  197, 94,  47,  168, 161, 34,  227, 76,  141, 174, 23,  128,
    49,  154, 67,  60,  205, 70,  31,  120, 249, 66,  155, 236, 181, 62,  143,
    152, 57,  2,   219, 36,  157, 54,  223, 112, 241, 146, 179, 68,  21,  6,
    39,  72,  209, 122, 187, 252, 5,   214, 199, 144, 129, 130, 107, 180, 237,
    246, 119, 24,  105, 106, 83,  220, 13,  158, 239, 104, 73,  250, 115, 164,
    85,  110, 135, 208, 89,  50,  139, 108, 173, 14,  215, 184, 81,  234, 35,
    92,  149, 238, 127, 64,  17,  178, 243, 228, 253, 86,  159, 240, 233, 226,
    235, 116, 245, 102, 183, 232, 177, 194, 251, 124, 61,  190, 191, 216, 137,
    58,  123, 132, 229, 134, 247, 192, 65,  210, 163, 172, 189, 38,  231, 200,
    193, 10,  195, 244, 133, 222, 167, 96,  201, 186, 211, 204, 213, 182, 207,
    80,  217, 202, 27,  188, 109, 206, 255, 248, 121, 242, 75,  212, 221, 254,
    175};

constexpr u8 inv_sbox[256] = {
    40,  1,   122, 35,  4,   141, 134, 31,  16,  9,   226, 83,  12,  157, 174,
    55,  64,  185, 58,  67,  44,  133, 86,  103, 152, 33,  50,  243, 36,  69,
    38,  111, 0,   17,  98,  179, 124, 37,  222, 135, 48,  25,  18,  91,  28,
    77,  30,  95,  56,  105, 170, 75,  92,  85,  126, 15,  8,   121, 210, 11,
    108, 205, 118, 71,  184, 217, 114, 107, 132, 53,  110, 63,  136, 161, 74,
    251, 100, 21,  70,  39,  240, 177, 2,   155, 84,  165, 190, 87,  80,  169,
    66,  51,  180, 13,  94,  7,   232, 65,  42,  59,  20,  29,  198, 23,  160,
    153, 154, 147, 172, 245, 166, 47,  128, 41,  34,  163, 196, 61,  46,  151,
    112, 249, 138, 211, 204, 45,  78,  183, 104, 145, 146, 27,  212, 229, 214,
    167, 24,  209, 10,  171, 68,  101, 14,  119, 144, 57,  130, 3,   76,  181,
    54,  79,  120, 89,  106, 115, 60,  125, 158, 191, 72,  97,  26,  219, 164,
    5,   62,  231, 96,  49,  82,  43,  220, 173, 102, 255, 32,  201, 186, 131,
    148, 117, 238, 199, 176, 73,  234, 139, 244, 221, 206, 207, 216, 225, 202,
    227, 52,  93,  6,   143, 224, 233, 242, 19,  236, 109, 246, 239, 168, 137,
    218, 235, 252, 237, 142, 175, 208, 241, 90,  123, 156, 253, 230, 127, 88,
    81,  194, 99,  188, 213, 22,  223, 200, 193, 178, 195, 116, 149, 182, 159,
    192, 129, 250, 187, 228, 197, 150, 215, 248, 113, 162, 203, 140, 189, 254,
    247};

u32 sub(const u32 x) {
  u32 y = 0;
  for (auto i = 0; i < 32; i += 8) {
    y |= static_cast<u32>(sbox[x >> i & 0xff]) << i;
  }
  return y;
}

u32 inv_sub(const u32 x) {
  u32 y = 0;
  for (auto i = 0; i < 32; i += 8) {
    y |= static_cast<u32>(inv_sbox[x >> i & 0xff]) << i;
  }
  return y;
}

u32 shuffle_bits(const u32 x) {
  u32 y = 0;
  for (auto i = 0; i < 4; ++i) {
    for (auto j = 0; j < 8; ++j) {
      y |= (x >> (i * 8 + j) & 1) << (j * 4 + i);
    }
  }
  return y;
}

u32 inv_shuffle_bits(const u32 x) {
  u32 y = 0;
  for (auto i = 0; i < 8; ++i) {
    for (auto j = 0; j < 4; ++j) {
      y |= (x >> (i * 4 + j) & 1) << (j * 8 + i);
    }
  }
  return y;
}

u8 unshuffle_byte(const u32 x, const int byte_index) {
  u8 y = 0;
  for (auto i = 0; i < 8; ++i) {
    y |= (x >> (i * 4 + byte_index) & 1) << i;
  }
  return y;
}

using Data = std::vector<std::pair<u32, u32>>;
using Check = bool (*)(u32, u32, u8);

double compute_bias(const Data &data, const Check check, const u8 k) {
  std::size_t count = 0;
  for (const auto &d : data) {
    if (check(d.first, d.second, k)) {
      ++count;
    }
  }
  const auto prob = static_cast<double>(count) / data.size();
  return std::abs(prob - 0.5);
}

u8 recover_byte(const Data &data, const Check check) {
  double max_bias = 0;
  u8 best_k = 0;
  for (u8 k = 0;; ++k) {
    const auto b = compute_bias(data, check, k);
    if (b > max_bias) {
      max_bias = b;
      best_k = k;
    }
    if (k == 255) {
      break;
    }
  }
  return best_k;
}

bool check4_0(const u32 pt, const u32 ct, const u8 k) {
  const bool p2 = pt >> 2 & 1;
  const auto ub3_0 = inv_sbox[(ct & 0xff) ^ k];
  const bool u3_4 = ub3_0 >> 4 & 1;
  return p2 != u3_4;
}

bool check4_1(const u32 pt, const u32 ct, const u8 k) {
  const bool p8 = pt >> 8 & 1, p15 = pt >> 15 & 1;
  const auto ub3_1 = inv_sbox[(ct >> 8 & 0xff) ^ k];
  const bool u3_8 = ub3_1 & 1, u3_12 = ub3_1 >> 4 & 1;
  return (p8 != p15) != (u3_8 != u3_12);
}

bool check4_2(const u32 pt, const u32 ct, const u8 k) {
  const bool p0 = pt & 1, p2 = pt >> 2 & 1, p15 = pt >> 15 & 1;
  const auto ub3_2 = inv_sbox[(ct >> 16 & 0xff) ^ k];
  const bool u3_16 = ub3_2 & 1, u3_20 = ub3_2 >> 4 & 1, u3_24 = ct >> 24 & 1;
  return ((p0 != p2) != p15) != ((u3_16 != u3_20) != u3_24);
}

bool check4_3(const u32 pt, const u32 ct, const u8 k) {
  const bool p15 = pt >> 15 & 1;
  const auto ub3_3 = inv_sbox[(ct >> 24 & 0xff) ^ k];
  const bool u3_28 = ub3_3 >> 4 & 1;
  return p15 != u3_28;
}

u32 recover4(const Data &data) {
  return static_cast<u32>(recover_byte(data, check4_0)) |
         static_cast<u32>(recover_byte(data, check4_1)) << 8 |
         static_cast<u32>(recover_byte(data, check4_2)) << 16 |
         static_cast<u32>(recover_byte(data, check4_3)) << 24;
}

bool check3_0(const u32 pt, const u32 ct, const u8 k) {
  const bool p8 = pt >> 8 & 1;
  const auto ub2_0 = inv_sbox[unshuffle_byte(ct, 0) ^ k];
  const bool u2_4 = ub2_0 >> 4;
  return p8 != u2_4;
}

bool check3_1(const u32 pt, const u32 ct, const u8 k) {
  const bool p1 = pt >> 1 & 1, p25 = pt >> 25 & 1;
  const auto ub2_1 = inv_sbox[unshuffle_byte(ct, 1) ^ k];
  const bool u2_8 = ub2_1 & 1, u2_12 = ub2_1 >> 4 & 1;
  return (p1 != p25) != (u2_8 != u2_12);
}

bool check3_2(const u32 pt, const u32 ct, const u8 k) {
  const bool p0 = pt & 1, p8 = pt >> 8 & 1, p25 = pt >> 25 & 1;
  const auto ub2_2 = inv_sbox[unshuffle_byte(ct, 2) ^ k];
  const bool u2_16 = ub2_2 & 1, u2_20 = ub2_2 >> 4 & 1, u2_24 = ct >> 3 & 1;
  return ((p0 != p8) != p25) != ((u2_16 != u2_20) != u2_24);
}

bool check3_3(const u32 pt, const u32 ct, const u8 k) {
  const bool p25 = pt >> 25 & 1;
  const auto ub2_3 = inv_sbox[unshuffle_byte(ct, 3) ^ k];
  const bool u2_28 = ub2_3 >> 4 & 1;
  return p25 != u2_28;
}

u32 recover3(const Data &data) {
  return shuffle_bits(static_cast<u32>(recover_byte(data, check3_0)) |
                      static_cast<u32>(recover_byte(data, check3_1)) << 8 |
                      static_cast<u32>(recover_byte(data, check3_2)) << 16 |
                      static_cast<u32>(recover_byte(data, check3_3)) << 24);
}

bool check2_0(const u32 pt, const u32 ct, const u8 k) {
  const bool p1 = pt >> 1 & 1;
  const auto ub1_0 = inv_sbox[unshuffle_byte(ct, 0) ^ k];
  const bool u1_4 = ub1_0 >> 4 & 1;
  return p1 != u1_4;
}

bool check2_1(const u32 pt, const u32 ct, const u8 k) {
  const bool p4 = pt >> 4 & 1, p7 = pt >> 7 & 1;
  const auto ub1_1 = inv_sbox[unshuffle_byte(ct, 1) ^ k];
  const bool u1_8 = ub1_1 & 1, u1_12 = ub1_1 >> 4 & 1;
  return (p4 != p7) != (u1_8 != u1_12);
}

bool check2_2(const u32 pt, const u32 ct, const u8 k) {
  const bool p0 = pt & 1, p1 = pt >> 1 & 1, p7 = pt >> 7 & 1;
  const auto ub1_2 = inv_sbox[unshuffle_byte(ct, 2) ^ k];
  const bool u1_16 = ub1_2 & 1, u1_20 = ub1_2 >> 4 & 1, u1_24 = ct >> 3 & 1;
  return ((p0 != p1) != p7) != ((u1_16 != u1_20) != u1_24);
}

bool check2_3(const u32 pt, const u32 ct, const u8 k) {
  const bool p31 = pt >> 31 & 1;
  const auto ub1_3 = inv_sbox[unshuffle_byte(ct, 3) ^ k];
  const bool u1_31 = ub1_3 >> 7 & 1;
  return p31 != u1_31;
}

u32 recover2(const Data &data) {
  return shuffle_bits(static_cast<u32>(recover_byte(data, check2_0)) |
                      static_cast<u32>(recover_byte(data, check2_1)) << 8 |
                      static_cast<u32>(recover_byte(data, check2_2)) << 16 |
                      static_cast<u32>(recover_byte(data, check2_3)) << 24);
}

u8 recover_byte0(const u32 pt, const u32 ct, const int byte_index,
                 const u8 kb1) {
  u8 pb = pt >> (byte_index * 8) & 0xff;
  auto ub0 = inv_sbox[unshuffle_byte(ct, byte_index) ^ kb1];
  return pb ^ ub0;
}

std::optional<u8> check1(const Data &data, const int byte_index, const u8 kb1) {
  const auto kb0 =
      recover_byte0(data.front().first, data.front().second, byte_index, kb1);
  if (!std::all_of(data.begin() + 1, data.end(), [&](const auto &p) {
        return recover_byte0(p.first, p.second, byte_index, kb1) == kb0;
      })) {
    return {};
  }
  return kb0;
}

std::pair<u8, u8> recover_byte0_1(const Data &data, const int byte_index) {
  for (u8 kb1 = 0;; ++kb1) {
    if (const auto result = check1(data, byte_index, kb1)) {
      return {*result, kb1};
    }
    if (kb1 == 255) {
      break;
    }
  }
  std::ostringstream ss;
  ss << "failed to recover byte " << byte_index << " of the first round key";
  throw std::invalid_argument(ss.str());
}

std::pair<u32, u32> recover0_1(const Data &data) {
  u32 k0 = 0, unshuffled_k1 = 0;
  for (auto i = 0; i < 4; ++i) {
    auto result = recover_byte0_1(data, i);
    k0 |= static_cast<u32>(result.first) << (i * 8);
    unshuffled_k1 |= static_cast<u32>(result.second) << (i * 8);
  }
  return {k0, shuffle_bits(unshuffled_k1)};
}

} // namespace

u32 encrypt_block(u32 x, const Key &key) {
  for (auto i = 0; i < 3; ++i) {
    x ^= key[i];
    x = shuffle_bits(sub(x));
  }
  x ^= key[3];
  x = sub(x);
  x ^= key[4];
  return x;
}

u32 decrypt_block(u32 x, const Key &key) {
  x ^= key[4];
  x = inv_sub(x);
  x ^= key[3];
  for (auto i = 2; i >= 0; --i) {
    x = inv_sub(inv_shuffle_bits(x));
    x ^= key[i];
  }
  return x;
}

Key recover_key(Data data) {
  if (data.empty()) {
    throw std::invalid_argument("data is empty");
  }
  const auto k4 = recover4(data);
  for (auto &p : data) {
    p.second = inv_sub(p.second ^ k4);
  }
  const auto k3 = recover3(data);
  for (auto &p : data) {
    p.second = inv_sub(inv_shuffle_bits(p.second ^ k3));
  }
  const auto k2 = recover2(data);
  for (auto &p : data) {
    p.second = inv_sub(inv_shuffle_bits(p.second ^ k2));
  }
  const auto k0_1 = recover0_1(data);
  return {k0_1.first, k0_1.second, k2, k3, k4};
}

PYBIND11_MODULE(seccom, m) {
  m.def("encrypt_block", &encrypt_block);
  m.def("decrypt_block", &decrypt_block);
  m.def("recover_key", &recover_key);
}

And the Python code that talks to the server and handles the CBC mode decryption:

#!/usr/bin/env python3

import random

from pwn import *

from seccom import *


def inttohex(x):
    return x.to_bytes(4, "big").hex()


def hextoint(x):
    return int.from_bytes(bytes.fromhex(x), "big")


r = remote("challs.htsp.ro", 10001)

data = []

for i in range(3):
    pts = []
    cts = []
    for j in range(10000):
        pt = random.getrandbits(32)
        pts.append(pt)
        r.sendline(b"1")
        r.sendline(inttohex(pt).encode())
    for j in range(10000):
        r.recvuntil(b"ct = ")
        ct = hextoint(r.recvlineS(keepends=False))
        cts.append(ct)
    data.extend(zip(pts, cts))

key = recover_key(data)
log.info(f"{key=}")

r.sendlineafter(b"cmd = ", b"2")
r.recvuntil(b"iv = ")
iv = hextoint(r.recvlineS(keepends=False))
r.recvuntil(b"ct = ")
ct = r.recvlineS(keepends=False)
ct = [hextoint(ct[i : i + 8]) for i in range(0, len(ct), 8)]
pt = [decrypt_block(ct[0], key) ^ iv] + [
    decrypt_block(ctb, key) ^ prev_ctb for ctb, prev_ctb in zip(ct[1:], ct[:-1])
]
r.sendlineafter(b"pt = ", "".join(map(inttohex, pt)).encode())

r.interactive()

The Python code sends 10,000 plaintexts at a time so that we don’t waste time waiting for 30,000 round trips. The flag was something like “differential cryptanalysis go brrr,” so I guess the intended solution was differential cryptanalysis. I think I spent around three days on this, which is probably the most that I’ve ever spent on a single challenge.