Binaries

Cześć

Mam na imię Michał

Programuję w C++

mrzechonek

Data

1 1 0 1 0 1 0 1
0 1 0 1 1 1 0 1
0 1 1 0 1 1 1 0
0 0 0 1 0 0 1 0
1 0 1 0 1 0 0 1
1 1 0 1 1 0 1 1
0 1 1 0 0 1 1 0
1 0 1 0 1 0 1 1
0 1 1 0 1 1 0 1
0 1 0 1 0 1 0 1
1 1 1 1 0 0 0 0
1 0 1 0 1 1 0 1

But... why?!

here's why!

When I use a word,

it means just what I choose it to mean

numbers

1 1 0 1 0 1 0 1
0 1 0 1 1 1 0 1
0 1 1 0 1 1 1 0
0 0 0 1 0 0 1 0
 3579670034
int('11010101'
    '01011101'
    '01101110'
    '00010010', 2)

endians

1 1 0 1 0 1 0 1
0 1 0 1 1 1 0 1
0 1 1 0 1 1 1 0
0 0 0 1 0 0 1 0
309222869
int('00010010'
    '01101110'
    '01011101'
    '11010101', 2)

arrays

1 1 0 1 0 1 0 1
[18, 110, 93, 213]
[int('00010010', 2),
 int('01101110', 2),
 int('01011101', 2),
 int('11010101', 2)]
0 1 0 1 1 1 0 1
0 1 1 0 1 1 1 0
0 0 0 1 0 0 1 0

structures

1 1 0 1 0 1 0 1
0 1 0 1 1 1 0 1
1 1 0 1 0 1 0 1
(54621, 110, 18)
(int('11010101'
     '01011101', 2),
 int('01101110', 2),
 int('00010010', 2))
0 1 0 1 1 1 0 1

bit fields

(13, 21974, 3602)
(int('1101', 2),
 int(    '0101'
     '01011101'
     '0110', 2),
 int(    '1110'
     '00010010', 2))
1 1 0 1
1 1 0 1
0 1 0 1
1 1 0 1
0 1 0 1
1 1 0 1
0 1 0 1
1 1 0 1

C: struct

/*
 * IPv4 header described as a packed C struct.
 *
 * The members below add up to 20 bytes; __attribute__((packed)) asks the
 * compiler not to insert padding between them.
 * NOTE(review): the order in which bit fields are allocated inside a byte is
 * implementation-defined in C -- confirm it matches the wire format on the
 * target compiler/ABI.
 */
typedef struct
{
    unsigned version: 4;        /* 4 bits */
    unsigned header_length: 4;  /* 4 bits */
    unsigned services: 6;       /* 6 bits */
    unsigned congestion: 2;     /* 2 bits */
    uint16_t total_length;      /* read with ntohs() in the usage example */
    uint16_t identification;
    unsigned flags: 3;          /* 3 bits */
    unsigned fragment: 13;      /* 13 bits */
    uint8_t  time_to_live;
    uint8_t  protocol;
    uint16_t header_checksum;
    uint8_t  source[4];         /* four address octets */
    uint8_t  destination[4];    /* four address octets */
} __attribute__((packed)) IPv4Header;

C: struct

/* 20 raw bytes of a sample IPv4 header, written as a string literal. */
char *data = "\x45\x00\x00\x54\x17\x61\x40\x00"
             "\x40\x01\x21\x37\xc0\xa8\x0b\x12"
             "\xd4\x4d\x62\x09";

/* Reinterpret the raw bytes in place as an IPv4Header (no copy is made). */
IPv4Header *header = (IPv4Header*)data;

/* ntohs() converts the 16-bit value from network to host byte order. */
printf("total length: %hu\n",
       ntohs(header->total_length));

Python: struct

# Decode the IPv4 header with one format code per field. Fields narrower than
# a byte cannot be expressed in a struct format, so each pair of bit fields is
# read as one combined integer.
header = struct.unpack(
    '!'      # network byte order
    'BB'     # version + header_length, services + congestion
    'HHH'    # total_length, identification, flags + fragment
    'BBH'    # time_to_live, protocol, header_checksum
    '4B4B',  # source, destination (one integer per octet)
    data,
)

# Index 2 is total_length: it follows the two combined one-byte values.
print("total length:", header[2])

Python: bitstring

# Decode the IPv4 header field by field; bitstring formats can address
# individual bits, so every bit field gets its own entry. The adjacent string
# literals concatenate into a single comma-separated format string.
header = bitstring.Bits(data).unpack(
    'uint:4,'     # version
    'uint:4,'     # header_length
    'uint:6,'     # services
    'uint:2,'     # congestion
    'uintbe:16,'  # total_length
    'uintbe:16,'  # identification
    'uint:3,'     # flags
    'uint:13,'    # fragment
    'uint:8,'     # time_to_live
    'uint:8,'     # protocol
    'uintbe:16,'  # header_checksum
    'bytes:4,'    # source
    'bytes:4'     # destination
)

# Index 4 is total_length: four bit fields precede it.
print("total length:", header[4])

Python: CTYPES

class IPv4Header(ctypes.BigEndianStructure):
    """IPv4 header as a big-endian ctypes structure.

    Each ``_fields_`` entry is ``(name, ctype, bit_width)``; the bit widths
    add up to three 32-bit words, followed by two 4-byte address arrays,
    for 20 bytes in total.
    """

    _fields_ = (
        ('version', ctypes.c_uint, 4),
        ('header_length', ctypes.c_uint, 4),
        ('services', ctypes.c_uint, 6),
        ('congestion', ctypes.c_uint, 2),
        ('total_length', ctypes.c_uint, 16),
        ('identification', ctypes.c_uint, 16),
        ('flags', ctypes.c_uint, 3),
        ('fragment', ctypes.c_uint, 13),
        ('time_to_live', ctypes.c_uint, 8),
        ('protocol', ctypes.c_uint, 8),
        ('header_checksum', ctypes.c_uint, 16),
        # c_ubyte rather than c_byte: address octets are unsigned, and a
        # signed element type would report e.g. 0xC0 as -64.
        ('source', ctypes.c_ubyte * 4),
        ('destination', ctypes.c_ubyte * 4),
    )

# Copy the raw bytes into a fresh IPv4Header. Unlike ctypes.cast(), which
# would alias the memory of the immutable bytes object without any length
# check, from_buffer_copy() raises ValueError when data is shorter than the
# structure and leaves the source buffer untouched.
header = IPv4Header.from_buffer_copy(data)

print("total length:", header.total_length)

There must be a better way

That reminds me of something...

class Person(models.Model): # Django
    """Example Django model: a name of at most 42 characters and an integer age."""

    name = models.CharField(max_length=42)
    age = models.IntegerField()


class CommentSerializer(serializers.Serializer): # DRF
    """Example serializer: an e-mail, content of at most 200 characters, and a creation timestamp."""

    email = serializers.EmailField()
    content = serializers.CharField(max_length=200)
    created = serializers.DateTimeField()


class UserSchema(Schema): # marshmallow
    """Example schema: a string name, an e-mail address, and a creation timestamp."""

    name = fields.Str()
    email = fields.Email()
    created_at = fields.DateTime()

wishful thinking driven development

class IPv4Header(BitStruct):
    """Declarative description of the IPv4 header.

    Fields are listed in layout order. ``Unsigned(n)`` declares an unsigned
    bit field ``n`` bits wide; ``Bytes(n)`` declares an array of ``n`` bytes.
    """

    # First 32-bit word.
    version = Unsigned(4)
    header_length = Unsigned(4)
    services = Unsigned(6)
    congestion = Unsigned(2)
    total_length = Unsigned(16)
    # Second 32-bit word.
    identification = Unsigned(16)
    flags = Unsigned(3)
    fragment = Unsigned(13)
    # Third 32-bit word.
    time_to_live = Unsigned(8)
    protocol = Unsigned(8)
    header_checksum = Unsigned(16)
    # Addresses, four bytes each.
    source = Bytes(4)
    destination = Bytes(4)

WTDD: fields

class BitStruct(ctypes.BigEndianStructure):
    # ...


class Field:
    """Base class marking a class attribute as a structure field.

    Subclasses set ``spec``: the part of a ctypes ``_fields_`` entry that
    follows the field name, as a tuple.
    """


class Unsigned(Field):
    """Unsigned integer bit field that is ``width`` bits wide."""

    def __init__(self, width):
        self.spec = (ctypes.c_uint, width)


class Bytes(Field):
    """Fixed-size array of ``length`` unsigned bytes."""

    def __init__(self, length):
        # ctypes has no ``c_bytes`` attribute (that lookup raises
        # AttributeError); an array of c_ubyte is the raw-octet array type.
        self.spec = (ctypes.c_ubyte * length,)

WTDD: applied wizardry

class BitStructMeta(type(ctypes.BigEndianStructure)):
    """Metaclass that builds ctypes ``_fields_`` from ``Field`` attributes.

    Every class attribute that is a ``Field`` instance becomes one
    ``_fields_`` entry ``(attribute_name, *field.spec)``, in declaration
    order.
    """

    @classmethod
    def __prepare__(mcs, name, bases, **kwargs):
        """Return the namespace used while the class body executes.

        Declared as a classmethod and accepting ``**kwargs`` so that it
        takes the same class keywords as ``__new__``.
        """
        # order matters!
        return OrderedDict()

    def __new__(mcs, name, bases, attrs, **kwargs):
        """Create the class, then derive ``_fields_`` from its ``Field`` attributes."""
        cls = super().__new__(mcs, name, bases, attrs, **kwargs)

        # ctypes accepts _fields_ being assigned after the class is created.
        cls._fields_ = [(field_name,) + field.spec
                        for field_name, field in attrs.items()
                        if isinstance(field, Field)]

        return cls

WTDD: APPLIED WIZARDRY

class BitStruct(ctypes.BigEndianStructure,
                metaclass=BitStructMeta):
    # ...


class IPv4Header(BitStruct):
    version = Unsigned(4)
    header_length = Unsigned(4)
    services = Unsigned(6)
    congestion = Unsigned(2)
    total_length = Unsigned(16)
    # ...

WTDD: THERE

class BitStruct(ctypes.BigEndianStructure,
                metaclass=BitStructMeta):
    # ...

    @classmethod
    def unpack(cls, data):
        """Build an instance of ``cls`` from the raw bytes in ``data``.

        The bytes are copied into the new structure, so the result neither
        aliases nor keeps alive the source buffer. Raises ``ValueError`` if
        ``data`` is shorter than the structure. (Casting ``data`` to a
        pointer instead would skip that length check and expose the memory
        of an immutable ``bytes`` object for writing.)
        """
        return cls.from_buffer_copy(data)






# Sample 20-byte IPv4 header, written as hex digits in 32-bit groups.
data = bytes.fromhex(
    "45000054 17614000 "
    "40012137 c0a80b12 "
    "d44d6209"
)

header = IPv4Header.unpack(data)
print("total length:", header.total_length)

WTDD: ...and back again

class BitStruct(ctypes.BigEndianStructure,
                metaclass=BitStructMeta):
    # ...


    def pack(self):
        """Return a ``bytes`` copy of this structure's in-memory representation."""
        start = ctypes.addressof(self)
        length = ctypes.sizeof(self)

        # string_at() copies exactly `length` bytes starting at `start`.
        return ctypes.string_at(start, length)


# Sample 20-byte IPv4 header, one list row per 32-bit word.
data = bytes([
    0x45, 0x00, 0x00, 0x54,
    0x17, 0x61, 0x40, 0x00,
    0x40, 0x01, 0x21, 0x37,
    0xc0, 0xa8, 0x0b, 0x12,
    0xd4, 0x4d, 0x62, 0x09,
])

# Round trip: packing the unpacked header must reproduce the input bytes.
header = IPv4Header.unpack(data)
assert header.pack() == data
# Sample 20-byte IPv4 header as an immutable bytes object.
data = (b"\x45\x00\x00\x54\x17\x61\x40\x00"
        b"\x40\x01\x21\x37\xc0\xa8\x0b\x12"
        b"\xd4\x4d\x62\x09")

# Deliberate failure: bytes does not support item assignment, so this line
# raises the TypeError shown in the traceback that follows.
data[1] = bitstring.pack('uint:6, uint:2', 1, 0).bytes

NOTE ON Writable data

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: 'bytes' object does not support item assignment
# bytearray supports in-place modification.
data = bytearray([0x01, 0x02, 0x03, 0x04])

# Assigning to the empty slice [2:2] inserts the four bytes at index 2
# without overwriting anything.
data[2:2] = bytes([0xca, 0xfe, 0xba, 0xbe])

assert data == b'\x01\x02\xca\xfe\xba\xbe\x03\x04'

SWIG

When all else fails:
Real men code in C

/*
 * Decoder API prototypes: alloc / sink / poll / finish / free.
 * NOTE(review): the per-function notes below are inferred from the names and
 * signatures only -- confirm against the heatshrink documentation.
 */

/* Presumably allocates a decoder with the given buffer size parameters. */
heatshrink_decoder *heatshrink_decoder_alloc(
    uint16_t input_buffer_size,
    uint8_t expansion_buffer_sz2,
    uint8_t lookahead_sz2);

/* Takes an input buffer of `size` bytes; *input_size is an out-parameter. */
HSD_sink_res heatshrink_decoder_sink(
    heatshrink_decoder *hsd,
    uint8_t *in_buf, size_t size, size_t *input_size);

/* Takes an output buffer of `out_buf_size` bytes; *output_size is an out-parameter. */
HSD_poll_res heatshrink_decoder_poll(
    heatshrink_decoder *hsd,
    uint8_t *out_buf, size_t out_buf_size, size_t *output_size);

/* Presumably signals that no more input will be provided. */
HSD_finish_res heatshrink_decoder_finish(
    heatshrink_decoder *hsd);

/* Presumably releases a decoder obtained from heatshrink_decoder_alloc(). */
void heatshrink_decoder_free(heatshrink_decoder *hsd);

SWIG

Simplified Wrapper and Interface Generator

/* SWIG interface producing a module named "heatshrink". */
%module heatshrink
%{
#include "heatshrink_decoder.h"
%}

/* Expose the C struct to the target language under the name "Decoder". */
%rename(Decoder) heatshrink_decoder;

/*
 * The struct is declared with no data members; %extend attaches a
 * constructor, a destructor and three methods to it.
 * NOTE(review): presumably these bind to the heatshrink_decoder_* C
 * functions -- confirm how the implementations are supplied.
 */
typedef struct {
    %extend {
        /* Constructor; every argument has a default value. */
        heatshrink_decoder(uint16_t bufsize = 256,
                           uint8_t window = 8,
                           uint8_t lookahead = 4);

        HSD_sink_res sink(uint8_t *input,
                          size_t insize,
                          size_t *input_size);

        HSD_poll_res poll(uint8_t *output,
                          size_t outsize,
                          size_t *output_size);

        HSD_finish_res finish();

        /* Destructor. */
        ~heatshrink_decoder();
    }
} heatshrink_decoder;

SWIG

SWIG

/*
 * Convert an HSD_sink_res return value to Python:
 * HSDR_SINK_OK -> True, HSDR_SINK_FULL -> False, anything else raises
 * RuntimeError.
 */
%typemap(out) HSD_sink_res {
    switch ($1) {
        case HSDR_SINK_OK:
            /* PyBool_FromLong() returns a new reference; assigning Py_True
               directly would hand out a borrowed one and corrupt its
               reference count when the result is released. */
            $result = PyBool_FromLong(1);
            break;
        case HSDR_SINK_FULL:
            $result = PyBool_FromLong(0);
            break;
        case HSDR_SINK_ERROR_NULL:
        default:
            /* Unknown codes are reported too, so $result is never left unset. */
            SWIG_exception(SWIG_RuntimeError, "Error");
            break;
    }
}

/*
 * Accept a Python bytes object for the (input, insize) argument pair:
 * $1 receives a pointer to the object's internal buffer and $2 its length.
 * No copy is made; the bytes object must stay alive for the duration of
 * the call.
 */
%typemap(in) (uint8_t *input, size_t insize) {
    if (!PyBytes_Check($input)) {
        PyErr_SetString(PyExc_ValueError, "Expecting a bytes object");
        /* SWIG_fail jumps to the wrapper's cleanup code instead of returning
           directly. */
        SWIG_fail;
    }
    /* PyBytes_AsString() returns char *, PyBytes_Size() returns Py_ssize_t;
       convert both explicitly to the parameter types. */
    $1 = (uint8_t *)PyBytes_AsString($input);
    $2 = (size_t)PyBytes_Size($input);
}

/*
 * Treat both size out-parameters as outputs: they are removed from the
 * Python argument list and returned alongside the status instead.
 * input_size is listed as well, because sink() is called from Python with a
 * single bytes argument and returns a (result, sunk) pair.
 */
%include "typemaps.i"
%apply size_t *OUTPUT { size_t *input_size, size_t *output_size };
HSD_sink_res sink(uint8_t *input, size_t insize, size_t *input_size);

SWIG

from heatshrink import Decoder

# Construct a decoder without arguments.
decoder = Decoder()

# sink() takes a bytes object and returns a pair.
# NOTE(review): presumably (status, number of bytes consumed) -- confirm.
result, sunk = decoder.sink(b'\xde\xad\xbe\xef')
HSD_sink_res sink(uint8_t *input, size_t insize, size_t *input_size);

SWIG

HSD_poll_res poll(uint8_t *output, size_t outsize, size_t *output_size);
/*
 * For the (output, outsize) argument pair, Python passes only the desired
 * buffer size; a scratch buffer of that size is allocated here and handed
 * to the C function.
 */
%typemap(in) (uint8_t *output, size_t outsize)  {
    long requested;

    if (!PyInt_Check($input) || (requested = PyInt_AsLong($input)) < 0) {
        PyErr_SetString(PyExc_ValueError, "Expecting a positive integer");
        /* SWIG_fail jumps to the wrapper's cleanup code instead of returning
           directly. */
        SWIG_fail;
    }
    $2 = (size_t)requested;
    /* Allocate at least one byte so that a NULL result always means failure. */
    $1 = (uint8_t *)malloc($2 ? $2 : 1);
    if ($1 == NULL) {
        PyErr_NoMemory();
        SWIG_fail;
    }
}

/* Release the output buffer ($1) once the wrapped call has finished with it. */
%typemap(freearg) (uint8_t *output, size_t outsize)  {
    free($1);
}

/*
 * After the call, append the first *output_size bytes of the output buffer
 * to the Python result as a bytes object.
 */
%typemap(argout) (uint8_t *output, size_t outsize, size_t *output_size)  {
    /* PyBytes_FromStringAndSize() takes (const char *, Py_ssize_t); convert
       explicitly from uint8_t * and size_t. */
    $result = SWIG_Python_AppendOutput($result,
                                       PyBytes_FromStringAndSize((const char *)$1,
                                                                 (Py_ssize_t)*$3));
}

SWIG

from heatshrink import Decoder

# Construct a decoder without arguments.
decoder = Decoder()

# sink() takes a bytes object and returns a pair.
# NOTE(review): presumably (status, number of bytes consumed) -- confirm.
result, sunk = decoder.sink(b'\xde\xad\xbe\xef')

# poll() takes the size of the output buffer to use and returns a pair.
# NOTE(review): presumably (status, decoded bytes) -- confirm.
more, polled = decoder.poll(256)
HSD_sink_res sink(uint8_t *input, size_t insize, size_t *input_size);

you don't need C to look smart

* at least not much of it

THANK YOU

Binaries @ Pykonik 11.2016

By Michał Lowas-Rzechonek

Binaries @ Pykonik 11.2016

Processing binary and low-level data in Python

  • 194
Loading comments...

More from Michał Lowas-Rzechonek