Writing parsers in Rust with combinators

Motivation

We're creating a bencode parser for our BitTorrent client, matey.

Potential solutions

regex

manual

generators

Parser combinators

// `tag` builds a parser that matches the given literal at the start of the input.
let needle_parser = tag("needle");

// On success the result pairs the remaining input with the matched text.
needle_parser("needlehaystack"); // => Ok(("haystack", "needle"))

use nom::multi::many0_count;

// Wrap the parser so it is applied repeatedly; the output is the number of matches.
let count_needles = many0_count(needle_parser);

// Zero matches is still a success: nothing is consumed and the count is 0.
count_needles("haystack"); // => Ok(("haystack", 0))
count_needles("needleneedleneedleneedlehaystack");
// => Ok(("haystack", 4))
use nom::sequence::pair;
use nom::multi::many0_count;

// `pair` runs both parsers in sequence and yields their outputs as a tuple.
let hello_world = pair(tag("hello"), tag("world"));

hello_world("helloworld"); // => Ok(("", ("hello", "world")))
many0_count(hello_world)("helloworldhelloworldhelloworld"); // => Ok(("", 3))

Bencode

d
    8:announce 46:http://torrent.fedoraproject.org:6969/announce
    13:creation date i1587996226e 
    4:infod
    	5:files l
            d
               6:length i1030e 
               4:path l
                 41:Fedora-Workstation-32-1.6-x86_64-CHECKSUM
               e
            e
            d
              6:length i1966178304e 
              4:path l
                41:Fedora-Workstation-Live-x86_64-32-1.6.iso
              e
            e
        e
    e
e

Our parser

use nom::{
    bytes::complete::tag, sequence::pair,
    multi::many0, branch::alt, combinator::map,
    IResult,
};

/// Parses a single bencode value from the front of `bencode`.
///
/// Each value parser is tried in order (number, byte string, list,
/// dictionary) and the first one that succeeds has its output wrapped in the
/// matching `Bencode` variant. The result carries the unconsumed input
/// together with the parsed value.
pub fn parse_bencode(bencode: &[u8]) -> IResult<&[u8], Bencode> {
    let number_value = map(number, Bencode::Number);
    let string_value = map(string, Bencode::ByteString);
    let list_value = map(list, Bencode::List);
    let dict_value = map(dict, Bencode::Dict);

    alt((number_value, string_value, list_value, dict_value))(bencode)
}

/// Byte-string parser (body elided in this excerpt). `parse_bencode` wraps its
/// output in `Bencode::ByteString`, and `dict` uses it to read each key.
pub fn string(bencode: &[u8]) -> IResult<&[u8], Vec<u8>> { /* ... */ }
/// Integer parser (body elided in this excerpt). `parse_bencode` wraps its
/// `i64` output in `Bencode::Number`.
pub fn number(bencode: &[u8]) -> IResult<&[u8], i64> { /* ... */ }

pub fn list(bencode: &[u8]) -> IResult<&[u8], Vec<Bencode>> {
    let (bencode, _) = tag("l")(bencode)?;
    let (bencode, output_list) = many0(parse_bencode)(bencode)?;
    let (bencode, _) = tag("e")(bencode)?;

    Ok((bencode, output_list))
}

/// Parses a bencode dictionary: a `d` marker, zero or more key/value entries,
/// then an `e` marker.
///
/// Every entry is a byte-string key followed by any bencode value. The
/// entries are collected into a `BTreeMap`, which is returned along with the
/// input left after the closing marker.
pub fn dict(bencode: &[u8]) -> IResult<&[u8], BTreeMap<Vec<u8>, Bencode>> {
    // One entry = key parsed by `string`, then a value parsed by `parse_bencode`.
    let entry = pair(string, parse_bencode);

    let (rest, (_, entries)) = pair(tag("d"), many0(entry))(bencode)?;
    let (rest, _) = tag("e")(rest)?;

    let table: BTreeMap<Vec<u8>, Bencode> = entries.into_iter().collect();
    Ok((rest, table))
}
Torrent {
    announce: "http://torrent.fedoraproject.org:6969/announce",   
    info: TorrentInfo {
        name: "Fedora-Workstation-Live-x86_64-32",
        files: [
            TorrentFile {
                length: 1030,
                path: "Fedora-Workstation-32-1.6-x86_64-CHECKSUM",
            },
            TorrentFile {
                length: 1966178304,
                path: "Fedora-Workstation-Live-x86_64-32-1.6.iso",
            },
        ],
   }
}

Writing parsers in Rust with combinators

By Eshan Singh

Writing parsers in Rust with combinators

  • 79