Generating Rust types from JSON samples

Rust

  • Intended as a safer, more modern replacement for C / C++
    • No garbage collection
    • Region based memory management
  • Intended to make systems-level programming approachable for programmers used to higher-level languages
struct Point {
    x: f32,
    y: f32,
}

fn main() {
    let point = Point { x: 0.3, y: 0.4 };

    println!("point coordinates: ({}, {})",
        point.x, point.y);

    let sum_of_odd: i64 = (1..10)
        .filter(|n| n % 2 != 0)
        .sum();

    println!("{}", sum_of_odd);
}

Rust

  • Strongly influenced by the ML family of languages
    • Statically typed with type inference
    • Algebraic datatypes and pattern matching
    • Monadic error handling, rather than exceptions
fn main() {
    let elem = true;
    let mut vec = Vec::new();

    vec.push(elem);
    // vec must be Vec<bool>

    vec.push(5);
    // type error: expected bool, found integral variable
}
enum Option<T> {
    None,
    Some(T),
}

fn main() {
    match might_fail() {
        Some(n) => println!("It was {}", n),
        None => println!("No result"),
    }
}

JSON

  • Very common data interchange format
  • Simple, short list of possible types:
    • Objects
    • Arrays
    • Strings
    • Numbers
    • Booleans
  • Schemaless, all types nullable, arrays are heterogeneous
{
  "id": 745823,
  "name": "Bob",
  "dob": "1985-02-22",
  "phoneNumber": "81549300",
  "aliases": ["Robert", "Bobbie"],
  "address": {
    "street": "Pleasant Road 5",
    "postalCode": "3490"
  }
}

JSON in Rust

pub enum Value {
    Null,
    Bool(bool),
    Number(Number),
    String(String),
    Array(Vec<Value>),
    Object(Map<String, Value>),
}
{
  "id": 745823,
  "name": "Bob",
  "dob": "1985-02-22",
  "phoneNumber": "81549300"
}
fn main() {
    let v = somehow::parse::<Value>();
    
    if let Value::Object(fields) = v {
        if let Some(number) = fields.get("phoneNumber") {
            if let Value::String(s) = number {
                println!("{}", s);
            }
        }
    }
}

JSON in Rust

pub enum Value {
    Null,
    Bool(bool),
    Number(Number),
    String(String),
    Array(Vec<Value>),
    Object(Map<String, Value>),
}
{
  "id": 745823,
  "name": "Bob",
  "dob": "1985-02-22",
  "phoneNumber": "81549300"
}
fn main() {
    let v = somehow::parse::<Value>();

    println!("{}", v.pointer("/phoneNumber").unwrap()
                    .as_str().unwrap());
}

JSON in Rust

pub struct Customer {
    id: i64,
    name: String,
    dob: Date,
    phone_number: String
}
{
  "id": 745823,
  "name": "Bob",
  "dob": "1985-02-22",
  "phoneNumber": "81549300"
}
fn main() {
    let v = somehow::parse::<Customer>();
    
    println!("{}", v.phone_number);
}

JSON in Rust

{"id":"AN621C0S5-K11","modelId":"AN621C0S5","name":"Summer dress - blue/white","shopUrl":"https://www.zalando.co.uk/anna-field-summer-dress-blue-white-an621c0s5-k11.html",
"color":"Blue","available":true,"season":"WINTER","seasonYear":"2016","activationDate":"2015-04-30T16:52:52+02:00","additionalInfos":[],"tags":[],"genders":["FEMALE"],
"ageGroups":["ADULT"],"brand":{"key":"AN6","name":"Anna Field","logoUrl":"https://i6.ztat.net/brand/anna-field.jpg","logoLargeUrl":"https://i6.ztat.net/brandxl/anna-field.jpg",
"brandFamily":{"key":"AN00","name":"Anna Field","shopUrl":"https://www.zalando.co.uk/anna-field-online-shop"},"shopUrl":"https://www.zalando.co.uk/anna-field"},
"categoryKeys":["catalog","women","womens-sale","womens-clothing-sale","womens-dresses-sale","all","sale","summer-dresses-sale"],"attributes":[{"name":"Outer fabric material","values":["100% cotton"]},{"name":"Total length","values":["34.0 \" (Size 8)"]},{"name":"Insert material","values":["97% viscose, 3% spandex"]},{"name":"Fabric","values":["Jersey"]},{"name":"Details","values":["belt included"]},{"name":"Length","values":["short"]},{"name":"Top part material","values":["97% viscose, 3% spandex"]},{"name":"Fit","values":["tailored"]},{"name":"Pattern","values":["striped"]},{"name":"Neckline","values":["round neck"]},
{"name":"Washing instructions","values":["do not tumble dry","machine wash at 30°C","Machine wash on gentle cycle"]},{"name":"Sleeve length","values":["Extra short"]},{"name":"Our model's height","values":["Our model is 70.0 \" tall and is wearing size 8"]}],"units":[{"id":"AN621C0S5-K110340000","size":"6","price":{"currency":"GBP","value":14.69,"formatted":"£14.69"},"originalPrice":{"currency":"GBP","value":20.99,"formatted":"£20.99"},"available":false,"stock":0},{"id":"AN621C0S5-K110360000","size":"8","price":{"currency":"GBP","value":14.69,"formatted":"£14.69"},"originalPrice":{"currency":"GBP","value":20.99,"formatted":"£20.99"},"available":false,"stock":0},
{"id":"AN621C0S5-K110440000","size":"16","price":{"currency":"GBP","value":14.69,"formatted":"£14.69"},"originalPrice":{"currency":"GBP","value":20.99,"formatted":"£20.99"},"available":true,"stock":3},{"id":"AN621C0S5-K110420000","size":"14","price":{"currency":"GBP","value":14.69,"formatted":"£14.69"},"originalPrice":{"currency":"GBP","value":20.99,"formatted":"£20.99"},"available":false,"stock":0},{"id":"AN621C0S5-K110380000","size":"10","price":{"currency":"GBP","value":14.69,"formatted":"£14.69"},"originalPrice":{"currency":"GBP","value":20.99,"formatted":"£20.99"},"available":false,"stock":0},{"id":"AN621C0S5-K110400000","size":"12","price":{"currency":"GBP","value":14.69,"formatted":"£14.69"},"originalPrice":{"currency":"GBP","value":20.99,"formatted":"£20.99"},"available":false,"stock":0}],"media":{"images":[
{"orderNumber":1,"type":"NON_MODEL","thumbnailHdUrl":"https://i2.ztat.net/thumb_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@16.jpg","smallUrl":"https://i2.ztat.net/catalog/AN/62/1C/0S/5K/11/AN621C0S5-K11@16.jpg","smallHdUrl":"https://i2.ztat.net/catalog_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@16.jpg",
"mediumUrl":"https://i2.ztat.net/detail/AN/62/1C/0S/5K/11/AN621C0S5-K11@16.jpg","mediumHdUrl":"https://i2.ztat.net/detail_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@16.jpg","largeUrl":"https://i2.ztat.net/large/AN/62/1C/0S/5K/11/AN621C0S5-K11@16.jpg","largeHdUrl":"https://i2.ztat.net/large_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@16.jpg"},
{"orderNumber":2,"type":"STYLE","thumbnailHdUrl":"https://i3.ztat.net/thumb_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@15.jpg","smallUrl":"https://i3.ztat.net/catalog/AN/62/1C/0S/5K/11/AN621C0S5-K11@15.jpg","smallHdUrl":"https://i3.ztat.net/catalog_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@15.jpg","mediumUrl":"https://i3.ztat.net/detail/AN/62/1C/0S/5K/11/AN621C0S5-K11@15.jpg","mediumHdUrl":"https://i3.ztat.net/detail_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@15.jpg",
"largeUrl":"https://i3.ztat.net/large/AN/62/1C/0S/5K/11/AN621C0S5-K11@15.jpg","largeHdUrl":"https://i3.ztat.net/large_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@15.jpg"},
{"orderNumber":3,"type":"PREMIUM","thumbnailHdUrl":"https://i6.ztat.net/thumb_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@14.jpg","smallUrl":"https://i6.ztat.net/catalog/AN/62/1C/0S/5K/11/AN621C0S5-K11@14.jpg","smallHdUrl":"https://i6.ztat.net/catalog_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@14.jpg","mediumUrl":"https://i6.ztat.net/detail/AN/62/1C/0S/5K/11/AN621C0S5-K11@14.jpg","mediumHdUrl":"https://i6.ztat.net/detail_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@14.jpg","largeUrl":"https://i6.ztat.net/large/AN/62/1C/0S/5K/11/AN621C0S5-K11@14.jpg","largeHdUrl":"https://i6.ztat.net/large_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@14.jpg"},
{"orderNumber":4,"type":"PREMIUM","thumbnailHdUrl":"https://i5.ztat.net/thumb_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@13.jpg","smallUrl":"https://i5.ztat.net/catalog/AN/62/1C/0S/5K/11/AN621C0S5-K11@13.jpg","smallHdUrl":"https://i5.ztat.net/catalog_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@13.jpg","mediumUrl":
"https://i5.ztat.net/detail/AN/62/1C/0S/5K/11/AN621C0S5-K11@13.jpg","mediumHdUrl":"https://i5.ztat.net/detail_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@13.jpg","largeUrl":"https://i5.ztat.net/large/AN/62/1C/0S/5K/11/AN621C0S5-K11@13.jpg","largeHdUrl":"https://i5.ztat.net/large_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@13.jpg"},
{"orderNumber":5,"type":"PREMIUM","thumbnailHdUrl":"https://i4.ztat.net/thumb_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@12.jpg","smallUrl":"https://i4.ztat.net/catalog/AN/62/1C/0S/5K/11/AN621C0S5-K11@12.jpg",
"smallHdUrl":"https://i4.ztat.net/catalog_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@12.jpg","mediumUrl":"https://i4.ztat.net/detail/AN/62/1C/0S/5K/11/AN621C0S5-K11@12.jpg","mediumHdUrl":"https://i4.ztat.net/detail_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@12.jpg","largeUrl":"https://i4.ztat.net/large/AN/62/1C/0S/5K/11/AN621C0S5-K11@12.jpg","largeHdUrl":"https://i4.ztat.net/large_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@12.jpg"},
{"orderNumber":6,"type":"PREMIUM","thumbnailHdUrl":"https://i1.ztat.net/thumb_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@11.jpg","smallUrl":"https://i1.ztat.net/catalog/AN/62/1C/0S/5K/11/AN621C0S5-K11@11.jpg","smallHdUrl":"https://i1.ztat.net/catalog_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@11.jpg","mediumUrl":"https://i1.ztat.net/detail/AN/62/1C/0S/5K/11/AN621C0S5-K11@11.jpg",
"mediumHdUrl":"https://i1.ztat.net/detail_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@11.jpg","largeUrl":"https://i1.ztat.net/large/AN/62/1C/0S/5K/11/AN621C0S5-K11@11.jpg","largeHdUrl":"https://i1.ztat.net/large_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@11.jpg"},
{"orderNumber":7,"type":"PREMIUM","thumbnailHdUrl":"https://i6.ztat.net/thumb_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@10.jpg","smallUrl":"https://i6.ztat.net/catalog/AN/62/1C/0S/5K/11/AN621C0S5-K11@10.jpg","smallHdUrl":"https://i6.ztat.net/catalog_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@10.jpg","mediumUrl":"https://i6.ztat.net/detail/AN/62/1C/0S/5K/11/AN621C0S5-K11@10.jpg","mediumHdUrl":
"https://i6.ztat.net/detail_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@10.jpg","largeUrl":"https://i6.ztat.net/large/AN/62/1C/0S/5K/11/AN621C0S5-K11@10.jpg","largeHdUrl":"https://i6.ztat.net/large_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@10.jpg"},
{"orderNumber":8,"type":"PREMIUM","thumbnailHdUrl":"https://i5.ztat.net/thumb_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@9.jpg","smallUrl":"https://i5.ztat.net/catalog/AN/62/1C/0S/5K/11/AN621C0S5-K11@9.jpg","smallHdUrl":"https://i5.ztat.net/catalog_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@9.jpg","mediumUrl":"https://i5.ztat.net/detail/AN/62/1C/0S/5K/11/AN621C0S5-K11@9.jpg","mediumHdUrl":"https://i5.ztat.net/detail_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@9.jpg",
"largeUrl":"https://i5.ztat.net/large/AN/62/1C/0S/5K/11/AN621C0S5-K11@9.jpg","largeHdUrl":"https://i5.ztat.net/large_hd/AN/62/1C/0S/5K/11/AN621C0S5-K11@9.jpg"}]}}

Type providers

- types from data

A feature introduced in version 3.0 (in August 2012) of the programming language F# for strongly typed interaction with external elements.

 

E.g. a web API or a database.

Type providers

The library F# Data in action:

type Simple = JsonProvider<""" { "name":"John", "age":94 } """>

let simple = Simple.Parse(""" { "name":"Tomas", "age":4 } """)
simple.Age
simple.Name

compile time

runtime

type Simple = JsonProvider<"http://example.com/api/person/bob">

let simple = Simple.Parse(""" { "name":"Tomas", "age":4 } """)
simple.Age
simple.Name

Type providers

Γ ⊢ e : τ

Normally, when running type inference and checking, we start with an empty initial context, which is then extended by the code.

W'([ ], Π)

W'(Γ, let x = e1 in e2) =
    let τ = W'(Γ, e1) in W'(Γ[x ↦ τ], e2)

Type providers

Type providers introduce a way to project some information from the outside world into the typing context.

W'(π(🌍), Π)

W'([ ], Π)

Type providers

Sounds great, but Rust does not have type providers...

Macros

- code that generates code

While Rust does not have built-in support for type providers, it does have macros.

Procedural macros

fn procedural_macro(input: Tokens) -> Tokens {
    ...
}

Rule-based macros

macro_rules! some_if {
    ($cond:expr, $then:expr) => ({
        if $cond {
            Some($then)
        } else {
            None
        }
    })
}

Procedural macros


pub struct Customer {
    id: i64,
    name: String,
    dob: Date,
    phone_number: String
}
#[derive(Serialize, Deserialize)]
pub struct Customer {
    id: i64,
    name: String,
    dob: Date,
    phone_number: String
}

serde_derive

generates code for serialization and deserialization

impl Serialize for Customer {
    fn serialize() -> Result<Customer, Error> {
        ...
    }
}

Procedural macros

infer_schema!("dotenv:DATABASE_URL");

diesel

generates code by talking to your database

// types and functions as necessary
// depending on the actual schema of the database

Code that talks to the database (at compile time)

json_provider!("Point", r#"{ "x": 3, "y": 5 }"#);

What I have made

A procedural macro generating Rust types from inline, local or remote JSON samples.

Demo

The issue with type providers

Type providers do not give you (as the user) access to the generated code*.

*: F# Data creates erased types, so it does not really have normal code to give you even if it wanted to.

So what happens if you want to stop using the type provider?

{
  "id": 745823,
  "name": "Bob",
  "dob": "1985-02-22",
  "phoneNumber": "81549300"
}
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
struct Point {
    id: i64,
    name: String,
    dob: String,
    #[serde(rename = "phoneNumber")]
    phone_number: String,
}
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
struct Point {
    id: i64,
    name: String,
    dob: Date,
    #[serde(rename = "phoneNumber")]
    phone_number: String,
}

Generated:

We want:

Sample:

Since procedural macros are basically functions from Tokens to Tokens, we don't have to run them at compile time.

As such my project has three different interfaces (to the same code generation):

  • Procedural macro
  • Web-based interface
  • Command line tool

How do we generate code?

Shape inference

Presented in a paper by the authors of the library F# Data: http://tomasp.net/academic/papers/fsharp-data/

  1. Infer specific shapes from individual sample values
  2. Recursively find common shapes and infer composite shapes
fn infer_shape_from_value(value: Value) -> InferredType {
    match value {
        Value::Bool(_) => InferredType::Bool,
        Value::Number(ref n) => {
            if n.is_i64() {
                InferredType::Integer
            } else {
                InferredType::Floating
            }
        },
        ...
    }
}
fn common_shape(a: InferredType, b: InferredType)
                -> InferredType {
    if a == b {
        return a;
    }
    use InferredType::*;
    match (a, b) {
        (Floating, Integer) => Floating,
        ...
        _ => Any,
    }
}
// For Value::Array(values)
let inner = values.fold(shape_of_first, |shape, value| {
    let new_shape = infer_shape_from_value(value);
    common_shape(shape, new_shape)
});
// InferredType::VecT(inner)
fn generate_type_from_inferred(inferred: InferredType) -> Tokens {
    match inferred {
        InferredType::Null |
        InferredType::Any => quote! { ::serde_json::Value },
        InferredType::Bool => quote! { bool },
        ...
    }
}
#[cfg(not(feature = "online-samples"))]
json_provider!("Point", r#"{ "x": 1, "y": 2 }"#);
#[cfg(feature = "online-samples")]
json_provider!("Point", "http://vestera.as/json_sample/examples/point.json");
#[cfg(not(feature = "online-samples"))]
mod point;
#[cfg(feature = "online-samples")]
mod point {
    json_provider!("Point", "http://vestera.as/json_sample/examples/point.json");
}
cargo check --features "online-samples"

Conditional compilation / adapting to different environments

In your Rust code:

Command line/shell script:

Taking this further

There are still many missing features, unhandled edge cases, etc.

Taking this further

F# Data, which has a type provider from JSON samples, also has type providers for XML and CSV based on the same inference code.

Taking this further

While JSON itself is schemaless, the JSON Schema standard (json-schema.org) defines a way to write schemas for JSON documents.

Questions?

Generating Rust types from JSON samples

By Erik Vesteraas

Generating Rust types from JSON samples

  • 1,712