Escodegen
using JSON as an AST
@PuercoPop
Code as Data
¡Pregunten!
(Excepto para comentarios parentéticos que pueden esperar hasta el final)
¿Qué es un AST?
¿Para qué sirve?
Nos permite hablar sobre el lenguaje de manera estructurada
Haskell
-- Factorial by pattern matching: the 0 case yields 1, any other n
-- multiplies n by the factorial of n - 1. No type signature is written here.
fact 0 = 1
fact n = n * fact (n - 1)
-- The same factorial, now with an explicit type signature: Int in, Int out.
fact :: Int -> Int
fact 0 = 1
fact n = n * fact (n - 1)
SML
(* Factorial by clausal pattern matching; no type annotations are written. *)
fun fact 0 = 1
| fact n = n * fact(n - 1);
= val fact = fn : int -> int
(* Talking about the types in the meta-language: the same factorial with the
   argument and the result of each clause annotated as int. *)
fun fact (0: int): int = 1
| fact (n: int): int = n * fact(n - 1);
C++ templates son turing complete!
Apropos
"In Fortran you can speak of numbers, and in C you can speak of strings. In Lisp you can speak of Lisp. Everything Lisp does can be described as a Lisp program."
–Guy Steele Jr.
Metaprogramming
"If you give someone Fortran, he has Fortran. If you give someone Lisp, he has any language he pleases."
– Guy Steele Jr.
(Parentesis)
¿Y cómo lo hace?
Homoiconicidad: la misma representación tanto para el código como para los datos
Wat if JS was homoiconic?
Shoutout to Wat.js
/**
 * Factorial of n, computed recursively.
 *
 * @param {number} n - value to take the factorial of; 0 is the base case.
 * @returns {number} 1 when n is 0, otherwise n * fact(n - 1).
 */
function fact(n) {
    if (n === 0) {
        return 1;
    }
    return n * fact(n - 1);
}
// Sería
["function", "fact", ["n"],
["if", ["n", "===", "0"],
["return", ["1"]],
["return", ["n",
"*",
["fact", ["-",
"n",
"1"]]]]]]
Semánticas Meta-circulares
"Furthermore, a metacircular definition is also more readable and understandable [than English] by the average Common Lisp programmer, since it is written in terms he mostly understands"
– Metacircular Semantics for Common Lisp Special Forms, Henry Givens Baker
"The peculiar penchant of modern software standards committees to couch their pronouncements in English prose is symptomatic of an epidemic of lawyer envy which is sweeping the computer field. Lawyers understand the fine art of language obfuscation, in which a simple thing is made complex for the single purpose of providing employment for other lawyers who then interpret the language. It's bad enough that there are already more lawyers than engineers in the United States, without having these few remaining engineers talking and acting like lawyers, as well."
En Javascript
/**
 * Emulates what `new Constr(...args)` does, written as an ordinary function.
 *
 * @param {Function} Constr - constructor function to invoke.
 * @param {Array} args - arguments passed to the constructor.
 * @returns {Object} the object (or function) explicitly returned by the
 *     constructor, or else the freshly created instance.
 */
function newOperator(Constr, args) {
    // (1) Fresh object whose prototype is the constructor's `prototype`.
    var thisValue = Object.create(Constr.prototype);
    var result = Constr.apply(thisValue, args);
    // (2) An explicitly returned object replaces the fresh instance.
    // Functions are objects too, but `typeof` reports them as 'function',
    // so they need their own check; primitives (and null) are ignored.
    var returnedObject = (typeof result === 'object' && result !== null) ||
        typeof result === 'function';
    if (returnedObject) {
        return result;
    }
    return thisValue;
}
Autor: Dr. Axel Rauschmayer
En Python
# A plain for loop ...
for item in collection:
    do_stuff(item)

# ... and the same loop spelled out by hand on top of the iterator protocol.
iterator = iter(collection)
while True:
    try:
        # The next() builtin works on both Python 2.6+ and Python 3,
        # unlike the Python-2-only iterator.next() method.
        item = next(iterator)
    except StopIteration:
        # The code from the else clause goes here.
        break
    else:
        # The code from the for indent block goes here. (No lambdas :/)
        # It sits outside the try body so that a StopIteration raised by the
        # loop body itself is not mistaken for the iterator being exhausted.
        do_stuff(item)
# Don't trust me try this:
# Python lists are iterable, but not iterators, hence the explicit iter().
collection = iter([1, 2, 3, 4, 5])
while True:
    try:
        # next() and the parenthesized single-argument print run unchanged on
        # Python 2.6+ and Python 3.
        item = next(collection)
        print("{0} bottles of beer on the wall".format(item))
    except StopIteration:
        break
JSON AST, ejemplo:
var esprima = require('esprima');
var stmt = esprima.parse('1 + 2');
//The AST of the expression 1 + 2 is:
{
"type": "Program",
"body": [
{
"type": "ExpressionStatement",
"expression": {
"type": "BinaryExpression",
"operator": "+",
"left": {
"type": "Literal",
"value": 1,
"raw": "1"
},
"right": {
"type": "Literal",
"value": 2,
"raw": "2"
}
}
}
]
}
Para explorar el AST pueden visitar
http://esprima.org/demo/parse.html
stmt = esprima.parse('var x = 2 + 4');
// console.log(JSON.stringify(stmt, null, 4));
{
"type": "Program",
"body": [
{
"type": "VariableDeclaration",
"declarations": [
{
"type": "VariableDeclarator",
"id": {
"type": "Identifier",
"name": "x"
},
"init": {
"type": "BinaryExpression",
"operator": "+",
"left": {
"type": "Literal",
"value": 2,
"raw": "2"
},
"right": {
"type": "Literal",
"value": 4,
"raw": "4"
}
}
}
],
"kind": "var"
}
]
}
stmt = esprima.parse('var x = 2 + 4; 4 + x');
// console.log(JSON.stringify(stmt, null, 4));
{
"type": "Program",
"body": [
{
/// nom nom...
},
{
"type": "ExpressionStatement",
"expression": {
"type": "BinaryExpression",
"operator": "+",
"left": {
"type": "Literal",
"value": 4,
"raw": "4"
},
"right": {
"type": "Identifier",
"name": "x"
}
}
}
]
}
Constant Folding
Constant folding es el proceso de reconocer y evaluar expresiones constantes durante la compilación en lugar de durante la ejecución.
var x = 2 + 5;
var x = 7;
First Stab
/// Boilerplate
var esprima = require('esprima');
var parse = esprima.parse;
var escodegen = require('escodegen');
var estraverse = require('estraverse');
var Syntax = estraverse.Syntax;
var generate = escodegen.generate;
/**
 * Parses `code`, applies `transformation` to the resulting AST and logs the
 * transformation's name, the input source and the regenerated source.
 *
 * @param {string} code - JavaScript source to parse.
 * @param {Function} transformation - receives the AST and returns the AST to
 *     generate code from; its `name` property is logged.
 */
var run = function(code, transformation) {
    console.log('running', transformation.name);
    console.log('Input:', code);
    var ast = parse(code);
    var transformed = transformation(ast);
    console.log('Output', generate(transformed));
};
First Stab
/**
 * First attempt at constant folding: replaces every `<literal> + <literal>`
 * BinaryExpression with a single Literal node holding the sum.
 * Only the "+" operator is folded, and only one level per call.
 *
 * @param {Object} stmt - AST to rewrite in place through estraverse.replace.
 * @returns {Object} the same `stmt` that was passed in.
 */
function most_simple_constant_foldling(stmt) {
    var foldAddition = function(node) {
        if (node.type !== 'BinaryExpression') {
            // Returning undefined leaves the node untouched.
            return undefined;
        }
        var isLiteralSum = node.operator === "+" &&
            node.left.type === "Literal" &&
            node.right.type === "Literal";
        if (!isLiteralSum) {
            return node;
        }
        return { type: "Literal", value: node.left.value + node.right.value };
    };
    estraverse.replace(stmt, { enter: foldAddition });
    return stmt;
}
run('var x = 2 + 4', most_simple_constant_foldling);
// running most_simple_constant_foldling
// Input: var x = 2 + 4
// Output var x = 6;
- * /
// running most_simple_constant_foldling
// Input: var x = 2 - 4
// Output var x = 2 - 4;
// Falta el resto de los operadores aritméticos
/**
 * Constant folding for the four arithmetic operators: a BinaryExpression
 * whose operands are both Literal nodes and whose operator is +, -, * or /
 * is replaced by a Literal holding the computed value.
 *
 * The computed value is used as-is, whatever it is (for instance a negative
 * or non-finite number).
 *
 * @param {Object} stmt - AST to rewrite in place through estraverse.replace.
 * @returns {Object} the same `stmt` that was passed in.
 */
function too_simple_constant_folding(stmt) {
    var fold = function(node) {
        var foldable = node.type === Syntax.BinaryExpression &&
            node.left.type === Syntax.Literal &&
            node.right.type === Syntax.Literal;
        if (!foldable) {
            return undefined;
        }
        var lhs = node.left.value;
        var rhs = node.right.value;
        switch (node.operator) {
        case "+":
            return { type: Syntax.Literal, value: lhs + rhs };
        case "-":
            return { type: Syntax.Literal, value: lhs - rhs };
        case "*":
            return { type: Syntax.Literal, value: lhs * rhs };
        case "/":
            return { type: Syntax.Literal, value: lhs / rhs };
        default:
            // Any other operator is left untouched.
            return undefined;
        }
    };
    estraverse.replace(stmt, { enter: fold });
    return stmt;
}
// Input: var x = 4 - 2
// Output var x = 2;
// Input: var x = 4 / 2
// Output var x = 2;
// Input: var x = 4 / 0
// Output var x = 1e+400;
/**
 * Constant folding for +, -, * and / over two Literal operands, where a
 * division is only folded when its result passes `isFinite`.
 *
 * @param {Object} stmt - AST to rewrite in place through estraverse.replace.
 * @returns {Object} the same `stmt` that was passed in.
 */
function finicky_simple_constant_folding(stmt) {
    var literal = function(value) {
        return { type: Syntax.Literal, value: value };
    };
    var fold = function(node) {
        var foldable = node.type === Syntax.BinaryExpression &&
            node.left.type === Syntax.Literal &&
            node.right.type === Syntax.Literal;
        if (!foldable) {
            return undefined;
        }
        var lhs = node.left.value;
        var rhs = node.right.value;
        switch (node.operator) {
        case "+":
            return literal(lhs + rhs);
        case "-":
            return literal(lhs - rhs);
        case "*":
            return literal(lhs * rhs);
        case "/":
            var quotient = lhs / rhs;
            // A non-finite quotient keeps the original division in place.
            return isFinite(quotient) ? literal(quotient) : undefined;
        default:
            return undefined;
        }
    };
    estraverse.replace(stmt, { enter: fold });
    return stmt;
}
run('var x = 4 - 8', too_simple_constant_folding);
// Error: Numeric literal whose value is negative
// ¿Cómo se representa -2?
{
"type": "UnaryExpression",
"operator": "-",
"argument": {
"type": "Literal",
"value": 2,
"raw": "2"
},
"prefix": true
}
/**
 * Rewrites a Literal node with a negative value as a unary minus applied to
 * a Literal holding the negated (positive) value.
 *
 * @param {Object} node - node whose `value` is inspected.
 * @returns {Object} a new UnaryExpression node when `node.value < 0`,
 *     otherwise `node` itself, unchanged.
 */
function wrap_negative_values(node) {
    if (!(node.value < 0)) {
        return node;
    }
    var magnitude = { type: Syntax.Literal, value: -node.value };
    return {
        type: Syntax.UnaryExpression,
        operator: "-",
        prefix: true,
        argument: magnitude
    };
}
/// ...
if ( node.operator === "-") {
return wrap_negative_values({ type: Syntax.Literal,
value: node.left.value - node.right.value });
/// ...
run('var x = 4 + 2 + 8', i_hope_it_does_simple_constant_folding);
// running i_hope_it_does_simple_constant_folding
// Input: var x = 4 + 2 + 8
// Output var x = 6 + 8;
/**
 * Constant folding that repeats whole-tree passes until nothing changes, so
 * that chains such as `4 + 2 + 8` collapse completely.
 *
 * @param {Object} stmt - AST to rewrite in place through estraverse.replace.
 * @returns {Object} the same `stmt` that was passed in.
 */
function alas_simple_constant_folding(stmt) {
    var modified = true;
    // Folding an inner expression can leave its parent with two literal
    // operands, so traverse again for as long as the last pass folded something.
    while (modified) {
        modified = false;
        estraverse.replace(stmt, {
            enter: function(node) {
                if (node.type === Syntax.BinaryExpression &&
                    node.left.type === Syntax.Literal &&
                    node.right.type === Syntax.Literal) {
                    if (node.operator === "+") {
                        modified = true;
                        return { type: Syntax.Literal,
                                 value: node.left.value + node.right.value };
                    // ... (remaining operators elided on the slide)
                    } else if (node.operator === "/") {
                        var result = node.left.value / node.right.value;
                        // isFinite alone rejects NaN as well as +/-Infinity.
                        // OR-ing it with !isNaN let Infinity through, which
                        // folded `4 / 0` after all.
                        if (isFinite(result)) {
                            modified = true;
                            return { type: Syntax.Literal,
                                     value: result };
                        }
                    }
                }
            }
        });
    }
    return stmt;
}
Build on top
/**
 * Extracts the numeric value a node stands for, if it stands for one.
 *
 * Two shapes are recognized: a Literal whose value is a number, and a unary
 * minus applied to such a Literal (the result is then negated).
 *
 * NaN is a number but as a value it is falsy, and so is 0: callers must
 * compare the result against `false` with `===` instead of relying on
 * truthiness.
 *
 * @param {Object} node - AST node to inspect.
 * @returns {number|boolean} the number the node represents, or `false` when
 *     the node is neither of the two recognized shapes.
 */
function lookingAtNumber(node) {
    if (node.type === Syntax.Literal && typeof node.value === 'number') {
        return node.value;
    }
    if (node.type === Syntax.UnaryExpression &&
        node.operator === '-' &&
        node.argument.type === Syntax.Literal &&
        typeof node.argument.value === 'number') {
        return -node.argument.value;
    }
    return false;
}
As a Target Language
- Evita la concatenación de cadenas, lo que facilita la manipulación del código generado
- Serializar a JSON es ubicuo
- Genera Sourcemaps (si los nodos tienen line information)
Un Lenguaje Simple
(+ 1 2)
(+ 1 2 3)
(define-function foo ()
1)
(define-function foo (n)
n)
(define-function fact (n)
(if (== n 0)
1
(* n (fact (- n 1)))))
Representación Interna
;; Node classes of the intermediate representation instantiated by the
;; parse-* functions. Every slot is filled through an initarg named after the
;; slot and exposed through a reader of the same name.

;; A function definition: its name, its argument list and its body.
(defclass function-definition ()
((name :initarg :name :reader name)
(arguments :initarg :arguments :reader arguments)
(body :initarg :body :reader body)))
;; A call: the name of the function being applied and its arguments.
(defclass function-application ()
((name :initarg :name :reader name)
(arguments :initarg :arguments :reader arguments)))
;; A reference to a name.
(defclass identifier ()
((name :initarg :name :reader name)))
;; A numeric literal.
(defclass num ()
((value :initarg :value :reader value)))
;; An operator applied to a list of operands (the list may hold more than two).
(defclass arith-op ()
((operator :initarg :operator :reader operator)
(operands :initarg :operands :reader operands)))
;; A conditional: test, consequent and an alternate that defaults to NIL.
(defclass conditional-op ()
((test :initarg :test :reader test)
(consequent :initarg :consequent :reader consequent)
(alternate :initarg :alternate :initform nil :reader alternate)))
;; A sequence of expressions grouped as one block.
(defclass block-statement ()
((expressions :initarg :expressions :reader expressions)))
Parser
;; Entry Point
(defun compile-to-js (code &optional (output-file #P"~/tmp.js"))
  "Parse CODE and write its JSON serialization to OUTPUT-FILE.
An existing OUTPUT-FILE is superseded."
  (with-open-file (json-stream output-file
                               :direction :output
                               :if-exists :supersede)
    (let ((ast (parse code)))
      (to-json ast :stream json-stream))))
;; Parser
;; Turns an s-expression into intermediate-representation nodes by
;; dispatching on the shape of EXPR, in this order:
;;   - NIL                          -> the empty string
;;   - a number                     -> parse-number
;;   - any other atom               -> parse-identifier
;;   - a list whose head is an atom:
;;       + - * / ==                 -> parse-arith-op
;;       if                         -> parse-conditional-op
;;       define-function            -> parse-function-definition
;;       anything else              -> parse-function-call
;;   - a list whose head is a list  -> every element is parsed and the
;;                                     results go to parse-block-statement
;; NOTE(review): the NIL case yields a string rather than a node object;
;; confirm that whatever consumes the result (e.g. to-json) copes with it.
(defun parse (expr)
"No error reporting whatsoever."
(cond
((null expr) "")
((atom expr)
(cond
((numberp expr) (parse-number expr))
(t (parse-identifier expr))))
((atom (car expr))
(case (car expr)
((+ - * / ==) (parse-arith-op expr))
(if (parse-conditional-op expr))
(define-function (parse-function-definition expr))
(t (parse-function-call expr))))
(t (parse-block-statement (mapcar #'parse expr)))))
Boring Parts
(defun parse-number (expr)
  "Wrap the number EXPR in a NUM node."
  (make-instance 'num :value expr))

(defun parse-identifier (expr)
  "Wrap the atom EXPR in an IDENTIFIER node."
  (make-instance 'identifier :name expr))

(defun parse-arith-op (expr)
  "Build an ARITH-OP node: the head of EXPR is the operator, the remaining
elements are parsed as its operands."
  (let ((operator (car expr))
        (operands (mapcar #'parse (cdr expr))))
    (make-instance 'arith-op :operator operator :operands operands)))

(defun parse-function-definition (expr)
  "Build a FUNCTION-DEFINITION node from (define-function NAME ARGUMENTS . BODY).
NAME and ARGUMENTS are stored as given; every BODY form is parsed."
  (destructuring-bind (name arguments &rest body) (cdr expr)
    (make-instance 'function-definition
                   :name name
                   :arguments arguments
                   :body (mapcar #'parse body))))

(defun parse-function-call (expr)
  "Build a FUNCTION-APPLICATION node: the head of EXPR is the function name,
the remaining elements are parsed as its arguments."
  (let ((name (car expr))
        (arguments (mapcar #'parse (cdr expr))))
    (make-instance 'function-application :name name :arguments arguments)))

(defun parse-block-statement (expr)
  "Wrap EXPR, used as the list of expressions, in a BLOCK-STATEMENT node."
  (make-instance 'block-statement :expressions expr))
Hairy Parts
;; Consequent y Alternate deben ser block statements
(defmacro listify (expr)
  "Ensure the expression return is a list.
EXPR is evaluated exactly once. A non-empty list is returned as is; any other
value, NIL included, is wrapped in a fresh one-element list."
  ;; Bind the value to a gensym so EXPR is not spliced into the expansion
  ;; several times, which would re-run it (and any side effect) per use.
  (let ((result (gensym "RESULT")))
    `(let ((,result ,expr))
       (or (and (listp ,result) ,result)
           (list ,result)))))
(defun parse-conditional-op (expr)
  "Build a CONDITIONAL-OP node from (if TEST CONSEQUENT ALTERNATE).
The parsed consequent and alternate are each turned into a list with LISTIFY
and wrapped in a BLOCK-STATEMENT."
  ;; Guarantee that both branches are lists before wrapping them.
  (let ((consequent-exprs (listify (parse (third expr))))
        (alternate-exprs (listify (parse (fourth expr)))))
    (flet ((as-block (exprs)
             (make-instance 'block-statement :expressions exprs)))
      (make-instance 'conditional-op
                     :test (parse (second expr))
                     :consequent (as-block consequent-exprs)
                     :alternate (as-block alternate-exprs)))))
Serializando a JSON
(defgeneric to-json (obj &key stream )
(:documentation "Serialize object to json"))
(defun negate (number)
  "Return a fresh NUM node holding the arithmetic negation of NUMBER's value."
  (make-instance 'num :value (- (value number))))

(defmethod to-json ((obj num) &key (stream t))
  ;; A negative number is serialized as a unary minus applied to the literal
  ;; of its negation; any other number becomes a plain Literal.
  (if (> 0 (value obj))
      ;; "prefix" is emitted as the JSON boolean true, not the string "true",
      ;; matching the UnaryExpression shape the parser itself produces.
      (format stream
              "{ \"type\": \"UnaryExpression\", \"operator\": \"-\",
\"prefix\": true, \"argument\": ~A }"
              ;; :stream nil makes FORMAT return the text for the argument.
              (to-json (negate obj) :stream nil))
      (format stream
              "{\"type\": \"Literal\", \"value\": ~A }"
              (value obj))))
;; Serialize an identifier as an "Identifier" node whose "name" is the
;; identifier's name printed with ~A.
;; NOTE(review): unlike the NUM method, STREAM has no default of T here, so it
;; is NIL when omitted and FORMAT then returns the text instead of printing
;; it; confirm the difference is intended.
(defmethod to-json ((obj identifier) &key stream)
(format stream "{\"type\": \"Identifier\", \"name\": \"~A\"}" (name obj)))
Eeewww
;;; Algunas transformaciones las hago en esta etapa.
;;; En mi representación intermedia las operaciones aritméticas son de aridad variable
;;; En la Representación de SpiderMonkey tienen aridad 2.
(defmethod to-json ((obj arith-op) &key (stream t))
(with-accessors ((operator operator)
(operands operands))
obj
(cond
((> (length operands) 2)
(format stream
"{ \"type\": \"BinaryExpression\", \"operator\": \"~A\",
\"left\": ~A, \"right\": ~A }"
operator
(to-json (first operands) :stream nil)
(to-json (make-instance 'arith-op
:operator operator
Escodegen
#!/usr/bin/js
var fs = require('fs');
var escodegen = require('escodegen');

// The input is a JSON-serialized AST and the output is the generated
// JavaScript. Both paths can be overridden from the command line:
// first argument = input file, second argument = output file.
var inputFile = process.argv[2] ? process.argv[2] : "/home/puercopop/tmp.js";
var outputFile = process.argv[3] ? process.argv[3] : "/home/puercopop/a.js";
var code;

fs.readFile(inputFile, 'utf8', function (err, data) {
    if (err) {
        // Stop here: `data` is undefined when the read fails, so carrying on
        // would make JSON.parse throw and hide the real error.
        return console.log(err);
    }
    code = escodegen.generate(JSON.parse(data));
    fs.writeFile(outputFile, code, 'utf8', function (err) {
        if (err) return console.log(err);
        console.log('Writing', code, 'in', outputFile);
    });
});
Examples
// (compile-to-js '(+ 1 2 3))
{
"type": "BinaryExpression",
"operator": "+",
"left": {
"type": "Literal",
"value": 1
},
"right": {
"type": "BinaryExpression",
"operator": "+",
"left": {
"type": "Literal",
"value": 2
},
"right": {
"type": "Literal",
"value": 3
}
}
}
// generate.js
1 + (2 + 3)
// (compile-to-js '(define-function foo (n) (+ n 2)))
{
"type": "FunctionDeclaration",
"id": { "type": "Identifier", "name": "FOO" },
"params": [ { "type": "Identifier", "name": "N" } ],
"defaults": [], "rest": null, "generator": false, "expression": false,
"body": {
"type": "BlockStatement",
"body": [
{
"type": "ExpressionStatement",
"expression": {
"type": "BinaryExpression",
"operator": "+",
"left": {
"type": "Identifier",
"name": "N"
},
"right": {
"type": "Literal",
"value": 2
}
}
}
]
}
}
// generate.js
function FOO(N) {
N + 2;
}
Links de Interés
Gracias
¿Preguntas?
Escodegen
By puercopop
Escodegen
Presentación sobre escodegen en LimaJS #Agosto14
- 1,605