Yatharth Khatri
Design Systems and Frontend Architect
Classical Pianist
GitHub: yatharthk
Twitter: yatharthkhatri
Why should I learn about compilers?
Because we use compilers
ALL THE TIME
We use languages that compile to JavaScript
And tools to improve our workflow and developer experience
Is understanding and writing compilers within my capacity?
// add two numbers
function add(a, b) {
return a + b;
}
0 1 1 1 1 1 1 0
0 0 0 1 0 0 1 1
1 0 1 1 1 1 0 0
1 1 1 1 0 0 1 1
0 0 1 1 0 0 0 1
Source Code
Machine Code
// add two numbers
const add = (a, b)
=> (a + b);
// add two numbers
function add(a, b) {
return a + b;
}
ES6
(High Level Language)
ES5
(High Level Language)
Parsing
Transformation
Code Generation
II. Syntactic Analysis
I. Lexical Analysis
Lexical Analysis
Source Code
Tokens
Syntactic Analysis
Tokens
Abstract Syntax Tree (AST)
Transformation
AST (Source Code Language)
AST (Target Language)
Code Generation
AST (Target Language)
Print code in target language
import module from "module"
const module = require("module")
Compiler sections
/* import module from 'module'; => var module = require('module'); */
function tokenizer(source) {
// ...
return tokens;
}
function parser(tokens) {
// ...
return ast;
}
function transformer(ast) {
// ...
return newAst;
}
function codeGenerator(ast) {
// ...
return code;
}
Tokens
[
{ type: "keyword", value: "import" },
{ type: "identifier", value: "module" },
{ type: "keyword", value: "from" },
{ type: "string", value: "module" }
]
import module from "module"
tokenizer
// Constants
// Words the tokenizer emits as `keyword` tokens instead of `identifier` tokens.
const KEYWORDS = ['import', 'from'];
// Matches a single whitespace character; the tokenizer skips these.
const WHITESPACE = /\s/;
// Matches a newline. Note: `\s` above already matches `\n`, so checking both is redundant.
const NEWLINE = /\n/;
// Matches a single ASCII letter; used to scan keywords and identifiers.
const LETTERS = /[a-zA-Z]/;
function tokenizer(input) {
let current = 0;
const tokens = [];
while (current < input.length) {
let char = input[current];
if (WHITESPACE.test(char) || NEWLINE.test(char) || char === ';') {
current++;
continue;
}
if (char === '"') {
let value = '';
char = input[++current];
while (char !== '"') {
value += char;
char = input[++current];
}
tokens.push({ type: 'string', value: value });
current++;
continue;
}
(...)
tokenizer
function tokenizer(input) {
let current = 0;
const tokens = [];
while (current < input.length) {
let char = input[current];
(...)
if (LETTERS.test(char)) {
let value = '';
while (LETTERS.test(char)) {
value += char;
char = input[++current];
}
if (KEYWORDS.indexOf(value) > -1) {
tokens.push({ type: 'keyword', value: value });
} else {
tokens.push({ type: 'identifier', value: value });
}
current++;
continue;
}
throw new Error(`Unrecognized token: ${char}`);
}
return tokens;
}
Tokens
[
{ type: "keyword", value: "import" },
{ type: "identifier", value: "module" },
{ type: "keyword", value: "from" },
{ type: "string", value: "module" }
]
{
type: "Program",
body: [
{
type: "ImportDeclaration",
specifier: {
type: "Identifier",
name: "module"
},
source: {
type: "StringLiteral",
value: "module"
}
}
]
}
Abstract Syntax Tree
parser
function parser(tokens) {
let current = 0;
function walk() {
let token = tokens[current];
if (token.type === 'string') {
current++;
return {
type: 'StringLiteral',
value: token.value
};
}
if (token.type === 'identifier') {
current++;
return {
type: 'Identifier',
name: token.value
};
}
(...)
}
(...)
}
parser
(...)
if (token.type === 'keyword') {
if (token.value === 'import') {
current++;
const node = {
type: 'ImportDeclaration',
source: null,
specifier: null
};
if (tokens[current].type === 'identifier') {
node.specifier = walk();
} else {
throw new Error(
`Parse error: Unexpected ${
tokens[current].value
} after \`import\``
);
}
// expect keyword `from` after the identifier.
if (
tokens[current].type === 'keyword' &&
tokens[current].value === 'from'
) {
current++;
} else {
throw new Error(
`Parse Error: Unexpected token after ${node.specifier.name}`
);
}
(...)
parser
(...)
function walk() {
(...)
if (tokens[current].type === 'string') {
node.source = walk();
} else {
throw new Error(`Parse Error: Unexpected token after \`from\``);
}
return node;
}
}
throw new Error(`Unrecognized token: ${token.value}`);
}
}
const ast = {
type: 'Program',
body: []
};
while (current < tokens.length) {
ast.body.push(walk());
}
return ast;
}
Source AST
{
type: "Program",
body: [
{
type: "ImportDeclaration",
specifier: {
type: "Identifier",
name: "module"
},
source: {
type: "StringLiteral",
value: "module"
}
}
]
}
{
type: "Program",
body: [
{
type: "VariableDeclaration",
kind: "var",
id: {
type: "Identifier",
name: "module"
},
init: {
type: "CallExpression",
callee: {
type: "Identifier",
name: "require"
},
arguments: [
{
type: "StringLiteral",
value: "module"
}
]
}
}
]
}
New AST
traverser
/**
 * Walks an AST depth-first and invokes the matching visitor method for
 * every node it encounters.
 *
 * Children are walked before their parent's visitor method runs, so a
 * visitor may safely rewrite a node after its children have been seen.
 *
 * @param {object} ast - Root node; every node carries a string `type`.
 * @param {object} visitor - Map of node type name to a function taking the node.
 * @throws {TypeError} When a node has a type the traverser does not know;
 *   the error message is the offending type.
 */
function traverser(ast, visitor) {
  const visit = (current) => {
    // Resolve the handler up front, before any child is processed.
    const handler = visitor[current.type];

    if (current.type === 'Program') {
      current.body.forEach((child) => visit(child));
    } else if (current.type === 'ImportDeclaration') {
      visit(current.specifier);
      visit(current.source);
    } else if (
      current.type !== 'StringLiteral' &&
      current.type !== 'Identifier'
    ) {
      // Leaf types need no descent; anything else is unsupported.
      throw new TypeError(current.type);
    }

    if (handler) {
      handler(current);
    }
  };

  visit(ast);
}
// VISITOR
{
ImportDeclaration(node) {
// transform node
},
StringLiteral(node) {
// ...
},
Identifier(node) {
// ...
},
}
transformer
/**
 * Rewrites every `ImportDeclaration` node of the AST into the equivalent
 * `var <specifier> = require(<source>)` `VariableDeclaration` node.
 *
 * The tree is mutated in place and the same root object is returned.
 *
 * @param {object} ast - Source-language AST.
 * @returns {object} The same `ast` object, now describing the target code.
 */
function transformer(ast) {
  const visitor = {
    ImportDeclaration(node) {
      const requireCall = {
        type: 'CallExpression',
        callee: {
          type: 'Identifier',
          name: 'require'
        },
        arguments: [node.source]
      };
      const declaration = {
        type: 'VariableDeclaration',
        kind: 'var',
        id: node.specifier,
        init: requireCall
      };

      // Morph the existing node instead of replacing it in its parent:
      // drop the import-only fields, then copy the declaration fields over.
      delete node.specifier;
      delete node.source;
      Object.assign(node, declaration);
    }
  };

  traverser(ast, visitor);
  return ast;
}
codeGenerator
/**
 * Recursively prints a target-language AST node as source text.
 *
 * Supported node types: `Program`, `Identifier`, `StringLiteral`,
 * `VariableDeclaration` and `CallExpression`.
 *
 * @param {object} node - AST node with a string `type`.
 * @returns {string} Generated code for the node and its children.
 * @throws {TypeError} When the node type is not supported (for example an
 *   `ImportDeclaration` that was never transformed); the message is the
 *   offending type. Previously such nodes silently produced `undefined`.
 */
function codeGenerator(node) {
  switch (node.type) {
    case 'Program':
      // One statement per line.
      return node.body.map(codeGenerator).join('\n');
    case 'Identifier':
      return node.name;
    case 'StringLiteral': {
      return '"' + node.value + '"';
    }
    case 'VariableDeclaration': {
      return (
        node.kind + // var
        ' ' +
        codeGenerator(node.id) + // module,etc
        ' = ' +
        codeGenerator(node.init) // CallExpression
      );
    }
    case 'CallExpression': {
      // The statement terminator is emitted here, so the printed call
      // always ends with `;`.
      return (
        codeGenerator(node.callee) + // require
        '(' +
        node.arguments.map(codeGenerator).join(', ') + // "module",etc
        ')' +
        ';'
      );
    }
    default:
      // Fail loudly rather than leaking "undefined" into the output.
      throw new TypeError(node.type);
  }
}
compiler
/**
 * Runs the full pipeline on a source string:
 * tokenizer -> parser -> transformer -> codeGenerator.
 *
 * @param {string} input - Source code to compile.
 * @returns {string} The generated target code.
 */
function compiler(input) {
  // Each stage consumes the previous stage's result.
  return codeGenerator(transformer(parser(tokenizer(input))));
}
// Public API: each compiler stage is exported individually, alongside the
// `compiler` function that chains them together.
module.exports = {
tokenizer,
parser,
transformer,
codeGenerator,
compiler
};
yatharthk/micro-es6-import-compiler
I hope you learned a few good things today