A deep dive into scalac
Chris Birchall
Amsterdam.scala
16th November 2015
-
The journey from Foo.scala to Foo.class
-
The scalac codebase
-
Extending the compiler with plugins
-
Hacking on the compiler itself
Agenda
me me me
- Guardian
- Scala fanboy since 2.7.x
- ScalaCache
- Macro nerd
- @cbirchall
- github.com/cb372
-
Debugging slow compilation
-
Writing macros
-
Contributing to Scala
Why is this useful?
The journey
from Foo.scala to Foo.class
Terminology
The compiler deals with 3 kinds of stuff
-
Trees
-
Types
-
Symbols
Tree
Abstract Syntax Tree (AST)
scala> import scala.reflect.runtime.universe._
import scala.reflect.runtime.universe._
scala> val tree = q"a + b"
tree: reflect.runtime.universe.Tree = a.$plus(b)
scala> showRaw(tree)
res0: String =
Apply(
Select(
Ident(TermName("a")),
TermName("$plus")
),
List(Ident(TermName("b")))
)
Type
The, erm, type of a thing?
Symbol
A binding between a "thing" (e.g. a method, a class or a type) and the name for that thing
$ scalac -Xshow-phases
phase name id description
---------- -- -----------
parser 1 parse source into ASTs, perform simple desugaring
namer 2 resolve names, attach symbols to named trees
packageobjects 3 load package objects
typer 4 the meat and potatoes: type the trees
patmat 5 translate match expressions
superaccessors 6 add super accessors in traits and nested classes
extmethods 7 add extension methods for inline classes
pickler 8 serialize symbol tables
refchecks 9 reference/override checking, translate nested objects
uncurry 10 uncurry, translate function values to anonymous classes
tailcalls 11 replace tail calls by jumps
specialize 12 @specialized-driven class and method specialization
explicitouter 13 this refs to outer pointers
erasure 14 erase types, add interfaces for traits
posterasure 15 clean up erased inline classes
lazyvals 16 allocate bitmaps, translate lazy vals into lazified defs
lambdalift 17 move nested functions to top level
constructors 18 move field definitions into constructors
flatten 19 eliminate inner classes
mixin 20 mixin composition
cleanup 21 platform-specific cleanups, generate reflective calls
delambdafy 22 remove lambdas
icode 23 generate portable intermediate code
jvm 24 generate JVM bytecode
terminal 25 the last phase during a compilation run
Why is my compile so slow?
# Print only the per-phase timing lines; [0-9] is used because plain grep does not portably support \d.
$ scalac -verbose Foo.scala 2>&1 | grep "\[\w* in [0-9]*ms\]"
[parser in 19ms]
[namer in 70ms]
[packageobjects in 0ms]
[typer in 570ms]
[patmat in 204ms]
[superaccessors in 12ms]
[extmethods in 3ms]
[pickler in 14ms]
[refchecks in 54ms]
...
[cleanup in 4ms]
[delambdafy in 0ms]
[jvm in 167ms]
[total in 2391ms]
What is phase X doing?
Example: patmat
/** Minimal example used to show what the patmat phase does to a `match` expression. */
object PatternMatch {
/** Returns true only when `word` is exactly "hello"; any other input yields false. */
def isGreeting(word: String) = word match {
case "hello" => true
case _ => false
}
}
Before patmat phase
$ scalac -Xprint:typer PatternMatch.scala
[[syntax trees at end of typer]]
package <empty> {
object PatternMatch extends scala.AnyRef {
def <init>(): PatternMatch.type = {
PatternMatch.super.<init>();
()
};
def isGreeting(word: String): Boolean = word match {
case "hello" => true
case _ => false
}
}
}
After patmat phase
$ scalac -Xprint:patmat PatternMatch.scala
// ...
def isGreeting(word: String): Boolean = {
case <synthetic> val x1: String = word;
case5(){
if ("hello".==(x1))
matchEnd4(true)
else
case6()
};
case6(){
matchEnd4(false)
};
matchEnd4(x: Boolean){
x
}
}
Another example: tail-call optimization
import scala.annotation.tailrec
/** Example for the tailcalls phase: Fibonacci computed with an accumulator-passing helper. */
object Fib {
/** Returns the n-th Fibonacci number for n >= 0, counting from fib(0) = 0 and fib(1) = 1. */
def fib(n: Int) = {
// a and b hold two consecutive Fibonacci numbers; a is the result once n has counted down to 0.
// @tailrec turns a non-tail-recursive fibRec into a compile error.
@tailrec
def fibRec(n: Int, a:Int, b:Int): Int = n match {
case 0 => a
case _ => fibRec(n-1, b, a+b)
}
fibRec(n, 0, 1)
}
}
Before tailcalls
$ scalac -Xprint:uncurry Fib.scala
// ...
def fib(n: Int): Int = {
@scala.annotation.tailrec def fibRec(n: Int, a: Int, b: Int): Int = {
case <synthetic> val x1: Int = n;
x1 match {
case 0 => a
case _ => fibRec(n.-(1), b, a.+(b))
}
};
fibRec(n, 0, 1)
}
After tailcalls
$ scalac -Xprint:tailcalls Fib.scala
// ...
def fib(n: Int): Int = {
@scala.annotation.tailrec def fibRec(n: Int, a: Int, b: Int): Int = {
<synthetic> val _$this: Fib.type = Fib.this;
_fibRec(_$this: Fib.type, n: Int, a: Int, b: Int){
{
case <synthetic> val x1: Int = n;
x1 match {
case 0 => a
case _ => _fibRec(Fib.this, n.-(1).asInstanceOf[Int](), b.asInstanceOf[Int](), a.+(b).asInstanceOf[Int]())
}
}.asInstanceOf[Int]()
}
};
fibRec(n, 0, 1)
}
Let's look at the tree
[[syntax trees at end of tailcalls]]// Scala source: Fib.scala
PackageDef(
"<empty>" // final package <empty>, tree.tpe=<empty>.type
ClassDef( // class Fib extends Object
<module>
"Fib"
[]
Template( // val <local Fib>: <notype> in object Fib, tree.tpe=Fib.type
"java.lang.Object" // parents
ValDef(
private
"_"
<tpt>
<empty>
)
// 2 statements
DefDef( // def <init>(): Fib.type in object Fib
<method>
"<init>"
[]
List(Nil)
<tpt> // tree.tpe=Fib.type
Block( // tree.tpe=Unit
Apply( // def <init>(): Object in class Object, tree.tpe=Object
Fib.super."<init>" // def <init>(): Object in class Object, tree.tpe=()Object
Nil
)
()
)
)
DefDef( // def fib(n: Int): Int in object Fib
<method>
"fib"
[]
// 1 parameter list
ValDef( // n: Int
<param> <triedcooking>
"n"
<tpt> // tree.tpe=Int
<empty>
)
<tpt> // tree.tpe=Int
Block( // tree.tpe=Int
DefDef( // def fibRec(n: Int,a: Int,b: Int): Int
<method> <triedcooking> @{ scala.annotation.tailrec }
"fibRec"
[]
// 1 parameter list
ValDef( // n: Int
<param> <triedcooking>
"n"
<tpt> // tree.tpe=Int
<empty>
)
ValDef( // a: Int
<param> <triedcooking>
"a"
<tpt> // tree.tpe=Int
<empty>
)
ValDef( // b: Int
<param> <triedcooking>
"b"
<tpt> // tree.tpe=Int
<empty>
)
<tpt> // tree.tpe=Int
Block( // tree.tpe=Int
ValDef( // val _$this: Fib.type
<synthetic>
"_$this"
<tpt> // tree.tpe=Fib.type
This("Fib")class Fib extends Object, tree.tpe=Fib.type
)
LabelDef( // def _fibRec(x$1: Fib.type,n: Int,a: Int,b: Int): Int, tree.tpe=Int
// 4 paramss
"_$this" // val _$this: Fib.type, tree.tpe=Fib.type
"n" // n: Int, tree.tpe=Int
"a" // a: Int, tree.tpe=Int
"b" // b: Int, tree.tpe=Int
Apply( // final def asInstanceOf[T0](): T0 in class Any, tree.tpe=Int
TypeApply( // final def asInstanceOf[T0](): T0 in class Any, tree.tpe=()Int
{
case <synthetic> val x1: Int = n;
x1 match {
case 0 => a
case _ => _fibRec(Fib.this, n.-(1).asInstanceOf[Int](), b.asInstanceOf[Int](), a.+(b).asInstanceOf[Int]())
}
}."asInstanceOf" // final def asInstanceOf[T0](): T0 in class Any, tree.tpe=[T0]()T0
<tpt> // tree.tpe=Int
)
Nil
)
)
)
)
Apply( // def fibRec(n: Int,a: Int,b: Int): Int, tree.tpe=Int
"fibRec" // def fibRec(n: Int,a: Int,b: Int): Int, tree.tpe=(n: Int, a: Int, b: Int)Int
// 3 arguments
"n" // n: Int, tree.tpe=Int
0
1
)
)
)
)
)
)
$ scalac -Xprint:tailcalls -Yshow-trees Fib.scala
LabelDef( // def _fibRec(x$1: Fib.type,n: Int,a: Int,b: Int): Int, tree.tpe=Int
// 4 paramss
"_$this" // val _$this: Fib.type, tree.tpe=Fib.type
"n" // n: Int, tree.tpe=Int
"a" // a: Int, tree.tpe=Int
"b" // b: Int, tree.tpe=Int
...and the bytecode
$ javap -private -c Fib$.class
// ...
private final int fibRec$1(int, int, int);
Code:
0: iload_1
1: istore 5
3: iload 5
5: tableswitch { // 0 to 0
0: 37
default: 24
}
24: iload_1
25: iconst_1
26: isub
27: iload_3
28: iload_2
29: iload_3
30: iadd
31: istore_3
32: istore_2
33: istore_1
34: goto 0
37: iload_2
38: ireturn
In general
$ scalac -Xprint:<previous-phase> \
[-Xprint-types] \
[-Yshow-trees] \
[-Yshow-syms] \
[-Xshow-class Foo] \
Foo.scala
$ scalac -Xprint:<phase-you-care-about> \
[-Xprint-types] \
[-Yshow-trees] \
[-Yshow-syms] \
[-Xshow-class Foo] \
Foo.scala
The code
History
-
2004 - Scala 1.0, compiler written in Java
-
2006 - Scala 2.0, nsc
-
2011-2012 - .NET backend
-
2013 - Scala.js
-
(2.12) - new backend and optimizer
Size
--------------------------------------------------------------------------------
Language files blank comment code
--------------------------------------------------------------------------------
Scala 321 12304 21416 63852
Bourne Again Shell 1 26 41 150
XML 2 0 0 26
--------------------------------------------------------------------------------
SUM: 324 12330 21457 64028
--------------------------------------------------------------------------------
(Scala 2.11.7)
mi casa es su casa
// The compiler's central class; components such as the analyzer are created here.
class Global {
// ...
// The early definition block sets `global` to this very instance (note the singleton type
// Global.this.type) before the Analyzer trait body is initialised.
lazy val analyzer = new {
val global: Global.this.type = Global.this
} with Analyzer
}
// A compiler component: it declares the Global it belongs to as an abstract member.
trait Analyzer extends ... {
val global : Global
// Brings the members of that particular Global instance into scope.
import global._
// ...
}
Walkthrough
simplified version of Global.Run.compile(files: List[String])
- create list of phases
- for each phase
- for each file
- run phase
- print debug stuff
- sanity check trees
- for each file
- print any errors
Phase
/** A phase that follows `prev`; subclasses say what to do with one compilation unit. */
abstract class GlobalPhase(prev: Phase) extends Phase(prev) {
// ...
/** Processes a single compilation unit; implemented by each concrete phase. */
def apply(unit: CompilationUnit): Unit
}
Transformer
/** Example tree transformer that flips every boolean literal it encounters. */
class InvertingTransformer extends Transformer {
  /**
   * Delegates to `super.transform` first, then swaps `true` and `false` literals;
   * every other tree is returned as produced by the superclass.
   *
   * `override` is required because `Transformer` already provides a concrete `transform`.
   */
  override def transform(tree: Tree): Tree = {
    val newTree = super.transform(tree)
    newTree match {
      case Literal(Constant(true)) => Literal(Constant(false))
      case Literal(Constant(false)) => Literal(Constant(true))
      case _ => newTree
    }
  }
}
Plugins
/** Skeleton compiler plugin that contributes a single phase running after `typer`. */
class MyPlugin(val global: Global) extends Plugin {
  // Makes path-dependent types such as CompilationUnit available below.
  import global._

  // Plugin declares `name` and `description` as abstract members, so both must be defined.
  val name = "myplugin"
  val description = "example compiler plugin"
  val components = List[PluginComponent](Component)

  private object Component extends PluginComponent {
    // PluginComponent likewise requires `global` and `phaseName`; `global` must be the
    // very same instance as the plugin's, hence the singleton type.
    val global: MyPlugin.this.global.type = MyPlugin.this.global
    val runsAfter = List("typer")
    val phaseName = MyPlugin.this.name
    def newPhase(prev: Phase) = new MyPhase(prev)

    /** The phase itself; `apply` is invoked with a compilation unit. */
    class MyPhase(prev: Phase) extends StdPhase(prev) {
      def apply(unit: CompilationUnit): Unit = {
        // TODO: MAGIC GOES HERE
      }
    }
  }
}
Example
Let's hack!
$ git clone git@github.com:scala/scala.git && cd scala
# ... hack hack hack ...
$ echo "locker.skip=1" > build.properties
$ echo "docs.skip=1" >> build.properties
$ ant publish-core-local -Dmaven.version.suffix="-chris"
# ... go and make a cup of tea ...
# ...
$ cd ~/tmp
$ echo 'scalaVersion := "2.11.7-chris"' > build.sbt
$ echo 'resolvers += Resolver.mavenLocal' >> build.sbt
$ sbt console
Let's hack!
My (trolling) contribution ...
Let's hack!
Why stop at adding a json AST?
Everybody loves XML literals,
so why don't we have JSON literals?
scala> val foo = json [ { "a": "b", "c": 123, "d": [ true ]}, 1.23e+5 ]
1. Add json AST to stdlib
// src/library/scala/json/AST.scala
package scala.json
/** JSON value model backing the proposed `json` literals. */
object AST {
  /** Root of every JSON value. */
  sealed trait JValue

  /** Marker shared by the two numeric representations. */
  sealed trait JNumber

  /** The JSON `null` value. */
  case object JNull extends JValue

  /** A JSON boolean. */
  case class JBool(value: Boolean) extends JValue

  /** A JSON string. */
  case class JString(s: String) extends JValue

  /** A JSON number held as an arbitrary-precision integer. */
  case class JInt(num: BigInt) extends JValue with JNumber

  /** A JSON number held as an arbitrary-precision decimal. */
  case class JDecimal(num: BigDecimal) extends JValue with JNumber
  // ...
}
2. Add keyword, token
// src/reflect/scala/reflect/internal/StdNames.scala
// ...
final val VIEWBOUNDkw: TermName = kw("<%")
final val SUPERTYPEkw: TermName = kw(">:")
final val HASHkw: TermName = kw("#")
final val ATkw: TermName = kw("@")
final val JSONkw: TermName = kw("json") // (* ̄0 ̄)/
// src/compiler/scala/tools/nsc/ast/parser/Tokens.scala
// ...
final val VIEWBOUND = 136
final val NEWLINE = 137
final val NEWLINES = 138
final val XMLSTART = 139
final val JSONSTART = 140 // ヾ(@⌒ー⌒@)ノ
3. Add JSON parser
/** Reads a JSON literal from the token stream of the given source-file parser. */
class JsonParser(parser: SourceFileParser) {
  /** Parses one JSON value and converts it into a compiler Tree. */
  def jLiteral: Tree = {
    val jvalue: JValue = parse()
    TreeBuilder.toTree(jvalue)
  }
  // ...
  /** Advances the scanner by one token and dispatches on the token that was read. */
  private def parse(): JValue = {
    parser.in.nextToken()
    parser.in.token match {
      case LBRACE =>
        // Opening brace: push a new, empty object and carry on parsing.
        stack.push(JObject(Nil))
        parse()
      case RBRACE =>
      // ...
    }
  }
}
4. Wire it together
// src/compiler/scala/tools/nsc/ast/parser/Parsers.scala
// A single JsonParser instance, constructed with this parser as its argument.
private[this] val jsonp = new JsonParser(this)
// Parses a JSON literal by delegating to the JsonParser.
def jsonLiteral() : Tree = jsonp.jLiteral
// Dispatches on the current token; a JSONSTART token is handed to jsonLiteral().
def expr0(location: Location): Tree = in.token match {
case IF => // ...
case TRY => // ...
case WHILE => // ...
// ...
case JSONSTART => jsonLiteral()
// ...
}
DEMO
Further reading
-
Symbols, trees and types
-
Some very old videos by Martin Odersky
- Writing Scala compiler plugins
- Scalable Component Abstractions
- New backend and optimizer
-
Dotty: exploring the future of Scala
Final remarks
Questions?
A deep dive into scalac - Amsterdam 2015
By Chris Birchall
A deep dive into scalac - Amsterdam 2015
- 3,686