Airton Libório
Data Analytics Lead @ McKinsey Digital Labs
Scala
DEMO!
Scala is a modern multi-paradigm programming language designed to express common programming patterns in a concise, elegant, and type-safe way
Prefer vals, immutable objects, and methods without side effects. Reach for them first
// Bad
var x: ExpressionType = null
if (myBoolean) x = expr1 else x = expr2
// Good
val x = if (myBoolean) expr1 else expr2
val firstDate: Date = ...
val c = Calendar.getInstance()
c.setTime(firstDate)
c.add(Calendar.DATE, -1)
val dayAgo = c.getTime()
val dateTime = new DateTime(date)
val dayBefore = dateTime.minusDays(1)
Joda-time
java.util.Date
// Not everything should be solved with immutability though...
class Item{ ... }
class Player(var health: Int = 100,
val items: mutable.Buffer[Item] = mutable.Buffer.empty)
val player = new Player()
// Mutability for performance is OK
/** Returns the first `n` Fibonacci numbers, starting 1, 1, 2, 3, 5, ...
  *
  * A mutable buffer is used locally for speed, but it never escapes: the
  * caller receives an immutable copy.
  *
  * @param n how many numbers to produce; values of 0 or less yield an empty sequence
  * @return exactly `n` numbers (elements past the 46th overflow `Int`)
  */
def getFibs(n: Int): Seq[Int] = {
  val fibs = mutable.ArrayBuffer(1, 1)
  while (fibs.length < n) {
    fibs.append(fibs(fibs.length - 1) + fibs(fibs.length - 2))
  }
  // The buffer is seeded with two elements, so trim it for n < 2.
  // toList hands back an immutable snapshot instead of the working buffer.
  fibs.take(n).toList
}
// But try to avoid this
/** Overwrites `fibs` in place with Fibonacci numbers until it holds at least `n` of them.
  *
  * Shown as the style to avoid: the result is delivered by mutating the
  * caller's buffer rather than by returning a value.
  *
  * @param n    target length; the buffer always ends up with at least the two seed values
  * @param fibs buffer that is cleared and then refilled
  */
def getFibs(n: Int, fibs: mutable.ArrayBuffer[Int]): Unit = {
  fibs.clear()
  fibs.append(1)
  fibs.append(1)
  while (fibs.length < n) {
    fibs.append(fibs(fibs.length - 1) + fibs(fibs.length - 2))
  }
}
// Mutate either the variable or the value
val myList = new mutable.ArrayBuffer[Int]()
var myList = immutable.List[Int](1, 2, 3)
// Bad! A reassignable variable holding a mutable collection can change in two different ways
var myList = mutable.ArrayBuffer[Int]()
val sum = 34 + 4 * 2
val list = List(1, 2, 3)
val map = Map("hey" -> list)
def succ(x: Int) = x + 1
val sum: Int = 34 + 4 * 2
val list: List[Int] = List(1, 2, 3)
val map: Map[String, List[Int]] = Map("hey" -> list)
def succ(x: Int): Int = x + 1
// Variant with the steps nested inside the function: both helpers can see `p` directly,
// and the result combines the two helpers that are actually defined here.
def someComplexFunction(p: Parameter) = {
  def theFirstStep = {
    // do something, using parameter
  }
  def anotherStep = {
    // do something else
  }
  theFirstStep + anotherStep
}
def someComplexFunction(p: Parameter) =
theFirstStep(p) + anotherStep(p)
private def theFirstStep(p: Parameter) = {
...
}
private def anotherStep(p: Parameter) = {
...
}
/**
 * Hand-written Java value holder with two fields, a constructor and one getter per field.
 */
public class MyUser {
String myStr;
Integer myInt;
/** Stores both arguments in the corresponding fields. */
public MyUser(String myStr, Integer myInt) {
this.myStr = myStr;
this.myInt = myInt;
}
/** @return the string passed to the constructor */
public String getMyStr() {
return this.myStr;
}
/** @return the integer passed to the constructor */
public Integer getMyInt() {
return this.myInt;
}
}
case class MyUser(myStr: String, myInt: Int)
Case classes are regular classes which export their constructor parameters and which provide a recursive decomposition mechanism via pattern matching
case class Point(x: Int, y: Int, z: Int = 0)
(...) tests whether a given value (or sequence of values) has the shape defined by a pattern, and, if it does, binds the variables in the pattern to the corresponding components of the value (or sequence of values)
// Mixed
case class Player(name: String, score: Int)

/** Builds a greeting for `player`.
  *
  * Combines a constructor pattern with a guard: scores above 100000 get the
  * first message, everyone else is greeted by name.
  *
  * @param player the player to greet
  * @return the greeting text
  */
def message(player: Player): String = player match {
  case Player(_, score) if score > 100000 => "Get a job, dude!"
  case Player(name, _)                    => s"Hey $name, nice to see you again!"
}
// With collections
val list = List(0, 4, 5)
list match {
case List(0, _, _) => println("found it")
case _ =>
}
// Value matching
val sign = ch match {
case '+' => 1
case '-' => -1
case _ => 0
}
// Type matching
obj match {
case x: Int => x
case s: String => Integer.parseInt(s)
case _: BigInt => Int.MaxValue
case _ => 0
}
/** Tells whether `x` is a `Map` whose keys and values are all `Int`.
  *
  * Type arguments are erased at runtime, so a pattern such as
  * `case m: Map[Int, Int]` matches every `Map`. The entries are inspected
  * instead. An empty map is accepted, since it has no entry of another type.
  *
  * @param x any value
  * @return true when `x` is a map containing only Int-to-Int entries
  */
def isIntIntMap(x: Any): Boolean = x match {
  case m: Map[_, _] =>
    m.forall {
      case (_: Int, _: Int) => true
      case _                => false
    }
  case _ => false
}
// Closed hierarchy of shapes: `sealed` lets the compiler check that matches are exhaustive.
sealed abstract class Shape
case class Circle(radius: Double) extends Shape
case class Rectangle(width: Double, height: Double) extends Shape
case class Triangle(base: Double, height: Double) extends Shape

/** Computes the area of `shape`.
  *
  * Demonstrates constant patterns (a side equal to 1 or 0) placed ahead of
  * the general constructor patterns, and pattern alternatives with `|`.
  */
def area(shape: Shape): Double = shape match {
  case Circle(r)                       => math.Pi * math.pow(r, 2.0)
  case Rectangle(1, h)                 => h
  case Rectangle(w, 1)                 => w
  case Rectangle(w, h)                 => w * h
  case Triangle(0, _) | Triangle(_, 0) => 0
  case Triangle(b, h)                  => h * b / 2
}
case class User(id: Int, name: String, age: Int, gender: Option[String])

/** In-memory user store keyed by id; `gender` is optional and modelled with `Option`. */
object UserRepository {
  private val users = Map(
    1 -> User(1, "John Doe", 32, Some("male")),
    2 -> User(2, "Johanna Doe", 30, None)
  )

  // Runs once, when the object is initialised: reports the gender of user 2, if any.
  println(users(2).gender.fold("Gender: not specified")(g => "Gender: " + g))

  /** Looks a user up by id; `None` when the id is unknown. */
  def findById(id: Int): Option[User] = users.get(id)

  /** All stored users. */
  def findAll = users.values
}
UserRepository.findById(2).foreach(user => println(user.age)) // prints 30
// Collects the gender of every user that has one. The pattern needs one sub-pattern
// per User field (four); users whose gender is None do not match and are skipped.
for {
  User(_, _, _, Some(gender)) <- UserRepository.findAll
} yield gender
// X initialises its field eagerly in the constructor; Y declares it lazy, so the
// initialiser only runs when y is first read.
class X { val x = { Thread.sleep(2000); 15 } }
class Y { lazy val y = { Thread.sleep(2000); 13 } }
new X // blocks for two seconds while x is computed
new Y // returns instantly; y has not been computed yet
// Expression only evaluated if needed: `str` is a by-name parameter (`=> String`),
// so the argument expression is evaluated only when the body actually uses `str`.
def logMsg(str: => String): Unit = { ... }
def expensive: String = { ... }
logMsg(s"Some $expensive message!")
Some languages (like Haskell) are lazy: every expression’s evaluation waits for its (first) use
Scala is strict by default, but lazy if explicitly specified for given variables or parameters
Laziness is made of lambdas – anonymous functions closed over their lexical scope
Concurrency != Parallelism
Avoid concurrency like the plague it is!!!
Parallelism is about speeding up a program by using multiple processors
Use parallelism if you can, concurrency otherwise
(Haskell wiki)
f: (X × Y × Z) → N
currying produces...
curry(f): X → (Y → (Z → N))
(1 to 10) map { _ * 2 }
(1 to 1000).reduceLeft( _ + _ )
(1 to 1000).sum
val fileText = Source.fromFile("file.txt").mkString
val fileLines = Source.fromFile("file.txt").getLines.toList
List(14, 35, -7, 46, 98).reduceLeft ( _ min _ )
List(14, 35, -7, 46, 98).min
// Check whether any of the words occurs in a string
val wordList = List("scala", "akka", "play framework", "sbt", "typesafe")
val tweet = "This is an example tweet talking about scala and sbt."
// Hand-rolled version: fold the list into a single Boolean
(wordList.foldLeft(false)( _ || tweet.contains(_) ))
// Same question asked directly with exists
wordList.exists(tweet.contains)
val pangram = "The quick brown fox jumps over the lazy dog"
(pangram split " ") filter (_ contains 'o')
val m = pangram filter (_.isLetter) groupBy (_.toLower) mapValues (_.size)
m.toSeq sortBy (_._2)
m.toSeq sortWith (_._2 > _._2)
m.filter(_._2 > 1).toSeq sortWith (_._2 > _._2) mkString "\n"
Source.fromURL("https://github.com/humans.txt").take(335).mkString
Programs must be written for people to read, and only incidentally for machines to execute
-- Harold Abelson
Should array indices start at 0 or 1? My compromise of 0.5 was rejected without, I thought, proper consideration
-- Stan Kelly-Bootle
A programming language is low level when its programs require attention to the irrelevant
-- Alan J. Perlis
Always code as if the guy who ends up maintaining your code will be a violent psychopath who knows where you live
-- Martin Golding
/** Actor that handles one message per stage of a request:
  * an HTTP request triggers a database call, the database response triggers a
  * web-service call, and the web-service response is sent back as the reply.
  */
class MyActor extends Actor {
  // `receive` is a partial function over incoming messages, so the cases are
  // written directly; there is no separate `msg` value to match on.
  def receive: Receive = {
    case HttpRequest(request) =>
      val query = buildQuery(request)
      dbCall(query)

    case DbResponse(dbResponse) =>
      // Send the request that was just built, not the raw database response.
      val wsRequest = buildWebServiceRequest(dbResponse)
      wsCall(wsRequest)

    case WsResponse(wsResponse) => sendReply(wsResponse)
  }
}
// Definition of the COFFEES table
// Each row is a 5-tuple (name, supplier id, price, sales, total), matching the columns below.
class Coffees(tag: Tag) extends Table[(String, Int, Double, Int, Int)](tag, "COFFEES") {
// COF_NAME is the primary key
def name = column[String]("COF_NAME", O.PrimaryKey)
def supID = column[Int]("SUP_ID")
def price = column[Double]("PRICE")
def sales = column[Int]("SALES")
def total = column[Int]("TOTAL")
// Default projection: the columns in the same order as the row tuple type
def * = (name, supID, price, sales, total)
// A reified foreign key relation that can be navigated to create a join
// NOTE(review): `suppliers` is not defined in this snippet; presumably the suppliers table query
def supplier = foreignKey("SUP_FK", supID, suppliers)(_.id)
}
val coffees = TableQuery[Coffees]
coffees ++= Seq(
("Colombian", 101, 7.99, 0, 0),
("French_Roast_Decaf", 49, 9.99, 0, 0)
)
// Data-access helpers over the `coffees` table query.
// NOTE(review): `Coffee` is not defined in this snippet; it is presumably an alias for the row type.

// Query over every row. NOTE(review): unlike `fetch`, this does not call `.result`; confirm which is intended.
def fetchAll() = { for (c <- coffees) yield c }

// Rows whose name equals `name`.
def fetch(name: String) = { coffees.filter(_.name === name).result }

// Inserts a single row.
def insert(c: Coffee) = { coffees += c }

// Inserts several rows as one transaction. The parameter is not called `coffees`,
// because that name would shadow the table query it is appended to.
def insert(coffeeSeq: Seq[Coffee]) = { (coffees ++= coffeeSeq).transactionally }

// Deletes every row belonging to the given supplier id.
def delete(supID: Int) = { coffees.filter(_.supID === supID).delete }
for {
c <- coffees if c.price < 9.0
s <- suppliers if s.id === c.supID
} yield (c.name, s.name)
// Controller for the computer pages; its services are supplied through the injected constructor.
class HomeController @Inject() (computerService: ComputerService,
companyService: CompanyService,
val messagesApi: MessagesApi)
extends Controller with I18nSupport {
// Form definition that maps the submitted fields to and from a Computer.
// "id" is ignored on input and always bound to None; both dates use the yyyy-MM-dd format.
val computerForm = Form(
mapping(
"id" -> ignored(None:Option[Long]),
"name" -> nonEmptyText,
"introduced" -> optional(date("yyyy-MM-dd")),
"discontinued" -> optional(date("yyyy-MM-dd")),
"company" -> optional(longNumber)
)(Computer.apply)(Computer.unapply)
)
// Renders the edit form pre-filled with the computer that has this id, or NotFound when there is none.
def edit(id: Long) = Action {
computerService.findById(id).map { computer =>
Ok(html.editForm(id, computerForm.fill(computer), companyService.options))
}.getOrElse(NotFound)
}
// Renders one page of the computer list.
// The filter is wrapped in % on both sides before it is passed to the service
// (presumably for a SQL LIKE match; confirm against ComputerService).
def list(page: Int, orderBy: Int, filter: String) = Action { implicit request =>
Ok(html.list(
computerService.list(page = page, orderBy = orderBy, filter = ("%"+filter+"%")),
orderBy, filter
))
}
}
// GET <PREFIX>/list: responds with JSON.
// Completes with the result of projectDS.fetchAll() on success,
// or with InternalServerError and the exception message on failure.
val listRoute = pathPrefix(PREFIX) {
(path("list") & get) {
respondWithMediaType(MediaTypes.`application/json`) {
onComplete(projectDS.fetchAll()) {
case Success(f) => complete(f)
case Failure(ex) => complete(InternalServerError, ex.getMessage)
}
}
}
}
// DELETE <PREFIX>/delete/<int>: responds with JSON, completing with whatever projectDS.delete returns.
// NOTE(review): unlike listRoute, there is no explicit Success/Failure handling here; confirm this is intended.
val deleteRoute = pathPrefix(PREFIX) {
(path("delete" / IntNumber) & delete) { pid =>
respondWithMediaType(MediaTypes.`application/json`) {
complete(projectDS.delete(pid))
}
}
}
val routes = listRoute ~ addRoute ~ deleteRoute
A couple of applications
Big data: a term for data sets that are so large or complex that traditional data processing applications are inadequate
What we want from a Big data pipeline system
What we want from a Big data queue / message system
What we want from a stream processing system
analysis of data in motion
The Ultimate Scala Collections
Thanks!
github.com/airtonjal
airtonjal@gmail.com
airton_liborio@mckinsey.com