The power of Akka

Krzysztof Borowski

Akka Paint - requirements

  • Simple
  • Real time changes
  • Multiuser
  • Scalable

Simple

/** A single board cell, addressed by its integer coordinates. */
case class Pixel(x: Int, y: Int)

/** Event stating that every pixel in `changes` was painted with `color`. */
case class DrawEvent(changes: Seq[Pixel], color: String)

// Board state: the color currently stored for each painted pixel.
type Color = String
var drawballBoard: Map[Pixel, Color] = Map.empty

Changes as events

Simple

/**
 * Event-sourced drawing board.
 *
 * Every `Draw` command is persisted as a `DrawEvent`; the in-memory board is
 * rebuilt by applying the same events again during recovery.
 */
class DrawballActorSimple() extends PersistentActor {

  // Current color of every pixel that has been painted so far.
  var drawballBoard = Map.empty[Pixel, String]

  override def persistenceId: String = "drawballActor"

  // Recovery: replayed events go through the same state transition as live ones.
  override def receiveRecover: Receive = {
    case event: DrawEvent => updateState(event)
  }

  // Commands: persist the event, then apply it once the handler is invoked.
  override def receiveCommand: Receive = {
    case Draw(changes, color) =>
      persistAsync(DrawEvent(changes, color))(updateState)
  }

  /** Paints every pixel of the event with the event's color (later entries win). */
  private def updateState(drawEvent: DrawEvent): Unit =
    drawballBoard = drawballBoard ++ drawEvent.changes.map(pixel => pixel -> drawEvent.color)

}

Board as a Persistent Actor

Multiuser

Multiuser

/**
 * Event-sourced drawing board that also pushes changes to registered clients.
 *
 * Keeps the board (pixel -> color) and the set of client actors that asked to
 * be notified about changes.
 */
class DrawballActor() extends PersistentActor {

  // Current color of every painted pixel.
  var drawballBoard = Map.empty[Pixel, String]
  // Actors that receive a `Changes` message for every persisted draw.
  var registeredClients = Set.empty[ActorRef]

  // NOTE(review): the id is a constant. `Props[DrawballActor]` is used as the
  // sharded entity elsewhere in this deck; if several instances really run with
  // this definition they would share one journal. Confirm against the real code.
  override def persistenceId: String = "drawballActor"

  // Recovery: replayed draw events are applied to the board.
  override def receiveRecover: Receive = {
    case d: DrawEvent => updateState(d)
  }

  override def receiveCommand: Receive = {
    // Persist the draw; in the handler apply it and notify every registered
    // client except the sender of the command.
    case Draw(changes, color) =>
      persistAsync(DrawEvent(changes, color)) { de =>
        updateState(de)
        (registeredClients - sender())
          .foreach(_ ! Changes(de.changes, de.color))
      }
    // Add the client, then send it everything `convertBoardToUpdates` produces
    // for the current board (presumably the full board as `Changes` messages;
    // the helper is not shown here).
    case r: RegisterClient => {
      registeredClients = registeredClients + r.client
      convertBoardToUpdates(drawballBoard, Changes.apply)
        .foreach(r.client ! _)
    }
    // Stop notifying the given client.
    case ur: UnregisterClient => {
      registeredClients = registeredClients - ur.client
    }
  }

  // Applies a draw event to `drawballBoard` (body elided on the slide).
  private def updateState(drawEvent: DrawEvent) = {
    ...
  } 
}

Multiuser and real time


/**
 * Per-connection actor sitting between one browser and the drawing board.
 *
 * Registers itself with the board when created, forwards `Draw` commands from
 * the browser to the board, relays `Changes` coming from the board back to the
 * browser, and unregisters itself when stopped.
 *
 * @param browser        actor that receives the `Changes` destined for the browser
 * @param drawBoardActor board actor that receives `Draw` commands and (un)registrations
 */
class ClientConnectionSimple(
  browser: ActorRef,
  drawBoardActor: ActorRef
) extends Actor {

  drawBoardActor ! RegisterClient(self)
  // Not used by this simplified version; kept so the class members stay unchanged.
  var recentChanges = Map.empty[Pixel, String]

  override def receive: Receive = {
    case d: Draw =>
      drawBoardActor ! d
    // Typed pattern on purpose: `case c @ Changes` would match only the
    // `Changes` companion object, so real `Changes(...)` messages would never
    // reach the browser.
    case c: Changes =>
      browser ! c
  }

  override def postStop(): Unit = {
    drawBoardActor ! UnregisterClient(self)
  }
}
// Play! controller: WebSocket endpoint that accepts `Draw` messages and emits
// `Changes`. Each accepted connection is backed by an actor created from
// `ClientConnection.props`, which receives the browser-facing actor and the board actor.
// NOTE(review): the class shown in this deck is `ClientConnectionSimple`;
// `ClientConnection` is presumably the full version defined elsewhere.
def socket = WebSocket.accept[Draw, Changes] { _ =>
  ActorFlow.actorRef[Draw, Changes](browser => ClientConnection.props(browser, drawballActor))
}

Scalable

Akka Sharding to the rescue!

 

Akka Sharding to the rescue!

 

Akka Sharding to the rescue!

 

Scalable

Scaling - architecture

Sharding

/**
 * Splits one batch of painted pixels into per-entity messages.
 *
 * Pixels are grouped by the pair (y / entitySize, x / entitySize); the first
 * component becomes the shard id and the second the entity id of the
 * resulting `DrawEntity`.
 *
 * @param changes pixels painted in this batch
 * @param color   color applied to all of them
 * @return one `DrawEntity` per distinct (shard id, entity id) pair
 */
def shardingPixels(changes: Iterable[Pixel], color: String): Iterable[DrawEntity] = {
      def cellOf(pixel: Pixel) = (pixel.y / entitySize, pixel.x / entitySize)

      changes.groupBy(cellOf).map { case ((shardId, entityId), pixels) =>
        DrawEntity(shardId, entityId, pixels.toSeq, color)
      }
}

// Maps a sharding envelope to (entity id, message delivered to the entity).
//
// The entity id combines the shard id and the per-shard entity index. Using
// the index alone gives the same id to entities that live in different shards,
// while Cluster Sharding requires that a given entity id always maps to the
// same shard id.
// NOTE(review): this changes the entity actor names; if persistence ids are
// derived from the actor name or path, existing journals need a migration.
private val extractEntityId: ShardRegion.ExtractEntityId = {
    case DrawEntity(shardId, entityId, pixels, color) =>
      (s"$shardId-$entityId", Draw(pixels, color))
    case ShardingRegister(shardId, entityId, client) =>
      // Clients travel as serialized actor paths rather than as ActorRefs.
      (s"$shardId-$entityId", RegisterClient(Serialization.serializedActorPath(client)))
    case ShardingUnregister(shardId, entityId, client) =>
      (s"$shardId-$entityId", UnregisterClient(Serialization.serializedActorPath(client)))
}

// Shard id of a sharding envelope: the string form of its first field.
private val extractShardId: ShardRegion.ExtractShardId = {
    case DrawEntity(shard, _, _, _)       => shard.toString
    case ShardingRegister(shard, _, _)    => shard.toString
    case ShardingUnregister(shard, _, _)  => shard.toString
}

Sharding Cluster

 

/**
 * Creates the "DrawballSystem" actor system and starts the cluster-sharding
 * region for `DrawballActor` entities on it.
 *
 * @return the newly created actor system
 */
def initializeCluster(): ActorSystem = {
    val system = ActorSystem("DrawballSystem")
    val sharding = ClusterSharding(system)

    // Entities are created from Props[DrawballActor]; messages are routed with
    // the extractors defined alongside this method.
    sharding.start(
      entityName,
      Props[DrawballActor],
      ClusterShardingSettings(system),
      extractEntityId,
      extractShardId
    )

    system
}

/** Looks up the shard region registered under `entityName` on the given actor system. */
def shardRegion()(implicit actorSystem: ActorSystem) =
    ClusterSharding(actorSystem).shardRegion(entityName)

Akka - snapshotting

 

 // Recovery handler (snapshot-related cases only; other cases are elided on the slide).
 override def receiveRecover: Receive = {
    ...
    // Restore from a snapshot: re-apply every stored change and register every
    // stored client again through `registerClient`.
    case SnapshotOffer(_, snapshot: DrawSnapshot) => {
      snapshot.changes.foreach(updateState)
      snapshot.clients.foreach(c => registerClient(RegisterClient(c)))
    }
    // After recovery: send `ReRegisterClient()` to every restored client, then
    // forget them all (presumably so that live clients register again - confirm).
    case RecoveryCompleted => {
      registeredClients.foreach(c => c ! ReRegisterClient())
      registeredClients = Set.empty
    }
}
// Command handler with periodic snapshotting (remaining cases are elided on the slide).
override def receiveCommand: Receive = {
    case Draw(changes, color) =>
      persistAsync(DrawEvent(changes, color)) { de =>
        updateState(de)
        // Count handled events; once the counter exceeds 1000, reset it and
        // ask this actor to take a snapshot via the "snap" message.
        changesNumber += 1
        if (changesNumber > 1000) {
          changesNumber = 0
          self ! "snap"
        }
        // Notify every registered client except the sender of the command.
        (registeredClients - sender())
          .foreach(_ ! Changes(de.changes, de.color))
      }
    // Snapshot = the board converted by `convertBoardToUpdates` with
    // `DrawEvent.apply`, plus the serialized actor paths of registered clients.
    case "snap" => saveSnapshot(DrawSnapshot(
      convertBoardToUpdates(drawballBoard, DrawEvent.apply).toSeq,
      registeredClients.map(Serialization.serializedActorPath).toSeq
    ))
    ...
}

DEMO

Tips and tricks

  • Use `ClusterSharding.startProxy` to not hold any entities on the node.
  • Distributed coordinator - akka.extensions += "akka.cluster.ddata.DistributedData" (Experimental).
  • Use Protocol Buffers.
  • Be careful about message buffering.  

Summary:

  • lines of code: 275!,
  • multiuser,
  • scalable,
  • fault tolerant.

Bibliography

  • http://www.slideshare.net/bantonsson/akka-persistencescaladays2014
  • http://doc.akka.io/docs/akka/current/scala/cluster-sharding.html
  • https://github.com/trueaccord/ScalaPB

Scaflow - scientific workflows in Akka

Workflow

  • Directed Graph
  • Many inputs/Many outputs
  • Nodes - activities
  • Edges - dependencies (control flow)
  • Each node activity == fun(Data): Result

Scientific workflow

  • Data elements can be big
  • Activities can be long-running and resource intensive
  • Often invoke legacy code (e.g. Fortran, C) or external services

Scientific workflow - requirements

  • Parallelization and distribution of computations
  • Persistence and recovery
  • Fault tolerance

Actor model and workflow - similarities

Akka-streams to the rescue!

  • Built on the actor model
  • Support for complicated flows (beautiful graph oriented API)
  • Concurrent data processing

But...

| Scientific workflows         | Akka streams                                               |
|------------------------------|------------------------------------------------------------|
| bounded input data set       | unbounded data stream                                      |
| big data elements            | small data elements                                        |
| focused on scaling           | focused on back-pressure (reactive streams implementation) |
| important recovery mechanism | important high message throughput                          |

Components

Source

Data processing

Data filtering

Data grouping

Components

Broadcast data

Merge data

Components

Synchronize data

Data sink

Scaflow API

// Example pipeline: square each input, collect the results with `group(3)`,
// sum every group and print each sum.
StandardWorkflow
      .source(List(1, 2, 3, 4, 5, 6))
      .map(n => n * n)
      .group(3)
      .map(batch => batch.sum)
      .sink(println)
      .run

Concurrent computations in actor model

Push model

Pull model

Scalability in actor model

Workflow persistent state

  • Event sourcing 
  • Persistent actors
  // Recovery handler of the filter step, wrapped by `receiveRecoverWithAck`
  // (defined elsewhere). A replayed `NextVal` whose data passes `filter` is
  // delivered to `destination`; one that fails the filter is matched and dropped.
  // NOTE(review): if `A` is a type parameter, the `NextVal[A]` pattern is
  // unchecked at runtime because of erasure - any `NextVal` will match.
  override def receiveRecover: Receive = receiveRecoverWithAck {
    case n: NextVal[A] =>
      if (filter(n.data)) deliver(destination, n)
  }

  // Command handler of the filter step, wrapped by `receiveCommandWithAck`
  // (defined elsewhere). Every incoming `NextVal` is persisted first; in the
  // persist handler it is delivered to `destination` only if its data passes `filter`.
  // NOTE(review): if `A` is a type parameter, the `NextVal[A]` pattern is
  // unchecked at runtime because of erasure - any `NextVal` will match.
  override def receiveCommand: Receive = receiveCommandWithAck {
    case n: NextVal[A] =>
      persistAsync(n) { e =>
        if (filter(e.data)) deliver(destination, n)
      }
  }

/* ... */
// Same pipeline as the StandardWorkflow example, built with PersistentWorkflow:
// the source and each map/group step are given a string name.
PersistentWorkflow
      .source("source", List(1, 2, 3, 4, 5, 6))
      .map("square", n => n * n)
      .group("group", 3)
      .map("sum", batch => batch.sum)
      .sink(println)
      .run

Fault tolerance

 // Supervision for steps that make HTTP calls: a timeout restarts the failing
 // worker (retry), any other failure stops it (the message is dropped).
 // Limits: 10 retries within a 5-second window.
 val HTTPSupervisorStrategy =
   OneForOneStrategy(maxNrOfRetries = 10, withinTimeRange = 5.seconds) {
     case _: TimeoutException => Restart //retry
     case _ => Stop //drop the message
   }

 // Connector flow: the "getPng" step runs under the strategy above and the
 // sink logs each id it receives.
 PersistentWorkflow
   .connector[String]("pngConnector")
   .map("getPng", getPathwayMapPng, Some(HTTPSupervisorStrategy))
   .sink("sinkPng", id => println(s"PNG map downloaded for $id"))

Real world example

Real world example cont.

// Supervision for the HTTP-calling steps: restart on timeout, stop on anything
// else. Limits: 10 retries within a 10-second window.
val HTTPSupervisorStrategy =
  OneForOneStrategy(maxNrOfRetries = 10, withinTimeRange = 10.seconds) {
    case _: TimeoutException => Restart // try to perform operation again
    case _ => Stop // drop the message
  }

// Branch 1: the "getPng" step (8 workers) followed by a logging sink.
val savePathwayPngFlow =
  PersistentWorkflow
    .connector[String]("pngConnector")
    .map("getPng", getPathwayMapPng, Some(HTTPSupervisorStrategy), workersNumber = 8)
    .sink("sinkPng", id => println(s"PNG map downloaded for $id"))

// Branch 2: the "getTxt" step (8 workers) followed by a logging sink.
val savePathwayTextFlow =
  PersistentWorkflow
    .connector[String]("textConnector")
    .map("getTxt", getPathwayDetails, Some(HTTPSupervisorStrategy), workersNumber = 8)
    .sink("sinkTxt", id => println(s"TXT details downloaded for pathway $id"))

// Main flow: start from "hsa", run `getSetOfPathways`, split the result and
// broadcast it to both branches.
PersistentWorkflow
  .source("source", List("hsa"))
  .map("getSetOfPathways", getSetOfPathways, Some(HTTPSupervisorStrategy))
  .split[String]("split")
  .broadcast("broadcast", savePathwayPngFlow, savePathwayTextFlow)
  .run

Real world example - scaling

// Addresses of the remote actor systems passed to the PNG step as `remoteAddresses`.
val remoteWorkersHostLocations =
  Seq(
    "akka.tcp://workersActorSystem@localhost:5150",
    "akka.tcp://workersActorSystem@localhost:5151"
  ).map(uri => AddressFromURIString(uri))

// Supervision for the HTTP-calling steps: restart on timeout, stop on anything
// else. Limits: 10 retries within a 10-second window.
val HTTPSupervisorStrategy =
  OneForOneStrategy(maxNrOfRetries = 10, withinTimeRange = 10.seconds) {
    case _: TimeoutException => Restart // try to perform operation again
    case _ => Stop // drop the message
  }

// Branch 1: the "getPng" step with 8 workers and the remote addresses above.
val savePathwayPngFlow =
  PersistentWorkflow
    .connector[String]("pngConnector")
    .map(
      "getPng",
      getPathwayMapPng,
      Some(HTTPSupervisorStrategy),
      workersNumber = 8,
      remoteAddresses = remoteWorkersHostLocations
    )
    .sink("sinkPng", id => println(s"PNG map downloaded for $id"))

// Branch 2: the "getTxt" step with 8 workers; no remote addresses are passed.
val savePathwayTextFlow =
  PersistentWorkflow
    .connector[String]("textConnector")
    .map("getTxt", getPathwayDetails, Some(HTTPSupervisorStrategy), workersNumber = 8)
    .sink("sinkTxt", id => println(s"TXT details downloaded for pathway $id"))

// Main flow: start from "hsa", run `getSetOfPathways`, split the result and
// broadcast it to both branches.
PersistentWorkflow
  .source("source", List("hsa"))
  .map("getSetOfPathways", getSetOfPathways, Some(HTTPSupervisorStrategy))
  .split[String]("split")
  .broadcast("broadcast", savePathwayPngFlow, savePathwayTextFlow)
  .run

Future development

  • Graph API
  • Extensive usage of Akka clusters
  • Workflow monitoring

https://github.com/liosedhel/scaflow

Made with Slides.com