The power of Akka
Krzysztof Borowski
Akka Paint - requirements
- Simple
- Real time changes
- Multiuser
- Scalable
Simple
/** A single board cell, addressed by its integer coordinates. */
case class Pixel(x: Int, y: Int)

/** One drawing action: every pixel listed in `changes` is painted with `color`. */
case class DrawEvent(changes: Seq[Pixel], color: String)

// Board state: maps each painted pixel to its colour.
type Color = String
var drawballBoard: Map[Pixel, Color] = Map.empty
Changes as events
Simple
/**
 * Event-sourced drawing board.
 *
 * Each `Draw` command is persisted as a `DrawEvent`; the board is the result of
 * applying those events, both in the persistence callback and during recovery.
 */
class DrawballActorSimple() extends PersistentActor {

  // Colour of every painted pixel; only changed through `updateState`.
  var drawballBoard = Map.empty[Pixel, String]

  override def persistenceId: String = "drawballActor"

  // Recovery: apply each replayed event to the board.
  override def receiveRecover: Receive = {
    case event: DrawEvent => updateState(event)
  }

  // Commands: persist the event, then apply it in the persistence callback.
  override def receiveCommand: Receive = {
    case Draw(changes, color) =>
      persistAsync(DrawEvent(changes, color))(updateState)
  }

  /** Paints every pixel of `drawEvent` with the event's colour. */
  private def updateState(drawEvent: DrawEvent) = {
    val painted = drawEvent.changes.map(pixel => pixel -> drawEvent.color)
    drawballBoard = drawballBoard ++ painted
  }
}
Board as a Persistent Actor
Multiuser
Multiuser
// Multi-user board: persists draw events and sends each persisted change to the
// registered clients.
class DrawballActor() extends PersistentActor {
// Colour of every painted pixel.
var drawballBoard = Map.empty[Pixel, String]
// Clients that are sent a `Changes` message after each persisted draw.
var registeredClients = Set.empty[ActorRef]
// NOTE(review): constant id - if more than one instance of this actor runs
// (e.g. as sharded entities) they would all use this same id; confirm intended.
override def persistenceId: String = "drawballActor"
// Recovery: apply each replayed DrawEvent to the board.
override def receiveRecover: Receive = {
case d: DrawEvent => updateState(d)
}
override def receiveCommand: Receive = {
case Draw(changes, color) =>
persistAsync(DrawEvent(changes, color)) { de =>
updateState(de)
// Send the change to every registered client except the sender of the command.
(registeredClients - sender())
.foreach(_ ! Changes(de.changes, de.color))
}
case r: RegisterClient => {
registeredClients = registeredClients + r.client
// Send the new client whatever convertBoardToUpdates produces for the current
// board - presumably a full catch-up as `Changes` messages; helper not shown here.
convertBoardToUpdates(drawballBoard, Changes.apply)
.foreach(r.client ! _)
}
case ur: UnregisterClient => {
registeredClients = registeredClients - ur.client
}
}
// Applies a draw event to `drawballBoard` (body elided on the slide).
private def updateState(drawEvent: DrawEvent) = {
...
}
}
Multiuser and real time
/**
 * Per-connection actor sitting between one browser and the drawing board.
 *
 * On creation it registers itself with the board. It forwards the `Draw`
 * commands it receives to the board, relays `Changes` messages to the browser,
 * and unregisters from the board when it stops.
 *
 * @param browser        actor that receives the `Changes` messages
 * @param drawBoardActor actor that receives `Draw`, `RegisterClient` and `UnregisterClient`
 */
class ClientConnectionSimple(
  browser: ActorRef,
  drawBoardActor: ActorRef
) extends Actor {

  drawBoardActor ! RegisterClient(self)

  // Not read anywhere in this class; kept so the set of members is unchanged.
  var recentChanges = Map.empty[Pixel, String]

  override def receive: Receive = {
    case d: Draw =>
      drawBoardActor ! d
    // Must be a type pattern: `c @ Changes` is a stable-identifier pattern that
    // compares the message with the `Changes` companion object, so it never
    // matches an actual `Changes(...)` instance and nothing reaches the browser.
    case c: Changes =>
      browser ! c
  }

  override def postStop(): Unit =
    drawBoardActor ! UnregisterClient(self)
}
// Play! controller: accepts a WebSocket carrying Draw in / Changes out and backs
// it with an actor created from ClientConnection.props for that connection.
def socket = WebSocket.accept[Draw, Changes] { _ =>
  ActorFlow.actorRef[Draw, Changes] { browser =>
    ClientConnection.props(browser, drawballActor)
  }
}
Scalable
Akka Sharding to the rescue!
Akka Sharding to the rescue!
Akka Sharding to the rescue!
Scalable
Scaling - architecture
Sharding
/**
 * Splits a set of pixel changes into one `DrawEntity` per board cell.
 *
 * Pixels are grouped by the pair (y / entitySize, x / entitySize) using integer
 * division; the first component becomes the shard id and the second the entity id.
 *
 * @param changes pixels to paint
 * @param color   colour applied to all of them
 * @return one `DrawEntity` per distinct (shard id, entity id) pair
 */
def shardingPixels(changes: Iterable[Pixel], color: String): Iterable[DrawEntity] = {
  val pixelsByCell = changes.groupBy(p => (p.y / entitySize, p.x / entitySize))
  pixelsByCell.map { case ((row, column), cellPixels) =>
    DrawEntity(row, column, cellPixels.toSeq, color)
  }
}
// Maps each sharding envelope to (entity id as a string, message handed to the entity).
// Register/unregister envelopes carry the client as its serialized actor path.
private val extractEntityId: ShardRegion.ExtractEntityId = {
  case DrawEntity(_, id, pixels, color) =>
    id.toString -> Draw(pixels, color)
  case ShardingRegister(_, id, client) =>
    id.toString -> RegisterClient(Serialization.serializedActorPath(client))
  case ShardingUnregister(_, id, client) =>
    id.toString -> UnregisterClient(Serialization.serializedActorPath(client))
}
// Maps each sharding envelope to its shard id (first field of the envelope) as a string.
private val extractShardId: ShardRegion.ExtractShardId = {
  case DrawEntity(shard, _, _, _)      => shard.toString
  case ShardingRegister(shard, _, _)   => shard.toString
  case ShardingUnregister(shard, _, _) => shard.toString
}
Sharding Cluster
/**
 * Creates the "DrawballSystem" actor system and starts cluster sharding for
 * `DrawballActor` entities under `entityName`, using the extractors defined in
 * this file.
 *
 * @return the newly created actor system
 */
def initializeCluster(): ActorSystem = {
  val actorSystem = ActorSystem("DrawballSystem")
  val sharding = ClusterSharding(actorSystem)
  sharding.start(
    typeName = entityName,
    entityProps = Props[DrawballActor],
    settings = ClusterShardingSettings(actorSystem),
    extractEntityId = extractEntityId,
    extractShardId = extractShardId
  )
  actorSystem
}
/** Looks up the shard region registered under `entityName` in the given actor system. */
def shardRegion()(implicit actorSystem: ActorSystem) =
  ClusterSharding(actorSystem).shardRegion(entityName)
Akka - snapshotting
// Recovery handler, snapshot-aware excerpt (the `...` marks cases elided on the slide).
override def receiveRecover: Receive = {
...
// Snapshot offered: apply every stored change through updateState and pass every
// stored client to registerClient (helper not shown in this excerpt).
case SnapshotOffer(_, snapshot: DrawSnapshot) => {
snapshot.changes.foreach(updateState)
snapshot.clients.foreach(c => registerClient(RegisterClient(c)))
}
// Recovery finished: send ReRegisterClient() to every restored client, then clear
// the local set - presumably so that only clients that answer are tracked again; confirm.
case RecoveryCompleted => {
registeredClients.foreach(c => c ! ReRegisterClient())
registeredClients = Set.empty
}
}
// Command handler, snapshot-aware excerpt (the `...` marks cases elided on the slide).
override def receiveCommand: Receive = {
case Draw(changes, color) =>
persistAsync(DrawEvent(changes, color)) { de =>
updateState(de)
// Count persisted events; once the counter exceeds 1000 it is reset and a
// "snap" message is sent to this actor itself to request a snapshot.
changesNumber += 1
if (changesNumber > 1000) {
changesNumber = 0
self ! "snap"
}
// Send the change to every registered client except the sender of the command.
(registeredClients - sender())
.foreach(_ ! Changes(de.changes, de.color))
}
// Snapshot content: the current board converted by convertBoardToUpdates with
// DrawEvent.apply, plus the serialized actor path of every registered client.
case "snap" => saveSnapshot(DrawSnapshot(
convertBoardToUpdates(drawballBoard, DrawEvent.apply).toSeq,
registeredClients.map(Serialization.serializedActorPath).toSeq
))
...
}
DEMO
Tips and tricks
- Use `ClusterSharding.startProxy` to not hold any entities on the node.
- Distributed coordinator - akka.extensions += "akka.cluster.ddata.DistributedData" (Experimental).
- Use Protocol Buffers.
- Be careful about message buffering.
Summary:
- lines of Code: 275!,
- multiuser,
- scalable,
- fault tolerant.
Bibliography
- http://www.slideshare.net/bantonsson/akka-persistencescaladays2014
- http://doc.akka.io/docs/akka/current/scala/cluster-sharding.html
- https://github.com/trueaccord/ScalaPB
Scaflow - scientific workflows in Akka
Workflow
- Directed Graph
- Many inputs/Many outputs
- Nodes - activities
- Edges - dependencies (control flow)
- Each node activity == fun(Data): Result
Scientific workflow
- Data elements can be big
- Activities can be long-running and resource intensive
- Often invoke legacy code (e.g. Fortran, C) or external services
Scientific workflow - requirements
- Parallelization and distribution of computations
- Persistence and recovery
- Fault tolerance
Actor model and workflow - similarities
Akka-streams to the rescue!
- Built with the actor model
- Support for complicated flows (beautiful graph-oriented API)
- Concurrent data processing
But...
Scientific workflows | Akka streams |
---|---|
bounded input data set | unbounded data stream |
big data elements | small data elements |
focused on scaling | focused on back-pressure (reactive streams implementation) |
important recovery mechanism | important high message throughput |
Components
Source
Data processing
Data filtering
Data grouping
Components
Broadcast data
Merge data
Components
Synchronize data
Data sink
Scaflow API
// Square each input, collect the squares in groups of three, sum each group and print it.
val squares = StandardWorkflow.source(List(1, 2, 3, 4, 5, 6)).map(n => n * n)
val groupSums = squares.group(3).map(_.sum)
groupSums.sink(println).run
Concurrent computations in actor model
Push model
Pull model
Scalability in actor model
Workflow persistent state
- Event sourcing
- Persistent actors
// Recovery: for every replayed element that passes `filter`, call `deliver` again.
override def receiveRecover: Receive = receiveRecoverWithAck {
  case next: NextVal[A] =>
    if (filter(next.data)) deliver(destination, next)
}

// Commands: persist the incoming element, then deliver it if it passes `filter`.
override def receiveCommand: Receive = receiveCommandWithAck {
  case next: NextVal[A] =>
    persistAsync(next) { persisted =>
      if (filter(persisted.data)) deliver(destination, next)
    }
}
/* ... */
// Same pipeline as the StandardWorkflow demo, with a name given to each persistent stage.
val squaredValues =
  PersistentWorkflow.source("source", List(1, 2, 3, 4, 5, 6)).map("square", n => n * n)
val summedGroups = squaredValues.group("group", 3).map("sum", _.sum)
summedGroups.sink(println).run
Fault tolerance
// Supervision for HTTP-calling stages: Restart on a timeout, Stop on any other failure.
val HTTPSupervisorStrategy =
  OneForOneStrategy(maxNrOfRetries = 10, withinTimeRange = 5.seconds) {
    case _: TimeoutException => Restart // retry
    case _                   => Stop    // drop the message
  }

// Connector-based flow: fetch the PNG for each incoming id under the strategy above.
PersistentWorkflow
  .connector[String]("pngConnector")
  .map("getPng", getPathwayMapPng, Some(HTTPSupervisorStrategy))
  .sink("sinkPng", id => println(s"PNG map downloaded for $id"))
Real world example
Real world example cont.
// Supervision for HTTP-calling stages: Restart on a timeout, Stop on any other failure.
val HTTPSupervisorStrategy =
  OneForOneStrategy(maxNrOfRetries = 10, withinTimeRange = 10.seconds) {
    case _: TimeoutException => Restart // try to perform operation again
    case _                   => Stop    // drop the message
  }

// Sub-flow: download the PNG map for each pathway id, using 8 workers.
val savePathwayPngFlow =
  PersistentWorkflow
    .connector[String]("pngConnector")
    .map("getPng", getPathwayMapPng, Some(HTTPSupervisorStrategy), workersNumber = 8)
    .sink("sinkPng", id => println(s"PNG map downloaded for $id"))

// Sub-flow: download the text details for each pathway id, using 8 workers.
val savePathwayTextFlow =
  PersistentWorkflow
    .connector[String]("textConnector")
    .map("getTxt", getPathwayDetails, Some(HTTPSupervisorStrategy), workersNumber = 8)
    .sink("sinkTxt", id => println(s"TXT details downloaded for pathway $id"))

// Main flow: fetch the pathway set for "hsa", split it, and broadcast to both sub-flows.
val pathwayIds =
  PersistentWorkflow
    .source("source", List("hsa"))
    .map("getSetOfPathways", getSetOfPathways, Some(HTTPSupervisorStrategy))
    .split[String]("split")
pathwayIds.broadcast("broadcast", savePathwayPngFlow, savePathwayTextFlow).run
Real world example - scaling
// Addresses of the two remote worker actor systems.
val remoteWorkersHostLocations =
  Seq(
    "akka.tcp://workersActorSystem@localhost:5150",
    "akka.tcp://workersActorSystem@localhost:5151"
  ).map(uri => AddressFromURIString(uri))

// Supervision for HTTP-calling stages: Restart on a timeout, Stop on any other failure.
val HTTPSupervisorStrategy =
  OneForOneStrategy(maxNrOfRetries = 10, withinTimeRange = 10.seconds) {
    case _: TimeoutException => Restart // try to perform operation again
    case _                   => Stop    // drop the message
  }

// Sub-flow: download the PNG map for each pathway id; its 8 workers are given the
// remote addresses above.
val savePathwayPngFlow =
  PersistentWorkflow
    .connector[String]("pngConnector")
    .map(
      "getPng",
      getPathwayMapPng,
      Some(HTTPSupervisorStrategy),
      workersNumber = 8,
      remoteAddresses = remoteWorkersHostLocations
    )
    .sink("sinkPng", id => println(s"PNG map downloaded for $id"))

// Sub-flow: download the text details for each pathway id, using 8 workers
// (no remote addresses are passed here).
val savePathwayTextFlow =
  PersistentWorkflow
    .connector[String]("textConnector")
    .map("getTxt", getPathwayDetails, Some(HTTPSupervisorStrategy), workersNumber = 8)
    .sink("sinkTxt", id => println(s"TXT details downloaded for pathway $id"))

// Main flow: fetch the pathway set for "hsa", split it, and broadcast to both sub-flows.
val pathwayIds =
  PersistentWorkflow
    .source("source", List("hsa"))
    .map("getSetOfPathways", getSetOfPathways, Some(HTTPSupervisorStrategy))
    .split[String]("split")
pathwayIds.broadcast("broadcast", savePathwayPngFlow, savePathwayTextFlow).run
Future development
- Graph API
- Extensive usage of Akka clusters
- Workflow monitoring
https://github.com/liosedhel/scaflow
ScalaCamp-ThePowerOfAkka
By liosedhel
ScalaCamp-ThePowerOfAkka
- 1,656