Piotr Gawryś
Monix Observableを理解する
https://github.com/Avasil
twitter.com/p_gawrys
Monix メンテナの一人
非同期プログラミングのためのライブラリ Task、IO などの並行プリミティブを提供
RxJava に影響を受けている プッシュ・ベースかつバックプレッシャー付き
twitter.com/p_gawrys
// Sums the input chunk by chunk: elements are grouped with bufferTumbling,
// each group is summed inside a Task, non-positive group sums are dropped,
// and the remaining sums are widened to Long and added into one total.
val result: Task[Long] = {
  val chunkSums =
    Observable
      .fromIterable(allElements)
      .bufferTumbling(bufferSize)
      .mapEval(chunk => Task(chunk.sum))

  val positiveSums = chunkSums.filter(sum => sum > 0).map(sum => sum.toLong)

  positiveSums.foldLeftL(0L)((total, sum) => total + sum)
}
今日は内部構造にも踏み込んでいきます
// Consumer side of the protocol: receives the events pushed by an Observable.
trait Observer[-A] {
// Delivers one element; the returned Future[Ack] is the consumer's acknowledgement.
def onNext(elem: A): Future[Ack]
// Terminal event: the stream ended with the error `ex`.
def onError(ex: Throwable): Unit
// Terminal event: the stream ended normally.
def onComplete(): Unit
}
// An Observer bundled with a Scheduler. Some kind of ExecutionContext is
// needed to do anything with the Future returned by onNext, so every
// Subscriber carries one.
trait Subscriber[-A] extends Observer[A] {
// Declared implicit so it is available wherever an implicit Scheduler is required.
implicit def scheduler: Scheduler
}
// Producer side of the protocol.
abstract class Observable[+A] {
// Connects `subscriber` to this stream and returns a handle for cancelling the subscription.
def unsafeSubscribeFn(subscriber: Subscriber[A]): Cancelable
}
まずは定義から
Observable
Observer
Observer
subscribe
subscribe
Observable
Observer
Observer
onNext
onNext
// Element delivery part of the Observer contract.
trait Observer[-A] {
// May be called zero or more times until onComplete/onError is called.
// The result acknowledges the element (see Ack).
def onNext(elem: A): Future[Ack]
}
// Acknowledgement returned from onNext. Ack itself extends Future[Ack], so the
// two singletons below can be returned directly where a Future[Ack] is expected.
sealed abstract class Ack extends Future[Ack]
// Acknowledgement telling the producer it may send the next element.
case object Continue extends Ack
// Acknowledgement telling the producer to stop sending elements.
case object Stop extends Ack
onComplete/onError が呼ばれるまでは onNext は 0~複数回呼ばれることがある
// Terminal-event part of the Observer contract: exactly one of onComplete or
// onError is called, once; calling both is forbidden.
trait Observer[-A] {
// The stream ended with the error `ex`.
def onError(ex: Throwable): Unit
// The stream ended normally.
def onComplete(): Unit
}
onComplete もしくは onError は一度だけ呼ばれ 両方を呼ぶことは禁止
// Full Observer contract: onNext, onComplete and onError must be called
// sequentially (never concurrently), and none of them may throw.
trait Observer[-A] {
// Delivers one element and returns the consumer's acknowledgement.
def onNext(elem: A): Future[Ack]
// Terminal event carrying the failure.
def onError(ex: Throwable): Unit
// Terminal event for normal completion.
def onComplete(): Unit
}
onNext、onComplete、onError は逐次的に呼ぶこと 例外を投げることは禁止
// A Subscriber subscribes to an Observable; the subscription can be cancelled
// through the returned Cancelable.
abstract class Observable[+A] {
// Starts the subscription for `subscriber` and returns its cancellation handle.
def unsafeSubscribeFn(subscriber: Subscriber[A]): Cancelable
}
Subscriber は Observable を購読する Cancelable によってキャンセル可能
twitter.com/p_gawrys
// Observable that emits the single, already-known value `elem` and then completes.
final class NowObservable[+A](elem: A) extends Observable[A] {
def unsafeSubscribeFn(subscriber: Subscriber[A]): Cancelable = {
// The acknowledgement returned by onNext is deliberately ignored:
// there is no need to back-pressure before calling onComplete.
subscriber.onNext(elem)
subscriber.onComplete()
// Everything has already been emitted synchronously, so no specific
// action is needed if the connection is cancelled.
Cancelable.empty
}
}
twitter.com/p_gawrys
/** Subscriber that prints every event it receives to standard output. */
final class PrintSubscriber[-A] extends Subscriber[A] {
  override def scheduler: Scheduler = Scheduler.global

  /** Prints the element and always asks for the next one. */
  override def onNext(elem: A): Future[Ack] = {
    println(s"Received $elem")
    Continue
  }

  /** Prints the error that terminated the stream. */
  override def onError(ex: Throwable): Unit =
    println(s"Received error $ex")

  /** Prints a marker once the stream has completed. */
  override def onComplete(): Unit =
    println("Received final event")
}
twitter.com/p_gawrys
// A one-element stream holding the value 10.
val source: Observable[Int] = new NowObservable(10)
// Subscribing pushes the element, then the completion event, to the printer.
val cancelable: Cancelable = source.unsafeSubscribeFn(new PrintSubscriber)
// => Received 10
// => Received final event
twitter.com/p_gawrys
// The NowObservable / PrintSubscriber example with both sides written inline
// as anonymous classes specialised to Int.
new Observable[Int] {
  def unsafeSubscribeFn(subscriber: Subscriber[Int]): Cancelable = {
    // Emit the single element, then complete; the acknowledgement is not
    // awaited because nothing else follows the element.
    subscriber.onNext(10)
    subscriber.onComplete()
    Cancelable.empty
  }
}.unsafeSubscribeFn(new Subscriber[Int] {
  override def scheduler: Scheduler = Scheduler.global

  // Prints the element and asks for more.
  override def onNext(elem: Int): Future[Ack] = {
    println(s"Received $elem")
    Continue
  }

  override def onError(ex: Throwable): Unit = {
    println(s"Received error $ex")
  }

  override def onComplete(): Unit = {
    println(s"Received final event")
  }
})
// => Received 10
// => Received final event
import monix.eval.Task
import monix.reactive.Observable
import scala.concurrent.duration._
import scala.util.Random
// Keeps evaluating Random.nextInt(10) for ten seconds and gathers every
// produced value into a List.
val result: Task[List[Int]] = {
  val randomInts: Observable[Int] = Observable.repeatEval(Random.nextInt(10))
  randomInts.takeByTimespan(10.seconds).toListL
}
もう少し複雑な例
twitter.com/p_gawrys
object Observable {

  /** Builds an Observable backed by [[RepeatEvalObservable]]; `task` is passed
    * by name, so it is not evaluated here.
    */
  def repeatEval[A](task: => A): Observable[A] = new RepeatEvalObservable(task)
}
// Observable built around the by-name expression `eval`.
// On this slide only the subscription entry point is shown; the body of
// fastLoop is elided (`???`).
final class RepeatEvalObservable[+A](eval: => A) extends Observable[A] {
def unsafeSubscribeFn(subscriber: Subscriber[A]): Cancelable = {
val s = subscriber.scheduler
// Handed back to the caller and also passed into the loop so the loop
// can observe cancellation.
val cancelable = BooleanCancelable()
// Start the loop with a sync index of 0.
fastLoop(subscriber, cancelable, s.executionModel, 0)(s)
cancelable
}
@tailrec
def fastLoop(
o: Subscriber[A],
// We might check it periodically to
// see if the subscription is not cancelled
c: BooleanCancelable,
// Scheduler has ExecutionModel, e.g. Synchronous, Batched, AlwaysAsync
// We could add async boundaries according to it
em: ExecutionModel,
// BatchedExecution model inserts async boundary
// after N synchronous operations
syncIndex: Int
)(implicit s: Scheduler): Unit = ???
}
twitter.com/p_gawrys
/**
 * Emission loop: evaluates `eval`, pushes the value downstream and decides,
 * from the acknowledgement, whether to keep looping on the current thread,
 * hand over to `reschedule`, or finish.
 *
 * @param o         downstream subscriber
 * @param c         cancellation flag, consulted before rescheduling
 * @param em        execution model supplying the next frame index
 * @param syncIndex current frame index
 */
@tailrec
def fastLoop(
  o: Subscriber[A],
  c: BooleanCancelable,
  em: ExecutionModel,
  syncIndex: Int
)(implicit s: Scheduler): Unit = {
  // A non-fatal exception while producing or delivering the element is
  // converted into a failed acknowledgement instead of escaping the loop.
  val ack: Future[Ack] =
    try o.onNext(eval)
    catch { case NonFatal(ex) => Future.failed(ex) }

  // -1: downstream said Stop; 0: go through reschedule (acknowledgement is
  // neither Continue nor Stop, or nextFrameIndex returned 0); > 0: keep looping.
  val nextIndex: Int =
    if (ack == Stop) -1
    else if (ack == Continue) em.nextFrameIndex(syncIndex)
    else 0

  nextIndex match {
    case n if n > 0         => fastLoop(o, c, em, n)
    case 0 if !c.isCanceled => reschedule(ack, o, c, em)
    case _                  => ()
  }
}
// Continuation used by fastLoop when it cannot keep looping synchronously;
// receives the pending acknowledgement plus the loop's subscriber,
// cancellation flag and execution model. Body elided on this slide (`???`).
def reschedule(
ack: Future[Ack],
o: Subscriber[A],
c: BooleanCancelable,
em: ExecutionModel
)(implicit s: Scheduler): Unit = ???
/**
 * Waits for the pending acknowledgement and resumes the emission loop once
 * the downstream has answered.
 *
 * @param ack acknowledgement to wait for
 * @param o   downstream subscriber
 * @param c   cancellation flag forwarded to the resumed loop
 * @param em  execution model forwarded to the resumed loop
 */
def reschedule(
  ack: Future[Ack],
  o: Subscriber[A],
  c: BooleanCancelable,
  em: ExecutionModel
)(implicit s: Scheduler): Unit =
  ack.onComplete {
    case Success(next) =>
      // Resume from frame index 0 on Continue; a Stop ends the loop and
      // needs no further action.
      if (next == Continue) fastLoop(o, c, em, 0)
    case Failure(ex) =>
      // The acknowledgement itself failed: report it through the scheduler.
      s.reportFailure(ex)
  }
twitter.com/p_gawrys
twitter.com/p_gawrys
// toListL expressed through two smaller operators, both still stubbed (`???`) on this slide.
abstract class Observable[+A] {
// Collects every element into a List by appending to a ListBuffer.
final def toListL: Task[List[A]] =
foldLeft(mutable.ListBuffer.empty[A])(_ += _)
// We know for sure that there will be only one element
.firstOrElseL(mutable.ListBuffer.empty[A])
.map(_.toList)
// `seed` is by-name, so the initial state is not evaluated at the call site.
final def foldLeft[R](seed: => R)(op: (R, A) => R): Observable[R] = ???
// `default` is by-name, so it is not evaluated at the call site.
final def firstOrElseL[B >: A](default: => B): Task[B] = ???
}
twitter.com/p_gawrys
// Observable that folds `source` with `f`, starting from the state produced by `initial`.
// The subscriber body is elided on this slide (`{ ... }`).
final class FoldLeftObservable[A, R](
source: Observable[A],
initial: () => R,
f: (R, A) => R
) extends Observable[R] {
def unsafeSubscribeFn(out: Subscriber[R]): Cancelable = {
// True only while user code (`initial`) is running: errors thrown in that
// window are streamed to `out`; later ones are not caught here.
var streamErrors = true
try {
val initialState = initial()
streamErrors = false
source.unsafeSubscribeFn(new Subscriber[A] { ... })
} catch {
// If an error was thrown in source.unsafeSubscribeFn(...)
// it is a breach of the protocol and the behavior is undefined
// but we don't want to call out.onError in case it already happened there
case NonFatal(ex) if streamErrors =>
out.onError(ex)
Cancelable.empty
}
}
}
// Subscriber that accumulates the fold state and emits it downstream once,
// when the source completes.
source.unsafeSubscribeFn(new Subscriber[A] {
  // Explicit type: implicit members should not rely on inference.
  implicit val scheduler: Scheduler = out.scheduler

  // We might call onError in onNext so we need this
  // flag to protect from potentially calling it twice
  // (once from onNext, once by upstream)
  private[this] var isDone = false
  private[this] var state: R = initialState

  def onNext(elem: A): Ack = {
    try {
      // User-supplied function could throw an exception.
      state = f(state, elem)
      Continue
    } catch {
      case NonFatal(ex) =>
        // Signal the failure downstream and tell upstream to stop.
        onError(ex)
        Stop
    }
  }

  def onComplete(): Unit =
    if (!isDone) {
      isDone = true
      // Emit the final state as the single element, then complete.
      out.onNext(state)
      out.onComplete()
    }

  def onError(ex: Throwable): Unit =
    if (!isDone) {
      isDone = true
      out.onError(ex)
    }
})
// Mutable subscriber state with no explicit synchronization.
private[this] var isDone = false
private[this] var state: R = initialState
その var スレッドセーフですか? 変数は複数スレッドから書き込める状態
// Safe without locks only because the Observer protocol requires
// onNext/onError/onComplete to be called sequentially.
private[this] var isDone = false
private[this] var state: R = initialState
プロトコルによってスレッドセーフが保証されている
out.onNext(next).flatMap(_ => out2.onNext).flatMap(_ => out3.onNext) ...
out.onNext(next).flatMap(_ => out2.onNext).flatMap(_ => Continue) ...
out.onNext(next).flatMap(_ => Continue) ...
Continue
If we follow the onNext calls, they unfold like this:
The next element is then sent only after Continue is received (remember ack.onComplete in repeatEval's reschedule?)
onNext の呼び出しを展開してみる
Internally, each Future might be scheduled on a potentially different thread with ec.execute().
This establishes a happens-before relation between writing and reading isDone, even when they happen on different threads:
// Illustration of the happens-before chain: the write to isDone precedes the
// nested ec.execute call, and the nested task reads it.
var isDone = false
// first task
ec.execute(() => {
isDone = true
// second thread
ec.execute(() => {
assert(isDone)
})
})
内部では各 Future は別スレッドに分担される 事前発生関係を保つことで安全性を担保
twitter.com/p_gawrys
abstract class Observable[+A] {

  /** Collects every element of this stream into a List. */
  final def toListL: Task[List[A]] =
    foldLeft(mutable.ListBuffer.empty[A])(_ += _)
      // We know for sure that there will be only one element
      .firstOrElseL(mutable.ListBuffer.empty[A])
      .map(_.toList)

  /** Folds this stream with `op`, starting from `seed`.
    *
    * `seed` is by-name and is wrapped in a thunk, so it is evaluated by
    * [[FoldLeftObservable]] when `initial()` is called rather than here.
    */
  final def foldLeft[R](seed: => R)(op: (R, A) => R): Observable[R] =
    new FoldLeftObservable[A, R](this, () => seed, op)

  /** Stubbed on this slide. */
  final def firstOrElseL[B >: A](default: => B): Task[B] = ???
}
/**
 * Task that completes with the first element of this stream, with `default`
 * when the stream completes before emitting anything, or with the stream's error.
 *
 * @param default fallback value, evaluated only when the stream completes empty
 */
final def firstOrElseL[B >: A](default: => B): Task[B] =
  Task.create { (s, cb) =>
    val firstElementSubscriber: Subscriber[A] = new Subscriber[A] {
      implicit val scheduler: Scheduler = s

      // Set once the callback has been invoked, so that terminal events
      // arriving afterwards are ignored.
      private[this] var finished = false

      def onNext(elem: A): Ack = {
        cb.onSuccess(elem)
        finished = true
        // Only the first element is of interest.
        Stop
      }

      def onComplete(): Unit =
        if (finished) ()
        else {
          finished = true
          // Try captures an exception thrown while evaluating `default`.
          cb(Try(default))
        }

      def onError(ex: Throwable): Unit =
        if (finished) ()
        else {
          finished = true
          cb.onError(ex)
        }
    }

    unsafeSubscribeFn(firstElementSubscriber)
  }
// ZIO flavour of firstOrElseL: same subscriber logic, but the result is
// reported through the ZIO.effectAsync callback instead of a Monix callback.
final def firstOrElseLZIOOO[B >: A](default: => B): zio.Task[B] = {
ZIO.descriptorWith { desc =>
ZIO.effectAsync { cb =>
unsafeSubscribeFn(new Subscriber[A] {
// Scheduler built from the executor found in the descriptor.
implicit val scheduler: Scheduler =
Scheduler(desc.executor.asEC)
// Guards against invoking the callback more than once.
private[this] var isDone = false
def onNext(elem: A): Ack = {
cb(ZIO.succeed(elem))
isDone = true
// Only the first element is needed.
Stop
}
def onError(ex: Throwable): Unit =
if (!isDone) {
isDone = true
cb(ZIO.fail(ex))
}
def onComplete(): Unit =
if (!isDone) {
isDone = true
// Completed without any element: fall back to `default`.
cb(ZIO(default))
}
})
}
}
}
abstract class Observable[+A] {
// Takes elements until `timespan` has elapsed; delegates to TakeLeftByTimespanObservable.
final def takeByTimespan(timespan: FiniteDuration): Observable[A] =
new TakeLeftByTimespanObservable(this, timespan)
}
一定時間が経過するまで要素を取る
/**
 * First step towards takeByTimespan: a pure pass-through that forwards every
 * event from `source` to the downstream subscriber. `timespan` is not used yet.
 */
final class TakeLeftByTimespanObservable[A](
  source: Observable[A],
  timespan: FiniteDuration
) extends Observable[A] {

  def unsafeSubscribeFn(out: Subscriber[A]): Cancelable = {
    val passThrough: Subscriber[A] = new Subscriber[A] {
      implicit val scheduler: Scheduler = out.scheduler

      def onNext(elem: A): Future[Ack] = out.onNext(elem)
      def onComplete(): Unit = out.onComplete()
      def onError(ex: Throwable): Unit = out.onError(ex)
    }

    source.unsafeSubscribeFn(passThrough)
  }
}
Let's add a concurrent task to call onComplete after timespan
一定時間後に onComplete を呼ぶタスクを追加してみる
// Subscribes to `source` and, concurrently, schedules a one-shot task that
// calls onComplete after `timespan`. Both the source subscription and the
// timer are added to `composite`. Event handlers are stubbed (`???`) on this slide.
def unsafeSubscribeFn(out: Subscriber[A]): Cancelable = {
val composite = CompositeCancelable()
// NOTE(review): the method's result is whatever `composite += ...` returns;
// presumably the composite itself - confirm against CompositeCancelable.
composite += source.unsafeSubscribeFn(new Subscriber[A] {
implicit val scheduler = out.scheduler
// Scheduled when the subscriber is constructed.
private[this] val timeoutTask: Cancelable = {
val ref = scheduler.scheduleOnce(
timespan.length,
timespan.unit,
new Runnable { override def run(): Unit = onComplete() }
)
composite += ref
ref
}
def onNext(elem: A): Future[Ack] = ???
def onError(ex: Throwable): Unit = ???
def onComplete(): Unit = ???
})
}
// Event handlers for the time-limited subscriber, WITHOUT synchronization.
// NOTE: onComplete can also be invoked by the scheduled timeout task, and
// there is no happens-before relationship between that task and onNext,
// so access to isActive in this version is not thread-safe.
private[this] var isActive = true
// Marks the subscriber as finished and cancels the pending timeout.
private def deactivate(): Unit = {
isActive = false
timeoutTask.cancel()
}
def onNext(elem: A): Future[Ack] = {
// Forward while active; deactivate if downstream stops or fails.
if (isActive) out.onNext(elem).syncOnStopOrFailure(_ => deactivate())
else Stop
}
def onError(ex: Throwable): Unit = {
if (isActive) {
deactivate()
out.onError(ex)
}
}
def onComplete(): Unit = {
if (isActive) {
deactivate()
out.onComplete()
}
}
Access to isActive needs to be synchronized because there is no happens-before relationship between onNext and timeoutTask
onNext と timeoutTask には事前発生関係が無いため isActive へのアクセスは同期化する必要がある
// Event handlers for the time-limited subscriber. onComplete can be invoked by
// the scheduled timeout task as well as by upstream, so every access to
// isActive goes through this object's monitor.
private[this] var isActive = true

// Marks the subscriber as finished and cancels the pending timeout.
// Synchronized in its own right: syncOnStopOrFailure may invoke it from a
// Future callback, i.e. outside the synchronized block of onNext.
private def deactivate(): Unit = synchronized {
  isActive = false
  timeoutTask.cancel()
}

def onNext(elem: A): Future[Ack] = synchronized {
  // Forward while active; deactivate if downstream stops or fails.
  if (isActive) out.onNext(elem).syncOnStopOrFailure(_ => deactivate())
  else Stop
}

def onError(ex: Throwable): Unit = synchronized {
  if (isActive) {
    deactivate()
    out.onError(ex)
  }
}

def onComplete(): Unit = synchronized {
  if (isActive) {
    deactivate()
    out.onComplete()
  }
}
// Extension methods for acknowledgements. `Self` keeps the precise static
// type of the receiver so that it can be returned unchanged.
// Background reading on F-bounded polymorphism:
// https://github.com/ghik/opinionated-scala/blob/master/chapters/Generics-and-type-members.md#f-bounded-polymorphism
implicit class AckExtensions[Self <: Future[Ack]](val source: Self) extends AnyVal {
// Runs `cb` when the acknowledgement is Stop (with None) or a failure
// (with Some(error)); does nothing for Continue. Exceptions thrown by `cb`
// are reported to `r`. Always returns `source` itself.
def syncOnStopOrFailure(
cb: Option[Throwable] => Unit
)(implicit r: UncaughtExceptionReporter): Self = {
// Already-known Stop: invoke the callback right away, on the calling thread.
if (source eq Stop)
try cb(None)
catch { case e if NonFatal(e) => r.reportFailure(e) }
// Already-known Continue needs nothing; anything else is inspected on completion.
else if (source ne Continue)
source.onComplete { ack =>
try ack match {
case Success(Stop) => cb(None)
case Failure(e) => cb(Some(e))
case _ => ()
} catch {
case e if NonFatal(e) => r.reportFailure(e)
}
// NOTE(review): `immediate` is presumably an ExecutionContext that runs the
// callback on the completing thread - confirm where it is defined.
}(immediate)
source
}
}
// Timeout wiring, variant 1: a separate anonymous Runnable is allocated just
// to call onComplete when the timer fires.
source.unsafeSubscribeFn(new Subscriber[A] {
implicit val scheduler = out.scheduler
private[this] var isActive = true
private[this] val timeoutTask: Cancelable = {
val ref = scheduler.scheduleOnce(
timespan.length,
timespan.unit,
new Runnable { override def run(): Unit = onComplete() }
)
composite += ref
ref
}
})
// Timeout wiring, variant 2: the subscriber itself implements Runnable, so it
// is passed to scheduleOnce directly and no extra Runnable object is created.
source.unsafeSubscribeFn(new Subscriber[A] with Runnable {
implicit val scheduler = out.scheduler
private[this] var isActive = true
private[this] val timeoutTask: Cancelable = {
val ref = scheduler.scheduleOnce(
timespan.length,
timespan.unit,
this
)
composite += ref
ref
}
// Invoked by the scheduler when the timer fires.
def run() = onComplete()
})
// Final version: forwards elements from `source` until `timespan` has elapsed,
// then completes the downstream subscriber.
final class TakeLeftByTimespanObservable[A](
source: Observable[A],
timespan: FiniteDuration
) extends Observable[A] {
def unsafeSubscribeFn(out: Subscriber[A]): Cancelable = {
// Holds both the source subscription and the timer.
val composite = CompositeCancelable()
composite += source.unsafeSubscribeFn(new Subscriber[A] with Runnable {
implicit val scheduler = out.scheduler
private[this] var isActive = true
// The subscriber is its own timer task (see run()); scheduled on construction.
private[this] val timeoutTask: Cancelable = {
val ref = scheduler.scheduleOnce(timespan.length, timespan.unit, this)
composite += ref
ref
}
// Timer fired: complete the stream.
def run(): Unit = onComplete()
// Synchronized because it can be reached from the Future callback installed
// by syncOnStopOrFailure as well as from the handlers below.
private def deactivate(): Unit = synchronized {
isActive = false
timeoutTask.cancel()
}
def onNext(elem: A): Future[Ack] = synchronized {
if (isActive) out.onNext(elem).syncOnStopOrFailure(_ => deactivate())
else Stop
}
def onError(ex: Throwable): Unit = synchronized {
if (isActive) {
deactivate()
out.onError(ex)
}
}
// Called either by upstream or by the timer; the isActive check makes the
// second call a no-op.
def onComplete(): Unit = synchronized {
if (isActive) {
deactivate()
out.onComplete()
}
}
})
}
}
// The running example: evaluate Random.nextInt(10) repeatedly for ten seconds
// and gather the produced values into a List.
val result: Task[List[Int]] = {
  val randomInts: Observable[Int] = Observable.repeatEval(Random.nextInt(10))
  randomInts.takeByTimespan(10.seconds).toListL
}
Could be inlined to
このコードを展開してみよう
// Conceptual expansion of repeatEval(...).takeByTimespan(...).toListL into the
// objects it builds.
// NOTE(review): this is a sketch rather than compilable code - `A` and
// `default` are not bound here, `.firstOrElse()` / `.map(_.toList)` are left
// over from the operator chain, and FoldLeftObservable is shown with a
// curried argument list.
Task.create { (s, cb) =>
// Operators nest inside-out: repeatEval, wrapped by takeByTimespan, wrapped by foldLeft.
val source =
new FoldLeftObservable(
new TakeLeftByTimespanObservable(
new RepeatEvalObservable(Random.nextInt(10)),
10.second
),
mutable.ListBuffer.empty[Int]
)(_ += _).firstOrElse().map(_.toList)
// The subscriber from firstOrElseL sits at the very end of the chain.
source.unsafeSubscribeFn(new Subscriber[A] {
implicit val scheduler: Scheduler = s
private[this] var isDone = false
def onNext(elem: A): Ack = {
cb.onSuccess(elem)
isDone = true
Stop
}
def onError(ex: Throwable): Unit =
if (!isDone) {
isDone = true
cb.onError(ex)
}
def onComplete(): Unit =
if (!isDone) {
isDone = true
cb(Try(default))
}
})
.map(_.toList)
}
今回カバーできなかった事
I'm about to show a few micro-benchmarks.
Please, keep in mind that the results can be misleading - it's best to measure for your specific use case.
API, ecosystem and familiarity are usually better criteria, as long as the library meets the minimum performance requirements.
泥臭い内部構造は割に合うのか ライブラリの選択は性能が全てでは無いが...
/** Monix variant of the benchmark: increment each element, keep the even
  * results and hand the stream to `sum`.
  */
def monixObservable(): Int = {
  val incremented = Observable.fromIterable(allElements).map(n => n + 1)
  val evens = incremented.filter(n => n % 2 == 0)
  sum(evens)
}
/** Akka Streams variant of the benchmark: increment each element, keep the
  * even results, fold them into a Long and block until the sum is available.
  */
def akkaStream(): Long = {
  val evens = AkkaSource(allElements)
    .map(n => n + 1)
    .filter(n => n % 2 == 0)
  val graph = evens.toMat(AkkaSink.fold(0L)(_ + _))(Keep.right)
  Await.result(graph.run(), Duration.Inf)
}
/** Baseline for the benchmark: a hand-written while loop that increments each
  * element and sums the even results.
  */
def whileLoop(): Int = {
  val it = allElements.iterator
  var total = 0
  while (it.hasNext) {
    val incremented = it.next() + 1
    if (incremented % 2 == 0) {
      total = total + incremented
    }
  }
  total
}
/** ZIO Streams variant of the benchmark, fed from pre-built chunks and run
  * with `zioUntracedRuntime`.
  */
def zioStream(): Int = {
  val evens = ZStream
    .fromChunks(zioChunks: _*)
    .map(n => n + 1)
    .filter(n => n % 2 == 0)
  zioUntracedRuntime.unsafeRun(evens.runSum)
}
/** fs2 variant of the benchmark: flatten the pre-built chunks, increment each
  * element, keep the even results and fold them into their sum.
  */
def fs2Stream(): Int = {
  val evens = FS2Stream(fs2Chunks: _*)
    .flatMap(FS2Stream.chunk)
    .map(n => n + 1)
    .filter(n => n % 2 == 0)
  evens.compile.fold(0)((acc, n) => acc + n)
}
[info] Benchmark (chunkCount) (chunkSize) Mode Cnt Score Error Units
[info] akka 1000 1000 thrpt 20 10.749 ± 0.082 ops/s
[info] fs2 1000 1000 thrpt 20 55.939 ± 0.497 ops/s
[info] iterator 1000 1000 thrpt 20 76.830 ± 1.182 ops/s
[info] monix 1000 1000 thrpt 20 97.942 ± 2.479 ops/s
[info] vector 1000 1000 thrpt 20 61.514 ± 0.213 ops/s
[info] whileLoop 1000 1000 thrpt 20 355.31 ± 2.803 ops/s
[info] zio 1000 1000 thrpt 20 31.971 ± 0.197 ops/s
Note that Monix and Akka process elements one-by-one, and fs2 and zio do it in batches
Monix と Akka は要素を 1つずつ処理する
twitter.com/p_gawrys
[info] Benchmark (n) Mode Cnt Score Error Units
[info] fs2 1000 thrpt 20 66490.570 ± 211.840 ops/s
[info] fs2 10000 thrpt 20 8241.498 ± 52.588 ops/s
[info] monix 1000 thrpt 20 99300.153 ± 619.293 ops/s
[info] monix 10000 thrpt 20 10539.976 ± 203.321 ops/s
[info] zio 1000 thrpt 20 1819.379 ± 16.974 ops/s
[info] zio 10000 thrpt 20 201.752 ± 2.983 ops/s
twitter.com/p_gawrys
[info] Benchmark (streams) Mode Cnt Score Error Units
[info] akka 100 thrpt 20 1048.289 ± 4.834 ops/s
[info] akka 1000 thrpt 20 118.239 ± 1.396 ops/s
[info] fs2 100 thrpt 20 347.603 ± 8.651 ops/s
[info] fs2 1000 thrpt 20 8.636 ± 0.128 ops/s
[info] monix 100 thrpt 20 15860.931 ± 122.029 ops/s
[info] monix 1000 thrpt 20 2563.933 ± 53.844 ops/s
[info] zio 100 thrpt 20 116.100 ± 0.973 ops/s
[info] zio 1000 thrpt 20 12.260 ± 0.315 ops/s
twitter.com/p_gawrys
/** fs2 variant of the chunked benchmark: chunk the elements, sum each chunk
  * inside a MonixTask, keep the positive sums and fold them into a Long.
  */
def fs2Stream = {
  val stream = FS2Stream
    .apply(allElements: _*)
    .chunkN(chunkSize)
    .evalMap[MonixTask, Int](chunk => MonixTask(sumIntScala(chunk.iterator)))
    .filter(_ > 0)
    .map(_.toLong)
    .compile
    .fold(0L)(_ + _)

  // `stream` is only a description of the computation; run it so the method
  // does the work and returns the sum instead of Unit.
  // NOTE(review): requires an implicit Scheduler in scope - confirm.
  stream.runSyncUnsafe()
}
/** Same as the fs2 chunked benchmark, but starting from chunks that were built
  * up front, so no chunking happens inside the measured code.
  */
def fs2StreamPreChunked = {
  val stream = FS2Stream(fs2Chunks: _*)
    .evalMap[MonixTask, Int](chunk => MonixTask(sumIntScala(chunk.iterator)))
    .filter(_ > 0)
    .map(_.toLong)
    .compile
    .fold(0L)(_ + _)

  // `stream` is only a description of the computation; run it so the method
  // does the work and returns the sum instead of Unit.
  // NOTE(review): requires an implicit Scheduler in scope - confirm.
  stream.runSyncUnsafe()
}
[info] Benchmark (chunkCount) (chunkSize) Mode Cnt Score Error Units
[info] akka 1000 1000 thrpt 20 12.620 ± 0.418 ops/s
[info] akkaPreChunked 1000 1000 thrpt 20 193.842 ± 1.147 ops/s
[info] fs2 1000 1000 thrpt 20 61.285 ± 1.243 ops/s
[info] fs2PreChunked 1000 1000 thrpt 20 150.544 ± 1.089 ops/s
[info] monix 1000 1000 thrpt 20 80.448 ± 1.510 ops/s
[info] monixPreChunked 1000 1000 thrpt 20 280.467 ± 4.769 ops/s
[info] zioPreChunked 1000 1000 thrpt 20 121.028 ± 0.861 ops/s
ZIO will be automatically chunked with fromIterable so we don't compare buffering here
ZIO は自動的にチャンク化するので比較外
Cons:
演算子は fs2/zio に比べて分析しづらい 自分でバッファリングする必要がある
Pros:
ナイスな API、(他社比)最高速の性能 時間ベースの演算にも向いている
コントリ大歓迎 ご清聴ありがとうございます