TensorFlow for Scala 3
by Bartek Tonia


Introduction


Goal
Provide API for TensorFlow in Scala 3 for data scientists and ML enthusiasts
Background
Fork of github.com/shadaj/scalapy-tensorflow
Works based on `scalapy-numpy` and `scalapy-core`
All originally developed by Shadaj Laddad
TensorFlow

Machine learning models

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
max_features = 20000
maxlen = 200
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=max_features)
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)
model = keras.models.Sequential(layers = [
layers.Embedding(max_features, 128, input_length = maxlen),
layers.Bidirectional(layers.LSTM(64)),
layers.Dropout(0.5),
layers.Dense(1, activation = "sigmoid")
])
model.compile("adam", "binary_crossentropy", metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=32, epochs=2, validation_data=(x_val, y_val))

Tensor calculations

import tensorflow as tf
import numpy as np
xData = np.random.rand(100)
yData = (xData * 0.1) + 0.3
W = tf.Variable(tf.random.uniform(shape = [1], minval = -1, maxval = 1))
b = tf.Variable(tf.zeros([1]))
def y():
return (W * xData) + b
def loss():
return tf.reduce_mean(tf.square(y() - yData))
opt = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)
num_epochs = 400
for epoch in range(num_epochs):
step_count = opt.minimize(loss, [W,b]).numpy()
print("W: {}, b: {}".format(W.numpy(), b.numpy()))

Library

scalapy-tensorflow


Part of the code remains in Scala 2, due to the redesigned
Scala 3 macros and the complex interaction with CPython
Scala 2

@py.native
trait Optimizer extends py.Object {
def apply_gradients(grads_and_vars: Seq[(Tensor, Variable)]): Operation = py.native
}
sealed class |[A, B](val value: scala.Any, val isLeft: Boolean) {}

def eval(code: String): Unit = {
CPythonAPI.PyRun_String(Platform.toCString(code), ...)
}

Scala 3

class Sequential private[api] (val underlying: PySequential) extends PythonType[PySequential] {
def add(layer: Layer): Unit = underlying.add(layer.underlying)
def compile(
optimizer: String | Optimizer = "rmsprop",
loss: Option[PyFunction] = None,
metrics: Seq[Metrics] = Seq.empty,
lossWeights: Option[Seq[(Double, Double)]] = None
) =
underlying.compile(
optimizer,
loss,
metrics,
lossWeights
)
}

Deployment

Validated for MacOS and Linux with conda and pip
Uses JitPack to manage dependencies from GitHub repositories
Performance kept similar to the Python version, verified by performance testing
New Features

Union types

def kernelSize: Int | (Int, Int) = underlying.kernel_size
implicit def fromPythonTypesUnion[
X <: py.Any,
Y <: py.Any,
A <: PythonType[X],
B <: PythonType[Y]
](
u: A | B
)(implicit ev1: ClassTag[A], ev2: ClassTag[B]): py.|[X, Y] =
u match {
case a: A => a.underlying
case b: B => b.underlying
case _ => throw new IllegalArgumentException()
}
Enums

enum DataFormat(override private[api] val v: String) extends PythonEnum(v){
case ChannelsLast extends DataFormat("channels_last")
case ChannelsFirst extends DataFormat("channels_first")
}

def dataFormat: DataFormat = DataFormat.valueOf(underlying.data_format)

Option

def inputShape: Option[(Int, Int, Int)] = underlying.input_shape
implicit def option2PyOption[A]: Conversion[Option[A], py.NoneOr[A]] =
_ match {
case None => py.|.fromLeft(py.None)
case Some(v) => py.|.fromRight(v)
}

and the new implicit conversion syntax

Trait parameters

trait Layer private[api] (val underlying: PyLayer) extends PythonType[PyLayer] {}
class Dense private[api] (override val underlying: PyDense) extends Layer(underlying) {
...
}

Scala 3 examples

Machine learning models

sealed class |[A, B](val value: scala.Any, val isLeft: Boolean) {}

object BidirectionalLSTMExample extends Runnable {
def run() = {
import me.shadaj.scalapy.tensorflow.api.{TensorFlow => tf}
val sequence = tf.keras.preprocessing.sequence
val ((xTrain, yTrain), (xTest, yTest)) = imdb.loadData(numWords = Some(maxFeatures))
val xTrain1 = sequence.padSequences(xTrain, maxLen = Some(maxLen)).astype(np.float32)
val xTest1 = sequence.padSequences(xTest, maxLen = Some(maxLen)).astype(np.float32)
val yTrain1 = yTrain.astype(np.float32)
val yTest1 = yTest.astype(np.float32)
val model = keras1.models.Sequential(layers = Seq(
layers.Embedding(maxFeatures, 128, inputLength = Some(maxLen)),
layers.Bidirectional(layers.LSTM(64, returnSequences=true)),
layers.Bidirectional(layers.LSTM(64)),
layers.Dense(1, activation = Some(Activation.Sigmoid))
))
model.compile(Optimizers.Adam,
loss = Some(keras1.backend.binaryCrossentropy), metrics = Metrics.Accuracy)
model.fit(xTrain1, yTrain1, batchSize = Some(batchSize),
epochs = epochs, validationData = Some((xTest1, yTest1)))
}
}
Side by side comparison

object BidirectionalLSTMExample extends Runnable {
def run() = {
import me.shadaj.scalapy.tensorflow.api.{TensorFlow => tf}
val sequence = tf.keras.preprocessing.sequence
val ((xTrain, yTrain), (xTest, yTest)) = imdb.loadData(numWords = Some(maxFeatures))
val xTrain1 = sequence.padSequences(xTrain, maxLen = Some(maxLen)).astype(np.float32)
val xTest1 = sequence.padSequences(xTest, maxLen = Some(maxLen)).astype(np.float32)
val yTrain1 = yTrain.astype(np.float32)
val yTest1 = yTest.astype(np.float32)
val model = keras1.models.Sequential(layers = Seq(
layers.Embedding(maxFeatures, 128, inputLength = Some(maxLen)),
layers.Bidirectional(layers.LSTM(64, returnSequences=true)),
layers.Bidirectional(layers.LSTM(64)),
layers.Dense(1, activation = Some(Activation.Sigmoid))
))
model.compile(Optimizers.Adam,
loss = Some(keras1.backend.binaryCrossentropy), metrics = Metrics.Accuracy)
model.fit(xTrain1, yTrain1, batchSize = Some(batchSize),
epochs = epochs, validationData = Some((xTest1, yTest1)))
}
}
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
max_features = 20000
maxlen = 200
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=max_features)
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)
model = keras.models.Sequential(layers = [
layers.Embedding(max_features, 128, input_length = maxlen),
layers.Bidirectional(layers.LSTM(64, return_sequences=True)),
layers.Bidirectional(layers.LSTM(64)),
layers.Dense(1, activation = "sigmoid")
])
model.compile("adam", "binary_crossentropy", metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=32, epochs=2, validation_data=(x_val, y_val))

Tensor calculations

sealed class |[A, B](val value: scala.Any, val isLeft: Boolean) {}

object GradientDescentOptimizerExample extends Runnable {
def run(): Unit = {
val xData = np.random.rand(100).astype(np.float32)
val yData = (xData * 0.1f) + 0.3f
val W = tf.Variable(tf.random.uniform(shape = Seq(1), minval = -1, maxval = 1))
val b = tf.Variable(tf.zeros(Seq(1)))
def y = () => W * xData + b
def loss = () => tf.reduceMean(tf.square(y() - yData))
def grad(): Option[(Tensor, Seq[Tensor])] =
CloseableResourceManager.withResource(tf.GradientTape()) { tape =>
val lossValue = loss()
val gradients: Seq[Tensor] = tape.gradient(lossValue, Seq(W, b))
(lossValue, gradients)
}
val optimizer = tf.keras.optimizers.SGD(learningRate = 0.1, momentum = Some(0.9))
for (epoch <- 1 to 200) {
val (lossValue, grads) = grad().get
optimizer.applyGradients(grads.zip(Seq(W, b)))
}
print(s"W: ${W.numpy()}, b: ${b.numpy()}")
}
}
Side by side

object GradientDescentOptimizerExample extends Runnable {
def run(): Unit = {
val xData = np.random.rand(100).astype(np.float32)
val yData = (xData * 0.1f) + 0.3f
val W = tf.Variable(tf.random.uniform(shape = Seq(1), minval = -1, maxval = 1))
val b = tf.Variable(tf.zeros(Seq(1)))
def y = () => W * xData + b
def loss = () => tf.reduceMean(tf.square(y() - yData))
def grad(): Option[(Tensor, Seq[Tensor])] =
CloseableResourceManager.withResource(tf.GradientTape()) { tape =>
val lossValue = loss()
val gradients: Seq[Tensor] = tape.gradient(lossValue, Seq(W, b))
(lossValue, gradients)
}
val optimizer = tf.keras.optimizers.SGD(learningRate = 0.1, momentum = Some(0.9))
for (epoch <- 1 to 200) {
val (lossValue, grads) = grad().get
optimizer.applyGradients(grads.zip(Seq(W, b)))
}
print(s"W: ${W.numpy()}, b: ${b.numpy()}")
}
}
import tensorflow as tf
import numpy as np
xData = np.random.rand(100)
yData = (xData * 0.1) + 0.3
W = tf.Variable(tf.random.uniform(shape = [1], minval = -1, maxval = 1))
b = tf.Variable(tf.zeros([1]))
def y():
return (W * xData) + b
def loss():
return tf.reduce_mean(tf.square(y() - yData))
opt = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)
num_epochs = 400
for epoch in range(num_epochs):
step_count = opt.minimize(loss, [W,b]).numpy()
print("W: {}, b: {}".format(W.numpy(), b.numpy()))

Future development

Functional API

@tf.function
def f(x, y):
return x ** 2 + y
x = tf.constant([2, 3])
y = tf.constant([3, -2])
f(x, y)

This would require calling Scala functions from Python,
e.g. a loss function
Other

- Change resource manager to scala.Using
- Consider adding documentation
- Type classes, e.g. Numeric
- Increase API coverage
Summary

Performance

| | Scala | Python |
|---|---|---|
| Total time | 108.5 | 107.8 |
| Epoch 1 | 46s 59ms/step | 41s 52ms/step |
| Epoch 2 | 41s 53ms/step | 48s 61ms/step |
| Loss | 0.64 | 0.64 |
| Accuracy | 0.61 | 0.61 |
BidirectionalLSTMExample.scala used to compare learning speed
Results

scalapy-tensorflow provided similar experience and faced similar issues as other language versions of TensorFlow, for example: Swift or JavaScript
TensorFlow for Scala 3
By Bartek Tonia
TensorFlow for Scala 3
- 51