TensorFlow for Scala 3
by Bartek Tonia
Introduction
Goal
Provide API for TensorFlow in Scala 3 for data scientists and ML enthusiasts
Background
Fork of github.com/shadaj/scalapy-tensorflow
Builds on `scalapy-numpy` and `scalapy-core`
All originally developed by Shadaj Laddad
TensorFlow
Machine learning models
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
max_features = 20000
maxlen = 200
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=max_features)
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)
model = keras.models.Sequential(layers = [
layers.Embedding(max_features, 128, input_length = maxlen),
layers.Bidirectional(layers.LSTM(64)),
layers.Dropout(0.5),
layers.Dense(1, activation = "sigmoid")
])
model.compile("adam", "binary_crossentropy", metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=32, epochs=2, validation_data=(x_val, y_val))
Tensor calculations
import tensorflow as tf
import numpy as np
xData = np.random.rand(100)
yData = (xData * 0.1) + 0.3
W = tf.Variable(tf.random.uniform(shape = [1], minval = -1, maxval = 1))
b = tf.Variable(tf.zeros([1]))
def y():
return (W * xData) + b
def loss():
return tf.reduce_mean(tf.square(y() - yData))
opt = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)
num_epochs = 400
for epoch in range(num_epochs):
step_count = opt.minimize(loss, [W,b]).numpy()
print("W: {}, b: {}".format(W.numpy(), b.numpy()))
Library
scalapy-tensorflow
Part of the library remains in Scala 2, due to the redesigned macro system
in Scala 3 and its complex interaction with CPython
Scala 2
@py.native
trait Optimizer extends py.Object {
def apply_gradients(grads_and_vars: Seq[(Tensor, Variable)]): Operation = py.native
}
sealed class |[A, B](val value: scala.Any, val isLeft: Boolean) {}
def eval(code: String): Unit = {
CPythonAPI.PyRun_String(Platform.toCString(code), ...)
}
Scala 3
class Sequential private[api] (val underlying: PySequential) extends PythonType[PySequential] {
def add(layer: Layer): Unit = underlying.add(layer.underlying)
def compile(
optimizer: String | Optimizer = "rmsprop",
loss: Option[PyFunction] = None,
metrics: Seq[Metrics] = Seq.empty,
lossWeights: Option[Seq[(Double, Double)]] = None
) =
underlying.compile(
optimizer,
loss,
metrics,
lossWeights
)
}
Deployment
Validated on macOS and Linux with conda and pip
Uses JitPack to manage dependencies from GitHub repositories
Performance kept close to the Python version, verified by performance testing
New Features
Union types
def kernelSize: Int | (Int, Int) = underlying.kernel_size
implicit def fromPythonTypesUnion[
X <: py.Any,
Y <: py.Any,
A <: PythonType[X],
B <: PythonType[Y]
](
u: A | B
)(implicit ev1: ClassTag[A], ev2: ClassTag[B]): py.|[X, Y] =
u match {
case a: A => a.underlying
case b: B => b.underlying
case _ => throw new IllegalArgumentException()
}
Enums
enum DataFormat(override private[api] val v: String) extends PythonEnum(v){
case ChannelsLast extends DataFormat("channels_last")
case ChannelsFirst extends DataFormat("channels_first")
}
def dataFormat: DataFormat = DataFormat.valueOf(underlying.data_format)
Option
def inputShape: Option[(Int, Int, Int)] = underlying.input_shape
implicit def option2PyOption[A]: Conversion[Option[A], py.NoneOr[A]] =
_ match {
case None => py.|.fromLeft(py.None)
case Some(v) => py.|.fromRight(v)
}
and new implicit conversion syntax
Trait parameters
trait Layer private[api] (val underlying: PyLayer) extends PythonType[PyLayer] {}
class Dense private[api] (override val underlying: PyDense) extends Layer(underlying) {
...
}
Scala 3 examples
Machine learning models
sealed class |[A, B](val value: scala.Any, val isLeft: Boolean) {}
object BidirectionalLSTMExample extends Runnable {
def run() = {
import me.shadaj.scalapy.tensorflow.api.{TensorFlow => tf}
val sequence = tf.keras.preprocessing.sequence
val ((xTrain, yTrain), (xTest, yTest)) = imdb.loadData(numWords = Some(maxFeatures))
val xTrain1 = sequence.padSequences(xTrain, maxLen = Some(maxLen)).astype(np.float32)
val xTest1 = sequence.padSequences(xTest, maxLen = Some(maxLen)).astype(np.float32)
val yTrain1 = yTrain.astype(np.float32)
val yTest1 = yTest.astype(np.float32)
val model = keras1.models.Sequential(layers = Seq(
layers.Embedding(maxFeatures, 128, inputLength = Some(maxLen)),
layers.Bidirectional(layers.LSTM(64, returnSequences=true)),
layers.Bidirectional(layers.LSTM(64)),
layers.Dense(1, activation = Some(Activation.Sigmoid))
))
model.compile(Optimizers.Adam,
loss = Some(keras1.backend.binaryCrossentropy), metrics = Metrics.Accuracy)
model.fit(xTrain1, yTrain1, batchSize = Some(batchSize),
epochs = epochs, validationData = Some((xTest1, yTest1)))
}
}
Side by side comparison
object BidirectionalLSTMExample extends Runnable {
def run() = {
import me.shadaj.scalapy.tensorflow.api.{TensorFlow => tf}
val sequence = tf.keras.preprocessing.sequence
val ((xTrain, yTrain), (xTest, yTest)) = imdb.loadData(numWords = Some(maxFeatures))
val xTrain1 = sequence.padSequences(xTrain, maxLen = Some(maxLen)).astype(np.float32)
val xTest1 = sequence.padSequences(xTest, maxLen = Some(maxLen)).astype(np.float32)
val yTrain1 = yTrain.astype(np.float32)
val yTest1 = yTest.astype(np.float32)
val model = keras1.models.Sequential(layers = Seq(
layers.Embedding(maxFeatures, 128, inputLength = Some(maxLen)),
layers.Bidirectional(layers.LSTM(64, returnSequences=true)),
layers.Bidirectional(layers.LSTM(64)),
layers.Dense(1, activation = Some(Activation.Sigmoid))
))
model.compile(Optimizers.Adam,
loss = Some(keras1.backend.binaryCrossentropy), metrics = Metrics.Accuracy)
model.fit(xTrain1, yTrain1, batchSize = Some(batchSize),
epochs = epochs, validationData = Some((xTest1, yTest1)))
}
}
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
max_features = 20000
maxlen = 200
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=max_features)
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)
model = keras.models.Sequential(layers = [
layers.Embedding(max_features, 128, input_length = maxlen),
layers.Bidirectional(layers.LSTM(64, return_sequences=True)),
layers.Bidirectional(layers.LSTM(64)),
layers.Dense(1, activation = "sigmoid")
])
model.compile("adam", "binary_crossentropy", metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=32, epochs=2, validation_data=(x_val, y_val))
Tensor calculations
sealed class |[A, B](val value: scala.Any, val isLeft: Boolean) {}
object GradientDescentOptimizerExample extends Runnable {
def run(): Unit = {
val xData = np.random.rand(100).astype(np.float32)
val yData = (xData * 0.1f) + 0.3f
val W = tf.Variable(tf.random.uniform(shape = Seq(1), minval = -1, maxval = 1))
val b = tf.Variable(tf.zeros(Seq(1)))
def y = () => W * xData + b
def loss = () => tf.reduceMean(tf.square(y() - yData))
def grad(): Option[(Tensor, Seq[Tensor])] =
CloseableResourceManager.withResource(tf.GradientTape()) { tape =>
val lossValue = loss()
val gradients: Seq[Tensor] = tape.gradient(lossValue, Seq(W, b))
(lossValue, gradients)
}
val optimizer = tf.keras.optimizers.SGD(learningRate = 0.1, momentum = Some(0.9))
for (epoch <- 1 to 200) {
val (lossValue, grads) = grad().get
optimizer.applyGradients(grads.zip(Seq(W, b)))
}
print(s"W: ${W.numpy()}, b: ${b.numpy()}")
}
}
Side by side
object GradientDescentOptimizerExample extends Runnable {
def run(): Unit = {
val xData = np.random.rand(100).astype(np.float32)
val yData = (xData * 0.1f) + 0.3f
val W = tf.Variable(tf.random.uniform(shape = Seq(1), minval = -1, maxval = 1))
val b = tf.Variable(tf.zeros(Seq(1)))
def y = () => W * xData + b
def loss = () => tf.reduceMean(tf.square(y() - yData))
def grad(): Option[(Tensor, Seq[Tensor])] =
CloseableResourceManager.withResource(tf.GradientTape()) { tape =>
val lossValue = loss()
val gradients: Seq[Tensor] = tape.gradient(lossValue, Seq(W, b))
(lossValue, gradients)
}
val optimizer = tf.keras.optimizers.SGD(learningRate = 0.1, momentum = Some(0.9))
for (epoch <- 1 to 200) {
val (lossValue, grads) = grad().get
optimizer.applyGradients(grads.zip(Seq(W, b)))
}
print(s"W: ${W.numpy()}, b: ${b.numpy()}")
}
}
import tensorflow as tf
import numpy as np
xData = np.random.rand(100)
yData = (xData * 0.1) + 0.3
W = tf.Variable(tf.random.uniform(shape = [1], minval = -1, maxval = 1))
b = tf.Variable(tf.zeros([1]))
def y():
return (W * xData) + b
def loss():
return tf.reduce_mean(tf.square(y() - yData))
opt = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)
num_epochs = 400
for epoch in range(num_epochs):
step_count = opt.minimize(loss, [W,b]).numpy()
print("W: {}, b: {}".format(W.numpy(), b.numpy()))
Future development
Functional API
@tf.function
def f(x, y):
return x ** 2 + y
x = tf.constant([2, 3])
y = tf.constant([3, -2])
f(x, y)
This would require calling Scala functions in Python
e.g. a user-defined loss function
Other
- Change resource manager to scala.Using
- Consider adding documentation
- Type classes, e.g. Numeric
- Increase API coverage
Summary
Performance
| | Scala | Python |
|---|---|---|
Total time | 108.5 | 107.8 |
Epoch 1 | 46s 59ms/step | 41s 52ms/step |
Epoch 2 | 41s 53ms/step | 48s 61ms/step |
Loss | 0.64 | 0.64 |
Accuracy | 0.61 | 0.61 |
BidirectionalLSTMExample.scala used to compare learning speed
Results
scalapy-tensorflow provided similar experience and faced similar issues as other language versions of TensorFlow, for example: Swift or JavaScript
TensorFlow for Scala 3
By Bartek Tonia
TensorFlow for Scala 3
- 43