Goal
Provide an API for TensorFlow in Scala 3, aimed at data scientists and ML enthusiasts
Background
A fork of github.com/shadaj/scalapy-tensorflow
Built on top of `scalapy-numpy` and `scalapy-core`
All originally developed by Shadaj Laddad
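For context, a minimal ScalaPy sketch (assuming scalapy-core on the classpath and a local Python with numpy installed): modules are loaded by name and their members are accessed dynamically.
import me.shadaj.scalapy.py
import me.shadaj.scalapy.py.SeqConverters

// Load Python's numpy and call it from Scala; toPythonCopy copies the Seq into a Python list.
val np = py.module("numpy")
val a = np.array(Seq(1.0, 2.0, 3.0).toPythonCopy)
println(a.mean())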
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
max_features = 20000
maxlen = 200
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=max_features)
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)
model = keras.models.Sequential(layers=[
    layers.Embedding(max_features, 128, input_length=maxlen),
    layers.Bidirectional(layers.LSTM(64)),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid")
])
model.compile("adam", "binary_crossentropy", metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=32, epochs=2, validation_data=(x_val, y_val))
Part of the code base remains in Scala 2, because the Scala 3
macro system was redesigned and the interaction with CPython is complex
@py.native
trait Optimizer extends py.Object {
  def apply_gradients(grads_and_vars: Seq[(Tensor, Variable)]): Operation = py.native
}

sealed class |[A, B](val value: scala.Any, val isLeft: Boolean) {}

def eval(code: String): Unit = {
  CPythonAPI.PyRun_String(Platform.toCString(code), ...)
}
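As a hedged usage sketch, `eval` executes a raw Python statement in the embedded interpreter:
// Run one Python statement through the embedded CPython interpreter.
eval("import tensorflow as tf")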
class Sequential private[api] (val underlying: PySequential) extends PythonType[PySequential] {
  def add(layer: Layer): Unit = underlying.add(layer.underlying)

  def compile(
      optimizer: String | Optimizer = "rmsprop",
      loss: Option[PyFunction] = None,
      metrics: Seq[Metrics] = Seq.empty,
      lossWeights: Option[Seq[(Double, Double)]] = None
  ): Unit =
    underlying.compile(
      optimizer,
      loss,
      metrics,
      lossWeights
    )
}
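A hedged usage sketch of the union-typed `compile`, reusing the `Optimizers` and `Metrics` values from the examples below: the optimizer can be passed either as a raw string or as a typed value, with no overloads.
// Both calls are accepted thanks to the String | Optimizer union.
model.compile("adam", metrics = Seq(Metrics.Accuracy))
model.compile(Optimizers.Adam, metrics = Seq(Metrics.Accuracy))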
Validated on macOS and Linux, with both conda and pip environments
Uses JitPack to resolve dependencies directly from GitHub repositories (see the build.sbt sketch below)
Performance testing shows throughput on par with the Python version
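A minimal build.sbt sketch (coordinates are hypothetical; JitPack derives them from the GitHub user, repository, and branch):
// Resolve GitHub-hosted artifacts through JitPack.
resolvers += "jitpack" at "https://jitpack.io"
libraryDependencies += "com.github.shadaj" % "scalapy-tensorflow" % "master-SNAPSHOT"
Scala 3 union types also surface directly in the API, as in the `kernelSize` accessor and the union-lowering conversion below.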
def kernelSize: Int | (Int, Int) = underlying.kernel_size
implicit def fromPythonTypesUnion[
    X <: py.Any,
    Y <: py.Any,
    A <: PythonType[X],
    B <: PythonType[Y]
](
    u: A | B
)(implicit ev1: ClassTag[A], ev2: ClassTag[B]): py.|[X, Y] =
  u match {
    case a: A => a.underlying
    case b: B => b.underlying
    case _    => throw new IllegalArgumentException()
  }
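A hedged usage sketch, reusing the `Dense` and `Sequential` wrappers from this section: the conversion lowers a wrapper-level union to the `py.|` union the Python facade expects, with the `ClassTag` match choosing the branch at runtime.
// Dense extends PythonType[PyDense], Sequential extends PythonType[PySequential].
def toPy(x: Dense | Sequential): py.|[PyDense, PySequential] =
  fromPythonTypesUnion(x)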
enum DataFormat(override private[api] val v: String) extends PythonEnum(v) {
  case ChannelsLast extends DataFormat("channels_last")
  case ChannelsFirst extends DataFormat("channels_first")
}
def dataFormat: DataFormat = DataFormat.valueOf(underlying.data_format)
def inputShape: Option[(Int, Int, Int)] = underlying.input_shape
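A hedged sketch (the layer and its parameters are hypothetical): enum cases carry exactly the string Keras expects, so call sites stay type-safe instead of passing raw strings.
// A typed enum value replaces the raw "channels_last" string.
val pool = layers.MaxPooling2D(poolSize = (2, 2), dataFormat = Some(DataFormat.ChannelsLast))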
implicit def option2PyOption[A]: Conversion[Option[A], py.NoneOr[A]] =
  _ match {
    case None    => py.|.fromLeft(py.None)
    case Some(v) => py.|.fromRight(v)
  }
These helpers use the new Scala 3 implicit conversion syntax based on the `Conversion` type class
trait Layer private[api] (val underlying: PyLayer) extends PythonType[PyLayer]

class Dense private[api] (override val underlying: PyDense) extends Layer(underlying) {
  ...
}
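Scala 3 trait parameters let each concrete layer pass its Python object straight to `Layer`; a hedged sketch with a hypothetical `PyDropout` facade:
// Each wrapper only forwards its underlying Python layer object.
class Dropout private[api] (override val underlying: PyDropout) extends Layer(underlying)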
object BidirectionalLSTMExample extends Runnable {
  def run(): Unit = {
    import me.shadaj.scalapy.tensorflow.api.{TensorFlow => tf}
    val sequence = tf.keras.preprocessing.sequence
    val ((xTrain, yTrain), (xTest, yTest)) = imdb.loadData(numWords = Some(maxFeatures))
    val xTrain1 = sequence.padSequences(xTrain, maxLen = Some(maxLen)).astype(np.float32)
    val xTest1 = sequence.padSequences(xTest, maxLen = Some(maxLen)).astype(np.float32)
    val yTrain1 = yTrain.astype(np.float32)
    val yTest1 = yTest.astype(np.float32)
    val model = keras1.models.Sequential(layers = Seq(
      layers.Embedding(maxFeatures, 128, inputLength = Some(maxLen)),
      layers.Bidirectional(layers.LSTM(64, returnSequences = true)),
      layers.Bidirectional(layers.LSTM(64)),
      layers.Dense(1, activation = Some(Activation.Sigmoid))
    ))
    model.compile(Optimizers.Adam,
      loss = Some(keras1.backend.binaryCrossentropy), metrics = Metrics.Accuracy)
    model.fit(xTrain1, yTrain1, batchSize = Some(batchSize),
      epochs = epochs, validationData = Some((xTest1, yTest1)))
  }
}
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
max_features = 20000
maxlen = 200
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=max_features)
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)
model = keras.models.Sequential(layers=[
    layers.Embedding(max_features, 128, input_length=maxlen),
    layers.Bidirectional(layers.LSTM(64, return_sequences=True)),
    layers.Bidirectional(layers.LSTM(64)),
    layers.Dense(1, activation="sigmoid")
])
model.compile("adam", "binary_crossentropy", metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=32, epochs=2, validation_data=(x_val, y_val))
object GradientDescentOptimizerExample extends Runnable {
  def run(): Unit = {
    val xData = np.random.rand(100).astype(np.float32)
    val yData = (xData * 0.1f) + 0.3f
    val W = tf.Variable(tf.random.uniform(shape = Seq(1), minval = -1, maxval = 1))
    val b = tf.Variable(tf.zeros(Seq(1)))
    def y = () => W * xData + b
    def loss = () => tf.reduceMean(tf.square(y() - yData))
    def grad(): Option[(Tensor, Seq[Tensor])] =
      CloseableResourceManager.withResource(tf.GradientTape()) { tape =>
        val lossValue = loss()
        val gradients: Seq[Tensor] = tape.gradient(lossValue, Seq(W, b))
        (lossValue, gradients)
      }
    val optimizer = tf.keras.optimizers.SGD(learningRate = 0.1, momentum = Some(0.9))
    for (epoch <- 1 to 200) {
      val (lossValue, grads) = grad().get
      optimizer.applyGradients(grads.zip(Seq(W, b)))
    }
    print(s"W: ${W.numpy()}, b: ${b.numpy()}")
  }
}
import tensorflow as tf
import numpy as np
xData = np.random.rand(100)
yData = (xData * 0.1) + 0.3
W = tf.Variable(tf.random.uniform(shape = [1], minval = -1, maxval = 1))
b = tf.Variable(tf.zeros([1]))
def y():
    return (W * xData) + b

def loss():
    return tf.reduce_mean(tf.square(y() - yData))
opt = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)
num_epochs = 400
for epoch in range(num_epochs):
    step_count = opt.minimize(loss, [W, b]).numpy()
print("W: {}, b: {}".format(W.numpy(), b.numpy()))
@tf.function
def f(x, y):
    return x ** 2 + y
x = tf.constant([2, 3])
y = tf.constant([3, -2])
f(x, y)
Supporting this would require calling Scala functions from Python,
e.g. the loss function passed to opt.minimize
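For illustration, a hedged sketch of the unsupported call (the Scala-side `minimize` signature is hypothetical): `loss` is a Scala closure, so CPython would have to call back into Scala on every optimization step.
// Not supported: this would hand the Scala closure `loss` to Python.
optimizer.minimize(loss, Seq(W, b))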
| | Scala | Python |
|---|---|---|
| Total time (s) | 108.5 | 107.8 |
| Epoch 1 | 46s 59ms/step | 41s 52ms/step |
| Epoch 2 | 41s 53ms/step | 48s 61ms/step |
| Loss | 0.64 | 0.64 |
| Accuracy | 0.61 | 0.61 |
BidirectionalLSTMExample.scala was used to compare training speed
scalapy-tensorflow provides a similar experience, and runs into similar issues, as TensorFlow bindings for other languages such as Swift or JavaScript