ngoldin@imubit.com
Where we're at
Must streamline the process -> Full automation
# At the end of training
def serialize_model(session, inputs, outputs, export_path):
    """Export the model held by ``session`` to ``export_path``.

    A single signature is built from ``inputs`` and ``outputs`` and
    registered under the ``'model_signature'`` key before the builder
    writes everything out.

    Args:
        session: session passed to ``add_meta_graph_and_variables``.
        inputs: forwarded to ``build_signature_def`` as ``inputs``.
        outputs: forwarded to ``build_signature_def`` as ``outputs``.
        export_path: target path handed to ``SavedModelBuilder``.

    Returns:
        ``export_path``, unchanged, so callers can chain on it.
    """
    # NOTE(review): assumes SavedModelBuilder / build_signature_def are the
    # TensorFlow 1.x saved_model helpers - confirm against the file imports.
    builder = SavedModelBuilder(export_path)
    signature = build_signature_def(inputs=inputs, outputs=outputs)
    builder.add_meta_graph_and_variables(
        session,
        # `tags` is a required argument of this call; 'serve' is the value
        # of tf.saved_model.tag_constants.SERVING.
        tags=['serve'],
        signature_def_map={'model_signature': signature},
    )
    builder.save()
    return export_path
Protocol Buffer
Data
... but is it enough?
def create_model_metadata(context,
                          session,
                          inputs,
                          outputs):
    """Assemble the metadata shipped with a trained model as a JSON string.

    Args:
        context: object whose ``user`` attribute is recorded as the author.
        session: not used by the visible part of this function.
        inputs: not used by the visible part of this function.
        outputs: not used by the visible part of this function.

    Returns:
        The metadata dictionary serialized with ``json.dumps``.
    """
    metadata = {}
    metadata['topic'] = 'One week weather predictor'
    metadata['area'] = ['Jerusalem', 'Beit-Shemesh']
    # json.dumps cannot encode a pandas Timestamp, so store ISO-8601 text.
    metadata['train_end_time'] = pd.to_datetime('now').isoformat()
    metadata['author'] = context.user
    # Arguments elided in the original snippet.
    metadata['max_temperature_seen'] = tf.max(...)
    # Contents elided in the original snippet.
    metadata['inputs'] = {...}
    ...  # further metadata entries elided in the original snippet
    return json.dumps(metadata)
The delivery
import marshmallow as ma
...
class ModelType(Enum):
    """Closed set of model kinds that a metadata record may declare."""

    SummerHumidityPredictor = auto()
    WinterPredictor = auto()
class ModelMetadataSchema(ma.Schema):
    """Marshmallow schema describing the metadata delivered with a model."""

    # Which kind of model this record describes; must be present.
    model_type = EnumField(ModelType, required=True)
    # NOTE(review): '0.2' is passed positionally; presumably it is meant as
    # the field default - confirm against the marshmallow version in use.
    schema_version = ma.fields.String('0.2')
    author = ma.fields.String(required=True)
    max_humidity_seen = ma.fields.Float(required=False)
    ...  # remaining fields elided in the original snippet
# Inside your application
# The schema declares schema_version as a String field, so parse it into a
# Version before comparing it with another Version.
if Version(str(model.schema_version)) > Version('0.2'):
    use_new_capabilities()
else:
    use_old_one()
Should support parallel development of DL models and Software
Split into different git repositories:
Research - responsible for training and delivering models - a single way to train models
Shared Interface - holds the interface definitions
Software - Your application and the interface implementation
Define clear ownership
Research
Software
Shared
## YAML Format
application:
  area: Israel
training:
  required_inputs:
    - humidity:
        period: '30 days'
        sample_rate: '1 minute'
    - temperature:
        area: 'Israel'
        period: '30 days'
        sample_rate: '5 minutes'
from models_backend import ModelsBackend, find_best_model
from utils import override_cities
class RuntimeModel(object):
    """Application-side wrapper around a model loaded from ``ModelsBackend``.

    Args:
        app_context: stored on the instance as ``app_context``.
        model_id: identifier passed to ``ModelsBackend.load_by_id``.
    """

    def __init__(self, app_context, model_id):
        self.model = ModelsBackend.load_by_id(model_id)
        self.app_context = app_context

    @property
    def required_inputs(self):
        """Return the wrapped model's ``required_inputs``."""
        return self.model.required_inputs

    @property
    def areas(self):
        """Return the wrapped model's ``areas``."""
        return self.model.areas

    ...  # remaining members elided in the original snippet
from models_backend import find_best_model, get_inputs
from api_service import api
class PredictionRequest(object):
    """Handler for the ``/weather/predict`` endpoint."""

    @api('/weather/predict')
    def get(self, app_context, data):
        """Pick a model for ``data['area']`` and return its prediction.

        Args:
            app_context: forwarded to ``find_best_model``.
            data: request payload; only ``data['area']`` is read here.

        Returns:
            The result of ``json.jsonify`` applied to the model prediction.
        """
        model = find_best_model(data['area'], app_context)
        # `get_inputs` is the name imported from models_backend; no `utils`
        # module object is imported in this file.
        dataframe = get_inputs(model.required_inputs)
        # NOTE(review): `json.jsonify` is not part of the standard-library
        # json module - presumably `json` is a framework module here; confirm.
        return json.jsonify(model.predict(dataframe))
def find_best_model(area, app_context):
    """Return a model from ``ModelsBackend.models`` suitable for ``area``.

    The visible logic only handles one hard-coded case: a request for
    'Hebron' is served by the first model whose ``areas`` contains 'JLM'.

    Args:
        area: name of the requested area.
        app_context: not used by the visible part of this function.

    Returns:
        The first matching model; nothing is returned by the visible code
        when no model matches.
    """
    for model in ModelsBackend.models:
        if area == 'Hebron' and 'JLM' in model.areas:
            # they're pretty close, no?
            return model
    ...  # remaining selection logic elided in the original snippet
# train a model and..
def create_model_metadata(context,
                          session,
                          inputs,
                          outputs):
    """Assemble the model metadata as a JSON string, now covering Hebron.

    Args:
        context: not used by the visible part of this function.
        session: not used by the visible part of this function.
        inputs: not used by the visible part of this function.
        outputs: not used by the visible part of this function.

    Returns:
        The metadata dictionary serialized with ``json.dumps``.
    """
    ...  # creation of `metadata` and earlier entries elided in the original
    metadata['area'] = ['Jerusalem',
                        'Beit-Shemesh',
                        'Hebron']
    ...  # further metadata entries elided in the original snippet
    return json.dumps(metadata)
"In the face of ambiguity, refuse the temptation to guess." - The Zen of Python
ngoldin@imubit.com