from __future__ import print_function
import sys
import time
from pprint import pprint
from dkube.sdk.internal import dkube_api
from dkube.sdk.internal.dkube_api.models.custom_container_model import \
CustomContainerModel
from dkube.sdk.internal.dkube_api.models.custom_container_model_image import \
CustomContainerModelImage
from dkube.sdk.internal.dkube_api.models.inference_job_model import \
InferenceJobModel
from dkube.sdk.internal.dkube_api.models.job_model import JobModel
from dkube.sdk.internal.dkube_api.models.job_model_parameters import \
JobModelParameters
from dkube.sdk.internal.dkube_api.models.job_model_parameters_run import \
JobModelParametersRun
from dkube.sdk.internal.dkube_api.rest import ApiException
from .util import *
class DkubeServing(object):
"""
This class defines Model Deployment with helper functions to set properties of Model Deployment.::
from dkube.sdk import *
serving = DkubeServing("oneconv", name="mnist-serving")
Where first argument is the user of the Model Deployment. User should be a valid onboarded user in dkube.
"""
def __init__(self, user, name=generate('serving'), description='', tags=[]):
self.predictor_container = CustomContainerModelImage(
path='', username=None, password=None, runas=None)
self.predictor = CustomContainerModel(image=None)
self.transformer_container = CustomContainerModelImage(
path='', username=None, password=None, runas=None)
self.transformer = CustomContainerModel(image=None)
self.serving_def = InferenceJobModel(model=None, version=None, owner=None, device=None, deploy=None,
serving_image=self.predictor, transformer=False,
transformer_image=self.transformer, transformer_project=None,
transformer_commit_id=None, transformer_code=None,
min_replicas=0, max_concurrent_requests=0)
self.run_def = JobModelParametersRun(template=None, group='default')
self.job_parameters = JobModelParameters(
_class='inference', inference=self.serving_def, run=self.run_def)
self.job = JobModel(name=None, parameters=self.job_parameters)
self.update_basic(user, name, description, tags)
[docs] def update_basic(self, user, name, description, tags):
"""
Method to update the attributes specified at creation. Description and tags can be updated. tags is a list of string values.
"""
tags = list_of_strs(tags)
self.user = user
self.name = name
self.description = description
self.job.name = name
self.job.description = description
self.serving_def.tags = tags
self.serving_def.device = "cpu"
self.serving_def.transformer = False
[docs] def update_serving_model(self, model, version=None):
"""
Method to update Model Repo input for Model Serving
*Inputs*
name
Name of Model Repo containing the model files
version
Version (unique id) to use from Model Repo
"""
self.serving_def.model = model
self.serving_def.version = version
self.serving_def.owner = self.user
[docs] def update_serving_image(self, deploy=None, image_url='', login_uname=None, login_pswd=None):
"""
Method to update the image to use for Model Serving
*Inputs*
deploy
Flag to specify Serving for Test or Production (TODO)
image_url
url for the image repository |br|
e.g, docker.io/ocdr/tensorflowserver:2.0.0
login_uname
username to access the image repository
login_pswd
password to access the image repository
"""
self.predictor_container.path = image_url
self.predictor_container.username = login_uname
self.predictor_container.password = login_pswd
self.serving_def.deploy = deploy
self.predictor.image = self.predictor_container
[docs] def set_production_deploy(self):
"""
Method to update the mode to use for Model Serving
*Inputs*
deploy
Flag to specify Serving for Test or Production (TODO)
"""
self.serving_def.deploy = True
[docs] def update_autoscaling_config(self, min_replicas, max_concurrent_requests):
"""
Method to update the autocale config to use for Model Serving
*Inputs*
min_replicas
Min number of pods to be running for Serving
max_concurrent_requests
Soft target threshold value for number of concurrent requests to trigger scale up of Serving pods
"""
self.serving_def.min_replicas = min_replicas
self.serving_def.max_concurrent_requests = max_concurrent_requests