2 changes: 2 additions & 0 deletions runtimes/huggingface/mlserver_huggingface/codecs/__init__.py
@@ -6,6 +6,7 @@
 from .conversation import HuggingfaceConversationCodec
 from .raw import RawCodec
 from .utils import EqualUtil
+from .chariot import ChariotImgModelOutputCodec
 
 __all__ = [
     "MultiInputRequestCodec",
@@ -14,6 +15,7 @@
     "HuggingfaceSingleJSONCodec",
     "HuggingfaceListJSONCodec",
     "HuggingfaceConversationCodec",
+    "ChariotImgModelOutputCodec",
     "NumpyListCodec",
     "RawCodec",
     "EqualUtil",
113 changes: 113 additions & 0 deletions runtimes/huggingface/mlserver_huggingface/codecs/chariot.py
@@ -0,0 +1,113 @@
+import numpy as np
+from mlserver.codecs.lists import is_list_of
+import json
+
+
+def get_det_dict_from_hf_obj_detect(obj_detect):
+    """Convert HF object detection output to standard Chariot object detection output"""
+    det_dict = {
+        "num_detections": 0,
+        "detection_classes": [],
+        "detection_boxes": [],
+        "detection_scores": [],
+    }
+    for det in obj_detect:
+        conf, cls = det["score"], det["label"]
+        y1, x1, y2, x2 = (
+            det["box"]["ymin"],
+            det["box"]["xmin"],
+            det["box"]["ymax"],
+            det["box"]["xmax"],
+        )
+        det_dict["num_detections"] += 1
+        det_dict["detection_classes"].append(cls)
+        det_dict["detection_scores"].append(conf)
+        det_dict["detection_boxes"].append([y1, x1, y2, x2])
+    return det_dict
+
+
+def get_chariot_seg_mask_from_hf_seg_output(seg_pred, class_int_to_str):
+    """Convert HF segmentation output to standard Chariot segmentation output"""
+    mask_shape = np.array(seg_pred[0]["mask"]).shape
+    class_str_to_int = {v: k for k, v in class_int_to_str.items()}
+    # Create an empty mask
+    combined_mask = np.full(mask_shape, None)
+    for i in seg_pred:
+        # Convert mask from PIL image to numpy array
+        mask = np.array(i["mask"])
+        class_str = i["label"]
+        class_int = class_str_to_int[class_str]
+        combined_mask[np.where(mask > 0)] = class_int
+    predictions = combined_mask.tolist()
+    return predictions
+
+
+class ChariotImgModelOutputCodec:
+    """Encoder that converts HF model output to the standard Chariot model output"""
+
+    @classmethod
+    def encode_output(
+        cls, predictions, task_type, class_int_to_str, predict_proba=False
+    ):
+        if is_list_of(predictions, dict):
+            predictions = [predictions]
+        if task_type == "image-classification":
+
+            if predict_proba:
+                # class_int_to_str: {0: "Egyptian cat",
+                #                    1: "tabby, tabby cat",
+                #                    2: "tiger cat"}
+                # convert HF output: [[{"label": "tabby, tabby cat", "score": 0.94},
+                #                      {"label": "tiger cat", "score": 0.04},
+                #                      {"label": "Egyptian cat", "score": 0.02}]]
+                # to standard Chariot probability output: [[0.02, 0.94, 0.04]]
+                # The probability scores are ordered by class id
+                num_labels = len(class_int_to_str)
+                class_to_proba = [
+                    {d["label"]: d["score"] for d in p} for p in predictions
+                ]
+                predictions = [
+                    [d.get(class_int_to_str[i]) for i in range(num_labels)]
+                    for d in class_to_proba
+                ]
+            else:
+                # get the Top-1 predicted class
+                # convert HF output: [[{"label": "tabby, tabby cat", "score": 0.94},
+                #                      {"label": "tiger cat", "score": 0.04},
+                #                      {"label": "Egyptian cat", "score": 0.02}]]
+                # to standard Chariot output: ['"tabby, tabby cat"']
+                predictions = [json.dumps(p[0]["label"]) for p in predictions]
+        elif task_type == "object-detection":
+
+            # convert HF output: [[{"score": 0.9897010326385498,
+            #                       "label": "cat",
+            #                       "box": {"xmin": 53, "ymin": 313,
+            #                               "xmax": 697, "ymax": 986}},
+            #                      {"score": 0.9896764159202576,
+            #                       "label": "cat",
+            #                       "box": {"xmin": 974, "ymin": 221,
+            #                               "xmax": 1526, "ymax": 1071}}]]
+
+            # to standard Chariot output: [{"num_detections": 2,
+            #                               "detection_classes": ["cat", "cat"],
+            #                               "detection_scores": [0.9897010326385498, 0.9896764159202576],
+            #                               "detection_boxes": [[313, 53, 986, 697],
+            #                                                   [221, 974, 1071, 1526]]}]
+            predictions = [get_det_dict_from_hf_obj_detect(p) for p in predictions]
+
+        elif task_type == "image-segmentation":
+
+            # convert HF output: [[{"score": None,
+            #                       "label": "wall",
+            #                       "mask": <PIL.Image.Image>},
+            #                      {"score": None,
+            #                       "label": "floor",
+            #                       "mask": <PIL.Image.Image>}]]
+            # to standard Chariot output: [[[0, 0, ..., 0], ..., [0, 0, 0, ..., 0]]]
+            # a 2d array with the size of the original image; each pixel is a class int
+            # Background uses class_int 0
+            predictions = [
+                get_chariot_seg_mask_from_hf_seg_output(p, class_int_to_str)
+                for p in predictions
+            ]
+        return predictions
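
To make the new codec's contract concrete, here is a minimal sketch (separate from the diff) that feeds a hand-made HF object-detection output through ChariotImgModelOutputCodec.encode_output; the scores, labels, and boxes are illustrative only.

```python
# Sketch (not part of the PR): exercising the new codec on a hand-made
# HF object-detection output for a single image with two detections.
from mlserver_huggingface.codecs import ChariotImgModelOutputCodec

hf_output = [
    [
        {"score": 0.99, "label": "cat",
         "box": {"xmin": 53, "ymin": 313, "xmax": 697, "ymax": 986}},
        {"score": 0.97, "label": "cat",
         "box": {"xmin": 974, "ymin": 221, "xmax": 1526, "ymax": 1071}},
    ]
]

chariot_output = ChariotImgModelOutputCodec.encode_output(
    hf_output,
    task_type="object-detection",
    class_int_to_str={0: "cat"},
)
# chariot_output == [{"num_detections": 2,
#                     "detection_classes": ["cat", "cat"],
#                     "detection_boxes": [[313, 53, 986, 697],
#                                         [221, 974, 1071, 1526]],
#                     "detection_scores": [0.99, 0.97]}]
```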
25 changes: 13 additions & 12 deletions runtimes/huggingface/mlserver_huggingface/common.py
@@ -33,8 +33,6 @@ def load_pipeline_from_settings(
     if not model:
         model = settings.parameters.uri  # type: ignore
     tokenizer = hf_settings.pretrained_tokenizer
-    if not tokenizer:
-        tokenizer = hf_settings.pretrained_model
     if hf_settings.framework == "tf":
         if hf_settings.inter_op_threads is not None:
             tf.config.threading.set_inter_op_parallelism_threads(
@@ -49,7 +47,8 @@
             torch.set_num_interop_threads(hf_settings.inter_op_threads)
         if hf_settings.intra_op_threads is not None:
             torch.set_num_threads(hf_settings.intra_op_threads)
-
+    # If no tokenizer is provided in the config,
+    # the HF pipeline automatically loads the tokenizer from the model directory
     hf_pipeline = pipeline(
         hf_settings.task_name,
         model=model,
@@ -63,15 +62,17 @@
     # If max_batch_size > 1 we need to ensure tokens are padded
     if settings.max_batch_size > 1:
         model = hf_pipeline.model
-        if not hf_pipeline.tokenizer.pad_token_id:
-            eos_token_id = model.config.eos_token_id  # type: ignore
-            if eos_token_id:
-                hf_pipeline.tokenizer.pad_token_id = [str(eos_token_id)]  # type: ignore
-            else:
-                logger.warning(
-                    "Model has neither pad_token or eos_token, setting batch size to 1"
-                )
-                hf_pipeline._batch_size = 1
+        if hf_pipeline.tokenizer is not None:
+            if not hf_pipeline.tokenizer.pad_token_id:
+                eos_token_id = model.config.eos_token_id  # type: ignore
+                if eos_token_id:
+                    hf_pipeline.tokenizer.pad_token_id = [str(eos_token_id)]
+                else:
+                    logger.warning(
+                        "Model has neither pad_token nor eos_token, "
+                        "setting batch size to 1"
+                    )
+                    hf_pipeline._batch_size = 1
 
     return hf_pipeline
 
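
The dropped tokenizer fallback (tokenizer = pretrained_model) is now delegated to transformers itself, as the new comment notes. A hedged sketch of the behaviour this relies on, with an illustrative model id:

```python
# Sketch of the assumed transformers semantics: with tokenizer=None,
# pipeline() resolves the tokenizer from the model's own directory/repo.
from transformers import pipeline

pipe = pipeline(
    "text-classification",  # stands in for hf_settings.task_name
    model="distilbert-base-uncased-finetuned-sst-2-english",
    tokenizer=None,         # no pretrained_tokenizer configured
)
print(pipe("MLServer makes serving easy")[0]["label"])
```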
50 changes: 43 additions & 7 deletions runtimes/huggingface/mlserver_huggingface/runtime.py
@@ -1,6 +1,6 @@
 import asyncio
 import torch
-
+from typing import Any
 from mlserver.model import MLModel
 from mlserver.settings import ModelSettings
 from mlserver.logging import logger
@@ -11,9 +11,15 @@
 
 from .settings import get_huggingface_settings
 from .common import load_pipeline_from_settings
-from .codecs import HuggingfaceRequestCodec
+from .codecs import HuggingfaceRequestCodec, ChariotImgModelOutputCodec
 from .metadata import METADATA
 
+CHARIOT_IMAGE_TASK = [
+    "image-classification",
+    "image-segmentation",
+    "object-detection",
+]
+
 
 class HuggingFaceRuntime(MLModel):
     """Runtime class for specific Huggingface models"""
Expand All @@ -40,15 +46,45 @@ async def predict(self, payload: InferenceRequest) -> InferenceResponse:
# TODO: convert and validate?
kwargs = HuggingfaceRequestCodec.decode_request(payload)
args = kwargs.pop("args", [])

array_inputs = kwargs.pop("array_inputs", [])
if array_inputs:
args = [list(array_inputs)] + args
prediction = self._model(*args, **kwargs)

return self.encode_response(
payload=prediction, default_codec=HuggingfaceRequestCodec
predict_proba, predict_proba_kwargs = self.get_predict_proba_kwargs(payload)
predictions = self._model(*args, **kwargs, **predict_proba_kwargs)
if self.hf_settings.task in CHARIOT_IMAGE_TASK:
predictions = ChariotImgModelOutputCodec.encode_output(
predictions,
task_type=self.hf_settings.task,
class_int_to_str=self._model.model.config.id2label,
predict_proba=predict_proba,
)
response = self.encode_response(
payload=predictions, default_codec=HuggingfaceRequestCodec
)
return response

def get_predict_proba_kwargs(
self, payload: InferenceRequest
) -> tuple[bool, dict[str, Any]]:
actions = {
(
getattr(request_input.parameters, "action", "predict")
if request_input.parameters
else "predict"
)
for request_input in payload.inputs
}
if len(actions) > 1:
raise ValueError(
f"If processing a batch all 'actions' must be the same \
but got 'actions': {actions}"
)
action = actions.pop()
predict_proba = action == "predict_proba"
predict_proba_kwargs = dict()
if predict_proba and self.hf_settings.task == "image-classification":
predict_proba_kwargs["top_k"] = self._model.model.config.num_labels
return predict_proba, predict_proba_kwargs

async def unload(self) -> bool:
# TODO: Free up Tensorflow's GPU memory
Expand Down
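
For context on how get_predict_proba_kwargs is driven (my reading of the code above, not part of the diff): each request input may carry an "action" parameter, and "predict_proba" makes the image-classification pipeline return scores for every class via top_k. A sketch of a request that would trigger it, assuming mlserver's standard request types; the input name, shape, and data are illustrative:

```python
# Sketch (not part of the PR): a request asking for per-class
# probabilities via the custom "action" input parameter read above.
from mlserver.types import InferenceRequest, Parameters, RequestInput

request = InferenceRequest(
    inputs=[
        RequestInput(
            name="image",
            shape=[1],
            datatype="BYTES",
            data=[b"<png bytes>"],
            parameters=Parameters(action="predict_proba"),
        )
    ]
)
# get_predict_proba_kwargs(request) should yield
# (True, {"top_k": <num_labels>}) for an image-classification model.
```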
2 changes: 1 addition & 1 deletion runtimes/huggingface/mlserver_huggingface/version.py
@@ -1 +1 @@
-__version__ = "2.0.8"
+__version__ = "2.0.11"