2 changes: 2 additions & 0 deletions runtimes/huggingface/mlserver_huggingface/codecs/__init__.py
@@ -6,6 +6,7 @@
 from .conversation import HuggingfaceConversationCodec
 from .raw import RawCodec
 from .utils import EqualUtil
+from .chariot import ChariotImgModelOutputCodec
 
 __all__ = [
     "MultiInputRequestCodec",
@@ -14,6 +15,7 @@
     "HuggingfaceSingleJSONCodec",
     "HuggingfaceListJSONCodec",
     "HuggingfaceConversationCodec",
+    "ChariotImgModelOutputCodec",
     "NumpyListCodec",
     "RawCodec",
     "EqualUtil",
113 changes: 113 additions & 0 deletions runtimes/huggingface/mlserver_huggingface/codecs/chariot.py
@@ -0,0 +1,113 @@
+import numpy as np
+from mlserver.codecs.lists import is_list_of
+import json
+
+
+def get_det_dict_from_hf_obj_detect(obj_detect):
+    """Convert HF object detection output to standard Chariot object detection output"""
+    det_dict = {
+        "num_detections": 0,
+        "detection_classes": [],
+        "detection_boxes": [],
+        "detection_scores": [],
+    }
+    for det in obj_detect:
+        conf, cls = det["score"], det["label"]
+        y1, x1, y2, x2 = (
+            det["box"]["ymin"],
+            det["box"]["xmin"],
+            det["box"]["ymax"],
+            det["box"]["xmax"],
+        )
+        det_dict["num_detections"] += 1
+        det_dict["detection_classes"].append(cls)
+        det_dict["detection_scores"].append(conf)
+        det_dict["detection_boxes"].append([y1, x1, y2, x2])
+    return det_dict
+
+
+def get_chariot_seg_mask_from_hf_seg_output(seg_pred, class_int_to_str):
+    """Convert HF segmentation output to standard Chariot segmentation output"""
+    mask_shape = np.array(seg_pred[0]["mask"]).shape
+    class_str_to_int = {v: k for k, v in class_int_to_str.items()}
+    # Create an empty mask
+    combined_mask = np.full(mask_shape, None)
+    for i in seg_pred:
+        # Convert mask from PIL image to numpy array
+        mask = np.array(i["mask"])
+        class_str = i["label"]
+        class_int = class_str_to_int[class_str]
+        combined_mask[np.where(mask > 0)] = class_int
+    predictions = combined_mask.tolist()
+    return predictions
+
+
+class ChariotImgModelOutputCodec:
+    """Encoder that converts HF model output to the standard Chariot model output"""
+
+    @classmethod
+    def encode_output(
+        cls, predictions, task_type, class_int_to_str, predict_proba=False
+    ):
+        if is_list_of(predictions, dict):
+            predictions = [predictions]
+        if task_type == "image-classification":
+
+            if predict_proba:
+                # class_int_to_str: {0: "Egyptian cat",
+                #                    1: "tabby, tabby cat",
+                #                    2: "tiger cat"}
+                # convert HF output: [[{"label": "tabby, tabby cat", "score": 0.94},
+                #                      {"label": "tiger cat", "score": 0.04},
+                #                      {"label": "Egyptian cat", "score": 0.02}]]
+                # to standard Chariot probability output: [[0.02, 0.94, 0.04]]
+                # The probability scores are ordered by class id
+                num_labels = len(class_int_to_str)
+                class_to_proba = [
+                    {d["label"]: d["score"] for d in p} for p in predictions
+                ]
+                predictions = [
+                    [d.get(class_int_to_str[i]) for i in range(num_labels)]
+                    for d in class_to_proba
+                ]
+            else:
+                # get the Top-1 predicted class
+                # convert HF output: [[{"label": "tabby, tabby cat", "score": 0.94},
+                #                      {"label": "tiger cat", "score": 0.04},
+                #                      {"label": "Egyptian cat", "score": 0.02}]]
+                # to standard Chariot output: ['"tabby, tabby cat"']
+                predictions = [json.dumps(p[0]["label"]) for p in predictions]
+        elif task_type == "object-detection":
+
+            # convert HF output: [[{"score": 0.9897010326385498,
+            #                       "label": "cat",
+            #                       "box": {"xmin": 53, "ymin": 313,
+            #                               "xmax": 697, "ymax": 986}},
+            #                      {"score": 0.9896764159202576,
+            #                       "label": "cat",
+            #                       "box": {"xmin": 974, "ymin": 221,
+            #                               "xmax": 1526, "ymax": 1071}}]]
+
+            # to standard Chariot output: [{"num_detections": 2,
+            #                               "detection_classes": ["cat", "cat"],
+            #                               "detection_scores": [0.9897010326385498, 0.9896764159202576],
+            #                               "detection_boxes": [[313, 53, 986, 697],
+            #                                                   [221, 974, 1071, 1526]]}]
+            predictions = [get_det_dict_from_hf_obj_detect(p) for p in predictions]
+
+        elif task_type == "image-segmentation":
+
+            # convert HF output: [[{"score": None,
+            #                       "label": "wall",
+            #                       "mask": <PIL.Image.Image>},
+            #                      {"score": None,
+            #                       "label": "floor",
+            #                       "mask": <PIL.Image.Image>}]]
+            # to standard Chariot output: [[[0, 0, ..., 0], ..., [0, 0, 0, ..., 0]]]
+            # a 2d array with the size of the original image; each pixel is a class int
+            # Background uses class_int 0
+            predictions = [
+                get_chariot_seg_mask_from_hf_seg_output(p, class_int_to_str)
+                for p in predictions
+            ]
+        return predictions
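
To make the new codec's contract concrete, here is a minimal sketch (separate from the diff) that feeds a hand-made HF object-detection output through ChariotImgModelOutputCodec.encode_output; the scores, labels, and boxes are illustrative only.

```python
# Sketch (not part of the PR): exercising the new codec on a hand-made
# HF object-detection output for a single image with two detections.
from mlserver_huggingface.codecs import ChariotImgModelOutputCodec

hf_output = [
    [
        {"score": 0.99, "label": "cat",
         "box": {"xmin": 53, "ymin": 313, "xmax": 697, "ymax": 986}},
        {"score": 0.97, "label": "cat",
         "box": {"xmin": 974, "ymin": 221, "xmax": 1526, "ymax": 1071}},
    ]
]

chariot_output = ChariotImgModelOutputCodec.encode_output(
    hf_output,
    task_type="object-detection",
    class_int_to_str={0: "cat"},
)
# chariot_output == [{"num_detections": 2,
#                     "detection_classes": ["cat", "cat"],
#                     "detection_boxes": [[313, 53, 986, 697],
#                                         [221, 974, 1071, 1526]],
#                     "detection_scores": [0.99, 0.97]}]
```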
25 changes: 13 additions & 12 deletions runtimes/huggingface/mlserver_huggingface/common.py
@@ -33,8 +33,6 @@ def load_pipeline_from_settings(
     if not model:
         model = settings.parameters.uri  # type: ignore
     tokenizer = hf_settings.pretrained_tokenizer
-    if not tokenizer:
-        tokenizer = hf_settings.pretrained_model
     if hf_settings.framework == "tf":
         if hf_settings.inter_op_threads is not None:
             tf.config.threading.set_inter_op_parallelism_threads(
@@ -49,7 +47,8 @@
             torch.set_num_interop_threads(hf_settings.inter_op_threads)
         if hf_settings.intra_op_threads is not None:
             torch.set_num_threads(hf_settings.intra_op_threads)
-
+    # If no tokenizer is provided in the config,
+    # the HF pipeline automatically loads the tokenizer from the model directory
     hf_pipeline = pipeline(
         hf_settings.task_name,
         model=model,
@@ -63,15 +62,17 @@
     # If max_batch_size > 1 we need to ensure tokens are padded
     if settings.max_batch_size > 1:
         model = hf_pipeline.model
-        if not hf_pipeline.tokenizer.pad_token_id:
-            eos_token_id = model.config.eos_token_id  # type: ignore
-            if eos_token_id:
-                hf_pipeline.tokenizer.pad_token_id = [str(eos_token_id)]  # type: ignore
-            else:
-                logger.warning(
-                    "Model has neither pad_token or eos_token, setting batch size to 1"
-                )
-                hf_pipeline._batch_size = 1
+        if hf_pipeline.tokenizer is not None:
+            if not hf_pipeline.tokenizer.pad_token_id:
+                eos_token_id = model.config.eos_token_id  # type: ignore
+                if eos_token_id:
+                    hf_pipeline.tokenizer.pad_token_id = [str(eos_token_id)]
+                else:
+                    logger.warning(
+                        "Model has neither pad_token nor eos_token, "
+                        "setting batch size to 1"
+                    )
+                    hf_pipeline._batch_size = 1
 
     return hf_pipeline
 
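
The dropped tokenizer fallback (tokenizer = pretrained_model) is now delegated to transformers itself, as the new comment notes. A hedged sketch of the behaviour this relies on, with an illustrative model id:

```python
# Sketch of the assumed transformers semantics: with tokenizer=None,
# pipeline() resolves the tokenizer from the model's own directory/repo.
from transformers import pipeline

pipe = pipeline(
    "text-classification",  # stands in for hf_settings.task_name
    model="distilbert-base-uncased-finetuned-sst-2-english",
    tokenizer=None,         # no pretrained_tokenizer configured
)
print(pipe("MLServer makes serving easy")[0]["label"])
```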
50 changes: 43 additions & 7 deletions runtimes/huggingface/mlserver_huggingface/runtime.py
@@ -1,6 +1,6 @@
 import asyncio
 import torch
-
+from typing import Any
 from mlserver.model import MLModel
 from mlserver.settings import ModelSettings
 from mlserver.logging import logger
@@ -11,9 +11,15 @@
 
 from .settings import get_huggingface_settings
 from .common import load_pipeline_from_settings
-from .codecs import HuggingfaceRequestCodec
+from .codecs import HuggingfaceRequestCodec, ChariotImgModelOutputCodec
 from .metadata import METADATA
 
+CHARIOT_IMAGE_TASK = [
+    "image-classification",
+    "image-segmentation",
+    "object-detection",
+]
+
 
 class HuggingFaceRuntime(MLModel):
     """Runtime class for specific Huggingface models"""
Expand All @@ -40,15 +46,45 @@ async def predict(self, payload: InferenceRequest) -> InferenceResponse:
# TODO: convert and validate?
kwargs = HuggingfaceRequestCodec.decode_request(payload)
args = kwargs.pop("args", [])

array_inputs = kwargs.pop("array_inputs", [])
if array_inputs:
args = [list(array_inputs)] + args
prediction = self._model(*args, **kwargs)

return self.encode_response(
payload=prediction, default_codec=HuggingfaceRequestCodec
predict_proba, predict_proba_kwargs = self.get_predict_proba_kwargs(payload)
predictions = self._model(*args, **kwargs, **predict_proba_kwargs)
if self.hf_settings.task in CHARIOT_IMAGE_TASK:
predictions = ChariotImgModelOutputCodec.encode_output(
predictions,
task_type=self.hf_settings.task,
class_int_to_str=self._model.model.config.id2label,
predict_proba=predict_proba,
)
response = self.encode_response(
payload=predictions, default_codec=HuggingfaceRequestCodec
)
return response

def get_predict_proba_kwargs(
self, payload: InferenceRequest
) -> tuple[bool, dict[str, Any]]:
actions = {
(
getattr(request_input.parameters, "action", "predict")
if request_input.parameters
else "predict"
)
for request_input in payload.inputs
}
if len(actions) > 1:
raise ValueError(
f"If processing a batch all 'actions' must be the same \
but got 'actions': {actions}"
)
action = actions.pop()
predict_proba = action == "predict_proba"
predict_proba_kwargs = dict()
if predict_proba and self.hf_settings.task == "image-classification":
predict_proba_kwargs["top_k"] = self._model.model.config.num_labels
return predict_proba, predict_proba_kwargs

async def unload(self) -> bool:
# TODO: Free up Tensorflow's GPU memory
Expand Down
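
For context on how get_predict_proba_kwargs is driven (my reading of the code above, not part of the diff): each request input may carry an "action" parameter, and "predict_proba" makes the image-classification pipeline return scores for every class via top_k. A sketch of a request that would trigger it, assuming mlserver's standard request types; the input name, shape, and data are illustrative:

```python
# Sketch (not part of the PR): a request asking for per-class
# probabilities via the custom "action" input parameter read above.
from mlserver.types import InferenceRequest, Parameters, RequestInput

request = InferenceRequest(
    inputs=[
        RequestInput(
            name="image",
            shape=[1],
            datatype="BYTES",
            data=[b"<png bytes>"],
            parameters=Parameters(action="predict_proba"),
        )
    ]
)
# get_predict_proba_kwargs(request) should yield
# (True, {"top_k": <num_labels>}) for an image-classification model.
```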
2 changes: 1 addition & 1 deletion runtimes/huggingface/mlserver_huggingface/version.py
@@ -1 +1 @@
-__version__ = "2.0.8"
+__version__ = "2.0.11"