From 4c6a302e650b39a12c5e840adc5dd4674e3b2704 Mon Sep 17 00:00:00 2001
From: Michael Wyatt <michaelwyatt@microsoft.com>
Date: Wed, 14 Feb 2024 15:36:51 -0800
Subject: [PATCH 1/6] add stable diffusion CI workflow

---
 .github/workflows/nv-a6000-fastgen.yml |  3 +-
 .github/workflows/nv-a6000-sd.yml      | 58 ++++++++++++++++++++++++++
 .github/workflows/nv-v100-legacy.yml   |  2 +-
 requirements/requirements-dev.txt      |  1 +
 tests/legacy/conftest.py               | 35 +++++++++++++---
 tests/legacy/pytest.ini                |  2 +-
 tests/legacy/test_local_deployment.py  | 24 ++++++++++-
 7 files changed, 115 insertions(+), 10 deletions(-)
 create mode 100644 .github/workflows/nv-a6000-sd.yml

diff --git a/.github/workflows/nv-a6000-fastgen.yml b/.github/workflows/nv-a6000-fastgen.yml
index 80bca5ae..034eac4a 100644
--- a/.github/workflows/nv-a6000-fastgen.yml
+++ b/.github/workflows/nv-a6000-fastgen.yml
@@ -8,7 +8,8 @@ on:
     paths-ignore:
       - 'mii/legacy/**'
       - 'tests/legacy/**'
-      - '.github/workflows/nv-torch-latest-v100.yml'
+      - '.github/workflows/nv-v100-legacy.yml'
+      - '.github/workflows/nv-a6000-sd.yml'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
diff --git a/.github/workflows/nv-a6000-sd.yml b/.github/workflows/nv-a6000-sd.yml
new file mode 100644
index 00000000..bf3cb64a
--- /dev/null
+++ b/.github/workflows/nv-a6000-sd.yml
@@ -0,0 +1,58 @@
+name: nv-a6000-sd
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 0 * * *"
+  pull_request:
+    paths:
+      - 'mii/legacy/**'
+      - 'tests/legacy/**'
+      - '.github/workflows/nv-a6000-sd.yml'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  unit-tests:
+    runs-on: [self-hosted, nvidia, a6000]
+    container:
+      image: nvcr.io/nvidia/pytorch:23.03-py3
+      ports:
+        - 80
+      options: --gpus all --shm-size "8G"
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Check container state
+        run: |
+          ldd --version
+          nvcc --version
+          nvidia-smi
+          python -c "import torch; print('torch:', torch.__version__, torch)"
+          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
+      - name: Install transformers
+        run: |
+          git clone --depth=1 https://github.com/huggingface/transformers
+          cd transformers
+          git rev-parse --short HEAD
+          python -m pip install .
+      - name: Install deepspeed
+        run: |
+          git clone --depth=1 https://github.com/microsoft/DeepSpeed
+          cd DeepSpeed
+          python -m pip install .
+          ds_report
+      - name: Install MII
+        run: |
+          pip install .[dev]
+      - name: Python environment
+        run: |
+          python -m pip list
+      - name: Unit tests
+        run: |
+          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
+          cd tests/legacy
+          python -m pytest --color=yes --durations=0 --verbose -rF -m "stable_diffusion" ./
diff --git a/.github/workflows/nv-v100-legacy.yml b/.github/workflows/nv-v100-legacy.yml
index 692793dc..8ef51c25 100644
--- a/.github/workflows/nv-v100-legacy.yml
+++ b/.github/workflows/nv-v100-legacy.yml
@@ -9,7 +9,7 @@ on:
       - 'mii/__init__.py'
       - 'mii/legacy/**'
       - 'tests/legacy/**'
-      - '.github/workflows/nv-torch-latest-v100.yml'
+      - '.github/workflows/nv-v100-legacy.yml'
       - 'requirements/**'
       - 'setup.py'
 
diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt
index dd6132a3..bf4a2dd2 100644
--- a/requirements/requirements-dev.txt
+++ b/requirements/requirements-dev.txt
@@ -1,4 +1,5 @@
 clang-format==16.0.2
+diffusers
 pre-commit>=2.20.0
 pytest
 pytest-forked
diff --git a/tests/legacy/conftest.py b/tests/legacy/conftest.py
index 9cb85cfd..2ad9d902 100644
--- a/tests/legacy/conftest.py
+++ b/tests/legacy/conftest.py
@@ -7,6 +7,8 @@
 import os
 import mii.legacy as mii
 from types import SimpleNamespace
+from packaging import version as pkg_version
+import torch
 
 
 @pytest.fixture(scope="function", params=["fp16"])
@@ -84,11 +86,9 @@ def ds_config(request):
     return request.param
 
 
-@pytest.fixture(scope="function")
-def replace_with_kernel_inject(model_name):
-    if "clip-vit" in model_name:
-        return False
-    return True
+@pytest.fixture(scope="function", params=[True])
+def replace_with_kernel_inject(request):
+    return request.param
 
 
 @pytest.fixture(scope="function")
@@ -145,8 +145,31 @@ def expected_failure(request):
     return request.param
 
 
+@pytest.fixture(scope="function", params=[None])
+def min_compute_capability(request):
+    return request.param  # None (default) disables the capability check
+
+
+@pytest.fixture(scope="function")
+def meets_compute_capability_reqs(min_compute_capability):
+    """Skip the test if the GPU compute capability is below the minimum."""
+    if min_compute_capability is None:
+        return
+    min_compute_ver = pkg_version.parse(str(min_compute_capability))
+    major, minor = torch.cuda.get_device_capability()
+    device_compute_ver = pkg_version.parse(f"{major}.{minor}")
+    if device_compute_ver < min_compute_ver:
+        pytest.skip(
+            f"Skipping test because device compute capability ({device_compute_ver}) is less than the minimum required ({min_compute_ver})."
+        )
+
+
 @pytest.fixture(scope="function")
-def deployment(deployment_name, mii_config, model_config, expected_failure):
+def deployment(deployment_name,
+               mii_config,
+               model_config,
+               expected_failure,
+               meets_compute_capability_reqs):
     if expected_failure is not None:
         with pytest.raises(expected_failure) as excinfo:
             mii.deploy(
diff --git a/tests/legacy/pytest.ini b/tests/legacy/pytest.ini
index 4c072427..2ba77e71 100644
--- a/tests/legacy/pytest.ini
+++ b/tests/legacy/pytest.ini
@@ -1,3 +1,3 @@
 [pytest]
 markers =
-    deepspeed:Run test for deepspeed CI
+    stable_diffusion:Run Stable Diffusion tests
diff --git a/tests/legacy/test_local_deployment.py b/tests/legacy/test_local_deployment.py
index b7a44e95..aa3a9f47 100644
--- a/tests/legacy/test_local_deployment.py
+++ b/tests/legacy/test_local_deployment.py
@@ -83,7 +83,7 @@ def test_single_GPU(deployment, query):
 
 
 @pytest.mark.parametrize(
-    "task_name, model_name, query",
+    "task_name, model_name, query, tensor_parallel",
     [
         (
             "text-generation",
@@ -92,6 +92,7 @@ def test_single_GPU(deployment, query):
                 "query": ["DeepSpeed is the greatest",
                           "Seattle is"]
             },
+            2,
         ),
     ],
 )
@@ -121,3 +122,24 @@ def test_session(deployment, query):
     result = generator.query(query)
     generator.destroy_session(session_name)
     assert result
+
+
+@pytest.mark.stable_diffusion
+@pytest.mark.parametrize(
+    "task_name, model_name, query",
+    [
+        (
+            "text-to-image",
+            "openskyml/midjourney-mini",
+            {
+                "query": ["a dog on a rocket"]
+            },
+        ),
+    ],
+)
+@pytest.mark.parametrize("min_compute_capability", [8])
+def test_stable_diffusion(deployment, query):
+    print(deployment)
+    generator = mii.mii_query_handle(deployment)
+    result = generator.query(query)
+    assert result

From d3fa2e2697f78ecf92c6b3e03c534e827fe5b63e Mon Sep 17 00:00:00 2001
From: Michael Wyatt <michaelwyatt@microsoft.com>
Date: Thu, 15 Feb 2024 14:10:50 -0800
Subject: [PATCH 2/6] fix forced fp16 revision and text-to-image test query

---
 mii/legacy/models/providers/diffusers.py |  2 +-
 tests/legacy/test_local_deployment.py    | 25 ++++++++++++------------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/mii/legacy/models/providers/diffusers.py b/mii/legacy/models/providers/diffusers.py
index 15973d0e..c5910b64 100644
--- a/mii/legacy/models/providers/diffusers.py
+++ b/mii/legacy/models/providers/diffusers.py
@@ -17,7 +17,7 @@ def diffusers_provider(model_config: ModelConfig):
     kwargs = model_config.pipeline_kwargs
     if model_config.dtype == torch.half:
         kwargs["torch_dtype"] = torch.float16
-        kwargs["revision"] = "fp16"
+        #kwargs["revision"] = "fp16"
 
     pipeline = attempt_load(DiffusionPipeline.from_pretrained,
                             model_config.model,
diff --git a/tests/legacy/test_local_deployment.py b/tests/legacy/test_local_deployment.py
index aa3a9f47..abde90f7 100644
--- a/tests/legacy/test_local_deployment.py
+++ b/tests/legacy/test_local_deployment.py
@@ -63,17 +63,6 @@
                 "query": "DeepSpeed is the greatest"
             },
         ),
-        (
-            "zero-shot-image-classification",
-            "openai/clip-vit-base-patch32",
-            {
-                "image":
-                "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
-                "candidate_labels": ["animals",
-                                     "humans",
-                                     "landscape"]
-            },
-        ),
     ],
 )
 def test_single_GPU(deployment, query):
@@ -132,7 +121,19 @@ def test_session(deployment, query):
             "text-to-image",
             "openskyml/midjourney-mini",
             {
-                "query": ["a dog on a rocket"]
+                "prompt": "a dog on a rocket",
+                "negative_prompt": "planet earth",
+            },
+        ),
+        (
+            "zero-shot-image-classification",
+            "openai/clip-vit-base-patch32",
+            {
+                "image":
+                "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
+                "candidate_labels": ["animals",
+                                     "humans",
+                                     "landscape"]
             },
         ),
     ],

From 40f50c514bad7d82536f70d1a3cd98494513d996 Mon Sep 17 00:00:00 2001
From: Michael Wyatt <michaelwyatt@microsoft.com>
Date: Thu, 15 Feb 2024 16:25:59 -0800
Subject: [PATCH 3/6] detect fp16 revision, add cuda graph fixture and inpainting test

---
 mii/legacy/models/providers/diffusers.py      |  10 +-
 tests/legacy/conftest.py                      |  15 +-
 tests/legacy/test_local_deployment.py         | 148 ++++++++++--------
 .../legacy/test_non_persistent_deployment.py  |   7 -
 4 files changed, 101 insertions(+), 79 deletions(-)

diff --git a/mii/legacy/models/providers/diffusers.py b/mii/legacy/models/providers/diffusers.py
index c5910b64..fca49470 100644
--- a/mii/legacy/models/providers/diffusers.py
+++ b/mii/legacy/models/providers/diffusers.py
@@ -4,11 +4,18 @@
 # DeepSpeed Team
 import os
 import torch
+from huggingface_hub import HfApi
 
 from .utils import attempt_load
 from mii.config import ModelConfig
 
 
+def _get_model_revs(model_name):
+    """Return the branch names that ``list_repo_refs`` reports for the model."""
+    refs = HfApi().list_repo_refs(model_name)
+    return [branch.name for branch in refs.branches]
+
+
 def diffusers_provider(model_config: ModelConfig):
     from diffusers import DiffusionPipeline
 
@@ -17,7 +24,8 @@ def diffusers_provider(model_config: ModelConfig):
     kwargs = model_config.pipeline_kwargs
     if model_config.dtype == torch.half:
         kwargs["torch_dtype"] = torch.float16
-        #kwargs["revision"] = "fp16"
+        if "fp16" in _get_model_revs(model_config.model):
+            kwargs["revision"] = "fp16"
 
     pipeline = attempt_load(DiffusionPipeline.from_pretrained,
                             model_config.model,
diff --git a/tests/legacy/conftest.py b/tests/legacy/conftest.py
index 2ad9d902..239d8383 100644
--- a/tests/legacy/conftest.py
+++ b/tests/legacy/conftest.py
@@ -86,8 +86,17 @@ def ds_config(request):
     return request.param
 
 
-@pytest.fixture(scope="function", params=[True])
-def replace_with_kernel_inject(request):
+@pytest.fixture(scope="function", params=[None])
+def replace_with_kernel_inject(request, model_name):
+    """Return the explicit param if set; otherwise False only for the CLIP model."""
+    override = request.param
+    if override is not None:
+        return override
+    return model_name != "openai/clip-vit-base-patch32"
+
+
+@pytest.fixture(scope="function", params=[False])
+def enable_cuda_graph(request):
     return request.param
 
 
@@ -104,6 +113,7 @@ def model_config(
     enable_zero: bool,
     ds_config: dict,
     replace_with_kernel_inject: bool,
+    enable_cuda_graph: bool,
 ):
     config = SimpleNamespace(
         skip_model_check=True, # TODO: remove this once conversation task check is fixed
@@ -120,6 +130,7 @@ def model_config(
         enable_zero=enable_zero,
         ds_config=ds_config,
         replace_with_kernel_inject=replace_with_kernel_inject,
+        enable_cuda_graph=enable_cuda_graph,
     )
     return config.__dict__
 
diff --git a/tests/legacy/test_local_deployment.py b/tests/legacy/test_local_deployment.py
index abde90f7..3fe5288f 100644
--- a/tests/legacy/test_local_deployment.py
+++ b/tests/legacy/test_local_deployment.py
@@ -5,65 +5,86 @@
 import pytest
 import mii.legacy as mii
 
+import requests
+from PIL import Image
+
 
 @pytest.mark.parametrize(
     "task_name, model_name, query",
-    [
-        (
-            "conversational",
-            "microsoft/DialoGPT-small",
-            {
-                "text": "DeepSpeed is the greatest",
-                "conversation_id": 3,
-                "past_user_inputs": [],
-                "generated_responses": [],
-            },
-        ),
-        (
-            "fill-mask",
-            "bert-base-uncased",
-            {
-                "query": "Hello I'm a [MASK] model."
-            },
-        ),
-        (
-            "question-answering",
-            "deepset/roberta-large-squad2",
-            {
-                "question": "What is the greatest?",
-                "context": "DeepSpeed is the greatest",
-            },
-        ),
-        (
-            "text-generation",
-            "distilgpt2",
-            {
-                "query": ["DeepSpeed is the greatest"]
-            },
-        ),
-        (
-            "text-generation",
-            "bigscience/bloom-560m",
-            {
-                "query": ["DeepSpeed is the greatest",
-                          "Seattle is"]
-            },
-        ),
-        (
-            "token-classification",
-            "Jean-Baptiste/roberta-large-ner-english",
-            {
-                "query": "My name is jean-baptiste and I live in montreal."
-            },
-        ),
-        (
-            "text-classification",
-            "roberta-large-mnli",
-            {
-                "query": "DeepSpeed is the greatest"
-            },
-        ),
-    ],
+    [(
+        "conversational",
+        "microsoft/DialoGPT-small",
+        {
+            "text": "DeepSpeed is the greatest",
+            "conversation_id": 3,
+            "past_user_inputs": [],
+            "generated_responses": [],
+        },
+    ),
+     (
+         "fill-mask",
+         "bert-base-uncased",
+         {
+             "query": "Hello I'm a [MASK] model."
+         },
+     ),
+     (
+         "question-answering",
+         "deepset/roberta-large-squad2",
+         {
+             "question": "What is the greatest?",
+             "context": "DeepSpeed is the greatest",
+         },
+     ),
+     (
+         "text-generation",
+         "bigscience/bloom-560m",
+         {
+             "query": ["DeepSpeed is the greatest",
+                       "Seattle is"]
+         },
+     ),
+     (
+         "token-classification",
+         "Jean-Baptiste/roberta-large-ner-english",
+         {
+             "query": "My name is jean-baptiste and I live in montreal."
+         },
+     ),
+     (
+         "text-classification",
+         "roberta-large-mnli",
+         {
+             "query": "DeepSpeed is the greatest"
+         },
+     ),
+     (
+         "zero-shot-image-classification",
+         "openai/clip-vit-base-patch32",
+         {
+             "image":
+             "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
+             "candidate_labels": ["animals",
+                                  "humans",
+                                  "landscape"]
+         },
+     ),
+     ("text-to-image-inpainting",
+      "stabilityai/stable-diffusion-2-inpainting",
+      {
+          "prompt":
+          "the head of a dog",
+          "image":
+          Image.open(
+              requests.get(
+                  "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
+                  stream=True).raw),
+          "mask_image":
+          Image.open(
+              requests.get(
+                  "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
+                  stream=True).raw),
+      })],
 )
 def test_single_GPU(deployment, query):
     generator = mii.mii_query_handle(deployment)
@@ -125,22 +146,11 @@ def test_session(deployment, query):
                 "negative_prompt": "planet earth",
             },
         ),
-        (
-            "zero-shot-image-classification",
-            "openai/clip-vit-base-patch32",
-            {
-                "image":
-                "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
-                "candidate_labels": ["animals",
-                                     "humans",
-                                     "landscape"]
-            },
-        ),
     ],
 )
+@pytest.mark.parametrize("enable_cuda_graph", [True])
 @pytest.mark.parametrize("min_compute_capability", [8])
-def test_stable_diffusion(deployment, query):
-    print(deployment)
+def test_SD_kernel_inject(deployment, query):
     generator = mii.mii_query_handle(deployment)
     result = generator.query(query)
     assert result
diff --git a/tests/legacy/test_non_persistent_deployment.py b/tests/legacy/test_non_persistent_deployment.py
index edd94b93..8f04d4ee 100644
--- a/tests/legacy/test_non_persistent_deployment.py
+++ b/tests/legacy/test_non_persistent_deployment.py
@@ -36,13 +36,6 @@
                 "context": "DeepSpeed is the greatest",
             },
         ),
-        (
-            "text-generation",
-            "distilgpt2",
-            {
-                "query": ["DeepSpeed is the greatest"]
-            },
-        ),
         (
             "text-generation",
             "bigscience/bloom-560m",

From 44e0b4925836e6f81470259e77c9ede0c1274af7 Mon Sep 17 00:00:00 2001
From: Michael Wyatt <michaelwyatt@microsoft.com>
Date: Fri, 16 Feb 2024 10:02:56 -0800
Subject: [PATCH 4/6] allow images to be passed for inpainting task

---
 mii/legacy/method_table.py            | 24 +++++++++++++++++++++---
 tests/legacy/test_local_deployment.py |  6 +++---
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/mii/legacy/method_table.py b/mii/legacy/method_table.py
index 145d6092..fb75d21e 100644
--- a/mii/legacy/method_table.py
+++ b/mii/legacy/method_table.py
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # DeepSpeed Team
+import io
 import uuid
 
 from abc import ABC, abstractmethod
@@ -331,11 +332,28 @@ def pack_request_to_proto(self, request_dict, **query_kwargs):
         negative_prompt = request_dict.get("negative_prompt", [""] * len(prompt))
         negative_prompt = negative_prompt if isinstance(negative_prompt,
                                                         list) else [negative_prompt]
-        image = request_dict["image"] if isinstance(request_dict["image"],
-                                                    list) else [request_dict["image"]]
-        mask_image = request_dict["mask_image"] if isinstance(
+        image_list = request_dict["image"] if isinstance(
+            request_dict["image"],
+            list) else [request_dict["image"]]
+        mask_image_list = request_dict["mask_image"] if isinstance(
             request_dict["mask_image"],
             list) else [request_dict["mask_image"]]
+        def _to_bytes(img):
+            """Return ``img`` as encoded bytes for the proto request."""
+            if isinstance(img, bytes):
+                # Already serialized by the caller; pass through untouched.
+                return img
+            buf = io.BytesIO()
+            # ``img.format`` is None for images not loaded from a file
+            # (e.g. Image.new or the result of resize/convert); fall back
+            # to PNG so ``save`` always gets an explicit format.
+            img.save(buf, format=img.format or "PNG")
+            return buf.getvalue()
+
+        # Serialize both lists the same way; each entry is either bytes or
+        # an image object exposing ``save`` and ``format``.
+        image = [_to_bytes(img) for img in image_list]
+        mask_image = [_to_bytes(img) for img in mask_image_list]
 
         return modelresponse_pb2.InpaintingRequest(
             prompt=prompt,
diff --git a/tests/legacy/test_local_deployment.py b/tests/legacy/test_local_deployment.py
index 3fe5288f..8a010468 100644
--- a/tests/legacy/test_local_deployment.py
+++ b/tests/legacy/test_local_deployment.py
@@ -73,16 +73,16 @@
       "stabilityai/stable-diffusion-2-inpainting",
       {
           "prompt":
-          "the head of a dog",
+          "a black cat with glowing eyes",
           "image":
           Image.open(
               requests.get(
-                  "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
+                  "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png",
                   stream=True).raw),
           "mask_image":
           Image.open(
               requests.get(
-                  "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
+                  "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png",
                   stream=True).raw),
       })],
 )

From 6c0100f5e5fb78b1e24b97254efbc6817dd668ac Mon Sep 17 00:00:00 2001
From: Logan Adams <loadams@microsoft.com>
Date: Wed, 30 Oct 2024 09:17:36 -0700
Subject: [PATCH 5/6] Remove errant import for formatting

---
 mii/legacy/method_table.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mii/legacy/method_table.py b/mii/legacy/method_table.py
index 286dd462..85d21b79 100644
--- a/mii/legacy/method_table.py
+++ b/mii/legacy/method_table.py
@@ -3,7 +3,6 @@
 
 # DeepSpeed Team
 import io
-import uuid
 
 from abc import ABC, abstractmethod
 from mii.legacy.constants import TaskType

From 42c5f0d834f34be3f0cde9ba73e40a03c864c55c Mon Sep 17 00:00:00 2001
From: Logan Adams <loadams@microsoft.com>
Date: Wed, 30 Oct 2024 09:58:24 -0700
Subject: [PATCH 6/6] Update workflow

---
 .github/workflows/nv-a6000-sd.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/nv-a6000-sd.yml b/.github/workflows/nv-a6000-sd.yml
index bf3cb64a..8d41f5bd 100644
--- a/.github/workflows/nv-a6000-sd.yml
+++ b/.github/workflows/nv-a6000-sd.yml
@@ -18,7 +18,7 @@ jobs:
   unit-tests:
     runs-on: [self-hosted, nvidia, a6000]
     container:
-      image: nvcr.io/nvidia/pytorch:23.03-py3
+      image: nvcr.io/nvidia/pytorch:24.03-py3
       ports:
         - 80
       options: --gpus all --shm-size "8G"