From e44ad7da5de8edb04a0345a6950b1811f08ec36b Mon Sep 17 00:00:00 2001 From: mosout Date: Thu, 21 Jul 2022 17:35:04 +0800 Subject: [PATCH 1/3] support one embedding --- CMakeLists.txt | 2 ++ examples_embedding/embedding/client.py | 36 +++++++++++++++++++++++ examples_embedding/embedding/config.pbtxt | 34 +++++++++++++++++++++ include/triton/model_state.h | 2 ++ src/triton/model_state.cpp | 19 ++++++++++-- 5 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 examples_embedding/embedding/client.py create mode 100644 examples_embedding/embedding/config.pbtxt diff --git a/CMakeLists.txt b/CMakeLists.txt index c1e0f81..d9157f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,7 @@ cmake_minimum_required(VERSION 3.18.0) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON CACHE BOOL "") + project(OneFlowServing) if(NOT CMAKE_BUILD_TYPE) diff --git a/examples_embedding/embedding/client.py b/examples_embedding/embedding/client.py new file mode 100644 index 0000000..1853d62 --- /dev/null +++ b/examples_embedding/embedding/client.py @@ -0,0 +1,36 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import time +import numpy as np +import tritonclient.http as httpclient + + +if __name__ == '__main__': + triton_client = httpclient.InferenceServerClient(url='127.0.0.1:8000') + + data = np.ones((10000,39)).astype(np.int64) + + inputs = [] + inputs.append(httpclient.InferInput('INPUT_0', data.shape, "INT64")) + inputs[0].set_data_from_numpy(data, binary_data=True) + outputs = [] + outputs.append(httpclient.InferRequestedOutput('OUTPUT_0', binary_data=True, class_count=1)) + now = time.time() + results = triton_client.infer("embedding", inputs=inputs, outputs=outputs) + print(time.time() - now) + output_data0 = results.as_numpy('OUTPUT_0') + print(output_data0.shape) diff --git a/examples_embedding/embedding/config.pbtxt b/examples_embedding/embedding/config.pbtxt new file mode 100644 index 0000000..cd12764 --- /dev/null +++ b/examples_embedding/embedding/config.pbtxt @@ -0,0 +1,34 @@ +name: "embedding" +backend: "oneflow" +max_batch_size: 10000 + +input [ + { + name: "INPUT_0" + data_type: TYPE_INT64 + dims: [ 39 ] + } +] + +output [ + { + name: "OUTPUT_0" + data_type: TYPE_FP32 + dims: [ 1 ] + } +] + +instance_group [ + { + count: 1 + kind: KIND_GPU + gpus: [ 0 ] + } +] + +parameters { + key: "one_embedding_persistent_table_path" + value: { + string_value: "/mnt/DATA/lph/s_embedding/examples_embedding/embedding/dlrm/init_model/0-1", + } +} \ No newline at end of file diff --git a/include/triton/model_state.h b/include/triton/model_state.h index 446bd1c..47504ca 100644 --- a/include/triton/model_state.h +++ b/include/triton/model_state.h @@ -96,6 +96,8 @@ class ModelState : public BackendModel { TRITONSERVER_Error* ValidateAndParseOutputs(); XrtKind xrt_kind_ = XrtKind::kOneflow; + bool enable_one_embedding_ = false; + std::string persistent_table_path_ = ""; std::vector input_names_; std::vector output_names_; diff --git a/src/triton/model_state.cpp b/src/triton/model_state.cpp index 9647a02..1b94130 100644 --- a/src/triton/model_state.cpp +++ b/src/triton/model_state.cpp @@ -72,6 +72,15 @@ ModelState::ValidateAndParseModelConfig() RETURN_IF_ERROR(xrt.MemberAsString("string_value", &xrt_str)); this->xrt_kind_ = ParseXrtKind(xrt_str, &is_unknown); } + + common::TritonJson::Value one_embedding_persistent_table_path; + if (params.Find( + "one_embedding_persistent_table_path", + &one_embedding_persistent_table_path)) { + enable_one_embedding_ = true; + RETURN_IF_ERROR(one_embedding_persistent_table_path.MemberAsString( + "string_value", &persistent_table_path_)); + } } if (is_unknown) { LOG_MESSAGE( @@ -335,9 +344,15 @@ ModelState::LoadModel( std::string("unable to find '") + model_path + "' for model instance '" + Name() + "'"); } + if (enable_one_embedding_) { + graph->reset(new oneflow_api::Graph(oneflow_api::Graph::LoadOneEmbedding( + model_path, device, persistent_table_path_))); + + } else { + graph->reset( + new oneflow_api::Graph(oneflow_api::Graph::Load(model_path, device))); + } - graph->reset( - new oneflow_api::Graph(oneflow_api::Graph::Load(model_path, device))); if (MaxBatchSize() > 0) { (*graph)->set_batch_size(MaxBatchSize()); } From 01f74eef4d1c5135108a61450159613004134528 Mon Sep 17 00:00:00 2001 From: mosout Date: Thu, 4 Aug 2022 16:11:44 +0800 Subject: [PATCH 2/3] refine --- examples_embedding/embedding/config.pbtxt | 7 ------- include/triton/model_state.h | 2 -- src/triton/model_state.cpp | 20 +++----------------- 3 files changed, 3 insertions(+), 26 deletions(-) diff --git a/examples_embedding/embedding/config.pbtxt b/examples_embedding/embedding/config.pbtxt index cd12764..6666203 100644 --- a/examples_embedding/embedding/config.pbtxt +++ b/examples_embedding/embedding/config.pbtxt @@ -25,10 +25,3 @@ instance_group [ gpus: [ 0 ] } ] - -parameters { - key: "one_embedding_persistent_table_path" - value: { - string_value: "/mnt/DATA/lph/s_embedding/examples_embedding/embedding/dlrm/init_model/0-1", - } -} \ No newline at end of file diff --git a/include/triton/model_state.h b/include/triton/model_state.h index 47504ca..446bd1c 100644 --- a/include/triton/model_state.h +++ b/include/triton/model_state.h @@ -96,8 +96,6 @@ class ModelState : public BackendModel { TRITONSERVER_Error* ValidateAndParseOutputs(); XrtKind xrt_kind_ = XrtKind::kOneflow; - bool enable_one_embedding_ = false; - std::string persistent_table_path_ = ""; std::vector input_names_; std::vector output_names_; diff --git a/src/triton/model_state.cpp b/src/triton/model_state.cpp index 1b94130..0956c6b 100644 --- a/src/triton/model_state.cpp +++ b/src/triton/model_state.cpp @@ -72,15 +72,6 @@ ModelState::ValidateAndParseModelConfig() RETURN_IF_ERROR(xrt.MemberAsString("string_value", &xrt_str)); this->xrt_kind_ = ParseXrtKind(xrt_str, &is_unknown); } - - common::TritonJson::Value one_embedding_persistent_table_path; - if (params.Find( - "one_embedding_persistent_table_path", - &one_embedding_persistent_table_path)) { - enable_one_embedding_ = true; - RETURN_IF_ERROR(one_embedding_persistent_table_path.MemberAsString( - "string_value", &persistent_table_path_)); - } } if (is_unknown) { LOG_MESSAGE( @@ -344,14 +335,9 @@ ModelState::LoadModel( std::string("unable to find '") + model_path + "' for model instance '" + Name() + "'"); } - if (enable_one_embedding_) { - graph->reset(new oneflow_api::Graph(oneflow_api::Graph::LoadOneEmbedding( - model_path, device, persistent_table_path_))); - - } else { - graph->reset( - new oneflow_api::Graph(oneflow_api::Graph::Load(model_path, device))); - } + + graph->reset( + new oneflow_api::Graph(oneflow_api::Graph::Load(model_path, device))); if (MaxBatchSize() > 0) { (*graph)->set_batch_size(MaxBatchSize()); From 021e3ba68add8614f62945c44a7b3b80b362aba7 Mon Sep 17 00:00:00 2001 From: mosout Date: Thu, 4 Aug 2022 16:12:45 +0800 Subject: [PATCH 3/3] refine --- src/triton/model_state.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/triton/model_state.cpp b/src/triton/model_state.cpp index 0956c6b..733fae7 100644 --- a/src/triton/model_state.cpp +++ b/src/triton/model_state.cpp @@ -335,7 +335,7 @@ ModelState::LoadModel( std::string("unable to find '") + model_path + "' for model instance '" + Name() + "'"); } - + graph->reset( new oneflow_api::Graph(oneflow_api::Graph::Load(model_path, device)));