From 4542f2f761d5d900ff1d0478ad490039021611e3 Mon Sep 17 00:00:00 2001 From: Adi Eldar Date: Thu, 8 Jan 2026 09:40:43 +0200 Subject: [PATCH 1/7] Split it to ADX & Fabric versions --- .../slm-embeddings-fl-adx.md | 232 +++++++++++++++++ .../slm-embeddings-fl-fabric.md | 233 ++++++++++++++++++ .../functions-library/slm-embeddings-fl.md | 229 +---------------- 3 files changed, 473 insertions(+), 221 deletions(-) create mode 100644 data-explorer/kusto/functions-library/slm-embeddings-fl-adx.md create mode 100644 data-explorer/kusto/functions-library/slm-embeddings-fl-fabric.md diff --git a/data-explorer/kusto/functions-library/slm-embeddings-fl-adx.md b/data-explorer/kusto/functions-library/slm-embeddings-fl-adx.md new file mode 100644 index 0000000000..6a2cbee445 --- /dev/null +++ b/data-explorer/kusto/functions-library/slm-embeddings-fl-adx.md @@ -0,0 +1,232 @@ +--- +ms.topic: include +ms.date: 01/08/2026 +--- + +# slm_embeddings_fl() + +The function `slm_embeddings_fl()` is a [UDF (user-defined function)](../query/functions/user-defined-functions.md) that generates text embeddings using local Small Language Models (SLM). This function converts text into numerical vector representations that can be used for semantic search, similarity analysis, and other natural language processing tasks. +Currently the function supports [jina-v2-small](https://huggingface.co/jinaai/jina-embeddings-v2-small-en) and [e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) models. 
+ +[!INCLUDE [python-zone-pivot-fabric](../includes/python-zone-pivot-fabric.md)] +* More for ADX + +## Syntax + +`T | invoke slm_embeddings_fl(`*text_col*`,` *embeddings_col* [`,` *batch_size* ] [`,` *model_name* ] [`,` *prefix* ]`)` + +[!INCLUDE [syntax-conventions-note](../includes/syntax-conventions-note.md)] + +## Parameters + +|Name|Type|Required|Description| +|--|--|--|--| +|*text_col*| `string` | :heavy_check_mark:|The name of the column containing the text to embed.| +|*embeddings_col*| `string` | :heavy_check_mark:|The name of the column to store the output embeddings.| +|*batch_size*| `int` ||The number of texts to process in each batch. Default is 32.| +|*model_name*| `string` ||The name of the embedding model to use. Supported values are `jina-v2-small` (default) and `e5-small-v2`.| +|*prefix*| `string` ||The text prefix to add before each input. Default is `query:`. For the E5 model, use `query:` for search queries and `passage:` for documents to be searched. This parameter is ignored for the Jina model.| + +## Function definition + +You can define the function by either embedding its code as a query-defined function, or creating it as a stored function in your database, as follows: + +### [Query-defined](#tab/query-defined) + +Define the function using the following [let statement](../query/let-statement.md). No permissions are required. + +> [!IMPORTANT] +> A [let statement](../query/let-statement.md) can't run on its own. It must be followed by a [tabular expression statement](../query/tabular-expression-statements.md). To run a working example of `slm_embeddings_fl()`, see [Example](#example). 
+ +~~~kusto +let slm_embeddings_fl = (tbl:(*), text_col:string, embeddings_col:string, batch_size:int=32, model_name:string='jina-v2-small', prefix:string='query:') +{ + let kwargs = bag_pack('text_col', text_col, 'embeddings_col', embeddings_col, 'batch_size', batch_size, 'model_name', model_name, 'prefix', prefix); + let code = ```if 1: + from sandbox_utils import Zipackage + Zipackage.install('embedding_engine.zip') +# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image + + from embedding_factory import create_embedding_engine + + text_col = kargs["text_col"] + embeddings_col = kargs["embeddings_col"] + batch_size = kargs["batch_size"] + model_name = kargs["model_name"] + prefix = kargs["prefix"] + + Zipackage.install(f'{model_name}.zip') + + engine = create_embedding_engine(model_name, cache_dir="C:\\Temp") + embeddings = engine.encode(df[text_col].tolist(), batch_size=batch_size, prefix=prefix) # prefix is used only for E5 + + result = df + result[embeddings_col] = list(embeddings) + ```; + tbl + | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( + 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', +// 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl', + 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', + 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) +}; +// Write your query to use the function here. +~~~ + +### [Stored](#tab/stored) + +Define the stored function once using the following [`.create function`](../management/create-function.md). [Database User permissions](../access-control/role-based-access-control.md) are required. 
+ +> [!IMPORTANT] +> You must run this code to create the function before you can use the function as shown in the [Example](#example). + +~~~kusto +.create-or-alter function with (folder = "Packages\\AI", docstring = "Embedding using local SLM") +slm_embeddings_fl(tbl:(*), text_col:string, embeddings_col:string, batch_size:int=32, model_name:string='jina-v2-small', prefix:string='query:') +{ + let kwargs = bag_pack('text_col', text_col, 'embeddings_col', embeddings_col, 'batch_size', batch_size, 'model_name', model_name, 'prefix', prefix); + let code = ```if 1: + from sandbox_utils import Zipackage + Zipackage.install('embedding_engine.zip') +# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image + + from embedding_factory import create_embedding_engine + + text_col = kargs["text_col"] + embeddings_col = kargs["embeddings_col"] + batch_size = kargs["batch_size"] + model_name = kargs["model_name"] + prefix = kargs["prefix"] + + Zipackage.install(f'{model_name}.zip') + + engine = create_embedding_engine(model_name, cache_dir="C:\\Temp") + embeddings = engine.encode(df[text_col].tolist(), batch_size=batch_size, prefix=prefix) # prefix is used only for E5 + + result = df + result[embeddings_col] = list(embeddings) + ```; + tbl + | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( + 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', +// 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl', + 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', + 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) +} +~~~ + +--- + +## Example + +The following example uses the [invoke operator](../query/invoke-operator.md) to run the function. 
+ +### Generate embeddings and perform semantic search + +### [Query-defined](#tab/query-defined) + +To use a query-defined function, invoke it after the embedded function definition. + +~~~kusto +let slm_embeddings_fl=(tbl:(*), text_col:string, embeddings_col:string, batch_size:int=32, model_name:string='jina-v2-small', prefix:string='query:') +{ + let kwargs = bag_pack('text_col', text_col, 'embeddings_col', embeddings_col, 'batch_size', batch_size, 'model_name', model_name, 'prefix', prefix); + let code = ```if 1: + from sandbox_utils import Zipackage + Zipackage.install('embedding_engine.zip') +# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image + + from embedding_factory import create_embedding_engine + + text_col = kargs["text_col"] + embeddings_col = kargs["embeddings_col"] + batch_size = kargs["batch_size"] + model_name = kargs["model_name"] + prefix = kargs["prefix"] + + Zipackage.install(f'{model_name}.zip') + + engine = create_embedding_engine(model_name, cache_dir="C:\\Temp") + embeddings = engine.encode(df[text_col].tolist(), batch_size=batch_size, prefix=prefix) # prefix is used only for E5 + + result = df + result[embeddings_col] = list(embeddings) + ```; + tbl + | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( + 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', + 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', + 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) +}; +// +// Create a sample dataset with text passages +let passages = datatable(text:string) +[ + "Machine learning models can process natural language efficiently.", + "Python is a versatile programming language for data science.", + "Azure Data Explorer provides fast analytics on large datasets.", + "Embeddings convert 
text into numerical vector representations.", + "Neural networks learn patterns from training data." +]; +// Generate embeddings for passages using 'passage:' prefix +let passage_embeddings = + passages + | extend text_embeddings=dynamic(null) + | invoke slm_embeddings_fl('text', 'text_embeddings', 32, 'e5-small-v2', 'passage:'); +// Create a search query and find similar passages +let search_query = datatable(query:string) +[ + "How do embeddings work?" +]; +search_query +| extend query_embeddings=dynamic(null) +| invoke slm_embeddings_fl('query', 'query_embeddings', 32, 'e5-small-v2', 'query:') +| extend dummy=1 +| join (passage_embeddings | extend dummy=1) on dummy +| project query, text, similarity=series_cosine_similarity(query_embeddings, text_embeddings, 1.0, 1.0) +| top 3 by similarity desc +~~~ + +### [Stored](#tab/stored) + +> [!IMPORTANT] +> For this example to run successfully, you must first run the [Function definition](#function-definition) code to store the function. + +```kusto +// Create a sample dataset with text passages +let passages = datatable(text:string) +[ + "Machine learning models can process natural language efficiently.", + "Python is a versatile programming language for data science.", + "Azure Data Explorer provides fast analytics on large datasets.", + "Embeddings convert text into numerical vector representations.", + "Neural networks learn patterns from training data." +]; +// Generate embeddings for passages using 'passage:' prefix +let passage_embeddings = + passages + | extend text_embeddings=dynamic(null) + | invoke slm_embeddings_fl('text', 'text_embeddings', 32, 'e5-small-v2', 'passage:'); +// Create a search query and find similar passages +let search_query = datatable(query:string) +[ + "How do embeddings work?" 
+]; +search_query +| extend query_embeddings=dynamic(null) +| invoke slm_embeddings_fl('query', 'query_embeddings', 32, 'e5-small-v2', 'query:') +| extend dummy=1 +| join (passage_embeddings | extend dummy=1) on dummy +| project query, text, similarity=series_cosine_similarity(query_embeddings, text_embeddings, 1.0, 1.0) +| top 3 by similarity desc +``` + +--- + +**Output** + +| query | text | similarity | +|---|---|---| +| How do embeddings work? | Embeddings convert text into numerical vector representations. | 0.871 | +| How do embeddings work? | Neural networks learn patterns from training data. | 0.812 | +| How do embeddings work? | Machine learning models can process natural language efficiently. | 0.782 | diff --git a/data-explorer/kusto/functions-library/slm-embeddings-fl-fabric.md b/data-explorer/kusto/functions-library/slm-embeddings-fl-fabric.md new file mode 100644 index 0000000000..acc1a6c5a6 --- /dev/null +++ b/data-explorer/kusto/functions-library/slm-embeddings-fl-fabric.md @@ -0,0 +1,233 @@ +--- +ms.topic: include +ms.date: 01/08/2026 +--- + +# slm_embeddings_fl() + +The function `slm_embeddings_fl()` is a [UDF (user-defined function)](../query/functions/user-defined-functions.md) that generates text embeddings using local Small Language Models (SLM). This function converts text into numerical vector representations that can be used for semantic search, similarity analysis, and other natural language processing tasks. +Currently the function supports [jina-v2-small](https://huggingface.co/jinaai/jina-embeddings-v2-small-en) and [e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) models. 
+ +[!INCLUDE [python-zone-pivot-fabric](../includes/python-zone-pivot-fabric.md)] + +* More for Fabric + +## Syntax + +`T | invoke slm_embeddings_fl(`*text_col*`,` *embeddings_col* [`,` *batch_size* ] [`,` *model_name* ] [`,` *prefix* ]`)` + +[!INCLUDE [syntax-conventions-note](../includes/syntax-conventions-note.md)] + +## Parameters + +|Name|Type|Required|Description| +|--|--|--|--| +|*text_col*| `string` | :heavy_check_mark:|The name of the column containing the text to embed.| +|*embeddings_col*| `string` | :heavy_check_mark:|The name of the column to store the output embeddings.| +|*batch_size*| `int` ||The number of texts to process in each batch. Default is 32.| +|*model_name*| `string` ||The name of the embedding model to use. Supported values are `jina-v2-small` (default) and `e5-small-v2`.| +|*prefix*| `string` ||The text prefix to add before each input. Default is `query:`. For the E5 model, use `query:` for search queries and `passage:` for documents to be searched. This parameter is ignored for the Jina model.| + +## Function definition + +You can define the function by either embedding its code as a query-defined function, or creating it as a stored function in your database, as follows: + +### [Query-defined](#tab/query-defined) + +Define the function using the following [let statement](../query/let-statement.md). No permissions are required. + +> [!IMPORTANT] +> A [let statement](../query/let-statement.md) can't run on its own. It must be followed by a [tabular expression statement](../query/tabular-expression-statements.md). To run a working example of `slm_embeddings_fl()`, see [Example](#example). 
+ +~~~kusto +let slm_embeddings_fl = (tbl:(*), text_col:string, embeddings_col:string, batch_size:int=32, model_name:string='jina-v2-small', prefix:string='query:') +{ + let kwargs = bag_pack('text_col', text_col, 'embeddings_col', embeddings_col, 'batch_size', batch_size, 'model_name', model_name, 'prefix', prefix); + let code = ```if 1: + from sandbox_utils import Zipackage + Zipackage.install('embedding_engine.zip') +# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image + + from embedding_factory import create_embedding_engine + + text_col = kargs["text_col"] + embeddings_col = kargs["embeddings_col"] + batch_size = kargs["batch_size"] + model_name = kargs["model_name"] + prefix = kargs["prefix"] + + Zipackage.install(f'{model_name}.zip') + + engine = create_embedding_engine(model_name, cache_dir="C:\\Temp") + embeddings = engine.encode(df[text_col].tolist(), batch_size=batch_size, prefix=prefix) # prefix is used only for E5 + + result = df + result[embeddings_col] = list(embeddings) + ```; + tbl + | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( + 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', +// 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl', + 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', + 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) +}; +// Write your query to use the function here. +~~~ + +### [Stored](#tab/stored) + +Define the stored function once using the following [`.create function`](../management/create-function.md). [Database User permissions](../access-control/role-based-access-control.md) are required. 
+ +> [!IMPORTANT] +> You must run this code to create the function before you can use the function as shown in the [Example](#example). + +~~~kusto +.create-or-alter function with (folder = "Packages\\AI", docstring = "Embedding using local SLM") +slm_embeddings_fl(tbl:(*), text_col:string, embeddings_col:string, batch_size:int=32, model_name:string='jina-v2-small', prefix:string='query:') +{ + let kwargs = bag_pack('text_col', text_col, 'embeddings_col', embeddings_col, 'batch_size', batch_size, 'model_name', model_name, 'prefix', prefix); + let code = ```if 1: + from sandbox_utils import Zipackage + Zipackage.install('embedding_engine.zip') +# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image + + from embedding_factory import create_embedding_engine + + text_col = kargs["text_col"] + embeddings_col = kargs["embeddings_col"] + batch_size = kargs["batch_size"] + model_name = kargs["model_name"] + prefix = kargs["prefix"] + + Zipackage.install(f'{model_name}.zip') + + engine = create_embedding_engine(model_name, cache_dir="C:\\Temp") + embeddings = engine.encode(df[text_col].tolist(), batch_size=batch_size, prefix=prefix) # prefix is used only for E5 + + result = df + result[embeddings_col] = list(embeddings) + ```; + tbl + | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( + 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', +// 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl', + 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', + 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) +} +~~~ + +--- + +## Example + +The following example uses the [invoke operator](../query/invoke-operator.md) to run the function. 
+ +### Generate embeddings and perform semantic search + +### [Query-defined](#tab/query-defined) + +To use a query-defined function, invoke it after the embedded function definition. + +~~~kusto +let slm_embeddings_fl=(tbl:(*), text_col:string, embeddings_col:string, batch_size:int=32, model_name:string='jina-v2-small', prefix:string='query:') +{ + let kwargs = bag_pack('text_col', text_col, 'embeddings_col', embeddings_col, 'batch_size', batch_size, 'model_name', model_name, 'prefix', prefix); + let code = ```if 1: + from sandbox_utils import Zipackage + Zipackage.install('embedding_engine.zip') +# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image + + from embedding_factory import create_embedding_engine + + text_col = kargs["text_col"] + embeddings_col = kargs["embeddings_col"] + batch_size = kargs["batch_size"] + model_name = kargs["model_name"] + prefix = kargs["prefix"] + + Zipackage.install(f'{model_name}.zip') + + engine = create_embedding_engine(model_name, cache_dir="C:\\Temp") + embeddings = engine.encode(df[text_col].tolist(), batch_size=batch_size, prefix=prefix) # prefix is used only for E5 + + result = df + result[embeddings_col] = list(embeddings) + ```; + tbl + | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( + 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', + 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', + 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) +}; +// +// Create a sample dataset with text passages +let passages = datatable(text:string) +[ + "Machine learning models can process natural language efficiently.", + "Python is a versatile programming language for data science.", + "Azure Data Explorer provides fast analytics on large datasets.", + "Embeddings convert 
text into numerical vector representations.", + "Neural networks learn patterns from training data." +]; +// Generate embeddings for passages using 'passage:' prefix +let passage_embeddings = + passages + | extend text_embeddings=dynamic(null) + | invoke slm_embeddings_fl('text', 'text_embeddings', 32, 'e5-small-v2', 'passage:'); +// Create a search query and find similar passages +let search_query = datatable(query:string) +[ + "How do embeddings work?" +]; +search_query +| extend query_embeddings=dynamic(null) +| invoke slm_embeddings_fl('query', 'query_embeddings', 32, 'e5-small-v2', 'query:') +| extend dummy=1 +| join (passage_embeddings | extend dummy=1) on dummy +| project query, text, similarity=series_cosine_similarity(query_embeddings, text_embeddings, 1.0, 1.0) +| top 3 by similarity desc +~~~ + +### [Stored](#tab/stored) + +> [!IMPORTANT] +> For this example to run successfully, you must first run the [Function definition](#function-definition) code to store the function. + +```kusto +// Create a sample dataset with text passages +let passages = datatable(text:string) +[ + "Machine learning models can process natural language efficiently.", + "Python is a versatile programming language for data science.", + "Azure Data Explorer provides fast analytics on large datasets.", + "Embeddings convert text into numerical vector representations.", + "Neural networks learn patterns from training data." +]; +// Generate embeddings for passages using 'passage:' prefix +let passage_embeddings = + passages + | extend text_embeddings=dynamic(null) + | invoke slm_embeddings_fl('text', 'text_embeddings', 32, 'e5-small-v2', 'passage:'); +// Create a search query and find similar passages +let search_query = datatable(query:string) +[ + "How do embeddings work?" 
+]; +search_query +| extend query_embeddings=dynamic(null) +| invoke slm_embeddings_fl('query', 'query_embeddings', 32, 'e5-small-v2', 'query:') +| extend dummy=1 +| join (passage_embeddings | extend dummy=1) on dummy +| project query, text, similarity=series_cosine_similarity(query_embeddings, text_embeddings, 1.0, 1.0) +| top 3 by similarity desc +``` + +--- + +**Output** + +| query | text | similarity | +|---|---|---| +| How do embeddings work? | Embeddings convert text into numerical vector representations. | 0.871 | +| How do embeddings work? | Neural networks learn patterns from training data. | 0.812 | +| How do embeddings work? | Machine learning models can process natural language efficiently. | 0.782 | diff --git a/data-explorer/kusto/functions-library/slm-embeddings-fl.md b/data-explorer/kusto/functions-library/slm-embeddings-fl.md index 8691c24b64..e74bc0c61b 100644 --- a/data-explorer/kusto/functions-library/slm-embeddings-fl.md +++ b/data-explorer/kusto/functions-library/slm-embeddings-fl.md @@ -4,232 +4,19 @@ description: This article describes the slm_embeddings_fl() user-defined functi ms.reviewer: adieldar ms.topic: reference ms.date: 12/16/2025 +monikerRange: "microsoft-fabric || azure-data-explorer" --- # slm_embeddings_fl() ->[!INCLUDE [applies](../includes/applies-to-version/applies.md)] [!INCLUDE [fabric](../includes/applies-to-version/fabric.md)] [!INCLUDE [azure-data-explorer](../includes/applies-to-version/azure-data-explorer.md)] +> [!INCLUDE [applies](../includes/applies-to-version/applies.md)] [!INCLUDE [fabric](../includes/applies-to-version/fabric.md)] [!INCLUDE [azure-data-explorer](../includes/applies-to-version/azure-data-explorer.md)] -The function `slm_embeddings_fl()` is a [UDF (user-defined function)](../query/functions/user-defined-functions.md) that generates text embeddings using local Small Language Models (SLM). 
This function converts text into numerical vector representations that can be used for semantic search, similarity analysis, and other natural language processing tasks. -Currently the function supports [jina-v2-small](https://huggingface.co/jinaai/jina-embeddings-v2-small-en) and [e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) models. +::: moniker range="azure-data-explorer" -[!INCLUDE [python-zone-pivot-fabric](../includes/python-zone-pivot-fabric.md)] +[!INCLUDE [slm-embeddings-fl-adx](../includes/slm-embeddings-fl-adx.md)] +::: moniker-end -## Syntax +::: moniker range="microsoft-fabric" -`T | invoke slm_embeddings_fl(`*text_col*`,` *embeddings_col* [`,` *batch_size* ] [`,` *model_name* ] [`,` *prefix* ]`)` +[!INCLUDE [slm-embeddings-fl-fabric](../includes/slm-embeddings-fl-fabric.md)] -[!INCLUDE [syntax-conventions-note](../includes/syntax-conventions-note.md)] - -## Parameters - -|Name|Type|Required|Description| -|--|--|--|--| -|*text_col*| `string` | :heavy_check_mark:|The name of the column containing the text to embed.| -|*embeddings_col*| `string` | :heavy_check_mark:|The name of the column to store the output embeddings.| -|*batch_size*| `int` ||The number of texts to process in each batch. Default is 32.| -|*model_name*| `string` ||The name of the embedding model to use. Supported values are `jina-v2-small` (default) and `e5-small-v2`.| -|*prefix*| `string` ||The text prefix to add before each input. Default is `query:`. For E5 model, use `query:` for search queries and `passage:` for documents to be searched. This parameter is ignored for Jina model.| - -## Function definition - -You can define the function by either embedding its code as a query-defined function, or creating it as a stored function in your database, as follows: - -### [Query-defined](#tab/query-defined) - -Define the function using the following [let statement](../query/let-statement.md). No permissions are required. 
- -> [!IMPORTANT] -> A [let statement](../query/let-statement.md) can't run on its own. It must be followed by a [tabular expression statement](../query/tabular-expression-statements.md). To run a working example of `slm_embeddings_fl()`, see [Example](#example). - -~~~kusto -let slm_embeddings_fl = (tbl:(*), text_col:string, embeddings_col:string, batch_size:int=32, model_name:string='jina-v2-small', prefix:string='query:') -{ - let kwargs = bag_pack('text_col', text_col, 'embeddings_col', embeddings_col, 'batch_size', batch_size, 'model_name', model_name, 'prefix', prefix); - let code = ```if 1: - from sandbox_utils import Zipackage - Zipackage.install('embedding_engine.zip') -# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image - - from embedding_factory import create_embedding_engine - - text_col = kargs["text_col"] - embeddings_col = kargs["embeddings_col"] - batch_size = kargs["batch_size"] - model_name = kargs["model_name"] - prefix = kargs["prefix"] - - Zipackage.install(f'{model_name}.zip') - - engine = create_embedding_engine(model_name, cache_dir="C:\\Temp") - embeddings = engine.encode(df[text_col].tolist(), batch_size=batch_size, prefix=prefix) # prefix is used only for E5 - - result = df - result[embeddings_col] = list(embeddings) - ```; - tbl - | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( - 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', -// 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl', - 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', - 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) -}; -// Write your query to use the function here. 
-~~~ - -### [Stored](#tab/stored) - -Define the stored function once using the following [`.create function`](../management/create-function.md). [Database User permissions](../access-control/role-based-access-control.md) are required. - -> [!IMPORTANT] -> You must run this code to create the function before you can use the function as shown in the [Example](#example). - -~~~kusto -.create-or-alter function with (folder = "Packages\\AI", docstring = "Embedding using local SLM") -slm_embeddings_fl(tbl:(*), text_col:string, embeddings_col:string, batch_size:int=32, model_name:string='jina-v2-small', prefix:string='query:') -{ - let kwargs = bag_pack('text_col', text_col, 'embeddings_col', embeddings_col, 'batch_size', batch_size, 'model_name', model_name, 'prefix', prefix); - let code = ```if 1: - from sandbox_utils import Zipackage - Zipackage.install('embedding_engine.zip') -# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image - - from embedding_factory import create_embedding_engine - - text_col = kargs["text_col"] - embeddings_col = kargs["embeddings_col"] - batch_size = kargs["batch_size"] - model_name = kargs["model_name"] - prefix = kargs["prefix"] - - Zipackage.install(f'{model_name}.zip') - - engine = create_embedding_engine(model_name, cache_dir="C:\\Temp") - embeddings = engine.encode(df[text_col].tolist(), batch_size=batch_size, prefix=prefix) # prefix is used only for E5 - - result = df - result[embeddings_col] = list(embeddings) - ```; - tbl - | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( - 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', -// 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl', - 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', - 
'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) -} -~~~ - ---- - -## Example - -The following example uses the [invoke operator](../query/invoke-operator.md) to run the function. - -### Generate embeddings and perform semantic search - -### [Query-defined](#tab/query-defined) - -To use a query-defined function, invoke it after the embedded function definition. - -~~~kusto -let slm_embeddings_fl=(tbl:(*), text_col:string, embeddings_col:string, batch_size:int=32, model_name:string='jina-v2-small', prefix:string='query:') -{ - let kwargs = bag_pack('text_col', text_col, 'embeddings_col', embeddings_col, 'batch_size', batch_size, 'model_name', model_name, 'prefix', prefix); - let code = ```if 1: - from sandbox_utils import Zipackage - Zipackage.install('embedding_engine.zip') -# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image - - from embedding_factory import create_embedding_engine - - text_col = kargs["text_col"] - embeddings_col = kargs["embeddings_col"] - batch_size = kargs["batch_size"] - model_name = kargs["model_name"] - prefix = kargs["prefix"] - - Zipackage.install(f'{model_name}.zip') - - engine = create_embedding_engine(model_name, cache_dir="C:\\Temp") - embeddings = engine.encode(df[text_col].tolist(), batch_size=batch_size, prefix=prefix) # prefix is used only for E5 - - result = df - result[embeddings_col] = list(embeddings) - ```; - tbl - | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( - 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', - 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', - 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) -}; -// -// Create a sample dataset with text passages -let passages = datatable(text:string) -[ 
- "Machine learning models can process natural language efficiently.", - "Python is a versatile programming language for data science.", - "Azure Data Explorer provides fast analytics on large datasets.", - "Embeddings convert text into numerical vector representations.", - "Neural networks learn patterns from training data." -]; -// Generate embeddings for passages using 'passage:' prefix -let passage_embeddings = - passages - | extend text_embeddings=dynamic(null) - | invoke slm_embeddings_fl('text', 'text_embeddings', 32, 'e5-small-v2', 'passage:'); -// Create a search query and find similar passages -let search_query = datatable(query:string) -[ - "How do embeddings work?" -]; -search_query -| extend query_embeddings=dynamic(null) -| invoke slm_embeddings_fl('query', 'query_embeddings', 32, 'e5-small-v2', 'query:') -| extend dummy=1 -| join (passage_embeddings | extend dummy=1) on dummy -| project query, text, similarity=series_cosine_similarity(query_embeddings, text_embeddings, 1.0, 1.0) -| top 3 by similarity desc -~~~ - -### [Stored](#tab/stored) - -> [!IMPORTANT] -> For this example to run successfully, you must first run the [Function definition](#function-definition) code to store the function. - -```kusto -// Create a sample dataset with text passages -let passages = datatable(text:string) -[ - "Machine learning models can process natural language efficiently.", - "Python is a versatile programming language for data science.", - "Azure Data Explorer provides fast analytics on large datasets.", - "Embeddings convert text into numerical vector representations.", - "Neural networks learn patterns from training data." 
-]; -// Generate embeddings for passages using 'passage:' prefix -let passage_embeddings = - passages - | extend text_embeddings=dynamic(null) - | invoke slm_embeddings_fl('text', 'text_embeddings', 32, 'e5-small-v2', 'passage:'); -// Create a search query and find similar passages -let search_query = datatable(query:string) -[ - "How do embeddings work?" -]; -search_query -| extend query_embeddings=dynamic(null) -| invoke slm_embeddings_fl('query', 'query_embeddings', 32, 'e5-small-v2', 'query:') -| extend dummy=1 -| join (passage_embeddings | extend dummy=1) on dummy -| project query, text, similarity=series_cosine_similarity(query_embeddings, text_embeddings, 1.0, 1.0) -| top 3 by similarity desc -``` - ---- - -**Output** - -| query | text | similarity | -|---|---|---| -| How do embeddings work? | Embeddings convert text into numerical vector representations. | 0.871 | -| How do embeddings work? | Neural networks learn patterns from training data. | 0.812 | -| How do embeddings work? | Machine learning models can process natural language efficiently. 
| 0.782 | +::: moniker-end From adc65eadc7318c4c3d2cb80da26965934e4c31b3 Mon Sep 17 00:00:00 2001 From: Adi Eldar Date: Thu, 8 Jan 2026 10:01:24 +0200 Subject: [PATCH 2/7] move the versions files to includes directory --- .../{functions-library => includes}/slm-embeddings-fl-adx.md | 0 .../{functions-library => includes}/slm-embeddings-fl-fabric.md | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename data-explorer/kusto/{functions-library => includes}/slm-embeddings-fl-adx.md (100%) rename data-explorer/kusto/{functions-library => includes}/slm-embeddings-fl-fabric.md (100%) diff --git a/data-explorer/kusto/functions-library/slm-embeddings-fl-adx.md b/data-explorer/kusto/includes/slm-embeddings-fl-adx.md similarity index 100% rename from data-explorer/kusto/functions-library/slm-embeddings-fl-adx.md rename to data-explorer/kusto/includes/slm-embeddings-fl-adx.md diff --git a/data-explorer/kusto/functions-library/slm-embeddings-fl-fabric.md b/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md similarity index 100% rename from data-explorer/kusto/functions-library/slm-embeddings-fl-fabric.md rename to data-explorer/kusto/includes/slm-embeddings-fl-fabric.md From 7f7770d4fcf0d64935d622b0bfb3d6d4ea62a9d9 Mon Sep 17 00:00:00 2001 From: Adi Eldar Date: Thu, 8 Jan 2026 10:12:28 +0200 Subject: [PATCH 3/7] remove duplicate title headings --- data-explorer/kusto/includes/slm-embeddings-fl-adx.md | 2 -- data-explorer/kusto/includes/slm-embeddings-fl-fabric.md | 2 -- 2 files changed, 4 deletions(-) diff --git a/data-explorer/kusto/includes/slm-embeddings-fl-adx.md b/data-explorer/kusto/includes/slm-embeddings-fl-adx.md index 6a2cbee445..60c1166e6d 100644 --- a/data-explorer/kusto/includes/slm-embeddings-fl-adx.md +++ b/data-explorer/kusto/includes/slm-embeddings-fl-adx.md @@ -3,8 +3,6 @@ ms.topic: include ms.date: 01/08/2026 --- -# slm_embeddings_fl() - The function `slm_embeddings_fl()` is a [UDF (user-defined 
function)](../query/functions/user-defined-functions.md) that generates text embeddings using local Small Language Models (SLM). This function converts text into numerical vector representations that can be used for semantic search, similarity analysis, and other natural language processing tasks. Currently the function supports [jina-v2-small](https://huggingface.co/jinaai/jina-embeddings-v2-small-en) and [e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) models. diff --git a/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md b/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md index acc1a6c5a6..e3a38ee2b1 100644 --- a/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md +++ b/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md @@ -3,8 +3,6 @@ ms.topic: include ms.date: 01/08/2026 --- -# slm_embeddings_fl() - The function `slm_embeddings_fl()` is a [UDF (user-defined function)](../query/functions/user-defined-functions.md) that generates text embeddings using local Small Language Models (SLM). This function converts text into numerical vector representations that can be used for semantic search, similarity analysis, and other natural language processing tasks. Currently the function supports [jina-v2-small](https://huggingface.co/jinaai/jina-embeddings-v2-small-en) and [e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) models. 
From 02afe90b65ebbf367c35047c90c0c5b1cac4d195 Mon Sep 17 00:00:00 2001 From: Adi Eldar Date: Thu, 8 Jan 2026 13:14:06 +0200 Subject: [PATCH 4/7] Add prerequisites for ADX & Fabric --- .../kusto/includes/slm-embeddings-fl-adx.md | 8 +++- .../includes/slm-embeddings-fl-fabric.md | 38 ++++++++++--------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/data-explorer/kusto/includes/slm-embeddings-fl-adx.md b/data-explorer/kusto/includes/slm-embeddings-fl-adx.md index 60c1166e6d..9fbe7338ce 100644 --- a/data-explorer/kusto/includes/slm-embeddings-fl-adx.md +++ b/data-explorer/kusto/includes/slm-embeddings-fl-adx.md @@ -7,7 +7,13 @@ The function `slm_embeddings_fl()` is a [UDF (user-defined function)](../query/f Currently the function supports [jina-v2-small](https://huggingface.co/jinaai/jina-embeddings-v2-small-en) and [e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) models. [!INCLUDE [python-zone-pivot-fabric](../includes/python-zone-pivot-fabric.md)] -* More for ADX +* Alter the cluster's [callout policy](../management/callout-policy.md) to allow access to the external artifacts (which are referenced in the KQL code below): + +```kusto +.alter-merge cluster policy callout @'[ { "CalloutType": "sandbox_artifacts", "CalloutUriRegex": "artifactswestus\\.z22\\.web\\.core\\.windows\\.net/models/SLM/","CanCall": true } ]' +``` + +Note that this change requires [AllDatabasesAdmin](../access-control/role-based-access-control.md) permissions (for more details see [Using External Artifacts](python-plugin-adx.md#using-external-artifacts)). 
## Syntax diff --git a/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md b/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md index e3a38ee2b1..bfdc6f609e 100644 --- a/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md +++ b/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md @@ -7,8 +7,7 @@ The function `slm_embeddings_fl()` is a [UDF (user-defined function)](../query/f Currently the function supports [jina-v2-small](https://huggingface.co/jinaai/jina-embeddings-v2-small-en) and [e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) models. [!INCLUDE [python-zone-pivot-fabric](../includes/python-zone-pivot-fabric.md)] - -* More for Fabric +* Create a lakehouse to host the the external artifacts (which are referenced in the KQL code below), preferably in the same workspace as your eventhouse. ## Syntax @@ -28,7 +27,9 @@ Currently the function supports [jina-v2-small](https://huggingface.co/jinaai/ji ## Function definition -You can define the function by either embedding its code as a query-defined function, or creating it as a stored function in your database, as follows: +* Download the four artifacts in the KQL code below (at the end of the code block see the external_artifacts parameter that reference artifacts in https://artifactswestus.z22.web.core.windows.net/models/SLM container) and upload them to your lakehouse. +* In the KQL code below update the artifacts paths to their one lake paths (e.g. https://msit-onelake.dfs.fabric.microsoft.com/MY_WORKSPACE/MY_LAKEHOUSE.Lakehouse/Files/models/SLM/embedding_engine.zip). 
+* You can define the function by either embedding its code as a query-defined function, or creating it as a stored function in your database, as follows: ### [Query-defined](#tab/query-defined) @@ -44,7 +45,7 @@ let slm_embeddings_fl = (tbl:(*), text_col:string, embeddings_col:string, batch_ let code = ```if 1: from sandbox_utils import Zipackage Zipackage.install('embedding_engine.zip') -# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image + Zipackage.install('tokenizers-0.22.1.whl') from embedding_factory import create_embedding_engine @@ -64,10 +65,10 @@ let slm_embeddings_fl = (tbl:(*), text_col:string, embeddings_col:string, batch_ ```; tbl | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( - 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', -// 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl', - 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', - 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) + 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip;impersonate', + 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl;impersonate', + 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip;impersonate', + 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip;impersonate')) }; // Write your query to use the function here. 
~~~ @@ -87,7 +88,7 @@ slm_embeddings_fl(tbl:(*), text_col:string, embeddings_col:string, batch_size:in let code = ```if 1: from sandbox_utils import Zipackage Zipackage.install('embedding_engine.zip') -# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image + Zipackage.install('tokenizers-0.22.1.whl') from embedding_factory import create_embedding_engine @@ -107,10 +108,10 @@ slm_embeddings_fl(tbl:(*), text_col:string, embeddings_col:string, batch_size:in ```; tbl | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( - 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', -// 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl', - 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', - 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) + 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip;impersonate', + 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl;impersonate', + 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip;impersonate', + 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip;impersonate')) } ~~~ @@ -127,13 +128,13 @@ The following example uses the [invoke operator](../query/invoke-operator.md) to To use a query-defined function, invoke it after the embedded function definition. 
~~~kusto -let slm_embeddings_fl=(tbl:(*), text_col:string, embeddings_col:string, batch_size:int=32, model_name:string='jina-v2-small', prefix:string='query:') +let slm_embeddings_fl = (tbl:(*), text_col:string, embeddings_col:string, batch_size:int=32, model_name:string='jina-v2-small', prefix:string='query:') { let kwargs = bag_pack('text_col', text_col, 'embeddings_col', embeddings_col, 'batch_size', batch_size, 'model_name', model_name, 'prefix', prefix); let code = ```if 1: from sandbox_utils import Zipackage Zipackage.install('embedding_engine.zip') -# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image + Zipackage.install('tokenizers-0.22.1.whl') from embedding_factory import create_embedding_engine @@ -153,9 +154,10 @@ let slm_embeddings_fl=(tbl:(*), text_col:string, embeddings_col:string, batch_si ```; tbl | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( - 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', - 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', - 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) + 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip;impersonate', + 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl;impersonate', + 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip;impersonate', + 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip;impersonate')) }; // // Create a sample dataset with text passages From 4156f47cd9fb5b9bebc4103f6f9dad1f354ce524 Mon Sep 17 00:00:00 2001 From: Adi Eldar Date: Thu, 8 Jan 2026 13:59:35 +0200 Subject: [PATCH 5/7] minor 
fixes --- .../kusto/includes/slm-embeddings-fl-adx.md | 19 ++++++++++--------- .../includes/slm-embeddings-fl-fabric.md | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/data-explorer/kusto/includes/slm-embeddings-fl-adx.md b/data-explorer/kusto/includes/slm-embeddings-fl-adx.md index 9fbe7338ce..29e4c476ba 100644 --- a/data-explorer/kusto/includes/slm-embeddings-fl-adx.md +++ b/data-explorer/kusto/includes/slm-embeddings-fl-adx.md @@ -13,7 +13,7 @@ Currently the function supports [jina-v2-small](https://huggingface.co/jinaai/ji .alter-merge cluster policy callout @'[ { "CalloutType": "sandbox_artifacts", "CalloutUriRegex": "artifactswestus\\.z22\\.web\\.core\\.windows\\.net/models/SLM/","CanCall": true } ]' ``` -Note that this change requires [AllDatabasesAdmin](../access-control/role-based-access-control.md) permissions (for more details see [Using External Artifacts](python-plugin-adx.md#using-external-artifacts)). +Note that this change requires [AllDatabasesAdmin](../access-control/role-based-access-control.md) permissions. 
## Syntax @@ -49,7 +49,7 @@ let slm_embeddings_fl = (tbl:(*), text_col:string, embeddings_col:string, batch_ let code = ```if 1: from sandbox_utils import Zipackage Zipackage.install('embedding_engine.zip') -# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image + Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image from embedding_factory import create_embedding_engine @@ -70,7 +70,7 @@ let slm_embeddings_fl = (tbl:(*), text_col:string, embeddings_col:string, batch_ tbl | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', -// 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl', + 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl', 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) }; @@ -92,7 +92,7 @@ slm_embeddings_fl(tbl:(*), text_col:string, embeddings_col:string, batch_size:in let code = ```if 1: from sandbox_utils import Zipackage Zipackage.install('embedding_engine.zip') -# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image + Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image from embedding_factory import create_embedding_engine @@ -113,7 +113,7 @@ slm_embeddings_fl(tbl:(*), text_col:string, embeddings_col:string, batch_size:in tbl | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( 'embedding_engine.zip', 
'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', -// 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl', + 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl', 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) } @@ -138,7 +138,7 @@ let slm_embeddings_fl=(tbl:(*), text_col:string, embeddings_col:string, batch_si let code = ```if 1: from sandbox_utils import Zipackage Zipackage.install('embedding_engine.zip') -# Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image + Zipackage.install('tokenizers-0.22.1.whl') # redundant if tokenizers package is included in the Python image from embedding_factory import create_embedding_engine @@ -158,9 +158,10 @@ let slm_embeddings_fl=(tbl:(*), text_col:string, embeddings_col:string, batch_si ```; tbl | evaluate hint.distribution=per_node python(typeof(*), code, kwargs, external_artifacts = bag_pack( - 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', - 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', - 'e5-small-v2.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) + 'embedding_engine.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip', + 'tokenizers-0.22.1.whl', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/tokenizers-0.22.1-cp39-abi3-win_amd64.whl', + 'jina-v2-small.zip', 'https://artifactswestus.z22.web.core.windows.net/models/SLM/jina-v2-small.zip', + 'e5-small-v2.zip', 
'https://artifactswestus.z22.web.core.windows.net/models/SLM/e5-small-v2.zip')) }; // // Create a sample dataset with text passages diff --git a/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md b/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md index bfdc6f609e..5edd0e7f36 100644 --- a/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md +++ b/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md @@ -27,7 +27,7 @@ Currently the function supports [jina-v2-small](https://huggingface.co/jinaai/ji ## Function definition -* Download the four artifacts in the KQL code below (at the end of the code block see the external_artifacts parameter that reference artifacts in https://artifactswestus.z22.web.core.windows.net/models/SLM container) and upload them to your lakehouse. +* Download the four artifacts in the KQL code below (at the end of the code block, see the external_artifacts parameter that references the artifacts, for example, https://artifactswestus.z22.web.core.windows.net/models/SLM/embedding_engine.zip) and upload them to your lakehouse. * In the KQL code below update the artifacts paths to their one lake paths (e.g. https://msit-onelake.dfs.fabric.microsoft.com/MY_WORKSPACE/MY_LAKEHOUSE.Lakehouse/Files/models/SLM/embedding_engine.zip).
* You can define the function by either embedding its code as a query-defined function, or creating it as a stored function in your database, as follows: From 4146b206b179d518029d6b54d5b352db7aa0f690 Mon Sep 17 00:00:00 2001 From: Adi Eldar Date: Thu, 8 Jan 2026 14:08:45 +0200 Subject: [PATCH 6/7] typo --- data-explorer/kusto/includes/slm-embeddings-fl-fabric.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md b/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md index 5edd0e7f36..f162a5d011 100644 --- a/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md +++ b/data-explorer/kusto/includes/slm-embeddings-fl-fabric.md @@ -7,7 +7,7 @@ The function `slm_embeddings_fl()` is a [UDF (user-defined function)](../query/f Currently the function supports [jina-v2-small](https://huggingface.co/jinaai/jina-embeddings-v2-small-en) and [e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) models. [!INCLUDE [python-zone-pivot-fabric](../includes/python-zone-pivot-fabric.md)] -* Create a lakehouse to host the the external artifacts (which are referenced in the KQL code below), preferably in the same workspace as your eventhouse. +* Create a lakehouse to host the external artifacts (which are referenced in the KQL code below), preferably in the same workspace as your eventhouse. 
## Syntax From 1968029209b0fe9a2c6f9db60b8c46e7a8fe4d65 Mon Sep 17 00:00:00 2001 From: ktalmor <193799742+ktalmor@users.noreply.github.com> Date: Thu, 8 Jan 2026 14:08:57 +0200 Subject: [PATCH 7/7] Added download location + Acrlinx check --- data-explorer/sisense.md | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/data-explorer/sisense.md b/data-explorer/sisense.md index d1754061df..192a9df9c3 100644 --- a/data-explorer/sisense.md +++ b/data-explorer/sisense.md @@ -3,7 +3,7 @@ title: Visualize data from Azure Data Explorer using Sisense description: In this article, learn how to set up Azure Data Explorer as a data source for Sisense, and visualize the data. ms.reviewer: orspodek ms.topic: how-to -ms.date: 07/12/2023 +ms.date: 01/08/2026 ms.custom: - has-adal-ref - sfi-image-nochange @@ -22,9 +22,11 @@ You need the following to complete this article: [!INCLUDE [data-explorer-storm-events](includes/data-explorer-storm-events.md)] -## Connect to Sisense dashboards using Azure Data Explorer JDBC connector +## Connect to Sisense dashboards -1. Download and copy the latest versions of the following jar files to *..\Sisense\DataConnectors\jdbcdrivers\adx* +Use the Azure Data Explorer JDBC connector to connect to Sisense. + +1. Download the latest versions of the following jar files from a site such as [Maven Repository](https://mvnrepository.com/) or Microsoft's GitHub repository, and copy them to *..\Sisense\DataConnectors\jdbcdrivers\adx*: * activation-1.1.jar * adal4j-1.6.0.jar @@ -41,8 +43,9 @@ You need the following to complete this article: * oauth2-oidc-sdk-5.24.1.jar * slf4j-api-1.7.21.jar -1. Open **Sisense** app. -1. Select **Data** tab and select **+ElastiCube** to create a new ElastiCube model. +1. Open the **Sisense** application. + +1. Select the **Data** tab and select **+ElastiCube** to create a new ElastiCube model.
![Select ElastiCube.](media/sisense/data-select-elasticube.png) @@ -70,7 +73,7 @@ You need the following to complete this article: |User Name | Microsoft Entra user name | |Password | Microsoft Entra user password | -1. In the **Select Data** tab, search **Select Database** to select the relevant database to which you have permissions. In this example, select *test1*. +1. In the **Select Data** tab, search **Select Database** and select the relevant database to which you have permissions. In this example, select *test1*. ![select database.](media/sisense/select-database.png) @@ -85,11 +88,11 @@ You need the following to complete this article: * In the **Build** window, select **Build**. - ![Build window.](media/sisense/build-window.png) + ![Build window.](media/sisense/build-window.png) * Wait until build process is complete and then select **Build Succeeded**. - ![Build succeeded.](media/sisense/build-succeeded.png) + ![Build succeeded.](media/sisense/build-succeeded.png) ## Create Sisense dashboards @@ -105,7 +108,7 @@ You need the following to complete this article: ![Add fields to StormEvents dashboard.](media/sisense/storm-dashboard-add-field.png) -1. Select **+ Add More Data** to add additional columns to your graph. +1. Select **+ Add More Data** to add extra columns to your graph. ![Add more data to graph.](media/sisense/add-more-data.png) @@ -113,7 +116,7 @@ You need the following to complete this article: ![Storm dashboard.](media/sisense/final-dashboard.png) -You can now explore your data with visual analytics, build additional dashboards, and +You can now explore your data with visual analytics, build more dashboards, and transform data into actionable insights to make an impact on your business. ## Related content