From 112e4185cd1af6a459a13eedc16500e94d357040 Mon Sep 17 00:00:00 2001
From: Aryan Sharma
Date: Wed, 14 Jan 2026 12:32:12 +0100
Subject: [PATCH 1/3] docs: add evals api ref to api ref

---
 api-reference/evaluators/overview.mdx | 61 +++++++++++++++++++++++++++
 docs.json                             | 51 +++++++++++++++++++---
 llm-evaluation/list.mdx               |  1 -
 3 files changed, 107 insertions(+), 6 deletions(-)
 create mode 100644 api-reference/evaluators/overview.mdx

diff --git a/api-reference/evaluators/overview.mdx b/api-reference/evaluators/overview.mdx
new file mode 100644
index 0000000..801a271
--- /dev/null
+++ b/api-reference/evaluators/overview.mdx
@@ -0,0 +1,61 @@
+---
+title: 'Overview'
+description: 'Browse all available evaluators in LangWatch to find the right scoring method for your AI agent evaluation use case.'
+---
+
+LangWatch offers an extensive library of evaluators to help you evaluate the quality and guarantee the safety of your LLM apps.
+
+Here you can find a reference list; to get the execution code for each evaluator, use the [Evaluation Wizard](https://app.langwatch.ai/@project/evaluations) on the LangWatch platform.
+
+## Authentication
+
+To call the Evaluators API, pass your LangWatch API key in the `x-api-key` header. You can find your API key on the setup page under settings.
+
+## Base URL
+
+All evaluator endpoints are available at:
+```
+https://app.langwatch.ai/api/evaluations
+```
+
+## Evaluators List
+
+import EvaluatorsList from "/snippets/evaluators-list.mdx"
+
+
+
+## Running Evaluations
+
+Set up your first evaluation using the [Evaluation Wizard](https://app.langwatch.ai/@project/evaluations):
+
+
+
+
+
+
+## Instrumenting Custom Evaluator
+
+If you have a custom evaluator built in-house, you can follow the guide below to integrate it.
+
+
+
+
+## Common Request Format
+
+All evaluator endpoints follow a similar pattern:
+
+```
+POST /api/evaluations/{evaluator_path}/evaluate
+```
+
+Each evaluator accepts specific input parameters and settings. Refer to the individual evaluator documentation pages for detailed request/response schemas and examples.
+
+## Response Format
+
+Successful evaluations return an array of evaluation results with scores, details, and metadata specific to each evaluator type.
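For reference, the call pattern documented in the new overview.mdx above amounts to a single authenticated POST per evaluator. The sketch below illustrates it under stated assumptions: the `exact_match` evaluator slug and the `data`/`settings` body fields are illustrative guesses, not a schema defined by this patch; only the base URL, the `x-api-key` header, and the `{evaluator_path}/evaluate` path come from the docs text itself.

```typescript
// Illustrative sketch of the Evaluators API call pattern described in overview.mdx.
// Assumptions: the "exact_match" slug and the request-body fields ("data", "settings").
const LANGWATCH_API_KEY = process.env.LANGWATCH_API_KEY ?? "";

async function runExactMatchEvaluation(): Promise<void> {
  const response = await fetch(
    // Base URL + "{evaluator_path}/evaluate", as documented above.
    "https://app.langwatch.ai/api/evaluations/exact_match/evaluate",
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-api-key": LANGWATCH_API_KEY, // header name per the Authentication section
      },
      body: JSON.stringify({
        // Hypothetical payload; each evaluator documents its own inputs and settings.
        data: { input: "What is 2 + 2?", output: "4", expected_output: "4" },
        settings: {},
      }),
    }
  );

  if (!response.ok) {
    throw new Error(`Evaluation request failed with status ${response.status}`);
  }

  // Per the Response Format section, results carry scores, details, and metadata.
  const results = await response.json();
  console.log(results);
}

runExactMatchEvaluation().catch(console.error);
```

Note that PATCH 2/3 later renames the auth header, so the header above matches the docs only as they stand after this first patch.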
diff --git a/docs.json b/docs.json
index 45e08ce..0b14b6d 100644
--- a/docs.json
+++ b/docs.json
@@ -116,11 +116,7 @@
       {
         "group": "Built-in Evaluators",
         "pages": [
-          "llm-evaluation/list",
-          {
-            "group": "API Docs",
-            "openapi": "/api-reference/openapi-evals.json"
-          }
+          "llm-evaluation/list"
         ]
       },
       {
@@ -491,6 +487,51 @@
         "api-reference/scenarios/overview",
         "api-reference/scenarios/create-event"
        ]
+      },
+      {
+        "group": "Evaluators",
+        "pages": [
+          "api-reference/evaluators/overview",
+          "api-reference/evaluators/exact-match-evaluator",
+          "api-reference/evaluators/llm-answer-match",
+          "api-reference/evaluators/bleu-score",
+          "api-reference/evaluators/llm-factual-match",
+          "api-reference/evaluators/rouge-score",
+          "api-reference/evaluators/sql-query-equivalence",
+          "api-reference/evaluators/llm-as-a-judge-boolean-evaluator",
+          "api-reference/evaluators/llm-as-a-judge-category-evaluator",
+          "api-reference/evaluators/llm-as-a-judge-score-evaluator",
+          "api-reference/evaluators/rubrics-based-scoring",
+          "api-reference/evaluators/ragas-answer-correctness",
+          "api-reference/evaluators/ragas-answer-relevancy",
+          "api-reference/evaluators/ragas-context-precision",
+          "api-reference/evaluators/ragas-context-recall",
+          "api-reference/evaluators/ragas-context-relevancy",
+          "api-reference/evaluators/ragas-context-utilization",
+          "api-reference/evaluators/ragas-faithfulness",
+          "api-reference/evaluators/ragas-faithfulness-1",
+          "api-reference/evaluators/ragas-response-context-precision",
+          "api-reference/evaluators/ragas-response-context-recall",
+          "api-reference/evaluators/ragas-response-relevancy",
+          "api-reference/evaluators/context-f1",
+          "api-reference/evaluators/context-precision",
+          "api-reference/evaluators/context-recall",
+          "api-reference/evaluators/azure-content-safety",
+          "api-reference/evaluators/azure-jailbreak-detection",
+          "api-reference/evaluators/azure-prompt-shield",
+          "api-reference/evaluators/openai-moderation",
+          "api-reference/evaluators/presidio-pii-detection",
+          "api-reference/evaluators/custom-basic-evaluator",
+          "api-reference/evaluators/competitor-blocklist",
+          "api-reference/evaluators/competitor-allowlist-check",
+          "api-reference/evaluators/competitor-llm-check",
+          "api-reference/evaluators/off-topic-evaluator",
+          "api-reference/evaluators/query-resolution",
+          "api-reference/evaluators/semantic-similarity-evaluator",
+          "api-reference/evaluators/summarization-score",
+          "api-reference/evaluators/valid-format-evaluator",
+          "api-reference/evaluators/lingua-language-detection"
+        ]
       }
     ]
   }
diff --git a/llm-evaluation/list.mdx b/llm-evaluation/list.mdx
index 2933294..e93fa76 100644
--- a/llm-evaluation/list.mdx
+++ b/llm-evaluation/list.mdx
@@ -7,7 +7,6 @@ LangWatch offers an extensive library of evaluators to help you evaluate the qua
 While here you can find a reference list, to get the execution code you can use the [Evaluation Wizard](https://app.langwatch.ai/@project/evaluations) on LangWatch platform.
 
-
 ## Evaluators List
 
 import EvaluatorsList from "/snippets/evaluators-list.mdx"
 

From e42df7b541d071c082d8f2d380641db3a7389ebd Mon Sep 17 00:00:00 2001
From: Aryan Sharma
Date: Thu, 15 Jan 2026 05:50:16 +0100
Subject: [PATCH 2/3] docs: proper linking to api-ref

---
 api-reference/evaluators/overview.mdx | 11 +++++------
 llm-evaluation/list.mdx               |  4 ++++
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/api-reference/evaluators/overview.mdx b/api-reference/evaluators/overview.mdx
index 801a271..56f3bee 100644
--- a/api-reference/evaluators/overview.mdx
+++ b/api-reference/evaluators/overview.mdx
@@ -3,20 +3,19 @@ title: 'Overview'
 description: 'Browse all available evaluators in LangWatch to find the right scoring method for your AI agent evaluation use case.'
 ---
 
+## Intro
+
 LangWatch offers an extensive library of evaluators to help you evaluate the quality and guarantee the safety of your LLM apps.
 
 Here you can find a reference list; to get the execution code for each evaluator, use the [Evaluation Wizard](https://app.langwatch.ai/@project/evaluations) on the LangWatch platform.
 
 ## Authentication
 
-To call the Evaluators API, pass your LangWatch API key in the `x-api-key` header. You can find your API key on the setup page under settings.
+To call the Evaluators API, pass your LangWatch API key in the `X-Auth-Token` header. You can find your API key on the setup page under settings.
 
-## Base URL
+#### Allowed Methods
 
-All evaluator endpoints are available at:
-```
-https://app.langwatch.ai/api/evaluations
-```
+- `POST /api/evaluations/{evaluator}/evaluate` - Run an evaluation using a specific evaluator
 
 ## Evaluators List
 
diff --git a/llm-evaluation/list.mdx b/llm-evaluation/list.mdx
index e93fa76..3abd002 100644
--- a/llm-evaluation/list.mdx
+++ b/llm-evaluation/list.mdx
@@ -7,6 +7,10 @@ LangWatch offers an extensive library of evaluators to help you evaluate the qua
 While here you can find a reference list, to get the execution code you can use the [Evaluation Wizard](https://app.langwatch.ai/@project/evaluations) on LangWatch platform.
 
+
+  Full API documentation for running evaluations programmatically.
+
+
 ## Evaluators List
 
 import EvaluatorsList from "/snippets/evaluators-list.mdx"

From e4ea2b34cb96b1dc43feace34c676652dd16af25 Mon Sep 17 00:00:00 2001
From: Aryan Sharma
Date: Thu, 15 Jan 2026 11:50:13 +0100
Subject: [PATCH 3/3] fix: broken-links

---
 llm-evaluation/offline/code/evaluation-api.mdx | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llm-evaluation/offline/code/evaluation-api.mdx b/llm-evaluation/offline/code/evaluation-api.mdx
index bc536cd..c74e604 100644
--- a/llm-evaluation/offline/code/evaluation-api.mdx
+++ b/llm-evaluation/offline/code/evaluation-api.mdx
@@ -141,10 +141,12 @@ await evaluation.run(dataset, async ({ item, index, span }) => {
 ```
 
 The callback receives:
+
 - `item` - The current dataset item
 - `index` - The current index in the dataset
 - `span` - An OpenTelemetry span for custom tracing
-
+
+
 
 ### Metrics logging
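Taken together, the series leaves the docs describing one endpoint shape (`POST /api/evaluations/{evaluator}/evaluate` with an `X-Auth-Token` header, per PATCH 2/3) and a callback signature for programmatic runs (`item`, `index`, `span`, per the hunk in PATCH 3/3). A minimal sketch of such a callback is below; the `DatasetItem` fields, the `exact_match` slug, and the request-body shape are assumptions for illustration, not something this series confirms, while `span.setAttribute` is the standard OpenTelemetry span API.

```typescript
import type { Span } from "@opentelemetry/api";

// Hypothetical dataset item shape; real fields depend on your dataset columns.
interface DatasetItem {
  input: string;
  output: string;
  expected_output: string;
}

// Sketch of a callback for `evaluation.run(dataset, ...)` as shown in the hunk above.
// The endpoint path and X-Auth-Token header follow PATCH 2/3; the "exact_match" slug
// and the request-body fields are assumptions for illustration only.
async function evaluateItem(args: { item: DatasetItem; index: number; span: Span }) {
  const { item, index, span } = args;
  span.setAttribute("dataset.index", index); // standard OpenTelemetry span attribute

  const response = await fetch(
    "https://app.langwatch.ai/api/evaluations/exact_match/evaluate",
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "X-Auth-Token": process.env.LANGWATCH_API_KEY ?? "",
      },
      body: JSON.stringify({
        data: {
          input: item.input,
          output: item.output,
          expected_output: item.expected_output,
        },
      }),
    }
  );

  return response.json();
}
```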