From 112e4185cd1af6a459a13eedc16500e94d357040 Mon Sep 17 00:00:00 2001
From: Aryan Sharma
Date: Wed, 14 Jan 2026 12:32:12 +0100
Subject: [PATCH 1/3] docs: add evals api ref to api ref

---
 api-reference/evaluators/overview.mdx | 61 +++++++++++++++++++++++++++
 docs.json                             | 51 +++++++++++++++++++---
 llm-evaluation/list.mdx               |  1 -
 3 files changed, 107 insertions(+), 6 deletions(-)
 create mode 100644 api-reference/evaluators/overview.mdx

diff --git a/api-reference/evaluators/overview.mdx b/api-reference/evaluators/overview.mdx
new file mode 100644
index 0000000..801a271
--- /dev/null
+++ b/api-reference/evaluators/overview.mdx
@@ -0,0 +1,61 @@
+---
+title: 'Overview'
+description: 'Browse all available evaluators in LangWatch to find the right scoring method for your AI agent evaluation use case.'
+---
+
+LangWatch offers an extensive library of evaluators to help you evaluate the quality and guarantee the safety of your LLM apps.
+
+Here you can find a reference list; to get the execution code for each evaluator, use the [Evaluation Wizard](https://app.langwatch.ai/@project/evaluations) on the LangWatch platform.
+
+## Authentication
+
+To call the Evaluators API, pass your LangWatch API key in the `x-api-key` header. You can find your API key on the setup page under settings.
+
+## Base URL
+
+All evaluator endpoints are available at:
+```
+https://app.langwatch.ai/api/evaluations
+```
+
+## Evaluators List
+
+import EvaluatorsList from "/snippets/evaluators-list.mdx"
+
+
+
+## Running Evaluations
+
+Set up your first evaluation using the [Evaluation Wizard](https://app.langwatch.ai/@project/evaluations):
+
+
+
+
+
+
+## Instrumenting Custom Evaluator
+
+If you have a custom evaluator built in-house, you can follow the guide below to integrate it.
+
+
+
+
+## Common Request Format
+
+All evaluator endpoints follow a similar pattern:
+
+```
+POST /api/evaluations/{evaluator_path}/evaluate
+```
+
+Each evaluator accepts specific input parameters and settings. Refer to the individual evaluator documentation pages for detailed request/response schemas and examples.
+
+## Response Format
+
+Successful evaluations return an array of evaluation results with scores, details, and metadata specific to each evaluator type.
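For reference, the call pattern documented in the new overview.mdx above amounts to a single authenticated POST per evaluator. The sketch below illustrates it under stated assumptions: the `exact_match` evaluator slug and the `data`/`settings` body fields are illustrative guesses, not a schema defined by this patch; only the base URL, the `x-api-key` header, and the `{evaluator_path}/evaluate` path come from the docs text itself.

```typescript
// Illustrative sketch of the Evaluators API call pattern described in overview.mdx.
// Assumptions: the "exact_match" slug and the request-body fields ("data", "settings").
const LANGWATCH_API_KEY = process.env.LANGWATCH_API_KEY ?? "";

async function runExactMatchEvaluation(): Promise<void> {
  const response = await fetch(
    // Base URL + "{evaluator_path}/evaluate", as documented above.
    "https://app.langwatch.ai/api/evaluations/exact_match/evaluate",
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-api-key": LANGWATCH_API_KEY, // header name per the Authentication section
      },
      body: JSON.stringify({
        // Hypothetical payload; each evaluator documents its own inputs and settings.
        data: { input: "What is 2 + 2?", output: "4", expected_output: "4" },
        settings: {},
      }),
    }
  );

  if (!response.ok) {
    throw new Error(`Evaluation request failed with status ${response.status}`);
  }

  // Per the Response Format section, results carry scores, details, and metadata.
  const results = await response.json();
  console.log(results);
}

runExactMatchEvaluation().catch(console.error);
```

Note that PATCH 2/3 later renames the auth header, so the header above matches the docs only as they stand after this first patch.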
diff --git a/docs.json b/docs.json
index 45e08ce..0b14b6d 100644
--- a/docs.json
+++ b/docs.json
@@ -116,11 +116,7 @@
       {
         "group": "Built-in Evaluators",
         "pages": [
-          "llm-evaluation/list",
-          {
-            "group": "API Docs",
-            "openapi": "/api-reference/openapi-evals.json"
-          }
+          "llm-evaluation/list"
         ]
       },
       {
@@ -491,6 +487,51 @@
         "api-reference/scenarios/overview",
         "api-reference/scenarios/create-event"
        ]
+      },
+      {
+        "group": "Evaluators",
+        "pages": [
+          "api-reference/evaluators/overview",
+          "api-reference/evaluators/exact-match-evaluator",
+          "api-reference/evaluators/llm-answer-match",
+          "api-reference/evaluators/bleu-score",
+          "api-reference/evaluators/llm-factual-match",
+          "api-reference/evaluators/rouge-score",
+          "api-reference/evaluators/sql-query-equivalence",
+          "api-reference/evaluators/llm-as-a-judge-boolean-evaluator",
+          "api-reference/evaluators/llm-as-a-judge-category-evaluator",
+          "api-reference/evaluators/llm-as-a-judge-score-evaluator",
+          "api-reference/evaluators/rubrics-based-scoring",
+          "api-reference/evaluators/ragas-answer-correctness",
+          "api-reference/evaluators/ragas-answer-relevancy",
+          "api-reference/evaluators/ragas-context-precision",
+          "api-reference/evaluators/ragas-context-recall",
+          "api-reference/evaluators/ragas-context-relevancy",
+          "api-reference/evaluators/ragas-context-utilization",
+          "api-reference/evaluators/ragas-faithfulness",
+          "api-reference/evaluators/ragas-faithfulness-1",
+          "api-reference/evaluators/ragas-response-context-precision",
+          "api-reference/evaluators/ragas-response-context-recall",
+          "api-reference/evaluators/ragas-response-relevancy",
+          "api-reference/evaluators/context-f1",
+          "api-reference/evaluators/context-precision",
+          "api-reference/evaluators/context-recall",
+          "api-reference/evaluators/azure-content-safety",
+          "api-reference/evaluators/azure-jailbreak-detection",
+          "api-reference/evaluators/azure-prompt-shield",
+          "api-reference/evaluators/openai-moderation",
+          "api-reference/evaluators/presidio-pii-detection",
+          "api-reference/evaluators/custom-basic-evaluator",
+          "api-reference/evaluators/competitor-blocklist",
+          "api-reference/evaluators/competitor-allowlist-check",
+          "api-reference/evaluators/competitor-llm-check",
+          "api-reference/evaluators/off-topic-evaluator",
+          "api-reference/evaluators/query-resolution",
+          "api-reference/evaluators/semantic-similarity-evaluator",
+          "api-reference/evaluators/summarization-score",
+          "api-reference/evaluators/valid-format-evaluator",
+          "api-reference/evaluators/lingua-language-detection"
+        ]
       }
     ]
   }
diff --git a/llm-evaluation/list.mdx b/llm-evaluation/list.mdx
index 2933294..e93fa76 100644
--- a/llm-evaluation/list.mdx
+++ b/llm-evaluation/list.mdx
@@ -7,7 +7,6 @@ LangWatch offers an extensive library of evaluators to help you evaluate the qua
 While here you can find a reference list, to get the execution code you can use the [Evaluation Wizard](https://app.langwatch.ai/@project/evaluations) on LangWatch platform.
 
-
 ## Evaluators List
 
 import EvaluatorsList from "/snippets/evaluators-list.mdx"
 

From e42df7b541d071c082d8f2d380641db3a7389ebd Mon Sep 17 00:00:00 2001
From: Aryan Sharma
Date: Thu, 15 Jan 2026 05:50:16 +0100
Subject: [PATCH 2/3] docs: proper linking to api-ref

---
 api-reference/evaluators/overview.mdx | 11 +++++------
 llm-evaluation/list.mdx               |  4 ++++
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/api-reference/evaluators/overview.mdx b/api-reference/evaluators/overview.mdx
index 801a271..56f3bee 100644
--- a/api-reference/evaluators/overview.mdx
+++ b/api-reference/evaluators/overview.mdx
@@ -3,20 +3,19 @@ title: 'Overview'
 description: 'Browse all available evaluators in LangWatch to find the right scoring method for your AI agent evaluation use case.'
 ---
 
+## Intro
+
 LangWatch offers an extensive library of evaluators to help you evaluate the quality and guarantee the safety of your LLM apps.
 
 Here you can find a reference list; to get the execution code for each evaluator, use the [Evaluation Wizard](https://app.langwatch.ai/@project/evaluations) on the LangWatch platform.
 
 ## Authentication
 
-To call the Evaluators API, pass your LangWatch API key in the `x-api-key` header. You can find your API key on the setup page under settings.
+To call the Evaluators API, pass your LangWatch API key in the `X-Auth-Token` header. You can find your API key on the setup page under settings.
 
-## Base URL
+#### Allowed Methods
 
-All evaluator endpoints are available at:
-```
-https://app.langwatch.ai/api/evaluations
-```
+- `POST /api/evaluations/{evaluator}/evaluate` - Run an evaluation using a specific evaluator
 
 ## Evaluators List
 
diff --git a/llm-evaluation/list.mdx b/llm-evaluation/list.mdx
index e93fa76..3abd002 100644
--- a/llm-evaluation/list.mdx
+++ b/llm-evaluation/list.mdx
@@ -7,6 +7,10 @@ LangWatch offers an extensive library of evaluators to help you evaluate the qua
 While here you can find a reference list, to get the execution code you can use the [Evaluation Wizard](https://app.langwatch.ai/@project/evaluations) on LangWatch platform.
 
+
+  Full API documentation for running evaluations programmatically.
+
+
 ## Evaluators List
 
 import EvaluatorsList from "/snippets/evaluators-list.mdx"

From e4ea2b34cb96b1dc43feace34c676652dd16af25 Mon Sep 17 00:00:00 2001
From: Aryan Sharma
Date: Thu, 15 Jan 2026 11:50:13 +0100
Subject: [PATCH 3/3] fix: broken-links

---
 llm-evaluation/offline/code/evaluation-api.mdx | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llm-evaluation/offline/code/evaluation-api.mdx b/llm-evaluation/offline/code/evaluation-api.mdx
index bc536cd..c74e604 100644
--- a/llm-evaluation/offline/code/evaluation-api.mdx
+++ b/llm-evaluation/offline/code/evaluation-api.mdx
@@ -141,10 +141,12 @@ await evaluation.run(dataset, async ({ item, index, span }) => {
 ```
 
 The callback receives:
+
 - `item` - The current dataset item
 - `index` - The current index in the dataset
 - `span` - An OpenTelemetry span for custom tracing
-
+
+
 
 ### Metrics logging
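Taken together, the series leaves the docs describing one endpoint shape (`POST /api/evaluations/{evaluator}/evaluate` with an `X-Auth-Token` header, per PATCH 2/3) and a callback signature for programmatic runs (`item`, `index`, `span`, per the hunk in PATCH 3/3). A minimal sketch of such a callback is below; the `DatasetItem` fields, the `exact_match` slug, and the request-body shape are assumptions for illustration, not something this series confirms, while `span.setAttribute` is the standard OpenTelemetry span API.

```typescript
import type { Span } from "@opentelemetry/api";

// Hypothetical dataset item shape; real fields depend on your dataset columns.
interface DatasetItem {
  input: string;
  output: string;
  expected_output: string;
}

// Sketch of a callback for `evaluation.run(dataset, ...)` as shown in the hunk above.
// The endpoint path and X-Auth-Token header follow PATCH 2/3; the "exact_match" slug
// and the request-body fields are assumptions for illustration only.
async function evaluateItem(args: { item: DatasetItem; index: number; span: Span }) {
  const { item, index, span } = args;
  span.setAttribute("dataset.index", index); // standard OpenTelemetry span attribute

  const response = await fetch(
    "https://app.langwatch.ai/api/evaluations/exact_match/evaluate",
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "X-Auth-Token": process.env.LANGWATCH_API_KEY ?? "",
      },
      body: JSON.stringify({
        data: {
          input: item.input,
          output: item.output,
          expected_output: item.expected_output,
        },
      }),
    }
  );

  return response.json();
}
```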