60 changes: 60 additions & 0 deletions api-reference/evaluators/overview.mdx
@@ -0,0 +1,60 @@
---
title: 'Overview'
description: 'Browse all available evaluators in LangWatch to find the right scoring method for your AI agent evaluation use case.'
---

## Intro

LangWatch offers an extensive library of evaluators to help you evaluate the quality and guarantee the safety of your LLM apps.

While you can find a reference list here, you can use the [Evaluation Wizard](https://app.langwatch.ai/@project/evaluations) on the LangWatch platform to get the execution code.

## Authentication

To make a call to the Evaluators API, pass your LangWatch API key in the `X-Auth-Token` header. Your API key can be found on the setup page under settings.

### Allowed Methods

- `POST /api/evaluations/{evaluator}/evaluate` - Run an evaluation using a specific evaluator
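
For illustration, here is a minimal sketch of such a call in TypeScript. The host, the `LANGWATCH_API_KEY` environment variable, and the empty request body are assumptions — replace `{evaluator}` with a real evaluator slug and fill in the fields documented on its reference page.

```typescript
// Sketch only: authenticate by sending the API key in the X-Auth-Token header.
// Replace {evaluator} with a real evaluator slug and fill in the request body.
const response = await fetch(
  "https://app.langwatch.ai/api/evaluations/{evaluator}/evaluate",
  {
    method: "POST",
    headers: {
      "X-Auth-Token": process.env.LANGWATCH_API_KEY ?? "", // your LangWatch API key
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      // evaluator-specific inputs and settings go here
    }),
  }
);
```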

## Evaluators List

import EvaluatorsList from "/snippets/evaluators-list.mdx"

<EvaluatorsList />

## Running Evaluations

Set up your first evaluation using the [Evaluation Wizard](https://app.langwatch.ai/@project/evaluations):

<a href="https://app.langwatch.ai/@project/evaluations" target="_blank">
<Frame>
<img src="/images/offline-evaluation/Screenshot_2025-04-17_at_16.53.38.png" alt="Evaluation Wizard" style={{ maxWidth: '400px' }} noZoom />
</Frame>
</a>

## Instrumenting Custom Evaluator

If you have a custom evaluator built in-house, you can follow the guide below to integrate it.

<CardGroup cols={1}>
<Card
title="Instrumenting Custom Evaluator"
icon="link"
href="/evaluations/custom-evaluator-integration"
/>
</CardGroup>

## Common Request Format

All evaluator endpoints follow a similar pattern:

```
POST /api/evaluations/{evaluator}/evaluate
```

Each evaluator accepts specific input parameters and settings. Refer to the individual evaluator documentation pages for detailed request/response schemas and examples.
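
As a purely illustrative sketch — the field names below (`data`, `settings`) and their contents are assumptions, not the definitive schema — a payload for an answer-comparison style evaluator might look like this:

```typescript
// Hypothetical payload; check the evaluator's reference page for the real fields.
const payload = {
  data: {
    input: "What is the capital of France?",
    output: "Paris is the capital of France.",
    expected_output: "Paris",
  },
  settings: {}, // evaluator-specific options, if any
};
```

Send the payload as the JSON body of the `POST` request shown in the Authentication section above.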

## Response Format

Successful evaluations return an array of evaluation results with scores, details, and metadata specific to each evaluator type.
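
Continuing the request sketch from the Authentication section, a rough TypeScript shape for one result, inferred from the description above (the concrete fields vary by evaluator and are not guaranteed):

```typescript
// Assumed shape of a single evaluation result; actual fields differ per evaluator.
interface EvaluationResult {
  status?: string; // processing status reported by the API (assumption)
  score?: number; // numeric score, when the evaluator produces one
  passed?: boolean; // pass/fail outcome, when applicable
  details?: string; // human-readable explanation of the result
  [extra: string]: unknown; // any evaluator-specific metadata
}

const results = (await response.json()) as EvaluationResult[];
```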
51 changes: 46 additions & 5 deletions docs.json
@@ -116,11 +116,7 @@
{
"group": "Built-in Evaluators",
"pages": [
"llm-evaluation/list",
{
"group": "API Docs",
"openapi": "/api-reference/openapi-evals.json"
}
"llm-evaluation/list"
]
},
{
@@ -491,6 +487,51 @@
"api-reference/scenarios/overview",
"api-reference/scenarios/create-event"
]
},
{
"group": "Evaluators",
"pages": [
"api-reference/evaluators/overview",
"api-reference/evaluators/exact-match-evaluator",
"api-reference/evaluators/llm-answer-match",
"api-reference/evaluators/bleu-score",
"api-reference/evaluators/llm-factual-match",
"api-reference/evaluators/rouge-score",
"api-reference/evaluators/sql-query-equivalence",
"api-reference/evaluators/llm-as-a-judge-boolean-evaluator",
"api-reference/evaluators/llm-as-a-judge-category-evaluator",
"api-reference/evaluators/llm-as-a-judge-score-evaluator",
"api-reference/evaluators/rubrics-based-scoring",
"api-reference/evaluators/ragas-answer-correctness",
"api-reference/evaluators/ragas-answer-relevancy",
"api-reference/evaluators/ragas-context-precision",
"api-reference/evaluators/ragas-context-recall",
"api-reference/evaluators/ragas-context-relevancy",
"api-reference/evaluators/ragas-context-utilization",
"api-reference/evaluators/ragas-faithfulness",
"api-reference/evaluators/ragas-faithfulness-1",
"api-reference/evaluators/ragas-response-context-precision",
"api-reference/evaluators/ragas-response-context-recall",
"api-reference/evaluators/ragas-response-relevancy",
"api-reference/evaluators/context-f1",
"api-reference/evaluators/context-precision",
"api-reference/evaluators/context-recall",
"api-reference/evaluators/azure-content-safety",
"api-reference/evaluators/azure-jailbreak-detection",
"api-reference/evaluators/azure-prompt-shield",
"api-reference/evaluators/openai-moderation",
"api-reference/evaluators/presidio-pii-detection",
"api-reference/evaluators/custom-basic-evaluator",
"api-reference/evaluators/competitor-blocklist",
"api-reference/evaluators/competitor-allowlist-check",
"api-reference/evaluators/competitor-llm-check",
"api-reference/evaluators/off-topic-evaluator",
"api-reference/evaluators/query-resolution",
"api-reference/evaluators/semantic-similarity-evaluator",
"api-reference/evaluators/summarization-score",
"api-reference/evaluators/valid-format-evaluator",
"api-reference/evaluators/lingua-language-detection"
]
}
]
}
3 changes: 3 additions & 0 deletions llm-evaluation/list.mdx
@@ -7,6 +7,9 @@ LangWatch offers an extensive library of evaluators to help you evaluate the qua

While you can find a reference list here, you can use the [Evaluation Wizard](https://app.langwatch.ai/@project/evaluations) on the LangWatch platform to get the execution code.

<Card title="Evaluators API Reference" icon="code" href="/api-reference/evaluators/overview">
Full API documentation for running evaluations programmatically.
</Card>

## Evaluators List

4 changes: 3 additions & 1 deletion llm-evaluation/offline/code/evaluation-api.mdx
@@ -141,10 +141,12 @@ await evaluation.run(dataset, async ({ item, index, span }) => {
```

The callback receives:

- `item` - The current dataset item
- `index` - The current index in the dataset
- `span` - An OpenTelemetry span for custom tracing
</Tab>

</Tab>
</Tabs>

### Metrics logging