@@ -8,7 +8,7 @@ using System.TestTools.TestRunner;

permissionset 149030 "AI Test Toolkit"
{
Caption = 'Business Central AI Test Toolkit';
Caption = 'Business Central AI Eval Toolkit';
Access = Public;
Assignable = true;

55 changes: 26 additions & 29 deletions src/Tools/AI Test Toolkit/README.md
@@ -1,44 +1,44 @@
# BC Copilot Test Toolkit
The BC Copilot Test Toolkit lets developers write and run automated tests for copilot features. The toolkit supports running data driven tests and the ability to get the output generated from the tests.
# BC Copilot Eval Toolkit
The BC Copilot Eval Toolkit lets developers write and run automated evals for copilot features. The toolkit supports running data-driven evals and retrieving the output they generate.

## Running AI tests in BC Copilot Test Toolkit
## Running AI evals in BC Copilot Eval Toolkit

### Prerequisite
1. The BC Copilot Test Toolkit is installed
1. Datasets and tests are written (see [Writing data-driven AI tests](#writing-data-driven-ai-tests))
1. The BC Copilot Eval Toolkit is installed
1. Datasets and evals are written (see [Writing data-driven AI evals](#writing-data-driven-ai-evals))

### Setup
1. In _Business Central_, open the _AI Test Suite_ page
1. Upload the required datasets for the tests
1. Create a test suite
1. In the test suite, define a line for each codeunit
1. In _Business Central_, open the _AI Eval Suite_ page
1. Upload the required datasets for the evals
1. Create an eval suite
1. In the eval suite, define a line for each codeunit

### Execute
1. Run the AI Test Suite from the header, or one line at a time
1. The test method will be executed for each dataset line
1. If the test evaluates in AL, it will either fail or succeed based on the condition
1. If the test output is set, it must be generated for all the tests which needs to be evaluated externally
1. Run the AI Eval Suite from the header, or one line at a time
1. The eval method will be executed for each dataset line
1. If the eval evaluates in AL, it will either fail or succeed based on the condition
1. If the test output is set, it must be generated for all the evals that need to be evaluated externally
1. Results are logged in AI 'Log Entries'

### Inspect the results
1. Open _Log Entries_ to see the result of each execution
1. Download the test output, which generates a `.jsonl` file, or export the logs to Excel
1. You can also use the API (page 149038 "AIT Log Entry API") to get the result for a suite
1. Open AL Test Tool and switch to the created test suite to execute each test manually
1. Open AL Test Tool and switch to the created eval suite to execute each eval manually


## Writing data-driven AI tests
## Writing data-driven AI evals

### Defining test codeunit
A data-driven AI test is defined like any normal AL test, except that it:
1. Is executed through the BC Copilot Test Toolkit
2. Utilizes the `AIT Test Context` codeunit to get input for the test
A data-driven AI eval is defined like any normal AL test, except that it:
1. Is executed through the BC Copilot Eval Toolkit
2. Utilizes the `AIT Test Context` codeunit to get input for the eval
3. Optionally, sets the output using the `AIT Test Context` codeunit

See the `AIT Test Context` codeunit for the full API.

#### Example
An example test for an AI feature that returns an integer.
An example eval for an AI feature that returns an integer.

```
    [Test]
@@ -49,10 +49,7 @@ An example test for an AI feature that returns an integer.
        Output: Integer;
        ExpectedOutput: Integer;
    begin
        // [Scenario] AI Test

        // Get the input from the dataset
        TestInput := AITestContext.GetQuestion().ValueAsText();
        // [Scenario] AI Eval

        // Call the LLM to get an output
        Output := CopilotFeature.CallLLM(Question);
@@ -65,13 +62,13 @@ An example test for an AI feature that returns an integer.
    end;
```
In this example
1. This test procedure will be called with each input from the dataset
1. This eval procedure will be called with each input from the dataset
1. We get the `question` and `expected_data` from the input dataset using `AITestContext.GetQuestion()` and `AITestContext.GetExpectedData()` respectively
1. Alternatively, we could use `AITestContext.GetInput()` to get the whole line as JSON (a fuller, self-contained sketch follows below)
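Because the diff view collapses part of the example above, here is a minimal, self-contained sketch of such an eval codeunit. The `CopilotFeature.CallLLM` call and the object IDs are hypothetical placeholders, and `ValueAsInteger()`/`SetTestOutput()` are assumed members of the toolkit API; only `GetQuestion()`, `GetExpectedData()`, and the overall flow come from the example and text above.

```
codeunit 50100 "Copilot Integer Eval"
{
    Subtype = Test;

    [Test]
    procedure EvaluateIntegerAnswer()
    var
        AITestContext: Codeunit "AIT Test Context";
        CopilotFeature: Codeunit "Copilot Feature"; // hypothetical feature under evaluation
        Question: Text;
        Output: Integer;
        ExpectedOutput: Integer;
    begin
        // [Scenario] AI Eval

        // Get the input from the dataset
        Question := AITestContext.GetQuestion().ValueAsText();

        // Call the LLM-backed feature to get an output
        Output := CopilotFeature.CallLLM(Question);

        // Compare against the expected data from the dataset
        ExpectedOutput := AITestContext.GetExpectedData().ValueAsInteger();
        if Output <> ExpectedOutput then
            Error('Expected %1 but got %2.', ExpectedOutput, Output);

        // Optionally, set the output so it can also be evaluated externally
        AITestContext.SetTestOutput(Format(Output));
    end;
}
```
Each line of the dataset produces one execution of this procedure, so the same assertion runs against every eval case.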


### Defining Datasets
Datasets are provided as `.jsonl` files where each line represents a test case.
Datasets are provided as `.jsonl` or `.yaml` files where each line/entry represents an eval case.

There's no fixed structure required for each line, but using certain fields makes it easier to access the data and to define the name and description of each eval case.

@@ -80,12 +77,12 @@ See the `AIT Test Context` for the full API.
#### Example

```
{"name": "Test01", "question": "A question", "expected_data": 5}
{"name": "Test02", "question": "A second question", "expected_data": 2}
{"name": "Test03", "question": "A third question", "expected_data": 2}
{"name": "Eval01", "question": "A question", "expected_data": 5}
{"name": "Eval02", "question": "A second question", "expected_data": 2}
{"name": "Eval03", "question": "A third question", "expected_data": 2}
```

In this example
1. Setting `name` for each line, that will be used in the BC Copilot Test Toolkit when uploading the dataset
1. Setting `name` for each line gives the eval case a name that is used in the BC Copilot Eval Toolkit when uploading the dataset
1. Setting `question` for each line lets us use `AITestContext.GetQuestion()` in the test codeunit to get the question directly
1. Setting `expected_data` for each line lets us use `AITestContext.GetExpectedData()` in the test codeunit to get the expected data directly
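Since `.yaml` datasets are also supported per the note above, the same dataset might be expressed in YAML as sketched below. The exact YAML shape the toolkit expects (here, a top-level list of entries with the same keys as the JSONL lines) is an assumption and is not confirmed by this change.

```
- name: Eval01
  question: A question
  expected_data: 5
- name: Eval02
  question: A second question
  expected_data: 2
- name: Eval03
  question: A third question
  expected_data: 2
```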
6 changes: 3 additions & 3 deletions src/Tools/AI Test Toolkit/app.json
@@ -4,8 +4,8 @@
"publisher": "Microsoft",
"version": "28.0.0.0",
"propagateDependencies": true,
"brief": "Provides tools for AI feature regression testing.",
"description": "The AI Test Toolkit lets developers write and run automated tests for AI features. The toolkit supports running data driven tests and the ability to get the output generated from the tests.",
"brief": "Provides tools for AI feature evaluation.",
"description": "The AI Test Toolkit lets developers write and run automated evals for AI features. The toolkit supports running data driven evals and the ability to get the output generated from the evals.",
"privacyStatement": "https://go.microsoft.com/fwlink/?LinkId=724009",
"EULA": "https://go.microsoft.com/fwlink/?linkid=2009120",
"help": "https://go.microsoft.com/fwlink/?LinkId=724011",
@@ -44,4 +44,4 @@
"includeSourceInSymbolFile": true
},
"target": "Cloud"
}
}
42 changes: 21 additions & 21 deletions src/Tools/AI Test Toolkit/src/AITCommandLineCard.Page.al
@@ -11,7 +11,7 @@ using System.Utilities;

page 149042 "AIT CommandLine Card"
{
Caption = 'AI Test Command Line Runner';
Caption = 'AI Eval Command Line Runner';
PageType = Card;
Extensible = false;
ApplicationArea = All;
@@ -27,7 +27,7 @@
Caption = 'General';
field("AIT Suite Code"; AITCode)
{
Caption = 'Test Suite Code';
Caption = 'Eval Suite Code';
ToolTip = 'Specifies the ID of the suite.';
TableRelation = "AIT Test Suite".Code;
ShowMandatory = true;
@@ -39,7 +39,7 @@
if not AITTestSuite.Get(AITCode) then
Error(CannotFindAITSuiteErr, AITCode);

// Clear the filter on the test lines
// Clear the filter on the eval lines
if AITCode <> xRec."Test Suite Code" then
Clear(LineNoFilter);

@@ -70,7 +70,7 @@
field("Line No. Filter"; LineNoFilter)
{
Caption = 'Line No. Filter';
ToolTip = 'Specifies the line number to filter the test method lines.';
ToolTip = 'Specifies the line number to filter the eval method lines.';
TableRelation = "AIT Test Method Line"."Line No." where("Test Suite Code" = field("Test Suite Code"));
BlankZero = true;

@@ -81,8 +81,8 @@
}
field("No. of Pending Tests"; NoOfPendingTests)
{
Caption = 'No. of Pending Tests';
ToolTip = 'Specifies the number of test suite lines in the test suite that are yet to be run.';
Caption = 'No. of Pending Evals';
ToolTip = 'Specifies the number of eval suite lines in the eval suite that are yet to be run.';
Editable = false;
}
}
@@ -93,14 +93,14 @@
field("Input Dataset Filename"; InputDatasetFilename)
{
Caption = 'Import Input Dataset Filename';
ToolTip = 'Specifies the input dataset filename to import for running the test suite.';
ToolTip = 'Specifies the input dataset filename to import for running the eval suite.';
ShowMandatory = InputDataset <> '';
}
field("Input Dataset"; InputDataset)
{
Caption = 'Import Input Dataset';
MultiLine = true;
ToolTip = 'Specifies the input dataset to import for running the test suite.';
ToolTip = 'Specifies the input dataset to import for running the eval suite.';

trigger OnValidate()
var
@@ -162,7 +162,7 @@
group("Test Method Lines Group")
{
Editable = false;
Caption = 'Test Method Lines';
Caption = 'Eval Method Lines';

repeater("Test Method Lines")
{
@@ -195,8 +195,8 @@
}
field("No. of Tests Failed"; Rec."No. of Tests Executed" - Rec."No. of Tests Passed")
{
Caption = 'No. of Tests Failed';
ToolTip = 'Specifies the number of failed tests for the test line.';
Caption = 'No. of Evals Failed';
ToolTip = 'Specifies the number of failed evals for the eval line.';
}
}
}
@@ -210,7 +210,7 @@
{
Caption = 'Run Suite';
Image = Start;
ToolTip = 'Starts running the AI test suite. This action ignores the line number filter and runs all the test lines in the suite.';
ToolTip = 'Starts running the AI eval suite. This action ignores the line number filter and runs all the eval lines in the suite.';

trigger OnAction()
begin
@@ -219,9 +219,9 @@
}
action(RunNextTest)
{
Caption = 'Run Next Test';
Caption = 'Run Next Eval';
Image = TestReport;
ToolTip = 'Starts running the next test from the Test Method Lines for the given suite.';
ToolTip = 'Starts running the next eval from the Eval Method Lines for the given suite.';

trigger OnAction()
begin
@@ -230,9 +230,9 @@
}
action(ResetTestSuite)
{
Caption = 'Reset Test Suite';
Caption = 'Reset Eval Suite';
Image = Restore;
ToolTip = 'Resets the test method lines status to run them again. This action ignores the line number filter and resets all the test lines in the suite.';
ToolTip = 'Resets the eval method lines status to run them again. This action ignores the line number filter and resets all the eval lines in the suite.';

trigger OnAction()
var
@@ -249,10 +249,10 @@
{
action("AI Test Suite")
{
Caption = 'AI Test Suite';
Caption = 'AI Eval Suite';
ApplicationArea = All;
Image = Setup;
ToolTip = 'Opens the AI Test Suite page.';
ToolTip = 'Opens the AI Eval Suite page.';

trigger OnAction()
var
@@ -292,7 +292,7 @@
end;

var
CannotFindAITSuiteErr: Label 'The specified Test Suite with code %1 cannot be found.', Comment = '%1 = Test Suite id.';
CannotFindAITSuiteErr: Label 'The specified Eval Suite with code %1 cannot be found.', Comment = '%1 = Eval Suite id.';
AITCode: Code[100];
LineNoFilter: Integer;
LanguageTagFilter: Text[80];
@@ -305,7 +305,7 @@
var
AITTestSuite: Record "AIT Test Suite";
AITTestSuiteMgt: Codeunit "AIT Test Suite Mgt.";
TestSuiteCodeNotFoundErr: Label 'Test Suite with code %1 not found.', Comment = '%1 = Test Suite id.';
TestSuiteCodeNotFoundErr: Label 'Eval Suite with code %1 not found.', Comment = '%1 = Eval Suite id.';
begin
VerifyTestSuiteCode();
if not AITTestSuite.Get(AITCode) then
@@ -331,7 +331,7 @@

local procedure VerifyTestSuiteCode()
var
TestSuiteCodeRequiredErr: Label 'Test Suite Code is required to run the suite.';
TestSuiteCodeRequiredErr: Label 'Eval Suite Code is required to run the suite.';
begin
if AITCode = '' then
Error(TestSuiteCodeRequiredErr);
24 changes: 12 additions & 12 deletions src/Tools/AI Test Toolkit/src/AITTestContext.Codeunit.al
@@ -7,7 +7,7 @@ namespace System.TestTools.AITestToolkit;
using System.TestTools.TestRunner;

/// <summary>
/// Exposes functions that can be used by the AI tests.
/// Exposes functions that can be used by the AI evals.
/// </summary>
codeunit 149044 "AIT Test Context"
{
Expand All @@ -32,7 +32,7 @@ codeunit 149044 "AIT Test Context"
/// <summary>
/// Returns the Test Input value as Test Input Json Codeunit from the input dataset for the current iteration.
/// </summary>
/// <returns>Test Input Json for the current test.</returns>
/// <returns>Test Input Json for the current eval.</returns>
procedure GetInput(): Codeunit "Test Input Json"
begin
exit(AITTestContextImpl.GetInput());
@@ -76,7 +76,7 @@

/// <summary>
/// Get the Expected Data value as text from the input dataset for the current iteration.
/// Expected data is used for internal validation if the test was successful.
/// Expected data is used for internal validation of whether the eval was successful.
/// </summary>
/// <returns>Test Input Json for the expected data</returns>
procedure GetExpectedData(): Codeunit "Test Input Json"
@@ -117,8 +117,8 @@
end;

/// <summary>
/// Adds a message to the current test iteration.
/// This is used for multi-turn tests to add messages to the output.
/// Adds a message to the current eval iteration.
/// This is used for multi-turn evals to add messages to the output.
/// </summary>
/// <param name="Content">The content of the message.</param>
/// <param name="Role">The role of the message (e.g., 'user', 'assistant').</param>
@@ -129,8 +129,8 @@
end;

/// <summary>
/// Adds a message to the current test iteration.
/// This is used for multi-turn tests to add messages to the output.
/// Adds a message to the current eval iteration.
/// This is used for multi-turn evals to add messages to the output.
/// </summary>
/// <param name="Content">The content of the message.</param>
/// <param name="Role">The role of the message (e.g., 'user', 'assistant').</param>
@@ -178,7 +178,7 @@
end;

/// <summary>
/// Sets the accuracy of the test.
/// Sets the accuracy of the eval.
/// </summary>
/// <param name="Accuracy">The accuracy as a decimal between 0 and 1.</param>
procedure SetAccuracy(Accuracy: Decimal)
@@ -205,11 +205,11 @@
end;

/// <summary>
/// Integration event that is raised after a test run is completed.
/// Integration event that is raised after an eval run is completed.
/// </summary>
/// <param name="Code">The code of the test run.</param>
/// <param name="Version">The version of the test run.</param>
/// <param name="Tag">The tag of the test run.</param>
/// <param name="Code">The code of the eval run.</param>
/// <param name="Version">The version of the eval run.</param>
/// <param name="Tag">The tag of the eval run.</param>
[IntegrationEvent(false, false)]
internal procedure OnAfterRunComplete(Code: Code[10]; Version: Integer; Tag: Text[20])
begin
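For illustration, a subscriber to the `OnAfterRunComplete` integration event documented above could look like the sketch below. The codeunit ID and name are hypothetical, and because the event is declared `internal`, such a subscriber compiles only within the same module (or an app granted internal access); only the event signature comes from the source.

```
codeunit 50101 "AIT Run Complete Subscriber"
{
    EventSubscriberInstance = StaticAutomatic;

    [EventSubscriber(ObjectType::Codeunit, Codeunit::"AIT Test Context", 'OnAfterRunComplete', '', false, false)]
    local procedure HandleOnAfterRunComplete(Code: Code[10]; Version: Integer; Tag: Text[20])
    begin
        // React to a completed eval run, e.g. export or post-process the AIT log entries
        Message('Eval run %1 (version %2, tag %3) completed.', Code, Version, Tag);
    end;
}
```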