From 947105687ab3fcbc067f5cca45e3807df055802a Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Tue, 10 Feb 2026 12:04:30 -0800 Subject: [PATCH 1/6] mcp test --- py-src/data_formulator/mcp_server.py | 587 +++++++++++++++++++++++++++ 1 file changed, 587 insertions(+) create mode 100644 py-src/data_formulator/mcp_server.py diff --git a/py-src/data_formulator/mcp_server.py b/py-src/data_formulator/mcp_server.py new file mode 100644 index 00000000..7c976bff --- /dev/null +++ b/py-src/data_formulator/mcp_server.py @@ -0,0 +1,587 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Data Formulator MCP Server + +Exposes Data Formulator's AI-powered data visualization capabilities +as an MCP (Model Context Protocol) server with the following tools: + +1. visualize_data: Given data + instruction → transformed data + chart (PNG) +2. explore_data: Multi-turn iterative exploration → rounds of response + data + chart + +Usage: + # Run as stdio MCP server (for MCP clients like Claude Desktop, VS Code, etc.) + python -m data_formulator.mcp_server + + # Or with uvx + uvx mcp run data_formulator.mcp_server + +Environment variables: + OPENAI_API_KEY / ANTHROPIC_API_KEY / etc. - API keys for LLM providers + DF_MCP_MODEL_ENDPOINT - LLM provider (default: "openai") + DF_MCP_MODEL_NAME - Model name (default: "gpt-4o") + DF_MCP_API_KEY - API key (overrides provider-specific key) + DF_MCP_API_BASE - Custom API base URL (optional) + DATALAKE_ROOT - Workspace root directory (optional) +""" + +import os +import sys +import json +import base64 +import logging +import tempfile +from pathlib import Path +from typing import Any + +import pandas as pd + +from dotenv import load_dotenv + +# Load environment variables +load_dotenv(os.path.join(Path(__file__).parent.parent.parent, 'api-keys.env')) +load_dotenv(os.path.join(Path(__file__).parent, 'api-keys.env')) + +from mcp.server.fastmcp import FastMCP + +from data_formulator.agents.client_utils import Client +from data_formulator.agents.agent_data_rec import DataRecAgent +from data_formulator.agents.agent_data_transform import DataTransformationAgent +from data_formulator.agents.agent_exploration import ExplorationAgent +from data_formulator.datalake.workspace import Workspace, WorkspaceWithTempData +from data_formulator.workflows.create_vl_plots import ( + assemble_vegailte_chart, + spec_to_base64, + detect_field_type, + create_chart_spec, +) +from data_formulator.workflows.exploration_flow import create_chart_spec_from_data + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _get_model_config() -> dict[str, str]: + """Build model config from environment variables.""" + endpoint = os.getenv("DF_MCP_MODEL_ENDPOINT", "openai") + model = os.getenv("DF_MCP_MODEL_NAME", "gpt-4o") + + # Resolve API key: explicit > provider-specific + api_key = os.getenv("DF_MCP_API_KEY", "") + if not api_key: + api_key = os.getenv(f"{endpoint.upper()}_API_KEY", "") + + api_base = os.getenv("DF_MCP_API_BASE", os.getenv(f"{endpoint.upper()}_API_BASE", "")) + api_version = os.getenv("DF_MCP_API_VERSION", os.getenv(f"{endpoint.upper()}_API_VERSION", "")) + + return { + "endpoint": endpoint, + "model": model, + "api_key": api_key, + "api_base": api_base, + "api_version": api_version, + } + + +def _get_client() -> Client: + """Create an LLM client from environment config.""" + return 
Client.from_config(_get_model_config()) + + +def _get_workspace(session_id: str = "mcp_session") -> Workspace: + """Create or reuse a workspace for the MCP session.""" + return Workspace(session_id) + + +def _parse_data_input(data: str, data_format: str = "auto") -> pd.DataFrame: + """ + Parse data from a string (JSON or CSV) into a DataFrame. + + Args: + data: Raw data string (JSON array or CSV text) + data_format: "json", "csv", or "auto" (detect automatically) + + Returns: + pandas DataFrame + """ + if data_format == "auto": + stripped = data.strip() + if stripped.startswith("[") or stripped.startswith("{"): + data_format = "json" + else: + data_format = "csv" + + if data_format == "json": + parsed = json.loads(data) + if isinstance(parsed, dict): + parsed = [parsed] + return pd.DataFrame(parsed) + else: + from io import StringIO + return pd.read_csv(StringIO(data)) + + +def _make_chart_image( + rows: list[dict], + chart_type: str, + chart_encodings: dict[str, str], +) -> str | None: + """Create a base64 PNG from data rows + chart spec. Returns data URL or None.""" + try: + df = pd.DataFrame(rows) + if df.empty: + return None + + encodings = {} + for channel, field in chart_encodings.items(): + if field and field in df.columns: + field_type = detect_field_type(df[field]) + encodings[channel] = {"field": field, "type": field_type} + + spec = assemble_vegailte_chart(df, chart_type, encodings) + if spec: + return spec_to_base64(spec) + except Exception as e: + logger.warning(f"Chart creation failed: {e}") + return None + + +# --------------------------------------------------------------------------- +# MCP Server +# --------------------------------------------------------------------------- + +mcp = FastMCP( + "Data Formulator", + description=( + "AI-powered data visualization server. " + "Transform data, generate charts, and explore datasets interactively." + ), +) + + +@mcp.tool() +def visualize_data( + data: str, + instruction: str, + data_format: str = "auto", + table_name: str = "input_data", + chart_type: str = "", + x: str = "", + y: str = "", + color: str = "", + size: str = "", + facet: str = "", + max_repair_attempts: int = 1, +) -> dict[str, Any]: + """ + Transform data and generate a visualization based on a natural language instruction. + + Given tabular data (JSON or CSV) and a natural language instruction, this tool: + 1. Uses an AI agent to understand the intent and generate transformation code + 2. Executes the transformation to produce the output data + 3. Creates a chart (PNG) from the transformed data + + Use this for one-shot data analysis tasks like: + - "Show average sales by region as a bar chart" + - "Create a scatter plot of price vs rating colored by category" + - "Forecast the next 6 months of revenue" + + Args: + data: Tabular data as a JSON array of objects or CSV text. + instruction: Natural language description of what visualization to create. + data_format: "json", "csv", or "auto" (default: auto-detect). + table_name: Name for the input table (default: "input_data"). + chart_type: Optional chart type hint ("bar", "point", "line", "area", "heatmap", + "group_bar", "boxplot", "worldmap", "usmap"). Leave empty to let the AI decide. + x: Optional field name for x-axis encoding. + y: Optional field name for y-axis encoding. + color: Optional field name for color encoding. + size: Optional field name for size encoding. + facet: Optional field name for facet encoding. + max_repair_attempts: Max retries if code execution fails (default: 1). 
+ + Returns: + A dictionary with: + - status: "ok" or "error" + - instruction_summary: Short description of what was done + - chart_type: The chart type used + - chart_encodings: Mapping of visual channels to fields + - transformed_data: List of row dicts (first 50 rows) + - transformed_data_full_count: Total row count + - chart_image_base64: Base64 PNG data URL of the chart (or null) + - code: The Python transformation code generated + - reasoning: The AI's reasoning about the transformation + """ + try: + # Parse input data + df = _parse_data_input(data, data_format) + rows = json.loads(df.to_json(orient="records", date_format="iso")) + + input_tables = [{"name": table_name, "rows": rows}] + + # Build chart encodings from optional hints + chart_encodings = {} + if x: chart_encodings["x"] = x + if y: chart_encodings["y"] = y + if color: chart_encodings["color"] = color + if size: chart_encodings["size"] = size + if facet: chart_encodings["facet"] = facet + + # Decide mode: recommendation (no encodings) vs transform (has encodings) + mode = "recommendation" if not chart_encodings else "transform" + + # Set up workspace + agent + client = _get_client() + workspace = _get_workspace() + temp_data = [{"name": table_name, "rows": rows}] + + with WorkspaceWithTempData(workspace, temp_data) as ws: + if mode == "recommendation": + agent = DataRecAgent(client=client, workspace=ws) + results = agent.run(input_tables, instruction, n=1) + else: + agent = DataTransformationAgent(client=client, workspace=ws) + goal = {"goal": instruction, "chart_type": chart_type, "chart_encodings": chart_encodings} + results = agent.run( + input_tables, + json.dumps(goal), + [], # no previous messages + ) + + # Repair loop + attempts = 0 + while results[0]["status"] == "error" and attempts < max_repair_attempts: + error_msg = results[0]["content"] + repair_instruction = ( + f"We ran into the following problem executing the code, please fix it:\n\n" + f"{error_msg}\n\n" + f"Please think step by step, reflect on why the error happens, and fix the code." 
+ ) + prev_dialog = results[0]["dialog"] + + if mode == "recommendation": + results = agent.followup(input_tables, prev_dialog, [], repair_instruction, n=1) + else: + results = agent.followup(input_tables, prev_dialog, [], repair_instruction, n=1) + attempts += 1 + + # Process result + result = results[0] + if result["status"] != "ok": + return { + "status": "error", + "message": result.get("content", "Unknown error"), + "code": result.get("code", ""), + } + + transformed_data = result["content"] + refined_goal = result.get("refined_goal", {}) + code = result.get("code", "") + + out_rows = transformed_data.get("rows", []) + out_chart_type = refined_goal.get("chart_type", chart_type or "bar") + out_encodings = refined_goal.get("chart_encodings", chart_encodings) + + # Generate chart image + chart_image = _make_chart_image(out_rows, out_chart_type, out_encodings) + + return { + "status": "ok", + "instruction_summary": refined_goal.get("display_instruction", instruction), + "chart_type": out_chart_type, + "chart_encodings": out_encodings, + "transformed_data": out_rows[:50], + "transformed_data_full_count": len(out_rows), + "chart_image_base64": chart_image, + "code": code, + "reasoning": { + "mode": refined_goal.get("mode", mode), + "recommendation": refined_goal.get("recommendation", ""), + "output_fields": refined_goal.get("output_fields", []), + }, + } + + except Exception as e: + logger.exception("visualize_data failed") + return {"status": "error", "message": str(e)} + + +@mcp.tool() +def explore_data( + data: str, + question: str, + data_format: str = "auto", + table_name: str = "input_data", + max_iterations: int = 3, + max_repair_attempts: int = 1, +) -> dict[str, Any]: + """ + Iteratively explore a dataset through multiple rounds of AI-driven analysis. + + Given tabular data and a high-level exploration question, this tool: + 1. Breaks the question into a multi-step analysis plan + 2. For each step: transforms data, creates a chart, and decides the next step + 3. Returns all exploration steps with their data and charts + + Use this for open-ended data exploration like: + - "What are the key trends and patterns in this sales data?" + - "Explore the factors that affect student performance" + - "Analyze the relationship between weather and energy consumption" + + Args: + data: Tabular data as a JSON array of objects or CSV text. + question: High-level exploration question or topic. + data_format: "json", "csv", or "auto" (default: auto-detect). + table_name: Name for the input table (default: "input_data"). + max_iterations: Maximum number of exploration rounds (default: 3). + max_repair_attempts: Max code repair retries per step (default: 1). 
+ + Returns: + A dictionary with: + - status: "ok" or "error" + - question: The original exploration question + - steps: List of exploration step results, each containing: + - iteration: Step number + - question: The question addressed in this step + - chart_type: Chart type used + - chart_encodings: Visual channel mappings + - transformed_data: Rows of transformed data (first 50) + - chart_image_base64: Base64 PNG of the chart (or null) + - code: Python transformation code + - instruction_summary: Short description of what was done + - summary: Final summary of exploration findings + - total_steps: Number of steps completed + """ + try: + # Parse input data + df = _parse_data_input(data, data_format) + rows = json.loads(df.to_json(orient="records", date_format="iso")) + + input_tables = [{"name": table_name, "rows": rows}] + + client = _get_client() + workspace = _get_workspace() + temp_data = [{"name": table_name, "rows": rows}] + + steps = [] + + with WorkspaceWithTempData(workspace, temp_data) as ws: + rec_agent = DataRecAgent(client=client, workspace=ws) + exploration_agent = ExplorationAgent(client=client, workspace=ws) + + completed_steps_for_agent = [] + current_question = question + current_plan: list[str] = [] + previous_dialog: list[dict] = [] + previous_data: dict = {} + + for iteration in range(1, max_iterations + 1): + # Step 1: Transform data for current question + if previous_dialog: + latest_sample = previous_data.get("rows", []) if isinstance(previous_data, dict) else [] + transform_results = rec_agent.followup( + input_tables=input_tables, + new_instruction=current_question, + latest_data_sample=latest_sample, + dialog=previous_dialog, + ) + else: + transform_results = rec_agent.run( + input_tables=input_tables, + description=current_question, + ) + + # Repair loop + attempt = 0 + while transform_results and transform_results[0]["status"] != "ok" and attempt < max_repair_attempts: + attempt += 1 + error_msg = transform_results[0]["content"] + dialog = transform_results[0]["dialog"] + repair_instr = ( + f"We ran into the following problem executing the code, please fix it:\n\n" + f"{error_msg}\n\nPlease think step by step and fix the code." 
+ ) + transform_results = rec_agent.followup( + input_tables=input_tables, + new_instruction=repair_instr, + latest_data_sample=[], + dialog=dialog, + ) + + if not transform_results or transform_results[0]["status"] != "ok": + error_msg = transform_results[0]["content"] if transform_results else "Transform failed" + steps.append({ + "iteration": iteration, + "question": current_question, + "status": "error", + "message": error_msg, + }) + break + + result = transform_results[0] + transformed_data = result["content"] + refined_goal = result.get("refined_goal", {}) + code = result.get("code", "") + previous_dialog = result.get("dialog", []) + previous_data = transformed_data + + out_rows = transformed_data.get("rows", []) + out_chart_type = refined_goal.get("chart_type", "bar") + out_encodings = refined_goal.get("chart_encodings", {}) + + # Create chart + chart_image = _make_chart_image(out_rows, out_chart_type, out_encodings) + + step_result = { + "iteration": iteration, + "question": current_question, + "status": "ok", + "chart_type": out_chart_type, + "chart_encodings": out_encodings, + "transformed_data": out_rows[:50], + "transformed_data_full_count": len(out_rows), + "chart_image_base64": chart_image, + "code": code, + "instruction_summary": refined_goal.get("display_instruction", current_question), + } + steps.append(step_result) + + # Track for exploration agent + completed_steps_for_agent.append({ + "question": current_question, + "code": code, + "data": { + "rows": out_rows[:20], + "name": transformed_data.get("virtual", {}).get("table_name", f"step_{iteration}"), + }, + "visualization": chart_image, + }) + + # Step 2: Decide next step via exploration agent + if iteration >= max_iterations: + break + + try: + followup_results = exploration_agent.suggest_followup( + input_tables=input_tables, + completed_steps=completed_steps_for_agent, + next_steps=current_plan, + ) + + if followup_results and followup_results[0]["status"] == "ok": + plan = followup_results[0]["content"] + if plan.get("status") in ("present", "warning"): + # Agent decided to stop and present findings + break + next_steps = plan.get("next_steps", []) + if next_steps: + current_question = next_steps[0] + current_plan = next_steps[1:] + else: + break + else: + break + except Exception as e: + logger.warning(f"Exploration planning failed: {e}") + break + + # Build summary + summary_parts = [] + for s in steps: + if s.get("status") == "ok": + summary_parts.append(f"Step {s['iteration']}: {s.get('instruction_summary', s['question'])}") + + return { + "status": "ok", + "question": question, + "steps": steps, + "summary": "\n".join(summary_parts) if summary_parts else "No steps completed.", + "total_steps": len(steps), + } + + except Exception as e: + logger.exception("explore_data failed") + return {"status": "error", "message": str(e), "steps": []} + + +@mcp.tool() +def create_chart( + data: str, + chart_type: str, + x: str = "", + y: str = "", + color: str = "", + size: str = "", + facet: str = "", + data_format: str = "auto", +) -> dict[str, Any]: + """ + Create a chart directly from data and field mappings (no AI, no transformation). + + This is a fast, deterministic tool for creating standard charts when you already + know exactly which fields to use and how to map them. + + Args: + data: Tabular data as a JSON array of objects or CSV text. + chart_type: One of "bar", "point", "line", "area", "heatmap", + "group_bar", "boxplot". + x: Field name for x-axis. + y: Field name for y-axis. 
+ color: Optional field name for color encoding. + size: Optional field name for size encoding. + facet: Optional field name for faceting. + data_format: "json", "csv", or "auto". + + Returns: + A dictionary with: + - status: "ok" or "error" + - chart_image_base64: Base64 PNG data URL + - chart_type: The chart type used + - fields_used: List of fields mapped to channels + """ + try: + df = _parse_data_input(data, data_format) + + # Build encoding dict + fields = [] + if x: fields.append(x) + if y: fields.append(y) + if color: fields.append(color) + if size: fields.append(size) + if facet: fields.append(facet) + + if not fields: + return {"status": "error", "message": "At least one field (x or y) is required."} + + spec = create_chart_spec(df, fields, chart_type) + if spec: + image = spec_to_base64(spec) + return { + "status": "ok", + "chart_image_base64": image, + "chart_type": chart_type, + "fields_used": fields, + } + else: + return {"status": "error", "message": "Failed to create chart specification."} + + except Exception as e: + logger.exception("create_chart failed") + return {"status": "error", "message": str(e)} + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + +def main(): + """Run the MCP server (stdio transport).""" + logging.basicConfig(level=logging.WARNING, stream=sys.stderr) + mcp.run() + + +if __name__ == "__main__": + main() From dc2130dfb36149fd506db86b44be8c4d921e9acd Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Tue, 10 Feb 2026 12:07:03 -0800 Subject: [PATCH 2/6] add mcp dependency --- pyproject.toml | 1 + uv.lock | 117 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 34599461..e52ffb10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ dependencies = [ "yfinance", "connectorx>=0.4.5", "pyarrow>=23.0.0", + "mcp>=1.26.0", ] [project.urls] diff --git a/uv.lock b/uv.lock index 0df5b6a4..473ea937 100644 --- a/uv.lock +++ b/uv.lock @@ -731,6 +731,7 @@ dependencies = [ { name = "google-cloud-bigquery" }, { name = "jupyter" }, { name = "litellm" }, + { name = "mcp" }, { name = "numpy" }, { name = "openai" }, { name = "pandas" }, @@ -769,6 +770,7 @@ requires-dist = [ { name = "google-cloud-bigquery" }, { name = "jupyter" }, { name = "litellm" }, + { name = "mcp", specifier = ">=1.26.0" }, { name = "numpy" }, { name = "openai" }, { name = "pandas" }, @@ -1418,6 +1420,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[[package]] +name = "httpx-sse" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943, upload-time = "2025-10-10T21:48:22.271Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = 
"2025-10-10T21:48:21.158Z" }, +] + [[package]] name = "huggingface-hub" version = "1.3.7" @@ -2160,6 +2171,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/33/ee4519fa02ed11a94aef9559552f3b17bb863f2ecfe1a35dc7f548cde231/matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76", size = 9516, upload-time = "2025-10-23T09:00:20.675Z" }, ] +[[package]] +name = "mcp" +version = "1.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "jsonschema" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "python-multipart" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "sse-starlette" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/6d/62e76bbb8144d6ed86e202b5edd8a4cb631e7c8130f3f4893c3f90262b10/mcp-1.26.0.tar.gz", hash = "sha256:db6e2ef491eecc1a0d93711a76f28dec2e05999f93afd48795da1c1137142c66", size = 608005, upload-time = "2026-01-24T19:40:32.468Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/d9/eaa1f80170d2b7c5ba23f3b59f766f3a0bb41155fbc32a69adfa1adaaef9/mcp-1.26.0-py3-none-any.whl", hash = "sha256:904a21c33c25aa98ddbeb47273033c435e595bbacfdb177f4bd87f6dceebe1ca", size = 233615, upload-time = "2026-01-24T19:40:30.652Z" }, +] + [[package]] name = "mistune" version = "3.2.0" @@ -3022,6 +3058,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" }, ] +[[package]] +name = "pydantic-settings" +version = "2.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/4b/ac7e0aae12027748076d72a8764ff1c9d82ca75a7a52622e67ed3f765c54/pydantic_settings-2.12.0.tar.gz", hash = "sha256:005538ef951e3c2a68e1c08b292b5f2e71490def8589d4221b95dab00dafcfd0", size = 194184, upload-time = "2025-11-10T14:25:47.013Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -3207,6 +3257,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/e5/fecf13f06e5e5f67e8837d777d1bc43fac0ed2b77a676804df5c34744727/python_json_logger-4.0.0-py3-none-any.whl", hash = "sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2", size = 15548, upload-time = "2025-10-06T04:15:17.553Z" }, ] +[[package]] +name = "python-multipart" +version = "0.0.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/01/979e98d542a70714b0cb2b6728ed0b7c46792b695e3eaec3e20711271ca3/python_multipart-0.0.22.tar.gz", hash = 
"sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58", size = 37612, upload-time = "2026-01-25T10:15:56.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" }, +] + [[package]] name = "pytz" version = "2025.2" @@ -3216,6 +3275,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, ] +[[package]] +name = "pywin32" +version = "311" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/af/449a6a91e5d6db51420875c54f6aff7c97a86a3b13a0b4f1a5c13b988de3/pywin32-311-cp311-cp311-win32.whl", hash = "sha256:184eb5e436dea364dcd3d2316d577d625c0351bf237c4e9a5fabbcfa5a58b151", size = 8697031, upload-time = "2025-07-14T20:13:13.266Z" }, + { url = "https://files.pythonhosted.org/packages/51/8f/9bb81dd5bb77d22243d33c8397f09377056d5c687aa6d4042bea7fbf8364/pywin32-311-cp311-cp311-win_amd64.whl", hash = "sha256:3ce80b34b22b17ccbd937a6e78e7225d80c52f5ab9940fe0506a1a16f3dab503", size = 9508308, upload-time = "2025-07-14T20:13:15.147Z" }, + { url = "https://files.pythonhosted.org/packages/44/7b/9c2ab54f74a138c491aba1b1cd0795ba61f144c711daea84a88b63dc0f6c/pywin32-311-cp311-cp311-win_arm64.whl", hash = "sha256:a733f1388e1a842abb67ffa8e7aad0e70ac519e09b0f6a784e65a136ec7cefd2", size = 8703930, upload-time = "2025-07-14T20:13:16.945Z" }, + { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" }, + { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" }, + { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, + { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, + { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = 
"sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, + { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, + { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, +] + [[package]] name = "pywinpty" version = "3.0.2" @@ -3816,6 +3894,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" }, ] +[[package]] +name = "sse-starlette" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "starlette" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/8d/00d280c03ffd39aaee0e86ec81e2d3b9253036a0f93f51d10503adef0e65/sse_starlette-3.2.0.tar.gz", hash = "sha256:8127594edfb51abe44eac9c49e59b0b01f1039d0c7461c6fd91d4e03b70da422", size = 27253, upload-time = "2026-01-17T13:11:05.62Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/7f/832f015020844a8b8f7a9cbc103dd76ba8e3875004c41e08440ea3a2b41a/sse_starlette-3.2.0-py3-none-any.whl", hash = "sha256:5876954bd51920fc2cd51baee47a080eb88a37b5b784e615abb0b283f801cdbf", size = 12763, upload-time = "2026-01-17T13:11:03.775Z" }, +] + [[package]] name = "stack-data" version = "0.6.3" @@ -3830,6 +3921,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521, upload-time = "2023-09-30T13:58:03.53Z" }, ] +[[package]] +name = "starlette" +version = "0.52.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, +] + [[package]] name = "terminado" version = "0.18.1" @@ -4046,6 +4150,19 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, ] +[[package]] +name = "uvicorn" +version = "0.40.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, +] + [[package]] name = "vega-datasets" version = "0.9.0" From b65c917d1394c4710a84be5264f72d92b071ebb3 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Tue, 10 Feb 2026 12:08:33 -0800 Subject: [PATCH 3/6] test file --- py-src/data_formulator/mcp_server.py | 55 +++- py-src/tests/test_mcp_server.py | 469 +++++++++++++++++++++++++++ 2 files changed, 520 insertions(+), 4 deletions(-) create mode 100644 py-src/tests/test_mcp_server.py diff --git a/py-src/data_formulator/mcp_server.py b/py-src/data_formulator/mcp_server.py index 7c976bff..d1501ed0 100644 --- a/py-src/data_formulator/mcp_server.py +++ b/py-src/data_formulator/mcp_server.py @@ -10,12 +10,59 @@ 1. visualize_data: Given data + instruction → transformed data + chart (PNG) 2. explore_data: Multi-turn iterative exploration → rounds of response + data + chart -Usage: - # Run as stdio MCP server (for MCP clients like Claude Desktop, VS Code, etc.) +Setup: + # Install with uv (recommended) + uv pip install -e ".[mcp]" # from project root + # or install mcp separately + uv pip install mcp + +Running the MCP server: + # Option 1: Run directly with uv + uv run python -m data_formulator.mcp_server + + # Option 2: Run with python (after installing) python -m data_formulator.mcp_server - # Or with uvx - uvx mcp run data_formulator.mcp_server + # Option 3: Run the module file directly + uv run py-src/data_formulator/mcp_server.py + +Configure in Claude Desktop (claude_desktop_config.json): + { + "mcpServers": { + "data-formulator": { + "command": "uv", + "args": [ + "--directory", "/path/to/data-formulator", + "run", "python", "-m", "data_formulator.mcp_server" + ], + "env": { + "OPENAI_API_KEY": "sk-...", + "DF_MCP_MODEL_ENDPOINT": "openai", + "DF_MCP_MODEL_NAME": "gpt-4o" + } + } + } + } + +Configure in VS Code (settings.json): + { + "mcp": { + "servers": { + "data-formulator": { + "command": "uv", + "args": [ + "--directory", "/path/to/data-formulator", + "run", "python", "-m", "data_formulator.mcp_server" + ], + "env": { + "OPENAI_API_KEY": "sk-...", + "DF_MCP_MODEL_ENDPOINT": "openai", + "DF_MCP_MODEL_NAME": "gpt-4o" + } + } + } + } + } Environment variables: OPENAI_API_KEY / ANTHROPIC_API_KEY / etc. 
- API keys for LLM providers diff --git a/py-src/tests/test_mcp_server.py b/py-src/tests/test_mcp_server.py new file mode 100644 index 00000000..27071c98 --- /dev/null +++ b/py-src/tests/test_mcp_server.py @@ -0,0 +1,469 @@ +#!/usr/bin/env python3 +""" +Sample script: Using Data Formulator as an MCP Server + +This script demonstrates how to use Data Formulator's MCP server for: + 1. Data Recommendation / Visualization (one-shot) + 2. Iterative Data Exploration (multi-turn) + +There are TWO ways to use the MCP tools demonstrated here: + + (A) Direct invocation — import and call the tool functions directly + (no MCP client/server needed, great for scripting & testing) + + (B) MCP client — connect to the MCP server over stdio and call tools + via the MCP protocol (how real MCP hosts like Claude Desktop use it) + +Prerequisites: + # Install dependencies with uv (from project root): + uv pip install -e ".[mcp]" + # or: + uv pip install mcp pandas vl-convert-python + + # Set your LLM API key: + export OPENAI_API_KEY="sk-..." # or ANTHROPIC_API_KEY, etc. + export DF_MCP_MODEL_ENDPOINT="openai" # openai | anthropic | azure | gemini | ollama + export DF_MCP_MODEL_NAME="gpt-4o" # model name + +Usage: + # Run all demos with uv (recommended): + uv run python py-src/tests/test_mcp_server.py + + # Run all demos (direct invocation, no server process needed): + python py-src/tests/test_mcp_server.py + + # Run only one demo: + uv run python py-src/tests/test_mcp_server.py --demo 1 # one-shot visualization + uv run python py-src/tests/test_mcp_server.py --demo 2 # iterative exploration + uv run python py-src/tests/test_mcp_server.py --demo 3 # MCP client over stdio +""" + +import argparse +import asyncio +import json +import base64 +import os +import sys +from pathlib import Path + +# --------------------------------------------------------------------------- +# Setup paths so we can import data_formulator +# --------------------------------------------------------------------------- +SCRIPT_DIR = Path(__file__).parent +PROJECT_ROOT = SCRIPT_DIR.parent.parent +sys.path.insert(0, str(PROJECT_ROOT)) + +OUTPUT_DIR = SCRIPT_DIR / "mcp_demo_output" +OUTPUT_DIR.mkdir(exist_ok=True) + + +# ╔═════════════════════════════════════════════════════════════════════════╗ +# ║ SAMPLE DATA ║ +# ╚═════════════════════════════════════════════════════════════════════════╝ + +SAMPLE_CSV = """Country,Year,GDP_Billion,Population_Million,CO2_Emission_MT +United States,2018,20580,327,5280 +United States,2019,21430,329,5130 +United States,2020,20940,331,4570 +United States,2021,23320,332,5010 +United States,2022,25460,333,5060 +China,2018,13890,1393,10060 +China,2019,14280,1398,10170 +China,2020,14720,1402,10670 +China,2021,17730,1405,11470 +China,2022,17960,1406,11400 +Germany,2018,3970,83,759 +Germany,2019,3890,83,702 +Germany,2020,3890,83,644 +Germany,2021,4220,83,675 +Germany,2022,4070,84,666 +India,2018,2710,1353,2480 +India,2019,2870,1366,2600 +India,2020,2660,1380,2440 +India,2021,3180,1393,2710 +India,2022,3390,1407,2830 +Japan,2018,4970,126,1160 +Japan,2019,5080,126,1140 +Japan,2020,5040,126,1060 +Japan,2021,4940,125,1070 +Japan,2022,4230,125,1050 +Brazil,2018,1870,210,460 +Brazil,2019,1870,211,470 +Brazil,2020,1440,212,440 +Brazil,2021,1650,213,490 +Brazil,2022,1920,214,490""" + +SAMPLE_JSON = json.dumps([ + {"Student": "Alice", "Math": 92, "Science": 88, "English": 95, "History": 78, "Grade": "A"}, + {"Student": "Bob", "Math": 76, "Science": 82, "English": 71, "History": 89, "Grade": "B"}, + {"Student": "Charlie", 
"Math": 88, "Science": 91, "English": 84, "History": 92, "Grade": "A"}, + {"Student": "Diana", "Math": 65, "Science": 70, "English": 90, "History": 85, "Grade": "B"}, + {"Student": "Eve", "Math": 95, "Science": 97, "English": 92, "History": 88, "Grade": "A"}, + {"Student": "Frank", "Math": 58, "Science": 62, "English": 68, "History": 72, "Grade": "C"}, + {"Student": "Grace", "Math": 84, "Science": 79, "English": 88, "History": 91, "Grade": "B"}, + {"Student": "Henry", "Math": 91, "Science": 85, "English": 79, "History": 83, "Grade": "A"}, + {"Student": "Iris", "Math": 73, "Science": 68, "English": 82, "History": 76, "Grade": "B"}, + {"Student": "Jack", "Math": 87, "Science": 93, "English": 86, "History": 80, "Grade": "A"}, +]) + + +# ╔═════════════════════════════════════════════════════════════════════════╗ +# ║ HELPERS ║ +# ╚═════════════════════════════════════════════════════════════════════════╝ + +def save_chart(base64_data_url: str | None, filename: str) -> None: + """Save a base64 data URL (data:image/png;base64,...) to a PNG file.""" + if not base64_data_url: + print(f" ⚠ No chart image for {filename}") + return + try: + b64 = base64_data_url.split(",", 1)[1] if "," in base64_data_url else base64_data_url + filepath = OUTPUT_DIR / filename + filepath.write_bytes(base64.b64decode(b64)) + print(f" ✅ Chart saved: {filepath}") + except Exception as e: + print(f" ❌ Failed to save chart {filename}: {e}") + + +def save_json_result(data: dict, filename: str) -> None: + """Save a dict to a JSON file.""" + filepath = OUTPUT_DIR / filename + filepath.write_text(json.dumps(data, indent=2, default=str)) + print(f" 📄 Result saved: {filepath}") + + +def print_section(title: str) -> None: + width = 70 + print() + print("=" * width) + print(f" {title}") + print("=" * width) + + +# ╔═════════════════════════════════════════════════════════════════════════╗ +# ║ DEMO 1: One-shot Visualization (Data Recommendation) ║ +# ╚═════════════════════════════════════════════════════════════════════════╝ + +def demo_1_one_shot_visualization(): + """ + Demonstrates the `visualize_data` tool: + Input: data (CSV or JSON) + natural language instruction + Output: transformed data + chart image (PNG) + reasoning + """ + print_section("DEMO 1: One-shot Data Visualization") + + # Import the MCP tool function directly + from data_formulator.mcp_server import visualize_data + + # --- Example 1a: CSV data, let AI recommend a visualization --- + print("\n📊 Example 1a: GDP trends (CSV, AI-recommended chart)") + print(" Instruction: 'Show GDP trends over time for each country as a line chart'") + + result = visualize_data( + data=SAMPLE_CSV, + instruction="Show GDP trends over time for each country as a line chart", + data_format="csv", + table_name="world_economy", + ) + + print(f" Status: {result['status']}") + if result["status"] == "ok": + print(f" Summary: {result['instruction_summary']}") + print(f" Chart type: {result['chart_type']}") + print(f" Encodings: {result['chart_encodings']}") + print(f" Data rows: {result['transformed_data_full_count']}") + print(f" Code:\n{result['code'][:300]}...") + save_chart(result.get("chart_image_base64"), "demo1a_gdp_trends.png") + save_json_result(result, "demo1a_result.json") + else: + print(f" Error: {result.get('message', 'Unknown')}") + + # --- Example 1b: JSON data, with encoding hints --- + print("\n📊 Example 1b: Student scores (JSON, with encoding hints)") + print(" Instruction: 'Compare students by their average score across all subjects'") + + result = visualize_data( + 
data=SAMPLE_JSON, + instruction="Compare students by their average score across all subjects", + data_format="json", + table_name="student_scores", + ) + + print(f" Status: {result['status']}") + if result["status"] == "ok": + print(f" Summary: {result['instruction_summary']}") + print(f" Chart type: {result['chart_type']}") + print(f" Encodings: {result['chart_encodings']}") + print(f" Output fields: {result['reasoning']['output_fields']}") + save_chart(result.get("chart_image_base64"), "demo1b_student_avg.png") + save_json_result(result, "demo1b_result.json") + else: + print(f" Error: {result.get('message', 'Unknown')}") + + # --- Example 1c: CO2 per capita analysis --- + print("\n📊 Example 1c: CO2 per capita (CSV, computed metric)") + print(" Instruction: 'Calculate CO2 emissions per capita and show as a grouped bar chart by country and year'") + + result = visualize_data( + data=SAMPLE_CSV, + instruction="Calculate CO2 emissions per capita (CO2 / Population) and show as a grouped bar chart by country for the latest year", + data_format="csv", + table_name="world_economy", + ) + + print(f" Status: {result['status']}") + if result["status"] == "ok": + print(f" Summary: {result['instruction_summary']}") + print(f" Chart type: {result['chart_type']}") + print(f" Data preview: {result['transformed_data'][:3]}") + save_chart(result.get("chart_image_base64"), "demo1c_co2_per_capita.png") + save_json_result(result, "demo1c_result.json") + else: + print(f" Error: {result.get('message', 'Unknown')}") + + +# ╔═════════════════════════════════════════════════════════════════════════╗ +# ║ DEMO 2: Iterative Exploration (Multi-turn Workflow) ║ +# ╚═════════════════════════════════════════════════════════════════════════╝ + +def demo_2_iterative_exploration(): + """ + Demonstrates the `explore_data` tool: + Input: data + high-level question + Output: multiple rounds of analysis, each with data + chart + reasoning + + The AI agent: + 1. Breaks the question into sub-questions + 2. For each sub-question: transforms data → creates chart → interprets result + 3. Decides the next question based on findings + 4. Presents a summary when exploration is complete + """ + print_section("DEMO 2: Iterative Data Exploration") + + from data_formulator.mcp_server import explore_data + + # --- Example 2a: Explore world economy data --- + print("\n🔍 Example 2a: Explore world economy trends") + print(" Question: 'Explore the relationship between GDP growth, population, and CO2 emissions'") + print(" Max iterations: 3") + print(" (This may take a minute as the AI performs multiple analysis rounds...)\n") + + result = explore_data( + data=SAMPLE_CSV, + question="Explore the relationship between GDP growth, population, and CO2 emissions across countries. 
What patterns emerge?", + data_format="csv", + table_name="world_economy", + max_iterations=3, + ) + + print(f" Status: {result['status']}") + print(f" Total steps completed: {result['total_steps']}") + + if result["status"] == "ok": + print(f"\n 📋 Summary:\n {result['summary']}") + + for step in result["steps"]: + i = step["iteration"] + print(f"\n --- Step {i} ---") + print(f" Question: {step['question']}") + if step.get("status") == "ok": + print(f" Chart type: {step['chart_type']}") + print(f" Encodings: {step['chart_encodings']}") + print(f" Data rows: {step.get('transformed_data_full_count', 'N/A')}") + save_chart(step.get("chart_image_base64"), f"demo2a_step{i}.png") + else: + print(f" Error: {step.get('message', 'Unknown')}") + + save_json_result(result, "demo2a_exploration.json") + + # --- Example 2b: Explore student performance --- + print("\n\n🔍 Example 2b: Explore student performance patterns") + print(" Question: 'Analyze student performance across subjects and identify strengths/weaknesses'") + print(" Max iterations: 3\n") + + result = explore_data( + data=SAMPLE_JSON, + question="Analyze student performance across subjects. Which subjects are hardest? Do grades correlate with specific subjects?", + data_format="json", + table_name="student_scores", + max_iterations=3, + ) + + print(f" Status: {result['status']}") + print(f" Total steps: {result['total_steps']}") + + if result["status"] == "ok": + print(f"\n 📋 Summary:\n {result['summary']}") + + for step in result["steps"]: + i = step["iteration"] + print(f"\n --- Step {i} ---") + print(f" Question: {step['question']}") + if step.get("status") == "ok": + print(f" Chart type: {step['chart_type']}") + print(f" Summary: {step.get('instruction_summary', '')}") + save_chart(step.get("chart_image_base64"), f"demo2b_step{i}.png") + + save_json_result(result, "demo2b_exploration.json") + + +# ╔═════════════════════════════════════════════════════════════════════════╗ +# ║ DEMO 3: MCP Client over stdio (full MCP protocol) ║ +# ╚═════════════════════════════════════════════════════════════════════════╝ + +async def demo_3_mcp_client(): + """ + Demonstrates connecting to the Data Formulator MCP server as a proper + MCP client over stdio transport. + + This is how real MCP hosts (Claude Desktop, VS Code Copilot, etc.) would + connect to the server. + """ + print_section("DEMO 3: MCP Client over stdio") + + try: + from mcp import ClientSession, StdioServerParameters + from mcp.client.stdio import stdio_client + except ImportError: + print(" ⚠ MCP client SDK not installed. Install with: pip install mcp") + print(" Skipping Demo 3.") + return + + # The server script to run + server_script = str(PROJECT_ROOT / "py-src" / "data_formulator" / "mcp_server.py") + + server_params = StdioServerParameters( + command=sys.executable, + args=[server_script], + env={ + **os.environ, # inherit env (API keys, etc.) 
+ "PYTHONPATH": str(PROJECT_ROOT / "py-src"), + }, + ) + + print(" 🔌 Connecting to Data Formulator MCP server...") + + async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + await session.initialize() + + # List available tools + tools = await session.list_tools() + print(f" 📦 Available tools: {[t.name for t in tools.tools]}") + + # --- Call visualize_data via MCP --- + print("\n 📊 Calling visualize_data via MCP protocol...") + + viz_result = await session.call_tool( + "visualize_data", + arguments={ + "data": SAMPLE_CSV, + "instruction": "Show GDP per capita trends over time for each country", + "data_format": "csv", + "table_name": "world_economy", + }, + ) + + # MCP returns content as TextContent or other content types + for content in viz_result.content: + if hasattr(content, "text"): + result = json.loads(content.text) + print(f" Status: {result.get('status')}") + if result.get("status") == "ok": + print(f" Summary: {result.get('instruction_summary')}") + print(f" Chart type: {result.get('chart_type')}") + save_chart(result.get("chart_image_base64"), "demo3_mcp_viz.png") + save_json_result(result, "demo3_mcp_viz.json") + + # --- Call explore_data via MCP --- + print("\n 🔍 Calling explore_data via MCP protocol...") + print(" (This may take a minute...)") + + explore_result = await session.call_tool( + "explore_data", + arguments={ + "data": SAMPLE_CSV, + "question": "What are the key economic trends across countries?", + "data_format": "csv", + "table_name": "world_economy", + "max_iterations": 2, + }, + ) + + for content in explore_result.content: + if hasattr(content, "text"): + result = json.loads(content.text) + print(f" Status: {result.get('status')}") + print(f" Steps: {result.get('total_steps')}") + if result.get("status") == "ok": + for step in result.get("steps", []): + i = step["iteration"] + print(f" Step {i}: {step.get('instruction_summary', step.get('question'))}") + save_chart(step.get("chart_image_base64"), f"demo3_mcp_explore_step{i}.png") + save_json_result(result, "demo3_mcp_explore.json") + + print("\n ✅ MCP client demo complete!") + + +# ╔═════════════════════════════════════════════════════════════════════════╗ +# ║ MAIN ║ +# ╚═════════════════════════════════════════════════════════════════════════╝ + +def main(): + parser = argparse.ArgumentParser( + description="Demo: Data Formulator as an MCP Server", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python test_mcp_server.py # Run demos 1 & 2 (direct invocation) + python test_mcp_server.py --demo 1 # One-shot visualization only + python test_mcp_server.py --demo 2 # Iterative exploration only + python test_mcp_server.py --demo 3 # MCP client over stdio + python test_mcp_server.py --demo all # Run all demos including MCP client + """, + ) + parser.add_argument( + "--demo", + choices=["1", "2", "3", "all"], + default=None, + help="Which demo to run (default: 1 and 2)", + ) + args = parser.parse_args() + + print("🚀 Data Formulator MCP Server Demo") + print(f" Output directory: {OUTPUT_DIR}") + print(f" Model endpoint: {os.getenv('DF_MCP_MODEL_ENDPOINT', 'openai')}") + print(f" Model name: {os.getenv('DF_MCP_MODEL_NAME', 'gpt-4o')}") + + # Check for API key + endpoint = os.getenv("DF_MCP_MODEL_ENDPOINT", "openai") + api_key = os.getenv("DF_MCP_API_KEY", os.getenv(f"{endpoint.upper()}_API_KEY", "")) + if not api_key: + print(f"\n⚠️ No API key found! 
Set one of:") + print(f" export DF_MCP_API_KEY='your-key'") + print(f" export {endpoint.upper()}_API_KEY='your-key'") + print(f" (or set them in api-keys.env)") + sys.exit(1) + + if args.demo == "1": + demo_1_one_shot_visualization() + elif args.demo == "2": + demo_2_iterative_exploration() + elif args.demo == "3": + asyncio.run(demo_3_mcp_client()) + elif args.demo == "all": + demo_1_one_shot_visualization() + demo_2_iterative_exploration() + asyncio.run(demo_3_mcp_client()) + else: + # Default: run demos 1 and 2 + demo_1_one_shot_visualization() + demo_2_iterative_exploration() + + print("\n" + "=" * 70) + print(f" ✅ Demo complete! Check outputs in: {OUTPUT_DIR}") + print("=" * 70) + + +if __name__ == "__main__": + main() From af95c28a1b2ffa07e36ee1727f01a7666c6c410f Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Tue, 10 Feb 2026 13:40:08 -0800 Subject: [PATCH 4/6] updates --- py-src/data_formulator/datalake/workspace.py | 133 +++++---- py-src/data_formulator/mcp_server.py | 293 +++++++++++++++---- py-src/tests/test_mcp_server.py | 213 +++++++------- pyproject.toml | 1 + uv.lock | 2 + 5 files changed, 424 insertions(+), 218 deletions(-) diff --git a/py-src/data_formulator/datalake/workspace.py b/py-src/data_formulator/datalake/workspace.py index 776fdd3f..75a5cd33 100644 --- a/py-src/data_formulator/datalake/workspace.py +++ b/py-src/data_formulator/datalake/workspace.py @@ -579,77 +579,112 @@ class WorkspaceWithTempData: Context manager that temporarily adds temp data (list of {name, rows}) to a workspace as parquet files, yields the same workspace, and removes those files on exit. - OPTIMIZATION: Temp files are written directly to disk WITHOUT metadata updates. - This eliminates metadata file locking contention when multiple temp tables are - created concurrently. Since temp files are ephemeral (exist only during the context), - they don't need to be tracked in workspace.yaml. - - Python code can still access temp files via relative paths (e.g., pd.read_parquet()) - because the sandbox execution runs with workspace._path as the current working directory. - - Use when the client sends in-memory data (e.g. language == "python"): wrap the - workspace so temp tables are visible for the block and then cleaned up. + Two modes controlled by ``register_metadata``: + + **register_metadata=False (default)** + Temp files are written directly to disk WITHOUT metadata updates. + This eliminates metadata file locking contention when multiple temp tables + are created concurrently (the Flask/web-app path). Files use a ``.temp_`` + prefix so they can be identified for crash-recovery cleanup. + Python code can still access them via relative paths because the sandbox + runs with ``workspace._path`` as the working directory. + + **register_metadata=True** + Files are written with their plain sanitised name **and** registered in + ``workspace.yaml`` via ``add_table_metadata``. On exit they are removed + with ``delete_table`` (which cleans up both the file and the metadata + entry). Use this when downstream code needs ``read_data_as_df`` / + ``generate_data_summary`` to resolve tables by name – e.g. the MCP server. 
""" - def __init__(self, workspace: Workspace, temp_data: Optional[list[dict[str, Any]]] = None): + def __init__( + self, + workspace: Workspace, + temp_data: Optional[list[dict[str, Any]]] = None, + register_metadata: bool = False, + ): self._workspace = workspace self._temp_data = temp_data if temp_data else None - self._temp_files: list[Path] = [] # Track file paths for cleanup (not table names) + self._register_metadata = register_metadata + # When register_metadata=False we track file *paths* for cleanup. + # When register_metadata=True we track table *names* for delete_table(). + self._temp_files: list[Path] = [] + self._temp_table_names: list[str] = [] def __enter__(self) -> Workspace: if not self._temp_data: return self._workspace + from datetime import datetime from data_formulator.datalake.parquet_utils import sanitize_table_name for item in self._temp_data: base_name = item.get("name", "table") safe_name = sanitize_table_name(base_name) - # Use .temp_ prefix to distinguish from persistent tables - # This also helps with crash recovery - stale temp files can be identified and cleaned up - temp_filename = f".temp_{safe_name}.parquet" - file_path = self._workspace._path / temp_filename - - # Handle name conflicts by checking filesystem directly (no metadata read needed) - counter = 1 - while file_path.exists(): - temp_filename = f".temp_{safe_name}_{counter}.parquet" - file_path = self._workspace._path / temp_filename - counter += 1 - - # CRITICAL: Write parquet directly - NO metadata update - # This is the key optimization that eliminates metadata file locking contention. - # Temp files don't need metadata tracking since they're ephemeral and only - # live for the duration of this context. - # - # Python code can still access them via relative paths since the sandbox - # runs with workspace._path as cwd, e.g.: - # pd.read_parquet('.temp_sales.parquet') - # conn.execute("SELECT * FROM read_parquet('.temp_sales.parquet')") rows = item.get("rows", []) df = pd.DataFrame(rows) if rows else pd.DataFrame() - df.to_parquet(file_path) - self._temp_files.append(file_path) - logger.debug( - f"Added temp file {file_path.name} to workspace " - f"({len(df)} rows, no metadata update)" - ) + if self._register_metadata: + # ---- metadata-aware path ---- + filename = f"{safe_name}.parquet" + file_path = self._workspace._path / filename + df.to_parquet(file_path) + + from data_formulator.datalake.metadata import TableMetadata + + meta = TableMetadata( + name=safe_name, + source_type="upload", + filename=filename, + file_type="parquet", + created_at=datetime.now(), + row_count=len(df), + ) + self._workspace.add_table_metadata(meta) + self._temp_table_names.append(safe_name) + logger.debug( + f"Added table {safe_name} to workspace with metadata " + f"({len(df)} rows)" + ) + else: + # ---- fast path (no metadata) ---- + temp_filename = f".temp_{safe_name}.parquet" + file_path = self._workspace._path / temp_filename + + counter = 1 + while file_path.exists(): + temp_filename = f".temp_{safe_name}_{counter}.parquet" + file_path = self._workspace._path / temp_filename + counter += 1 + + df.to_parquet(file_path) + self._temp_files.append(file_path) + logger.debug( + f"Added temp file {file_path.name} to workspace " + f"({len(df)} rows, no metadata update)" + ) return self._workspace def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: - # Delete temp files directly - NO metadata update - # This is safe because we never added them to metadata in the first place - for file_path in 
self._temp_files: - try: - file_path.unlink(missing_ok=True) - logger.debug(f"Removed temp file {file_path.name}") - except Exception as e: - logger.warning(f"Failed to remove temp file {file_path}: {e}") - - self._temp_files.clear() + if self._register_metadata: + # delete_table removes both the parquet file and the metadata entry + for name in self._temp_table_names: + try: + self._workspace.delete_table(name) + logger.debug(f"Deleted table {name} (file + metadata)") + except Exception as e: + logger.warning(f"Failed to delete table {name}: {e}") + self._temp_table_names.clear() + else: + for file_path in self._temp_files: + try: + file_path.unlink(missing_ok=True) + logger.debug(f"Removed temp file {file_path.name}") + except Exception as e: + logger.warning(f"Failed to remove temp file {file_path}: {e}") + self._temp_files.clear() # ============================================================================== diff --git a/py-src/data_formulator/mcp_server.py b/py-src/data_formulator/mcp_server.py index d1501ed0..72c26ae8 100644 --- a/py-src/data_formulator/mcp_server.py +++ b/py-src/data_formulator/mcp_server.py @@ -7,8 +7,10 @@ Exposes Data Formulator's AI-powered data visualization capabilities as an MCP (Model Context Protocol) server with the following tools: -1. visualize_data: Given data + instruction → transformed data + chart (PNG) -2. explore_data: Multi-turn iterative exploration → rounds of response + data + chart +1. list_demo_data: List predefined demo datasets with URLs +2. visualize_data: Given data URLs + instruction → transformed data + chart (PNG) +3. explore_data: Multi-turn iterative exploration → rounds of response + data + chart +4. create_chart: Create a chart directly from data URLs + field mappings Setup: # Install with uv (recommended) @@ -27,6 +29,27 @@ uv run py-src/data_formulator/mcp_server.py Configure in Claude Desktop (claude_desktop_config.json): + + Azure OpenAI with Azure AD auth (recommended for Microsoft users): + { + "mcpServers": { + "data-formulator": { + "command": "uv", + "args": [ + "--directory", "/path/to/data-formulator", + "run", "python", "-m", "data_formulator.mcp_server" + ], + "env": { + "DF_MCP_MODEL_ENDPOINT": "azure", + "DF_MCP_MODEL_NAME": "gpt-4o", + "DF_MCP_API_BASE": "https://YOUR_RESOURCE.openai.azure.com/", + "DF_MCP_API_VERSION": "2025-04-01-preview" + } + } + } + } + + OpenAI (with API key): { "mcpServers": { "data-formulator": { @@ -45,6 +68,29 @@ } Configure in VS Code (settings.json): + + Azure OpenAI with Azure AD auth (recommended for Microsoft users): + { + "mcp": { + "servers": { + "data-formulator": { + "command": "uv", + "args": [ + "--directory", "/path/to/data-formulator", + "run", "python", "-m", "data_formulator.mcp_server" + ], + "env": { + "DF_MCP_MODEL_ENDPOINT": "azure", + "DF_MCP_MODEL_NAME": "gpt-4o", + "DF_MCP_API_BASE": "https://YOUR_RESOURCE.openai.azure.com/", + "DF_MCP_API_VERSION": "2025-04-01-preview" + } + } + } + } + } + + OpenAI (with API key): { "mcp": { "servers": { @@ -65,12 +111,26 @@ } Environment variables: - OPENAI_API_KEY / ANTHROPIC_API_KEY / etc. - API keys for LLM providers - DF_MCP_MODEL_ENDPOINT - LLM provider (default: "openai") + DF_MCP_MODEL_ENDPOINT - LLM provider: "azure" | "openai" | "anthropic" | "gemini" | "ollama" + (default: "azure") DF_MCP_MODEL_NAME - Model name (default: "gpt-4o") - DF_MCP_API_KEY - API key (overrides provider-specific key) - DF_MCP_API_BASE - Custom API base URL (optional) + DF_MCP_API_BASE - API base URL (required for azure, e.g. 
"https://YOUR_RESOURCE.openai.azure.com/") + DF_MCP_API_VERSION - API version for Azure (default: "2025-04-01-preview") + DF_MCP_API_KEY - API key (optional for Azure AD auth; required for OpenAI/Anthropic) + OPENAI_API_KEY - Fallback API key for OpenAI endpoint + ANTHROPIC_API_KEY - Fallback API key for Anthropic endpoint DATALAKE_ROOT - Workspace root directory (optional) + + Azure AD auth (no API key needed): + When using DF_MCP_MODEL_ENDPOINT=azure with no API key set, the server + automatically uses DefaultAzureCredential for token-based auth. + Make sure you are logged in via `az login` or have a managed identity. + + Other providers: + export DF_MCP_MODEL_ENDPOINT="openai" && export OPENAI_API_KEY="sk-..." + export DF_MCP_MODEL_ENDPOINT="anthropic" && export ANTHROPIC_API_KEY="sk-ant-..." + export DF_MCP_MODEL_ENDPOINT="gemini" && export GEMINI_API_KEY="..." + export DF_MCP_MODEL_ENDPOINT="ollama" # no key needed, runs locally """ import os @@ -79,10 +139,13 @@ import base64 import logging import tempfile +from io import StringIO, BytesIO from pathlib import Path from typing import Any +from urllib.parse import urlparse import pandas as pd +import requests from dotenv import load_dotenv @@ -104,6 +167,7 @@ create_chart_spec, ) from data_formulator.workflows.exploration_flow import create_chart_spec_from_data +from data_formulator.example_datasets_config import EXAMPLE_DATASETS logger = logging.getLogger(__name__) @@ -113,7 +177,7 @@ def _get_model_config() -> dict[str, str]: """Build model config from environment variables.""" - endpoint = os.getenv("DF_MCP_MODEL_ENDPOINT", "openai") + endpoint = os.getenv("DF_MCP_MODEL_ENDPOINT", "azure") model = os.getenv("DF_MCP_MODEL_NAME", "gpt-4o") # Resolve API key: explicit > provider-specific @@ -143,32 +207,83 @@ def _get_workspace(session_id: str = "mcp_session") -> Workspace: return Workspace(session_id) -def _parse_data_input(data: str, data_format: str = "auto") -> pd.DataFrame: +def _detect_format_from_url(url: str) -> str: + """Detect data format from URL file extension.""" + path = urlparse(url).path.lower() + if path.endswith(".csv"): + return "csv" + elif path.endswith(".tsv"): + return "tsv" + elif path.endswith(".json"): + return "json" + elif path.endswith(".jsonl"): + return "jsonl" + elif path.endswith(".xlsx") or path.endswith(".xls"): + return "xlsx" + return "csv" # default to CSV + + +def _load_data_from_url(url: str, data_format: str = "auto") -> pd.DataFrame: """ - Parse data from a string (JSON or CSV) into a DataFrame. + Download and parse tabular data from a URL. + + Supported formats: csv, tsv, json, jsonl, xlsx. + If data_format is "auto", the format is detected from the URL extension. Args: - data: Raw data string (JSON array or CSV text) - data_format: "json", "csv", or "auto" (detect automatically) + url: URL pointing to a data file (csv, tsv, json, jsonl, or xlsx). + data_format: "csv", "tsv", "json", "jsonl", "xlsx", or "auto". 
Returns: pandas DataFrame """ if data_format == "auto": - stripped = data.strip() - if stripped.startswith("[") or stripped.startswith("{"): - data_format = "json" - else: - data_format = "csv" + data_format = _detect_format_from_url(url) + + resp = requests.get(url, timeout=60) + resp.raise_for_status() if data_format == "json": - parsed = json.loads(data) + parsed = resp.json() if isinstance(parsed, dict): parsed = [parsed] return pd.DataFrame(parsed) - else: - from io import StringIO - return pd.read_csv(StringIO(data)) + elif data_format == "jsonl": + lines = resp.text.strip().split("\n") + records = [json.loads(line) for line in lines if line.strip()] + return pd.DataFrame(records) + elif data_format == "tsv": + return pd.read_csv(StringIO(resp.text), sep="\t") + elif data_format in ("xlsx", "xls"): + return pd.read_excel(BytesIO(resp.content)) + else: # csv + return pd.read_csv(StringIO(resp.text)) + + +def _load_multiple_urls(data_urls: list[str], table_names: list[str] | None = None) -> list[dict]: + """ + Load multiple data URLs and return a list of table dicts. + + Args: + data_urls: List of URLs to load. + table_names: Optional list of names for each table. + If not provided, names are derived from the URL filename. + + Returns: + List of {"name": str, "rows": list[dict]} dicts. + """ + tables = [] + for i, url in enumerate(data_urls): + df = _load_data_from_url(url) + rows = json.loads(df.to_json(orient="records", date_format="iso")) + if table_names and i < len(table_names): + name = table_names[i] + else: + # Derive name from URL filename (strip extension) + filename = urlparse(url).path.split("/")[-1] + name = filename.rsplit(".", 1)[0] if "." in filename else filename + tables.append({"name": name, "rows": rows}) + return tables def _make_chart_image( @@ -196,25 +311,76 @@ def _make_chart_image( return None + + + # --------------------------------------------------------------------------- # MCP Server # --------------------------------------------------------------------------- mcp = FastMCP( "Data Formulator", - description=( + instructions=( "AI-powered data visualization server. " - "Transform data, generate charts, and explore datasets interactively." + "Transform data, generate charts, and explore datasets interactively. " + "Use list_demo_data to browse available demo datasets, then pass their " + "URLs to visualize_data, explore_data, or create_chart." ), ) +@mcp.tool() +def list_demo_data() -> dict[str, Any]: + """ + List predefined demo datasets available for visualization and exploration. + + Returns a curated list of datasets with their URLs, formats, descriptions, + and sample data. Use the returned URLs as input to visualize_data, + explore_data, or create_chart. + + Returns: + A dictionary with: + - status: "ok" + - datasets: List of dataset entries, each containing: + - name: Human-readable dataset name + - source: Data source (e.g. "vegadatasets", "tidytuesday") + - description: Short description of the dataset + - tables: List of tables, each with: + - url: URL to download the data file + - format: File format ("csv", "json", etc.) 
+ - sample: A few sample rows (string or list) to preview the data + """ + datasets = [] + for ds in EXAMPLE_DATASETS: + entry = { + "name": ds["name"], + "source": ds.get("source", ""), + "description": ds.get("description", ""), + "tables": [], + } + for table in ds.get("tables", []): + t = { + "url": table["url"], + "format": table.get("format", "csv"), + } + # Include a short sample preview + sample = table.get("sample", "") + if isinstance(sample, list): + t["sample"] = sample[:5] # first 5 rows + elif isinstance(sample, str): + lines = sample.strip().split("\n") + t["sample"] = "\n".join(lines[:6]) # header + 5 rows + entry["tables"].append(t) + datasets.append(entry) + + return {"status": "ok", "datasets": datasets} + + @mcp.tool() def visualize_data( - data: str, + data_urls: list[str], instruction: str, - data_format: str = "auto", - table_name: str = "input_data", + table_names: list[str] | None = None, chart_type: str = "", x: str = "", y: str = "", @@ -226,10 +392,13 @@ def visualize_data( """ Transform data and generate a visualization based on a natural language instruction. - Given tabular data (JSON or CSV) and a natural language instruction, this tool: - 1. Uses an AI agent to understand the intent and generate transformation code - 2. Executes the transformation to produce the output data - 3. Creates a chart (PNG) from the transformed data + Given one or more data URLs and a natural language instruction, this tool: + 1. Downloads the data from the URLs (supports csv, tsv, json, jsonl, xlsx) + 2. Uses an AI agent to understand the intent and generate transformation code + 3. Executes the transformation to produce the output data + 4. Creates a chart (PNG) from the transformed data + + Use list_demo_data to discover available demo datasets and their URLs. Use this for one-shot data analysis tasks like: - "Show average sales by region as a bar chart" @@ -237,10 +406,11 @@ def visualize_data( - "Forecast the next 6 months of revenue" Args: - data: Tabular data as a JSON array of objects or CSV text. + data_urls: List of URLs pointing to data files (csv, tsv, json, jsonl, xlsx). + The format is auto-detected from the file extension. instruction: Natural language description of what visualization to create. - data_format: "json", "csv", or "auto" (default: auto-detect). - table_name: Name for the input table (default: "input_data"). + table_names: Optional list of names for each table (one per URL). + If not provided, names are derived from the URL filename. chart_type: Optional chart type hint ("bar", "point", "line", "area", "heatmap", "group_bar", "boxplot", "worldmap", "usmap"). Leave empty to let the AI decide. x: Optional field name for x-axis encoding. 
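Illustrative sketch (not a patch hunk): the reworked visualize_data can be exercised in-process with a URL instead of inline data. The dataset URL below is one of the demo URLs used in the updated test script later in this series; the instruction text is arbitrary, and the result keys match the ones printed by that script:

    from data_formulator.mcp_server import visualize_data

    DISASTERS_URL = "https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/disasters.csv"

    result = visualize_data(
        data_urls=[DISASTERS_URL],   # format auto-detected from the .csv extension
        instruction="Show total deaths by disaster type over time as a line chart",
        table_names=["disasters"],   # optional; otherwise derived from the URL filename
    )
    print(result["status"], result.get("chart_type"), result.get("chart_encodings"))
    png = result.get("chart_image_base64")   # base64 PNG data URL when a chart was rendered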
@@ -263,11 +433,8 @@ def visualize_data( - reasoning: The AI's reasoning about the transformation """ try: - # Parse input data - df = _parse_data_input(data, data_format) - rows = json.loads(df.to_json(orient="records", date_format="iso")) - - input_tables = [{"name": table_name, "rows": rows}] + # Load data from URLs + input_tables = _load_multiple_urls(data_urls, table_names) # Build chart encodings from optional hints chart_encodings = {} @@ -283,9 +450,9 @@ def visualize_data( # Set up workspace + agent client = _get_client() workspace = _get_workspace() - temp_data = [{"name": table_name, "rows": rows}] - with WorkspaceWithTempData(workspace, temp_data) as ws: + # Use register_metadata=True so agents can resolve tables via read_data_as_df + with WorkspaceWithTempData(workspace, input_tables, register_metadata=True) as ws: if mode == "recommendation": agent = DataRecAgent(client=client, workspace=ws) results = agent.run(input_tables, instruction, n=1) @@ -358,20 +525,22 @@ def visualize_data( @mcp.tool() def explore_data( - data: str, + data_urls: list[str], question: str, - data_format: str = "auto", - table_name: str = "input_data", + table_names: list[str] | None = None, max_iterations: int = 3, max_repair_attempts: int = 1, ) -> dict[str, Any]: """ Iteratively explore a dataset through multiple rounds of AI-driven analysis. - Given tabular data and a high-level exploration question, this tool: - 1. Breaks the question into a multi-step analysis plan - 2. For each step: transforms data, creates a chart, and decides the next step - 3. Returns all exploration steps with their data and charts + Given one or more data URLs and a high-level exploration question, this tool: + 1. Downloads the data from the URLs (supports csv, tsv, json, jsonl, xlsx) + 2. Breaks the question into a multi-step analysis plan + 3. For each step: transforms data, creates a chart, and decides the next step + 4. Returns all exploration steps with their data and charts + + Use list_demo_data to discover available demo datasets and their URLs. Use this for open-ended data exploration like: - "What are the key trends and patterns in this sales data?" @@ -379,10 +548,11 @@ def explore_data( - "Analyze the relationship between weather and energy consumption" Args: - data: Tabular data as a JSON array of objects or CSV text. + data_urls: List of URLs pointing to data files (csv, tsv, json, jsonl, xlsx). + The format is auto-detected from the file extension. question: High-level exploration question or topic. - data_format: "json", "csv", or "auto" (default: auto-detect). - table_name: Name for the input table (default: "input_data"). + table_names: Optional list of names for each table (one per URL). + If not provided, names are derived from the URL filename. max_iterations: Maximum number of exploration rounds (default: 3). max_repair_attempts: Max code repair retries per step (default: 1). 
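The register_metadata flag used in the hunk above changes how WorkspaceWithTempData cleans up after itself. A minimal sketch of both paths, assuming a throwaway session id and a one-row payload (read_data_as_df and delete_table are the names referenced in the workspace.py docstring earlier in this patch; this is illustrative, not a patch hunk):

    from data_formulator.datalake.workspace import Workspace, WorkspaceWithTempData

    ws = Workspace("scratch_session")
    tables = [{"name": "sales", "rows": [{"region": "EU", "total": 10}]}]

    # Fast path (default): writes .temp_sales.parquet, never touches workspace.yaml;
    # the table is only reachable via its relative path inside the sandbox cwd.
    with WorkspaceWithTempData(ws, tables) as w:
        pass

    # Metadata path (the MCP server's choice): writes sales.parquet and registers it,
    # so agents can resolve it by name; delete_table("sales") runs on __exit__.
    with WorkspaceWithTempData(ws, tables, register_metadata=True) as w:
        df = w.read_data_as_df("sales")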
@@ -403,19 +573,15 @@ def explore_data( - total_steps: Number of steps completed """ try: - # Parse input data - df = _parse_data_input(data, data_format) - rows = json.loads(df.to_json(orient="records", date_format="iso")) - - input_tables = [{"name": table_name, "rows": rows}] + # Load data from URLs + input_tables = _load_multiple_urls(data_urls, table_names) client = _get_client() workspace = _get_workspace() - temp_data = [{"name": table_name, "rows": rows}] - steps = [] - with WorkspaceWithTempData(workspace, temp_data) as ws: + # Use register_metadata=True so agents can resolve tables via read_data_as_df + with WorkspaceWithTempData(workspace, input_tables, register_metadata=True) as ws: rec_agent = DataRecAgent(client=client, workspace=ws) exploration_agent = ExplorationAgent(client=client, workspace=ws) @@ -534,7 +700,6 @@ def explore_data( except Exception as e: logger.warning(f"Exploration planning failed: {e}") break - # Build summary summary_parts = [] for s in steps: @@ -556,23 +721,24 @@ def explore_data( @mcp.tool() def create_chart( - data: str, + data_url: str, chart_type: str, x: str = "", y: str = "", color: str = "", size: str = "", facet: str = "", - data_format: str = "auto", ) -> dict[str, Any]: """ - Create a chart directly from data and field mappings (no AI, no transformation). + Create a chart directly from a data URL and field mappings (no AI, no transformation). This is a fast, deterministic tool for creating standard charts when you already know exactly which fields to use and how to map them. + Use list_demo_data to discover available demo datasets and their URLs. + Args: - data: Tabular data as a JSON array of objects or CSV text. + data_url: URL pointing to a data file (csv, tsv, json, jsonl, xlsx). chart_type: One of "bar", "point", "line", "area", "heatmap", "group_bar", "boxplot". x: Field name for x-axis. @@ -580,7 +746,6 @@ def create_chart( color: Optional field name for color encoding. size: Optional field name for size encoding. facet: Optional field name for faceting. - data_format: "json", "csv", or "auto". Returns: A dictionary with: @@ -590,7 +755,7 @@ def create_chart( - fields_used: List of fields mapped to channels """ try: - df = _parse_data_input(data, data_format) + df = _load_data_from_url(data_url) # Build encoding dict fields = [] diff --git a/py-src/tests/test_mcp_server.py b/py-src/tests/test_mcp_server.py index 27071c98..b212da8d 100644 --- a/py-src/tests/test_mcp_server.py +++ b/py-src/tests/test_mcp_server.py @@ -18,12 +18,28 @@ # Install dependencies with uv (from project root): uv pip install -e ".[mcp]" # or: - uv pip install mcp pandas vl-convert-python + uv pip install mcp pandas vl-convert-python requests - # Set your LLM API key: - export OPENAI_API_KEY="sk-..." # or ANTHROPIC_API_KEY, etc. - export DF_MCP_MODEL_ENDPOINT="openai" # openai | anthropic | azure | gemini | ollama - export DF_MCP_MODEL_NAME="gpt-4o" # model name + # Azure OpenAI with Azure AD auth (recommended for Microsoft users): + # No API key needed — uses DefaultAzureCredential (az login). + export DF_MCP_MODEL_ENDPOINT="azure" + export DF_MCP_MODEL_NAME="gpt-4o" + export DF_MCP_API_BASE="https://YOUR_RESOURCE.openai.azure.com/" + export DF_MCP_API_VERSION="2025-04-01-preview" # optional, has default + + # Alternative: OpenAI + # export DF_MCP_MODEL_ENDPOINT="openai" + # export DF_MCP_MODEL_NAME="gpt-4o" + # export OPENAI_API_KEY="sk-..." 
+ + # Alternative: Anthropic + # export DF_MCP_MODEL_ENDPOINT="anthropic" + # export DF_MCP_MODEL_NAME="claude-sonnet-4-20250514" + # export ANTHROPIC_API_KEY="sk-ant-..." + + # Alternative: Ollama (local, no key) + # export DF_MCP_MODEL_ENDPOINT="ollama" + # export DF_MCP_MODEL_NAME="llama3" Usage: # Run all demos with uv (recommended): @@ -58,53 +74,17 @@ # ╔═════════════════════════════════════════════════════════════════════════╗ -# ║ SAMPLE DATA ║ +# ║ DEMO DATA URLS ║ # ╚═════════════════════════════════════════════════════════════════════════╝ -SAMPLE_CSV = """Country,Year,GDP_Billion,Population_Million,CO2_Emission_MT -United States,2018,20580,327,5280 -United States,2019,21430,329,5130 -United States,2020,20940,331,4570 -United States,2021,23320,332,5010 -United States,2022,25460,333,5060 -China,2018,13890,1393,10060 -China,2019,14280,1398,10170 -China,2020,14720,1402,10670 -China,2021,17730,1405,11470 -China,2022,17960,1406,11400 -Germany,2018,3970,83,759 -Germany,2019,3890,83,702 -Germany,2020,3890,83,644 -Germany,2021,4220,83,675 -Germany,2022,4070,84,666 -India,2018,2710,1353,2480 -India,2019,2870,1366,2600 -India,2020,2660,1380,2440 -India,2021,3180,1393,2710 -India,2022,3390,1407,2830 -Japan,2018,4970,126,1160 -Japan,2019,5080,126,1140 -Japan,2020,5040,126,1060 -Japan,2021,4940,125,1070 -Japan,2022,4230,125,1050 -Brazil,2018,1870,210,460 -Brazil,2019,1870,211,470 -Brazil,2020,1440,212,440 -Brazil,2021,1650,213,490 -Brazil,2022,1920,214,490""" - -SAMPLE_JSON = json.dumps([ - {"Student": "Alice", "Math": 92, "Science": 88, "English": 95, "History": 78, "Grade": "A"}, - {"Student": "Bob", "Math": 76, "Science": 82, "English": 71, "History": 89, "Grade": "B"}, - {"Student": "Charlie", "Math": 88, "Science": 91, "English": 84, "History": 92, "Grade": "A"}, - {"Student": "Diana", "Math": 65, "Science": 70, "English": 90, "History": 85, "Grade": "B"}, - {"Student": "Eve", "Math": 95, "Science": 97, "English": 92, "History": 88, "Grade": "A"}, - {"Student": "Frank", "Math": 58, "Science": 62, "English": 68, "History": 72, "Grade": "C"}, - {"Student": "Grace", "Math": 84, "Science": 79, "English": 88, "History": 91, "Grade": "B"}, - {"Student": "Henry", "Math": 91, "Science": 85, "English": 79, "History": 83, "Grade": "A"}, - {"Student": "Iris", "Math": 73, "Science": 68, "English": 82, "History": 76, "Grade": "B"}, - {"Student": "Jack", "Math": 87, "Science": 93, "English": 86, "History": 80, "Grade": "A"}, -]) +# These URLs come from the predefined demo datasets (also available via +# the list_demo_data MCP tool). You can use any publicly accessible URL +# pointing to a csv, tsv, json, jsonl, or xlsx file. 
+ +GAPMINDER_URL = "https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/gapminder.json" +DISASTERS_URL = "https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/disasters.csv" +LIFE_EXPECTANCY_URL = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2023/2023-12-05/life_expectancy.csv" +MOVIES_URL = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2025/2025-07-29/movies.csv" # ╔═════════════════════════════════════════════════════════════════════════╗ @@ -147,23 +127,33 @@ def print_section(title: str) -> None: def demo_1_one_shot_visualization(): """ Demonstrates the `visualize_data` tool: - Input: data (CSV or JSON) + natural language instruction + Input: data URLs + natural language instruction Output: transformed data + chart image (PNG) + reasoning """ print_section("DEMO 1: One-shot Data Visualization") - # Import the MCP tool function directly - from data_formulator.mcp_server import visualize_data + # Import the MCP tool functions directly + from data_formulator.mcp_server import visualize_data, list_demo_data + + # --- Example 1.0: List demo datasets --- + print("\n📋 Example 1.0: List available demo datasets") + demo_data = list_demo_data() + print(f" Found {len(demo_data['datasets'])} demo datasets:") + for ds in demo_data["datasets"]: + urls = [t["url"] for t in ds["tables"]] + print(f" • {ds['name']}: {ds['description'][:60]}...") + for t in ds["tables"]: + print(f" URL: {t['url'][:80]}... ({t['format']})") - # --- Example 1a: CSV data, let AI recommend a visualization --- - print("\n📊 Example 1a: GDP trends (CSV, AI-recommended chart)") - print(" Instruction: 'Show GDP trends over time for each country as a line chart'") + # --- Example 1a: Gapminder (JSON URL), let AI recommend a visualization --- + print("\n📊 Example 1a: Gapminder life expectancy trends (JSON URL)") + print(f" URL: {GAPMINDER_URL}") + print(" Instruction: 'Show life expectancy trends over time for the top 5 most populous countries'") result = visualize_data( - data=SAMPLE_CSV, - instruction="Show GDP trends over time for each country as a line chart", - data_format="csv", - table_name="world_economy", + data_urls=[GAPMINDER_URL], + instruction="Show life expectancy trends over time for the top 5 most populous countries as a line chart", + table_names=["gapminder"], ) print(f" Status: {result['status']}") @@ -173,20 +163,20 @@ def demo_1_one_shot_visualization(): print(f" Encodings: {result['chart_encodings']}") print(f" Data rows: {result['transformed_data_full_count']}") print(f" Code:\n{result['code'][:300]}...") - save_chart(result.get("chart_image_base64"), "demo1a_gdp_trends.png") + save_chart(result.get("chart_image_base64"), "demo1a_gapminder.png") save_json_result(result, "demo1a_result.json") else: print(f" Error: {result.get('message', 'Unknown')}") - # --- Example 1b: JSON data, with encoding hints --- - print("\n📊 Example 1b: Student scores (JSON, with encoding hints)") - print(" Instruction: 'Compare students by their average score across all subjects'") + # --- Example 1b: Disasters (CSV URL) --- + print("\n📊 Example 1b: Natural disasters deaths over time (CSV URL)") + print(f" URL: {DISASTERS_URL}") + print(" Instruction: 'Show total deaths by disaster type over time'") result = visualize_data( - data=SAMPLE_JSON, - instruction="Compare students by their average score across all subjects", - data_format="json", - table_name="student_scores", + data_urls=[DISASTERS_URL], + 
instruction="Show the total deaths by disaster entity for the top 5 deadliest disaster types as a bar chart", + table_names=["disasters"], ) print(f" Status: {result['status']}") @@ -195,20 +185,20 @@ def demo_1_one_shot_visualization(): print(f" Chart type: {result['chart_type']}") print(f" Encodings: {result['chart_encodings']}") print(f" Output fields: {result['reasoning']['output_fields']}") - save_chart(result.get("chart_image_base64"), "demo1b_student_avg.png") + save_chart(result.get("chart_image_base64"), "demo1b_disasters.png") save_json_result(result, "demo1b_result.json") else: print(f" Error: {result.get('message', 'Unknown')}") - # --- Example 1c: CO2 per capita analysis --- - print("\n📊 Example 1c: CO2 per capita (CSV, computed metric)") - print(" Instruction: 'Calculate CO2 emissions per capita and show as a grouped bar chart by country and year'") + # --- Example 1c: Netflix movies (CSV URL), computed metric --- + print("\n📊 Example 1c: Netflix most viewed movies (CSV URL)") + print(f" URL: {MOVIES_URL}") + print(" Instruction: 'Show top 10 most viewed movies'") result = visualize_data( - data=SAMPLE_CSV, - instruction="Calculate CO2 emissions per capita (CO2 / Population) and show as a grouped bar chart by country for the latest year", - data_format="csv", - table_name="world_economy", + data_urls=[MOVIES_URL], + instruction="Show the top 10 most viewed movies as a horizontal bar chart sorted by views", + table_names=["netflix_movies"], ) print(f" Status: {result['status']}") @@ -216,7 +206,7 @@ def demo_1_one_shot_visualization(): print(f" Summary: {result['instruction_summary']}") print(f" Chart type: {result['chart_type']}") print(f" Data preview: {result['transformed_data'][:3]}") - save_chart(result.get("chart_image_base64"), "demo1c_co2_per_capita.png") + save_chart(result.get("chart_image_base64"), "demo1c_netflix.png") save_json_result(result, "demo1c_result.json") else: print(f" Error: {result.get('message', 'Unknown')}") @@ -229,7 +219,7 @@ def demo_1_one_shot_visualization(): def demo_2_iterative_exploration(): """ Demonstrates the `explore_data` tool: - Input: data + high-level question + Input: data URLs + high-level question Output: multiple rounds of analysis, each with data + chart + reasoning The AI agent: @@ -242,17 +232,17 @@ def demo_2_iterative_exploration(): from data_formulator.mcp_server import explore_data - # --- Example 2a: Explore world economy data --- - print("\n🔍 Example 2a: Explore world economy trends") - print(" Question: 'Explore the relationship between GDP growth, population, and CO2 emissions'") + # --- Example 2a: Explore Gapminder data --- + print("\n🔍 Example 2a: Explore Gapminder global development trends") + print(f" URL: {GAPMINDER_URL}") + print(" Question: 'Explore the relationship between population growth, life expectancy, and fertility'") print(" Max iterations: 3") print(" (This may take a minute as the AI performs multiple analysis rounds...)\n") result = explore_data( - data=SAMPLE_CSV, - question="Explore the relationship between GDP growth, population, and CO2 emissions across countries. What patterns emerge?", - data_format="csv", - table_name="world_economy", + data_urls=[GAPMINDER_URL], + question="Explore the relationship between population growth, life expectancy, and fertility rates across countries. 
What patterns emerge?", + table_names=["gapminder"], max_iterations=3, ) @@ -276,16 +266,16 @@ def demo_2_iterative_exploration(): save_json_result(result, "demo2a_exploration.json") - # --- Example 2b: Explore student performance --- - print("\n\n🔍 Example 2b: Explore student performance patterns") - print(" Question: 'Analyze student performance across subjects and identify strengths/weaknesses'") + # --- Example 2b: Explore life expectancy data --- + print("\n\n🔍 Example 2b: Explore life expectancy across countries") + print(f" URL: {LIFE_EXPECTANCY_URL}") + print(" Question: 'Analyze life expectancy trends and identify countries with the fastest improvements'") print(" Max iterations: 3\n") result = explore_data( - data=SAMPLE_JSON, - question="Analyze student performance across subjects. Which subjects are hardest? Do grades correlate with specific subjects?", - data_format="json", - table_name="student_scores", + data_urls=[LIFE_EXPECTANCY_URL], + question="Analyze life expectancy trends over time. Which regions improved the most? Are there any countries that regressed?", + table_names=["life_expectancy"], max_iterations=3, ) @@ -351,16 +341,23 @@ async def demo_3_mcp_client(): tools = await session.list_tools() print(f" 📦 Available tools: {[t.name for t in tools.tools]}") + # --- Call list_demo_data via MCP --- + print("\n 📋 Calling list_demo_data via MCP protocol...") + demo_result = await session.call_tool("list_demo_data", arguments={}) + for content in demo_result.content: + if hasattr(content, "text"): + result = json.loads(content.text) + print(f" Found {len(result.get('datasets', []))} demo datasets") + # --- Call visualize_data via MCP --- print("\n 📊 Calling visualize_data via MCP protocol...") viz_result = await session.call_tool( "visualize_data", arguments={ - "data": SAMPLE_CSV, - "instruction": "Show GDP per capita trends over time for each country", - "data_format": "csv", - "table_name": "world_economy", + "data_urls": [GAPMINDER_URL], + "instruction": "Show life expectancy vs fertility as a scatter plot colored by cluster", + "table_names": ["gapminder"], }, ) @@ -382,10 +379,9 @@ async def demo_3_mcp_client(): explore_result = await session.call_tool( "explore_data", arguments={ - "data": SAMPLE_CSV, - "question": "What are the key economic trends across countries?", - "data_format": "csv", - "table_name": "world_economy", + "data_urls": [DISASTERS_URL], + "question": "What are the most common and deadliest types of natural disasters?", + "table_names": ["disasters"], "max_iterations": 2, }, ) @@ -430,19 +426,26 @@ def main(): ) args = parser.parse_args() + endpoint = os.getenv("DF_MCP_MODEL_ENDPOINT", "azure") + print("🚀 Data Formulator MCP Server Demo") print(f" Output directory: {OUTPUT_DIR}") - print(f" Model endpoint: {os.getenv('DF_MCP_MODEL_ENDPOINT', 'openai')}") + print(f" Model endpoint: {endpoint}") print(f" Model name: {os.getenv('DF_MCP_MODEL_NAME', 'gpt-4o')}") + if endpoint == "azure": + print(f" API base: {os.getenv('DF_MCP_API_BASE', '(not set)')}") + print(f" Auth: Azure AD (DefaultAzureCredential)") - # Check for API key - endpoint = os.getenv("DF_MCP_MODEL_ENDPOINT", "openai") + # Check for API key (not required for Azure AD auth) api_key = os.getenv("DF_MCP_API_KEY", os.getenv(f"{endpoint.upper()}_API_KEY", "")) - if not api_key: - print(f"\n⚠️ No API key found! Set one of:") + if not api_key and endpoint not in ("azure", "ollama"): + print(f"\n⚠️ No API key found for endpoint '{endpoint}'! 
Set one of:") print(f" export DF_MCP_API_KEY='your-key'") print(f" export {endpoint.upper()}_API_KEY='your-key'") print(f" (or set them in api-keys.env)") + print(f"\n For Azure OpenAI with AD auth (no key needed):") + print(f" export DF_MCP_MODEL_ENDPOINT=azure") + print(f" export DF_MCP_API_BASE=https://YOUR_RESOURCE.openai.azure.com/") sys.exit(1) if args.demo == "1": diff --git a/pyproject.toml b/pyproject.toml index e52ffb10..02300e56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ dependencies = [ "yfinance", "connectorx>=0.4.5", "pyarrow>=23.0.0", + "requests", "mcp>=1.26.0", ] diff --git a/uv.lock b/uv.lock index 473ea937..e5bfc3a1 100644 --- a/uv.lock +++ b/uv.lock @@ -740,6 +740,7 @@ dependencies = [ { name = "pymysql" }, { name = "pyodbc" }, { name = "python-dotenv" }, + { name = "requests" }, { name = "scikit-learn" }, { name = "vega-datasets" }, { name = "vl-convert-python" }, @@ -779,6 +780,7 @@ requires-dist = [ { name = "pymysql" }, { name = "pyodbc" }, { name = "python-dotenv" }, + { name = "requests" }, { name = "scikit-learn" }, { name = "vega-datasets" }, { name = "vl-convert-python" }, From d5fc9db658ebb0ab66789fd8e13c8f6581e970bc Mon Sep 17 00:00:00 2001 From: Andres Date: Fri, 13 Feb 2026 07:58:52 -0800 Subject: [PATCH 5/6] case fix --- src/app/App.tsx | 2 +- src/app/dfSlice.tsx | 4 ++-- src/app/tableThunks.ts | 2 +- src/app/useDataRefresh.tsx | 2 +- src/app/utils.tsx | 4 ++-- src/data/utils.ts | 2 +- src/views/ChartRecBox.tsx | 4 ++-- src/views/ChartRenderService.tsx | 2 +- src/views/ChartifactDialog.tsx | 2 +- src/views/ConceptCard.tsx | 4 ++-- src/views/ConceptShelf.tsx | 2 +- src/views/DBTableManager.tsx | 2 +- src/views/DataLoadingThread.tsx | 2 +- src/views/DataThread.tsx | 2 +- src/views/DataThreadCards.tsx | 2 +- src/views/DataView.tsx | 2 +- src/views/EncodingBox.tsx | 2 +- src/views/EncodingShelfCard.tsx | 4 ++-- src/views/EncodingShelfThread.tsx | 4 ++-- src/views/MultiTablePreview.tsx | 2 +- src/views/RefreshDataDialog.tsx | 2 +- src/views/ReportView.tsx | 2 +- src/views/SelectableDataGrid.tsx | 2 +- src/views/TableSelectionView.tsx | 2 +- src/views/UnifiedDataUploadDialog.tsx | 2 +- src/views/ViewUtils.tsx | 4 ++-- src/views/VisualizationView.tsx | 4 ++-- 27 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/app/App.tsx b/src/app/App.tsx index 09d65890..6ed6b5e5 100644 --- a/src/app/App.tsx +++ b/src/app/App.tsx @@ -66,7 +66,7 @@ import { import { About } from '../views/About'; import { MessageSnackbar } from '../views/MessageSnackbar'; import { ChartRenderService } from '../views/ChartRenderService'; -import { DictTable } from '../components/ComponentType'; +import { DictTable } from '../components/componentType'; import { AppDispatch } from './store'; import dfLogo from '../assets/df-logo.png'; import { ModelSelectionButton } from '../views/ModelSelectionDialog'; diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx index 46b4f079..5f82d4f2 100644 --- a/src/app/dfSlice.tsx +++ b/src/app/dfSlice.tsx @@ -2,9 +2,9 @@ // Licensed under the MIT License. 
import { createAsyncThunk, createSlice, PayloadAction, createSelector } from '@reduxjs/toolkit' -import { Channel, Chart, ChartTemplate, DataCleanBlock, DataSourceConfig, EncodingItem, EncodingMap, FieldItem, Trigger } from '../components/ComponentType' +import { Channel, Chart, ChartTemplate, DataCleanBlock, DataSourceConfig, EncodingItem, EncodingMap, FieldItem, Trigger } from '../components/componentType' import { enableMapSet } from 'immer'; -import { DictTable } from "../components/ComponentType"; +import { DictTable } from "../components/componentType"; import { Message } from '../views/MessageSnackbar'; import { getChartTemplate, getChartChannels } from "../components/ChartTemplates" import { recommendEncodings } from '../components/chartUtils'; diff --git a/src/app/tableThunks.ts b/src/app/tableThunks.ts index 4d8f3a61..e79b5e62 100644 --- a/src/app/tableThunks.ts +++ b/src/app/tableThunks.ts @@ -13,7 +13,7 @@ */ import { createAsyncThunk } from '@reduxjs/toolkit'; -import { DataSourceConfig, DictTable } from '../components/ComponentType'; +import { DataSourceConfig, DictTable } from '../components/componentType'; import { Type } from '../data/types'; import { inferTypeFromValueArray } from '../data/utils'; import { fetchWithIdentity, getUrls, computeContentHash } from './utils'; diff --git a/src/app/useDataRefresh.tsx b/src/app/useDataRefresh.tsx index c37c8751..b33731d9 100644 --- a/src/app/useDataRefresh.tsx +++ b/src/app/useDataRefresh.tsx @@ -5,7 +5,7 @@ import { useEffect, useRef, useCallback } from 'react'; import { useDispatch, useSelector } from 'react-redux'; import { DataFormulatorState, dfActions, selectRefreshConfigs } from './dfSlice'; import { AppDispatch } from './store'; -import { DictTable } from '../components/ComponentType'; +import { DictTable } from '../components/componentType'; import { createTableFromText } from '../data/utils'; import { fetchWithIdentity, getUrls, computeContentHash } from './utils'; diff --git a/src/app/utils.tsx b/src/app/utils.tsx index a8eb7b5d..5334d0ef 100644 --- a/src/app/utils.tsx +++ b/src/app/utils.tsx @@ -5,8 +5,8 @@ import _, { } from "lodash"; import { useEffect, useRef } from "react"; import ts from "typescript"; import { ChannelGroups, getChartChannels, getChartTemplate } from "../components/ChartTemplates"; -import { Channel, Chart, ChartTemplate, ConceptTransformation, EncodingItem, EncodingMap, FieldItem, Trigger } from "../components/ComponentType"; -import { DictTable } from "../components/ComponentType"; +import { Channel, Chart, ChartTemplate, ConceptTransformation, EncodingItem, EncodingMap, FieldItem, Trigger } from "../components/componentType"; +import { DictTable } from "../components/componentType"; import { getDType, Type } from "../data/types"; import * as d3 from 'd3'; import { diff --git a/src/data/utils.ts b/src/data/utils.ts index 1b98deb8..775f9058 100644 --- a/src/data/utils.ts +++ b/src/data/utils.ts @@ -5,7 +5,7 @@ import * as d3 from 'd3'; import Column from './column'; import * as ExcelJS from 'exceljs'; -import { DictTable } from '../components/ComponentType'; +import { DictTable } from '../components/componentType'; import { CoerceType, TestType, Type } from './types'; import { ColumnTable } from './table'; diff --git a/src/views/ChartRecBox.tsx b/src/views/ChartRecBox.tsx index d4cbb3b4..44002366 100644 --- a/src/views/ChartRecBox.tsx +++ b/src/views/ChartRecBox.tsx @@ -37,12 +37,12 @@ import { import React from 'react'; -import { Chart, FieldItem } from "../components/ComponentType"; +import { 
Chart, FieldItem } from "../components/componentType"; import _ from 'lodash'; import '../scss/EncodingShelf.scss'; -import { createDictTable, DictTable } from "../components/ComponentType"; +import { createDictTable, DictTable } from "../components/componentType"; import { getUrls, getTriggers, resolveRecommendedChart, fetchWithIdentity } from '../app/utils'; diff --git a/src/views/ChartRenderService.tsx b/src/views/ChartRenderService.tsx index f9b7a2af..3a24eada 100644 --- a/src/views/ChartRenderService.tsx +++ b/src/views/ChartRenderService.tsx @@ -20,7 +20,7 @@ import { FC, useEffect, useRef, useCallback } from 'react'; import { useSelector, useDispatch } from 'react-redux'; import { DataFormulatorState, dfActions, dfSelectors } from '../app/dfSlice'; -import { Chart, DictTable, FieldItem } from '../components/ComponentType'; +import { Chart, DictTable, FieldItem } from '../components/componentType'; import { assembleVegaChart, prepVisTable } from '../app/utils'; import { getDataTable, checkChartAvailability } from './VisualizationView'; import { getCachedChart, setCachedChart, computeCacheKey, invalidateChart, ChartCacheEntry } from '../app/chartCache'; diff --git a/src/views/ChartifactDialog.tsx b/src/views/ChartifactDialog.tsx index 4867291b..2293596f 100644 --- a/src/views/ChartifactDialog.tsx +++ b/src/views/ChartifactDialog.tsx @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -import { Chart, DictTable, FieldItem } from '../components/ComponentType'; +import { Chart, DictTable, FieldItem } from '../components/componentType'; import { assembleVegaChart, prepVisTable } from '../app/utils'; import { exportTableToDsv } from '../data/utils'; import { ClientConfig } from '../app/dfSlice'; diff --git a/src/views/ConceptCard.tsx b/src/views/ConceptCard.tsx index 82729bb6..283f4bee 100644 --- a/src/views/ConceptCard.tsx +++ b/src/views/ConceptCard.tsx @@ -30,7 +30,7 @@ import HideSourceIcon from '@mui/icons-material/HideSource'; import ArrowRightIcon from '@mui/icons-material/ArrowRight'; import AnimateHeight from 'react-animate-height'; -import { FieldItem, ConceptTransformation, duplicateField, FieldSource } from '../components/ComponentType'; +import { FieldItem, ConceptTransformation, duplicateField, FieldSource } from '../components/componentType'; import { testType, Type, TypeList } from "../data/types"; import React from 'react'; @@ -41,7 +41,7 @@ import { getIconFromType } from './ViewUtils'; import _ from 'lodash'; -import { DictTable } from '../components/ComponentType'; +import { DictTable } from '../components/componentType'; import { CodeBox } from './VisualizationView'; import { CustomReactTable } from './ReactTable'; import { alpha } from '@mui/material/styles'; diff --git a/src/views/ConceptShelf.tsx b/src/views/ConceptShelf.tsx index 6272d98e..65b661f6 100644 --- a/src/views/ConceptShelf.tsx +++ b/src/views/ConceptShelf.tsx @@ -19,7 +19,7 @@ import { import CleaningServicesIcon from '@mui/icons-material/CleaningServices'; -import { FieldItem, Channel } from '../components/ComponentType'; +import { FieldItem, Channel } from '../components/componentType'; import React from 'react'; import { DataFormulatorState, dfActions, dfSelectors } from '../app/dfSlice'; diff --git a/src/views/DBTableManager.tsx b/src/views/DBTableManager.tsx index 757dbc37..52baa243 100644 --- a/src/views/DBTableManager.tsx +++ b/src/views/DBTableManager.tsx @@ -50,7 +50,7 @@ type TableImportConfig = import { getUrls, fetchWithIdentity } from '../app/utils'; 
import { borderColor } from '../app/tokens'; import { CustomReactTable } from './ReactTable'; -import { DataSourceConfig, DictTable } from '../components/ComponentType'; +import { DataSourceConfig, DictTable } from '../components/componentType'; import { Type } from '../data/types'; import { useDispatch, useSelector } from 'react-redux'; import { dfActions, dfSelectors } from '../app/dfSlice'; diff --git a/src/views/DataLoadingThread.tsx b/src/views/DataLoadingThread.tsx index eecb34dd..7d756e70 100644 --- a/src/views/DataLoadingThread.tsx +++ b/src/views/DataLoadingThread.tsx @@ -29,7 +29,7 @@ import { useDispatch, useSelector } from 'react-redux'; import { AppDispatch } from '../app/store'; import { DataFormulatorState, dfActions, dfSelectors, fetchFieldSemanticType } from '../app/dfSlice'; import { borderColor, shadow, transition, radius } from '../app/tokens'; -import { DataCleanBlock, DataCleanTableOutput } from '../components/ComponentType'; +import { DataCleanBlock, DataCleanTableOutput } from '../components/componentType'; import { getUrls, fetchWithIdentity } from '../app/utils'; import { CustomReactTable } from './ReactTable'; import { createTableFromText } from '../data/utils'; diff --git a/src/views/DataThread.tsx b/src/views/DataThread.tsx index 4e164545..82862eed 100644 --- a/src/views/DataThread.tsx +++ b/src/views/DataThread.tsx @@ -32,7 +32,7 @@ import '../scss/VisualizationView.scss'; import { batch, useDispatch, useSelector } from 'react-redux'; import { DataFormulatorState, dfActions, SSEMessage } from '../app/dfSlice'; import { getTriggers } from '../app/utils'; -import { Chart, DictTable, Trigger } from "../components/ComponentType"; +import { Chart, DictTable, Trigger } from "../components/componentType"; import DeleteIcon from '@mui/icons-material/Delete'; import StarIcon from '@mui/icons-material/Star'; diff --git a/src/views/DataThreadCards.tsx b/src/views/DataThreadCards.tsx index d1f7c838..058b4793 100644 --- a/src/views/DataThreadCards.tsx +++ b/src/views/DataThreadCards.tsx @@ -16,7 +16,7 @@ import { } from '@mui/material'; import { dfActions } from '../app/dfSlice'; -import { Chart, DictTable, Trigger } from "../components/ComponentType"; +import { Chart, DictTable, Trigger } from "../components/componentType"; import DeleteIcon from '@mui/icons-material/Delete'; import AddchartIcon from '@mui/icons-material/Addchart'; diff --git a/src/views/DataView.tsx b/src/views/DataView.tsx index 570b77b1..e08487f3 100644 --- a/src/views/DataView.tsx +++ b/src/views/DataView.tsx @@ -10,7 +10,7 @@ import { alpha } from '@mui/material/styles'; import '../scss/DataView.scss'; -import { DictTable } from '../components/ComponentType'; +import { DictTable } from '../components/componentType'; import { DataFormulatorState, dfActions, dfSelectors } from '../app/dfSlice'; import { useDispatch, useSelector } from 'react-redux'; import { Type } from '../data/types'; diff --git a/src/views/EncodingBox.tsx b/src/views/EncodingBox.tsx index 03fd500e..3cf79be8 100644 --- a/src/views/EncodingBox.tsx +++ b/src/views/EncodingBox.tsx @@ -42,7 +42,7 @@ import CalendarMonthIcon from '@mui/icons-material/CalendarMonth'; import QuestionMarkIcon from '@mui/icons-material/QuestionMark'; import { FieldItem, Channel, EncodingItem, AggrOp, AGGR_OP_LIST, - ConceptTransformation, Chart, duplicateField } from "../components/ComponentType"; + ConceptTransformation, Chart, duplicateField } from "../components/componentType"; import { EncodingDropResult } from "../views/ConceptShelf"; import _ from 
'lodash'; diff --git a/src/views/EncodingShelfCard.tsx b/src/views/EncodingShelfCard.tsx index 282cc8b6..a0f2b335 100644 --- a/src/views/EncodingShelfCard.tsx +++ b/src/views/EncodingShelfCard.tsx @@ -39,12 +39,12 @@ import { import React from 'react'; import { ThinkingBufferEffect } from '../components/FunComponents'; -import { Channel, Chart, FieldItem, Trigger, duplicateChart } from "../components/ComponentType"; +import { Channel, Chart, FieldItem, Trigger, duplicateChart } from "../components/componentType"; import _ from 'lodash'; import '../scss/EncodingShelf.scss'; -import { createDictTable, DictTable } from "../components/ComponentType"; +import { createDictTable, DictTable } from "../components/componentType"; import { getUrls, resolveChartFields, getTriggers, assembleVegaChart, resolveRecommendedChart, fetchWithIdentity } from '../app/utils'; import { EncodingBox } from './EncodingBox'; diff --git a/src/views/EncodingShelfThread.tsx b/src/views/EncodingShelfThread.tsx index 76af21ad..49baa9bc 100644 --- a/src/views/EncodingShelfThread.tsx +++ b/src/views/EncodingShelfThread.tsx @@ -16,11 +16,11 @@ import { import React from 'react'; -import { Chart, Trigger } from "../components/ComponentType"; +import { Chart, Trigger } from "../components/componentType"; import '../scss/EncodingShelf.scss'; -import { DictTable } from "../components/ComponentType"; +import { DictTable } from "../components/componentType"; import { Type } from '../data/types'; import { getTriggers } from '../app/utils'; diff --git a/src/views/MultiTablePreview.tsx b/src/views/MultiTablePreview.tsx index 55a17c11..74fc4d66 100644 --- a/src/views/MultiTablePreview.tsx +++ b/src/views/MultiTablePreview.tsx @@ -14,7 +14,7 @@ import { Card, } from '@mui/material'; import DeleteIcon from '@mui/icons-material/Delete'; -import { DictTable } from '../components/ComponentType'; +import { DictTable } from '../components/componentType'; import { CustomReactTable } from './ReactTable'; export interface MultiTablePreviewProps { diff --git a/src/views/RefreshDataDialog.tsx b/src/views/RefreshDataDialog.tsx index 063f7323..0462c306 100644 --- a/src/views/RefreshDataDialog.tsx +++ b/src/views/RefreshDataDialog.tsx @@ -25,7 +25,7 @@ import CloseIcon from '@mui/icons-material/Close'; import UploadFileIcon from '@mui/icons-material/UploadFile'; import { useSelector } from 'react-redux'; import { DataFormulatorState } from '../app/dfSlice'; -import { DictTable } from '../components/ComponentType'; +import { DictTable } from '../components/componentType'; import { createTableFromText, loadTextDataWrapper, loadBinaryDataWrapper } from '../data/utils'; interface TabPanelProps { diff --git a/src/views/ReportView.tsx b/src/views/ReportView.tsx index 9477dd5c..5e7b5b20 100644 --- a/src/views/ReportView.tsx +++ b/src/views/ReportView.tsx @@ -45,7 +45,7 @@ import { getUrls, assembleVegaChart, getTriggers, prepVisTable, fetchWithIdentit import { MuiMarkdown, getOverrides } from 'mui-markdown'; import embed from 'vega-embed'; import { getDataTable } from './VisualizationView'; -import { DictTable } from '../components/ComponentType'; +import { DictTable } from '../components/componentType'; import { AppDispatch } from '../app/store'; import { Collapse } from '@mui/material'; import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; diff --git a/src/views/SelectableDataGrid.tsx b/src/views/SelectableDataGrid.tsx index 24b86667..8e8e4cf6 100644 --- a/src/views/SelectableDataGrid.tsx +++ b/src/views/SelectableDataGrid.tsx @@ -21,7 +21,7 @@ 
import { getIconFromType } from './ViewUtils'; import { IconButton, TableSortLabel, Typography } from '@mui/material'; import _ from 'lodash'; -import { FieldSource, FieldItem } from '../components/ComponentType'; +import { FieldSource, FieldItem } from '../components/componentType'; import FileDownloadIcon from '@mui/icons-material/FileDownload'; import { TableIcon } from '../icons'; diff --git a/src/views/TableSelectionView.tsx b/src/views/TableSelectionView.tsx index c25fadbc..8045cf22 100644 --- a/src/views/TableSelectionView.tsx +++ b/src/views/TableSelectionView.tsx @@ -11,7 +11,7 @@ import { borderColor } from '../app/tokens'; import { StreamIcon } from '../icons'; import { createTableFromFromObjectArray } from '../data/utils'; import { MultiTablePreview } from './MultiTablePreview'; -import { DictTable } from '../components/ComponentType'; +import { DictTable } from '../components/componentType'; // Update the interface to support multiple tables per dataset export interface DatasetMetadata { diff --git a/src/views/UnifiedDataUploadDialog.tsx b/src/views/UnifiedDataUploadDialog.tsx index 8023eb18..d92f61e0 100644 --- a/src/views/UnifiedDataUploadDialog.tsx +++ b/src/views/UnifiedDataUploadDialog.tsx @@ -37,7 +37,7 @@ import { useDispatch, useSelector } from 'react-redux'; import { DataFormulatorState, dfActions, fetchFieldSemanticType } from '../app/dfSlice'; import { AppDispatch } from '../app/store'; import { loadTable } from '../app/tableThunks'; -import { DataSourceConfig, DictTable } from '../components/ComponentType'; +import { DataSourceConfig, DictTable } from '../components/componentType'; import { createTableFromFromObjectArray, createTableFromText, loadTextDataWrapper, loadBinaryDataWrapper } from '../data/utils'; import { DataLoadingChat } from './DataLoadingChat'; import { DatasetSelectionView, DatasetMetadata } from './TableSelectionView'; diff --git a/src/views/ViewUtils.tsx b/src/views/ViewUtils.tsx index 3d654d32..aada1c98 100644 --- a/src/views/ViewUtils.tsx +++ b/src/views/ViewUtils.tsx @@ -4,7 +4,7 @@ import React from "react"; import ts from "typescript"; import { runCodeOnInputListsInVM } from "../app/utils"; -import { ConceptTransformation, FieldItem } from "../components/ComponentType"; +import { ConceptTransformation, FieldItem } from "../components/componentType"; import { Type } from "../data/types"; import { BooleanIcon, NumericalIcon, StringIcon, DateIcon, UnknownIcon } from '../icons'; @@ -12,7 +12,7 @@ import AutoFixHighIcon from '@mui/icons-material/AutoFixHigh'; import BarChartIcon from '@mui/icons-material/BarChart'; import CommitIcon from '@mui/icons-material/Commit'; -import { DictTable } from '../components/ComponentType'; +import { DictTable } from '../components/componentType'; export const groupConceptItems = (conceptShelfItems: FieldItem[], tables: DictTable[]) => { // group concepts based on which source table they belongs to diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx index da0a67f0..acaa261b 100644 --- a/src/views/VisualizationView.tsx +++ b/src/views/VisualizationView.tsx @@ -44,8 +44,8 @@ import '../scss/VisualizationView.scss'; import { useDispatch, useSelector } from 'react-redux'; import { DataFormulatorState, dfActions } from '../app/dfSlice'; import { assembleVegaChart, extractFieldsFromEncodingMap, getUrls, prepVisTable, fetchWithIdentity } from '../app/utils'; -import { Chart, EncodingItem, EncodingMap, FieldItem } from '../components/ComponentType'; -import { DictTable } from 
"../components/ComponentType"; +import { Chart, EncodingItem, EncodingMap, FieldItem } from '../components/componentType'; +import { DictTable } from "../components/componentType"; import AddchartIcon from '@mui/icons-material/Addchart'; import DeleteIcon from '@mui/icons-material/Delete'; From 982b1145649c8dc8e5a489b398b7f1ad175cb645 Mon Sep 17 00:00:00 2001 From: Andres Date: Fri, 13 Feb 2026 08:03:32 -0800 Subject: [PATCH 6/6] StreamableHTTP server support --- py-src/data_formulator/mcp_server.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/py-src/data_formulator/mcp_server.py b/py-src/data_formulator/mcp_server.py index 72c26ae8..4d0d6a1c 100644 --- a/py-src/data_formulator/mcp_server.py +++ b/py-src/data_formulator/mcp_server.py @@ -154,6 +154,7 @@ load_dotenv(os.path.join(Path(__file__).parent, 'api-keys.env')) from mcp.server.fastmcp import FastMCP +from mcp.server.transport_security import TransportSecuritySettings from data_formulator.agents.client_utils import Client from data_formulator.agents.agent_data_rec import DataRecAgent @@ -326,6 +327,11 @@ def _make_chart_image( "Use list_demo_data to browse available demo datasets, then pass their " "URLs to visualize_data, explore_data, or create_chart." ), + stateless_http=True, # each HTTP request is independent; no session affinity needed + json_response=True, # all responses are JSON-serializable dicts + transport_security=TransportSecuritySettings( + enable_dns_rebinding_protection=False, # https://github.com/modelcontextprotocol/python-sdk/issues/1798 + ) ) @@ -797,3 +803,25 @@ def main(): if __name__ == "__main__": main() +else: + + # See https://github.com/modelcontextprotocol/python-sdk?tab=readme-ov-file#streamablehttp-servers + + from starlette.applications import Starlette + from starlette.routing import Mount + import contextlib + + # Create a lifespan context manager to run the session manager + @contextlib.asynccontextmanager + async def lifespan(app: Starlette): + async with mcp.session_manager.run(): + yield + + + # Mount the StreamableHTTP server to the existing ASGI server + app = Starlette( + routes=[ + Mount("/", app=mcp.streamable_http_app()), + ], + lifespan=lifespan, + ) \ No newline at end of file