diff --git a/Makefile b/Makefile
index fe0236238fa..3bc172f1d72 100644
--- a/Makefile
+++ b/Makefile
@@ -88,7 +88,7 @@
 #
 # ==============================================================================
 
-.PHONY: voxtral-cuda voxtral-cpu voxtral-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cpu parakeet-metal llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help
+.PHONY: voxtral-cuda voxtral-cpu voxtral-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cpu parakeet-metal llama-cpu llava-cpu gemma3-cuda gemma3-cpu gemma3-text-cpu clean help
 
 help:
 	@echo "This Makefile adds targets to build runners for various models on various backends. Run using \`make <target>\`. Available targets:"
@@ -106,6 +106,7 @@ help:
 	@echo "  llava-cpu        - Build Llava runner with CPU backend"
 	@echo "  gemma3-cuda      - Build Gemma3 runner with CUDA backend"
 	@echo "  gemma3-cpu       - Build Gemma3 runner with CPU backend"
+	@echo "  gemma3-text-cpu  - Build Gemma3 text-only runner with CPU backend"
 	@echo "  clean            - Clean build artifacts"
 
 voxtral-cuda:
@@ -234,6 +235,15 @@ gemma3-cpu:
 	@echo "✓ Build complete!"
 	@echo "  Binary: cmake-out/examples/models/gemma3/gemma3_e2e_runner"
 
+gemma3-text-cpu:
+	@echo "==> Building and installing ExecuTorch..."
+	cmake --workflow --preset llm-release
+	@echo "==> Building Gemma3 text runner (CPU)..."
+	cd examples/models/gemma3 && cmake --workflow --preset gemma3-text-cpu
+	@echo ""
+	@echo "✓ Build complete!"
+	@echo "  Binary: cmake-out/examples/models/gemma3/gemma3_text_runner"
+
 clean:
 	rm -rf cmake-out \
 		extension/llm/tokenizers/build \
diff --git a/examples/models/gemma3/CMakeLists.txt b/examples/models/gemma3/CMakeLists.txt
index d228ca53c46..12d53a8f6c6 100644
--- a/examples/models/gemma3/CMakeLists.txt
+++ b/examples/models/gemma3/CMakeLists.txt
@@ -37,7 +37,6 @@ find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)
 executorch_target_link_options_shared_lib(executorch)
 
 set(link_libraries executorch gflags)
-set(_srcs e2e_runner.cpp)
 
 list(
   APPEND
@@ -109,7 +108,8 @@ endif()
 # Add tokenizers
 list(APPEND link_libraries tokenizers::tokenizers)
 
-add_executable(gemma3_e2e_runner ${_srcs})
+# Executable for multimodal e2e runner (with image support)
+add_executable(gemma3_e2e_runner e2e_runner.cpp)
 if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
   target_link_options_gc_sections(gemma3_e2e_runner)
   if(NOT APPLE)
@@ -122,3 +122,18 @@ target_include_directories(
 )
 target_link_libraries(gemma3_e2e_runner PUBLIC ${link_libraries})
 target_compile_options(gemma3_e2e_runner PUBLIC ${_common_compile_options})
+
+# Executable for text-only runner (no image support)
+add_executable(gemma3_text_runner text_runner.cpp)
+if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
+  target_link_options_gc_sections(gemma3_text_runner)
+  if(NOT APPLE)
+    target_link_options(gemma3_text_runner PRIVATE "LINKER:-s")
+  endif()
+endif()
+
+target_include_directories(
+  gemma3_text_runner PUBLIC ${_common_include_directories}
+)
+target_link_libraries(gemma3_text_runner PUBLIC ${link_libraries})
+target_compile_options(gemma3_text_runner PUBLIC ${_common_compile_options})
diff --git a/examples/models/gemma3/CMakePresets.json b/examples/models/gemma3/CMakePresets.json
index dcfeceba1cd..376930f32f8 100644
--- a/examples/models/gemma3/CMakePresets.json
+++ b/examples/models/gemma3/CMakePresets.json
@@ -36,6 +36,12 @@
       "configurePreset": "gemma3-cpu",
       "targets": ["gemma3_e2e_runner"]
     },
+    {
+      "name": "gemma3-text-cpu",
+      "displayName": "Build Gemma3 text runner (CPU)",
"configurePreset": "gemma3-cpu", + "targets": ["gemma3_text_runner"] + }, { "name": "gemma3-cuda", "displayName": "Build Gemma3 runner (CUDA)", @@ -58,6 +64,20 @@ } ] }, + { + "name": "gemma3-text-cpu", + "displayName": "Configure and build Gemma3 text runner (CPU)", + "steps": [ + { + "type": "configure", + "name": "gemma3-cpu" + }, + { + "type": "build", + "name": "gemma3-text-cpu" + } + ] + }, { "name": "gemma3-cuda", "displayName": "Configure and build Gemma3 runner (CUDA)", diff --git a/examples/models/gemma3/README.md b/examples/models/gemma3/README.md index 9d36ae2b625..a64133b3c41 100644 --- a/examples/models/gemma3/README.md +++ b/examples/models/gemma3/README.md @@ -1,27 +1,96 @@ # Summary -This example demonstrates how to export and run Google's [Gemma 3](https://huggingface.co/google/gemma-3-4b-it) vision-language multimodal model locally on ExecuTorch with CUDA backend support. +This example demonstrates how to export and run Google's Gemma 3 models on ExecuTorch: +- [Gemma 3 4B](https://huggingface.co/google/gemma-3-4b-it) - Vision-language multimodal model (CUDA/CPU) +- [Gemma 3 1B](https://huggingface.co/google/gemma-3-1b-it) - Text-only instruction-tuned model (CPU) -# Exporting the model -To export the model, we use [Optimum ExecuTorch](https://github.com/huggingface/optimum-executorch), a repo that enables exporting models straight from the source - from HuggingFace's Transformers repo. +# Prerequisites ## Setting up Optimum ExecuTorch -Install through pip package: -``` +To export the models, we use [Optimum ExecuTorch](https://github.com/huggingface/optimum-executorch), which enables exporting models from HuggingFace's Transformers. + +Install through pip: +```bash pip install optimum-executorch ``` Or install from source: -``` +```bash git clone https://github.com/huggingface/optimum-executorch.git cd optimum-executorch python install_dev.py ``` -## CUDA Support -This guide focuses on CUDA backend support for Gemma3, which provides accelerated performance on NVIDIA GPUs. +## Obtaining the Tokenizer +Both Gemma 3 models share the same tokenizer. Download `tokenizer.json` from HuggingFace: +```bash +mkdir -p gemma-3 +curl -L https://huggingface.co/google/gemma-3-1b-it/resolve/main/tokenizer.json -o gemma-3/tokenizer.json +``` + +--- + +# Gemma 3 1B Text-Only Model (CPU) + +This section covers running the lightweight Gemma 3 1B instruction-tuned model for text-only inference on CPU. + +## Exporting Gemma 3 1B + +```bash +optimum-cli export executorch \ + --model "google/gemma-3-1b-it" \ + --task "text-generation" \ + --recipe "xnnpack" \ + --use_custom_sdpa \ + --use_custom_kv_cache \ + --output_dir="gemma-3/gemma-3-1b-it" +``` + +This will generate: +- `model.pte` - The exported model + +## Building the Text Runner + +```bash +make gemma3-text-cpu +``` + +## Running the Text Model -### Exporting with CUDA +```bash +./cmake-out/examples/models/gemma3/gemma3_text_runner \ + --model_path=gemma-3/gemma-3-1b-it/model.pte \ + --tokenizer_path=gemma-3/tokenizer.json \ + --prompt="What is the capital of France?" 
\ + --max_new_tokens=100 +``` + +### Available Options +| Flag | Description | Default | +|------|-------------|---------| +| `--model_path` | Path to the exported model.pte | `model.pte` | +| `--tokenizer_path` | Path to tokenizer.json | `tokenizer.json` | +| `--prompt` | Text prompt for generation | `Hello, world!` | +| `--temperature` | Sampling temperature (0 = greedy) | `0.0` | +| `--max_new_tokens` | Maximum tokens to generate | `100` | +| `--cpu_threads` | Number of CPU threads (-1 = auto) | `-1` | +| `--warmup` | Run warmup before generation | `false` | + +### Example Output +``` +The capital of France is **Paris**. +PyTorchObserver {"prompt_tokens":15,"generated_tokens":12,...} +``` + +--- + +# Gemma 3 4B Multimodal Model (CUDA) + +This section covers running the Gemma 3 4B vision-language multimodal model with CUDA backend support. + +## Exporting Gemma 3 4B + +### Standard Export ```bash optimum-cli export executorch \ --model "google/gemma-3-4b-it" \ @@ -29,15 +98,15 @@ optimum-cli export executorch \ --recipe "cuda" \ --dtype bfloat16 \ --device cuda \ - --output_dir="path/to/output/dir" + --output_dir="gemma-3/gemma-3-4b-it" ``` This will generate: - `model.pte` - The exported model - `aoti_cuda_blob.ptd` - The CUDA kernel blob required for runtime -### Exporting with INT4 Quantization (Tile Packed) -For improved performance and reduced memory footprint, you can export Gemma3 with INT4 weight quantization using tile-packed format: +### Export with INT4 Quantization (Tile Packed) +For improved performance and reduced memory footprint: ```bash optimum-cli export executorch \ @@ -50,59 +119,45 @@ optimum-cli export executorch \ --qlinear_encoder 4w \ --qlinear_packing_format tile_packed_to_4d \ --qlinear_encoder_packing_format tile_packed_to_4d \ - --output_dir="path/to/output/dir" -``` - -This will generate the same files (`model.pte` and `aoti_cuda_blob.ptd`) in the `int4` directory. - -See the "Building the Gemma3 runner" section below for instructions on building with CUDA support, and the "Running the model" section for runtime instructions. - -# Running the model -To run the model, we will use the Gemma3 runner, which utilizes ExecuTorch's MultiModal runner API. -The Gemma3 runner will do the following: - -- **Image Input**: Load image files (PNG, JPG, etc.) and format them as input tensors for the model -- **Text Input**: Process text prompts using the tokenizer -- **Feed the formatted inputs** to the multimodal runner for inference - -## Obtaining the tokenizer -You can download the `tokenizer.json` file from [Gemma 3's HuggingFace repo](https://huggingface.co/unsloth/gemma-3-1b-it): -```bash -curl -L https://huggingface.co/unsloth/gemma-3-1b-it/resolve/main/tokenizer.json -o tokenizer.json + --output_dir="gemma-3/gemma-3-4b-it-int4" ``` -## Building the Gemma3 runner +## Building the Multimodal Runner ### Prerequisites -Ensure you have a CUDA-capable GPU and CUDA toolkit installed on your system. +Ensure you have a CUDA-capable GPU and CUDA toolkit installed. -### Building for CUDA +### Build Commands ```bash -# Build the Gemma3 runner with CUDA enabled +# Build with CUDA backend make gemma3-cuda -# Build the Gemma3 runner with CPU enabled +# Build with CPU backend make gemma3-cpu ``` -## Running the model -You need to provide the following files to run Gemma3: -- `model.pte` - The exported model file -- `aoti_cuda_blob.ptd` - The CUDA kernel blob -- `tokenizer.json` - The tokenizer file -- An image file (PNG, JPG, etc.) 
+## Running the Multimodal Model
+
+The multimodal runner processes both image and text inputs:
 
-### Example usage
 ```bash
 ./cmake-out/examples/models/gemma3/gemma3_e2e_runner \
-  --model_path path/to/model.pte \
-  --data_path path/to/aoti_cuda_blob.ptd \
-  --tokenizer_path path/to/tokenizer.json \
-  --image_path docs/source/_static/img/et-logo.png \ # here we use the ExecuTorch logo as an example
-  --temperature 0
+  --model_path=gemma-3/gemma-3-4b-it/model.pte \
+  --data_path=gemma-3/gemma-3-4b-it/aoti_cuda_blob.ptd \
+  --tokenizer_path=gemma-3/tokenizer.json \
+  --image_path=docs/source/_static/img/et-logo.png \
+  --temperature=0
 ```
 
-# Example output
+### Required Files
+| File | Description |
+|------|-------------|
+| `model.pte` | The exported model file |
+| `aoti_cuda_blob.ptd` | CUDA kernel blob (CUDA only) |
+| `tokenizer.json` | Shared tokenizer |
+| Image file | PNG, JPG, or other supported format |
+
+### Example Output
 ```
 Okay, let's break down what's in the image!
 
@@ -111,5 +166,5 @@ It appears to be a stylized graphic combining:
 * **A Microchip:** The core shape is a representation of a microchip (the integrated circuit).
 * **An "On" Symbol:** There's an "On" symbol (often represented as a circle with a vertical line) incorporated into the microchip design.
 * **Color Scheme:** The microchip is colored in gray, and
-PyTorchObserver {"prompt_tokens":271,"generated_tokens":99,"model_load_start_ms":0,"model_load_end_ms":0,"inference_start_ms":1761118126790,"inference_end_ms":1761118128385,"prompt_eval_end_ms":1761118127175,"first_token_ms":1761118127175,"aggregate_sampling_time_ms":86,"SCALING_FACTOR_UNITS_PER_SECOND":1000}
+PyTorchObserver {"prompt_tokens":271,"generated_tokens":99,...}
 ```
diff --git a/examples/models/gemma3/text_runner.cpp b/examples/models/gemma3/text_runner.cpp
new file mode 100644
index 00000000000..6474a0d3ea5
--- /dev/null
+++ b/examples/models/gemma3/text_runner.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <memory>
+#include <string>
+
+#include <gflags/gflags.h>
+
+#include <executorch/extension/llm/runner/irunner.h>
+#include <executorch/extension/llm/runner/llm_runner_helper.h>
+#include <executorch/extension/llm/runner/text_llm_runner.h>
+#include <executorch/runtime/platform/log.h>
+
+#if defined(ET_USE_THREADPOOL)
+#include <executorch/extension/threadpool/cpuinfo_utils.h>
+#include <executorch/extension/threadpool/threadpool.h>
+#endif
+
+DEFINE_string(
+    model_path,
+    "model.pte",
+    "Model serialized in flatbuffer format.");
+
+DEFINE_string(tokenizer_path, "tokenizer.json", "Tokenizer path.");
+
+DEFINE_string(prompt, "Hello, world!", "Text prompt.");
+
+DEFINE_double(
+    temperature,
+    0.0f,
+    "Temperature; Default is 0. 0 = greedy argmax sampling (deterministic). Lower temperature = more deterministic");
+
+DEFINE_int32(
+    max_new_tokens,
+    100,
+    "Maximum number of tokens to generate.");
+
+DEFINE_int32(
+    cpu_threads,
+    -1,
+    "Number of CPU threads for inference. Defaults to -1, which implies we'll use a heuristic to derive the # of performant cores for a specific device.");
+
+DEFINE_bool(warmup, false, "Whether to run a warmup run.");
+
+int32_t main(int32_t argc, char** argv) {
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+  const char* model_path = FLAGS_model_path.c_str();
+  const char* tokenizer_path = FLAGS_tokenizer_path.c_str();
+  const char* prompt = FLAGS_prompt.c_str();
+  float temperature = FLAGS_temperature;
+  int32_t max_new_tokens = FLAGS_max_new_tokens;
+  int32_t cpu_threads = FLAGS_cpu_threads;
+  bool warmup = FLAGS_warmup;
+
+#if defined(ET_USE_THREADPOOL)
+  uint32_t num_performant_cores = cpu_threads == -1
+      ? ::executorch::extension::cpuinfo::get_num_performant_cores()
+      : static_cast<uint32_t>(cpu_threads);
+  ET_LOG(
+      Info, "Resetting threadpool with num threads = %d", num_performant_cores);
+  if (num_performant_cores > 0) {
+    ::executorch::extension::threadpool::get_threadpool()
+        ->_unsafe_reset_threadpool(num_performant_cores);
+  }
+#endif
+
+  std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
+      ::executorch::extension::llm::load_tokenizer(tokenizer_path);
+  if (tokenizer == nullptr) {
+    ET_LOG(Error, "Failed to load tokenizer from: %s", tokenizer_path);
+    return 1;
+  }
+
+  // Create text LLM runner
+  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner =
+      ::executorch::extension::llm::create_text_llm_runner(
+          model_path, std::move(tokenizer));
+
+  if (runner == nullptr) {
+    ET_LOG(Error, "Failed to create text LLM runner");
+    return 1;
+  }
+
+  // Load runner
+  auto load_error = runner->load();
+  if (load_error != ::executorch::runtime::Error::Ok) {
+    ET_LOG(Error, "Failed to load text LLM runner");
+    return 1;
+  }
+
+  // Format prompt with Gemma3 chat template
+  std::string formatted_prompt = std::string("<start_of_turn>user\n") +
+      std::string(prompt) +
+      std::string("<end_of_turn>\n<start_of_turn>model\n");
+
+  ::executorch::extension::llm::GenerationConfig config;
+  config.max_new_tokens = max_new_tokens;
+  config.temperature = temperature;
+
+  // Run warmup if requested
+  if (warmup) {
+    ET_LOG(Info, "Running warmup...");
+    auto warmup_error = runner->warmup(formatted_prompt, max_new_tokens);
+    if (warmup_error != ::executorch::runtime::Error::Ok) {
+      ET_LOG(Error, "Failed to run warmup");
+      return 1;
+    }
+    runner->reset();
+  }
+
+  ET_LOG(Info, "Generating response...");
+
+  // Note: TextLLMRunner::generate() already handles printing tokens and stats
+  // internally, so we don't need to pass callbacks for printing
+  auto error = runner->generate(formatted_prompt, config);
+
+  if (error != ::executorch::runtime::Error::Ok) {
+    ET_LOG(Error, "Failed to generate with text LLM runner\n");
+    return 1;
+  }
+
+  return 0;
+}
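
A note on the final comment in `text_runner.cpp`: because `TextLLMRunner::generate()` prints tokens and stats itself, the runner above passes no callbacks. If a caller instead wants to capture the generated text, the sketch below shows one way to do it. It is not part of the diff; it assumes `generate()` accepts an optional per-token `std::function` callback (verify the signature in `extension/llm/runner/text_llm_runner.h` of your ExecuTorch checkout) and it reuses the `runner`, `formatted_prompt`, and `config` variables defined in `text_runner.cpp` above.

```cpp
// Sketch only, not part of the diff above. Assumes generate() takes an
// optional std::function<void(const std::string&)> token callback; check
// extension/llm/runner/text_llm_runner.h before relying on this.
std::string output;
auto gen_error = runner->generate(
    formatted_prompt,
    config,
    [&output](const std::string& token) {
      output += token; // accumulate tokens instead of relying on stdout
    });
if (gen_error != ::executorch::runtime::Error::Ok) {
  ET_LOG(Error, "Failed to generate with text LLM runner");
  return 1;
}
ET_LOG(Info, "Captured %zu bytes of generated text", output.size());
```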