From 6629a93cdbb4847169ec87d7d97e9a780e5ed0d3 Mon Sep 17 00:00:00 2001
From: wwwisman <120352666+wisman-tccr@users.noreply.github.com>
Date: Sun, 27 Jul 2025 17:11:14 +0800
Subject: [PATCH 1/4] Update README.md

---
 smallthinker/README.md | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/smallthinker/README.md b/smallthinker/README.md
index f7e43d5b..4b93e2ff 100644
--- a/smallthinker/README.md
+++ b/smallthinker/README.md
@@ -5,14 +5,22 @@
 
 ## Demo
 
+
+https://github.com/user-attachments/assets/cefd466e-3b1f-47a9-8dc3-f1cf5119045e
+
+
 ## Speed
 ### SmallThinker 21B 
-| Model                               | Memory(GiB)         | i9 14900 | 1+13 8ge4 | rk3588 (16G) | Raspberry PI 5 |
+| Model                         
+
+https://github.com/user-attachments/assets/37079e94-599b-4e7f-8000-0c095ebe0d59
+
+      | Memory(GiB)         | i9 14900 | 1+13 8ge4 | rk3588 (16G) | Raspberry PI 5 |
 |--------------------------------------|---------------------|----------|-----------|--------------|----------------|
 | SmallThinker 21B+sparse              | 11.47               | 30.19    | 23.03     | 10.84        | 6.61           |
-| SmallThinker 21B+sparse +limited memory | 84                | limit 8G | 20.30     | 15.50        | 8.56           |
+| SmallThinker 21B+sparse +limited memory | limit 8G         | 20.30     | 15.50        | 8.56     | -              |
 | Qwen3 30B A3B                        | 16.20               | 33.52    | 20.18     | 9.07         | -              |
-| Qwen3 30B A3Blimited memory          | 81.38               | limit 8G | 10.11     | 0.18         | 6.32           |
+| Qwen3 30B A3B +limited memory        | limit 8G            | 10.11     | 0.18         | 6.32     | -              |
 | Gemma 3n E2B                         | 1G, theoretically   | 36.88    | 27.06     | 12.50        | 6.66           |
 | Gemma 3n E4B                         | 2G, theoretically   | 21.93    | 16.58     | 7.37         | 4.01           |
 
@@ -31,12 +39,20 @@
 Note：i9 14900、1+13 8ge4 use 4 threads，others use the number of threads that  can achieve the maximum speed 
 
 ## Setup
-
-1. install clang-21 and mold：
+1. cd smallthinker before compiling
+```bash
+cd smallthinker
+```
+2. install clang-21 and mold：
 
 ```bash
 sudo apt install clang-21 mold
 ```
+3. init submodule：
+
+```bash
+git submodule update --init --recursive
+```
 
 ## Convert Model
 ```bash
@@ -140,11 +156,12 @@ python get_no_moe_weights_ffn.py /path/to/gguf_q4_0 /path/to/no_moe_gguf_q4_0
 ```bash
 EXPERT_BUNDLE_PATH=/path/to/bundle ./llama-cli -m /path/to/no_moe_gguf_q4_0 --no-cnv --temp 0.6 --top-k 20 --top-p 0.95 --samplers "temperature;top_k;top_p" -p "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nCalculate the integral of f(x) = sin(x) from 0 to 3pi/4.<|im_end|>\n<|im_start|>assistant" -t 4 -n 256 -ub 4
 ```
-### LM Head Sparsity: 
-1. The 4B model uses a sparse lm_head which may lead to some loss in precision. If you want to disable it, change the condition at src/llama-model.cpp:7580 to false.But the speed is slower.
+### Note: 
+1. The models use a sparse lm_head, which may lead to some loss in precision. To disable it, change the condition at src/llama-model.cpp:7580 to false; note that inference will then be slower.
+2. Running the Memory-Efficient Version in Termux may require root privileges.
 
 
 ## Acknowledgements
 
 We would like to thank the following projects:
-- [llama.cpp](https://github.com/ggml-org/llama.cpp)
\ No newline at end of file
+- [llama.cpp](https://github.com/ggml-org/llama.cpp)

From bf5b965df699c8fa4837cec4a19bdf631266793e Mon Sep 17 00:00:00 2001
From: wisman <2659530589@qq.com>
Date: Sun, 27 Jul 2025 09:28:51 +0000
Subject: [PATCH 2/4] Add .gitmodules and third-party submodules

---
 .gitmodules                                   | 21 +++++++++++++++++++
 smallthinker/ggml/src/ggml-kompute/kompute    |  1 +
 smallthinker/powerinfer/third_part/benchmark  |  1 +
 smallthinker/powerinfer/third_part/fmt        |  1 +
 smallthinker/powerinfer/third_part/googletest |  1 +
 smallthinker/powerinfer/third_part/libaio     |  1 +
 smallthinker/powerinfer/third_part/liburing   |  1 +
 smallthinker/powerinfer/third_part/perfetto   |  1 +
 8 files changed, 28 insertions(+)
 create mode 100644 .gitmodules
 create mode 160000 smallthinker/ggml/src/ggml-kompute/kompute
 create mode 160000 smallthinker/powerinfer/third_part/benchmark
 create mode 160000 smallthinker/powerinfer/third_part/fmt
 create mode 160000 smallthinker/powerinfer/third_part/googletest
 create mode 160000 smallthinker/powerinfer/third_part/libaio
 create mode 160000 smallthinker/powerinfer/third_part/liburing
 create mode 160000 smallthinker/powerinfer/third_part/perfetto

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000..599dbe44
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,21 @@
+[submodule "smallthinker/ggml/src/ggml-kompute/kompute"]
+	path = smallthinker/ggml/src/ggml-kompute/kompute
+	url = https://github.com/nomic-ai/kompute.git
+[submodule "smallthinker/powerinfer/third_part/perfetto"]
+	path = smallthinker/powerinfer/third_part/perfetto
+	url = https://github.com/google/perfetto.git
+[submodule "smallthinker/powerinfer/third_part/benchmark"]
+	path = smallthinker/powerinfer/third_part/benchmark
+	url = https://github.com/google/benchmark.git
+[submodule "smallthinker/powerinfer/third_part/googletest"]
+	path = smallthinker/powerinfer/third_part/googletest
+	url = https://github.com/google/googletest.git
+[submodule "smallthinker/powerinfer/third_part/libaio"]
+	path = smallthinker/powerinfer/third_part/libaio
+	url = https://github.com/crossbuild/libaio.git
+[submodule "smallthinker/powerinfer/third_part/liburing"]
+	path = smallthinker/powerinfer/third_part/liburing
+	url = https://github.com/axboe/liburing.git
+[submodule "smallthinker/powerinfer/third_part/fmt"]
+	path = smallthinker/powerinfer/third_part/fmt
+	url = https://github.com/fmtlib/fmt.git
diff --git a/smallthinker/ggml/src/ggml-kompute/kompute b/smallthinker/ggml/src/ggml-kompute/kompute
new file mode 160000
index 00000000..7c20efa3
--- /dev/null
+++ b/smallthinker/ggml/src/ggml-kompute/kompute
@@ -0,0 +1 @@
+Subproject commit 7c20efa30bb53d08bf04f84e510275766ebe9923
diff --git a/smallthinker/powerinfer/third_part/benchmark b/smallthinker/powerinfer/third_part/benchmark
new file mode 160000
index 00000000..77c03fbc
--- /dev/null
+++ b/smallthinker/powerinfer/third_part/benchmark
@@ -0,0 +1 @@
+Subproject commit 77c03fbcdcb7f28cd1f65d0e222542ef08ffd277
diff --git a/smallthinker/powerinfer/third_part/fmt b/smallthinker/powerinfer/third_part/fmt
new file mode 160000
index 00000000..35dcc582
--- /dev/null
+++ b/smallthinker/powerinfer/third_part/fmt
@@ -0,0 +1 @@
+Subproject commit 35dcc58263d6b55419a5932bd6b0b3029a0a8c00
diff --git a/smallthinker/powerinfer/third_part/googletest b/smallthinker/powerinfer/third_part/googletest
new file mode 160000
index 00000000..32f9f4c8
--- /dev/null
+++ b/smallthinker/powerinfer/third_part/googletest
@@ -0,0 +1 @@
+Subproject commit 32f9f4c82afa4249af66b55278df15c16b3031ea
diff --git a/smallthinker/powerinfer/third_part/libaio b/smallthinker/powerinfer/third_part/libaio
new file mode 160000
index 00000000..5a546a83
--- /dev/null
+++ b/smallthinker/powerinfer/third_part/libaio
@@ -0,0 +1 @@
+Subproject commit 5a546a834c36070648158d19dd564762d59f8eb8
diff --git a/smallthinker/powerinfer/third_part/liburing b/smallthinker/powerinfer/third_part/liburing
new file mode 160000
index 00000000..f2b6fb85
--- /dev/null
+++ b/smallthinker/powerinfer/third_part/liburing
@@ -0,0 +1 @@
+Subproject commit f2b6fb85b79baf17f2c0ea24a357c652caa2d7ba
diff --git a/smallthinker/powerinfer/third_part/perfetto b/smallthinker/powerinfer/third_part/perfetto
new file mode 160000
index 00000000..967c5777
--- /dev/null
+++ b/smallthinker/powerinfer/third_part/perfetto
@@ -0,0 +1 @@
+Subproject commit 967c577748320170af142088787e64c36790d7b3

From cc25b21c9aa303c92affc7d4e6d8d7d60c03977c Mon Sep 17 00:00:00 2001
From: wwwisman <120352666+wisman-tccr@users.noreply.github.com>
Date: Sun, 27 Jul 2025 17:38:44 +0800
Subject: [PATCH 3/4] Update README.md

---
 smallthinker/README.md | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/smallthinker/README.md b/smallthinker/README.md
index 4b93e2ff..0fe9d06b 100644
--- a/smallthinker/README.md
+++ b/smallthinker/README.md
@@ -1,5 +1,5 @@
 ## Intro
-- SmallThinker is a family of on-device native Mixture-of-Experts (MoE) language models specially designed for local deployment, co-developed by the IPADS and School of AI at Shanghai Jiao Tong University and Zenergize AI. Designed from the ground up for resource-constrained environments, SmallThinker brings powerful, private, and low-latency AI directly to your personal devices, without relying on the cloud.
+- SmallThinker ([SmallThinker-21BA3B-Instruct](https://huggingface.co/PowerInfer/SmallThinker-21BA3B-Instruct) and [SmallThinker-4BA0.6B-Instruct](https://huggingface.co/PowerInfer/SmallThinker-4BA0.6B-Instruct)) is a family of on-device native Mixture-of-Experts (MoE) language models specially designed for local deployment, co-developed by the IPADS and School of AI at Shanghai Jiao Tong University and Zenergize AI. Designed from the ground up for resource-constrained environments, SmallThinker brings powerful, private, and low-latency AI directly to your personal devices, without relying on the cloud.
 
 - This inference framework is specifically optimized for sparse model inference to achieve faster speeds, leveraging the router's pre-selection mechanism to enable efficient inference even in memory-constrained scenarios.
 
@@ -11,11 +11,7 @@ https://github.com/user-attachments/assets/cefd466e-3b1f-47a9-8dc3-f1cf5119045e
 
 ## Speed
 ### SmallThinker 21B 
-| Model                         
-
-https://github.com/user-attachments/assets/37079e94-599b-4e7f-8000-0c095ebe0d59
-
-      | Memory(GiB)         | i9 14900 | 1+13 8ge4 | rk3588 (16G) | Raspberry PI 5 |
+| Model                            | Memory(GiB)         | i9 14900 | 1+13 8ge4 | rk3588 (16G) | Raspberry PI 5 |
 |--------------------------------------|---------------------|----------|-----------|--------------|----------------|
 | SmallThinker 21B+sparse              | 11.47               | 30.19    | 23.03     | 10.84        | 6.61           |
 | SmallThinker 21B+sparse +limited memory | limit 8G         | 20.30     | 15.50        | 8.56     | -              |

From 4eda77147730923a1c770649991ea88be153c6e3 Mon Sep 17 00:00:00 2001
From: wwwisman <120352666+wisman-tccr@users.noreply.github.com>
Date: Sun, 27 Jul 2025 17:40:29 +0800
Subject: [PATCH 4/4] Update README.md

---
 smallthinker/README.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/smallthinker/README.md b/smallthinker/README.md
index 0fe9d06b..d868c4c2 100644
--- a/smallthinker/README.md
+++ b/smallthinker/README.md
@@ -35,21 +35,22 @@ https://github.com/user-attachments/assets/cefd466e-3b1f-47a9-8dc3-f1cf5119045e
 Note：i9 14900、1+13 8ge4 use 4 threads，others use the number of threads that  can achieve the maximum speed 
 
 ## Setup
-1. cd smallthinker before compiling
+1. init submodule：
+
 ```bash
-cd smallthinker
+git submodule update --init --recursive
 ```
 2. install clang-21 and mold：
 
 ```bash
 sudo apt install clang-21 mold
 ```
-3. init submodule：
-
+3. cd smallthinker before compiling
 ```bash
-git submodule update --init --recursive
+cd smallthinker
 ```
 
+
 ## Convert Model
 ```bash
 python3 convert_hf_to_gguf.py /path/to/safetensors_model --outtype f16 --outfile /path/to/gguf_fp16 --transpose-down all