From 948ab7a699fbb03d13b65c78d0113021a2fd336d Mon Sep 17 00:00:00 2001 From: Wonjae Jang Date: Tue, 11 Mar 2025 16:01:39 +0900 Subject: [PATCH 1/2] fix file sorting --- powerinfer-py/powerinfer/export_split.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/powerinfer-py/powerinfer/export_split.py b/powerinfer-py/powerinfer/export_split.py index 7f230d8c..e3a940e5 100644 --- a/powerinfer-py/powerinfer/export_split.py +++ b/powerinfer-py/powerinfer/export_split.py @@ -18,7 +18,10 @@ def load_activation_weights(models_base: Path): # But for now, let's assume it is a plain directory of activation_{0, ... , n_layers - 1}.pt *_, files = next(os.walk(models_base)) activation_files = [f for f in files if re.match(r"activation_\d+.pt", f)] - activation_files.sort() + + layer_num = np.array([int(re.sub(f'[^0-9]', '', f)) for f in activation_files]) + idx = np.argsort(layer_num) + activation_files = [activation_files[i] for i in idx] return [torch.load(models_base / f) for f in activation_files] def append_gpu_idx(gguf: GGUFWriter, i_layer: int, activation, select_count) -> None: From 41bcf67b0ed14e9f67111d59a34d67afc43e313f Mon Sep 17 00:00:00 2001 From: Wonjae Jang Date: Wed, 16 Apr 2025 15:18:38 +0900 Subject: [PATCH 2/2] goodies --- build.sh | 8 ++++++++ llama.cpp | 3 +++ scripts/pg19_firstbook_128.txt | 1 + 3 files changed, 12 insertions(+) create mode 100755 build.sh create mode 100644 scripts/pg19_firstbook_128.txt diff --git a/build.sh b/build.sh new file mode 100755 index 00000000..ad772258 --- /dev/null +++ b/build.sh @@ -0,0 +1,8 @@ +#!/bin/bash +#rm -rf build +cmake -S . -B build -DLLAMA_CUBLAS=ON -DLLAMA_GGML_PERF=ON #-DLLAMA_RUN_WARMUP=OFF +cmake --build build --config Release + +# if DLLAMA_GGML_PERF=ON +# -> avg exec time of operator in prefill & decode stage. +# -> ratio of active neurons located in the CPU diff --git a/llama.cpp b/llama.cpp index ac52908a..442fd9b1 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2863,6 +2863,9 @@ struct llama_gpu_split_loader { } } + ggml_tensor * up = model->layers[0].ffn_up; + printf("\n>>> %ld, %ld\n", up->ne[0], up->ne[1]); + const int64_t t_mlp_us = ggml_time_us() - t_start_mlp_us; LLAMA_LOG_INFO(" done (%.2f ms)\n", t_mlp_us / 1000.0); diff --git a/scripts/pg19_firstbook_128.txt b/scripts/pg19_firstbook_128.txt new file mode 100644 index 00000000..73a9a08d --- /dev/null +++ b/scripts/pg19_firstbook_128.txt @@ -0,0 +1 @@ +Half-way down the Rue Saint-Denis, almost at the corner of the Rue du Petit-Lion, there stood formerly one of those delightful houses which enable historians to reconstruct old Paris by analogy. The threatening walls of this tumbledown abode seemed to have been decorated with hieroglyphics. For what other name could the passer-by give to the Xs and Vs which the horizontal or diagonal timbers traced on the front, outlined by little parallel cracks in the plaster? It was evident that every beam quivered in its mortices at the passing of the lightest vehicle. This venerable structure was crowned by a \ No newline at end of file