From 3b82c3d3e7c1bfe4b1dfad06cf28c7d70c04b946 Mon Sep 17 00:00:00 2001
From: Jake Chavis
Date: Wed, 4 Feb 2026 23:51:44 -0500
Subject: [PATCH] Refactoring to use new llama_put_adapter_loras

---
 common/common.cpp     | 13 ++++++++-----
 include/llama.h       |  3 +++
 src/llama-context.cpp | 38 ++++++++++++++++++++++++++++++++++++++
 src/llama-context.h   |  4 ++++
 4 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 3aa396127ce..2f4cab1a86a 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1344,12 +1344,15 @@ std::string get_model_endpoint() {
 }
 
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
-    llama_clear_adapter_lora(ctx);
-    for (auto & la : lora) {
-        if (la.scale != 0.0f) {
-            llama_set_adapter_lora(ctx, la.ptr, la.scale);
-        }
+    std::vector<llama_adapter_lora *> loras;
+    std::vector<float> scales;
+
+    for (auto & la : lora) {
+        loras.push_back(la.ptr);
+        scales.push_back(la.scale);
     }
+
+    llama_put_adapter_loras(ctx, loras.size(), loras.data(), scales.data());
 }
 
 struct llama_model_params common_model_params_to_llama(common_params & params) {
diff --git a/include/llama.h b/include/llama.h
index bf4e28a8be1..50cd77b7875 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -672,6 +672,9 @@ extern "C" {
     // Remove all LoRA adapters from given context
     LLAMA_API void llama_clear_adapter_lora(struct llama_context * ctx);
 
+    // Set LoRA adapters on the context. Only modifies the context if the given adapters and scales differ from those currently applied.
+    LLAMA_API void llama_put_adapter_loras(struct llama_context * ctx, size_t num_adapters, struct llama_adapter_lora ** adapters, float * scales);
+
     // Apply a loaded control vector to a llama_context, or if data is NULL, clear
     // the currently loaded vector.
     // n_embd should be the size of a single layer's control, and data should point
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 203852d0f12..07e4d87d395 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1093,6 +1093,40 @@ bool llama_context::rm_adapter_lora(
     return false;
 }
 
+void llama_context::put_adapter_loras(size_t num_adapters, llama_adapter_lora ** adapters, float * scales) {
+    LLAMA_LOG_DEBUG("%s: adapters = %p\n", __func__, (void *) adapters);
+
+    if (are_adapter_loras_same(num_adapters, adapters, scales)) {
+        return;
+    }
+
+    clear_adapter_lora();
+
+    for (size_t i = 0; i < num_adapters; i++) {
+        if (scales[i] != 0.0f) {
+            set_adapter_lora(adapters[i], scales[i]);
+        }
+    }
+}
+
+bool llama_context::are_adapter_loras_same(size_t num_adapters, llama_adapter_lora ** adapters, float * scales) {
+    LLAMA_LOG_DEBUG("%s: adapters = %p\n", __func__, (void *) adapters);
+
+    if (num_adapters != loras.size()) {
+        return false;
+    }
+
+    for (size_t i = 0; i < num_adapters; i++) {
+        auto it = loras.find(adapters[i]);
+
+        if (it == loras.end() || it->second != scales[i]) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
 void llama_context::clear_adapter_lora() {
     LLAMA_LOG_DEBUG("%s: call\n", __func__);
 
@@ -3243,6 +3277,10 @@ void llama_clear_adapter_lora(llama_context * ctx) {
     ctx->clear_adapter_lora();
 }
 
+void llama_put_adapter_loras(llama_context * ctx, size_t num_adapters, llama_adapter_lora ** adapters, float * scales) {
+    ctx->put_adapter_loras(num_adapters, adapters, scales);
+}
+
 int32_t llama_apply_adapter_cvec(
         llama_context * ctx,
         const float * data,
diff --git a/src/llama-context.h b/src/llama-context.h
index 8e71cdd1dc5..adcb79208f8 100644
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -111,6 +111,10 @@ struct llama_context {
     bool rm_adapter_lora(
            llama_adapter_lora * adapter);
 
+    void put_adapter_loras(size_t num_adapters, llama_adapter_lora ** adapters, float * scales);
+
+    bool are_adapter_loras_same(size_t num_adapters, llama_adapter_lora ** adapters, float * scales);
+
     void clear_adapter_lora();
 
     bool apply_adapter_cvec(
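
Usage note (not part of the patch): a minimal sketch of how a caller could drive the new entry point directly, based on the behavior implemented above. The names ctx, adapter_a and adapter_b are illustrative placeholders; the adapters are assumed to have been loaded earlier with the existing llama_adapter_lora_init API.

    // Sketch only: ctx is a valid llama_context; adapter_a / adapter_b were
    // returned by llama_adapter_lora_init() for the same model (hypothetical names).
    llama_adapter_lora * adapters[2] = { adapter_a, adapter_b };
    float                scales[2]   = { 1.0f, 0.5f };

    // Applies both adapters with their scales.
    llama_put_adapter_loras(ctx, 2, adapters, scales);

    // Repeating the call with the same adapters and scales is a no-op:
    // are_adapter_loras_same() detects that the requested set is unchanged,
    // so the context avoids clearing and re-applying the adapters.
    llama_put_adapter_loras(ctx, 2, adapters, scales);

    // Passing an empty set clears whatever is currently applied
    // (equivalent to llama_clear_adapter_lora when adapters are present).
    llama_put_adapter_loras(ctx, 0, nullptr, nullptr);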