diff --git a/common/arg.cpp b/common/arg.cpp
index 5fbc9022c02..43909265b3b 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -1111,7 +1111,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, int value) {
             params.cpuparams.n_threads = value;
             if (params.cpuparams.n_threads <= 0) {
-                params.cpuparams.n_threads = std::thread::hardware_concurrency();
+                params.cpuparams.n_threads = cpu_get_num_math();
             }
         }
     ).set_env("LLAMA_ARG_THREADS"));
@@ -1121,7 +1121,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, int value) {
             params.cpuparams_batch.n_threads = value;
             if (params.cpuparams_batch.n_threads <= 0) {
-                params.cpuparams_batch.n_threads = std::thread::hardware_concurrency();
+                params.cpuparams_batch.n_threads = cpu_get_num_math();
             }
         }
     ));
@@ -3216,7 +3216,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, int value) {
             params.speculative.cpuparams.n_threads = value;
             if (params.speculative.cpuparams.n_threads <= 0) {
-                params.speculative.cpuparams.n_threads = std::thread::hardware_concurrency();
+                params.speculative.cpuparams.n_threads = cpu_get_num_math();
             }
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
@@ -3226,7 +3226,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, int value) {
             params.speculative.cpuparams_batch.n_threads = value;
             if (params.speculative.cpuparams_batch.n_threads <= 0) {
-                params.speculative.cpuparams_batch.n_threads = std::thread::hardware_concurrency();
+                params.speculative.cpuparams_batch.n_threads = cpu_get_num_math();
             }
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));