diff --git a/modules/llama-swap.nix b/modules/llama-swap.nix index bc2c2c3..080de14 100644 --- a/modules/llama-swap.nix +++ b/modules/llama-swap.nix @@ -45,14 +45,15 @@ batch = "-b 1024 -ub 1024"; # default 512 512 hf_repo = ""; image-tokens = "--image-min-tokens 256 --image-max-tokens 1536"; - qwen35-thinking = "--chat-template-kwargs '{\"enable_thinking\":true}'"; - qwen35-no-thinking = "--chat-template-kwargs '{\"enable_thinking\":false}'"; + tools = "--tools 'all'"; + thinking = "--reasoning on"; + no-thinking = "--reasoning off"; }; models = { "qwen3.6-35b-a3b" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} $\{batch\} --hf-repo $\{hf_repo\} $\{image-tokens\} $\{qwen35-no-thinking\} "; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} $\{batch\} --hf-repo $\{hf_repo\} $\{image-tokens\} $\{no-thinking\} $\{tools\}"; aliases = [ "qwen3.6" ]; ttl = 1800; macros = { @@ -61,7 +62,7 @@ }; }; "qwen3.5-9b" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} $\{batch\} $\{image-tokens\} $\{qwen35-thinking\} "; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} $\{batch\} $\{image-tokens\} $\{thinking\} $\{tools\}"; ttl = 900; macros = { hf_repo = "unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL"; @@ -70,7 +71,7 @@ }; "gemma4" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -fa 0 -ngl $\{ngl\} --hf-repo $\{hf_repo\}"; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -fa 0 -ngl $\{ngl\} --hf-repo $\{hf_repo\} $\{tools\}"; ttl = 900; macros = { hf_repo = "unsloth/gemma-4-26B-A4B-it-GGUF:UD-IQ3_XXS"; @@ -79,7 +80,7 @@ }; "gemma4E4" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}"; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} $\{tools\} "; ttl = 900; macros = { hf_repo = "unsloth/gemma-4-E4B-it-GGUF"; @@ -88,27 +89,18 @@ }; "ministal-3-8b-reasonning" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}"; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} $\{tools\} "; aliases = [ "ministral3" ]; ttl = 900; macros.hf_repo = "mistralai/Ministral-3-8B-Reasoning-2512-GGUF"; }; "ministal-3-3b" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}"; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} $\{tools\} "; aliases = [ "ministral3-mini" ]; ttl = 900; macros.hf_repo = "mistralai/Ministral-3-3B-Instruct-2512-GGUF"; }; - "minicpm-o-4_5" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --mmproj-url https://huggingface.co/openbmb/MiniCPM-o-4_5-gguf/resolve/main/vision/MiniCPM-o-4_5-vision-F16.gguf"; - aliases = [ - "openbmb/MiniCPM-o-4_5-gguf" - "minicpm" - ]; - ttl = 900; - macros.hf_repo = "openbmb/MiniCPM-o-4_5-gguf"; - }; "z-image-turbo" = { cmd = "${sd-server} --listen-port $\{PORT\} --diffusion-model $\{diffusion_model\} --vae $\{vae\} --llm $\{llm\} --offload-to-cpu --cfg-scale 1.0 --height 1024 --width 1024 --steps 4"; checkEndpoint = "/";