fix(llama-swap): add built-in tools to llama-server commands

2026-05-19 18:01:40 +02:00
parent a96e711a0c
commit 01971d1f83
1 changed file with 9 additions and 17 deletions
@@ -45,14 +45,15 @@
          batch = "-b 1024 -ub 1024"; # default 512 512
          hf_repo = "";
          image-tokens = "--image-min-tokens 256 --image-max-tokens 1536";
-          qwen35-thinking = "--chat-template-kwargs '{\"enable_thinking\":true}'";
-          qwen35-no-thinking = "--chat-template-kwargs '{\"enable_thinking\":false}'";
+          tools = "--tools 'all'";
+          thinking = "--reasoning on";
+          no-thinking = "--reasoning off";
        };

        models = {

          "qwen3.6-35b-a3b" = {
-            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} $\{batch\} --hf-repo $\{hf_repo\} $\{image-tokens\} $\{qwen35-no-thinking\} ";
+            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} $\{batch\} --hf-repo $\{hf_repo\} $\{image-tokens\} $\{no-thinking\} $\{tools\}";
            aliases = [ "qwen3.6" ];
            ttl = 1800;
            macros = {
@@ -61,7 +62,7 @@
            };
          };
          "qwen3.5-9b" = {
-            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} $\{batch\} $\{image-tokens\} $\{qwen35-thinking\} ";
+            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} $\{batch\} $\{image-tokens\} $\{thinking\} $\{tools\}";
            ttl = 900;
            macros = {
              hf_repo = "unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL";
@@ -70,7 +71,7 @@
          };

          "gemma4" = {
-            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -fa 0 -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
+            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -fa 0 -ngl $\{ngl\} --hf-repo $\{hf_repo\}   $\{tools\}";
            ttl = 900;
            macros = {
              hf_repo = "unsloth/gemma-4-26B-A4B-it-GGUF:UD-IQ3_XXS";
@@ -79,7 +80,7 @@
          };

          "gemma4E4" = {
-            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\}  $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
+            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\}  $\{kv_cache\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} $\{tools\} ";
            ttl = 900;
            macros = {
              hf_repo = "unsloth/gemma-4-E4B-it-GGUF";
@@ -88,27 +89,18 @@
          };

          "ministal-3-8b-reasonning" = {
-            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
+            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} $\{tools\} ";
            aliases = [ "ministral3" ];
            ttl = 900;
            macros.hf_repo = "mistralai/Ministral-3-8B-Reasoning-2512-GGUF";
          };
          "ministal-3-3b" = {
-            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
+            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}  $\{tools\} ";
            aliases = [ "ministral3-mini" ];
            ttl = 900;
            macros.hf_repo = "mistralai/Ministral-3-3B-Instruct-2512-GGUF";
          };

-          "minicpm-o-4_5" = {
-            cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --mmproj-url https://huggingface.co/openbmb/MiniCPM-o-4_5-gguf/resolve/main/vision/MiniCPM-o-4_5-vision-F16.gguf";
-            aliases = [
-              "openbmb/MiniCPM-o-4_5-gguf"
-              "minicpm"
-            ];
-            ttl = 900;
-            macros.hf_repo = "openbmb/MiniCPM-o-4_5-gguf";
-          };
          "z-image-turbo" = {
            cmd = "${sd-server} --listen-port $\{PORT\} --diffusion-model $\{diffusion_model\} --vae $\{vae\} --llm $\{llm\} --offload-to-cpu --cfg-scale 1.0 --height 1024 --width 1024 --steps 4";
            checkEndpoint = "/";