From 62cedaa51cb71bd7d336df1ff2192c6e6d3e70dc Mon Sep 17 00:00:00 2001 From: Adrian G L Date: Mon, 2 Mar 2026 15:51:40 +0100 Subject: [PATCH] :redid llama-swap --- hosts/galadriel/configuration.nix | 2 +- modules/llama-swap.nix | 92 +++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 modules/llama-swap.nix diff --git a/hosts/galadriel/configuration.nix b/hosts/galadriel/configuration.nix index c7be67d..cdb2761 100644 --- a/hosts/galadriel/configuration.nix +++ b/hosts/galadriel/configuration.nix @@ -38,7 +38,7 @@ ../../modules/miniflux.nix #../../modules/ollama.nix # replaced by llama-cpp + llama-swap #../../modules/openwebui.nix # using llama-cpp built-in UI instead - ../../modules/llama-cpp.nix + ../../modules/llama-swap.nix ../../modules/librechat.nix ../../modules/immich.nix diff --git a/modules/llama-swap.nix b/modules/llama-swap.nix new file mode 100644 index 0000000..fdd07b7 --- /dev/null +++ b/modules/llama-swap.nix @@ -0,0 +1,92 @@ +{ + config, + pkgs, + lib, + ... 
}:
{
  # llama.cpp (Vulkan backend) CLI tools available system-wide; llama-swap
  # spawns llama-server from this same package below so versions stay in sync.
  environment.systemPackages = [ pkgs.unstable.llama-cpp-vulkan ];

  services.llama-swap = {
    enable = true;
    port = 8085;
    openFirewall = true;

    settings =
      let
        llama-server = lib.getExe' pkgs.unstable.llama-cpp-vulkan "llama-server";
      in
      {
        # Give large models time to load before the proxy declares them dead.
        healthCheckTimeout = 180;
        # Each spawned llama-server instance is assigned a port counting up from here.
        startPort = 12000;
        # Default idle-unload time in seconds; models below override via `ttl`.
        globalTTL = 600;
        logLevel = "info";

        # Global macros; a model's own `macros` block overrides these per model.
        # NOTE: `\${...}` is Nix string escaping — llama-swap receives a literal
        # `${...}` and performs its own macro substitution at runtime.
        macros = {
          ctx = 32768;
          ngl = 99; # layers offloaded to the GPU
          quant = "Q4_K_M";

          # Quantized KV cache to cut the VRAM cost of large contexts.
          kv_type_k = "q4_0";
          kv_type_v = "q4_0";
          kv_cache = "-ctk \${kv_type_k} -ctv \${kv_type_v}";

          # Intentionally empty default: every model MUST set hf_repo, or the
          # resulting `--hf-repo :<quant>` flag is invalid.
          hf_repo = "";
          hf_param = "--hf-repo \${hf_repo}:\${quant}";

          # \${PORT} is substituted by llama-swap with the per-instance port.
          llama-base = "${llama-server} --port \${PORT} --host 0.0.0.0 --ctx-size \${ctx} -ngl \${ngl} \${kv_cache} \${hf_param}";
        };

        models = {

          "qwen3.5-35b-a3b" = {
            macros = {
              ngl = 40; # partial offload: model too large for full-VRAM placement
              hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF";
              quant = "Q2_K_L";
            };
            cmd = "\${llama-base}";
            aliases = [ "qwen3.5" ];
            ttl = 900;
          };

          # Renamed from "ministal-3-8b-reasonning" (two typos): the key now
          # matches the upstream repo spelling and the "ministral" alias.
          "ministral-3-8b-reasoning" = {
            macros = {
              ngl = 99;
              hf_repo = "mistralai/Ministral-3-8B-Reasoning-2512-GGUF";
            };
            cmd = "\${llama-base}";
            aliases = [ "ministral" ];
            ttl = 900;
          };

        };

        # Remote models proxied through OpenRouter alongside the local ones.
        peers = {
          openrouter = {
            proxy = "https://openrouter.ai/api";
            # Resolved from the service environment at runtime, so the API key
            # never lands in the world-readable Nix store.
            apiKey = "\${env.OPENROUTER_API_KEY}";
            models = [
              "minimax/minimax-m2.5"
              "z-ai/glm-5"
              "qwen/qwen3-coder-next"
              "moonshotai/kimi-k2.5"
            ];
            filters = {
              # Drop client-supplied sampling params before forwarding upstream.
              stripParams = "temperature, top_p";
              setParams = {
                provider = {
                  data_collection = "deny";
                  zdr = true;
                };
              };
            };
          };
        };

      };
  };
}