diff --git a/hosts/galadriel/configuration.nix b/hosts/galadriel/configuration.nix
index c7be67d..cdb2761 100644
--- a/hosts/galadriel/configuration.nix
+++ b/hosts/galadriel/configuration.nix
@@ -38,7 +38,7 @@
     ../../modules/miniflux.nix
     #../../modules/ollama.nix # replaced by llama-cpp + llama-swap
     #../../modules/openwebui.nix # using llama-cpp built-in UI instead
-    ../../modules/llama-cpp.nix
+    ../../modules/llama-swap.nix
     ../../modules/librechat.nix
     ../../modules/immich.nix
 
diff --git a/modules/llama-swap.nix b/modules/llama-swap.nix
new file mode 100644
index 0000000..c8060fd
--- /dev/null
+++ b/modules/llama-swap.nix
@@ -0,0 +1,123 @@
+# Configures the llama-swap service: two local models launched through
+# llama.cpp's llama-server, plus an OpenRouter peer for remote models.
+{
+  config,
+  pkgs,
+  lib,
+  # NOTE(review): assumes the host passes `unstable` as a module argument
+  # (e.g. via specialArgs); it must be named here to be in scope below.
+  unstable,
+  ...
+}:
+{
+  environment.systemPackages = [ unstable.llama-cpp-vulkan ];
+
+  services.llama-swap = {
+    enable = true;
+    port = 11111;
+    openFirewall = true;
+
+    settings =
+      let
+        # lib.getExe accepts only a derivation; lib.getExe' is the variant
+        # that also takes the name of the executable to pick.
+        llama-server = lib.getExe' unstable.llama-cpp-vulkan "llama-server";
+      in
+      {
+        healthCheckTimeout = 180;
+        startPort = 12000;
+        globalTTL = 600;
+        logLevel = "info";
+
+        # llama-swap macros use the same ${name} syntax as Nix interpolation,
+        # so they are escaped (\${...} in "..." strings, ''${...} in ''...''
+        # strings) to reach the generated config verbatim. Only
+        # ${llama-server} below is a real Nix interpolation.
+        #
+        # NOTE(review): Nix emits attribute names in sorted order; if
+        # llama-swap needs a macro declared before it is referenced by
+        # another macro, confirm these nested macros still resolve.
+        macros = {
+          ctx = 32768;
+
+          ngl = 99;
+
+          quant = "Q4_K_M";
+
+          kv_type_k = "q4_0";
+          kv_type_v = "q4_0";
+          kv_cache = "-ctk \${kv_type_k} -ctv \${kv_type_v}";
+
+          # Empty default; every model below overrides hf_repo.
+          hf_repo = "";
+          hf_param = "--hf-repo \${hf_repo}:\${quant}";
+
+          llama-base = ''
+            ${llama-server}
+            --port ''${PORT}
+            --host 0.0.0.0
+            --ctx-size ''${ctx}
+            -ngl ''${ngl}
+            ''${kv_cache}
+            ''${hf_param}
+          '';
+        };
+
+        models = {
+
+          "qwen3.5-35b-a3b" = {
+            macros = {
+              ngl = 40;
+              hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF";
+              quant = "Q2_K_L";
+            };
+            cmd = ''
+              ''${llama-base}
+            '';
+            aliases = [ "qwen3.5" ];
+            ttl = 900;
+          };
+
+          # NOTE(review): this model ID looks misspelled; it is kept as-is and
+          # the corrected spelling is offered as an additional alias.
+          "ministal-3-8b-reasonning" = {
+            macros = {
+              hf_repo = "mistralai/Ministral-3-8B-Reasoning-2512-GGUF";
+            };
+            cmd = ''
+              ''${llama-base}
+            '';
+            aliases = [
+              "ministral"
+              "ministral-3-8b-reasoning"
+            ];
+            ttl = 900;
+          };
+
+        };
+
+        peers = {
+          openrouter = {
+            proxy = "https://openrouter.ai/api";
+            apiKey = "\${env.OPENROUTER_API_KEY}";
+            models = [
+              "minimax/minimax-m2.5"
+              "z-ai/glm-5"
+              "qwen/qwen3-coder-next"
+              "moonshotai/kimi-k2.5"
+            ];
+            filters = {
+              stripParams = "temperature, top_p";
+              setParams = {
+                provider = {
+                  data_collection = "deny";
+                  zdr = true;
+                };
+              };
+            };
+          };
+        };
+
+      };
+  };
+}