redid llama-swap

This commit is contained in:
2026-03-02 15:51:40 +01:00
parent ca06437580
commit a2e0263bc0
2 changed files with 103 additions and 1 deletions
+1 -1
View File
@@ -38,7 +38,7 @@
../../modules/miniflux.nix
#../../modules/ollama.nix # replaced by llama-cpp + llama-swap
#../../modules/openwebui.nix # using llama-cpp built-in UI instead
../../modules/llama-cpp.nix
../../modules/llama-swap.nix
../../modules/librechat.nix
../../modules/immich.nix
+102
View File
@@ -0,0 +1,102 @@
{
  config,
  pkgs,
  lib,
  # Extra module argument (expected via specialArgs / _module.args).
  # It MUST be bound here explicitly — `...` alone does not put
  # `unstable` in scope, and the original referenced it unbound.
  unstable,
  ...
}:
{
  environment.systemPackages = [ unstable.llama-cpp-vulkan ];

  services.llama-swap = {
    enable = true;
    port = 11111;
    openFirewall = true;

    settings =
      let
        # Absolute path to the `llama-server` binary inside the package.
        # `lib.getExe` accepts a single package argument; picking a
        # specifically named binary requires `lib.getExe'`.
        llama-server = lib.getExe' unstable.llama-cpp-vulkan "llama-server";
      in
      {
        healthCheckTimeout = 180;
        startPort = 12000;
        globalTTL = 600;
        logLevel = "info";

        # llama-swap macros.  References such as ${ctx} are expanded by
        # llama-swap at runtime, NOT by Nix, so every occurrence below is
        # escaped (''${…} in indented strings, \${…} in double-quoted
        # strings).  Unescaped, Nix would treat them as undefined
        # variables and the module would fail to evaluate.
        # NOTE(review): nested macro references (kv_cache → kv_type_k)
        # and numeric macro values assume a recent llama-swap — confirm
        # against the deployed version.
        macros = {
          ctx = 32768;
          ngl = 99;
          quant = "Q4_K_M";
          kv_type_k = "q4_0";
          kv_type_v = "q4_0";
          kv_cache = "-ctk \${kv_type_k} -ctv \${kv_type_v}";
          # Empty default; every model below overrides hf_repo.
          hf_repo = "";
          hf_param = "--hf-repo \${hf_repo}:\${quant}";
          # ${llama-server} is genuine Nix interpolation of the `let`
          # binding above; everything else is a llama-swap macro
          # (''${PORT} is llama-swap's built-in per-model port).
          llama-base = ''
            ${llama-server}
            --port ''${PORT}
            --host 0.0.0.0
            --ctx-size ''${ctx}
            -ngl ''${ngl}
            ''${kv_cache}
            ''${hf_param}
          '';
        };

        models = {
          "qwen3.5-35b-a3b" = {
            # Per-model macro overrides: fewer offloaded layers and a
            # smaller quant to fit this larger model.
            macros = {
              ngl = 40;
              hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF";
              quant = "Q2_K_L";
            };
            cmd = ''
              ''${llama-base}
            '';
            aliases = [ "qwen3.5" ];
            ttl = 900;
          };
          # Key fixed from "ministal-3-8b-reasonning" (double typo vs the
          # upstream repo name).  Clients using the "ministral" alias are
          # unaffected.
          "ministral-3-8b-reasoning" = {
            macros = {
              hf_repo = "mistralai/Ministral-3-8B-Reasoning-2512-GGUF";
            };
            cmd = ''
              ''${llama-base}
            '';
            aliases = [ "ministral" ];
            ttl = 900;
          };
        };

        peers = {
          openrouter = {
            proxy = "https://openrouter.ai/api";
            # Escaped so llama-swap resolves the env var at runtime.
            apiKey = "\${env.OPENROUTER_API_KEY}";
            models = [
              "minimax/minimax-m2.5"
              "z-ai/glm-5"
              "qwen/qwen3-coder-next"
              "moonshotai/kimi-k2.5"
            ];
            filters = {
              # llama-swap's documented filter key is snake_case
              # (strip_params), unlike the camelCase top-level keys.
              strip_params = "temperature, top_p";
              # NOTE(review): setParams / per-peer filters — confirm this
              # is supported by the deployed llama-swap version.
              setParams = {
                provider = {
                  data_collection = "deny";
                  zdr = true;
                };
              };
            };
          };
        };
      };
  };
}