some llama swap fixes

This commit is contained in:
2026-03-03 21:20:22 +01:00
parent c8f305a5bd
commit 16ca4c733b
2 changed files with 20 additions and 7 deletions
+2 -2
View File
@@ -34,8 +34,8 @@
];
services.udev.extraRules = ''
ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/ccs0/job_timeout_ms}="10000"
ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/rcs0/job_timeout_ms}="10000"
ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/ccs0/job_timeout_ms}="100000"
ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/rcs0/job_timeout_ms}="100000"
'';
+18 -5
View File
@@ -33,19 +33,26 @@
models = {
"qwen3.5-35b-a3b" = {
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 1024";
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj /var/cache/llama-swap/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf";
aliases = [ "qwen3.5" ];
ttl = 900;
ttl = 1800;
macros = {
hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q2_K_XL";
ngl = 38;
ngl = 40;
ctx = 30000;
};
};
"qwen3.5-9b" = {
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 1024";
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj-url https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/mmproj-F16.gguf";
ttl = 900;
macros.hf_repo = "unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL";
};
"qwen3.5-2b" = {
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --image-max-tokens 1024 --image-min-tokens 512";
ttl = 900;
macros.hf_repo = "unsloth/Qwen3.5-2B-GGUF:UD-Q8_K_XL";
macros.ctx = 64000;
};
"ministal-3-8b-reasonning" = {
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
aliases = [ "ministral3" ];
@@ -56,7 +63,13 @@
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
aliases = [ "ministral3-mini" ];
ttl = 900;
macros.hf_repo = "mistralai/Ministral-3-3B-2512-GGUF";
macros.hf_repo = "mistralai/Ministral-3-3B-Instruct-2512-GGUF";
};
"minicpm-o-4_5" = {
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
aliases = [ "openbmb/MiniCPM-o-4_5-gguf" "minicpm" ];
ttl = 900;
macros.hf_repo = "openbmb/MiniCPM-o-4_5-gguf";
};
};