From 16ca4c733b5dc22afb2eec7b6c7d9d750eaa50c5 Mon Sep 17 00:00:00 2001 From: Adrian Gunnar Lauterer Date: Tue, 3 Mar 2026 21:20:22 +0100 Subject: [PATCH] some llama swap fixes --- hosts/galadriel/hardware-configuration.nix | 4 ++-- modules/llama-swap.nix | 23 +++++++++++++++++----- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/hosts/galadriel/hardware-configuration.nix b/hosts/galadriel/hardware-configuration.nix index 88b344b..f90b10f 100644 --- a/hosts/galadriel/hardware-configuration.nix +++ b/hosts/galadriel/hardware-configuration.nix @@ -34,8 +34,8 @@ ]; services.udev.extraRules = '' - ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/ccs0/job_timeout_ms}="10000" - ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/rcs0/job_timeout_ms}="10000" + ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/ccs0/job_timeout_ms}="100000" + ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/rcs0/job_timeout_ms}="100000" ''; diff --git a/modules/llama-swap.nix b/modules/llama-swap.nix index 3dadad0..f3219b2 100644 --- a/modules/llama-swap.nix +++ b/modules/llama-swap.nix @@ -33,19 +33,26 @@ models = { "qwen3.5-35b-a3b" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 1024"; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj /var/cache/llama-swap/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf"; aliases = [ "qwen3.5" ]; - ttl = 900; + ttl = 1800; macros = { hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q2_K_XL"; - ngl = 38; + ngl = 40; + ctx = 30000; }; }; "qwen3.5-9b" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 1024"; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj-url https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/mmproj-F16.gguf"; ttl = 900; macros.hf_repo = "unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL"; }; + "qwen3.5-2b" = { + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --image-max-tokens 1024 --image-min-tokens 512"; + ttl = 900; + macros.hf_repo = "unsloth/Qwen3.5-2B-GGUF:UD-Q8_K_XL"; + macros.ctx = 64000; + }; "ministal-3-8b-reasonning" = { cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}"; aliases = [ "ministral3" ]; @@ -56,7 +63,13 @@ cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}"; aliases = [ "ministral3-mini" ]; ttl = 900; - macros.hf_repo = "mistralai/Ministral-3-3B-2512-GGUF"; + macros.hf_repo = "mistralai/Ministral-3-3B-Instruct-2512-GGUF"; + }; + "minicpm-o-4_5" = { + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}"; + aliases = [ "openbmb/MiniCPM-o-4_5-gguf" "minicpm" ]; + ttl = 900; + macros.hf_repo = "openbmb/MiniCPM-o-4_5-gguf"; }; };