some llama swap fixes
This commit is contained in:
@@ -34,8 +34,8 @@
|
||||
];
|
||||
|
||||
services.udev.extraRules = ''
|
||||
ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/ccs0/job_timeout_ms}="10000"
|
||||
ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/rcs0/job_timeout_ms}="10000"
|
||||
ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/ccs0/job_timeout_ms}="100000"
|
||||
ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/rcs0/job_timeout_ms}="100000"
|
||||
'';
|
||||
|
||||
|
||||
|
||||
+18
-5
@@ -33,19 +33,26 @@
|
||||
|
||||
models = {
|
||||
"qwen3.5-35b-a3b" = {
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 1024";
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj /var/cache/llama-swap/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf";
|
||||
aliases = [ "qwen3.5" ];
|
||||
ttl = 900;
|
||||
ttl = 1800;
|
||||
macros = {
|
||||
hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q2_K_XL";
|
||||
ngl = 38;
|
||||
ngl = 40;
|
||||
ctx = 30000;
|
||||
};
|
||||
};
|
||||
"qwen3.5-9b" = {
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 1024";
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj-url https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/mmproj-F16.gguf";
|
||||
ttl = 900;
|
||||
macros.hf_repo = "unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL";
|
||||
};
|
||||
"qwen3.5-2b" = {
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --image-max-tokens 1024 --image-min-tokens 512";
|
||||
ttl = 900;
|
||||
macros.hf_repo = "unsloth/Qwen3.5-2B-GGUF:UD-Q8_K_XL";
|
||||
macros.ctx = 64000;
|
||||
};
|
||||
"ministal-3-8b-reasonning" = {
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
|
||||
aliases = [ "ministral3" ];
|
||||
@@ -56,7 +63,13 @@
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
|
||||
aliases = [ "ministral3-mini" ];
|
||||
ttl = 900;
|
||||
macros.hf_repo = "mistralai/Ministral-3-3B-2512-GGUF";
|
||||
macros.hf_repo = "mistralai/Ministral-3-3B-Instruct-2512-GGUF";
|
||||
};
|
||||
"minicpm-o-4_5" = {
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
|
||||
aliases = [ "openbmb/MiniCPM-o-4_5-gguf" "minicpm" ];
|
||||
ttl = 900;
|
||||
macros.hf_repo = "openbmb/MiniCPM-o-4_5-gguf";
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user