format and llama tuning
This commit is contained in:
@@ -12,22 +12,30 @@ in
|
||||
|
||||
{
|
||||
environment.systemPackages = [ pkgs.unstable.ollama ];
|
||||
# llama-cpp service: bound to 0.0.0.0:11111 with the firewall opened, using the llama-cpp-vulkan package from pkgs.unstable.
services.llama-cpp = {
|
||||
enable = true;
|
||||
host = "0.0.0.0";
|
||||
port = 11111;
|
||||
package = pkgs.unstable.llama-cpp-vulkan;
|
||||
openFirewall = true;
|
||||
model = "/var/lib/llama/models/Qwen3.5-35B-A3B-UD-Q2_K_XL.gguf" ;
|
||||
extraFlags = [
|
||||
"-c" "32000"
|
||||
"-ngl" "41" # technically the entire qwen3.5 model
|
||||
"--image-min-tokens" "1024"
|
||||
"--image-max-tokens" "2048"
|
||||
#"--hf-repo" "unsloth/Qwen3.5-35B-A3B-GGUF:Q2_K_L"
|
||||
"--mmproj" "/var/lib/llama/models/mmproj-F16.gguf"
|
||||
"-ctk" "q4_0" "-ctv" "q4_0" # quantize the KV cache.
|
||||
"--no-mmap"
|
||||
];
|
||||
};
|
||||
# llama-cpp service: bound to 0.0.0.0:11111 with the firewall opened, using the llama-cpp-vulkan package from pkgs.unstable.
services.llama-cpp = {
|
||||
enable = true;
|
||||
host = "0.0.0.0";
|
||||
port = 11111;
|
||||
package = pkgs.unstable.llama-cpp-vulkan;
|
||||
openFirewall = true;
|
||||
model = "/var/lib/llama/models/Qwen3.5-35B-A3B-UD-Q2_K_XL.gguf";
|
||||
extraFlags = [
|
||||
"-c"
|
||||
"32000"
|
||||
"-ngl"
|
||||
"41" # technically the entire qwen3.5 model
|
||||
"--image-min-tokens"
|
||||
"1024"
|
||||
"--image-max-tokens"
|
||||
"2048"
|
||||
#"--hf-repo" "unsloth/Qwen3.5-35B-A3B-GGUF:Q2_K_L"
|
||||
"--mmproj"
|
||||
"/var/lib/llama/models/mmproj-F16.gguf"
|
||||
"-ctk"
|
||||
"q4_0"
|
||||
"-ctv"
|
||||
"q4_0" # quantize the KV cache.
|
||||
"--no-mmap"
|
||||
];
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user