42 lines
785 B
Nix
42 lines
785 B
Nix
# NixOS module: runs a llama.cpp server (Vulkan build) serving a local
# Qwen3.5 GGUF model together with its multimodal projector, and installs
# the ollama CLI system-wide.
#
# NOTE(review): the `unstable` argument is accepted but never used; both
# packages are read from `pkgs.unstable.*` instead — presumably an overlay
# exposes the unstable channel under `pkgs.unstable`; confirm. The argument
# list is kept as-is so whatever passes these module args keeps working.
{
  config,
  pkgs,
  lib,
  unstable,
  ...
}:

let
  # Directory holding the GGUF model and the multimodal projector file.
  modelDir = "/var/lib/llama/models";
in

{
  environment.systemPackages = [ pkgs.unstable.ollama ];

  services.llama-cpp = {
    enable = true;
    package = pkgs.unstable.llama-cpp-vulkan;

    # NOTE(review): binds to every interface and opens the port in the
    # firewall, and no `--api-key` flag is passed below — the server is
    # presumably reachable without authentication from the network; confirm
    # that this is intended.
    host = "0.0.0.0";
    port = 11111;
    openFirewall = true;

    model = "${modelDir}/Qwen3.5-35B-A3B-UD-Q2_K_XL.gguf";

    extraFlags = [
      # Context size in tokens.
      "-c"
      "32000"

      # Number of layers offloaded to the GPU.
      "-ngl"
      "41" # technically the entire Qwen3.5 model

      # Bounds on the number of tokens an input image may occupy.
      "--image-min-tokens"
      "1024"
      "--image-max-tokens"
      "2048"

      #"--hf-repo" "unsloth/Qwen3.5-35B-A3B-GGUF:Q2_K_L"

      # Multimodal projector used for image input.
      "--mmproj"
      "${modelDir}/mmproj-F16.gguf"

      # Quantize the KV cache (keys and values) to q4_0.
      "-ctk"
      "q4_0"
      "-ctv"
      "q4_0"

      # Load the model into memory instead of memory-mapping the file.
      "--no-mmap"
    ];
  };
}