{
  config,
  pkgs,
  lib,
  ...
}:

let
  modelDir = "/var/lib/llama-cpp/models";

  # llama-cpp with Vulkan support for Intel Arc
  llama-cpp-vulkan = pkgs.llama-cpp.override {
    vulkanSupport = true;
    cudaSupport = false;
    rocmSupport = false;
    openclSupport = false;
    blasSupport = true;
  };
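
  # Note (assumption, not from the original): `vulkaninfo --summary` from
  # pkgs.vulkan-tools is a quick way to confirm the Arc GPU is visible to
  # Vulkan before debugging llama-cpp itself.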

  llama-server = lib.getExe' llama-cpp-vulkan "llama-server";

  # Model definitions: name -> { url, filename }
  models = {
    "Ministral-3-8B-Reasoning-Q4_K_M" = {
      url = "https://huggingface.co/mistralai/Ministral-3-8B-Reasoning-2512-GGUF/resolve/main/Ministral-3-8B-Reasoning-2512-Q4_K_M.gguf";
      filename = "Ministral-3-8B-Reasoning-2512-Q4_K_M.gguf";
    };
    "Qwen3.5-35B-A3B-UD-Q3_K_M" = {
      url = "https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/Qwen3.5-35B-A3B-UD-Q3_K_M.gguf";
      filename = "Qwen3.5-35B-A3B-UD-Q3_K_M.gguf";
    };
    "Qwen3.5-27B-UD-Q4_K_XL" = {
      url = "https://huggingface.co/unsloth/Qwen3.5-27B-GGUF/resolve/main/Qwen3.5-27B-UD-Q4_K_XL.gguf";
      filename = "Qwen3.5-27B-UD-Q4_K_XL.gguf";
    };
    "LFM2-24B-A2B-Q4_K_M" = {
      url = "https://huggingface.co/LiquidAI/LFM2-24B-A2B-GGUF/resolve/main/LFM2-24B-A2B-Q4_K_M.gguf";
      filename = "LFM2-24B-A2B-Q4_K_M.gguf";
    };
    "Nanbeige4-3B-Thinking-Q4_K_M" = {
      url = "https://huggingface.co/bartowski/Nanbeige_Nanbeige4-3B-Thinking-2511-GGUF/resolve/main/Nanbeige_Nanbeige4-3B-Thinking-2511-Q4_K_M.gguf";
      filename = "Nanbeige_Nanbeige4-3B-Thinking-2511-Q4_K_M.gguf";
    };
  };

  # Generate a systemd oneshot service per model that downloads the file if missing
  downloadServices = lib.mapAttrs' (
    name: model:
    lib.nameValuePair "llama-cpp-download-${name}" {
      description = "Download GGUF model: ${name}";
      after = [ "network-online.target" ];
      wants = [ "network-online.target" ];
      wantedBy = [ "multi-user.target" ];

      serviceConfig = {
        Type = "oneshot";
        RemainAfterExit = true;
        User = "llama-cpp";
        Group = "llama-cpp";
        ExecStart = pkgs.writeShellScript "download-${name}" ''
          set -euo pipefail
          MODEL_PATH="${modelDir}/${model.filename}"
          if [ ! -f "$MODEL_PATH" ]; then
            echo "Downloading ${name}..."
            # --fail keeps curl from saving an HTML error page as the model;
            # downloading to .tmp and renaming makes the write atomic
            ${lib.getExe pkgs.curl} --fail --location --retry 3 -o "$MODEL_PATH.tmp" "${model.url}"
            mv "$MODEL_PATH.tmp" "$MODEL_PATH"
            echo "Download complete: ${name}"
          else
            echo "Model already exists: ${name}"
          fi
        '';
      };
    }
  ) models;
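
  # Each entry in `models` yields one unit named after its key, e.g.
  # `llama-cpp-download-Ministral-3-8B-Reasoning-Q4_K_M.service`.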

in
{
  environment.systemPackages = [ llama-cpp-vulkan ];
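
  # Assumption (not in the original): the Vulkan backend needs a working
  # system graphics stack; on recent NixOS that is `hardware.graphics`
  # (formerly `hardware.opengl`). Uncomment if it is not enabled elsewhere:
  # hardware.graphics.enable = true;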

  users.users.llama-cpp = {
    isSystemUser = true;
    group = "llama-cpp";
    home = "/var/lib/llama-cpp";
    description = "llama-cpp service user";
  };
  users.groups.llama-cpp = { };

  systemd.tmpfiles.rules = [
    "d ${modelDir} 0755 llama-cpp llama-cpp - -"
  ];

  systemd.services = downloadServices;
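
  # Sketch (assumption, not part of the original config): an inference
  # service wiring up the otherwise-unused `llama-server` binding from the
  # `let` block. Model choice, port, and flags are illustrative. To enable
  # it, merge the attrset into the line above, e.g.
  # `systemd.services = downloadServices // { llama-server = { ... }; };`
  #
  # llama-server = {
  #   description = "llama.cpp server (Vulkan)";
  #   after = [ "llama-cpp-download-Ministral-3-8B-Reasoning-Q4_K_M.service" ];
  #   wantedBy = [ "multi-user.target" ];
  #   serviceConfig = {
  #     User = "llama-cpp";
  #     Group = "llama-cpp";
  #     # -ngl 99 offloads all layers to the GPU (assumption: enough VRAM)
  #     ExecStart = "${llama-server} -m ${modelDir}/Ministral-3-8B-Reasoning-2512-Q4_K_M.gguf -ngl 99 --host 127.0.0.1 --port 8080";
  #     Restart = "on-failure";
  #   };
  # };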
}