{ config, pkgs, lib, ... }:

let
  modelDir = "/var/lib/llama-cpp/models";

  # llama-cpp built with Vulkan support for Intel Arc
  llama-cpp-vulkan = pkgs.llama-cpp.override {
    vulkanSupport = true;
    cudaSupport = false;
    rocmSupport = false;
    openclSupport = false;
    blasSupport = true;
  };

  llama-server = lib.getExe' llama-cpp-vulkan "llama-server";

  # Model definitions: name -> { url, filename }
  models = {
    "Ministral-3-8B-Reasoning-Q4_K_M" = {
      url = "https://huggingface.co/mistralai/Ministral-3-8B-Reasoning-2512-GGUF/resolve/main/Ministral-3-8B-Reasoning-2512-Q4_K_M.gguf";
      filename = "Ministral-3-8B-Reasoning-2512-Q4_K_M.gguf";
    };
    "Qwen3.5-35B-A3B-UD-Q3_K_M" = {
      url = "https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/Qwen3.5-35B-A3B-UD-Q3_K_M.gguf";
      filename = "Qwen3.5-35B-A3B-UD-Q3_K_M.gguf";
    };
    "Qwen3.5-27B-UD-Q4_K_XL" = {
      url = "https://huggingface.co/unsloth/Qwen3.5-27B-GGUF/resolve/main/Qwen3.5-27B-UD-Q4_K_XL.gguf";
      filename = "Qwen3.5-27B-UD-Q4_K_XL.gguf";
    };
    "LFM2-24B-A2B-Q4_K_M" = {
      url = "https://huggingface.co/LiquidAI/LFM2-24B-A2B-GGUF/resolve/main/LFM2-24B-A2B-Q4_K_M.gguf";
      filename = "LFM2-24B-A2B-Q4_K_M.gguf";
    };
    "Nanbeige4-3B-Thinking-Q4_K_M" = {
      url = "https://huggingface.co/bartowski/Nanbeige_Nanbeige4-3B-Thinking-2511-GGUF/resolve/main/Nanbeige_Nanbeige4-3B-Thinking-2511-Q4_K_M.gguf";
      filename = "Nanbeige_Nanbeige4-3B-Thinking-2511-Q4_K_M.gguf";
    };
  };

  # Generate one systemd oneshot service per model that downloads it if missing
  downloadServices = lib.mapAttrs' (
    name: model:
    lib.nameValuePair "llama-cpp-download-${name}" {
      description = "Download GGUF model: ${name}";
      after = [ "network-online.target" ];
      wants = [ "network-online.target" ];
      wantedBy = [ "multi-user.target" ];
      serviceConfig = {
        Type = "oneshot";
        RemainAfterExit = true;
        User = "llama-cpp";
        Group = "llama-cpp";
        ExecStart = pkgs.writeShellScript "download-${name}" ''
          set -euo pipefail
          MODEL_PATH="${modelDir}/${model.filename}"
          if [ ! -f "$MODEL_PATH" ]; then
            echo "Downloading ${name}..."
            # --fail stops curl from saving an HTML error page as the model;
            # downloading to .tmp and renaming keeps a partial download from
            # being mistaken for a complete file on the next run.
            ${lib.getExe pkgs.curl} --fail -L -o "$MODEL_PATH.tmp" "${model.url}"
            mv "$MODEL_PATH.tmp" "$MODEL_PATH"
            echo "Download complete: ${name}"
          else
            echo "Model already exists: ${name}"
          fi
        '';
      };
    }
  ) models;
in
{
  environment.systemPackages = [ llama-cpp-vulkan ];

  users.users.llama-cpp = {
    isSystemUser = true;
    group = "llama-cpp";
    home = "/var/lib/llama-cpp";
    description = "llama-cpp service user";
  };
  users.groups.llama-cpp = { };

  # Ensure the model directory exists and is owned by the service user
  systemd.tmpfiles.rules = [
    "d ${modelDir} 0755 llama-cpp llama-cpp - -"
  ];

  systemd.services = downloadServices;
}
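# ---------------------------------------------------------------------------
# Note: the `llama-server` binding above is defined but not yet wired to any
# unit. A minimal sketch of a matching runtime service is commented out below;
# it would live inside the module body above, merged with the download units,
# e.g. `systemd.services = downloadServices // { llama-cpp-server = ...; };`.
# The unit name, the choice of model, and the port are assumptions for
# illustration, not part of this module; --model, --host, and --port are
# standard llama-server flags.
#
#   systemd.services.llama-cpp-server = {
#     description = "llama.cpp server (Vulkan)";
#     # Depend on the oneshot download unit; RemainAfterExit=true above means
#     # it counts as active once the model file is in place.
#     after = [ "llama-cpp-download-LFM2-24B-A2B-Q4_K_M.service" ];
#     requires = [ "llama-cpp-download-LFM2-24B-A2B-Q4_K_M.service" ];
#     wantedBy = [ "multi-user.target" ];
#     serviceConfig = {
#       User = "llama-cpp";
#       Group = "llama-cpp";
#       ExecStart = "${llama-server} --model ${modelDir}/LFM2-24B-A2B-Q4_K_M.gguf --host 127.0.0.1 --port 8080";
#       Restart = "on-failure";
#     };
#   };
#
# Once running, the server can be smoke-tested with:
#   curl http://127.0.0.1:8080/health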