remove ollama, it's bad

Your Name
2026-03-01 22:40:31 +01:00
parent 1139d767b1
commit 3b671fe34e
3 changed files with 169 additions and 2 deletions
+4 -2
@@ -36,8 +36,10 @@
../../modules/qbittorrent.nix
../../modules/mealie.nix
../../modules/miniflux.nix
-../../modules/ollama.nix
-../../modules/openwebui.nix
+#../../modules/ollama.nix # replaced by llama-cpp + llama-swap
+#../../modules/openwebui.nix # using llama-cpp built-in UI instead
+../../modules/llama-cpp.nix
+../../modules/llama-swap.nix
../../modules/librechat.nix
../../modules/immich.nix
modules/llama-cpp.nix +91
@@ -0,0 +1,91 @@
{
  config,
  pkgs,
  lib,
  ...
}:
let
  modelDir = "/var/lib/llama-cpp/models";
  # llama-cpp with Vulkan support for Intel Arc
  llama-cpp-vulkan = pkgs.llama-cpp.override {
    vulkanSupport = true;
    cudaSupport = false;
    rocmSupport = false;
    openclSupport = false;
    blasSupport = true;
  };
  llama-server = lib.getExe' llama-cpp-vulkan "llama-server";
  # Model definitions: name -> { url, filename }
  models = {
    "Ministral-3-8B-Reasoning-Q4_K_M" = {
      url = "https://huggingface.co/mistralai/Ministral-3-8B-Reasoning-2512-GGUF/resolve/main/Ministral-3-8B-Reasoning-2512-Q4_K_M.gguf";
      filename = "Ministral-3-8B-Reasoning-2512-Q4_K_M.gguf";
    };
    "Qwen3.5-35B-A3B-UD-Q3_K_M" = {
      url = "https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/resolve/main/Qwen3.5-35B-A3B-UD-Q3_K_M.gguf";
      filename = "Qwen3.5-35B-A3B-UD-Q3_K_M.gguf";
    };
    "Qwen3.5-27B-UD-Q4_K_XL" = {
      url = "https://huggingface.co/unsloth/Qwen3.5-27B-GGUF/resolve/main/Qwen3.5-27B-UD-Q4_K_XL.gguf";
      filename = "Qwen3.5-27B-UD-Q4_K_XL.gguf";
    };
    "LFM2-24B-A2B-Q4_K_M" = {
      url = "https://huggingface.co/LiquidAI/LFM2-24B-A2B-GGUF/resolve/main/LFM2-24B-A2B-Q4_K_M.gguf";
      filename = "LFM2-24B-A2B-Q4_K_M.gguf";
    };
    "Nanbeige4-3B-Thinking-Q4_K_M" = {
      url = "https://huggingface.co/bartowski/Nanbeige_Nanbeige4-3B-Thinking-2511-GGUF/resolve/main/Nanbeige_Nanbeige4-3B-Thinking-2511-Q4_K_M.gguf";
      filename = "Nanbeige_Nanbeige4-3B-Thinking-2511-Q4_K_M.gguf";
    };
  };
  # Generate one systemd oneshot service per model
  # (llama-cpp-download-<name>.service) that downloads the file if it is missing
  downloadServices = lib.mapAttrs' (
    name: model:
    lib.nameValuePair "llama-cpp-download-${name}" {
      description = "Download GGUF model: ${name}";
      after = [ "network-online.target" ];
      wants = [ "network-online.target" ];
      wantedBy = [ "multi-user.target" ];
      serviceConfig = {
        Type = "oneshot";
        RemainAfterExit = true;
        User = "llama-cpp";
        Group = "llama-cpp";
        ExecStart = pkgs.writeShellScript "download-${name}" ''
          set -euo pipefail
          MODEL_PATH="${modelDir}/${model.filename}"
          if [ ! -f "$MODEL_PATH" ]; then
            echo "Downloading ${name}..."
            # --fail: exit non-zero on HTTP errors instead of saving the error page as the model
            ${lib.getExe pkgs.curl} -L --fail -o "$MODEL_PATH.tmp" "${model.url}"
            mv "$MODEL_PATH.tmp" "$MODEL_PATH"
            echo "Download complete: ${name}"
          else
            echo "Model already exists: ${name}"
          fi
        '';
      };
    }
  ) models;
in
{
  environment.systemPackages = [ llama-cpp-vulkan ];
  users.users.llama-cpp = {
    isSystemUser = true;
    group = "llama-cpp";
    home = "/var/lib/llama-cpp";
    description = "llama-cpp service user";
  };
  users.groups.llama-cpp = { };
  systemd.tmpfiles.rules = [
    "d ${modelDir} 0755 llama-cpp llama-cpp - -"
  ];
  systemd.services = downloadServices;
}
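Since each model gets its own oneshot unit and the script skips files that already exist, a single fetch can be re-run or inspected by hand. A usage sketch, assuming the unit name the mapAttrs' above generates:

systemctl start llama-cpp-download-Ministral-3-8B-Reasoning-Q4_K_M.service
journalctl -u llama-cpp-download-Ministral-3-8B-Reasoning-Q4_K_M.service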
modules/llama-swap.nix +74
@@ -0,0 +1,74 @@
{
  config,
  pkgs,
  lib,
  ...
}:
let
  modelDir = "/var/lib/llama-cpp/models";
  # llama-cpp with Vulkan for Intel Arc
  llama-cpp-vulkan = pkgs.llama-cpp.override {
    vulkanSupport = true;
    cudaSupport = false;
    rocmSupport = false;
    openclSupport = false;
    blasSupport = true;
  };
  llama-server = lib.getExe' llama-cpp-vulkan "llama-server";
  # Common flags for all models; llama-swap substitutes ${PORT} at launch.
  # --no-webui is NOT set so the llama-cpp UI is available per-model
  commonFlags = "--port \${PORT} --host 127.0.0.1 -ngl 99";
in
{
  services.llama-swap = {
    enable = true;
    port = 11111;
    openFirewall = true;
    settings = {
      healthCheckTimeout = 120;
      models = {
        "ministral-3-8b-reasoning" = {
          cmd = "${llama-server} ${commonFlags} -m ${modelDir}/Ministral-3-8B-Reasoning-2512-Q4_K_M.gguf -c 32768";
          aliases = [
            "ministral-3"
            "ministral"
          ];
        };
        "qwen3.5-35b-a3b" = {
          cmd = "${llama-server} ${commonFlags} -m ${modelDir}/Qwen3.5-35B-A3B-UD-Q3_K_M.gguf -c 32768";
          aliases = [
            "qwen3.5-35b"
            "qwen-moe"
          ];
        };
        "qwen3.5-27b" = {
          cmd = "${llama-server} ${commonFlags} -m ${modelDir}/Qwen3.5-27B-UD-Q4_K_XL.gguf -c 16384";
          aliases = [
            "qwen3.5"
            "qwen-27b"
          ];
        };
        "lfm2-24b-a2b" = {
          cmd = "${llama-server} ${commonFlags} -m ${modelDir}/LFM2-24B-A2B-Q4_K_M.gguf -c 32768";
          aliases = [
            "lfm2"
            "liquid"
          ];
        };
        "nanbeige4-3b-thinking" = {
          cmd = "${llama-server} ${commonFlags} -m ${modelDir}/Nanbeige_Nanbeige4-3B-Thinking-2511-Q4_K_M.gguf -c 32768";
          aliases = [
            "nanbeige"
            "nanbeige4"
          ];
        };
      };
    };
  };
}
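llama-swap listens on port 11111 with an OpenAI-compatible API and launches or swaps the matching llama-server process based on the requested model name or alias. A quick smoke test, assuming the standard /v1 routes and the "qwen3.5" alias configured above:

curl http://127.0.0.1:11111/v1/models
curl http://127.0.0.1:11111/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen3.5", "messages": [{"role": "user", "content": "hello"}]}'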