format and llama tuning

This commit is contained in:
2026-03-23 22:19:15 +01:00
parent 8173b617e7
commit 83fdef416e
10 changed files with 84 additions and 71 deletions

View File

@@ -241,7 +241,8 @@ in
"--server"
];
}
{ ## Uses systemd unit instead.
{
# # Uses systemd unit instead.
argv = [
"noctalia-shell"
];

View File

@@ -37,7 +37,7 @@
../../modules/mealie.nix
../../modules/miniflux.nix
#../../modules/ollama.nix # replaced by llama-cpp + llama-swap
#../../modules/openwebui.nix # using llama-cpp built-in UI instead
../../modules/openwebui.nix # using llama-cpp built-in UI instead
../../modules/llama-swap.nix
../../modules/librechat.nix
../../modules/immich.nix

View File

@@ -27,7 +27,7 @@
boot.initrd.kernelModules = [ ];
boot.kernelModules = [ "kvm-amd" ];
boot.extraModulePackages = [ ];
boot.kernelParams = [
boot.kernelParams = [
"xe.force_probe=e212"
"xe.vram_force_mmapable=1"
"transparent_hugepage=always"
@@ -38,7 +38,6 @@
ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/rcs0/job_timeout_ms}="100000"
'';
hardware.enableRedistributableFirmware = true;
hardware.firmware = [ pkgs.linux-firmware ];
@@ -51,7 +50,7 @@
extraPackages = with pkgs; [
vpl-gpu-rt
#hardware decode and opencl
#hardware decode and opencl
intel-media-driver # LIBVA_DRIVER_NAME=iHD (for HD Graphics starting Broadwell (2014) and newer)
intel-vaapi-driver # LIBVA_DRIVER_NAME=i965 (older but works better for Firefox/Chromium)
libvdpau-va-gl

View File

@@ -67,10 +67,9 @@
enable = true;
enableSSHSupport = true;
};
services.dbus.enable = true;
services.dbus.implementation = "broker";
services.dbus.implementation = "broker";
services.desktopManager.gnome.enable = true;

View File

@@ -6,7 +6,7 @@
}:
{
services.docling-serve = {
enable = true;
enable = true;
package = pkgs.unstable.docling-serve;
port = 5001;
host = "127.0.0.1";

View File

@@ -50,7 +50,7 @@
# Ensure fcitx5 starts with the session
services.xserver.desktopManager.runXdgAutostartIfNone = true;
# environment.sessionVariables = {
# GTK_IM_MODULE = lib.mkForce "";
# };
# environment.sessionVariables = {
# GTK_IM_MODULE = lib.mkForce "";
# };
}

View File

@@ -6,9 +6,7 @@ let
in
{
sops.secrets."librechat/environmentFile" = {};
sops.secrets."librechat/environmentFile" = { };
# Enable MongoDB
services.mongodb = {
@@ -24,10 +22,13 @@ in
enable = true;
description = "LibreChat server";
# **Native systemd dependency declarations**
requires = [ "mongodb.service" ];
after = [ "network.target" "mongodb.service" ];
after = [
"network.target"
"mongodb.service"
];
serviceConfig = {
EnvironmentFile = config.sops.secrets."librechat/environmentFile".path;
@@ -37,10 +38,10 @@ in
# ExecStart binds to package binary
ExecStart = ''
${pkgs.librechat}/bin/librechat-server \
--host 0.0.0.0 \
--port ${toString librechatPort} \
--config /var/lib/librechat/config.yaml
${pkgs.librechat}/bin/librechat-server \
--host 0.0.0.0 \
--port ${toString librechatPort} \
--config /var/lib/librechat/config.yaml
'';
WorkingDirectory = "/var/lib/librechat";
};
@@ -56,7 +57,7 @@ in
};
users.users.librechat.group = "librechat";
users.groups.librechat = {};
users.groups.librechat = { };
systemd.tmpfiles.rules = [
"d /var/lib/librechat 0755 librechat librechat -"
@@ -67,4 +68,3 @@ in
27017
];
}

View File

@@ -12,22 +12,30 @@ in
{
environment.systemPackages = [ pkgs.unstable.ollama ];
services.llama-cpp = {
enable = true;
host = "0.0.0.0";
port = 11111;
package = pkgs.unstable.llama-cpp-vulkan;
openFirewall = true;
model = "/var/lib/llama/models/Qwen3.5-35B-A3B-UD-Q2_K_XL.gguf" ;
extraFlags = [
"-c" "32000"
"-ngl" "41" # technically entire qwen3.5
"--image-min-tokens" "1024"
"--image-max-tokens" "2048"
#"--hf-repo" "unsloth/Qwen3.5-35B-A3B-GGUF:Q2_K_L"
"--mmproj" "/var/lib/llama/models/mmproj-F16.gguf"
"-ctk" "q4_0" "-ctv" "q4_0" # quantize kv cache.
"--no-mmap"
];
};
services.llama-cpp = {
enable = true;
host = "0.0.0.0";
port = 11111;
package = pkgs.unstable.llama-cpp-vulkan;
openFirewall = true;
model = "/var/lib/llama/models/Qwen3.5-35B-A3B-UD-Q2_K_XL.gguf";
extraFlags = [
"-c"
"32000"
"-ngl"
"41" # technically entire qwen3.5
"--image-min-tokens"
"1024"
"--image-max-tokens"
"2048"
#"--hf-repo" "unsloth/Qwen3.5-35B-A3B-GGUF:Q2_K_L"
"--mmproj"
"/var/lib/llama/models/mmproj-F16.gguf"
"-ctk"
"q4_0"
"-ctv"
"q4_0" # quantize kv cache.
"--no-mmap"
];
};
}

View File

@@ -25,33 +25,38 @@
logLevel = "info";
macros = {
ctx = 32768;
ctx = 64000;
ngl = 99;
kv_cache = "-ctk q4_0 -ctv q4_0";
kv_cache = "-ctk iq4_nl -ctv iq4_nl";
hf_repo = "";
};
models = {
"qwen3.5-35b-a3b" = {
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj /var/cache/llama-swap/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf";
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --image-max-tokens 1024 --chat-template-kwargs '{\"enable_thinking\":false}'";
aliases = [ "qwen3.5" ];
ttl = 1800;
macros = {
hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q2_K_XL";
ngl = 40;
ctx = 30000;
hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF:UD-IQ3_XXS";
};
};
"qwen3.5-9b" = {
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj-url https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/mmproj-F16.gguf";
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 1024 --image-min-tokens 512 --chat-template-kwargs '{\"enable_thinking\":true}'";
ttl = 900;
macros.hf_repo = "unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL";
macros = {
hf_repo = "unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL";
ctx = 128000;
};
};
"qwen3.5-2b" = {
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --image-max-tokens 1024 --image-min-tokens 512";
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --image-max-tokens 1024";
ttl = 900;
macros.hf_repo = "unsloth/Qwen3.5-2B-GGUF:UD-Q8_K_XL";
macros.ctx = 64000;
macros = {
hf_repo = "unsloth/Qwen3.5-2B-GGUF:UD-Q8_K_XL";
ctx = 128000;
};
};
"ministal-3-8b-reasonning" = {
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
@@ -66,8 +71,11 @@
macros.hf_repo = "mistralai/Ministral-3-3B-Instruct-2512-GGUF";
};
"minicpm-o-4_5" = {
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
aliases = [ "openbmb/MiniCPM-o-4_5-gguf" "minicpm" ];
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --mmproj-url https://huggingface.co/openbmb/MiniCPM-o-4_5-gguf/resolve/main/vision/MiniCPM-o-4_5-vision-F16.gguf";
aliases = [
"openbmb/MiniCPM-o-4_5-gguf"
"minicpm"
];
ttl = 900;
macros.hf_repo = "openbmb/MiniCPM-o-4_5-gguf";
};
@@ -108,10 +116,9 @@
"HOME=/var/lib/llama-swap"
"XDG_CACHE_HOME=/var/cache/llama-swap"
"MESA_SHADER_CACHE_DIR=/var/cache/llama-swap/mesa"
"MESA_SHADER_CACHE_MAX_SIZE=1G"
"GGML_VULKAN_MAX_NODES=16"
"GGML_VK_RELAXED_SHAPES=0"
#"MESA_SHADER_CACHE_MAX_SIZE=1G"
#"GGML_VULKAN_MAX_NODES=16"
#"GGML_VK_RELAXED_SHAPES=0"
];
};
}

View File

@@ -34,21 +34,20 @@
# ];
#};
portal = {
enable = true;
xdgOpenUsePortal = true;
config.common.default = [ "gnome"];
extraPortals = [
pkgs.xdg-desktop-portal
pkgs.xdg-desktop-portal-gtk
pkgs.xdg-desktop-portal-gnome
];
configPackages = with pkgs; [
gnome-session
niri
];
};
enable = true;
xdgOpenUsePortal = true;
config.common.default = [ "gnome" ];
extraPortals = [
pkgs.xdg-desktop-portal
pkgs.xdg-desktop-portal-gtk
pkgs.xdg-desktop-portal-gnome
];
configPackages = with pkgs; [
gnome-session
niri
];
};
# Enable autostart functionality (launch apps on login)
autostart.enable = true;