From 83fdef416efb0b4af4df6fd3df74fef16fa3ce2a Mon Sep 17 00:00:00 2001 From: Adrian Gunnar Lauterer Date: Mon, 23 Mar 2026 22:19:15 +0100 Subject: [PATCH] format and llama tuning --- home/niri.nix | 3 +- hosts/galadriel/configuration.nix | 2 +- hosts/galadriel/hardware-configuration.nix | 5 +-- hosts/legolas/configuration.nix | 3 +- modules/docling.nix | 2 +- modules/fcitx5.nix | 6 +-- modules/librechat.nix | 22 +++++------ modules/llama-cpp.nix | 44 +++++++++++++--------- modules/llama-swap.nix | 41 +++++++++++--------- modules/xdg.nix | 27 +++++++------ 10 files changed, 84 insertions(+), 71 deletions(-) diff --git a/home/niri.nix b/home/niri.nix index 6acb7fd..fe67424 100644 --- a/home/niri.nix +++ b/home/niri.nix @@ -241,7 +241,8 @@ in "--server" ]; } - { ## Uses systemd unit instead. + { + # # Uses systemd unit instead. argv = [ "noctalia-shell" ]; diff --git a/hosts/galadriel/configuration.nix b/hosts/galadriel/configuration.nix index cdb2761..4815628 100644 --- a/hosts/galadriel/configuration.nix +++ b/hosts/galadriel/configuration.nix @@ -37,7 +37,7 @@ ../../modules/mealie.nix ../../modules/miniflux.nix #../../modules/ollama.nix # replaced by llama-cpp + llama-swap - #../../modules/openwebui.nix # using llama-cpp built-in UI instead + ../../modules/openwebui.nix # using llama-cpp built-in UI instead ../../modules/llama-swap.nix ../../modules/librechat.nix ../../modules/immich.nix diff --git a/hosts/galadriel/hardware-configuration.nix b/hosts/galadriel/hardware-configuration.nix index f90b10f..aecdb59 100644 --- a/hosts/galadriel/hardware-configuration.nix +++ b/hosts/galadriel/hardware-configuration.nix @@ -27,7 +27,7 @@ boot.initrd.kernelModules = [ ]; boot.kernelModules = [ "kvm-amd" ]; boot.extraModulePackages = [ ]; - boot.kernelParams = [ + boot.kernelParams = [ "xe.force_probe=e212" "xe.vram_force_mmapable=1" "transparent_hugepage=always" @@ -38,7 +38,6 @@ ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", 
ATTR{device/tile0/gt0/engines/rcs0/job_timeout_ms}="100000" ''; - hardware.enableRedistributableFirmware = true; hardware.firmware = [ pkgs.linux-firmware ]; @@ -51,7 +50,7 @@ extraPackages = with pkgs; [ vpl-gpu-rt - #hardware decode and opencl + #hardware decode and opencl intel-media-driver # LIBVA_DRIVER_NAME=iHD (for HD Graphics starting Broadwell (2014) and newer) intel-vaapi-driver # LIBVA_DRIVER_NAME=i965 (older but works better for Firefox/Chromium) libvdpau-va-gl diff --git a/hosts/legolas/configuration.nix b/hosts/legolas/configuration.nix index 97e0535..32d57bb 100644 --- a/hosts/legolas/configuration.nix +++ b/hosts/legolas/configuration.nix @@ -67,10 +67,9 @@ enable = true; enableSSHSupport = true; }; - services.dbus.enable = true; - services.dbus.implementation = "broker"; + services.dbus.implementation = "broker"; services.desktopManager.gnome.enable = true; diff --git a/modules/docling.nix b/modules/docling.nix index de05ce6..efb061e 100644 --- a/modules/docling.nix +++ b/modules/docling.nix @@ -6,7 +6,7 @@ }: { services.docling-serve = { - enable = true; + enable = true; package = pkgs.unstable.docling-serve; port = 5001; host = "127.0.0.1"; diff --git a/modules/fcitx5.nix b/modules/fcitx5.nix index d0e697f..098b3d8 100644 --- a/modules/fcitx5.nix +++ b/modules/fcitx5.nix @@ -50,7 +50,7 @@ # Ensure fcitx5 starts with the session services.xserver.desktopManager.runXdgAutostartIfNone = true; -# environment.sessionVariables = { -# GTK_IM_MODULE = lib.mkForce ""; -# }; + # environment.sessionVariables = { + # GTK_IM_MODULE = lib.mkForce ""; + # }; } diff --git a/modules/librechat.nix b/modules/librechat.nix index 8a9deea..6c2331a 100644 --- a/modules/librechat.nix +++ b/modules/librechat.nix @@ -6,9 +6,7 @@ let in { - - sops.secrets."librechat/environmentFile" = {}; - + sops.secrets."librechat/environmentFile" = { }; # Enable MongoDB services.mongodb = { @@ -24,10 +22,13 @@ in enable = true; description = "LibreChat server"; - + # **Native systemd 
dependency declarations** requires = [ "mongodb.service" ]; - after = [ "network.target" "mongodb.service" ]; + after = [ + "network.target" + "mongodb.service" + ]; serviceConfig = { EnvironmentFile = config.sops.secrets."librechat/environmentFile".path; @@ -37,10 +38,10 @@ in # ExecStart binds to package binary ExecStart = '' - ${pkgs.librechat}/bin/librechat-server \ - --host 0.0.0.0 \ - --port ${toString librechatPort} \ - --config /var/lib/librechat/config.yaml + ${pkgs.librechat}/bin/librechat-server \ + --host 0.0.0.0 \ + --port ${toString librechatPort} \ + --config /var/lib/librechat/config.yaml ''; WorkingDirectory = "/var/lib/librechat"; }; @@ -56,7 +57,7 @@ in }; users.users.librechat.group = "librechat"; - users.groups.librechat = {}; + users.groups.librechat = { }; systemd.tmpfiles.rules = [ "d /var/lib/librechat 0755 librechat librechat -" @@ -67,4 +68,3 @@ in 27017 ]; } - diff --git a/modules/llama-cpp.nix b/modules/llama-cpp.nix index 770aaa4..e777c92 100644 --- a/modules/llama-cpp.nix +++ b/modules/llama-cpp.nix @@ -12,22 +12,30 @@ in { environment.systemPackages = [ pkgs.unstable.ollama ]; - services.llama-cpp = { - enable = true; - host = "0.0.0.0"; - port = 11111; - package = pkgs.unstable.llama-cpp-vulkan; - openFirewall = true; - model = "/var/lib/llama/models/Qwen3.5-35B-A3B-UD-Q2_K_XL.gguf" ; - extraFlags = [ - "-c" "32000" - "-ngl" "41" # techincally entire qwen3.5 - "--image-min-tokens" "1024" - "--image-max-tokens" "2048" - #"--hf-repo" "unsloth/Qwen3.5-35B-A3B-GGUF:Q2_K_L" - "--mmproj" "/var/lib/llama/models/mmproj-F16.gguf" - "-ctk" "q4_0" "-ctv" "q4_0" # quantisize kv cache. 
- "--no-mmap" - ]; - }; + services.llama-cpp = { + enable = true; + host = "0.0.0.0"; + port = 11111; + package = pkgs.unstable.llama-cpp-vulkan; + openFirewall = true; + model = "/var/lib/llama/models/Qwen3.5-35B-A3B-UD-Q2_K_XL.gguf"; + extraFlags = [ + "-c" + "32000" + "-ngl" + "41" # technically entire qwen3.5 + "--image-min-tokens" + "1024" + "--image-max-tokens" + "2048" + #"--hf-repo" "unsloth/Qwen3.5-35B-A3B-GGUF:Q2_K_L" + "--mmproj" + "/var/lib/llama/models/mmproj-F16.gguf" + "-ctk" + "q4_0" + "-ctv" + "q4_0" # quantize kv cache. + "--no-mmap" + ]; + }; + } } diff --git a/modules/llama-swap.nix b/modules/llama-swap.nix index f3219b2..434238a 100644 --- a/modules/llama-swap.nix +++ b/modules/llama-swap.nix @@ -25,33 +25,38 @@ logLevel = "info"; macros = { - ctx = 32768; + ctx = 64000; ngl = 99; - kv_cache = "-ctk q4_0 -ctv q4_0"; + kv_cache = "-ctk iq4_nl -ctv iq4_nl"; hf_repo = ""; }; models = { "qwen3.5-35b-a3b" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj /var/cache/llama-swap/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf"; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --image-max-tokens 1024 --chat-template-kwargs '{\"enable_thinking\":false}'"; aliases = [ "qwen3.5" ]; ttl = 1800; macros = { - hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q2_K_XL"; - ngl = 40; - ctx = 30000; + hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF:UD-IQ3_XXS"; }; }; "qwen3.5-9b" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj-url https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/mmproj-F16.gguf"; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo 
$\{hf_repo\} --no-mmap --image-max-tokens 1024 --image-min-tokens 512 --chat-template-kwargs '{\"enable_thinking\":true}'"; ttl = 900; - macros.hf_repo = "unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL"; + macros = { + hf_repo = "unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL"; + ctx = 128000; + }; + }; "qwen3.5-2b" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --image-max-tokens 1024 --image-min-tokens 512"; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --image-max-tokens 1024"; ttl = 900; - macros.hf_repo = "unsloth/Qwen3.5-2B-GGUF:UD-Q8_K_XL"; - macros.ctx = 64000; + macros = { + hf_repo = "unsloth/Qwen3.5-2B-GGUF:UD-Q8_K_XL"; + ctx = 128000; + }; + }; "ministal-3-8b-reasonning" = { cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}"; @@ -66,8 +71,11 @@ macros.hf_repo = "mistralai/Ministral-3-3B-Instruct-2512-GGUF"; }; "minicpm-o-4_5" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}"; - aliases = [ "openbmb/MiniCPM-o-4_5-gguf" "minicpm" ]; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --mmproj-url https://huggingface.co/openbmb/MiniCPM-o-4_5-gguf/resolve/main/vision/MiniCPM-o-4_5-vision-F16.gguf"; + aliases = [ + "openbmb/MiniCPM-o-4_5-gguf" + "minicpm" + ]; ttl = 900; macros.hf_repo = "openbmb/MiniCPM-o-4_5-gguf"; }; @@ -108,10 +116,9 @@ "HOME=/var/lib/llama-swap" "XDG_CACHE_HOME=/var/cache/llama-swap" "MESA_SHADER_CACHE_DIR=/var/cache/llama-swap/mesa" - "MESA_SHADER_CACHE_MAX_SIZE=1G" - - "GGML_VULKAN_MAX_NODES=16" - "GGML_VK_RELAXED_SHAPES=0" + #"MESA_SHADER_CACHE_MAX_SIZE=1G" + #"GGML_VULKAN_MAX_NODES=16" + #"GGML_VK_RELAXED_SHAPES=0" ]; }; } diff --git a/modules/xdg.nix b/modules/xdg.nix index 6ee1ec1..f70bb59 100644 --- a/modules/xdg.nix +++ 
b/modules/xdg.nix @@ -34,21 +34,20 @@ # ]; #}; - portal = { - enable = true; - xdgOpenUsePortal = true; - config.common.default = [ "gnome"]; - extraPortals = [ - pkgs.xdg-desktop-portal - pkgs.xdg-desktop-portal-gtk - pkgs.xdg-desktop-portal-gnome - ]; - configPackages = with pkgs; [ - gnome-session - niri - ]; - }; + enable = true; + xdgOpenUsePortal = true; + config.common.default = [ "gnome" ]; + extraPortals = [ + pkgs.xdg-desktop-portal + pkgs.xdg-desktop-portal-gtk + pkgs.xdg-desktop-portal-gnome + ]; + configPackages = with pkgs; [ + gnome-session + niri + ]; + }; # Enable autostart functionality (launch apps on login) autostart.enable = true;