From 83fdef416efb0b4af4df6fd3df74fef16fa3ce2a Mon Sep 17 00:00:00 2001 From: Adrian Gunnar Lauterer Date: Mon, 23 Mar 2026 22:19:15 +0100 Subject: [PATCH] format and llama tuning --- home/niri.nix | 3 +- hosts/galadriel/configuration.nix | 2 +- hosts/galadriel/hardware-configuration.nix | 5 +-- hosts/legolas/configuration.nix | 3 +- modules/docling.nix | 2 +- modules/fcitx5.nix | 6 +-- modules/librechat.nix | 22 +++++------ modules/llama-cpp.nix | 44 +++++++++++++--------- modules/llama-swap.nix | 41 +++++++++++--------- modules/xdg.nix | 27 +++++++------ 10 files changed, 84 insertions(+), 71 deletions(-) diff --git a/home/niri.nix b/home/niri.nix index 6acb7fd..fe67424 100644 --- a/home/niri.nix +++ b/home/niri.nix @@ -241,7 +241,8 @@ in "--server" ]; } - { ## Uses systemd unit instead. + { + # # Uses systemd unit instead. argv = [ "noctalia-shell" ]; diff --git a/hosts/galadriel/configuration.nix b/hosts/galadriel/configuration.nix index cdb2761..4815628 100644 --- a/hosts/galadriel/configuration.nix +++ b/hosts/galadriel/configuration.nix @@ -37,7 +37,7 @@ ../../modules/mealie.nix ../../modules/miniflux.nix #../../modules/ollama.nix # replaced by llama-cpp + llama-swap - #../../modules/openwebui.nix # using llama-cpp built-in UI instead + ../../modules/openwebui.nix # using llama-cpp built-in UI instead ../../modules/llama-swap.nix ../../modules/librechat.nix ../../modules/immich.nix diff --git a/hosts/galadriel/hardware-configuration.nix b/hosts/galadriel/hardware-configuration.nix index f90b10f..aecdb59 100644 --- a/hosts/galadriel/hardware-configuration.nix +++ b/hosts/galadriel/hardware-configuration.nix @@ -27,7 +27,7 @@ boot.initrd.kernelModules = [ ]; boot.kernelModules = [ "kvm-amd" ]; boot.extraModulePackages = [ ]; - boot.kernelParams = [ + boot.kernelParams = [ "xe.force_probe=e212" "xe.vram_force_mmapable=1" "transparent_hugepage=always" @@ -38,7 +38,6 @@ ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", 
ATTR{device/tile0/gt0/engines/rcs0/job_timeout_ms}="100000" ''; - hardware.enableRedistributableFirmware = true; hardware.firmware = [ pkgs.linux-firmware ]; @@ -51,7 +50,7 @@ extraPackages = with pkgs; [ vpl-gpu-rt - #hardware decode and opencl + #hardware decode and opencl intel-media-driver # LIBVA_DRIVER_NAME=iHD (for HD Graphics starting Broadwell (2014) and newer) intel-vaapi-driver # LIBVA_DRIVER_NAME=i965 (older but works better for Firefox/Chromium) libvdpau-va-gl diff --git a/hosts/legolas/configuration.nix b/hosts/legolas/configuration.nix index 97e0535..32d57bb 100644 --- a/hosts/legolas/configuration.nix +++ b/hosts/legolas/configuration.nix @@ -67,10 +67,9 @@ enable = true; enableSSHSupport = true; }; - services.dbus.enable = true; - services.dbus.implementation = "broker"; + services.dbus.implementation = "broker"; services.desktopManager.gnome.enable = true; diff --git a/modules/docling.nix b/modules/docling.nix index de05ce6..efb061e 100644 --- a/modules/docling.nix +++ b/modules/docling.nix @@ -6,7 +6,7 @@ }: { services.docling-serve = { - enable = true; + enable = true; package = pkgs.unstable.docling-serve; port = 5001; host = "127.0.0.1"; diff --git a/modules/fcitx5.nix b/modules/fcitx5.nix index d0e697f..098b3d8 100644 --- a/modules/fcitx5.nix +++ b/modules/fcitx5.nix @@ -50,7 +50,7 @@ # Ensure fcitx5 starts with the session services.xserver.desktopManager.runXdgAutostartIfNone = true; -# environment.sessionVariables = { -# GTK_IM_MODULE = lib.mkForce ""; -# }; + # environment.sessionVariables = { + # GTK_IM_MODULE = lib.mkForce ""; + # }; } diff --git a/modules/librechat.nix b/modules/librechat.nix index 8a9deea..6c2331a 100644 --- a/modules/librechat.nix +++ b/modules/librechat.nix @@ -6,9 +6,7 @@ let in { - - sops.secrets."librechat/environmentFile" = {}; - + sops.secrets."librechat/environmentFile" = { }; # Enable MongoDB services.mongodb = { @@ -24,10 +22,13 @@ in enable = true; description = "LibreChat server"; - + # **Native systemd 
dependency declarations** requires = [ "mongodb.service" ]; - after = [ "network.target" "mongodb.service" ]; + after = [ + "network.target" + "mongodb.service" + ]; serviceConfig = { EnvironmentFile = config.sops.secrets."librechat/environmentFile".path; @@ -37,10 +38,10 @@ in # ExecStart binds to package binary ExecStart = '' - ${pkgs.librechat}/bin/librechat-server \ - --host 0.0.0.0 \ - --port ${toString librechatPort} \ - --config /var/lib/librechat/config.yaml + ${pkgs.librechat}/bin/librechat-server \ + --host 0.0.0.0 \ + --port ${toString librechatPort} \ + --config /var/lib/librechat/config.yaml ''; WorkingDirectory = "/var/lib/librechat"; }; @@ -56,7 +57,7 @@ in }; users.users.librechat.group = "librechat"; - users.groups.librechat = {}; + users.groups.librechat = { }; systemd.tmpfiles.rules = [ "d /var/lib/librechat 0755 librechat librechat -" @@ -67,4 +68,3 @@ in 27017 ]; } - diff --git a/modules/llama-cpp.nix b/modules/llama-cpp.nix index 770aaa4..e777c92 100644 --- a/modules/llama-cpp.nix +++ b/modules/llama-cpp.nix @@ -12,22 +12,30 @@ in { environment.systemPackages = [ pkgs.unstable.ollama ]; - services.llama-cpp = { - enable = true; - host = "0.0.0.0"; - port = 11111; - package = pkgs.unstable.llama-cpp-vulkan; - openFirewall = true; - model = "/var/lib/llama/models/Qwen3.5-35B-A3B-UD-Q2_K_XL.gguf" ; - extraFlags = [ - "-c" "32000" - "-ngl" "41" # techincally entire qwen3.5 - "--image-min-tokens" "1024" - "--image-max-tokens" "2048" - #"--hf-repo" "unsloth/Qwen3.5-35B-A3B-GGUF:Q2_K_L" - "--mmproj" "/var/lib/llama/models/mmproj-F16.gguf" - "-ctk" "q4_0" "-ctv" "q4_0" # quantisize kv cache. 
- "--no-mmap" - ]; - }; + services.llama-cpp = { + enable = true; + host = "0.0.0.0"; + port = 11111; + package = pkgs.unstable.llama-cpp-vulkan; + openFirewall = true; + model = "/var/lib/llama/models/Qwen3.5-35B-A3B-UD-Q2_K_XL.gguf"; + extraFlags = [ + "-c" + "32000" + "-ngl" + "41" # technically entire qwen3.5 + "--image-min-tokens" + "1024" + "--image-max-tokens" + "2048" + #"--hf-repo" "unsloth/Qwen3.5-35B-A3B-GGUF:Q2_K_L" + "--mmproj" + "/var/lib/llama/models/mmproj-F16.gguf" + "-ctk" + "q4_0" + "-ctv" + "q4_0" # quantize kv cache. + "--no-mmap" + ]; + }; + } } diff --git a/modules/llama-swap.nix b/modules/llama-swap.nix index f3219b2..434238a 100644 --- a/modules/llama-swap.nix +++ b/modules/llama-swap.nix @@ -25,33 +25,38 @@ logLevel = "info"; macros = { - ctx = 32768; + ctx = 64000; ngl = 99; - kv_cache = "-ctk q4_0 -ctv q4_0"; + kv_cache = "-ctk iq4_nl -ctv iq4_nl"; hf_repo = ""; }; models = { "qwen3.5-35b-a3b" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj /var/cache/llama-swap/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf"; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --image-max-tokens 1024 --chat-template-kwargs '{\"enable_thinking\":false}'"; aliases = [ "qwen3.5" ]; ttl = 1800; macros = { - hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q2_K_XL"; - ngl = 40; - ctx = 30000; + hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF:UD-IQ3_XXS"; }; }; "qwen3.5-9b" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj-url https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/mmproj-F16.gguf"; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo 
$\{hf_repo\} --no-mmap --image-max-tokens 1024 --image-min-tokens 512 --chat-template-kwargs '{\"enable_thinking\":true}'"; ttl = 900; - macros.hf_repo = "unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL"; + macros = { + hf_repo = "unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL"; + ctx = 128000; + }; + }; "qwen3.5-2b" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --image-max-tokens 1024 --image-min-tokens 512"; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --image-max-tokens 1024"; ttl = 900; - macros.hf_repo = "unsloth/Qwen3.5-2B-GGUF:UD-Q8_K_XL"; - macros.ctx = 64000; + macros = { + hf_repo = "unsloth/Qwen3.5-2B-GGUF:UD-Q8_K_XL"; + ctx = 128000; + }; + }; "ministal-3-8b-reasonning" = { cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}"; @@ -66,8 +71,11 @@ macros.hf_repo = "mistralai/Ministral-3-3B-Instruct-2512-GGUF"; }; "minicpm-o-4_5" = { - cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}"; - aliases = [ "openbmb/MiniCPM-o-4_5-gguf" "minicpm" ]; + cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --mmproj-url https://huggingface.co/openbmb/MiniCPM-o-4_5-gguf/resolve/main/vision/MiniCPM-o-4_5-vision-F16.gguf"; + aliases = [ + "openbmb/MiniCPM-o-4_5-gguf" + "minicpm" + ]; ttl = 900; macros.hf_repo = "openbmb/MiniCPM-o-4_5-gguf"; }; @@ -108,10 +116,9 @@ "HOME=/var/lib/llama-swap" "XDG_CACHE_HOME=/var/cache/llama-swap" "MESA_SHADER_CACHE_DIR=/var/cache/llama-swap/mesa" - "MESA_SHADER_CACHE_MAX_SIZE=1G" - - "GGML_VULKAN_MAX_NODES=16" - "GGML_VK_RELAXED_SHAPES=0" + #"MESA_SHADER_CACHE_MAX_SIZE=1G" + #"GGML_VULKAN_MAX_NODES=16" + #"GGML_VK_RELAXED_SHAPES=0" ]; }; } diff --git a/modules/xdg.nix b/modules/xdg.nix index 6ee1ec1..f70bb59 100644 --- a/modules/xdg.nix +++ 
b/modules/xdg.nix @@ -34,21 +34,20 @@ # ]; #}; - portal = { - enable = true; - xdgOpenUsePortal = true; - config.common.default = [ "gnome"]; - extraPortals = [ - pkgs.xdg-desktop-portal - pkgs.xdg-desktop-portal-gtk - pkgs.xdg-desktop-portal-gnome - ]; - configPackages = with pkgs; [ - gnome-session - niri - ]; - }; + enable = true; + xdgOpenUsePortal = true; + config.common.default = [ "gnome" ]; + extraPortals = [ + pkgs.xdg-desktop-portal + pkgs.xdg-desktop-portal-gtk + pkgs.xdg-desktop-portal-gnome + ]; + configPackages = with pkgs; [ + gnome-session + niri + ]; + }; # Enable autostart functionality (launch apps on login) autostart.enable = true;