format and llama tuning
This commit is contained in:
@@ -241,7 +241,8 @@ in
|
||||
"--server"
|
||||
];
|
||||
}
|
||||
{ ## Uses systemd unit instead.
|
||||
{
|
||||
# # Uses systemd unit instead.
|
||||
argv = [
|
||||
"noctalia-shell"
|
||||
];
|
||||
|
||||
@@ -37,7 +37,7 @@
|
||||
../../modules/mealie.nix
|
||||
../../modules/miniflux.nix
|
||||
#../../modules/ollama.nix # replaced by llama-cpp + llama-swap
|
||||
#../../modules/openwebui.nix # using llama-cpp built-in UI instead
|
||||
../../modules/openwebui.nix # re-enabled; NOTE(review): was previously disabled in favor of the llama-cpp built-in UI — confirm
|
||||
../../modules/llama-swap.nix
|
||||
../../modules/librechat.nix
|
||||
../../modules/immich.nix
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
boot.initrd.kernelModules = [ ];
|
||||
boot.kernelModules = [ "kvm-amd" ];
|
||||
boot.extraModulePackages = [ ];
|
||||
boot.kernelParams = [
|
||||
boot.kernelParams = [
|
||||
"xe.force_probe=e212"
|
||||
"xe.vram_force_mmapable=1"
|
||||
"transparent_hugepage=always"
|
||||
@@ -38,7 +38,6 @@
|
||||
ACTION=="add", SUBSYSTEM=="drm", KERNEL=="card*", ATTR{device/tile0/gt0/engines/rcs0/job_timeout_ms}="100000"
|
||||
'';
|
||||
|
||||
|
||||
hardware.enableRedistributableFirmware = true;
|
||||
hardware.firmware = [ pkgs.linux-firmware ];
|
||||
|
||||
@@ -51,7 +50,7 @@
|
||||
extraPackages = with pkgs; [
|
||||
vpl-gpu-rt
|
||||
|
||||
#hardware decode and opencl
|
||||
#hardware decode and opencl
|
||||
intel-media-driver # LIBVA_DRIVER_NAME=iHD (for HD Graphics starting Broadwell (2014) and newer)
|
||||
intel-vaapi-driver # LIBVA_DRIVER_NAME=i965 (older but works better for Firefox/Chromium)
|
||||
libvdpau-va-gl
|
||||
|
||||
@@ -67,10 +67,9 @@
|
||||
enable = true;
|
||||
enableSSHSupport = true;
|
||||
};
|
||||
|
||||
|
||||
services.dbus.enable = true;
|
||||
services.dbus.implementation = "broker";
|
||||
services.dbus.implementation = "broker";
|
||||
|
||||
services.desktopManager.gnome.enable = true;
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
}:
|
||||
{
|
||||
services.docling-serve = {
|
||||
enable = true;
|
||||
enable = true;
|
||||
package = pkgs.unstable.docling-serve;
|
||||
port = 5001;
|
||||
host = "127.0.0.1";
|
||||
|
||||
@@ -50,7 +50,7 @@
|
||||
# Ensure fcitx5 starts with the session
|
||||
services.xserver.desktopManager.runXdgAutostartIfNone = true;
|
||||
|
||||
# environment.sessionVariables = {
|
||||
# GTK_IM_MODULE = lib.mkForce "";
|
||||
# };
|
||||
# environment.sessionVariables = {
|
||||
# GTK_IM_MODULE = lib.mkForce "";
|
||||
# };
|
||||
}
|
||||
|
||||
@@ -6,9 +6,7 @@ let
|
||||
in
|
||||
{
|
||||
|
||||
|
||||
sops.secrets."librechat/environmentFile" = {};
|
||||
|
||||
sops.secrets."librechat/environmentFile" = { };
|
||||
|
||||
# Enable MongoDB
|
||||
services.mongodb = {
|
||||
@@ -24,10 +22,13 @@ in
|
||||
enable = true;
|
||||
|
||||
description = "LibreChat server";
|
||||
|
||||
|
||||
# **Native systemd dependency declarations**
|
||||
requires = [ "mongodb.service" ];
|
||||
after = [ "network.target" "mongodb.service" ];
|
||||
after = [
|
||||
"network.target"
|
||||
"mongodb.service"
|
||||
];
|
||||
|
||||
serviceConfig = {
|
||||
EnvironmentFile = config.sops.secrets."librechat/environmentFile".path;
|
||||
@@ -37,10 +38,10 @@ in
|
||||
|
||||
# ExecStart binds to package binary
|
||||
ExecStart = ''
|
||||
${pkgs.librechat}/bin/librechat-server \
|
||||
--host 0.0.0.0 \
|
||||
--port ${toString librechatPort} \
|
||||
--config /var/lib/librechat/config.yaml
|
||||
${pkgs.librechat}/bin/librechat-server \
|
||||
--host 0.0.0.0 \
|
||||
--port ${toString librechatPort} \
|
||||
--config /var/lib/librechat/config.yaml
|
||||
'';
|
||||
WorkingDirectory = "/var/lib/librechat";
|
||||
};
|
||||
@@ -56,7 +57,7 @@ in
|
||||
};
|
||||
|
||||
users.users.librechat.group = "librechat";
|
||||
users.groups.librechat = {};
|
||||
users.groups.librechat = { };
|
||||
|
||||
systemd.tmpfiles.rules = [
|
||||
"d /var/lib/librechat 0755 librechat librechat -"
|
||||
@@ -67,4 +68,3 @@ in
|
||||
27017
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
@@ -12,22 +12,30 @@ in
|
||||
|
||||
{
|
||||
environment.systemPackages = [ pkgs.unstable.ollama ];
|
||||
services.llama-cpp = {
|
||||
enable = true;
|
||||
host = "0.0.0.0";
|
||||
port = 11111;
|
||||
package = pkgs.unstable.llama-cpp-vulkan;
|
||||
openFirewall = true;
|
||||
model = "/var/lib/llama/models/Qwen3.5-35B-A3B-UD-Q2_K_XL.gguf" ;
|
||||
extraFlags = [
|
||||
"-c" "32000"
|
||||
"-ngl" "41" # technically the entire qwen3.5
|
||||
"--image-min-tokens" "1024"
|
||||
"--image-max-tokens" "2048"
|
||||
#"--hf-repo" "unsloth/Qwen3.5-35B-A3B-GGUF:Q2_K_L"
|
||||
"--mmproj" "/var/lib/llama/models/mmproj-F16.gguf"
|
||||
"-ctk" "q4_0" "-ctv" "q4_0" # quantize kv cache.
|
||||
"--no-mmap"
|
||||
];
|
||||
};
|
||||
services.llama-cpp = {
|
||||
enable = true;
|
||||
host = "0.0.0.0";
|
||||
port = 11111;
|
||||
package = pkgs.unstable.llama-cpp-vulkan;
|
||||
openFirewall = true;
|
||||
model = "/var/lib/llama/models/Qwen3.5-35B-A3B-UD-Q2_K_XL.gguf";
|
||||
extraFlags = [
|
||||
"-c"
|
||||
"32000"
|
||||
"-ngl"
|
||||
"41" # technically the entire qwen3.5
|
||||
"--image-min-tokens"
|
||||
"1024"
|
||||
"--image-max-tokens"
|
||||
"2048"
|
||||
#"--hf-repo" "unsloth/Qwen3.5-35B-A3B-GGUF:Q2_K_L"
|
||||
"--mmproj"
|
||||
"/var/lib/llama/models/mmproj-F16.gguf"
|
||||
"-ctk"
|
||||
"q4_0"
|
||||
"-ctv"
|
||||
"q4_0" # quantize kv cache.
|
||||
"--no-mmap"
|
||||
];
|
||||
};
|
||||
}
|
||||
|
||||
@@ -25,33 +25,38 @@
|
||||
logLevel = "info";
|
||||
|
||||
macros = {
|
||||
ctx = 32768;
|
||||
ctx = 64000;
|
||||
ngl = 99;
|
||||
kv_cache = "-ctk q4_0 -ctv q4_0";
|
||||
kv_cache = "-ctk iq4_nl -ctv iq4_nl";
|
||||
hf_repo = "";
|
||||
};
|
||||
|
||||
models = {
|
||||
"qwen3.5-35b-a3b" = {
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj /var/cache/llama-swap/llama.cpp/unsloth_Qwen3.5-35B-A3B-GGUF_mmproj-F16.gguf";
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} $\{kv_cache\} --hf-repo $\{hf_repo\} --image-max-tokens 1024 --chat-template-kwargs '{\"enable_thinking\":false}'";
|
||||
aliases = [ "qwen3.5" ];
|
||||
ttl = 1800;
|
||||
macros = {
|
||||
hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q2_K_XL";
|
||||
ngl = 40;
|
||||
ctx = 30000;
|
||||
hf_repo = "unsloth/Qwen3.5-35B-A3B-GGUF:UD-IQ3_XXS";
|
||||
};
|
||||
};
|
||||
"qwen3.5-9b" = {
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 2048 --image-min-tokens 512 --mmproj-url https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/mmproj-F16.gguf";
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --no-mmap --image-max-tokens 1024 --image-min-tokens 512 --chat-template-kwargs '{\"enable_thinking\":true}'";
|
||||
ttl = 900;
|
||||
macros.hf_repo = "unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL";
|
||||
macros = {
|
||||
hf_repo = "unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL";
|
||||
ctx = 128000;
|
||||
};
|
||||
|
||||
};
|
||||
"qwen3.5-2b" = {
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --image-max-tokens 1024 --image-min-tokens 512";
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --image-max-tokens 1024";
|
||||
ttl = 900;
|
||||
macros.hf_repo = "unsloth/Qwen3.5-2B-GGUF:UD-Q8_K_XL";
|
||||
macros.ctx = 64000;
|
||||
macros = {
|
||||
hf_repo = "unsloth/Qwen3.5-2B-GGUF:UD-Q8_K_XL";
|
||||
ctx = 128000;
|
||||
};
|
||||
|
||||
};
|
||||
"ministal-3-8b-reasonning" = {
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
|
||||
@@ -66,8 +71,11 @@
|
||||
macros.hf_repo = "mistralai/Ministral-3-3B-Instruct-2512-GGUF";
|
||||
};
|
||||
"minicpm-o-4_5" = {
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\}";
|
||||
aliases = [ "openbmb/MiniCPM-o-4_5-gguf" "minicpm" ];
|
||||
cmd = "${llama-server} --port $\{PORT\} --host 0.0.0.0 --ctx-size $\{ctx\} -ngl $\{ngl\} --hf-repo $\{hf_repo\} --mmproj-url https://huggingface.co/openbmb/MiniCPM-o-4_5-gguf/resolve/main/vision/MiniCPM-o-4_5-vision-F16.gguf";
|
||||
aliases = [
|
||||
"openbmb/MiniCPM-o-4_5-gguf"
|
||||
"minicpm"
|
||||
];
|
||||
ttl = 900;
|
||||
macros.hf_repo = "openbmb/MiniCPM-o-4_5-gguf";
|
||||
};
|
||||
@@ -108,10 +116,9 @@
|
||||
"HOME=/var/lib/llama-swap"
|
||||
"XDG_CACHE_HOME=/var/cache/llama-swap"
|
||||
"MESA_SHADER_CACHE_DIR=/var/cache/llama-swap/mesa"
|
||||
"MESA_SHADER_CACHE_MAX_SIZE=1G"
|
||||
|
||||
"GGML_VULKAN_MAX_NODES=16"
|
||||
"GGML_VK_RELAXED_SHAPES=0"
|
||||
#"MESA_SHADER_CACHE_MAX_SIZE=1G"
|
||||
#"GGML_VULKAN_MAX_NODES=16"
|
||||
#"GGML_VK_RELAXED_SHAPES=0"
|
||||
];
|
||||
};
|
||||
}
|
||||
|
||||
@@ -34,21 +34,20 @@
|
||||
# ];
|
||||
#};
|
||||
|
||||
|
||||
portal = {
|
||||
enable = true;
|
||||
xdgOpenUsePortal = true;
|
||||
config.common.default = [ "gnome"];
|
||||
extraPortals = [
|
||||
pkgs.xdg-desktop-portal
|
||||
pkgs.xdg-desktop-portal-gtk
|
||||
pkgs.xdg-desktop-portal-gnome
|
||||
];
|
||||
configPackages = with pkgs; [
|
||||
gnome-session
|
||||
niri
|
||||
];
|
||||
};
|
||||
enable = true;
|
||||
xdgOpenUsePortal = true;
|
||||
config.common.default = [ "gnome" ];
|
||||
extraPortals = [
|
||||
pkgs.xdg-desktop-portal
|
||||
pkgs.xdg-desktop-portal-gtk
|
||||
pkgs.xdg-desktop-portal-gnome
|
||||
];
|
||||
configPackages = with pkgs; [
|
||||
gnome-session
|
||||
niri
|
||||
];
|
||||
};
|
||||
|
||||
# Enable autostart functionality (launch apps on login)
|
||||
autostart.enable = true;
|
||||
|
||||
Reference in New Issue
Block a user