From 21ab209d6e230fe5e7d9af29d3ec0eef660e557d Mon Sep 17 00:00:00 2001 From: Oystein Kristoffer Tveit Date: Fri, 27 Sep 2024 21:15:41 +0200 Subject: [PATCH] Initial commit --- LICENSE | 9 +++++++++ README.md | 46 +++++++++++++++++++++++++++++++++++++++++++ flake.lock | 27 +++++++++++++++++++++++++ flake.nix | 15 ++++++++++++++ src/genAttrs.nix | 10 ++++++++++ src/keepAttrs.nix | 10 ++++++++++ src/subtractLists.nix | 17 ++++++++++++++++ src/unique.nix | 12 +++++++++++ 8 files changed, 146 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 src/genAttrs.nix create mode 100644 src/keepAttrs.nix create mode 100644 src/subtractLists.nix create mode 100644 src/unique.nix diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..7dfbfd1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) 2024 oysteikt + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..68299d0 --- /dev/null +++ b/README.md @@ -0,0 +1,46 @@ +# nixpkgs lib profiling + +This is a repository for profiling nixpkgs lib functions. +The goal is to create alternative implementations of functions that might have +different time/space complexity characteristics, and measure if they are faster +despite potential overhead. + +Personally, I am especially interested in functions that rely on some type of `fold` +and accumulate a collection (either a list or an attrset). Because the lists and attrsets +will be reallocated on every iteration (this is only a suspicion, I have not verified this), +the time complexity of these functions might be O(n^2) while they really could've been O(n). +For functions like `mergeAttrsList`, this has been somewhat mitigated by folding with a binary +merge function, reducing the time complexity to O(n log n). But I'm hoping to find a more general +solution that could be applied to other functions as well, without too much overhead. + +## Some interesting builtins + +| Name | Expected complexity | Comment | +|------------------|---------------------|---------| +| `hasAttr` | O(1) | this is the only real kind of lookup table we have with low overhead. I need to verify that it is actually amortized O(1) | +| `catAttrs` | O(n) | could this fix the O(n^2) fold collection resizing problem? | +| `concatMap` | O(n * f) | could this be used with attr lookups to fix the O(n^2) fold collection resizing problem? | +| `concatLists` | O(n) | could this be used with attr lookups to fix the O(n^2) fold collection resizing problem? | +| `filter` | O(n * f) | could potentially remove many items at once without realloc? | +| `genList` | O(n * f) | can seemingly allocate an arbitrarily sized list? | +| `intersectAttrs` | O(n log m) (known) | could be used as a shorthand for binary merge? | +| `listToAttrs` | O(n) | could potentially remove many items at once without realloc? 
| +| `partition` | O(n * f) | could potentially remove many items at once without realloc? | +| `genericClosure` | O(n * f) | We might be able to use this function's uniqueness checking for something, and then use `catAttrs` on the result | + +## Evaluation speed measurement + +For now, I'm using this: + +``` +# Alternatively drop NIX_COUNT_CALLS if too verbose +NIX_SHOW_STATS=1 NIX_COUNT_CALLS=1 nix eval .#sometest +``` + +Could alternatively create a function-call trace with + +``` +nix eval --trace-function-calls .#sometest 2>function-trace.txt +``` + +source: https://discourse.nixos.org/t/nix-flamegraph-or-profiling-tool/33333 diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..c79814c --- /dev/null +++ b/flake.lock @@ -0,0 +1,27 @@ +{ + "nodes": { + "nixpkgs-lib": { + "locked": { + "lastModified": 1726966855, + "narHash": "sha256-25ByioeOBFcnitO5lM/Mufnv/u7YtHEHEM8QFuiS40k=", + "owner": "nix-community", + "repo": "nixpkgs.lib", + "rev": "575704ff85d3a41dc5bfef7b55380cbc7b87f3c2", + "type": "github" + }, + "original": { + "owner": "nix-community", + "ref": "master", + "repo": "nixpkgs.lib", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs-lib": "nixpkgs-lib" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..d8d666a --- /dev/null +++ b/flake.nix @@ -0,0 +1,15 @@ +{ + inputs.nixpkgs-lib.url = "github:nix-community/nixpkgs.lib/master"; + + outputs = { self, nixpkgs-lib }: let + inherit (nixpkgs-lib) lib; + in { + genAttrs = import ./src/genAttrs.nix lib; + subtractLists = import ./src/subtractLists.nix lib; + unique = import ./src/unique.nix lib; + + # This doesn't actually exist, but I've seen its definition being used + # in a few places. 
+    keepAttrs = import ./src/keepAttrs.nix lib;
+  };
+}
diff --git a/src/genAttrs.nix b/src/genAttrs.nix
new file mode 100644
index 0000000..2aff6e3
--- /dev/null
+++ b/src/genAttrs.nix
@@ -0,0 +1,10 @@
+lib:
+let
+  genAttrsOld = names: f: lib.listToAttrs (map (n: lib.nameValuePair n (f n)) names); # builds each pair through lib.nameValuePair
+  genAttrsNew = names: f: lib.listToAttrs (map (n: { name = n; value = f n; }) names); # builds each pair as an inline attrset literal
+
+  bigdata = lib.imap0 (i: _: toString i) (lib.replicate 999999 null); # the stringified indices of a 999999-element list
+in {
+  old = genAttrsOld bigdata (_: null);
+  new = genAttrsNew bigdata (_: null);
+}
diff --git a/src/keepAttrs.nix b/src/keepAttrs.nix
new file mode 100644
index 0000000..e71623c
--- /dev/null
+++ b/src/keepAttrs.nix
@@ -0,0 +1,10 @@
+lib:
+let
+  keepAttrsOld = attrs: names: lib.filterAttrs (n: _: builtins.elem n names) attrs; # checks every attribute name of `attrs` against `names` with `elem`
+  keepAttrsNew = attrs: names: builtins.intersectAttrs (lib.genAttrs names (_: null)) attrs; # turns `names` into an attrset and intersects it with `attrs`
+
+  bigdata = lib.genAttrs (lib.imap0 (i: _: toString i) (lib.replicate 999999 null)) (_: null); # attrset keyed by stringified indices, every value null
+in {
+  old = keepAttrsOld bigdata [ "123" "456" ];
+  new = keepAttrsNew bigdata [ "123" "456" ];
+}
diff --git a/src/subtractLists.nix b/src/subtractLists.nix
new file mode 100644
index 0000000..9d1cf9b
--- /dev/null
+++ b/src/subtractLists.nix
@@ -0,0 +1,17 @@
+lib:
+let
+  # O(nm): every element of the filtered list is looked up in `e` with `elem`
+  subtractListsOld = e: builtins.filter (x: !(builtins.elem x e));
+
+  # O(n + m) (hopefully): turn `e` into attribute names once, then test membership with `hasAttr`
+  subtractListsNew = e: let
+    # Assuming genAttrs is O(n) (unverified); the values are unused, only the names matter
+    e' = lib.genAttrs e (_: null);
+    # Assuming hasAttr is O(1) (unverified)
+  in builtins.filter (x: !(builtins.hasAttr x e'));
+
+  bigdata = lib.imap0 (i: _: toString i) (lib.replicate 999999 null); # the stringified indices of a 999999-element list
+in {
+  old = subtractListsOld [ "123" "456" "789" ] bigdata;
+  new = subtractListsNew [ "123" "456" "789" ] bigdata;
+}
diff --git a/src/unique.nix b/src/unique.nix
new file mode 100644
index 0000000..22db49b
--- /dev/null
+++ b/src/unique.nix
@@ -0,0 +1,12 @@
+lib:
+let
+  uniqueOld = builtins.foldl' (acc: e: if builtins.elem e acc then acc else acc ++ [ e ]) []; # appends `e` only when `elem` does not find it in the accumulator
+  # TODO(unfinished sketch, the `if` below has no condition or branches yet): uniqueNew = xs: let
+  #   entries = lib.genAttrs xs (_: null);
+  # in builtins.concatMap (x: if );
+
+  bigdataMostlyDuplicates = (lib.replicate 999999 null) ++ ["unique"]; # 999999 nulls followed by a single string
+  bigdataMostlyUnique = lib.imap0 (i: _: toString i) (lib.replicate 999999 null) ++ ["123" "456"]; # not referenced by the attrset below
+in {
+  old = uniqueOld bigdataMostlyDuplicates;
+}