From 8ce7b25ba612c9155b189dbafbb6c01d8b9aad98 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Wed, 15 Oct 2025 00:23:16 +0200 Subject: [PATCH] lib.linkedLists: init Best way to return a large concatenated list from a `foldl'`, and probably more use cases. It's a means of last resort, but it's good to have somewhat of a soft landing at least... Engineering around the call stack was fun for me, so I figured I'd just do it. --- doc/doc-support/lib-function-docs.nix | 5 + doc/redirects.json | 3 + lib/default.nix | 1 + lib/linked-lists.nix | 403 +++++++++++++++++++ lib/tests/linked-lists.nix | 546 ++++++++++++++++++++++++++ lib/tests/test-with-nix.nix | 3 + 6 files changed, 961 insertions(+) create mode 100644 lib/linked-lists.nix create mode 100644 lib/tests/linked-lists.nix diff --git a/doc/doc-support/lib-function-docs.nix b/doc/doc-support/lib-function-docs.nix index bb81b50fddef4..f701ec68849ee 100644 --- a/doc/doc-support/lib-function-docs.nix +++ b/doc/doc-support/lib-function-docs.nix @@ -36,6 +36,11 @@ name = "lists"; description = "list manipulation functions"; } + { + name = "linkedLists"; + baseName = "linked-lists"; + description = "linked list functions"; + } { name = "debug"; description = "debugging functions"; diff --git a/doc/redirects.json b/doc/redirects.json index bb9f1188f88aa..13f1742408a57 100644 --- a/doc/redirects.json +++ b/doc/redirects.json @@ -222,6 +222,9 @@ "sec-darwin-libcxx-versions": [ "index.html#sec-darwin-libcxx-versions" ], + "sec-functions-library-linkedLists": [ + "index.html#sec-functions-library-linkedLists" + ], "sec-functions-library-treefmt": [ "index.html#sec-functions-library-treefmt" ], diff --git a/lib/default.nix b/lib/default.nix index 3e8fdd1c762d4..c7fb2c7ea43ac 100644 --- a/lib/default.nix +++ b/lib/default.nix @@ -56,6 +56,7 @@ let # datatypes attrsets = callLibs ./attrsets.nix; lists = callLibs ./lists.nix; + linkedLists = callLibs ./linked-lists.nix; strings = callLibs ./strings.nix; stringsWithDeps = 
callLibs ./strings-with-deps.nix; diff --git a/lib/linked-lists.nix b/lib/linked-lists.nix new file mode 100644 index 0000000000000..534ac01a3a838 --- /dev/null +++ b/lib/linked-lists.nix @@ -0,0 +1,403 @@ +# Tests in: ./tests/linked-lists.nix +/** + Linked list operations. + + This library provides operations on linked lists of the form `{ head; tail; }` + with `null` representing the empty list. These are useful for streaming operations + and solving stack overflow issues with recursive algorithms that need to process + large amounts of data. + + :::{.warning} + These operations tend to be significantly slower than native lists, + so they should only be used when necessary (e.g., to avoid stack overflow or + for streaming use cases). + ::: + + A linked list is represented as either: + - `null` for an empty list + - `{ head; tail; }` where `head` is the first element and `tail` is the rest of the list +*/ +{ lib }: + +# Algorithmic insights: +# +# We use two main strategies to avoid stack overflow: +# +# 1. Tiered Binary Reduce (for processing list elements): +# - Process the list in exponentially growing chunks: 1, 2, 4, 8, 16, ... +# - Build a binary tree traversal from leaves to root without knowing length upfront +# - Stack depth: O(log n) where n is list length +# - Used by: `tieredBinaryReduce`, `length`, `toList` +# +# 2. Divide and Conquer (for operations where the size of the work is a known quantity): +# - Split the work in half recursively +# - Stack depth: O(log n) where n is the parameter value +# - Used by: `drop` +# +# Functions like `map` and `take` don't need these strategies because they produce +# output cells lazily: each output cell triggers evaluation of at most a bounded number +# of input cells (at most 1 for these functions). This keeps the stack bounded. +# Similarly, a function like `zip` would be fine as it evaluates one cell from each +# of two input lists (not stacked). 
+# Grouping consecutive elements is also safe as
+# each output cell evaluates at most a small constant number of input cells before
+# returning from the stack.
+# Safely evaluating the full result is the responsibility of the consumer!
+
+rec {
+
+  /**
+    Build a linked list from a native Nix list (array), keeping the element order.
+
+    The cells are assembled back to front with the builtin strict left fold over
+    the reversed input, rather than with Nix-level recursion over the elements.
+
+    # Inputs
+
+    `list`
+
+    : The native Nix list whose elements become the cells of the linked list
+
+    # Type
+
+    ```
+    fromList :: List a -> LinkedList a
+    ```
+
+    where `LinkedList a` is either `null` or `{ head :: a; tail :: LinkedList a; }`
+
+    # Examples
+    :::{.example}
+    ## `lib.linkedLists.fromList` usage example
+
+    ```nix
+    fromList []
+    => null
+
+    fromList [ 1 ]
+    => { head = 1; tail = null; }
+
+    fromList [ 1 2 3 ]
+    => { head = 1; tail = { head = 2; tail = { head = 3; tail = null; }; }; }
+    ```
+
+    :::
+  */
+  fromList =
+    list:
+    let
+      # Put `item` in front of the cells assembled so far.
+      prepend = rest: item: {
+        head = item;
+        tail = rest;
+      };
+      # Folding from the last element to the first leaves the first element
+      # in the outermost cell.
+      backToFront = lib.lists.reverseList list;
+    in
+    builtins.foldl' prepend null backToFront;
+
+  /**
+    Generic tiered binary reduce for linked lists.
+
+    Uses exponentially growing chunks (1, 2, 4, 8, ...) to keep stack depth
+    at O(log n). This builds a binary tree traversal from leaves to root
+    without knowing the total length upfront.
+
+    :::{.warning}
+    The identity value is combined into the result O(log n) times (wherever the
+    list ends before a chunk is full, and once at the very end), and
+    the binary operation is called O(n + log n) times. This matters if
+    either the identity or the operation has side effects or is expensive.
+    :::
+
+    # Inputs
+
+    `identity`
+
+    : Identity/zero value for the binary operation. It is the result for an
+      empty list and is also combined in wherever the list runs out.
+
+    `op`
+
+    : Binary operation to combine values (left -> right -> combined).
+      Operands are grouped as a tree, not strictly left to right, so the result
+      only matches a plain fold when `op` is associative and `identity` is
+      neutral for it.
+
+    `linkedList`
+
+    : A linked list of values to reduce
+
+    # Type
+
+    ```
+    tieredBinaryReduce :: a -> (a -> a -> a) -> LinkedList a -> a
+    ```
+
+    # Examples
+    :::{.example}
+    ## `lib.linkedLists.tieredBinaryReduce` usage example
+
+    ```nix
+    tieredBinaryReduce 0 (a: b: a + b) (fromList [ 1 2 3 4 ])
+    => 10
+
+    tieredBinaryReduce 1 (a: b: a * b) (fromList [ 2 3 4 ])
+    => 24
+    ```
+
+    :::
+  */
+  tieredBinaryReduce =
+    identity: op: linkedList:
+    let
+      # Fold at most `count` cells, starting at `cells`, as a balanced binary
+      # tree. Yields `{ result; rest; }`: the folded value and the cells that
+      # were not consumed.
+      foldSpan =
+        count: cells:
+        if count <= 0 || cells == null then
+          {
+            result = identity;
+            rest = cells;
+          }
+        else if count == 1 then
+          {
+            result = cells.head;
+            rest = cells.tail;
+          }
+        else
+          let
+            leftCount = count / 2;
+            left = foldSpan leftCount cells;
+            # The right half starts wherever the left half stopped.
+            right = foldSpan (count - leftCount) left.rest;
+          in
+          {
+            result = op left.result right.result;
+            rest = right.rest;
+          };
+
+      # Consume the list in tiers of width 1, 2, 4, 8, ..., combining each
+      # tier with the fold of everything that follows it.
+      foldTiers =
+        width: cells:
+        if cells == null then
+          {
+            result = identity;
+            rest = null;
+          }
+        else
+          let
+            tier = foldSpan width cells;
+            remainder = foldTiers (width * 2) tier.rest;
+          in
+          {
+            result = op tier.result remainder.result;
+            rest = remainder.rest;
+          };
+    in
+    (foldTiers 1 linkedList).result;
+
+  /**
+    Get the length of a linked list.
+
+    Uses a tiered binary reduce strategy to keep stack depth at O(log n).
+    Processes the list in exponentially growing chunks (1, 2, 4, 8, ...),
+    building a binary tree traversal from leaves to root without knowing
+    the total length upfront.
+
+    # Inputs
+
+    `linkedList`
+
+    : A linked list
+
+    # Type
+
+    ```
+    length :: LinkedList a -> Int
+    ```
+
+    # Examples
+    :::{.example}
+    ## `lib.linkedLists.length` usage example
+
+    ```nix
+    length null
+    => 0
+
+    length (fromList [ 1 2 3 ])
+    => 3
+    ```
+
+    :::
+  */
+  length =
+    linkedList:
+    let
+      # Every cell contributes exactly one to the total.
+      ones = map (_: 1) linkedList;
+    in
+    tieredBinaryReduce 0 builtins.add ones;
+
+  /**
+    Apply a function to every element of a linked list.
+
+    The result is built cell by cell: each `head` and `tail` of the output
+    is only computed when it is accessed.
+
+    # Inputs
+
+    `f`
+
+    : Function to apply to each element
+
+    `linkedList`
+
+    : A linked list
+
+    # Type
+
+    ```
+    map :: (a -> b) -> LinkedList a -> LinkedList b
+    ```
+
+    # Examples
+    :::{.example}
+    ## `lib.linkedLists.map` usage example
+
+    ```nix
+    map (x: x * 2) (fromList [ 1 2 3 ])
+    => fromList [ 2 4 6 ]
+
+    map (s: s + "!") (fromList [ "a" "b" ])
+    => fromList [ "a!" "b!" ]
+    ```
+
+    :::
+  */
+  map =
+    f: linkedList:
+    if linkedList != null then
+      {
+        head = f linkedList.head;
+        tail = map f linkedList.tail;
+      }
+    else
+      null;
+
+  /**
+    Skip the first n elements of a linked list and return what remains.
+
+    The count is split in half recursively (divide and conquer), which keeps
+    stack depth at O(log n) for large n values. Dropping more elements than
+    the list holds yields `null`.
+
+    # Inputs
+
+    `n`
+
+    : Number of elements to drop
+
+    `linkedList`
+
+    : A linked list
+
+    # Type
+
+    ```
+    drop :: Int -> LinkedList a -> LinkedList a
+    ```
+
+    # Examples
+    :::{.example}
+    ## `lib.linkedLists.drop` usage example
+
+    ```nix
+    drop 0 (fromList [ 1 2 3 ])
+    => fromList [ 1 2 3 ]
+
+    drop 2 (fromList [ 1 2 3 4 5 ])
+    => fromList [ 3 4 5 ]
+
+    drop 5 (fromList [ 1 2 3 ])
+    => null
+    ```
+
+    :::
+  */
+  drop =
+    n: linkedList:
+    let
+      # Split the count into two parts that add up to n.
+      firstPart = n / 2;
+      secondPart = n - firstPart;
+    in
+    if n <= 0 || linkedList == null then
+      linkedList
+    else if n == 1 then
+      linkedList.tail
+    else
+      # Drop the first part, then drop the second part from what is left.
+      drop secondPart (drop firstPart linkedList);
+
+  /**
+    Take the first n elements from a linked list.
+
+    # Inputs
+
+    `n`
+
+    : Number of elements to take
+
+    `linkedList`
+
+    : A linked list
+
+    # Type
+
+    ```
+    take :: Int -> LinkedList a -> LinkedList a
+    ```
+
+    # Examples
+    :::{.example}
+    ## `lib.linkedLists.take` usage example
+
+    ```nix
+    take 0 (fromList [ 1 2 3 ])
+    => null
+
+    take 2 (fromList [ 1 2 3 ])
+    => { head = 1; tail = { head = 2; tail = null; }; }
+
+    take 5 (fromList [ 1 2 3 ])
+    => { head = 1; tail = { head = 2; tail = { head = 3; tail = null; }; }; }
+    ```
+
+    :::
+  */
+  take =
+    n: linkedList:
+    if n > 0 && linkedList != null then
+      {
+        inherit (linkedList) head;
+        # The rest of the prefix is only computed when `tail` is accessed.
+        tail = take (n - 1) linkedList.tail;
+      }
+    else
+      null;
+
+  /**
+    Turn a linked list into a native Nix list (array) with the same elements
+    in the same order.
+
+    Each element is wrapped in a one-element list and the pieces are
+    concatenated with the tiered binary reduce, which keeps stack depth at
+    O(log n) for large lists.
+
+    # Inputs
+
+    `linkedList`
+
+    : A linked list to convert (either `null` or `{ head; tail; }`)
+
+    # Type
+
+    ```
+    toList :: LinkedList a -> List a
+    ```
+
+    where `LinkedList a` is either `null` or `{ head :: a; tail :: LinkedList a; }`
+
+    # Examples
+    :::{.example}
+    ## `lib.linkedLists.toList` usage example
+
+    ```nix
+    toList null
+    => []
+
+    toList { head = 1; tail = null; }
+    => [ 1 ]
+
+    toList { head = 1; tail = { head = 2; tail = { head = 3; tail = null; }; }; }
+    => [ 1 2 3 ]
+    ```
+
+    :::
+  */
+  toList =
+    linkedList:
+    let
+      # One single-element native list per cell, so reducing is plain concatenation.
+      singletons = map (item: [ item ]) linkedList;
+      concat = front: back: front ++ back;
+    in
+    tieredBinaryReduce [ ] concat singletons;
+
+}
diff --git a/lib/tests/linked-lists.nix b/lib/tests/linked-lists.nix
new file mode 100644
index 0000000000000..ffacba9573818
--- /dev/null
+++ b/lib/tests/linked-lists.nix
@@ -0,0 +1,546 @@
+# Run with:
+#   cd nixpkgs
+#   nix-instantiate --eval --strict lib/tests/linked-lists.nix
+#
+# If the resulting list is empty, all tests passed.
+# Alternatively, to run all `lib` tests:
+#   nix-build lib/tests/release.nix
+
+let
+  lib = import ../default.nix;
+  inherit (lib) linkedLists;
+in
+
+# Each attribute below is one test case: `expr` is the expression under test
+# and `expected` is the value it must be equal to.
+lib.runTests {
+  # fromList tests
+  testFromListEmpty = {
+    expr = linkedLists.fromList [ ];
+    expected = null;
+  };
+
+  testFromListSingle = {
+    expr = linkedLists.fromList [ 1 ];
+    expected = {
+      head = 1;
+      tail = null;
+    };
+  };
+
+  testFromListMultiple = {
+    expr = linkedLists.fromList [
+      1
+      2
+      3
+    ];
+    expected = {
+      head = 1;
+      tail = {
+        head = 2;
+        tail = {
+          head = 3;
+          tail = null;
+        };
+      };
+    };
+  };
+
+  testFromListStrings = {
+    expr = linkedLists.fromList [
+      "a"
+      "b"
+      "c"
+    ];
+    expected = {
+      head = "a";
+      tail = {
+        head = "b";
+        tail = {
+          head = "c";
+          tail = null;
+        };
+      };
+    };
+  };
+
+  # toList tests
+  testToListEmpty = {
+    expr = linkedLists.toList null;
+    expected = [ ];
+  };
+
+  testToListSingle = {
+    expr = linkedLists.toList {
+      head = 1;
+      tail = null;
+    };
+    expected = [ 1 ];
+  };
+
+  testToListMultiple = {
+    expr = linkedLists.toList {
+      head = 1;
+      tail = {
+        head = 2;
+        tail = {
+          head = 3;
+          tail = null;
+        };
+      };
+    };
+    expected = [
+      1
+      2
+      3
+    ];
+  };
+
+  testToListStrings = {
+    expr = linkedLists.toList {
+      head = "a";
+      tail = {
+        head = "b";
+        tail = {
+          head = "c";
+          tail = null;
+        };
+      };
+    };
+    expected = [
+      "a"
+      "b"
+      "c"
+    ];
+  };
+
+  # Round-trip tests
+  testRoundTripEmpty = {
+    expr = linkedLists.toList (linkedLists.fromList [ ]);
+    expected = [ ];
+  };
+
+  testRoundTripSingle = {
+    expr = linkedLists.toList (linkedLists.fromList [ 42 ]);
+    expected = [ 42 ];
+  };
+
+  testRoundTripMultiple = {
+    expr = linkedLists.toList (
+      linkedLists.fromList [
+        1
+        2
+        3
+        4
+        5
+      ]
+    );
+    expected = [
+      1
+      2
+      3
+      4
+      5
+    ];
+  };
+
+  testRoundTripReverse = {
+    expr = linkedLists.fromList (
+      linkedLists.toList {
+        head = "x";
+        tail = {
+          head = "y";
+          tail = null;
+        };
+      }
+    );
+    expected = {
+      head = "x";
+      tail = {
+        head = "y";
+        tail = null;
+      };
+    };
+  };
+
+  # tieredBinaryReduce tests
+  testTieredBinaryReduceEmpty = {
+    expr = linkedLists.tieredBinaryReduce 0 (a: b: a + b) null;
+    expected = 0;
+  };
+
+  testTieredBinaryReduceSum = {
+    expr = linkedLists.tieredBinaryReduce 0 (a: b: a + b) (
+      linkedLists.fromList [
+        1
+        2
+        3
+        4
+      ]
+    );
+    expected = 10;
+  };
+
+  testTieredBinaryReduceProduct = {
+    expr = linkedLists.tieredBinaryReduce 1 (a: b: a * b) (
+      linkedLists.fromList [
+        2
+        3
+        4
+      ]
+    );
+    expected = 24;
+  };
+
+  testTieredBinaryReduceLarge = {
+    expr = linkedLists.tieredBinaryReduce 0 (a: b: a + b) (
+      linkedLists.fromList (lib.genList (x: x) 20000)
+    );
+    # Triangle sum formula: sum of 0 to n-1 = (n-1) * n / 2
+    expected =
+      let
+        n = 20000;
+      in
+      (n - 1) * n / 2;
+  };
+
+  # map tests
+  testMapEmpty = {
+    expr = linkedLists.toList (linkedLists.map (x: x * 2) null);
+    expected = [ ];
+  };
+
+  testMapSingle = {
+    expr = linkedLists.toList (linkedLists.map (x: x * 2) (linkedLists.fromList [ 5 ]));
+    expected = [ 10 ];
+  };
+
+  testMapMultiple = {
+    expr = linkedLists.toList (
+      linkedLists.map (x: x * 2) (
+        linkedLists.fromList [
+          1
+          2
+          3
+          4
+        ]
+      )
+    );
+    expected = [
+      2
+      4
+      6
+      8
+    ];
+  };
+
+  testMapStrings = {
+    expr = linkedLists.toList (
+      linkedLists.map (s: s + "!") (
+        linkedLists.fromList [
+          "a"
+          "b"
+          "c"
+        ]
+      )
+    );
+    expected = [
+      "a!"
+      "b!"
+      "c!"
+    ];
+  };
+
+  testMapLarge = {
+    expr = linkedLists.length (
+      linkedLists.map (x: x * 2) (linkedLists.fromList (lib.genList (x: x) 20000))
+    );
+    expected = 20000;
+  };
+
+  testMapLargePrefix = {
+    expr = linkedLists.toList (
+      linkedLists.take 5 (linkedLists.map (x: x * 2) (linkedLists.fromList (lib.genList (x: x) 20000)))
+    );
+    expected = [
+      0
+      2
+      4
+      6
+      8
+    ];
+  };
+
+  # length tests
+  testLengthEmpty = {
+    expr = linkedLists.length null;
+    expected = 0;
+  };
+
+  testLengthSingle = {
+    expr = linkedLists.length (linkedLists.fromList [ 1 ]);
+    expected = 1;
+  };
+
+  testLengthMultiple = {
+    expr = linkedLists.length (
+      linkedLists.fromList [
+        1
+        2
+        3
+        4
+        5
+      ]
+    );
+    expected = 5;
+  };
+
+  testLengthLarge = {
+    expr = linkedLists.length (linkedLists.fromList (lib.genList (x: x) 20000));
+    expected = 20000;
+  };
+
+  # drop tests
+  testDropZero = {
+    expr = linkedLists.toList (
+      linkedLists.drop 0 (
+        linkedLists.fromList [
+          1
+          2
+          3
+        ]
+      )
+    );
+    expected = [
+      1
+      2
+      3
+    ];
+  };
+
+  testDropPartial = {
+    expr = linkedLists.toList (
+      linkedLists.drop 2 (
+        linkedLists.fromList [
+          1
+          2
+          3
+          4
+          5
+        ]
+      )
+    );
+    expected = [
+      3
+      4
+      5
+    ];
+  };
+
+  testDropAll = {
+    expr = linkedLists.toList (
+      linkedLists.drop 3 (
+        linkedLists.fromList [
+          1
+          2
+          3
+        ]
+      )
+    );
+    expected = [ ];
+  };
+
+  testDropMoreThanAvailable = {
+    expr = linkedLists.toList (
+      linkedLists.drop 10 (
+        linkedLists.fromList [
+          1
+          2
+          3
+        ]
+      )
+    );
+    expected = [ ];
+  };
+
+  testDropFromEmpty = {
+    expr = linkedLists.toList (linkedLists.drop 5 null);
+    expected = [ ];
+  };
+
+  testDropLarge = {
+    expr = linkedLists.toList (
+      linkedLists.drop 15000 (linkedLists.fromList (lib.genList (x: x) 20000))
+    );
+    expected = lib.genList (x: x + 15000) 5000;
+  };
+
+  testDropLargePrefix = {
+    expr = linkedLists.toList (
+      linkedLists.take 5 (linkedLists.drop 15000 (linkedLists.fromList (lib.genList (x: x) 20000)))
+    );
+    expected = [
+      15000
+      15001
+      15002
+      15003
+      15004
+    ];
+  };
+
+  # take tests
+  testTakeZero = {
+    expr = linkedLists.toList (
+      linkedLists.take 0 (
+        linkedLists.fromList [
+          1
+          2
+          3
+        ]
+      )
+    );
+    expected = [ ];
+  };
+
+  testTakePartial = {
+    expr = linkedLists.toList (
+      linkedLists.take 2 (
+        linkedLists.fromList [
+          1
+          2
+          3
+          4
+          5
+        ]
+      )
+    );
+    expected = [
+      1
+      2
+    ];
+  };
+
+  testTakeAll = {
+    expr = linkedLists.toList (
+      linkedLists.take 3 (
+        linkedLists.fromList [
+          1
+          2
+          3
+        ]
+      )
+    );
+    expected = [
+      1
+      2
+      3
+    ];
+  };
+
+  testTakeMoreThanAvailable = {
+    expr = linkedLists.toList (
+      linkedLists.take 10 (
+        linkedLists.fromList [
+          1
+          2
+          3
+        ]
+      )
+    );
+    expected = [
+      1
+      2
+      3
+    ];
+  };
+
+  # Large list tests - testing beyond default max-call-depth
+  # First test just the prefix to verify fromList is working correctly
+  testFromListLargePrefix = {
+    expr = linkedLists.toList (linkedLists.take 5 (linkedLists.fromList (lib.genList (x: x) 20000)));
+    expected = [
+      0
+      1
+      2
+      3
+      4
+    ];
+  };
+
+  testFromListLarge = {
+    expr = builtins.length (linkedLists.toList (linkedLists.fromList (lib.genList (x: x) 20000)));
+    expected = 20000;
+  };
+
+  # NOTE(review): this is currently the same expression as testFromListLarge;
+  # it is kept as a separately named check for toList.
+  testToListLarge = {
+    expr = builtins.length (linkedLists.toList (linkedLists.fromList (lib.genList (x: x) 20000)));
+    expected = 20000;
+  };
+
+  testRoundTripLarge = {
+    expr =
+      let
+        original = lib.genList (x: x) 20000;
+        result = linkedLists.toList (linkedLists.fromList original);
+      in
+      result == original;
+    expected = true;
+  };
+
+  # Test that first and last elements are preserved in large lists
+  testLargeListFirstLast = {
+    expr =
+      let
+        original = lib.genList (x: x) 20000;
+        linked = linkedLists.fromList original;
+        result = linkedLists.toList linked;
+      in
+      {
+        first = builtins.head result;
+        last = builtins.elemAt result 19999;
+      };
+    expected = {
+      first = 0;
+      last = 19999;
+    };
+  };
+
+  # Tests with very large lists to ensure O(log n) stack depth
+  # These would fail if each chunk were reduced with linear (one call per element) recursion
+  testTieredBinaryReduceVeryLarge = {
+    expr = linkedLists.tieredBinaryReduce 0 (a: b: a + b) (
+      linkedLists.fromList (lib.genList (x: x) 100000)
+    );
+    # Triangle sum formula: sum of 0 to n-1 = (n-1) * n / 2
+    expected =
+      let
+        n = 100000;
+      in
+      (n - 1) * n / 2;
+  };
+
+  testLengthVeryLarge = {
+    expr = linkedLists.length (linkedLists.fromList (lib.genList (x: x) 100000));
+    expected = 100000;
+  };
+
+  testToListVeryLarge = {
+    expr = builtins.length (linkedLists.toList (linkedLists.fromList (lib.genList (x: x) 100000)));
+    expected = 100000;
+  };
+}
diff --git a/lib/tests/test-with-nix.nix b/lib/tests/test-with-nix.nix
index 4fc65010b8787..54087751c7759 100644
--- a/lib/tests/test-with-nix.nix
+++ b/lib/tests/test-with-nix.nix
@@ -75,6 +75,9 @@ pkgs.runCommand "nixpkgs-lib-tests-nix-${nix.version}"
     echo "Running lib/tests/fetchers.nix"
     [[ $(nix-instantiate --eval --strict lib/tests/fetchers.nix | tee /dev/stderr) == '[ ]' ]];
 
+    echo "Running lib/tests/linked-lists.nix"
+    [[ $(nix-instantiate --eval --strict lib/tests/linked-lists.nix | tee /dev/stderr) == '[ ]' ]];
+
     mkdir $out
     echo success > $out/${nix.version}
   ''