diff --git a/lib/lists.nix b/lib/lists.nix
index fa2a526d16021..5d573dc972259 100644
--- a/lib/lists.nix
+++ b/lib/lists.nix
@@ -1829,9 +1829,9 @@ rec {
   /**
     Remove duplicate elements from the `list`. O(n^2) complexity.
 
-    :::{.note}
-    If the list only contains strings and order is not important, the complexity can be reduced to O(n log n) by using [`lib.lists.uniqueStrings`](#function-library-lib.lists.uniqueStrings) instead.
-    :::
+    See also:
+    - [`uniqueStrings`](#function-library-lib.lists.uniqueStrings) for O(n log n) performance with strings (does not preserve order)
+    - [`uniqueByString`](#function-library-lib.lists.uniqueByString) for comparing elements by a custom string key function (O(n log n), preserves order)
 
     # Inputs
 
@@ -1870,6 +1870,10 @@ rec {
     In that case use [`lib.lists.unique`](#function-library-lib.lists.unique) instead.
     :::
 
+    See also:
+    - [`unique`](#function-library-lib.lists.unique) for arbitrary types (O(n^2), preserves order)
+    - [`uniqueByString`](#function-library-lib.lists.uniqueByString) for comparing elements by a custom string key function (O(n log n), preserves order)
+
     # Inputs
 
     `list`
@@ -1925,6 +1929,77 @@ rec {
   */
   allUnique = list: (length (unique list) == length list);
 
+  /**
+    Remove duplicate elements from a list based on a string key function. O(n log n) complexity.
+
+    This is a "stable" deduplication that:
+    - Keeps the first occurrence of each unique key
+    - Preserves the relative ordering of kept elements
+    - Uses string comparison for determining uniqueness
+
+    This is more predictable than approaches that rely on attribute set ordering
+    (like `listToAttrs` -> `attrValues`), especially when dealing with store paths
+    whose hashes affect ordering on every change.
+
+    See also:
+    - [`unique`](#function-library-lib.lists.unique) for comparing elements directly (O(n^2), preserves order)
+    - [`uniqueStrings`](#function-library-lib.lists.uniqueStrings) for simple string deduplication (O(n log n), does not preserve order)
+
+    # Inputs
+
+    `key`
+
+    : Function that maps an element to the string identifying it. Any string context on the result is discarded before comparing.
+
+    `l`
+
+    : The list to deduplicate.
+
+    # Type
+
+    ```
+    uniqueByString :: (a -> String) -> [a] -> [a]
+    ```
+
+    # Examples
+    :::{.example}
+    ## `lib.lists.uniqueByString` usage example
+
+    ```nix
+    uniqueByString (x: x) [ "a" "b" "a" "c" ]
+    => [ "a" "b" "c" ]
+
+    uniqueByString (x: x.id) [
+      { id = "1"; name = "foo"; }
+      { id = "2"; name = "bar"; }
+      { id = "1"; name = "baz"; }
+    ]
+    => [ { id = "1"; name = "foo"; } { id = "2"; name = "bar"; } ]
+
+    uniqueByString toString [ 1 2 1 3 2 ]
+    => [ 1 2 3 ]
+    ```
+
+    :::
+  */
+  # Tests in: ./tests/misc.nix
+  uniqueByString =
+    key: l:
+    let
+      # Pair every position in `l` with the string key of the element found there.
+      # The string context is discarded because `builtins.groupBy` needs context-free names.
+      toEntry = index: {
+        inherit index;
+        name = builtins.unsafeDiscardStringContext (key (builtins.elemAt l index));
+      };
+      entries = builtins.genList toEntry (length l);
+      # `builtins.groupBy` keeps list order inside each group, so the head of a
+      # group is the first occurrence of its key.
+      byName = builtins.groupBy (entry: entry.name) entries;
+      isFirstOccurrence = entry: (builtins.head byName.${entry.name}).index == entry.index;
+    in
+    map (entry: builtins.elemAt l entry.index) (builtins.filter isFirstOccurrence entries);
+
   /**
     Intersects list 'list1' and another list (`list2`).
 
diff --git a/lib/tests/misc.nix b/lib/tests/misc.nix
index e24ffa29d4f7d..4f7289546be80 100644
--- a/lib/tests/misc.nix
+++ b/lib/tests/misc.nix
@@ -4841,4 +4841,182 @@ runTests {
       ];
     }
   );
+
+  # lib.lists.uniqueByString
+
+  # Repeated keys are dropped wherever they occur in the list.
+  testUniqueByStringBasic = {
+    expr = lists.uniqueByString (x: x) [
+      "a"
+      "b"
+      "a"
+      "c"
+      "b"
+    ];
+    expected = [
+      "a"
+      "b"
+      "c"
+    ];
+  };
+
+  # An empty list stays empty.
+  testUniqueByStringEmpty = {
+    expr = lists.uniqueByString (x: x) [ ];
+    expected = [ ];
+  };
+
+  # A list without repeated keys is returned unchanged.
+  testUniqueByStringNoDuplicates =
+    let
+      input = [
+        "a"
+        "b"
+        "c"
+      ];
+    in
+    {
+      expr = lists.uniqueByString (x: x) input;
+      expected = input;
+    };
+
+  # A list made of one repeated key collapses to a single element.
+  testUniqueByStringAllDuplicates = {
+    expr = lists.uniqueByString (x: x) [
+      "a"
+      "a"
+      "a"
+    ];
+    expected = [ "a" ];
+  };
+
+  # Survivors keep their input order rather than being sorted by key.
+  testUniqueByStringPreservesOrder = {
+    expr = lists.uniqueByString (x: x) [
+      "z"
+      "a"
+      "m"
+      "z"
+      "b"
+      "a"
+    ];
+    expected = [
+      "z"
+      "a"
+      "m"
+      "b"
+    ];
+  };
+
+  # Elements count as duplicates when their keys match, even if they differ elsewhere.
+  testUniqueByStringCustomKey =
+    let
+      first = {
+        id = "1";
+        value = "first";
+      };
+      second = {
+        id = "2";
+        value = "second";
+      };
+      duplicate = {
+        id = "1";
+        value = "duplicate";
+      };
+      third = {
+        id = "3";
+        value = "third";
+      };
+    in
+    {
+      expr = lists.uniqueByString (x: x.id) [
+        first
+        second
+        duplicate
+        third
+      ];
+      expected = [
+        first
+        second
+        third
+      ];
+    };
+
+  # Non-string elements work with a key function that renders them as strings.
+  testUniqueByStringNumbers = {
+    expr = lists.uniqueByString toString [
+      1
+      2
+      1
+      3
+      2
+      4
+    ];
+    expected = [
+      1
+      2
+      3
+      4
+    ];
+  };
+
+  # Keys that carry string context (here: `drvPath`) are accepted.
+  testUniqueByStringWithContext =
+    let
+      mkDrv =
+        name:
+        derivation {
+          inherit name;
+          system = "x86_64-linux";
+          builder = "/bin/sh";
+        };
+      drv1 = mkDrv "foo";
+      drv2 = mkDrv "bar";
+      drv3 = mkDrv "foo"; # same as drv1
+    in
+    {
+      expr = lists.uniqueByString (d: d.drvPath) [
+        drv1
+        drv2
+        drv3
+      ];
+      expected = [
+        drv1
+        drv2
+      ];
+    };
+
+  # A one-element list is returned as is.
+  testUniqueByStringSingleElement = {
+    expr = lists.uniqueByString (x: x) [ "only" ];
+    expected = [ "only" ];
+  };
+
+  # When keys collide, the earliest element is the one that is kept.
+  testUniqueByStringKeepsFirstOccurrence =
+    let
+      firstA = {
+        key = "a";
+        order = 1;
+      };
+      onlyB = {
+        key = "b";
+        order = 2;
+      };
+      laterA = {
+        key = "a";
+        order = 3;
+      };
+    in
+    {
+      expr = lists.uniqueByString (x: x.key) [
+        firstA
+        onlyB
+        laterA
+      ];
+      expected = [
+        firstA
+        onlyB
+      ];
+    };
 }