diff --git a/README.md b/README.md index 55230d8..9a0b06a 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ Features: - test - Escaping of "`/`" (by "`~1`") and "`~`" (by "`~0`") - Allow usage of `-` for appending things to list (Add and Copy operation) +- Smart list diffing with `object_hash` for efficient patches on collections with unique identifiers ## Getting started @@ -52,6 +53,36 @@ iex> Jsonpatch.diff(source, destination) ] ``` +### Smart List Diffing with `object_hash` + +Use `object_hash` to generate efficient patches for lists of objects with unique identifiers, producing minimal operations instead of cascading replacements. + +```elixir +iex> original = [ + %{id: 1, name: "Alice"}, + %{id: 2, name: "Bob"} +] +iex> updated = [ + %{id: 99, name: "New"}, + %{id: 1, name: "Alice"}, + %{id: 2, name: "Bob"} +] + +# Traditional pairwise diff - multiple replace operations +# >> Jsonpatch.diff(original, updated) +[ + %{op: "add", path: "/2", value: %{id: 2, name: "Bob"}} + %{op: "replace", path: "/0", value: %{id: 99, name: "New"}}, + %{op: "replace", path: "/1", value: %{id: 1, name: "Alice"}}, +] + +# With object_hash - single add operation +iex> Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end) +[ + %{op: "add", path: "/0", value: %{id: 99, name: "New"}} +] +``` + ### Apply patches ```elixir @@ -69,4 +100,4 @@ iex> Jsonpatch.apply_patch(patch, target) ## Important sources - [Official RFC 6902](https://tools.ietf.org/html/rfc6902) -- [Inspiration: python-json-patch](https://github.com/stefankoegl/python-json-patch) +- [Inspiration: python-json-patch](https://github.com/stefankoegl/python-json-patch) diff --git a/benchmarks/generic_benchmark.exs b/benchmarks/generic_benchmark.exs new file mode 100644 index 0000000..98e2b3a --- /dev/null +++ b/benchmarks/generic_benchmark.exs @@ -0,0 +1,254 @@ +# Jsonpatch Diff Performance Benchmark +# Run with: mix run test/benchmark.exs + +defmodule JsonpatchBenchmark do + @doc """ + Prepare 
complex test cases for benchmarking + """ + def prepare_test_cases() do + %{ + "Complex Maps - E-commerce Order" => %{ + doc: %{ + "order_id" => "12345", + "customer" => %{ + "name" => "John Doe", + "email" => "john@example.com", + "address" => %{ + "street" => "123 Main St", + "city" => "Springfield", + "country" => "USA" + } + }, + "items" => %{ + "item1" => %{"name" => "Laptop", "price" => 999.99, "quantity" => 1}, + "item2" => %{"name" => "Mouse", "price" => 29.99, "quantity" => 2} + }, + "status" => "pending", + "total" => 1059.97 + }, + expected: %{ + "order_id" => "12345", + "customer" => %{ + "name" => "John Doe", + "email" => "john.doe@example.com", + "address" => %{ + "street" => "456 Oak Ave", + "city" => "Springfield", + "country" => "USA", + "zipcode" => "12345" + }, + "phone" => "+1-555-0123" + }, + "items" => %{ + "item1" => %{"name" => "Gaming Laptop", "price" => 1299.99, "quantity" => 1}, + "item3" => %{"name" => "Keyboard", "price" => 79.99, "quantity" => 1} + }, + "status" => "confirmed", + "total" => 1379.98, + "discount" => 50.00 + } + }, + "Complex Lists - Task Management" => %{ + doc: [ + %{ + "id" => 1, + "task" => "Write documentation", + "priority" => "high", + "completed" => false + }, + %{"id" => 2, "task" => "Fix bug #123", "priority" => "medium", "completed" => true}, + %{"id" => 3, "task" => "Review PR", "priority" => "low", "completed" => false}, + %{"id" => 4, "task" => "Deploy to staging", "priority" => "high", "completed" => false}, + %{"id" => 5, "task" => "Update tests", "priority" => "medium", "completed" => true} + ], + expected: [ + %{ + "id" => 1, + "task" => "Write comprehensive documentation", + "priority" => "high", + "completed" => true + }, + %{ + "id" => 6, + "task" => "Optimize database queries", + "priority" => "high", + "completed" => false + }, + %{"id" => 3, "task" => "Review PR", "priority" => "medium", "completed" => false}, + %{"id" => 7, "task" => "Setup monitoring", "priority" => "low", "completed" => false}, 
+ %{ + "id" => 4, + "task" => "Deploy to production", + "priority" => "critical", + "completed" => false + } + ] + }, + "Mixed Maps and Lists - Social Media Post" => %{ + doc: %{ + "post_id" => "abc123", + "content" => "Just had an amazing day!", + "author" => %{ + "username" => "johndoe", + "followers" => 1250, + "verified" => false + }, + "comments" => [ + %{"user" => "alice", "text" => "Great to hear!", "likes" => 5}, + %{"user" => "bob", "text" => "Awesome!", "likes" => 3} + ], + "tags" => ["happy", "life"], + "metadata" => %{ + "created_at" => "2023-01-01T10:00:00Z", + "location" => "New York", + "device" => "mobile" + } + }, + expected: %{ + "post_id" => "abc123", + "content" => "Just had an absolutely amazing day! #blessed", + "author" => %{ + "username" => "johndoe", + "followers" => 1275, + "verified" => true, + "display_name" => "John Doe" + }, + "comments" => [ + %{"user" => "alice", "text" => "Great to hear! So happy for you!", "likes" => 8}, + %{"user" => "charlie", "text" => "Inspiring!", "likes" => 2}, + %{"user" => "bob", "text" => "Awesome!", "likes" => 3, "reply_to" => "alice"} + ], + "tags" => ["happy", "life", "blessed", "inspiration"], + "metadata" => %{ + "created_at" => "2023-01-01T10:00:00Z", + "updated_at" => "2023-01-01T10:15:00Z", + "location" => "New York", + "device" => "mobile", + "engagement_score" => 8.5 + }, + "reactions" => %{ + "likes" => 45, + "shares" => 12, + "hearts" => 23 + } + } + }, + "Deep Nesting - Configuration Tree" => %{ + doc: %{ + "application" => %{ + "name" => "MyApp", + "version" => "1.0.0", + "modules" => %{ + "authentication" => %{ + "enabled" => true, + "providers" => %{ + "oauth" => %{ + "google" => %{"client_id" => "123", "scopes" => ["email", "profile"]}, + "github" => %{"client_id" => "456", "scopes" => ["user:email"]} + }, + "local" => %{"enabled" => true, "password_policy" => %{"min_length" => 8}} + } + }, + "database" => %{ + "primary" => %{ + "host" => "localhost", + "port" => 5432, + "name" => 
"myapp_db", + "pool" => %{"size" => 10, "timeout" => 5000} + }, + "replica" => %{ + "host" => "replica.example.com", + "port" => 5432, + "name" => "myapp_db" + } + } + } + } + }, + expected: %{ + "application" => %{ + "name" => "MyApp", + "version" => "1.1.0", + "modules" => %{ + "authentication" => %{ + "enabled" => true, + "providers" => %{ + "oauth" => %{ + "google" => %{ + "client_id" => "123", + "scopes" => ["email", "profile", "calendar"] + }, + "github" => %{"client_id" => "789", "scopes" => ["user:email", "read:user"]}, + "microsoft" => %{"client_id" => "999", "scopes" => ["User.Read"]} + }, + "local" => %{ + "enabled" => true, + "password_policy" => %{"min_length" => 12, "require_symbols" => true} + }, + "saml" => %{ + "enabled" => false, + "metadata_url" => "https://sso.example.com/metadata" + } + } + }, + "database" => %{ + "primary" => %{ + "host" => "db.example.com", + "port" => 5432, + "name" => "myapp_production", + "pool" => %{"size" => 20, "timeout" => 10_000, "idle_timeout" => 30_000} + }, + "cache" => %{ + "host" => "redis.example.com", + "port" => 6379, + "ttl" => 3600 + } + }, + "monitoring" => %{ + "metrics" => %{"enabled" => true, "interval" => 60}, + "logging" => %{"level" => "info", "format" => "json"} + } + }, + "features" => %{ + "feature_flags" => %{"new_ui" => true, "beta_features" => false} + } + } + } + } + } + end + + @doc """ + Run the benchmark + """ + def run_benchmark() do + Benchee.run( + %{ + # I was using it for performance comparision, now faster version is the default one + # "Faster JsonPatch" => fn %{doc: doc, expected: expected} -> + # Jsonpatch.Faster.diff(doc, expected) + # end, + "JsonPatch" => fn %{doc: doc, expected: expected} -> + Jsonpatch.diff(doc, expected) + end + }, + inputs: prepare_test_cases(), + warmup: 0.1, + time: 0.5, + memory_time: 0.2, + reduction_time: 0.2, + parallel: 2, + formatters: [ + Benchee.Formatters.Console + ], + print: [ + benchmarking: true, + configuration: false, + fast_warning: false + 
] + ) + end +end + +# Run the benchmark +JsonpatchBenchmark.run_benchmark() diff --git a/benchmarks/object_hash_benchmark.exs b/benchmarks/object_hash_benchmark.exs new file mode 100644 index 0000000..5509471 --- /dev/null +++ b/benchmarks/object_hash_benchmark.exs @@ -0,0 +1,420 @@ +# Object Hash List Diff Performance Benchmark +# Run with: mix run object_hash_benchmark.exs + +defmodule ObjectHashBenchmark do + @doc """ + Prepare test cases focused on list diffing with object hashing + """ + def prepare_test_cases() do + %{ + "Small List (20 items)" => prepare_list_case(20), + "Medium List (100 items)" => prepare_list_case(100), + "Large List (500 items)" => prepare_list_case(500), + "Very Large List (1000 items)" => prepare_list_case(1_000), + "Nested Lists - User Management" => prepare_nested_case(), + "Complex Objects - Product Catalog" => prepare_complex_objects_case(), + "Mixed Operations - Social Feed" => prepare_mixed_operations_case() + } + end + + defp prepare_list_case(size) do + # Create original list with sequential IDs + original = + 1..size + |> Enum.map(fn id -> + %{ + "id" => id, + "name" => "Item #{id}", + "status" => Enum.random(["active", "inactive", "pending"]), + "value" => :rand.uniform(1_000), + "metadata" => %{ + "created_at" => "2023-01-#{rem(id, 28) + 1}T10:00:00Z", + "category" => Enum.random(["A", "B", "C", "D"]) + } + } + end) + + # Create modified list with various operations: + # - Remove some items (every 7th item) + # - Modify some items (every 5th item) + # - Add new items + # - Reorder some items + modified = + original + |> Enum.reject(fn %{"id" => id} -> rem(id, 7) == 0 end) # Remove every 7th + |> Enum.map(fn item -> + if rem(item["id"], 5) == 0 do + # Modify every 5th item + %{item | "name" => "Modified #{item["name"]}", "value" => item["value"] * 2} + else + item + end + end) + |> then(fn list -> + # Add some new items + new_items = + (size + 1)..(size + div(size, 10)) + |> Enum.map(fn id -> + %{ + "id" => id, + "name" => 
"New Item #{id}", + "status" => "new", + "value" => :rand.uniform(1_000), + "metadata" => %{ + "created_at" => "2023-12-01T10:00:00Z", + "category" => "NEW" + } + } + end) + + list ++ new_items + end) + |> Enum.shuffle() # Reorder items + + %{doc: original, expected: modified} + end + + defp prepare_nested_case() do + original = %{ + "users" => [ + %{ + "id" => 1, + "name" => "Alice", + "permissions" => [ + %{"id" => 101, "resource" => "posts", "action" => "read"}, + %{"id" => 102, "resource" => "posts", "action" => "write"}, + %{"id" => 103, "resource" => "users", "action" => "read"} + ] + }, + %{ + "id" => 2, + "name" => "Bob", + "permissions" => [ + %{"id" => 201, "resource" => "posts", "action" => "read"}, + %{"id" => 202, "resource" => "comments", "action" => "write"} + ] + }, + %{ + "id" => 3, + "name" => "Charlie", + "permissions" => [ + %{"id" => 301, "resource" => "posts", "action" => "read"} + ] + } + ], + "groups" => [ + %{ + "id" => 10, + "name" => "Admins", + "members" => [ + %{"id" => 1, "role" => "owner"}, + %{"id" => 2, "role" => "admin"} + ] + }, + %{ + "id" => 20, + "name" => "Users", + "members" => [ + %{"id" => 2, "role" => "member"}, + %{"id" => 3, "role" => "member"} + ] + } + ] + } + + expected = %{ + "users" => [ + %{ + "id" => 1, + "name" => "Alice Smith", + "permissions" => [ + %{"id" => 101, "resource" => "posts", "action" => "read"}, + %{"id" => 102, "resource" => "posts", "action" => "write"}, + %{"id" => 103, "resource" => "users", "action" => "read"}, + %{"id" => 104, "resource" => "users", "action" => "write"} + ] + }, + %{ + "id" => 4, + "name" => "David", + "permissions" => [ + %{"id" => 401, "resource" => "posts", "action" => "read"} + ] + }, + %{ + "id" => 3, + "name" => "Charlie Brown", + "permissions" => [ + %{"id" => 301, "resource" => "posts", "action" => "read"}, + %{"id" => 302, "resource" => "comments", "action" => "read"} + ] + } + ], + "groups" => [ + %{ + "id" => 10, + "name" => "Administrators", + "members" => [ + 
%{"id" => 1, "role" => "owner"}, + %{"id" => 4, "role" => "admin"} + ] + }, + %{ + "id" => 30, + "name" => "Moderators", + "members" => [ + %{"id" => 3, "role" => "moderator"} + ] + } + ] + } + + %{doc: original, expected: expected} + end + + defp prepare_complex_objects_case() do + original = [ + %{ + "id" => "prod-001", + "name" => "Laptop Pro", + "price" => 1299.99, + "variants" => [ + %{"id" => "var-001", "color" => "silver", "storage" => "256GB", "stock" => 10}, + %{"id" => "var-002", "color" => "space-gray", "storage" => "512GB", "stock" => 5} + ], + "reviews" => [ + %{"id" => "rev-001", "rating" => 5, "comment" => "Excellent!"}, + %{"id" => "rev-002", "rating" => 4, "comment" => "Very good"} + ] + }, + %{ + "id" => "prod-002", + "name" => "Wireless Mouse", + "price" => 79.99, + "variants" => [ + %{"id" => "var-003", "color" => "black", "connectivity" => "bluetooth", "stock" => 25}, + %{"id" => "var-004", "color" => "white", "connectivity" => "usb", "stock" => 15} + ], + "reviews" => [ + %{"id" => "rev-003", "rating" => 4, "comment" => "Good quality"} + ] + }, + %{ + "id" => "prod-003", + "name" => "Keyboard", + "price" => 129.99, + "variants" => [ + %{"id" => "var-005", "layout" => "US", "switches" => "mechanical", "stock" => 8} + ], + "reviews" => [] + } + ] + + expected = [ + %{ + "id" => "prod-001", + "name" => "Laptop Pro Max", + "price" => 1499.99, + "variants" => [ + %{"id" => "var-001", "color" => "silver", "storage" => "256GB", "stock" => 8}, + %{"id" => "var-002", "color" => "space-gray", "storage" => "512GB", "stock" => 3}, + %{"id" => "var-006", "color" => "gold", "storage" => "1TB", "stock" => 2} + ], + "reviews" => [ + %{"id" => "rev-001", "rating" => 5, "comment" => "Excellent product!"}, + %{"id" => "rev-002", "rating" => 4, "comment" => "Very good"}, + %{"id" => "rev-004", "rating" => 5, "comment" => "Amazing performance"} + ] + }, + %{ + "id" => "prod-004", + "name" => "Wireless Headphones", + "price" => 199.99, + "variants" => [ + %{"id" => 
"var-007", "color" => "black", "noise_cancelling" => true, "stock" => 12} + ], + "reviews" => [ + %{"id" => "rev-005", "rating" => 5, "comment" => "Great sound quality"} + ] + }, + %{ + "id" => "prod-003", + "name" => "Mechanical Keyboard", + "price" => 149.99, + "variants" => [ + %{"id" => "var-005", "layout" => "US", "switches" => "mechanical", "stock" => 12}, + %{"id" => "var-008", "layout" => "UK", "switches" => "mechanical", "stock" => 5} + ], + "reviews" => [ + %{"id" => "rev-006", "rating" => 4, "comment" => "Solid build quality"} + ] + } + ] + + %{doc: original, expected: expected} + end + + defp prepare_mixed_operations_case() do + # Simulate a social media feed with posts, comments, and reactions + original = [ + %{ + "id" => "post-1", + "content" => "Beautiful sunset today!", + "author" => "alice", + "timestamp" => "2023-01-01T18:00:00Z", + "comments" => [ + %{"id" => "comment-1", "author" => "bob", "text" => "Amazing!"}, + %{"id" => "comment-2", "author" => "charlie", "text" => "Where was this?"} + ], + "reactions" => [ + %{"id" => "react-1", "user" => "bob", "type" => "like"}, + %{"id" => "react-2", "user" => "charlie", "type" => "love"} + ] + }, + %{ + "id" => "post-2", + "content" => "Just finished my morning run!", + "author" => "bob", + "timestamp" => "2023-01-02T08:00:00Z", + "comments" => [ + %{"id" => "comment-3", "author" => "alice", "text" => "Great job!"} + ], + "reactions" => [ + %{"id" => "react-3", "user" => "alice", "type" => "like"} + ] + }, + %{ + "id" => "post-3", + "content" => "Working on a new project", + "author" => "charlie", + "timestamp" => "2023-01-03T14:00:00Z", + "comments" => [], + "reactions" => [] + } + ] + + expected = [ + %{ + "id" => "post-1", + "content" => "Beautiful sunset today! #nature", + "author" => "alice", + "timestamp" => "2023-01-01T18:00:00Z", + "comments" => [ + %{"id" => "comment-1", "author" => "bob", "text" => "Amazing! 
Where is this?"}, + %{"id" => "comment-4", "author" => "david", "text" => "Stunning colors!"} + ], + "reactions" => [ + %{"id" => "react-1", "user" => "bob", "type" => "like"}, + %{"id" => "react-2", "user" => "charlie", "type" => "love"}, + %{"id" => "react-4", "user" => "david", "type" => "wow"} + ] + }, + %{ + "id" => "post-4", + "content" => "New coffee shop discovery!", + "author" => "david", + "timestamp" => "2023-01-04T09:00:00Z", + "comments" => [ + %{"id" => "comment-5", "author" => "alice", "text" => "I need to try this!"} + ], + "reactions" => [ + %{"id" => "react-5", "user" => "alice", "type" => "like"} + ] + }, + %{ + "id" => "post-3", + "content" => "Working on a new exciting project!", + "author" => "charlie", + "timestamp" => "2023-01-03T14:00:00Z", + "comments" => [ + %{"id" => "comment-6", "author" => "bob", "text" => "Can't wait to see it!"} + ], + "reactions" => [ + %{"id" => "react-6", "user" => "bob", "type" => "like"} + ] + } + ] + + %{doc: original, expected: expected} + end + + @doc """ + Hash function for objects with ID + """ + def id_hash_fn(%{"id" => id}), do: id + # def id_hash_fn(string) when is_binary(string), do: string + def id_hash_fn(_item), do: raise "Unable to find hash" + + @doc """ + Run the benchmark comparing with and without object_hash + """ + def run_benchmark() do + Benchee.run( + %{ + "Without object_hash (pairwise)" => fn %{doc: doc, expected: expected} -> + Jsonpatch.diff(doc, expected) + end, + "With object_hash (greedy)" => fn %{doc: doc, expected: expected} -> + Jsonpatch.diff(doc, expected, object_hash: &id_hash_fn/1) + end, + }, + inputs: prepare_test_cases(), + warmup: 0.2, + time: 1.0, + memory_time: 0.5, + reduction_time: 0.5, + parallel: 1, + formatters: [ + Benchee.Formatters.Console + ], + print: [ + benchmarking: true, + configuration: true, + fast_warning: false + ] + ) + end + + @doc """ + Run a detailed analysis showing the patches generated + """ + def analyze_patches() do + IO.puts("=== Patch 
Analysis ===\n") + + test_cases = prepare_test_cases() + + Enum.each(test_cases, fn {name, %{doc: doc, expected: expected}} -> + IO.puts("## #{name}") + + patches_without_hash = Jsonpatch.diff(doc, expected) + patches_with_hash = Jsonpatch.diff(doc, expected, object_hash: &id_hash_fn/1) + + IO.puts("Without object_hash: #{length(patches_without_hash)} patches") + IO.puts("With object_hash: #{length(patches_with_hash)} patches") + + # # Show first few patches for comparison + # IO.puts("\nFirst 3 patches without object_hash:") + # patches_without_hash |> Enum.take(3) |> Enum.each(&IO.inspect/1) + + IO.puts("") + + # IO.puts("\nFirst 3 patches with object_hash:") + # patches_with_hash |> Enum.take(3) |> Enum.each(&IO.inspect/1) + + IO.puts("Result with object_hash: #{Jsonpatch.apply_patch!(patches_with_hash, doc) == expected}") + IO.puts("Result without object_hash: #{Jsonpatch.apply_patch!(patches_without_hash, doc) == expected}") + IO.puts("\n" <> String.duplicate("-", 50) <> "\n") + + end) + end +end + +# Run the benchmark +IO.puts("Starting Object Hash Benchmark...") +IO.puts("This benchmark compares list diffing performance with and without object_hash option.") +IO.puts("The object_hash option uses LCS algorithm to better handle list reordering.\n") + +ObjectHashBenchmark.run_benchmark() + +# Uncomment to see patch analysis +# ObjectHashBenchmark.analyze_patches() diff --git a/lib/jsonpatch.ex b/lib/jsonpatch.ex index d685123..1ed13b9 100644 --- a/lib/jsonpatch.ex +++ b/lib/jsonpatch.ex @@ -204,10 +204,12 @@ defmodule Jsonpatch do @spec diff(Types.json_container(), Types.json_container(), Types.opts_diff()) :: [Jsonpatch.t()] def diff(source, destination, opts \\ []) do opts = - Keyword.validate!(opts, + opts + |> Keyword.update(:object_hash, nil, &make_safe_hash_fn/1) + |> Keyword.validate!( ancestor_path: "", - # by default, a no-op - prepare_map: fn map -> map end + prepare_map: fn struct -> struct end, + object_hash: nil ) cond do @@ -290,17 +292,29 @@ 
defmodule Jsonpatch do do_map_diff(rest, source, ancestor_path, patches, [key | checked_keys], opts) end - defp do_list_diff(destination, source, ancestor_path, patches, idx, opts) + defp do_list_diff(destination, source, ancestor_path, patches, idx, opts) do + if opts[:object_hash] do + do_hash_list_diff(destination, source, ancestor_path, patches, opts) + else + do_pairwise_list_diff(destination, source, ancestor_path, patches, idx, opts) + end + catch + # happens if we've got a nil hash or we tried to hash a non-map + :hash_not_implemented -> + do_pairwise_list_diff(destination, source, ancestor_path, patches, idx, opts) + end + + defp do_pairwise_list_diff(destination, source, ancestor_path, patches, idx, opts) - defp do_list_diff([], [], _path, patches, _idx, _opts), do: patches + defp do_pairwise_list_diff([], [], _path, patches, _idx, _opts), do: patches - defp do_list_diff([], [_item | source_rest], ancestor_path, patches, idx, opts) do + defp do_pairwise_list_diff([], [_item | source_rest], ancestor_path, patches, idx, opts) do # if we find any leftover items in source, we have to remove them patches = [%{op: "remove", path: "#{ancestor_path}/#{idx}"} | patches] - do_list_diff([], source_rest, ancestor_path, patches, idx + 1, opts) + do_pairwise_list_diff([], source_rest, ancestor_path, patches, idx + 1, opts) end - defp do_list_diff(items, [], ancestor_path, patches, idx, opts) do + defp do_pairwise_list_diff(items, [], ancestor_path, patches, idx, opts) do # we have to do it without recursion, because we have to keep the order of the items items |> Enum.map_reduce(idx, fn val, idx -> @@ -311,12 +325,163 @@ defmodule Jsonpatch do |> Kernel.++(patches) end - defp do_list_diff([val | rest], [source_val | source_rest], ancestor_path, patches, idx, opts) do + defp do_pairwise_list_diff( + [val | rest], + [source_val | source_rest], + ancestor_path, + patches, + idx, + opts + ) do # case when there's an item in both desitation and source. 
Let's just compare them patches = do_diff(val, source_val, ancestor_path, idx, patches, opts) - do_list_diff(rest, source_rest, ancestor_path, patches, idx + 1, opts) + do_pairwise_list_diff(rest, source_rest, ancestor_path, patches, idx + 1, opts) + end + + defp do_hash_list_diff(destination, source, ancestor_path, patches, opts) do + hash_fn = Keyword.fetch!(opts, :object_hash) + + {additions, modifications, removals} = + greedy_find_additions_modifications_removals( + List.to_tuple(destination), + List.to_tuple(source), + index_by(destination, hash_fn), + index_by(source, hash_fn), + hash_fn, + ancestor_path, + opts + ) + + List.flatten([removals, additions, modifications, patches]) end + # credo:disable-for-next-line + defp greedy_find_additions_modifications_removals( + dest, + source, + dest_map, + source_map, + hash_fn, + path, + opts, + dest_idx \\ 0, + source_idx \\ 0, + additions \\ [], + modifications \\ [], + removals \\ [] + ) do + cond do + tuple_size(dest) == dest_idx -> + # we're at the end of the destination tuple, let's remove all remaining source items + removals = add_removals(source_idx, tuple_size(source) - 1, path, removals) + {Enum.reverse(additions), modifications, removals} + + tuple_size(source) == source_idx -> + # we're at the end of the source tuple, let's add all remaining destination items + additions = add_additions(dest_idx, tuple_size(dest) - 1, path, dest, additions, opts) + {Enum.reverse(additions), modifications, removals} + + true -> + # we're in the middle of the tuples, let's find the next matching items + dest_item = elem(dest, dest_idx) + source_item = elem(source, source_idx) + + source_hash = hash_fn.(source_item) + dest_hash = hash_fn.(dest_item) + + if source_hash == dest_hash do + # same items, let's diff recursively and bump both indexes + modifications = do_diff(dest_item, source_item, path, dest_idx, modifications, opts) + + greedy_find_additions_modifications_removals( + dest, + source, + dest_map, + source_map, + 
hash_fn, + path, + opts, + dest_idx + 1, + source_idx + 1, + additions, + modifications, + removals + ) + else + # different items, let's find index of destination item in source and vice versa + {next_dest_idx, next_source_idx} = + determine_next_idx( + dest_idx, + source_idx, + Map.get(dest_map, source_hash), + Map.get(source_map, dest_hash) + ) + + removals = add_removals(source_idx, next_source_idx - 1, path, removals) + additions = add_additions(dest_idx, next_dest_idx - 1, path, dest, additions, opts) + + greedy_find_additions_modifications_removals( + dest, + source, + dest_map, + source_map, + hash_fn, + path, + opts, + next_dest_idx, + next_source_idx, + additions, + modifications, + removals + ) + end + end + end + + # credo:disable-for-next-line + defp determine_next_idx(d_idx, s_idx, next_d_idx, next_s_idx) do + dest_found = next_d_idx != nil and next_d_idx > d_idx + source_found = next_s_idx != nil and next_s_idx > s_idx + source_closer = dest_found and source_found and next_s_idx - s_idx < next_d_idx - d_idx + + cond do + # in case when we can jump to either of them, we want to jump to the closer one + source_closer -> {d_idx, next_s_idx} + # only source is found ahead, we have to do source jump + next_d_idx == nil and source_found -> {d_idx, next_s_idx} + # only dest is found ahead, we have to do dest jump + next_s_idx == nil and dest_found -> {next_d_idx, s_idx} + # neither is found ahead, we have to advance both indexes + true -> {d_idx + 1, s_idx + 1} + end + end + + @compile {:inline, index_by: 2} + defp index_by(list, hash_fn) do + list + |> Enum.reduce({%{}, 0}, fn item, {map, idx} -> + # if we have a hash collision, we throw an error and handle as if the hash is not implemented + {Map.update(map, hash_fn.(item), idx, fn _ -> throw(:hash_not_implemented) end), idx + 1} + end) + |> elem(0) + end + + @compile {:inline, add_removals: 4} + defp add_removals(from_idx, to_idx, path, removals) do + Enum.reduce(from_idx..to_idx//1, removals, fn idx, 
removals -> + [%{op: "remove", path: "#{path}/#{idx}"} | removals] + end) + end + + @compile {:inline, add_additions: 6} + defp add_additions(from_idx, to_idx, path, dest_tuple, additions, opts) do + Enum.reduce(from_idx..to_idx//1, additions, fn idx, additions -> + value = dest_tuple |> elem(idx) |> maybe_prepare_map(opts) + [%{op: "add", path: "#{path}/#{idx}", value: value} | additions] + end) + end + + @compile {:inline, maybe_prepare_map: 2} defp maybe_prepare_map(value, opts) when is_map(value) do prepare_fn = Keyword.fetch!(opts, :prepare_map) prepare_fn.(value) @@ -325,7 +490,6 @@ defmodule Jsonpatch do defp maybe_prepare_map(value, _opts), do: value @compile {:inline, escape: 1} - defp escape(fragment) when is_binary(fragment) do fragment = if :binary.match(fragment, "~") != :nomatch, @@ -338,4 +502,19 @@ defmodule Jsonpatch do end defp escape(fragment), do: fragment + + defp make_safe_hash_fn(hash_fn) do + # we want to compare only maps, and returning nil should mean + # we should compare lists pairwise instead + fn + %{} = item -> + case hash_fn.(item) do + nil -> throw(:hash_not_implemented) + hash -> hash + end + + _item -> + throw(:hash_not_implemented) + end + end end diff --git a/lib/jsonpatch/types.ex b/lib/jsonpatch/types.ex index 229f06e..6bf80fd 100644 --- a/lib/jsonpatch/types.ex +++ b/lib/jsonpatch/types.ex @@ -27,12 +27,24 @@ defmodule Jsonpatch.Types do :strings | :atoms | {:custom, convert_fn()} | {:ignore_invalid_paths, :boolean} @typedoc """ - Types options: + Apply patch options: - - `:keys` - controls how path fragments are decoded. 
+ - `:keys` - controls how path fragments are decoded """ @type opts :: [{:keys, opt_keys()}] - @type opts_diff :: [{:ancestor_path, String.t()} | {:prepare_map, (struct() | map() -> map())}] + + @typedoc """ + Diff options: + + - `:ancestor_path` - path to the ancestor of the current node + - `:prepare_map` - function to prepare the map for diffing + - `:object_hash` - function to extract unique identifier from list items for optimized diffing. + """ + @type opts_diff :: [ + {:ancestor_path, String.t()} + | {:prepare_map, (struct() | map() -> map())} + | {:object_hash, (term() -> term())} + ] @type casted_array_index :: :- | non_neg_integer() @type casted_object_key :: atom() | String.t() diff --git a/test/jsonpatch_test.exs b/test/jsonpatch_test.exs index 24bf95c..b89dc04 100644 --- a/test/jsonpatch_test.exs +++ b/test/jsonpatch_test.exs @@ -343,6 +343,432 @@ defmodule JsonpatchTest do patches = Jsonpatch.diff(source, destination) assert Jsonpatch.apply_patch(patches, source) == {:ok, destination} end + + test "Create diff with object_hash for list items - simple insertion" do + original = [ + %{id: 1, name: "test1"}, + %{id: 2, name: "test2"}, + %{id: 3, name: "test3"} + ] + + updated = List.insert_at(original, 0, %{id: 123, name: "test123"}) + + patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end) + + # Should only have one add operation instead of multiple replace operations + assert patches == [ + %{value: %{id: 123, name: "test123"}, path: "/0", op: "add"} + ] + end + + test "Create diff with object_hash for list items - simple insertion with prepare_map" do + original = [] + + updated = [ + %{id: 1, name: "test1"} + ] + + patches = + Jsonpatch.diff(original, updated, + object_hash: fn %{id: id} -> id end, + prepare_map: fn %{id: id} -> %{id: id} end + ) + + expected_patches = [ + %{value: %{id: 1}, path: "/0", op: "add"} + ] + + # Apply the patch and verify it works + assert_equal_patches(patches, expected_patches) + end + + test 
"Create diff with object_hash for list items - reordering" do
      original = [
        %{id: 1, name: "test1"},
        %{id: 2, name: "test2"},
        %{id: 3, name: "test3"}
      ]

      updated = [
        %{id: 3, name: "test3"},
        %{id: 1, name: "test1"},
        %{id: 2, name: "test2"}
      ]

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)

      # Apply the patch and verify it round-trips to the updated list
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash for list items - removal" do
      original = [
        %{id: 1, name: "test1"},
        %{id: 2, name: "test2"},
        %{id: 3, name: "test3"}
      ]

      updated = [
        %{id: 1, name: "test1"},
        %{id: 3, name: "test3"}
      ]

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)

      # Should only have one remove operation
      assert patches == [
               %{path: "/1", op: "remove"}
             ]
    end

    test "Create diff with object_hash for list items - modification" do
      original = [
        %{id: 1, name: "test1"},
        %{id: 2, name: "test2"}
      ]

      updated = [
        %{id: 1, name: "modified1"},
        %{id: 2, name: "test2"}
      ]

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)

      # Should only modify the changed field
      assert patches == [
               %{value: "modified1", path: "/0/name", op: "replace"}
             ]
    end

    test "Create diff with object_hash validates opts" do
      original = [%{id: 1, name: "test1"}]
      updated = [%{id: 1, name: "test1"}]

      # Should raise when invalid opts are provided
      assert_raise ArgumentError, fn ->
        Jsonpatch.diff(original, updated, invalid_option: "value")
      end
    end

    test "Create diff with object_hash falls back to index-based when function not provided" do
      original = [%{id: 1, name: "test1"}]
      updated = [%{id: 2, name: "test2"}]

      # Should work the same as before when no object_hash is provided
      patches_with_opts = Jsonpatch.diff(original, updated, [])
      patches_without_opts = Jsonpatch.diff(original, updated)

      assert patches_with_opts == patches_without_opts
    end

    test "Create diff with object_hash - inserting at the beginning" do
      original = [
        %{id: 1, name: "test1"},
        %{id: 2, name: "test2"}
      ]

      updated = [
        %{id: 3, name: "test3"},
        %{id: 1, name: "test1"},
        %{id: 2, name: "test2"}
      ]

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert patches == [%{op: "add", path: "/0", value: %{id: 3, name: "test3"}}]
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - inserting in the middle" do
      original = [
        %{id: 1, name: "test1"},
        %{id: 2, name: "test2"}
      ]

      updated = [
        %{id: 1, name: "test1"},
        %{id: 3, name: "test3"},
        %{id: 2, name: "test2"}
      ]

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert patches == [%{op: "add", path: "/1", value: %{id: 3, name: "test3"}}]
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - removing in the middle" do
      original = [
        %{id: 1, name: "test1"},
        %{id: 2, name: "test2"},
        %{id: 3, name: "test3"}
      ]

      updated = [
        %{id: 1, name: "test1"},
        %{id: 3, name: "test3"}
      ]

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert patches == [%{op: "remove", path: "/1"}]
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - complex reordering with modifications" do
      original = [
        %{id: 1, name: "first", status: "active"},
        %{id: 2, name: "second", status: "inactive"},
        %{id: 3, name: "third", status: "active"},
        %{id: 4, name: "fourth", status: "pending"}
      ]

      updated = [
        # moved from end, status changed
        %{id: 4, name: "fourth", status: "active"},
        # moved and name changed
        %{id: 2, name: "second-modified", status: "inactive"},
        # moved and status changed
        %{id: 1, name: "first", status: "inactive"},
        # moved, no changes
        %{id: 3, name: "third", status: "active"}
      ]

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - simultaneous add, remove, move, and modify" do
      original = [
        %{id: 1, name: "keep1", value: 10},
        %{id: 2, name: "remove_me", value: 20},
        %{id: 3, name: "move_me", value: 30},
        %{id: 4, name: "modify_me", value: 40}
      ]

      updated = [
        # new item at start
        %{id: 5, name: "new_item", value: 50},
        # modified and moved
        %{id: 4, name: "modified", value: 45},
        # unchanged
        %{id: 1, name: "keep1", value: 10},
        # moved but unchanged
        %{id: 3, name: "move_me", value: 30},
        # new item at end
        %{id: 6, name: "another_new", value: 60}
      ]

      # id: 2 is removed

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - multiple insertions at different positions" do
      original = [
        %{id: 1, name: "first"},
        %{id: 3, name: "third"},
        %{id: 5, name: "fifth"}
      ]

      updated = [
        # insert at beginning
        %{id: 0, name: "zero"},
        %{id: 1, name: "first"},
        # insert in middle
        %{id: 2, name: "second"},
        %{id: 3, name: "third"},
        # insert in middle
        %{id: 4, name: "fourth"},
        %{id: 5, name: "fifth"},
        # insert at end
        %{id: 6, name: "sixth"}
      ]

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - multiple removals at different positions" do
      original = [
        %{id: 1, name: "first"},
        %{id: 2, name: "second"},
        %{id: 3, name: "third"},
        %{id: 4, name: "fourth"},
        %{id: 5, name: "fifth"},
        %{id: 6, name: "sixth"}
      ]

      # ids 1, 3 and 5 removed; 2, 4 and 6 remain
      updated = [
        %{id: 2, name: "second"},
        %{id: 4, name: "fourth"},
        %{id: 6, name: "sixth"}
      ]

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - reverse order" do
      original = [
        %{id: 1},
        %{id: 2},
        %{id: 3}
      ]

      updated = [
        %{id: 3},
        %{id: 2},
        %{id: 1}
      ]

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - reverse order with modifications" do
      original = [
        %{id: 1, name: "first", count: 1},
        %{id: 2, name: "second", count: 2},
        %{id: 3, name: "third", count: 3}
      ]

      updated = [
        # reversed; name and count modified
        %{id: 3, name: "third-modified", count: 30},
        # reversed and count modified
        %{id: 2, name: "second", count: 20},
        # reversed and name modified
        %{id: 1, name: "first-new", count: 1}
      ]

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - empty to populated list" do
      original = []

      updated = [
        %{id: 1, name: "first"},
        %{id: 2, name: "second"},
        %{id: 3, name: "third"}
      ]

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - populated to empty list" do
      original = [
        %{id: 1, name: "first"},
        %{id: 2, name: "second"},
        %{id: 3, name: "third"}
      ]

      updated = []

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - duplicate hash values handled gracefully" do
      original = [
        %{id: 1, name: "first", group: "A"},
        %{id: 2, name: "second", group: "A"},
        %{id: 3, name: "third", group: "B"}
      ]

      updated = [
        %{id: 1, name: "first-modified", group: "A"},
        %{id: 3, name: "third", group: "B"},
        %{id: 2, name: "second", group: "A"}
      ]

      # Using group as hash (which has duplicates) should still work
      patches = Jsonpatch.diff(original, updated, object_hash: fn %{group: group} -> group end)
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - nested objects with modifications" do
      original = [
        %{id: 1, user: %{name: "Alice", age: 30}, tags: ["admin"]},
        %{id: 2, user: %{name: "Bob", age: 25}, tags: ["user"]},
        %{id: 3, user: %{name: "Charlie", age: 35}, tags: ["user", "premium"]}
      ]

      updated = [
        # moved, age and tags changed
        %{id: 2, user: %{name: "Bob", age: 26}, tags: ["user", "active"]},
        # moved, tags changed
        %{id: 1, user: %{name: "Alice", age: 30}, tags: ["admin", "senior"]},
        # new item
        %{id: 4, user: %{name: "David", age: 28}, tags: ["user"]},
        # moved, tags changed
        %{id: 3, user: %{name: "Charlie", age: 35}, tags: ["premium"]}
      ]

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - mixed data types in list" do
      original = [
        %{id: 1, name: "object1"},
        %{id: 2, name: "object2"},
        %{id: 3, name: "object3"}
      ]

      updated = [
        %{id: 2, name: "object2-modified"},
        %{id: 1, name: "object1"},
        # new item
        %{id: 4, name: "object4"}
      ]

      # id: 3 removed

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - large list reordering" do
      # Create a larger list to test performance characteristics
      original = Enum.map(1..10, fn i -> %{id: i, name: "item#{i}", value: i * 10} end)

      # Drop the first item and reverse the remaining order
      updated = original |> tl() |> Enum.reverse()

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end

    test "Create diff with object_hash - interleaved additions and removals" do
      original = [
        %{id: 1, name: "keep1"},
        %{id: 2, name: "remove1"},
        %{id: 3, name: "keep2"},
        %{id: 4, name: "remove2"},
        %{id: 5, name: "keep3"}
      ]

      updated = [
        %{id: 1, name: "keep1"},
        # new
        %{id: 10, name: "add1"},
        %{id: 3, name: "keep2"},
        # new
        %{id: 11, name: "add2"},
        %{id: 5, name: "keep3"},
        # new
        %{id: 12, name: "add3"}
      ]

      # removed id: 2 and 4

      patches = Jsonpatch.diff(original, updated, object_hash: fn %{id: id} -> id end)
      assert {:ok, ^updated} = Jsonpatch.apply_patch(patches, original, keys: :atoms)
    end
  end

  describe "Apply patch/es" do