From 81d74cd8caff78e003a53a57483d4cf29eb7b528 Mon Sep 17 00:00:00 2001
From: Zeeshan Lakhani
Date: Fri, 9 Oct 2015 17:52:36 -0400
Subject: [PATCH 1/9] yokozuna_rt additions for types and better testing w/
 allow-mult=true extractors test

(cherry picked from commit 844ee9cdb20bd2f476d28fe7ef80967e23d09046)
---
 src/yokozuna_rt.erl     | 26 ++++++++++++++++++++------
 tests/yz_extractors.erl | 21 ++++++++++++++-------
 tests/yz_handoff.erl    |  3 +--
 3 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/src/yokozuna_rt.erl b/src/yokozuna_rt.erl
index 7723d3e5f..3ddaee5ab 100644
--- a/src/yokozuna_rt.erl
+++ b/src/yokozuna_rt.erl
@@ -23,6 +23,7 @@
 -include("yokozuna_rt.hrl").
 
 -export([check_exists/2,
+         clear_trees/1,
          commit/2,
         expire_trees/1,
         gen_keys/1,
@@ -225,6 +226,15 @@ expire_trees(Cluster) ->
     timer:sleep(100),
     ok.
 
+%% @doc Clear YZ trees
+-spec clear_trees([node()]) -> ok.
+clear_trees(Cluster) ->
+    lager:info("Clear all trees"),
+    _ = [ok = rpc:call(Node, yz_entropy_mgr, clear_trees, [])
+         || Node <- Cluster],
+    ok.
+
+
 %% @doc Remove index directories, removing the index.
 -spec remove_index_dirs([node()], index_name()) -> ok.
 remove_index_dirs(Nodes, IndexName) ->
@@ -364,20 +374,24 @@ create_and_set_index(Cluster, Pid, Bucket, Index) ->
     ok = riakc_pb_socket:create_search_index(Pid, Index),
     %% For possible legacy upgrade reasons, wrap create index in a wait
     wait_for_index(Cluster, Index),
-    set_index(Pid, Bucket, Index).
 
 -spec create_and_set_index([node()], pid(), bucket(), index_name(), schema_name()) -> ok.
 create_and_set_index(Cluster, Pid, Bucket, Index, Schema) ->
     %% Create a search index and associate with a bucket
     lager:info("Create a search index ~s with a custom schema named ~s and " ++
-               "associate it with bucket ~s", [Index, Schema, Bucket]),
+               "associate it with bucket ~p", [Index, Schema, Bucket]),
     ok = riakc_pb_socket:create_search_index(Pid, Index, Schema, []),
     %% For possible legacy upgrade reasons, wrap create index in a wait
     wait_for_index(Cluster, Index),
-    set_index(Pid, Bucket, Index).
-
--spec set_index(pid(), bucket(), index_name()) -> ok.
-set_index(Pid, Bucket, Index) ->
+    set_index(Pid, hd(Cluster), Bucket, Index).
+
+-spec set_index(pid(), node(), bucket(), index_name()) -> ok.
+set_index(_Pid, Node, {BucketType, _Bucket}, Index) ->
+    lager:info("Create and activate bucket type ~s and tie it to search_index ~s",
+               [BucketType, Index]),
+    rt:create_and_activate_bucket_type(Node, BucketType, [{search_index, Index}]);
+set_index(Pid, _Node, Bucket, Index) ->
     ok = riakc_pb_socket:set_search_index(Pid, Bucket, Index).
 
 internal_solr_url(Host, Port, Index) ->
diff --git a/tests/yz_extractors.erl b/tests/yz_extractors.erl
index 84f7d8b5e..33d6ee8e9 100644
--- a/tests/yz_extractors.erl
+++ b/tests/yz_extractors.erl
@@ -28,10 +28,12 @@
 -include_lib("riakc/include/riakc.hrl").
 
 -define(FMT(S, Args), lists:flatten(io_lib:format(S, Args))).
+-define(TYPE1, <<"extractors_in_paradise">>).
+-define(TYPE2, <<"extractors_in_paradiso">>).
 -define(INDEX1, <<"test_idx1">>).
--define(BUCKET1, <<"test_bkt1">>).
+-define(BUCKET1, {?TYPE1, <<"test_bkt1">>}).
 -define(INDEX2, <<"test_idx2">>).
--define(BUCKET2, <<"test_bkt2">>).
+-define(BUCKET2, {?TYPE2, <<"test_bkt2">>}).
 -define(SCHEMANAME, <<"test">>).
 -define(TEST_SCHEMA, <<"
@@ -278,9 +280,9 @@ get_map(Node) ->
 verify_extractor(Node, PacketData, Mod) ->
     rpc:call(Node, yz_extractor, run, [PacketData, Mod]).
 
-bucket_url({Host,Port}, BName, Key) ->
-    ?FMT("http://~s:~B/buckets/~s/keys/~s",
-         [Host, Port, BName, Key]).
+bucket_url({Host,Port}, {BType, BName}, Key) ->
+    ?FMT("http://~s:~B/types/~s/buckets/~s/keys/~s",
+         [Host, Port, BType, BName, Key]).
 
 test_extractor_works(Cluster, Packet) ->
     [rt_intercept:add(ANode, {yz_noop_extractor,
@@ -304,7 +306,7 @@ test_extractor_with_aae_expire(Cluster, Index, Bucket, Packet) ->
     {Host, Port} = rt:select_random(yokozuna_rt:host_entries(
                                       rt:connection_info(
                                         Cluster))),
-    URL = bucket_url({Host, Port}, mochiweb_util:quote_plus(Bucket),
+    URL = bucket_url({Host, Port}, Bucket,
                      mochiweb_util:quote_plus(Key)),
 
     CT = ?EXTRACTOR_CT,
@@ -326,8 +328,13 @@ test_extractor_with_aae_expire(Cluster, Index, Bucket, Packet) ->
     yokozuna_rt:override_schema(APid, Cluster, Index, ?SCHEMANAME,
                                 ?TEST_SCHEMA_UPGRADE),
 
+    {ok, "200", RHeaders, _} = ibrowse:send_req(URL, [{"Content-Type", CT}], get,
+                                                [], []),
+    VC = proplists:get_value("X-Riak-Vclock", RHeaders),
+
     {ok, "204", _, _} = ibrowse:send_req(
-                          URL, [{"Content-Type", CT}], put, Packet),
+                          URL, [{"Content-Type", CT}, {"X-Riak-Vclock", VC}],
+                          put, Packet),
 
     yokozuna_rt:commit(Cluster, Index),
     yokozuna_rt:search_expect({Host, Port}, Index, <<"method">>,
diff --git a/tests/yz_handoff.erl b/tests/yz_handoff.erl
index ab91d3bdb..4c5b1af6d 100644
--- a/tests/yz_handoff.erl
+++ b/tests/yz_handoff.erl
@@ -99,8 +99,7 @@ confirm() ->
                        join_node = Node1,
                        admin_node = Node2}],
 
-    %% Run Shell Script to count/test # of replicas and leave/join
-    %% nodes from the cluster
+    %% Run a set of leave/join trials and verify key counts across the cluster
     [[begin
           check_data(Nodes, KeyCount, BucketURL, SearchURL, State),
           check_counts(Pid, KeyCount, BucketURL)

From a255cc153905b6d6e71df9f892f707788afa0fcf Mon Sep 17 00:00:00 2001
From: Brett Hazen
Date: Tue, 10 Nov 2015 12:11:27 -0800
Subject: [PATCH 2/9] Add debugging to rt:do_commit/1 to track nothing_planned

---
 src/rt.erl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/rt.erl b/src/rt.erl
index 96f31e2c1..829eefac5 100644
--- a/src/rt.erl
+++ b/src/rt.erl
@@ -449,7 +449,7 @@ staged_join(Node, PNode) ->
 
 plan_and_commit(Node) ->
     timer:sleep(500),
-    lager:info("planning and commiting cluster join"),
+    lager:info("planning cluster join"),
     case rpc:call(Node, riak_core_claimant, plan, []) of
         {error, ring_not_ready} ->
             lager:info("plan: ring not ready"),
@@ -461,6 +461,7 @@ plan_and_commit(Node) ->
     end.
 
 do_commit(Node) ->
+    lager:info("committing cluster changes"),
     case rpc:call(Node, riak_core_claimant, commit, []) of
         {error, plan_changed} ->
             lager:info("commit: plan changed"),
@@ -472,8 +473,9 @@ do_commit(Node) ->
             timer:sleep(100),
             maybe_wait_for_changes(Node),
             do_commit(Node);
-        {error,nothing_planned} ->
+        {error, nothing_planned} ->
             %% Assume plan actually committed somehow
+            lager:info("commit: nothing planned"),
             ok;
         ok ->
             ok

From ce8ef794cb83bcff0030c330515696947774b187 Mon Sep 17 00:00:00 2001
From: Doug Rohrer
Date: Mon, 16 Nov 2015 18:28:55 -0500
Subject: [PATCH 3/9] Updated to attempt to make sure we get the "right" vnode
 PID.
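
The race being guarded against: immediately after supervisor:terminate_child/2,
riak_core_vnode_manager:get_vnode_pid/2 can still hand back the pid of the
vnode that was just killed. A minimal sketch of the check this patch adds
(get_fresh_vnode_pid/2 is a hypothetical helper name used for illustration
only; the test inlines these lines):

    %% Re-fetch the vnode pid and fail fast if the manager returned
    %% the terminated one.
    get_fresh_vnode_pid(Index, OldVPid) ->
        {ok, NewVPid} = riak_core_vnode_manager:get_vnode_pid(Index, riak_kv_vnode),
        ?assertNotEqual(NewVPid, OldVPid),
        NewVPid.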
---
 tests/proxy_overload_recovery.erl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/proxy_overload_recovery.erl b/tests/proxy_overload_recovery.erl
index 111320422..883ca19bd 100644
--- a/tests/proxy_overload_recovery.erl
+++ b/tests/proxy_overload_recovery.erl
@@ -215,6 +215,10 @@ prepare(ThresholdSeed) ->
     ok = supervisor:terminate_child(riak_core_vnode_proxy_sup, {riak_kv_vnode, Id}),
     RegName = riak_core_vnode_proxy:reg_name(riak_kv_vnode, Index),
     undefined = whereis(RegName),
+    %% Fail if we get back the dead vnode
+    {ok, VPid1} = riak_core_vnode_manager:get_vnode_pid(Index, riak_kv_vnode),
+    ?assertNotEqual(VPid1, VPid0),
+
     {ok, PPid} = supervisor:restart_child(riak_core_vnode_proxy_sup,
                                           {riak_kv_vnode, Id}),
     %% Find the proxy pid and check it's alive and matches the supervisor
@@ -225,6 +229,7 @@ prepare(ThresholdSeed) ->
     %% and return the Pid so we know we have the same Pid.
     {ok, VPid} = riak_core_vnode_proxy:command_return_vnode(
                    {riak_kv_vnode,Index,node()}, timeout),
+    ?assertEqual(VPid, VPid1),
     true = is_process_alive(PPid),
     true = is_process_alive(VPid),
 

From 867dc1a0364e8a19e1a929dd9f3385f46703cfc4 Mon Sep 17 00:00:00 2001
From: Zeeshan Lakhani
Date: Wed, 18 Nov 2015 12:50:14 -0500
Subject: [PATCH 4/9] shore up possible race between upgrade and first check

---
 tests/yz_extractors.erl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/yz_extractors.erl b/tests/yz_extractors.erl
index 33d6ee8e9..cf596c43c 100644
--- a/tests/yz_extractors.erl
+++ b/tests/yz_extractors.erl
@@ -207,6 +207,8 @@ confirm() ->
 
     %% Upgrade
     yokozuna_rt:rolling_upgrade(Cluster, current),
+    [rt:wait_until_ready(ANode) || ANode <- Cluster],
+
     [rt:assert_capability(ANode, ?YZ_CAP, true) || ANode <- Cluster],
     [rt:assert_supported(rt:capability(ANode, all), ?YZ_CAP, [true, false]) ||
        ANode <- Cluster],

From d855c1f6ba6ba314427867158d29d46cd5b55c81 Mon Sep 17 00:00:00 2001
From: Nick Marino
Date: Tue, 24 Nov 2015 11:52:21 -0500
Subject: [PATCH 5/9] Log the start of each test in repl_aae_fullsync

This makes it much easier to tell which code is being executed at a
given point in the log, since many of the tests print identical log
messages.
---
 tests/repl_aae_fullsync.erl | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl
index a04e86012..5c4ebf15c 100644
--- a/tests/repl_aae_fullsync.erl
+++ b/tests/repl_aae_fullsync.erl
@@ -46,6 +46,8 @@ confirm() ->
     pass.
 
 simple_test() ->
+    lager:info("Starting simple_test"),
+
     %% Deploy 6 nodes.
     Nodes = rt:deploy_nodes(6, ?CONF(5), [riak_kv, riak_repl]),
 
@@ -118,6 +120,8 @@
     pass.
 
 dual_test() ->
+    lager:info("Starting dual_test"),
+
     %% Deploy 6 nodes.
     Nodes = rt:deploy_nodes(6, ?CONF(infinity), [riak_kv, riak_repl]),
 
@@ -218,6 +222,8 @@
     pass.
 
 bidirectional_test() ->
+    lager:info("Starting bidirectional_test"),
+
     %% Deploy 6 nodes.
     Nodes = rt:deploy_nodes(6, ?CONF(5), [riak_kv, riak_repl]),
 
@@ -301,6 +307,8 @@
     pass.
 
 difference_test() ->
+    lager:info("Starting difference_test"),
+
     %% Deploy 6 nodes.
     Nodes = rt:deploy_nodes(6, ?CONF(5), [riak_kv, riak_repl]),
 
@@ -393,6 +401,8 @@
     pass.
 
 deadlock_test() ->
+    lager:info("Starting deadlock_test"),
+
     %% Deploy 6 nodes.
     Nodes = rt:deploy_nodes(6, ?CONF(5), [riak_kv, riak_repl]),
 

From 619b24e7d3777055ab0954b9c16408a5db0706c2 Mon Sep 17 00:00:00 2001
From: Doug Rohrer
Date: Tue, 24 Nov 2015 13:38:25 -0500
Subject: [PATCH 6/9] Updated tests to explicitly set allow_mult and
 dvv_enabled, as overriding default_bucket_props in advanced.config without
 explicitly setting them returns different values now that the fix for
 allow_mult turning to true with an app.config file present has landed.

---
 src/rt.erl                               |  8 +++++++-
 tests/bucket_types.erl                   |  6 +++++-
 tests/ensemble_byzantine.erl             | 13 +++++++++----
 tests/ensemble_ring_changes.erl          |  7 ++++++-
 tests/ensemble_util.erl                  | 13 +++++++++----
 tests/http_bucket_types.erl              |  6 +++++-
 tests/http_security.erl                  |  2 +-
 tests/overload.erl                       |  7 ++++++-
 tests/pb_security.erl                    |  2 +-
 tests/repl_aae_fullsync.erl              |  7 ++++++-
 tests/repl_aae_fullsync_custom_n.erl     |  7 ++++++-
 tests/repl_cancel_fullsync.erl           |  7 ++++++-
 tests/repl_location_failures.erl         |  7 ++++++-
 tests/replication_object_reformat.erl    |  7 ++++++-
 tests/verify_counter_repl.erl            |  2 +-
 tests/verify_handoff_write_once.erl      |  7 ++++++-
 tests/verify_write_once.erl              |  7 ++++++-
 tests/yz_default_bucket_type_upgrade.erl |  7 ++++++-
 18 files changed, 98 insertions(+), 24 deletions(-)

diff --git a/src/rt.erl b/src/rt.erl
index 829eefac5..09bfa768d 100644
--- a/src/rt.erl
+++ b/src/rt.erl
@@ -721,7 +721,13 @@ wait_until_no_pending_changes(Nodes) ->
             rpc:multicall(Nodes, riak_core_vnode_manager, force_handoffs, []),
             {Rings, BadNodes} = rpc:multicall(Nodes, riak_core_ring_manager, get_raw_ring, []),
             Changes = [ riak_core_ring:pending_changes(Ring) =:= [] || {ok, Ring} <- Rings ],
-            BadNodes =:= [] andalso length(Changes) =:= length(Nodes) andalso lists:all(fun(T) -> T end, Changes)
+            case BadNodes =:= [] andalso length(Changes) =:= length(Nodes) andalso lists:all(fun(T) -> T end, Changes) of
+                true -> true;
+                false ->
+                    NodesWithChanges = [Node || {Node, false} <- lists:zip(Nodes -- BadNodes, Changes)],
+                    lager:info("Changes not yet complete, or bad nodes. BadNodes=~p, Nodes with Pending Changes=~p~n", [BadNodes, NodesWithChanges]),
+                    false
+            end
         end,
     ?assertEqual(ok, wait_until(F)),
     ok.
diff --git a/tests/bucket_types.erl b/tests/bucket_types.erl
index 1715cee82..7e426be5a 100644
--- a/tests/bucket_types.erl
+++ b/tests/bucket_types.erl
@@ -10,7 +10,11 @@ confirm() ->
     lager:info("Deploy some nodes"),
     Nodes = rt:build_cluster(4, [], [
                                      {riak_core, [{default_bucket_props,
-                                                   [{n_val, 2}]}]}]),
+                                                   [
+                                                    {n_val, 2},
+                                                    {allow_mult, true},
+                                                    {dvv_enabled, true}
+                                                   ]}]}]),
     Node = hd(Nodes),
 
     RMD = riak_test_runner:metadata(),
diff --git a/tests/ensemble_byzantine.erl b/tests/ensemble_byzantine.erl
index 0449fc574..155f1d5b0 100644
--- a/tests/ensemble_byzantine.erl
+++ b/tests/ensemble_byzantine.erl
@@ -56,7 +56,7 @@ confirm() ->
     test_lose_minority_synctrees(PBC, Bucket, Key, Val, PL),
     test_lose_majority_synctrees(PBC, Bucket, Key, Val, PL),
     test_lose_minority_synctrees_one_node_partitioned(PBC, Bucket, Key, Val,
-                                                     PL, Nodes),
+                                                      PL, Nodes),
     test_lose_all_data_and_trees_except_one_node(PBC, Bucket, Key, Val, PL),
     {ok, _NewVal} = test_backup_restore_data_not_trees(Bucket, Key, Val, PL),
     test_lose_all_data(PBC, Bucket, Key, PL),
@@ -62,7 +62,12 @@ confirm() ->
     pass.
 
 config() ->
-    [{riak_core, [{default_bucket_props, [{n_val, 5}]},
+    [{riak_core, [{default_bucket_props,
+                   [
+                    {n_val, 5},
+                    {allow_mult, true},
+                    {dvv_enabled, true}
+                   ]},
                   {vnode_management_timer, 1000},
                   {ring_creation_size, 16},
                   {enable_consensus, true},
@@ -79,7 +84,7 @@ test_lose_majority_synctrees(PBC, Bucket, Key, Val, PL) ->
     assert_lose_synctrees_and_recover(PBC, Bucket, Key, Val, PL, Majority).
 
 test_lose_minority_synctrees_one_node_partitioned(PBC, Bucket, Key, Val, PL,
-                                                 Nodes) ->
+                                                  Nodes) ->
     Minority = minority_vnodes(PL),
     {{Idx0, Node0}, primary} = hd(PL),
     Ensemble = {kv, Idx0, 5},
@@ -251,7 +256,7 @@ kill_peers(Ensemble, Nodes) ->
     Peers = [P || P={_Id, N} <- View, lists:member(N, Nodes)],
     lager:info("Killing Peers: ~p", [Peers]),
     Pids = [rpc:call(Node, riak_ensemble_manager, get_peer_pid,
-                    [Ensemble, Peer]) || Peer <- Peers],
+                     [Ensemble, Peer]) || Peer <- Peers],
     [exit(Pid, kill) || Pid <- Pids, Pid =/= undefined].
 
 wipe_partitions(PL) ->
diff --git a/tests/ensemble_ring_changes.erl b/tests/ensemble_ring_changes.erl
index 6823d76ee..59e9bc365 100644
--- a/tests/ensemble_ring_changes.erl
+++ b/tests/ensemble_ring_changes.erl
@@ -27,7 +27,12 @@
 -define(RING_SIZE, 16).
 
 config() ->
-    [{riak_core, [{default_bucket_props, [{n_val, 5}]},
+    [{riak_core, [{default_bucket_props,
+                   [
+                    {n_val, 5},
+                    {allow_mult, true},
+                    {dvv_enabled, true}
+                   ]},
                   {vnode_management_timer, 1000},
                   {ring_creation_size, ?RING_SIZE},
                   {enable_consensus, true},
diff --git a/tests/ensemble_util.erl b/tests/ensemble_util.erl
index d6f79145b..e4cbb9db8 100644
--- a/tests/ensemble_util.erl
+++ b/tests/ensemble_util.erl
@@ -58,10 +58,15 @@ fast_config(Nval, EnableAAE) when is_boolean(EnableAAE) ->
 
 fast_config(NVal, RingSize, EnableAAE) ->
     [config_aae(EnableAAE),
-     {riak_core, [{default_bucket_props, [{n_val, NVal}]},
-                  {vnode_management_timer, 1000},
-                  {ring_creation_size, RingSize},
-                  {enable_consensus, true}]}].
+     {riak_core, [{default_bucket_props,
+                   [
+                    {n_val, NVal,
+                    {allow_mult, true},
+                    {dvv_enabled, true}}
+                   ]},
+                  {vnode_management_timer, 1000},
+                  {ring_creation_size, RingSize},
+                  {enable_consensus, true}]}].
 
 config_aae(true) ->
     {riak_kv, [{anti_entropy_build_limit, {100, 1000}},
diff --git a/tests/http_bucket_types.erl b/tests/http_bucket_types.erl
index d5994b159..2ffe118c0 100644
--- a/tests/http_bucket_types.erl
+++ b/tests/http_bucket_types.erl
@@ -13,7 +13,11 @@ confirm() ->
     lager:info("Deploy some nodes"),
     Nodes = rt:build_cluster(4, [], [
                                      {riak_core, [{default_bucket_props,
-                                                   [{n_val, 2}]}]}]),
+                                                   [
+                                                    {n_val, 2},
+                                                    {allow_mult, true},
+                                                    {dvv_enabled, true}
+                                                   ]}]}]),
     Node = hd(Nodes),
 
     RMD = riak_test_runner:metadata(),
diff --git a/tests/http_security.erl b/tests/http_security.erl
index 8bf09ddb6..fac70e329 100644
--- a/tests/http_security.erl
+++ b/tests/http_security.erl
@@ -30,7 +30,7 @@ confirm() ->
     PrivDir = rt:priv_dir(),
     Conf = [
             {riak_core, [
-                         {default_bucket_props, [{allow_mult, true}]},
+                         {default_bucket_props, [{allow_mult, true}, {dvv_enabled, true}]},
                          {ssl, [
                                 {certfile, filename:join([CertDir,
                                                           "site3.basho.com/cert.pem"])},
diff --git a/tests/overload.erl b/tests/overload.erl
index 54b17adcd..819f029a7 100644
--- a/tests/overload.erl
+++ b/tests/overload.erl
@@ -59,7 +59,12 @@ default_config(#config{
                   fsm_limit = FsmLimit
                  }) ->
     [{riak_core, [{ring_creation_size, 8},
-                  {default_bucket_props, [{n_val, 5}]},
+                  {default_bucket_props,
+                   [
+                    {n_val, 5},
+                    {allow_mult, true},
+                    {dvv_enabled, true}
+                   ]},
                   {vnode_management_timer, 1000},
                   {enable_health_checks, false},
                   {enable_consensus, true},
diff --git a/tests/pb_security.erl b/tests/pb_security.erl
index 3b06d1d7a..00d1bac96 100644
--- a/tests/pb_security.erl
+++ b/tests/pb_security.erl
@@ -53,7 +53,7 @@ confirm() ->
     PrivDir = rt:priv_dir(),
     Conf = [
             {riak_core, [
-                         {default_bucket_props, [{allow_mult, true}]},
+                         {default_bucket_props, [{allow_mult, true}, {dvv_enabled, true}]},
                          {ssl, [
                                 {certfile, filename:join([CertDir,"site3.basho.com/cert.pem"])},
                                 {keyfile, filename:join([CertDir, "site3.basho.com/key.pem"])},
diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl
index 5c4ebf15c..41397831b 100644
--- a/tests/repl_aae_fullsync.erl
+++ b/tests/repl_aae_fullsync.erl
@@ -16,7 +16,12 @@
         {riak_core,
             [
              {ring_creation_size, 8},
-             {default_bucket_props, [{n_val, 1}]}
+             {default_bucket_props,
+              [
+               {n_val, 1},
+               {allow_mult, true},
+               {dvv_enabled, true}
+              ]}
             ]
         },
         {riak_kv,
diff --git a/tests/repl_aae_fullsync_custom_n.erl b/tests/repl_aae_fullsync_custom_n.erl
index a8294bcd2..715510b03 100644
--- a/tests/repl_aae_fullsync_custom_n.erl
+++ b/tests/repl_aae_fullsync_custom_n.erl
@@ -22,7 +22,12 @@ confirm() ->
         {riak_core,
             [
              {ring_creation_size, 8},
-             {default_bucket_props, [{n_val, 1}]}
+             {default_bucket_props,
+              [
+               {n_val, 1},
+               {allow_mult, true},
+               {dvv_enabled, true}
+              ]}
             ]
         },
         {riak_kv,
diff --git a/tests/repl_cancel_fullsync.erl b/tests/repl_cancel_fullsync.erl
index 03c69b9ef..bcbea39ef 100644
--- a/tests/repl_cancel_fullsync.erl
+++ b/tests/repl_cancel_fullsync.erl
@@ -11,7 +11,12 @@
         {riak_core,
             [
              {ring_creation_size, 8},
-             {default_bucket_props, [{n_val, 1}]}
+             {default_bucket_props,
+              [
+               {n_val, 1},
+               {allow_mult, true},
+               {dvv_enabled, true}
+              ]}
            ]
         },
         {riak_kv,
diff --git a/tests/repl_location_failures.erl b/tests/repl_location_failures.erl
index cb74985f5..6151a4546 100644
--- a/tests/repl_location_failures.erl
+++ b/tests/repl_location_failures.erl
@@ -13,7 +13,12 @@
         {riak_core,
             [
              {ring_creation_size, 8},
-             {default_bucket_props, [{n_val, 1}]}
+             {default_bucket_props,
+              [
+               {n_val, 1},
+               {allow_mult, true},
+               {dvv_enabled, true}
+              ]}
            ]
         },
         {riak_kv,
diff --git a/tests/replication_object_reformat.erl b/tests/replication_object_reformat.erl
index cb1cf6851..4112d9a57 100644
--- a/tests/replication_object_reformat.erl
+++ b/tests/replication_object_reformat.erl
@@ -11,7 +11,12 @@
         {riak_core,
             [
              {ring_creation_size, 8},
-             {default_bucket_props, [{n_val, ?N}]}
+             {default_bucket_props,
+              [
+               {n_val, ?N},
+               {allow_mult, true},
+               {dvv_enabled, true}
+              ]}
            ]
         },
         {riak_kv,
diff --git a/tests/verify_counter_repl.erl b/tests/verify_counter_repl.erl
index 79a107b37..7e1a282ca 100644
--- a/tests/verify_counter_repl.erl
+++ b/tests/verify_counter_repl.erl
@@ -62,7 +62,7 @@ confirm() ->
 make_clusters() ->
     Conf = [{riak_repl, [{fullsync_on_connect, false},
                          {fullsync_interval, disabled}]},
-            {riak_core, [{default_bucket_props, [{allow_mult, true}]}]}],
+            {riak_core, [{default_bucket_props, [{allow_mult, true}, {dvv_enabled, true}]}]}],
     Nodes = rt:deploy_nodes(6, Conf, [riak_kv, riak_repl]),
    {ClusterA, ClusterB} = lists:split(3, Nodes),
     A = make_cluster(ClusterA, "A"),
diff --git a/tests/verify_handoff_write_once.erl b/tests/verify_handoff_write_once.erl
index 9753946ed..1e311577b 100644
--- a/tests/verify_handoff_write_once.erl
+++ b/tests/verify_handoff_write_once.erl
@@ -60,7 +60,12 @@ confirm() ->
 
 create_config(Backend) ->
     [{riak_core, [
-                  {default_bucket_props, [{n_val, 1}]},
+                  {default_bucket_props,
+                   [
+                    {n_val, 1},
+                    {allow_mult, true},
+                    {dvv_enabled, true}
+                   ]},
                   {ring_creation_size, 8},
                   {handoff_acksync_threshold, 20},
                   {handoff_concurrency, 4},
diff --git a/tests/verify_write_once.erl b/tests/verify_write_once.erl
index 02fcf41d4..71b7bcccf 100644
--- a/tests/verify_write_once.erl
+++ b/tests/verify_write_once.erl
@@ -304,7 +304,12 @@ config(RingSize, NVal) ->
 config(RingSize, NVal, Backend) ->
     [
      {riak_core, [
-                  {default_bucket_props, [{n_val, NVal}]},
+                  {default_bucket_props,
+                   [
+                    {n_val, NVal},
+                    {allow_mult, true},
+                    {dvv_enabled, true}
+                   ]},
                   {vnode_management_timer, 1000},
                   {ring_creation_size, RingSize}]
      },
diff --git a/tests/yz_default_bucket_type_upgrade.erl b/tests/yz_default_bucket_type_upgrade.erl
index f3d94e849..b61b4353c 100644
--- a/tests/yz_default_bucket_type_upgrade.erl
+++ b/tests/yz_default_bucket_type_upgrade.erl
@@ -38,7 +38,12 @@
     [{riak_core,
       [
       {ring_creation_size, 16},
-      {default_bucket_props, [{n_val, ?N}]},
+      {default_bucket_props,
+       [
+        {n_val, ?N},
+        {allow_mult, true},
+        {dvv_enabled, true}
+       ]},
       {anti_entropy_build_limit, {100, 1000}},
       {anti_entropy_concurrency, 8}
       ]},

From 8e5b9a892e45497450904112aa4bc4bcf757af5e Mon Sep 17 00:00:00 2001
From: Nick Marino
Date: Tue, 24 Nov 2015 15:17:01 -0500
Subject: [PATCH 7/9] Fix misplaced character in ensemble_util config

---
 tests/ensemble_util.erl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/ensemble_util.erl b/tests/ensemble_util.erl
index e4cbb9db8..b3bca16b3 100644
--- a/tests/ensemble_util.erl
+++ b/tests/ensemble_util.erl
@@ -60,9 +60,9 @@ fast_config(NVal, RingSize, EnableAAE) ->
     [config_aae(EnableAAE),
      {riak_core, [{default_bucket_props,
                    [
-                    {n_val, NVal,
+                    {n_val, NVal},
                     {allow_mult, true},
-                    {dvv_enabled, true}}
+                    {dvv_enabled, true}
                    ]},
                   {vnode_management_timer, 1000},
                   {ring_creation_size, RingSize},

From 57659d3c1dc30d5be8e79688022be763961a4241 Mon Sep 17 00:00:00 2001
From: Doug Rohrer
Date: Tue, 1 Dec 2015 12:24:41 -0500
Subject: [PATCH 8/9] Forward port changes to proxy_overload_recovery to make
 it pass in the face of indeterminate behavior.
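
The drain logic below settles on a fixpoint: suspend the processes, sum
their message queue lengths, resume, and only declare things quiet after
three consecutive zero-length samples. A minimal sketch of that loop under
the same assumptions as the patch (drain_until_quiet/1,2 are hypothetical
names used here for clarity; the patch calls this drain/1,2):

    drain_until_quiet(Pids) ->
        drain_until_quiet(Pids, {-1, -1}).

    drain_until_quiet(Pids, {PrevPrev, Prev}) ->
        _ = [sys:suspend(P) || P <- Pids],
        Len = lists:sum([element(2, erlang:process_info(P, message_queue_len))
                         || P <- Pids]),
        _ = [sys:resume(P) || P <- Pids],
        case {PrevPrev, Prev, Len} of
            {0, 0, 0} -> ok;        %% three quiet samples in a row
            _ ->
                timer:sleep(1),     %% let other work get scheduled
                drain_until_quiet(Pids, {Prev, Len})
        end.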
---
 tests/proxy_overload_recovery.erl | 75 ++++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 25 deletions(-)

diff --git a/tests/proxy_overload_recovery.erl b/tests/proxy_overload_recovery.erl
index 883ca19bd..03248d008 100644
--- a/tests/proxy_overload_recovery.erl
+++ b/tests/proxy_overload_recovery.erl
@@ -208,16 +208,13 @@ prepare(ThresholdSeed) ->
     {ok, VPid0} = riak_core_vnode_manager:get_vnode_pid(Id, riak_kv_vnode),
     sys:resume(VPid0),
     ok = supervisor:terminate_child(riak_core_vnode_sup, VPid0),
-    false = is_process_alive(VPid0),
     %% Reset the proxy pid to make sure it resets state and picks up the new
     %% environment variables
     ok = supervisor:terminate_child(riak_core_vnode_proxy_sup, {riak_kv_vnode, Id}),
     RegName = riak_core_vnode_proxy:reg_name(riak_kv_vnode, Index),
     undefined = whereis(RegName),
-    %% Fail if we get back the dead vnode
-    {ok, VPid1} = riak_core_vnode_manager:get_vnode_pid(Index, riak_kv_vnode),
-    ?assertNotEqual(VPid1, VPid0),
+    VPid1 = wait_for_vnode_change(VPid0, Index),
 
     {ok, PPid} = supervisor:restart_child(riak_core_vnode_proxy_sup,
                                           {riak_kv_vnode, Id}),
     %% Find the proxy pid and check it's alive and matches the supervisor
@@ -269,14 +266,14 @@ resume_args(#tstate{rt = RT}) ->
 resume(#rt{ppid = PPid, vpid = VPid}) ->
     sys:resume(VPid),
     %% Use the sys:get_status call to force a synchronous call
-    %% against the vnode proxy to ensure all messages sent by
+    %% against the vnode & the proxy to ensure all messages sent by
     %% this process have been serviced and there are no pending
     %% 'ping's in the vnode before we continue.
     %% Then drain the vnode to make sure any pending pongs have
-    %% been sent.
-    ok = drain(VPid),
+    %% been sent, and ensure the proxy has handled them as well.
+    _ = sys:get_status(PPid),
     _ = sys:get_status(VPid),
-    _ = sys:get_status(PPid).
+    ok = drain([VPid, PPid]).
 
 resume_next(S, _V, _A) ->
     S#tstate{vnode_running = true, proxy_msgs = 0, direct_msgs = 0}.
@@ -329,28 +326,28 @@ overloaded_args(#tstate{vnode_running = Running, rt = RT}) ->
 overloaded(Running, #rt{ppid = PPid, vpid = VPid}) ->
     case Running of
         true ->
-            ok = drain(PPid), % make sure all proxy msgs processed/dropped
-            ok = drain(VPid); % make sure any pending ping/pongs are processed
+            ok = drain([PPid, VPid]);
         _ ->
             ok
     end,
-    {riak_core_vnode_proxy:overloaded(PPid),
-     msgq_len(VPid), % just for debug so we can review in log output
-     sys:get_status(PPid)}. % ditto
+    {messages, PMsgs} = process_info(PPid, messages),
+    {messages, VMsgs} = process_info(VPid, messages),
+    Overloaded = riak_core_vnode_proxy:overloaded(PPid),
+    {Overloaded, {VMsgs, PMsgs}, sys:get_status(PPid)}.
 
 overloaded_post(#tstate{threshold = undefined}, _A,
-                {R, _VnodeQ, _ProxyStatus}) ->
+                {R, _Messages, _ProxyStatus}) ->
     %% If there are no thresholds there should never be an overload
     eq(R, false);
 overloaded_post(#tstate{vnode_running = true}, _A,
-                {R, _VnodeQ = 0, _ProxyStatus}) ->
+                {R, _Messages, _ProxyStatus}) ->
     %% If the vnode is running, we have cleared queues so
     %% should not be in overload.
     eq(R, false);
 overloaded_post(#tstate{vnode_running = false, proxy_msgs = ProxyMsgs,
                         threshold = Threshold}, _A,
-                {ResultOverload, _VnodeQ, _ProxyStatus}) ->
+                {ResultOverload, _Messages, _ProxyStatus}) ->
     %% Either
     %%   mailbox is completely an estimate based on proxy msgs
     %%   or mailbox is a check + estimate since
@@ -397,16 +394,33 @@ prep_env(Var, Val) ->
 %% Wait until all messages are drained by the Pid. No guarantees
 %% about future messages being sent, or that responses for the
 %% last message consumed have been transmitted.
-%%
-drain(Pid) ->
-    case erlang:process_info(Pid, message_queue_len) of
-        {message_queue_len, 0} ->
+%% NOTE: The "drain 3 times in a row" was determined empirically,
+%% and may not be sufficient (2 was not). Given time constraints,
+%% living with it for now. If this fails, we should really add some
+%% tracing code around the send of messages to Vnode and Proxy to
+%% determine where extra messages are coming from rather than just
+%% make this "try 4 times"
+%%
+drain(Pid) when is_pid(Pid) ->
+    drain([Pid], {-1, -1});
+
+drain(Pids) when is_list(Pids) ->
+    drain(Pids, {-1, -1}).
+drain(Pids, {PrevPrev, Prev}) ->
+    _ = [sys:suspend(Pid) || Pid <- Pids],
+    Len = lists:foldl(fun(Pid, Acc0) ->
+                              {message_queue_len, Len} = erlang:process_info(Pid, message_queue_len),
+                              Acc0 + Len
+                      end, 0, Pids),
+    _ = [sys:resume(Pid) || Pid <- Pids],
+    case {PrevPrev, Prev, Len} of
+        {0, 0, 0} ->
             ok;
-        {message_queue_len, L} when L > 0 ->
-            timer:sleep(1), % give it a millisecond to drain
-            drain(Pid);
-        ER ->
-            ER
+        _ ->
+            %% Attempt to ensure something else is scheduled before we try to drain again
+            erlang:yield(),
+            timer:sleep(1),
+            drain(Pids, {Prev, Len})
     end.
 
 %% Return the length of the message queue (or crash if proc dead)
@@ -462,3 +476,14 @@ confirm() ->
     pass.
 
 -endif.
+
+
+wait_for_vnode_change(VPid0, Index) ->
+    {ok, VPid1} = riak_core_vnode_manager:get_vnode_pid(Index, riak_kv_vnode),
+    case VPid1 of
+        VPid0 ->
+            timer:sleep(1),
+            wait_for_vnode_change(VPid0, Index);
+        _ ->
+            VPid1
+    end.

From 2c2e57d898cf66bd917e542e050854bba795d0fc Mon Sep 17 00:00:00 2001
From: Ted Burghart
Date: Wed, 9 Dec 2015 14:45:41 -0500
Subject: [PATCH 9/9] Move wait_for_vnode_change inside the -ifdef(EQC) clause.

---
 tests/proxy_overload_recovery.erl | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/tests/proxy_overload_recovery.erl b/tests/proxy_overload_recovery.erl
index 03248d008..400d21ea0 100644
--- a/tests/proxy_overload_recovery.erl
+++ b/tests/proxy_overload_recovery.erl
@@ -17,14 +17,14 @@
 %% cleared and the model is reset. The main goal is to advance the proxy
 %% into interesting new states.
 %%
-%% This test can be run outside of riak_test while working on it. 
+%% This test can be run outside of riak_test while working on it.
 %% Symlink the source into a release build and run
-%% c(proxy_overload_recovery). 
+%% c(proxy_overload_recovery).
 %% proxy_overload_recovery:run(300). % Run for 5 mins
 %%
 %% On failure you can re-run counter examples *and* print out the internal
 %% state with the run.
-%% proxy_overload_recovery:check(). 
+%% proxy_overload_recovery:check().
 %%
 %% TODO/Questions:
 %% 1) Is there a better way to do the initialization step?
@@ -137,7 +137,7 @@ prop_proxy_recovery() ->
                                 [catch msgq_len(VPid)])
                 end
             end,
-            measure(duration, Msecs, 
+            measure(duration, Msecs,
                     aggregate(with_title("Commands"), command_names(Cmds),
                               pretty_commands(?MODULE, Cmds, {H, S, Res},
                                               Res == ok))))
@@ -173,13 +173,13 @@ precondition_common(#tstate{rt = undefined}, {call, _M, F, _A}) ->
     true;
 precondition_common(_, {call, _M, F, _A}) ->
     F /= prepare.
-%% %% Make sure we're still running what we think we're running - uncomment 
+%% %% Make sure we're still running what we think we're running - uncomment
 %% %% if having process death issues
 %% invariant(#tstate{rt = undefined}) ->
 %%     true;
 %% invariant(#tstate{rt = #rt{id = Index, ppid = PPid, vpid = VPid}}) ->
 %%     RegName = riak_core_vnode_proxy:reg_name(riak_kv_vnode, Index),
-%%     PPid = whereis(RegName), % Check process we think it is. 
+%%     PPid = whereis(RegName), % Check process we think it is.
 %%     true = is_process_alive(PPid),
 %%     true = is_process_alive(VPid),
 %%     true.
@@ -467,6 +467,17 @@ add_eqc_apps(Nodes) ->
          end || App <- Apps, Node <- Nodes],
     ok.
 
+
+wait_for_vnode_change(VPid0, Index) ->
+    {ok, VPid1} = riak_core_vnode_manager:get_vnode_pid(Index, riak_kv_vnode),
+    case VPid1 of
+        VPid0 ->
+            timer:sleep(1),
+            wait_for_vnode_change(VPid0, Index);
+        _ ->
+            VPid1
+    end.
+
 -else. %% no EQC
 
 -export([confirm/0]).
@@ -476,14 +487,3 @@ confirm() ->
     pass.
 
 -endif.
-
-
-wait_for_vnode_change(VPid0, Index) ->
-    {ok, VPid1} = riak_core_vnode_manager:get_vnode_pid(Index, riak_kv_vnode),
-    case VPid1 of
-        VPid0 ->
-            timer:sleep(1),
-            wait_for_vnode_change(VPid0, Index);
-        _ ->
-            VPid1
-    end.