88
99package org .elasticsearch .upgrades ;
1010
11+ import org .apache .http .client .methods .HttpGet ;
1112import org .apache .http .client .methods .HttpPost ;
13+ import org .elasticsearch .Version ;
1214import org .elasticsearch .client .Request ;
1315import org .elasticsearch .client .Response ;
1416import org .elasticsearch .cluster .metadata .IndexMetadata ;
17+ import org .elasticsearch .common .Strings ;
1518import org .elasticsearch .common .settings .Settings ;
19+ import org .elasticsearch .common .xcontent .XContentBuilder ;
1620import org .elasticsearch .common .xcontent .support .XContentMapValues ;
21+ import org .elasticsearch .core .Nullable ;
1722import org .elasticsearch .index .query .QueryBuilder ;
1823import org .elasticsearch .index .query .QueryBuilders ;
1924import org .elasticsearch .repositories .blobstore .BlobStoreRepository ;
2631
2732import static org .elasticsearch .cluster .routing .UnassignedInfo .INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING ;
2833import static org .elasticsearch .cluster .routing .allocation .decider .MaxRetryAllocationDecider .SETTING_ALLOCATION_MAX_RETRY ;
29- import static org .elasticsearch .upgrades . AbstractRollingTestCase . ClusterType . MIXED ;
34+ import static org .elasticsearch .common . xcontent . XContentFactory . jsonBuilder ;
3035import static org .hamcrest .Matchers .allOf ;
3136import static org .hamcrest .Matchers .equalTo ;
3237import static org .hamcrest .Matchers .greaterThanOrEqualTo ;
38+ import static org .hamcrest .Matchers .is ;
3339import static org .hamcrest .Matchers .lessThan ;
40+ import static org .hamcrest .Matchers .notNullValue ;
3441
3542public class SnapshotBasedRecoveryIT extends AbstractRollingTestCase {
3643 public void testSnapshotBasedRecovery () throws Exception {
@@ -66,17 +73,41 @@ public void testSnapshotBasedRecovery() throws Exception {
6673 break ;
6774 case MIXED :
6875 case UPGRADED :
69- // the following `if` for first round mixed was added as a selective test mute. Sometimes the primary shard ends
70- // on the upgraded node. This causes issues when removing and adding replicas, since then we cannot allocate to
71- // any of the old nodes. That is an issue only for the first mixed round, hence this check.
72- // Ideally we would find the reason the primary ends on the upgraded node and fix that (or figure out that it
73- // is all good).
74- // @AwaitsFix(bugUrl = https://github.com/elastic/elasticsearch/issues/76595)
75- if (CLUSTER_TYPE != MIXED || FIRST_MIXED_ROUND == false ) {
76- // Drop replicas
77- updateIndexSettings (indexName , Settings .builder ().put (IndexMetadata .INDEX_NUMBER_OF_REPLICAS_SETTING .getKey (), 0 ));
76+ if (FIRST_MIXED_ROUND ) {
77+ String upgradedNodeId = getUpgradedNodeId ();
78+
79+ if (upgradedNodeId != null ) {
80+ updateIndexSettings (
81+ indexName ,
82+ Settings .builder ()
83+ .put ("index.routing.allocation.exclude._id" , upgradedNodeId )
84+ );
85+ }
86+
87+ String primaryNodeId = getPrimaryNodeIdOfShard (indexName , 0 );
88+ Version primaryNodeVersion = getNodeVersion (primaryNodeId );
89+
90+ // Sometimes the primary shard ends up on the upgraded node (e.g. after a rebalance).
91+ // This causes issues when removing and adding replicas, since we then cannot allocate to any of the old nodes.
92+ // That is an issue only for the first mixed round.
93+ // In that case we exclude the upgraded node from the shard allocation and cancel the shard to force moving
94+ // the primary to a node on the old version; this allows adding replicas in the first mixed round.
95+ if (primaryNodeVersion .after (UPGRADE_FROM_VERSION )) {
96+ cancelShard (indexName , 0 , primaryNodeId );
97+
98+ String currentPrimaryNodeId = getPrimaryNodeIdOfShard (indexName , 0 );
99+ assertThat (getNodeVersion (currentPrimaryNodeId ), is (equalTo (UPGRADE_FROM_VERSION )));
100+ }
101+ } else {
102+ updateIndexSettings (
103+ indexName ,
104+ Settings .builder ()
105+ .putNull ("index.routing.allocation.exclude._id" )
106+ );
78107 }
79- ensureGreen (indexName );
108+
109+ // Drop replicas
110+ updateIndexSettings (indexName , Settings .builder ().put (IndexMetadata .INDEX_NUMBER_OF_REPLICAS_SETTING .getKey (), 0 ));
80111
81112 updateIndexSettings (indexName , Settings .builder ().put (IndexMetadata .INDEX_NUMBER_OF_REPLICAS_SETTING .getKey (), 1 ));
82113 ensureGreen (indexName );
@@ -88,6 +119,81 @@ public void testSnapshotBasedRecovery() throws Exception {
88119 }
89120 }
90121
122+ @ Nullable
123+ private String getUpgradedNodeId () throws IOException {
124+ Request request = new Request (HttpGet .METHOD_NAME , "_nodes/_all" );
125+ Response response = client ().performRequest (request );
126+ Map <String , Object > responseMap = responseAsMap (response );
127+ Map <String , Map <String , Object >> nodes = extractValue (responseMap , "nodes" );
128+ for (Map .Entry <String , Map <String , Object >> nodeInfoEntry : nodes .entrySet ()) {
129+ Version nodeVersion = Version .fromString (extractValue (nodeInfoEntry .getValue (), "version" ));
130+ if (nodeVersion .after (UPGRADE_FROM_VERSION )) {
131+ return nodeInfoEntry .getKey ();
132+ }
133+ }
134+ return null ;
135+ }
136+
137+ private Version getNodeVersion (String primaryNodeId ) throws IOException {
138+ Request request = new Request (HttpGet .METHOD_NAME , "_nodes/" + primaryNodeId );
139+ Response response = client ().performRequest (request );
140+ String nodeVersion = extractValue (responseAsMap (response ), "nodes." + primaryNodeId + ".version" );
141+ return Version .fromString (nodeVersion );
142+ }
143+
144+ private String getPrimaryNodeIdOfShard (String indexName , int shard ) throws Exception {
145+ String primaryNodeId ;
146+ try (XContentBuilder builder = jsonBuilder ()) {
147+ builder .startObject ();
148+ {
149+ builder .field ("index" , indexName );
150+ builder .field ("shard" , shard );
151+ builder .field ("primary" , true );
152+ }
153+ builder .endObject ();
154+
155+ Request request = new Request (HttpGet .METHOD_NAME , "_cluster/allocation/explain" );
156+ request .setJsonEntity (Strings .toString (builder ));
157+
158+ Response response = client ().performRequest (request );
159+ Map <String , Object > responseMap = responseAsMap (response );
160+ primaryNodeId = extractValue (responseMap , "current_node.id" );
161+ }
162+ assertThat (primaryNodeId , is (notNullValue ()));
163+
164+ return primaryNodeId ;
165+ }
166+
167+ private void cancelShard (String indexName , int shard , String nodeName ) throws IOException {
168+ try (XContentBuilder builder = jsonBuilder ()) {
169+ builder .startObject ();
170+ {
171+ builder .startArray ("commands" );
172+ {
173+ builder .startObject ();
174+ {
175+ builder .startObject ("cancel" );
176+ {
177+ builder .field ("index" , indexName );
178+ builder .field ("shard" , shard );
179+ builder .field ("node" , nodeName );
180+ builder .field ("allow_primary" , true );
181+ }
182+ builder .endObject ();
183+ }
184+ builder .endObject ();
185+ }
186+ builder .endArray ();
187+ }
188+ builder .endObject ();
189+
190+ Request request = new Request (HttpPost .METHOD_NAME , "/_cluster/reroute" );
191+ request .setJsonEntity (Strings .toString (builder ));
192+ Response response = client ().performRequest (request );
193+ assertOK (response );
194+ }
195+ }
196+
91197 private void assertMatchAllReturnsAllDocuments (String indexName , int numDocs ) throws IOException {
92198 Map <String , Object > searchResults = search (indexName , QueryBuilders .matchAllQuery ());
93199 List <Map <String , Object >> hits = extractValue (searchResults , "hits.hits" );
0 commit comments