Skip to content

Commit 31e7ec9

Browse files
authored
OWLS-72816 Generate event and status when operator can't scale cluster past maximum (#2097)
* create event and update domain status condition when replicas value is not valid for the weblogic cluster * update domain events doc with InvalidReplicasValue event * containsServer in WlsDomainConfig should return true for clustered servers * misc code/test cleanup * fix failing test after merge from develop * rename InvalidReplicasValue event to DomainValidationError event * create event for validation warnings in ValidateDomainTopologyStep. * update with example of new DomainValidationError event
1 parent 00d80f0 commit 31e7ec9

File tree

16 files changed

+564
-93
lines changed

16 files changed

+564
-93
lines changed

docs-source/content/userguide/managing-domains/domain-events.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ The operator generates these event types, which indicate the following:
2929
* `DomainProcessingRetrying`: The operator is going to retry the processing of a domain after it encountered a failure.
3030
* `DomainProcessingCompleted`: The operator successfully completed the processing of a domain resource.
3131
* `DomainProcessingAborted`: The operator stopped processing a domain when the operator encountered a fatal error or a failure that persisted after the specified maximum number of retries.
32+
* `DomainValidationError`: A validation error or warning is found in a domain resource. Please refer to the event message for details.
3233

3334
#### Operator-generated event details
3435

@@ -242,6 +243,39 @@ Source:
242243
Type: Warning
243244
Events: <none>
244245
246+
```
247+
Example of a `DomainValidationError` event:
248+
249+
```none
250+
251+
Name: sample-domain1.DomainValidationError.1608160013145
252+
Namespace: sample-domain1-ns
253+
Labels: weblogic.createdByOperator=true
254+
weblogic.domainUID=sample-domain1
255+
Annotations: <none>
256+
API Version: v1
257+
Event Time: <nil>
258+
First Timestamp: <nil>
259+
Involved Object:
260+
API Version: weblogic.oracle/v8
261+
Kind: Domain
262+
Name: sample-domain1
263+
Namespace: sample-domain1-ns
264+
Kind: Event
265+
Last Timestamp: 2020-12-16T23:06:53Z
266+
Message: Validation error in domain resource sample-domain1: Replica request of 5 exceeds the maximum dynamic server count of 2 configured for cluster cluster-1
267+
Metadata:
268+
Creation Timestamp: 2020-12-16T23:06:53Z
269+
Resource Version: 11222690
270+
Self Link: /api/v1/namespaces/sample-domain1-ns/events/sample-domain1.DomainValidationError.1608160013145
271+
UID: cd4e6a2f-8ddb-4902-90a7-d993bceb567d
272+
Reason: DomainValidationError
273+
Reporting Component: weblogic.operator
274+
Reporting Instance: weblogic-operator-67c75bc4bf-d4flw
275+
Source:
276+
Type: Warning
277+
Events: <none>
278+
245279
```
246280

247281
Example of domain processing completed after failure and retries:

operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,11 @@ public boolean wasInspectionRun() {
655655
return inspectionRun;
656656
}
657657

658+
@Override
659+
public boolean isExplicitRecheck() {
660+
return explicitRecheck;
661+
}
662+
658663
private boolean shouldContinue() {
659664
DomainPresenceInfo cachedInfo = getExistingDomainPresenceInfo(getNamespace(), getDomainUid());
660665

operator/src/main/java/oracle/kubernetes/operator/EventConstants.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ public interface EventConstants {
1414
String DOMAIN_PROCESSING_FAILED_EVENT = "DomainProcessingFailed";
1515
String DOMAIN_PROCESSING_RETRYING_EVENT = "DomainProcessingRetrying";
1616
String DOMAIN_PROCESSING_ABORTED_EVENT = "DomainProcessingAborted";
17+
String DOMAIN_VALIDATION_ERROR_EVENT = "DomainValidationError";
1718
String EVENT_NORMAL = "Normal";
1819
String EVENT_WARNING = "Warning";
1920
String WEBLOGIC_OPERATOR_COMPONENT = "weblogic.operator";
@@ -31,4 +32,6 @@ public interface EventConstants {
3132
= "Retrying the processing of domain resource %s after one or more failed attempts";
3233
String DOMAIN_PROCESSING_ABORTED_PATTERN
3334
= "Aborting the processing of domain resource %s permanently due to: %s";
35+
String DOMAIN_VALIDATION_ERROR_PATTERN
36+
= "Validation error in domain resource %s: %s";
3437
}

operator/src/main/java/oracle/kubernetes/operator/MakeRightDomainOperation.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ static boolean isInspectionRequired(Packet packet) {
5252
return domainRequiresIntrospectionInCurrentMakeRight(packet) && !wasInspectionRun(packet);
5353
}
5454

55+
boolean isExplicitRecheck();
56+
57+
static boolean isExplicitRecheck(Packet packet) {
58+
return fromPacket(packet).map(MakeRightDomainOperation::isExplicitRecheck).orElse(false);
59+
}
60+
5561
/**
5662
* Returns true if the packet contains info about a domain that requires introspection in a sequence of steps
5763
* before server pods are created or modified.

operator/src/main/java/oracle/kubernetes/operator/helpers/DomainValidationSteps.java

Lines changed: 40 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33

44
package oracle.kubernetes.operator.helpers;
55

6-
import java.util.ArrayList;
76
import java.util.List;
87
import java.util.Objects;
8+
import java.util.Optional;
99

1010
import io.kubernetes.client.openapi.models.V1ConfigMap;
1111
import io.kubernetes.client.openapi.models.V1ConfigMapList;
@@ -14,8 +14,11 @@
1414
import io.kubernetes.client.openapi.models.V1Secret;
1515
import io.kubernetes.client.openapi.models.V1SecretList;
1616
import oracle.kubernetes.operator.DomainStatusUpdater;
17+
import oracle.kubernetes.operator.MakeRightDomainOperation;
1718
import oracle.kubernetes.operator.ProcessingConstants;
1819
import oracle.kubernetes.operator.calls.CallResponse;
20+
import oracle.kubernetes.operator.helpers.EventHelper.EventData;
21+
import oracle.kubernetes.operator.helpers.EventHelper.EventItem;
1922
import oracle.kubernetes.operator.logging.LoggingFacade;
2023
import oracle.kubernetes.operator.logging.LoggingFactory;
2124
import oracle.kubernetes.operator.logging.MessageKeys;
@@ -24,13 +27,13 @@
2427
import oracle.kubernetes.operator.work.NextAction;
2528
import oracle.kubernetes.operator.work.Packet;
2629
import oracle.kubernetes.operator.work.Step;
27-
import oracle.kubernetes.weblogic.domain.model.Cluster;
2830
import oracle.kubernetes.weblogic.domain.model.Domain;
31+
import oracle.kubernetes.weblogic.domain.model.DomainSpec;
2932
import oracle.kubernetes.weblogic.domain.model.KubernetesResourceLookup;
30-
import oracle.kubernetes.weblogic.domain.model.ManagedServer;
3133

3234
import static java.lang.System.lineSeparator;
3335
import static oracle.kubernetes.operator.DomainStatusUpdater.BAD_DOMAIN;
36+
import static oracle.kubernetes.operator.helpers.EventHelper.createEventStep;
3437
import static oracle.kubernetes.operator.logging.MessageKeys.DOMAIN_VALIDATION_FAILED;
3538

3639
public class DomainValidationSteps {
@@ -146,43 +149,57 @@ static class ValidateDomainTopologyStep extends Step {
146149
}
147150

148151

149-
private void logAndAddWarning(List<String> validationWarnings, String messageKey, Object... params) {
150-
LOGGER.warning(messageKey, params);
151-
validationWarnings.add(LOGGER.formatMessage(messageKey, params));
152+
private void logAndAddValidationWarning(DomainPresenceInfo info, String msgId, Object... messageParams) {
153+
LOGGER.warning(msgId, messageParams);
154+
info.addValidationWarning(LOGGER.formatMessage(msgId, messageParams));
152155
}
153156

154157
private void validate(DomainPresenceInfo info, WlsDomainConfig wlsDomainConfig) {
155-
List<String> validationWarnings = new ArrayList<>();
158+
DomainSpec domainSpec = info.getDomain().getSpec();
156159

157-
Domain domain = info.getDomain();
160+
info.clearValidationWarnings();
158161

159-
// log warnings for clusters that are specified in domain resource but not configured
162+
// log warnings for each cluster that is specified in domain resource but not configured
160163
// in the WebLogic domain
161-
for (Cluster cluster : domain.getSpec().getClusters()) {
162-
if (!wlsDomainConfig.containsCluster(cluster.getClusterName())) {
163-
logAndAddWarning(validationWarnings, MessageKeys.NO_CLUSTER_IN_DOMAIN, cluster.getClusterName());
164-
}
165-
}
166-
// log warnings for managed servers that are specified in domain resource but not configured
164+
domainSpec.getClusters().forEach(
165+
c -> warnIfClusterDoesNotExist(wlsDomainConfig, c.getClusterName(), info));
166+
167+
// log warnings for each managed server that is specified in domain resource but not configured
167168
// in the WebLogic domain
168-
for (ManagedServer server : domain.getSpec().getManagedServers()) {
169-
if (!wlsDomainConfig.containsServer(server.getServerName())) {
170-
logAndAddWarning(validationWarnings, MessageKeys.NO_MANAGED_SERVER_IN_DOMAIN, server.getServerName());
171-
}
169+
domainSpec.getManagedServers().forEach(
170+
s -> warnIfServerDoesNotExist(wlsDomainConfig, s.getServerName(), info));
171+
}
172+
173+
private void warnIfClusterDoesNotExist(WlsDomainConfig domainConfig,
174+
String clusterName, DomainPresenceInfo info) {
175+
if (!domainConfig.containsCluster(clusterName)) {
176+
logAndAddValidationWarning(info, MessageKeys.NO_CLUSTER_IN_DOMAIN, clusterName);
172177
}
173-
info.clearValidationWarnings();
174-
for (String warning: validationWarnings) {
175-
info.addValidationWarning(warning);
178+
}
179+
180+
private void warnIfServerDoesNotExist(WlsDomainConfig domainConfig,
181+
String serverName, DomainPresenceInfo info) {
182+
if (!domainConfig.containsServer(serverName)) {
183+
logAndAddValidationWarning(info, MessageKeys.NO_MANAGED_SERVER_IN_DOMAIN, serverName);
176184
}
177185
}
178186

179187
@Override
180188
public NextAction apply(Packet packet) {
181189
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
190+
boolean isExplicitRecheck = MakeRightDomainOperation.isExplicitRecheck(packet);
182191
WlsDomainConfig wlsDomainConfig = (WlsDomainConfig) packet.get(ProcessingConstants.DOMAIN_TOPOLOGY);
183192
validate(info, wlsDomainConfig);
184193

185-
return doNext(packet);
194+
return doNext(getNextStep(info.getValidationWarningsAsString(), isExplicitRecheck, getNext()), packet);
195+
}
196+
197+
private Step getNextStep(String message, boolean skipCreateEvent, Step next) {
198+
return skipCreateEvent
199+
? next
200+
: Optional.ofNullable((message))
201+
.map(m -> createEventStep(new EventData(EventItem.DOMAIN_VALIDATION_ERROR, m), next))
202+
.orElse(next);
186203
}
187204
}
188205

operator/src/main/java/oracle/kubernetes/operator/helpers/EventHelper.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
import static oracle.kubernetes.operator.EventConstants.DOMAIN_PROCESSING_RETRYING_PATTERN;
3838
import static oracle.kubernetes.operator.EventConstants.DOMAIN_PROCESSING_STARTING_EVENT;
3939
import static oracle.kubernetes.operator.EventConstants.DOMAIN_PROCESSING_STARTING_PATTERN;
40+
import static oracle.kubernetes.operator.EventConstants.DOMAIN_VALIDATION_ERROR_EVENT;
41+
import static oracle.kubernetes.operator.EventConstants.DOMAIN_VALIDATION_ERROR_PATTERN;
4042
import static oracle.kubernetes.operator.EventConstants.EVENT_NORMAL;
4143
import static oracle.kubernetes.operator.EventConstants.EVENT_WARNING;
4244
import static oracle.kubernetes.operator.EventConstants.WEBLOGIC_OPERATOR_COMPONENT;
@@ -60,10 +62,27 @@ public static Step createEventStep(
6062
return new CreateEventStep(eventData);
6163
}
6264

65+
/**
66+
* Factory for a {@link Step} that asynchronously creates an event.
67+
*
68+
* @param eventData event item
69+
* @param next next step
70+
* @return Step for creating an event
71+
*/
72+
public static Step createEventStep(
73+
EventData eventData, Step next) {
74+
return new CreateEventStep(eventData, next);
75+
}
76+
6377
public static class CreateEventStep extends Step {
6478
private final EventData eventData;
6579

6680
CreateEventStep(EventData eventData) {
81+
this(eventData, null);
82+
}
83+
84+
CreateEventStep(EventData eventData, Step next) {
85+
super(next);
6786
this.eventData = eventData;
6887
}
6988

@@ -254,6 +273,28 @@ public String getMessage(DomainPresenceInfo info, EventData eventData) {
254273
}
255274

256275
},
276+
DOMAIN_VALIDATION_ERROR {
277+
@Override
278+
public String getType() {
279+
return EVENT_WARNING;
280+
}
281+
282+
@Override
283+
public String getReason() {
284+
return DOMAIN_VALIDATION_ERROR_EVENT;
285+
}
286+
287+
@Override
288+
public String getPattern() {
289+
return DOMAIN_VALIDATION_ERROR_PATTERN;
290+
}
291+
292+
@Override
293+
public String getMessage(DomainPresenceInfo info, EventData eventData) {
294+
return String.format(DOMAIN_VALIDATION_ERROR_PATTERN,
295+
info.getDomainUid(), Optional.ofNullable(eventData.message).orElse(""));
296+
}
297+
},
257298
EMPTY {
258299
@Override
259300
protected String getPattern() {

operator/src/main/java/oracle/kubernetes/operator/helpers/JobHelper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,7 @@ public NextAction apply(Packet packet) {
356356
packet);
357357
}
358358

359-
return doNext(DomainValidationSteps.createValidateDomainTopologyStep(getNext()), packet);
359+
return doNext(packet);
360360
}
361361
}
362362

operator/src/main/java/oracle/kubernetes/operator/steps/ManagedServersUpStep.java

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,13 @@
1616
import javax.annotation.Nonnull;
1717

1818
import oracle.kubernetes.operator.DomainStatusUpdater;
19+
import oracle.kubernetes.operator.MakeRightDomainOperation;
1920
import oracle.kubernetes.operator.ProcessingConstants;
2021
import oracle.kubernetes.operator.helpers.DomainPresenceInfo;
2122
import oracle.kubernetes.operator.helpers.DomainPresenceInfo.ServerShutdownInfo;
2223
import oracle.kubernetes.operator.helpers.DomainPresenceInfo.ServerStartupInfo;
24+
import oracle.kubernetes.operator.helpers.EventHelper.EventData;
25+
import oracle.kubernetes.operator.helpers.EventHelper.EventItem;
2326
import oracle.kubernetes.operator.helpers.PodHelper;
2427
import oracle.kubernetes.operator.logging.LoggingFacade;
2528
import oracle.kubernetes.operator.logging.LoggingFactory;
@@ -37,6 +40,7 @@
3740
import static java.util.Comparator.comparing;
3841
import static oracle.kubernetes.operator.DomainStatusUpdater.MANAGED_SERVERS_STARTING_PROGRESS_REASON;
3942
import static oracle.kubernetes.operator.DomainStatusUpdater.createProgressingStartedEventStep;
43+
import static oracle.kubernetes.operator.helpers.EventHelper.createEventStep;
4044

4145
public class ManagedServersUpStep extends Step {
4246
static final String SERVERS_UP_MSG =
@@ -97,9 +101,11 @@ private static void insert(List<Step> steps, Step step) {
97101
public NextAction apply(Packet packet) {
98102
LOGGER.entering();
99103
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
104+
boolean isExplicitRecheck = MakeRightDomainOperation.isExplicitRecheck(packet);
100105
WlsDomainConfig config = (WlsDomainConfig) packet.get(ProcessingConstants.DOMAIN_TOPOLOGY);
101106

102-
ServersUpStepFactory factory = new ServersUpStepFactory(config, info.getDomain());
107+
ServersUpStepFactory factory = new ServersUpStepFactory(config,
108+
info.getDomain(), info, isExplicitRecheck);
103109

104110
if (LOGGER.isFineEnabled()) {
105111
LOGGER.fine(SERVERS_UP_MSG, factory.domain.getDomainUid(), getRunningServers(info));
@@ -157,15 +163,21 @@ Step createServerStep(
157163
static class ServersUpStepFactory {
158164
final WlsDomainConfig domainTopology;
159165
final Domain domain;
166+
final DomainPresenceInfo info;
167+
final boolean skipEventCreation;
160168
List<ServerStartupInfo> startupInfos;
161169
List<ServerShutdownInfo> shutdownInfos = new ArrayList<>();
162170
final Collection<String> servers = new ArrayList<>();
163171
final Collection<String> preCreateServers = new ArrayList<>();
164172
final Map<String, Integer> replicas = new HashMap<>();
173+
private Step eventStep;
165174

166-
ServersUpStepFactory(WlsDomainConfig domainTopology, Domain domain) {
175+
ServersUpStepFactory(WlsDomainConfig domainTopology, Domain domain,
176+
DomainPresenceInfo info, boolean skipEventCreation) {
167177
this.domainTopology = domainTopology;
168178
this.domain = domain;
179+
this.info = info;
180+
this.skipEventCreation = skipEventCreation;
169181
}
170182

171183
/**
@@ -223,11 +235,8 @@ boolean exceedsMaxConfiguredClusterSize(WlsClusterConfig clusterConfig) {
223235
}
224236

225237
private Step createNextStep(Step next) {
226-
if (servers.isEmpty()) {
227-
return next;
228-
} else {
229-
return new ManagedServerUpIteratorStep(getStartupInfos(), next);
230-
}
238+
Step nextStep = (servers.isEmpty()) ? next : new ManagedServerUpIteratorStep(getStartupInfos(), next);
239+
return Optional.ofNullable(eventStep).map(s -> Step.chain(s, nextStep)).orElse(nextStep);
231240
}
232241

233242
Collection<ServerStartupInfo> getStartupInfos() {
@@ -270,8 +279,7 @@ private Integer getReplicaCount(String clusterName) {
270279
private void logIfReplicasExceedsClusterServersMax(WlsClusterConfig clusterConfig) {
271280
if (exceedsMaxConfiguredClusterSize(clusterConfig)) {
272281
String clusterName = clusterConfig.getClusterName();
273-
LOGGER.warning(
274-
MessageKeys.REPLICAS_EXCEEDS_TOTAL_CLUSTER_SERVER_COUNT,
282+
addValidationErrorEventAndWarning(MessageKeys.REPLICAS_EXCEEDS_TOTAL_CLUSTER_SERVER_COUNT,
275283
domain.getReplicaCount(clusterName),
276284
clusterConfig.getMaxDynamicClusterSize(),
277285
clusterName);
@@ -281,18 +289,26 @@ private void logIfReplicasExceedsClusterServersMax(WlsClusterConfig clusterConfi
281289
private void logIfReplicasLessThanClusterServersMin(WlsClusterConfig clusterConfig) {
282290
if (lessThanMinConfiguredClusterSize(clusterConfig)) {
283291
String clusterName = clusterConfig.getClusterName();
284-
LOGGER.warning(
285-
MessageKeys.REPLICAS_LESS_THAN_TOTAL_CLUSTER_SERVER_COUNT,
286-
domain.getReplicaCount(clusterName),
287-
clusterConfig.getMinDynamicClusterSize(),
288-
clusterName);
292+
addValidationErrorEventAndWarning(MessageKeys.REPLICAS_LESS_THAN_TOTAL_CLUSTER_SERVER_COUNT,
293+
domain.getReplicaCount(clusterName),
294+
clusterConfig.getMinDynamicClusterSize(),
295+
clusterName);
289296

290297
// Reset current replica count so we don't scale down less than minimum
291298
// dynamic cluster size
292299
domain.setReplicaCount(clusterName, clusterConfig.getMinDynamicClusterSize());
293300
}
294301
}
295302

303+
private void addValidationErrorEventAndWarning(String msgId, Object... messageParams) {
304+
LOGGER.warning(msgId, messageParams);
305+
String message = LOGGER.formatMessage(msgId, messageParams);
306+
if (!skipEventCreation) {
307+
eventStep = createEventStep(new EventData(EventItem.DOMAIN_VALIDATION_ERROR, message));
308+
}
309+
info.addValidationWarning(message);
310+
}
311+
296312
private boolean lessThanMinConfiguredClusterSize(WlsClusterConfig clusterConfig) {
297313
if (clusterConfig != null) {
298314
String clusterName = clusterConfig.getClusterName();

0 commit comments

Comments
 (0)