diff --git a/integration-tests/src/test/java/oracle/weblogic/kubernetes/ItServerStartPolicy.java b/integration-tests/src/test/java/oracle/weblogic/kubernetes/ItServerStartPolicy.java index 4dc3d89b3fd..986217a4945 100644 --- a/integration-tests/src/test/java/oracle/weblogic/kubernetes/ItServerStartPolicy.java +++ b/integration-tests/src/test/java/oracle/weblogic/kubernetes/ItServerStartPolicy.java @@ -1433,7 +1433,8 @@ private void scalingClusters(String clusterName, String serverPodName, int repli executeLifecycleScript(STATUS_CLUSTER_SCRIPT, CLUSTER_LIFECYCLE, clusterName), String.format("Failed to run %s", STATUS_CLUSTER_SCRIPT)); - assertTrue(verifyExecuteResult(result, regex), "The script should scale the given cluster: " + clusterName); + assertTrue(verifyExecuteResult(result, regex), "The script should scale the given cluster: " + clusterName + + ", the result is -> " + result + " . It doesn't match the expected pattern -> " + regex); logger.info("The cluster {0} scaled successfully.", clusterName); } diff --git a/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java b/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java index 8f8cf3772ea..833867c645a 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java +++ b/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java @@ -882,7 +882,7 @@ private boolean shouldContinue() { return true; } else if (shouldReportAbortedEvent()) { return true; - } else if (hasExceededRetryCount() && !isImgRestartIntrospectVerChanged(liveInfo, cachedInfo)) { + } else if (hasExceededRetryCount(liveInfo) && !isImgRestartIntrospectVerChanged(liveInfo, cachedInfo)) { LOGGER.severe(ProcessingConstants.EXCEEDED_INTROSPECTOR_MAX_RETRY_COUNT_ERROR_MSG); return false; } else if (isFatalIntrospectorError()) { @@ -893,10 +893,7 @@ private boolean shouldContinue() { return false; // we have already cached this } else if (shouldRecheck(cachedInfo)) { - if (hasExceededRetryCount()) { - resetIntrospectorJobFailureCount(); - } - if (getCurrentIntrospectFailureRetryCount() > 0) { + if (getCurrentIntrospectFailureRetryCount(liveInfo) > 0) { logRetryCount(cachedInfo); ensureRetryingEventPresent(); } @@ -917,29 +914,10 @@ private void ensureRetryingEventPresent() { } } - private void resetIntrospectorJobFailureCount() { - Optional.ofNullable(liveInfo) - .map(DomainPresenceInfo::getDomain) - .map(Domain::getStatus) - .map(DomainStatus::resetIntrospectJobFailureCount); - } - - private boolean hasExceededRetryCount() { - return getCurrentIntrospectFailureRetryCount() - >= DomainPresence.getDomainPresenceFailureRetryMaxCount(); - } - - private Integer getCurrentIntrospectFailureRetryCount() { - return Optional.ofNullable(liveInfo) - .map(DomainPresenceInfo::getDomain) - .map(Domain::getStatus) - .map(DomainStatus::getIntrospectJobFailureCount) - .orElse(0); - } private void logRetryCount(DomainPresenceInfo cachedInfo) { LOGGER.info(MessageKeys.INTROSPECT_JOB_FAILED_RETRY_COUNT, cachedInfo.getDomain().getDomainUid(), - getCurrentIntrospectFailureRetryCount(), + getCurrentIntrospectFailureRetryCount(liveInfo), DomainPresence.getDomainPresenceFailureRetryMaxCount()); } @@ -1068,11 +1046,24 @@ private static String getIntrospectVersion(DomainPresenceInfo info) { .orElse(null); } + private Integer getCurrentIntrospectFailureRetryCount(DomainPresenceInfo info) { + return Optional.ofNullable(info) + .map(DomainPresenceInfo::getDomain) + .map(Domain::getStatus) + .map(DomainStatus::getIntrospectJobFailureCount) + .orElse(0); + } + private static boolean isCachedInfoNewer(DomainPresenceInfo liveInfo, DomainPresenceInfo cachedInfo) { return liveInfo.getDomain() != null && KubernetesUtils.isFirstNewer(cachedInfo.getDomain().getMetadata(), liveInfo.getDomain().getMetadata()); } + private boolean hasExceededRetryCount(DomainPresenceInfo info) { + return getCurrentIntrospectFailureRetryCount(info) + >= DomainPresence.getDomainPresenceFailureRetryMaxCount(); + } + @SuppressWarnings("unused") private void runDomainPlan( Domain dom, @@ -1092,11 +1083,11 @@ public void onCompletion(Packet packet) { @Override public void onThrowable(Packet packet, Throwable throwable) { logThrowable(throwable); - gate.startFiberIfLastFiberMatches( domainUid, Fiber.getCurrentIfSet(), - DomainStatusUpdater.createFailureRelatedSteps(throwable, null), + Step.chain(DomainStatusUpdater.createFailureCountStep(), + DomainStatusUpdater.createFailureRelatedSteps(throwable, null)), plan.packet, new CompletionCallback() { @Override @@ -1119,7 +1110,7 @@ public void onThrowable(Packet packet, Throwable throwable) { LoggingContext.setThreadContext().namespace(ns).domainUid(domainUid)) { existing.setPopulated(false); // proceed only if we have not already retried max number of times - int retryCount = existing.incrementAndGetFailureCount(); + int retryCount = getCurrentIntrospectFailureRetryCount(existing); LOGGER.fine( "Failure count for DomainPresenceInfo: " + existing @@ -1179,6 +1170,11 @@ Step createDomainUpPlan(DomainPresenceInfo info) { bringAdminServerUp(info, delegate.getPodAwaiterStepFactory(info.getNamespace())), managedServerStrategy); + if (hasExceededRetryCount(info) && isImgRestartIntrospectVerChanged(info, + getExistingDomainPresenceInfo(info.getNamespace(), info.getDomainUid()))) { + domainUpStrategy = Step.chain(DomainStatusUpdater.createResetFailureCountStep(), domainUpStrategy); + } + return Step.chain( createDomainUpInitialStep(info), ConfigMapHelper.readExistingIntrospectorConfigMap(info.getNamespace(), info.getDomainUid()), @@ -1226,8 +1222,7 @@ private static class TailStep extends Step { @Override public NextAction apply(Packet packet) { - packet.getSpi(DomainPresenceInfo.class).complete(); - return doNext(packet); + return doNext(DomainStatusUpdater.createResetFailureCountStep(), packet); } } diff --git a/operator/src/main/java/oracle/kubernetes/operator/DomainStatusUpdater.java b/operator/src/main/java/oracle/kubernetes/operator/DomainStatusUpdater.java index d906c4a0dba..06119f65f5a 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/DomainStatusUpdater.java +++ b/operator/src/main/java/oracle/kubernetes/operator/DomainStatusUpdater.java @@ -842,6 +842,22 @@ private boolean isFatalError(DomainStatus domainStatus) { } } + public static Step createResetFailureCountStep() { + return new ResetFailureCountStep(); + } + + static class ResetFailureCountStep extends DomainStatusUpdaterStep { + + public ResetFailureCountStep() { + super(null); + } + + @Override + void modifyStatus(DomainStatus domainStatus) { + domainStatus.resetIntrospectJobFailureCount(); + } + } + public static Step recordLastIntrospectJobProcessedUid(String lastIntrospectJobProcessedId) { return new RecordLastIntrospectJobProcessedUidStep(lastIntrospectJobProcessedId); } diff --git a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java index d16ae72b45a..cc9d05ceda5 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java @@ -476,13 +476,8 @@ protected DefaultResponseStep resumeIfReady(Callback callback) { return new DefaultResponseStep<>(getNext()) { @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { - DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); - String serverName = (String)packet.get(SERVER_NAME); - if ((info != null) && (callResponse != null) && (callResponse.getResult() == null)) { - info.setServerPod(serverName, null); - } - + String serverName = callback.geServerName(); if (isReady(callResponse.getResult(), info, serverName) || callback.didResumeFiber()) { callback.proceedFromWait(callResponse.getResult()); return null; diff --git a/operator/src/main/java/oracle/kubernetes/operator/ProcessingConstants.java b/operator/src/main/java/oracle/kubernetes/operator/ProcessingConstants.java index 551a0994fcb..ecdaa33bce7 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/ProcessingConstants.java +++ b/operator/src/main/java/oracle/kubernetes/operator/ProcessingConstants.java @@ -27,6 +27,7 @@ public interface ProcessingConstants { String DOMAIN_TOPOLOGY = "domainTopology"; String JOB_POD_NAME = "jobPodName"; + String JOB_POD_CONTAINER_WAITING_REASON = "jobPodContainerWaitingReason"; String DOMAIN_INTROSPECTOR_JOB = "domainIntrospectorJob"; String DOMAIN_INTROSPECTOR_LOG_RESULT = "domainIntrospectorLogResult"; String DOMAIN_INTROSPECT_REQUESTED = "domainIntrospectRequested"; diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index 1dd1cfaa264..d74d8fea095 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -178,7 +178,6 @@ void logWaiting(String name) { @Override public final NextAction apply(Packet packet) { String serverName = (String)packet.get(SERVER_NAME); - DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); if (shouldTerminateFiber(initialResource)) { return doTerminate(createTerminationException(initialResource), packet); } else if (isReady(initialResource, packet.getSpi(DomainPresenceInfo.class), serverName)) { @@ -280,10 +279,12 @@ class Callback implements Consumer { private final Packet packet; private final AtomicBoolean didResume = new AtomicBoolean(false); private final AtomicInteger recheckCount = new AtomicInteger(0); + private final String serverName; Callback(AsyncFiber fiber, Packet packet) { this.fiber = fiber; this.packet = packet; + this.serverName = (String) packet.get(SERVER_NAME); } @Override @@ -320,6 +321,10 @@ int incrementAndGetRecheckCount() { int getRecheckCount() { return recheckCount.get(); } + + String geServerName() { + return serverName; + } } private void handleResourceReady(AsyncFiber fiber, Packet packet, T resource) { diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java index a13b7535308..1f1f6113283 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java @@ -13,7 +13,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -54,7 +53,6 @@ public class DomainPresenceInfo { private final AtomicReference domain; private final AtomicBoolean isDeleting = new AtomicBoolean(false); private final AtomicBoolean isPopulated = new AtomicBoolean(false); - private final AtomicInteger retryCount = new AtomicInteger(0); private final AtomicReference> serverStartupInfo; private final AtomicReference> serverShutdownInfo; @@ -547,23 +545,6 @@ public void setPopulated(boolean populated) { isPopulated.set(populated); } - private void resetFailureCount() { - retryCount.set(0); - } - - public int incrementAndGetFailureCount() { - return retryCount.incrementAndGet(); - } - - int getRetryCount() { - return retryCount.get(); - } - - /** Sets the last completion time to now. */ - public void complete() { - resetFailureCount(); - } - EventItem getLastEventItem() { return lastEventItem; } diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/JobHelper.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/JobHelper.java index e7b8dd9a377..503cd6e4d16 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/JobHelper.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/JobHelper.java @@ -10,6 +10,9 @@ import java.util.Objects; import java.util.Optional; +import io.kubernetes.client.openapi.models.V1ContainerState; +import io.kubernetes.client.openapi.models.V1ContainerStateWaiting; +import io.kubernetes.client.openapi.models.V1ContainerStatus; import io.kubernetes.client.openapi.models.V1DeleteOptions; import io.kubernetes.client.openapi.models.V1EnvVar; import io.kubernetes.client.openapi.models.V1Job; @@ -18,6 +21,7 @@ import io.kubernetes.client.openapi.models.V1ObjectMeta; import io.kubernetes.client.openapi.models.V1Pod; import io.kubernetes.client.openapi.models.V1PodList; +import io.kubernetes.client.openapi.models.V1PodStatus; import io.kubernetes.client.openapi.models.V1Volume; import io.kubernetes.client.openapi.models.V1VolumeMount; import oracle.kubernetes.operator.DomainProcessorImpl; @@ -459,29 +463,115 @@ static class ReplaceOrCreateStep extends DefaultResponseStep { @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { - List nextSteps = new ArrayList<>(); DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); V1Job job = (V1Job) callResponse.getResult(); if ((job != null) && (packet.get(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB) == null)) { packet.put(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB, job); } + return doNext(getIntrospectorPodStatus(info.getDomainUid(), info.getNamespace(), getNext()), packet); + } + + private Step getIntrospectorPodStatus(String domainUid, String namespace, Step next) { + return new CallBuilder() + .withLabelSelectors(LabelConstants.JOBNAME_LABEL) + .listPodAsync(namespace, new IntrospectorPodStatusStep(domainUid, next)); + } + + private static boolean isImagePullError(String jobPodContainerWaitingReason) { + return Optional.ofNullable(jobPodContainerWaitingReason) + .map(s -> s.contains("ErrImagePull") || s.contains("ImagePullBackOff")) + .orElse(false); + } + + private static boolean isJobTimedout(DomainPresenceInfo info) { + return Objects.equals(getReason(info), "DeadlineExceeded") || getMessage(info).contains("DeadlineExceeded"); + } + + private static boolean isJobNewOrNotProcesssed(V1Job job, String lastJobProcessedUid) { + return (lastJobProcessedUid == null) || (!lastJobProcessedUid.equals(job.getMetadata().getUid())); + } + + private static String getLastIntrospectJobProcessedId(DomainPresenceInfo info) { + return Optional.of(info) + .map(DomainPresenceInfo::getDomain) + .map(Domain::getStatus) + .map(DomainStatus::getLastIntrospectJobProcessedUid) + .orElse(null); + } - OffsetDateTime startTime = createNextSteps(nextSteps, info, job, getNext()); - packet.putIfAbsent(START_TIME, startTime); - return doNext(nextSteps.get(0), packet); + private static String getReason(DomainPresenceInfo info) { + return Optional.of(info) + .map(DomainPresenceInfo::getDomain) + .map(Domain::getStatus) + .map(DomainStatus::getReason) + .orElse(null); } + private static String getMessage(DomainPresenceInfo info) { + return Optional.of(info) + .map(DomainPresenceInfo::getDomain) + .map(Domain::getStatus) + .map(DomainStatus::getMessage) + .orElse(""); + } - static OffsetDateTime createNextSteps(List nextSteps, DomainPresenceInfo info, - V1Job job, Step next) { + private class IntrospectorPodStatusStep extends ResponseStep { + private final String domainUid; + + IntrospectorPodStatusStep(String domainUid, Step next) { + super(next); + this.domainUid = domainUid; + } + + @Override + public NextAction onFailure(Packet packet, CallResponse callResponse) { + return super.onFailure(packet, callResponse); + } + + @Override + public NextAction onSuccess(Packet packet, CallResponse callResponse) { + List nextSteps = new ArrayList<>(); + Optional.ofNullable(callResponse.getResult()) + .map(V1PodList::getItems) + .orElseGet(Collections::emptyList) + .forEach(pod -> recordJobPodNameAndStatus(packet, pod)); + + V1Job job = (V1Job) packet.get(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB); + OffsetDateTime startTime = createNextSteps(nextSteps, packet, job, getNext()); + packet.putIfAbsent(START_TIME, startTime); + return doContinueListOrNext(callResponse, packet, nextSteps.get(0)); + } + + private void recordJobPodNameAndStatus(Packet packet, V1Pod pod) { + String name = Optional.ofNullable(pod).map(V1Pod::getMetadata).map(V1ObjectMeta::getName).orElse(""); + if (name.startsWith(JobHelper.createJobName(domainUid))) { + packet.put(ProcessingConstants.JOB_POD_NAME, name); + + packet.put(ProcessingConstants.JOB_POD_CONTAINER_WAITING_REASON, Optional.ofNullable(pod.getStatus()) + .map(V1PodStatus::getContainerStatuses).map(statuses -> statuses.get(0)) + .map(V1ContainerStatus::getState).map(V1ContainerState::getWaiting) + .map(V1ContainerStateWaiting::getReason).orElse(null)); + } + } + } + + static OffsetDateTime createNextSteps(List nextSteps, Packet packet, V1Job job, Step next) { OffsetDateTime jobStartTime; + DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); String namespace = info.getNamespace(); + String jobPodContainerWaitingReason = (String) packet.get(ProcessingConstants.JOB_POD_CONTAINER_WAITING_REASON); + if (job != null) { jobStartTime = Optional.ofNullable(job.getMetadata()) .map(V1ObjectMeta::getCreationTimestamp).orElse(OffsetDateTime.now()); String lastIntrospectJobProcessedId = getLastIntrospectJobProcessedId(info); - if ((lastIntrospectJobProcessedId == null) - || (!lastIntrospectJobProcessedId.equals(job.getMetadata().getUid()))) { + + if (isJobTimedout(info) || (isImagePullError(jobPodContainerWaitingReason))) { + jobStartTime = OffsetDateTime.now(); + packet.put(DOMAIN_INTROSPECT_REQUESTED, ReadDomainIntrospectorPodLogResponseStep.INTROSPECTION_FAILED); + nextSteps.add(Step.chain(deleteDomainIntrospectorJobStep(null), + createDomainIntrospectorJobStep(next))); + } else if (isJobNewOrNotProcesssed(job, lastIntrospectJobProcessedId)) { nextSteps.add(Step.chain(readDomainIntrospectorPodLogStep(null), deleteDomainIntrospectorJobStep(null), ConfigMapHelper.createIntrospectorConfigMapStep(next))); @@ -499,14 +589,6 @@ static OffsetDateTime createNextSteps(List nextSteps, DomainPresenceInfo i } return jobStartTime; } - - private static String getLastIntrospectJobProcessedId(DomainPresenceInfo info) { - return Optional.of(info) - .map(DomainPresenceInfo::getDomain) - .map(Domain::getStatus) - .map(DomainStatus::getLastIntrospectJobProcessedUid) - .orElse(null); - } } static ReadDomainIntrospectorPodLogStep readDomainIntrospectorPodLog(Step next) { diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/JobStepContext.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/JobStepContext.java index 20561698605..5a98595d902 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/JobStepContext.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/JobStepContext.java @@ -261,7 +261,12 @@ private V1ObjectMeta createMetadata() { private long getActiveDeadlineSeconds(TuningParameters.PodTuning podTuning) { return getIntrospectorJobActiveDeadlineSeconds(podTuning) - + (DEFAULT_ACTIVE_DEADLINE_INCREMENT_SECONDS * info.getRetryCount()); + + (DEFAULT_ACTIVE_DEADLINE_INCREMENT_SECONDS * getIntrospectJobFailureCount()); + } + + private Integer getIntrospectJobFailureCount() { + return Optional.ofNullable(info.getDomain().getStatus()) + .map(s -> s.getIntrospectJobFailureCount()).orElse(0); } V1JobSpec createJobSpec(TuningParameters tuningParameters) { diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/ResponseStep.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/ResponseStep.java index b7fac70eace..573e75b8b47 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/ResponseStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/ResponseStep.java @@ -139,7 +139,7 @@ private NextAction doPotentialRetry(Step conflictStep, Packet packet, CallRespon .map(rs -> rs.doPotentialRetry(conflictStep, packet, callResponse.getStatusCode())) .orElseGet(() -> { LOGGER.fine(MessageKeys.ASYNC_NO_RETRY, - callResponse.getRequestParams().call, + Optional.ofNullable(callResponse.getRequestParams()).map(rp -> rp.call).orElse(""), callResponse.getExceptionString(), callResponse.getStatusCode(), callResponse.getHeadersString()); return null; }); diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/SecretHelper.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/SecretHelper.java index 6c636bbf434..03f0cef1cdf 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/SecretHelper.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/SecretHelper.java @@ -61,10 +61,13 @@ public NextAction apply(Packet packet) { secretName = dpi.getDomain().getWebLogicCredentialsSecretName(); namespace = dpi.getNamespace(); - LOGGER.fine(MessageKeys.RETRIEVING_SECRET, secretName); - Step read = new CallBuilder().readSecretAsync(secretName, namespace, new SecretResponseStep(getNext())); - - return doNext(read, packet); + if (secretName != null) { + LOGGER.fine(MessageKeys.RETRIEVING_SECRET, secretName); + Step read = new CallBuilder().readSecretAsync(secretName, namespace, new SecretResponseStep(getNext())); + return doNext(read, packet); + } else { + return doNext(packet); + } } } diff --git a/operator/src/main/java/oracle/kubernetes/operator/steps/HttpRequestProcessing.java b/operator/src/main/java/oracle/kubernetes/operator/steps/HttpRequestProcessing.java index 823475b25f4..01696c60e13 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/steps/HttpRequestProcessing.java +++ b/operator/src/main/java/oracle/kubernetes/operator/steps/HttpRequestProcessing.java @@ -58,12 +58,14 @@ AuthorizationSource getAuthorizationSource() { } final HttpRequest.Builder createRequestBuilder(String url) { - return HttpRequest.newBuilder() + HttpRequest.Builder builder = HttpRequest.newBuilder() .uri(URI.create(url)) - .header("Authorization", getAuthorizationSource().createBasicAuthorizationString()) .header("Accept", "application/json") .header("Content-Type", "application/json") .header("X-Requested-By", "WebLogic Operator"); + Optional.ofNullable(getAuthorizationSource()) + .ifPresent(source -> builder.header("Authorization", source.createBasicAuthorizationString())); + return builder; } /** diff --git a/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/Domain.java b/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/Domain.java index efd7e58556d..379bf73e091 100644 --- a/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/Domain.java +++ b/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/Domain.java @@ -445,7 +445,7 @@ public Domain withStatus(DomainStatus status) { * @return the secret name */ public String getWebLogicCredentialsSecretName() { - return spec.getWebLogicCredentialsSecret().getName(); + return Optional.ofNullable(spec.getWebLogicCredentialsSecret()).map(s -> s.getName()).orElse(null); } /** diff --git a/operator/src/main/resources/Operator.properties b/operator/src/main/resources/Operator.properties index d0a46ef3fb3..c12285f7a4c 100644 --- a/operator/src/main/resources/Operator.properties +++ b/operator/src/main/resources/Operator.properties @@ -107,7 +107,9 @@ WLSKO-0154=Job {0} failed due to reason: DeadlineExceeded. \ Use kubectl describe for the job and its pod for more job failure information. \ The job may be retried by the operator up to {3} \ times with longer ActiveDeadlineSeconds value in each subsequent retry. \ - Use tuning parameter 'domainPresenceFailureRetryMaxCount' to configure max retries. + Use tuning parameter 'domainPresenceFailureRetryMaxCount' to configure max retries. \ + Use 'spec.configuration.introspectorJobActiveDeadlineSeconds' to increase the job \ + timeout interval if the job still fails after the retries are exhausted. WLSKO-0156=Access denied for operator service account for operation {0} on resource {1} in namespace {2}. WLSKO-0157=Domain {0} is not valid: {1} WLSKO-162=Unable to read internal certificate at path {0} diff --git a/operator/src/main/resources/scripts/modelInImage.sh b/operator/src/main/resources/scripts/modelInImage.sh index f3df504f7c4..b3247c3982c 100755 --- a/operator/src/main/resources/scripts/modelInImage.sh +++ b/operator/src/main/resources/scripts/modelInImage.sh @@ -33,7 +33,12 @@ IMG_MODELS_ROOTDIR="${IMG_MODELS_HOME}" IMG_ARCHIVES_ROOTDIR="${IMG_MODELS_HOME}" IMG_VARIABLE_FILES_ROOTDIR="${IMG_MODELS_HOME}" WDT_ROOT="${WDT_INSTALL_HOME:-/u01/wdt/weblogic-deploy}" -WDT_OUTPUT="/tmp/wdt_output.log" +WDT_OUTPUT_DIR="${LOG_HOME:-/tmp}" +WDT_OUTPUT="${WDT_OUTPUT_DIR}/wdt_output.log" +WDT_CREATE_DOMAIN_LOG=createDomain.log +WDT_UPDATE_DOMAIN_LOG=updateDomain.log +WDT_VALIDATE_MODEL_LOG=validateModel.log +WDT_COMPARE_MODEL_LOG=compareModel.log WDT_BINDIR="${WDT_ROOT}/bin" WDT_FILTER_JSON="/weblogic-operator/scripts/model-filters.json" WDT_CREATE_FILTER="/weblogic-operator/scripts/model-wdt-create-filter.py" @@ -65,6 +70,12 @@ export WDT_MODEL_SECRETS_DIRS="/weblogic-operator/config-overrides-secrets" #For now: export WDT_MODEL_SECRETS_NAME_DIR_PAIRS="__weblogic-credentials__=/weblogic-operator/secrets,__WEBLOGIC-CREDENTIALS__=/weblogic-operator/secrets" +if [ ! -d "${WDT_OUTPUT_DIR}" ]; then + trace "Creating WDT standard output directory: '${WDT_OUTPUT_DIR}'" + createFolder "${WDT_OUTPUT_DIR}" +fi + + # sort_files sort the files according to the names and naming conventions and write the result to stdout # $1 directory # $2 extension @@ -660,6 +671,8 @@ function diff_model() { fi fi + wdtRotateAndCopyLogFile "${WDT_COMPARE_MODEL_LOG}" + trace "Exiting diff_model" } @@ -829,6 +842,8 @@ function generateMergedModel() { exitOrLoop fi + wdtRotateAndCopyLogFile "${WDT_VALIDATE_MODEL_LOG}" + # restore trap start_trap trace "Exiting generateMergedModel" @@ -924,6 +939,8 @@ function wdtCreatePrimordialDomain() { cat ${WDT_OUTPUT} fi + wdtRotateAndCopyLogFile "${WDT_CREATE_DOMAIN_LOG}" + # restore trap start_trap trace "Exiting wdtCreatePrimordialDomain" @@ -992,6 +1009,8 @@ function wdtUpdateModelDomain() { encrypt_decrypt_model "encrypt" ${DOMAIN_HOME}/wlsdeploy/domain_model.json.b64 ${MII_PASSPHRASE} \ ${DOMAIN_HOME}/wlsdeploy/domain_model.json + wdtRotateAndCopyLogFile "${WDT_UPDATE_DOMAIN_LOG}" + # restore trap start_trap trace "Exiting wdtUpdateModelDomain" @@ -1084,6 +1103,8 @@ function wdtHandleOnlineUpdate() { cp /tmp/encrypted_merge_model.json ${DOMAIN_HOME}/wlsdeploy/domain_model.json + wdtRotateAndCopyLogFile ${WDT_UPDATE_DOMAIN_LOG} + trace "wrote updateResult" start_trap @@ -1312,3 +1333,16 @@ function logSevereAndExit() { trace SEVERE "cp '$1' failed" exitOrLoop } + +# Function to rotate WDT script log file and copy the file to WDT output dir. +# parameter: +# 1 - Name of the log file to rotate and copy to WDT output directory. +function wdtRotateAndCopyLogFile() { + local logFileName=$1 + testLogFileRotate "${WDT_OUTPUT_DIR}/${logFileName}" + [ $? -ne 0 ] && trace SEVERE "Error accessing '${WDT_OUTPUT_DIR}'. See previous log messages." && exit 1 + + logFileRotate ${WDT_OUTPUT_DIR}/${logFileName} ${WDT_LOG_FILE_MAX:-11} + + cp ${WDT_ROOT}/logs/${logFileName} ${WDT_OUTPUT_DIR}/ +} diff --git a/operator/src/test/java/oracle/kubernetes/operator/helpers/DomainIntrospectorJobTest.java b/operator/src/test/java/oracle/kubernetes/operator/helpers/DomainIntrospectorJobTest.java index 7196abe6b68..b2c975c6e48 100644 --- a/operator/src/test/java/oracle/kubernetes/operator/helpers/DomainIntrospectorJobTest.java +++ b/operator/src/test/java/oracle/kubernetes/operator/helpers/DomainIntrospectorJobTest.java @@ -67,6 +67,7 @@ import static oracle.kubernetes.operator.ProcessingConstants.FATAL_ERROR_DOMAIN_STATUS_MESSAGE; import static oracle.kubernetes.operator.ProcessingConstants.INTROSPECTION_ERROR; import static oracle.kubernetes.operator.ProcessingConstants.JOBWATCHER_COMPONENT_NAME; +import static oracle.kubernetes.operator.ProcessingConstants.JOB_POD_CONTAINER_WAITING_REASON; import static oracle.kubernetes.operator.ProcessingConstants.JOB_POD_NAME; import static oracle.kubernetes.operator.helpers.DomainStatusMatcher.hasStatus; import static oracle.kubernetes.operator.helpers.KubernetesTestSupport.DOMAIN; @@ -599,7 +600,7 @@ void whenDomainStatusContainsNullLastIntrospectProcessedJobUid_correctStepsExecu IntrospectionTestUtils.defineResources(testSupport, "passed"); testSupport.addToPacket(DOMAIN_INTROSPECTOR_JOB, testSupport.getResourceWithName(JOB, getJobName())); - JobHelper.ReplaceOrCreateStep.createNextSteps(nextSteps, domainPresenceInfo, job, terminalStep); + JobHelper.ReplaceOrCreateStep.createNextSteps(nextSteps, testSupport.getPacket(), job, terminalStep); assertThat(nextSteps.get(0), hasChainWithStepsInOrder("WatchDomainIntrospectorJobReadyStep", "ReadDomainIntrospectorPodStep", "ReadDomainIntrospectorPodLogStep", @@ -617,13 +618,62 @@ void whenDomainStatusContainsProcessedJobIdSameAsCurrentJob_correctStepsExecuted IntrospectionTestUtils.defineResources(testSupport, "passed"); testSupport.addToPacket(DOMAIN_INTROSPECTOR_JOB, testSupport.getResourceWithName(JOB, getJobName())); - JobHelper.ReplaceOrCreateStep.createNextSteps(nextSteps, domainPresenceInfo, job, terminalStep); + JobHelper.ReplaceOrCreateStep.createNextSteps(nextSteps, testSupport.getPacket(), job, terminalStep); assertThat(nextSteps.get(0), hasChainWithStepsInOrder("WatchDomainIntrospectorJobReadyStep", "DeleteDomainIntrospectorJobStep", "IntrospectionRequestStep", "DomainIntrospectorJobStep")); } + @Test + void whenJobTimedout_correctStepsExecuted() { + List nextSteps = new ArrayList<>(); + domainPresenceInfo.getDomain() + .setStatus(new DomainStatus().withReason("DeadlineExceeded")); + V1Job job = new V1Job().metadata(new V1ObjectMeta().name(getJobName()).namespace(NS).uid(JOB_UID)) + .status(new V1JobStatus()); + testSupport.defineResources(job); + IntrospectionTestUtils.defineResources(testSupport, "passed"); + testSupport.addToPacket(DOMAIN_INTROSPECTOR_JOB, testSupport.getResourceWithName(JOB, getJobName())); + + JobHelper.ReplaceOrCreateStep.createNextSteps(nextSteps, testSupport.getPacket(), job, terminalStep); + + assertThat(nextSteps.get(0), hasChainWithStepsInOrder("DeleteDomainIntrospectorJobStep", + "DomainIntrospectorJobStep")); + } + + @Test + void whenJobHasErrorPullingImage_correctStepsExecuted() { + List nextSteps = new ArrayList<>(); + V1Job job = new V1Job().metadata(new V1ObjectMeta().name(getJobName()).namespace(NS).uid(JOB_UID)) + .status(new V1JobStatus()); + testSupport.defineResources(job); + IntrospectionTestUtils.defineResources(testSupport, "passed"); + testSupport.addToPacket(DOMAIN_INTROSPECTOR_JOB, testSupport.getResourceWithName(JOB, getJobName())); + testSupport.addToPacket(JOB_POD_CONTAINER_WAITING_REASON, "ErrImagePull"); + + JobHelper.ReplaceOrCreateStep.createNextSteps(nextSteps, testSupport.getPacket(), job, terminalStep); + + assertThat(nextSteps.get(0), hasChainWithStepsInOrder("DeleteDomainIntrospectorJobStep", + "DomainIntrospectorJobStep")); + } + + @Test + void whenJobHasImagePullBackOffError_correctStepsExecuted() { + List nextSteps = new ArrayList<>(); + V1Job job = new V1Job().metadata(new V1ObjectMeta().name(getJobName()).namespace(NS).uid(JOB_UID)) + .status(new V1JobStatus()); + testSupport.defineResources(job); + IntrospectionTestUtils.defineResources(testSupport, "passed"); + testSupport.addToPacket(DOMAIN_INTROSPECTOR_JOB, testSupport.getResourceWithName(JOB, getJobName())); + testSupport.addToPacket(JOB_POD_CONTAINER_WAITING_REASON, "ImagePullBackOff"); + + JobHelper.ReplaceOrCreateStep.createNextSteps(nextSteps, testSupport.getPacket(), job, terminalStep); + + assertThat(nextSteps.get(0), hasChainWithStepsInOrder("DeleteDomainIntrospectorJobStep", + "DomainIntrospectorJobStep")); + } + @Test void whenCurrentJobIsNull_correctStepsExecuted() { List nextSteps = new ArrayList<>(); @@ -631,7 +681,7 @@ void whenCurrentJobIsNull_correctStepsExecuted() { IntrospectionTestUtils.defineResources(testSupport, "passed"); testSupport.addToPacket(DOMAIN_INTROSPECTOR_JOB, testSupport.getResourceWithName(JOB, getJobName())); - JobHelper.ReplaceOrCreateStep.createNextSteps(nextSteps, domainPresenceInfo, job, terminalStep); + JobHelper.ReplaceOrCreateStep.createNextSteps(nextSteps, testSupport.getPacket(), job, terminalStep); assertThat(nextSteps.get(0), hasChainWithStepsInOrder("ReadIntrospectorConfigMapStep", "DomainIntrospectorJobStep")); diff --git a/operator/src/test/java/oracle/kubernetes/operator/helpers/JobHelperTest.java b/operator/src/test/java/oracle/kubernetes/operator/helpers/JobHelperTest.java index b7e967154d0..da5c8124124 100644 --- a/operator/src/test/java/oracle/kubernetes/operator/helpers/JobHelperTest.java +++ b/operator/src/test/java/oracle/kubernetes/operator/helpers/JobHelperTest.java @@ -48,6 +48,7 @@ import oracle.kubernetes.weblogic.domain.model.ConfigurationConstants; import oracle.kubernetes.weblogic.domain.model.Domain; import oracle.kubernetes.weblogic.domain.model.DomainSpec; +import oracle.kubernetes.weblogic.domain.model.DomainStatus; import oracle.kubernetes.weblogic.domain.model.DomainValidationBaseTest; import oracle.kubernetes.weblogic.domain.model.ServerEnvVars; import org.hamcrest.Matcher; @@ -671,7 +672,9 @@ private static V1PodSpec getTemplateSpec(V1JobSpec jobSpec) { @Test void verify_introspectorPodSpec_activeDeadlineSeconds_retry_values() { - int failureCount = domainPresenceInfo.incrementAndGetFailureCount(); + domainPresenceInfo.getDomain() + .setStatus(new DomainStatus().withIntrospectJobFailureCount(1)); + int failureCount = domainPresenceInfo.getDomain().getStatus().getIntrospectJobFailureCount(); V1JobSpec jobSpec = createJobSpec();