This repository has been archived by the owner on Jan 8, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 330
Fix a few minor kubernetes papercuts #2399
Merged
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
```release-note:feature | ||
plugin/k8: Report events on failed pods when a deployment fails | ||
``` | ||
|
||
```release-note:bug | ||
plugin/k8: Setup Kubernetes services for different workspaces properly | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ import ( | |
"k8s.io/apimachinery/pkg/api/errors" | ||
k8sresource "k8s.io/apimachinery/pkg/api/resource" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/types" | ||
"k8s.io/apimachinery/pkg/util/intstr" | ||
"k8s.io/apimachinery/pkg/util/wait" | ||
_ "k8s.io/client-go/plugin/pkg/client/auth" | ||
|
@@ -580,6 +581,7 @@ func (p *Platform) resourceDeploymentCreate( | |
}, | ||
InitialDelaySeconds: initialDelaySeconds, | ||
TimeoutSeconds: timeoutSeconds, | ||
FailureThreshold: failureThreshold, | ||
} | ||
} | ||
|
||
|
@@ -681,6 +683,8 @@ func (p *Platform) resourceDeploymentCreate( | |
return err | ||
} | ||
|
||
ev := clientSet.CoreV1().Events(ns) | ||
|
||
// We successfully created or updated, so set the name on our state so | ||
// that if we error, we'll partially clean up properly. THIS IS IMPORTANT. | ||
state.Name = result.Name | ||
|
@@ -698,20 +702,25 @@ func (p *Platform) resourceDeploymentCreate( | |
reportedError bool | ||
) | ||
|
||
timeout := 10 * time.Minute | ||
// We wait the maximum amount of time that the deployment controller would wait for a pod | ||
// to start before exiting. We double the time to allow for various Kubernetes based | ||
// delays in startup, detection, and reporting. | ||
timeout := time.Duration((timeoutSeconds*failureThreshold)+initialDelaySeconds) * 2 * time.Second | ||
|
||
podsSeen := make(map[types.UID]string) | ||
|
||
// Wait on the Pod to start | ||
err = wait.PollImmediate(2*time.Second, timeout, func() (bool, error) { | ||
err = wait.PollImmediate(time.Second, timeout, func() (bool, error) { | ||
dep, err := dc.Get(ctx, result.Name, metav1.GetOptions{}) | ||
if err != nil { | ||
return false, err | ||
} | ||
|
||
if time.Since(lastStatus) > 10*time.Second { | ||
step.Update(fmt.Sprintf( | ||
"Waiting on deployment to become available: %d/%d/%d", | ||
"Waiting on deployment to become available: requested=%d running=%d ready=%d", | ||
*dep.Spec.Replicas, | ||
dep.Status.UnavailableReplicas, | ||
dep.Status.UnavailableReplicas+dep.Status.AvailableReplicas, | ||
dep.Status.AvailableReplicas, | ||
)) | ||
lastStatus = time.Now() | ||
|
@@ -730,6 +739,8 @@ func (p *Platform) resourceDeploymentCreate( | |
} | ||
|
||
for _, p := range pods.Items { | ||
podsSeen[p.UID] = p.Name | ||
|
||
for _, cs := range p.Status.ContainerStatuses { | ||
if cs.Ready { | ||
continue | ||
|
@@ -760,6 +771,29 @@ func (p *Platform) resourceDeploymentCreate( | |
return false, nil | ||
}) | ||
if err != nil { | ||
step.Update("Error detected waiting for Deployment to start.") | ||
evanphx marked this conversation as resolved.
Show resolved
Hide resolved
|
||
step.Status(terminal.StatusError) | ||
step.Abort() | ||
|
||
ui.Output("The following is events for pods observed while attempting to start the Deployment", terminal.WithWarningStyle()) | ||
|
||
for uid, name := range podsSeen { | ||
sel := ev.GetFieldSelector(nil, nil, nil, (*string)(&uid)) | ||
|
||
events, err := ev.List(ctx, metav1.ListOptions{ | ||
FieldSelector: sel.String(), | ||
}) | ||
if err == nil { | ||
ui.Output("Events for %s", name, terminal.WithHeaderStyle()) | ||
for _, ev := range events.Items { | ||
if ev.Type == "Normal" { | ||
continue | ||
} | ||
ui.Output(" %s: %s (%s)", ev.Type, ev.Message, ev.Reason) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I also noticed this change in your demo, excited for this! 😄 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
} | ||
} | ||
} | ||
|
||
if err == wait.ErrWaitTimeout { | ||
err = fmt.Errorf("Deployment was not able to start pods after %s", timeout) | ||
} | ||
|
@@ -772,6 +806,8 @@ func (p *Platform) resourceDeploymentCreate( | |
return nil | ||
} | ||
|
||
var deleteGrace = int64(120) | ||
|
||
// Destroy deletes the K8S deployment. | ||
func (p *Platform) resourceDeploymentDestroy( | ||
ctx context.Context, | ||
|
@@ -791,8 +827,14 @@ func (p *Platform) resourceDeploymentDestroy( | |
step.Done() | ||
|
||
step = sg.Add("Deleting deployment...") | ||
|
||
del := metav1.DeletePropagationBackground | ||
|
||
deployclient := csinfo.Clientset.AppsV1().Deployments(ns) | ||
if err := deployclient.Delete(ctx, state.Name, metav1.DeleteOptions{}); err != nil { | ||
if err := deployclient.Delete(ctx, state.Name, metav1.DeleteOptions{ | ||
GracePeriodSeconds: &deleteGrace, | ||
PropagationPolicy: &del, | ||
}); err != nil { | ||
return err | ||
} | ||
|
||
|
@@ -843,6 +885,10 @@ func (p *Platform) resourceAutoscalerCreate( | |
// we don't return the error, this was mostly to provide a helpful warning | ||
log.Info("receieved error while listing pods in attempt to detect existing metrics-server: %s", err) | ||
err = nil | ||
|
||
// The apis return an non-nil but empty value when observing an error, so we need to be sure to | ||
// skip the code below. | ||
metricsPods = nil | ||
} | ||
|
||
if metricsPods != nil && len(metricsPods.Items) == 0 { | ||
|
@@ -929,7 +975,7 @@ func (p *Platform) resourceAutoscalerDestroy( | |
sg terminal.StepGroup, | ||
csinfo *clientsetInfo, | ||
) error { | ||
if p.config.AutoscaleConfig == nil && state.Name == "" { | ||
if state.Name == "" { | ||
// No autoscale config, so don't destroy one | ||
return nil | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -724,12 +724,18 @@ func (r *Releaser) Release( | |
ctx context.Context, | ||
log hclog.Logger, | ||
src *component.Source, | ||
job *component.JobInfo, | ||
ui terminal.UI, | ||
target *Deployment, | ||
dcr *component.DeclaredResourcesResp, | ||
) (*Release, error) { | ||
var result Release | ||
result.ServiceName = src.App | ||
|
||
if job.Workspace == "default" { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should throw this in the labels too for the label selector. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure what ya mean.... |
||
result.ServiceName = src.App | ||
} else { | ||
result.ServiceName = src.App + "-" + job.Workspace | ||
} | ||
|
||
sg := ui.StepGroup() | ||
defer sg.Wait() | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I noticed this change in your demo and I like it! 👍🏻 ✨
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.