diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a22eb130ab..124de060320 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 1.17.4-dev +* Bug - Fixed a bug where task stuck in pending due to the cni setup process hangs [#1358](https://github.com/aws/amazon-ecs-agent/pull/1358) + ## 1.17.3 * Enhancement - Distinct startContainerTimeouts for windows/linux, introduce a new environment variable `ECS_CONTAINER_START_TIMEOUT` to make it configurable [#1321](https://github.com/aws/amazon-ecs-agent/pull/1321) * Enhancement - Add support for containers to inhereit ENI private DNS hostnames for `awsvpc` tasks [#1278](https://github.com/aws/amazon-ecs-agent/pull/1278) diff --git a/agent/ecscni/mocks/ecscni_mocks.go b/agent/ecscni/mocks/ecscni_mocks.go index 67ae4530395..3d40cec0165 100644 --- a/agent/ecscni/mocks/ecscni_mocks.go +++ b/agent/ecscni/mocks/ecscni_mocks.go @@ -11,102 +11,89 @@ // express or implied. See the License for the specific language governing // permissions and limitations under the License. -// Code generated by MockGen. DO NOT EDIT. +// Automatically generated by MockGen. DO NOT EDIT! // Source: github.com/aws/amazon-ecs-agent/agent/ecscni (interfaces: CNIClient) -// Package mock_ecscni is a generated GoMock package. package mock_ecscni import ( - reflect "reflect" + context "context" ecscni "github.com/aws/amazon-ecs-agent/agent/ecscni" current "github.com/containernetworking/cni/pkg/types/current" gomock "github.com/golang/mock/gomock" ) -// MockCNIClient is a mock of CNIClient interface +// Mock of CNIClient interface type MockCNIClient struct { ctrl *gomock.Controller - recorder *MockCNIClientMockRecorder + recorder *_MockCNIClientRecorder } -// MockCNIClientMockRecorder is the mock recorder for MockCNIClient -type MockCNIClientMockRecorder struct { +// Recorder for MockCNIClient (not exported) +type _MockCNIClientRecorder struct { mock *MockCNIClient } -// NewMockCNIClient creates a new mock instance func NewMockCNIClient(ctrl *gomock.Controller) *MockCNIClient { mock := &MockCNIClient{ctrl: ctrl} - mock.recorder = &MockCNIClientMockRecorder{mock} + mock.recorder = &_MockCNIClientRecorder{mock} return mock } -// EXPECT returns an object that allows the caller to indicate expected use -func (m *MockCNIClient) EXPECT() *MockCNIClientMockRecorder { - return m.recorder +func (_m *MockCNIClient) EXPECT() *_MockCNIClientRecorder { + return _m.recorder } -// Capabilities mocks base method -func (m *MockCNIClient) Capabilities(arg0 string) ([]string, error) { - ret := m.ctrl.Call(m, "Capabilities", arg0) +func (_m *MockCNIClient) Capabilities(_param0 string) ([]string, error) { + ret := _m.ctrl.Call(_m, "Capabilities", _param0) ret0, _ := ret[0].([]string) ret1, _ := ret[1].(error) return ret0, ret1 } -// Capabilities indicates an expected call of Capabilities -func (mr *MockCNIClientMockRecorder) Capabilities(arg0 interface{}) *gomock.Call { - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Capabilities", reflect.TypeOf((*MockCNIClient)(nil).Capabilities), arg0) +func (_mr *_MockCNIClientRecorder) Capabilities(arg0 interface{}) *gomock.Call { + return _mr.mock.ctrl.RecordCall(_mr.mock, "Capabilities", arg0) } -// CleanupNS mocks base method -func (m *MockCNIClient) CleanupNS(arg0 *ecscni.Config) error { - ret := m.ctrl.Call(m, "CleanupNS", arg0) +func (_m *MockCNIClient) CleanupNS(_param0 *ecscni.Config) error { + ret := _m.ctrl.Call(_m, "CleanupNS", _param0) ret0, _ := ret[0].(error) return ret0 } -// CleanupNS indicates an expected call of CleanupNS -func (mr *MockCNIClientMockRecorder) CleanupNS(arg0 interface{}) *gomock.Call { - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CleanupNS", reflect.TypeOf((*MockCNIClient)(nil).CleanupNS), arg0) +func (_mr *_MockCNIClientRecorder) CleanupNS(arg0 interface{}) *gomock.Call { + return _mr.mock.ctrl.RecordCall(_mr.mock, "CleanupNS", arg0) } -// ReleaseIPResource mocks base method -func (m *MockCNIClient) ReleaseIPResource(arg0 *ecscni.Config) error { - ret := m.ctrl.Call(m, "ReleaseIPResource", arg0) +func (_m *MockCNIClient) ReleaseIPResource(_param0 *ecscni.Config) error { + ret := _m.ctrl.Call(_m, "ReleaseIPResource", _param0) ret0, _ := ret[0].(error) return ret0 } -// ReleaseIPResource indicates an expected call of ReleaseIPResource -func (mr *MockCNIClientMockRecorder) ReleaseIPResource(arg0 interface{}) *gomock.Call { - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReleaseIPResource", reflect.TypeOf((*MockCNIClient)(nil).ReleaseIPResource), arg0) +func (_mr *_MockCNIClientRecorder) ReleaseIPResource(arg0 interface{}) *gomock.Call { + return _mr.mock.ctrl.RecordCall(_mr.mock, "ReleaseIPResource", arg0) } -// SetupNS mocks base method -func (m *MockCNIClient) SetupNS(arg0 *ecscni.Config) (*current.Result, error) { - ret := m.ctrl.Call(m, "SetupNS", arg0) +func (_m *MockCNIClient) SetupNS(_param0 *ecscni.Config, _param1 context.Context) (*current.Result, error) { + ret := _m.ctrl.Call(_m, "SetupNS", _param0, _param1) ret0, _ := ret[0].(*current.Result) ret1, _ := ret[1].(error) return ret0, ret1 } -// SetupNS indicates an expected call of SetupNS -func (mr *MockCNIClientMockRecorder) SetupNS(arg0 interface{}) *gomock.Call { - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetupNS", reflect.TypeOf((*MockCNIClient)(nil).SetupNS), arg0) +func (_mr *_MockCNIClientRecorder) SetupNS(arg0, arg1 interface{}) *gomock.Call { + return _mr.mock.ctrl.RecordCall(_mr.mock, "SetupNS", arg0, arg1) } -// Version mocks base method -func (m *MockCNIClient) Version(arg0 string) (string, error) { - ret := m.ctrl.Call(m, "Version", arg0) +func (_m *MockCNIClient) Version(_param0 string) (string, error) { + ret := _m.ctrl.Call(_m, "Version", _param0) ret0, _ := ret[0].(string) ret1, _ := ret[1].(error) return ret0, ret1 } -// Version indicates an expected call of Version -func (mr *MockCNIClientMockRecorder) Version(arg0 interface{}) *gomock.Call { - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Version", reflect.TypeOf((*MockCNIClient)(nil).Version), arg0) +func (_mr *_MockCNIClientRecorder) Version(arg0 interface{}) *gomock.Call { + return _mr.mock.ctrl.RecordCall(_mr.mock, "Version", arg0) } diff --git a/agent/ecscni/plugin.go b/agent/ecscni/plugin.go index 2d1747ee730..0d1845098d5 100644 --- a/agent/ecscni/plugin.go +++ b/agent/ecscni/plugin.go @@ -14,6 +14,7 @@ package ecscni import ( + "context" "encoding/json" "fmt" "net" @@ -38,7 +39,7 @@ type CNIClient interface { // Capabilities returns the capabilities supported by a plugin Capabilities(string) ([]string, error) // SetupNS sets up the namespace of container - SetupNS(*Config) (*current.Result, error) + SetupNS(*Config, context.Context) (*current.Result, error) // CleanupNS cleans up the container namespace CleanupNS(*Config) error // ReleaseIPResource marks the ip available in the ipam db @@ -69,7 +70,24 @@ func NewClient(cfg *Config) CNIClient { // SetupNS will set up the namespace of container, including create the bridge // and the veth pair, move the eni to container namespace, setup the routes -func (client *cniClient) SetupNS(cfg *Config) (*current.Result, error) { +func (client *cniClient) SetupNS(cfg *Config, ctx context.Context) (*current.Result, error) { + var result *current.Result + var err error + setupDone := make(chan struct{}) + go func() { + result, err = client.setupNS(cfg) + setupDone <- struct{}{} + }() + + select { + case <-ctx.Done(): + return nil, errors.New("cni setup: container namespace setup timed out") + case <-setupDone: + return result, err + } +} + +func (client *cniClient) setupNS(cfg *Config) (*current.Result, error) { runtimeConfig := libcni.RuntimeConf{ ContainerID: cfg.ContainerID, NetNS: fmt.Sprintf(netnsFormat, cfg.ContainerPID), diff --git a/agent/ecscni/plugin_test.go b/agent/ecscni/plugin_test.go index 643ed6b08ab..3b5e8a72ad0 100644 --- a/agent/ecscni/plugin_test.go +++ b/agent/ecscni/plugin_test.go @@ -1,9 +1,12 @@ package ecscni import ( + "context" "encoding/json" "fmt" + "sync" "testing" + "time" "github.com/aws/amazon-ecs-agent/agent/ecscni/mocks_libcni" "github.com/containernetworking/cni/libcni" @@ -43,10 +46,37 @@ func TestSetupNS(t *testing.T) { }), ) - _, err = ecscniClient.SetupNS(&Config{AdditionalLocalRoutes: additionalRoutes}) + _, err = ecscniClient.SetupNS(&Config{AdditionalLocalRoutes: additionalRoutes}, context.TODO()) assert.NoError(t, err) } +func TestSetupNSTimeout(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + var wg sync.WaitGroup + wg.Add(1) + + ecscniClient := NewClient(&Config{}) + libcniClient := mock_libcni.NewMockCNI(ctrl) + ecscniClient.(*cniClient).libcni = libcniClient + + gomock.InOrder( + // ENI plugin was called first + libcniClient.EXPECT().AddNetwork(gomock.Any(), gomock.Any()).Return(¤t.Result{}, nil).Do( + func(net *libcni.NetworkConfig, rt *libcni.RuntimeConf) { + wg.Wait() + }).MaxTimes(1), + libcniClient.EXPECT().AddNetwork(gomock.Any(), gomock.Any()).Return(¤t.Result{}, nil).MaxTimes(1), + ) + ctx, cancel := context.WithTimeout(context.TODO(), 1*time.Millisecond) + defer cancel() + + _, err := ecscniClient.SetupNS(&Config{}, ctx) + assert.Error(t, err) + wg.Done() +} + func TestCleanupNS(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() diff --git a/agent/engine/docker_task_engine.go b/agent/engine/docker_task_engine.go index 58a604be9d1..aa4ea020ddb 100644 --- a/agent/engine/docker_task_engine.go +++ b/agent/engine/docker_task_engine.go @@ -61,6 +61,7 @@ const ( labelTaskDefinitionFamily = labelPrefix + "task-definition-family" labelTaskDefinitionVersion = labelPrefix + "task-definition-version" labelCluster = labelPrefix + "cluster" + cniSetupTimeout = 1 * time.Minute ) // DockerTaskEngine is a state machine for managing a task and its containers @@ -917,8 +918,10 @@ func (engine *DockerTaskEngine) provisionContainerResources(task *api.Task, cont }, } } + ctx, cancel := context.WithTimeout(engine.ctx, cniSetupTimeout) + defer cancel() // Invoke the libcni to config the network namespace for the container - result, err := engine.cniClient.SetupNS(cniConfig) + result, err := engine.cniClient.SetupNS(cniConfig, ctx) if err != nil { seelog.Errorf("Task engine [%s]: unable to configure pause container namespace: %v", task.Arn, err) diff --git a/agent/engine/docker_task_engine_test.go b/agent/engine/docker_task_engine_test.go index af4de97b890..c10e909652f 100644 --- a/agent/engine/docker_task_engine_test.go +++ b/agent/engine/docker_task_engine_test.go @@ -371,7 +371,7 @@ func TestTaskWithSteadyStateResourcesProvisioned(t *testing.T) { State: docker.State{Pid: 23}, }, nil), // Then setting up the pause container network namespace - mockCNIClient.EXPECT().SetupNS(gomock.Any()).Return(nsResult, nil), + mockCNIClient.EXPECT().SetupNS(gomock.Any(), gomock.Any()).Return(nsResult, nil), // Once the pause container is started, sleep container will be created client.EXPECT().APIVersion().Return(defaultDockerClientAPIVersion, nil), @@ -1083,7 +1083,7 @@ func TestPauseContainerHappyPath(t *testing.T) { ID: pauseContainerID, State: docker.State{Pid: containerPid}, }, nil), - cniClient.EXPECT().SetupNS(gomock.Any()).Return(nsResult, nil), + cniClient.EXPECT().SetupNS(gomock.Any(), gomock.Any()).Return(nsResult, nil), ) // For the other container