From 3b195b54a42eab5167f4baad2f02301883adbf26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sandor=20Sz=C3=BCcs?= Date: Thu, 16 Apr 2026 21:47:22 +0200 Subject: [PATCH 1/2] fix: proxy filter panics did not run applyFiltersOnError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Example: In the proxy a route with filter chain a() -> b() exists. b() panics and a() implements errorHandlerFilter interface and return true from HandleErrorResponse(). In this case we should run the filter a().Response(), because we need to cleanup things as expected. For example scheduler filters like fifo() or lifo() need to decrement the active concurrent requests counter or we will stop requests in the queue, wich would be miss leading. If we panic in b() and everyone will look at behavior of a(), because we are stuck there caused by not running the cleanup. Signed-off-by: Sandor Szücs --- proxy/proxy.go | 10 +++++++-- proxy/proxy_test.go | 52 +++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/proxy/proxy.go b/proxy/proxy.go index df29bd73db..62b881a4ee 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -906,7 +906,7 @@ func WithParams(p Params) *Proxy { } // applies filters to a request -func (p *Proxy) applyFiltersToRequest(f []*routing.RouteFilter, ctx *context) []*routing.RouteFilter { +func (p *Proxy) applyFiltersToRequest(f []*routing.RouteFilter, ctx *context, counter *int) []*routing.RouteFilter { if len(f) == 0 { return f } @@ -927,6 +927,7 @@ func (p *Proxy) applyFiltersToRequest(f []*routing.RouteFilter, ctx *context) [] ctx.request = ctx.request.WithContext(labelCtx) fi.Request(ctx) + (*counter)++ ctx.request = ctx.request.WithContext(parentCtx) pprof.SetGoroutineLabels(parentCtx) @@ -1246,6 +1247,7 @@ func stack() []byte { func (p *Proxy) do(ctx *context, parentSpan ot.Span) (err error) { var requestElapsed, responseElapsed time.Duration + processedFiltersCounter := 0 requestStopWatch, responseStopWatch := newStopWatch(), newStopWatch() requestStopWatch.Start() @@ -1259,6 +1261,10 @@ func (p *Proxy) do(ctx *context, parentSpan ot.Span) (err error) { perr := &proxyError{ err: fmt.Errorf("panic caused by: %v", r), } + + if processedFiltersCounter != 0 { + p.applyFiltersOnError(ctx, ctx.route.Filters[:processedFiltersCounter]) + } p.makeErrorResponse(ctx, perr) err = perr } @@ -1302,7 +1308,7 @@ func (p *Proxy) do(ctx *context, parentSpan ot.Span) (err error) { ctx.applyRoute(route, params, p.flags.PreserveHost()) requestStopWatch.Stop() - processedFilters := p.applyFiltersToRequest(ctx.route.Filters, ctx) + processedFilters := p.applyFiltersToRequest(ctx.route.Filters, ctx, &processedFiltersCounter) requestStopWatch.Start() // not every of these branches could end up in a response to the client diff --git a/proxy/proxy_test.go b/proxy/proxy_test.go index 3c2ec1d8d7..da4de7454e 100644 --- a/proxy/proxy_test.go +++ b/proxy/proxy_test.go @@ -30,11 +30,14 @@ import ( "github.com/zalando/skipper/eskip" "github.com/zalando/skipper/filters" "github.com/zalando/skipper/filters/builtin" + fscheduler "github.com/zalando/skipper/filters/scheduler" "github.com/zalando/skipper/loadbalancer" "github.com/zalando/skipper/logging" "github.com/zalando/skipper/logging/loggingtest" + "github.com/zalando/skipper/metrics/metricstest" "github.com/zalando/skipper/routing" "github.com/zalando/skipper/routing/testdataclient" + "github.com/zalando/skipper/scheduler" teePredicate "github.com/zalando/skipper/predicates/tee" ) @@ -1028,13 +1031,25 @@ func TestFilterPanic(t *testing.T) { }) defer s.Close() + metrics := &metricstest.MockMetrics{} + defer metrics.Close() + d := 50 * time.Millisecond + reg := scheduler.RegistryWith(scheduler.Options{ + Metrics: metrics, + EnableRouteFIFOMetrics: true, + MetricsUpdateTimeout: d, + }) + defer reg.Close() + fr := make(filters.Registry) fr.Register(builtin.NewAppendRequestHeader()) fr.Register(builtin.NewAppendResponseHeader()) + fr.Register(fscheduler.NewFifo()) fr.Register(new(nilFilterSpec)) doc := fmt.Sprintf(`test: Path("/foo") -> + fifo(2000,20,"1s") -> appendRequestHeader("X-Expected", "before") -> appendResponseHeader("X-Expected", "before") -> nilFilter() -> @@ -1042,13 +1057,32 @@ func TestFilterPanic(t *testing.T) { appendResponseHeader("X-Expected", "after") -> "%s"`, s.URL) - tp, err := newTestProxyWithFilters(fr, doc, FlagsNone) - require.NoError(t, err) - defer tp.close() + dc, err := testdataclient.NewDoc(doc) + if err != nil { + t.Fatalf("Failed to create testdataclient: %v", err) + } + defer dc.Close() + ro := routing.Options{ + SignalFirstLoad: true, + FilterRegistry: fr, + DataClients: []routing.DataClient{dc}, + PostProcessors: []routing.PostProcessor{reg}, + } + rt := routing.New(ro) + defer rt.Close() + <-rt.FirstLoad() + + pr := WithParams(Params{ + Flags: FlagsNone, + Metrics: metrics, + Routing: rt, + }) + defer pr.Close() r := httptest.NewRequest("GET", "/foo", nil) w := httptest.NewRecorder() - tp.proxy.ServeHTTP(w, r) + now := time.Now() + pr.ServeHTTP(w, r) assert.Equal(t, http.StatusInternalServerError, w.Code) assert.Equal(t, int32(0), backendRequests, "expected no backend request") @@ -1059,6 +1093,16 @@ func TestFilterPanic(t *testing.T) { if err = testLog.WaitFor(msg, 100*time.Millisecond); err != nil { t.Errorf("expected '%s' in logs", msg) } + + // wait for updateMetrics to happen + time.Sleep(50*time.Millisecond - time.Since(now)) + + // metrics should show that we cleaned up the active request + metrics.WithGauges(func(g map[string]float64) { + assert.Equal(t, float64(0), g["fifo.test.active"]) + assert.Equal(t, float64(0), g["fifo.test.queued"]) + }) + } func TestFilterPanicPrintStackRate(t *testing.T) { From fc62faa211df4d9175c8b398bd90554a709184bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sandor=20Sz=C3=BCcs?= Date: Fri, 17 Apr 2026 18:59:58 +0200 Subject: [PATCH 2/2] fix as commented MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sandor Szücs --- proxy/proxy_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proxy/proxy_test.go b/proxy/proxy_test.go index da4de7454e..f44dd3a9ad 100644 --- a/proxy/proxy_test.go +++ b/proxy/proxy_test.go @@ -1095,7 +1095,7 @@ func TestFilterPanic(t *testing.T) { } // wait for updateMetrics to happen - time.Sleep(50*time.Millisecond - time.Since(now)) + time.Sleep(d - time.Since(now)) // metrics should show that we cleaned up the active request metrics.WithGauges(func(g map[string]float64) {