Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions changelog/fragments/1765396860-improve-upload-performance.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: enhancement

# Change summary; a 80ish characters long description of the change.
summary: Improve file upload performance for large files by refreshing the chunk data index once at upload finalization instead of on every chunk create

# Long description; in case the summary is not enough to describe the change
# this field accommodate a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
#description:

# Affected component; a word indicating the component this changeset affects.
component: fleet-server

# PR URL; optional; the PR number that added the changeset.
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
#pr: https://github.com/owner/repo/1234

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present is automatically filled by the tooling with the issue linked to the PR number.
#issue: https://github.com/owner/repo/1234
14 changes: 13 additions & 1 deletion internal/pkg/file/uploader/es.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,15 @@ func IndexChunk(ctx context.Context, client *elasticsearch.Client, body *cbor.Ch
span, _ := apm.StartSpan(ctx, "createChunk", "create")
defer span.End()
chunkDocID := fmt.Sprintf("%s.%d", fileID, chunkNum)
// This create doc happens *many* times per file, so it must not use any expensive parameters (e.g. refresh)
// for a 5GB file, that is 1,280 create calls
resp, err := client.Create(fmt.Sprintf(UploadDataIndexPattern, source), chunkDocID, body, func(req *esapi.CreateRequest) {
req.DocumentID = chunkDocID
if req.Header == nil {
req.Header = make(http.Header)
}
req.Header.Set("Content-Type", "application/cbor")
req.Header.Set("Accept", "application/json")
req.Refresh = "true"
})
if err != nil {
return err
Expand Down Expand Up @@ -186,3 +187,14 @@ func DeleteAllChunksForFile(ctx context.Context, bulker bulk.Bulk, source string
_, err = client.DeleteByQuery([]string{fmt.Sprintf(UploadDataIndexPattern, source)}, bytes.NewReader(q), client.DeleteByQuery.WithContext(ctx))
return err
}

// EnsureChunksIndexed forces a refresh of the file-chunk data index for the
// given integration source so that recently written chunk documents become
// visible to search. It is intended to be called once at upload finalization
// instead of refreshing on every per-chunk create call.
//
// A non-2xx refresh response is only logged as a warning; the function
// returns an error solely for transport-level failures.
func EnsureChunksIndexed(ctx context.Context, client *elasticsearch.Client, source string) error {
	req := esapi.IndicesRefreshRequest{
		Index: []string{fmt.Sprintf(UploadDataIndexPattern, source)},
	}
	resp, err := req.Do(ctx, client)
	if err != nil {
		// resp may be nil on transport error; do not touch it here
		// (the original code dereferenced resp.StatusCode before this check,
		// which would panic on a failed request).
		return err
	}
	// Close the body so the underlying HTTP connection can be reused.
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		zerolog.Ctx(ctx).Warn().Int("status_code", resp.StatusCode).Msg("File Chunk Index refresh gave abnormal response")
	}
	return nil
}
42 changes: 42 additions & 0 deletions internal/pkg/file/uploader/es_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package uploader

import (
"bytes"
"io"
"net/http"
"strings"
"testing"

"github.com/elastic/fleet-server/v7/internal/pkg/file/cbor"
"github.com/elastic/fleet-server/v7/internal/pkg/testing/esutil"
"github.com/stretchr/testify/assert"
)

// TestIndexChunkDoesNotUseExpensiveParams verifies that the per-chunk
// _create request does not carry the costly refresh query parameter:
// IndexChunk runs once per chunk, so a refresh on each call would be
// very expensive for large files.
func TestIndexChunkDoesNotUseExpensiveParams(t *testing.T) {
	client, transport := esutil.MockESClient(t)
	writer := cbor.NewChunkWriter(bytes.NewReader([]byte{}), false, "", "", 100)

	sawCreate := false
	transport.RoundTripFn = func(req *http.Request) (*http.Response, error) {
		// Inspect only the chunk-create calls; anything else passes through.
		if strings.Contains(req.URL.Path, "/_create") {
			sawCreate = true
			assert.NotEqual(t, "true", req.URL.Query().Get("refresh"), "Chunk Index operation must not use expensive refresh parameter")
		}
		headers := make(http.Header)
		headers.Set("X-Elastic-Product", "Elasticsearch")
		resp := &http.Response{
			StatusCode: 200,
			Body:       io.NopCloser(strings.NewReader(`{}`)),
			Header:     headers,
		}
		return resp, nil
	}

	assert.NoError(t, IndexChunk(t.Context(), client, writer, "mypkg", "sampleFileID", 0))
	assert.True(t, sawCreate, "_create API was not called")
}
7 changes: 7 additions & 0 deletions internal/pkg/file/uploader/finalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"sort"
"strings"

Expand Down Expand Up @@ -42,6 +43,12 @@ func (u *Uploader) Complete(ctx context.Context, id string, transitHash string)
return info, ErrStatusNoUploads
}

// complete may be called before most recent chunks are available for search yet
// this explicitly calls refresh once at the end, instead of refreshing on each chunk
if err := EnsureChunksIndexed(ctx, u.bulker.Client(), info.Source); err != nil {
return info, fmt.Errorf("unable to refresh chunk data index: %w", err)
}

chunks, err := file.GetChunkInfos(ctx, u.bulker, UploadDataIndexPattern, info.DocID, file.GetChunkInfoOpt{IncludeSize: true, RequireHash: true})
if err != nil {
return info, err
Expand Down
129 changes: 129 additions & 0 deletions internal/pkg/file/uploader/finalize_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package uploader

import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"testing"
"time"

"github.com/elastic/fleet-server/v7/internal/pkg/cache"
"github.com/elastic/fleet-server/v7/internal/pkg/config"
"github.com/elastic/fleet-server/v7/internal/pkg/es"
"github.com/elastic/fleet-server/v7/internal/pkg/file"
itesting "github.com/elastic/fleet-server/v7/internal/pkg/testing"
"github.com/elastic/fleet-server/v7/internal/pkg/testing/esutil"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
)

/*
This test verifies that chunks are made available for search
before the Uploader performs that search and counts/verifies each chunk.
It specifically looks for an elasticsearch call to `../_refresh/` API,
to occur before a chunk-search call. This prevents the index-then-search
data race problem with final chunks *right before* completion API call.

If alternative means are used to address the data-race race condition,
this test may be updated.
*/
func TestUploadCompletePerformsRefreshBeforeChunkSearch(t *testing.T) {

	/**
	 * Setup & Mocking only
	 * avoid asserts here, since the setup path is involved, and executes
	 * a lot of code paths. Those paths are not under test here
	 **/
	refreshCalled := false
	size := 200
	fakeBulk := itesting.NewMockBulk()
	fakeIntegrationSrc := "endpoint"
	// hash of null chunk, and then transithash OF that hash
	nullHash := "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
	transitHashNull := "5df6e0e2761359d30a8275058e299fcc0381534545f55cf43e41983f5d4c9456"
	mockClient, mockTX := esutil.MockESClient(t)

	// populate mock info for a *metadata* search, which may occur during finalization
	mockMeta, _ := json.Marshal(map[string]any{
		"action_id": "actionID",
		"agent_id":  "agentID",
		"src":       fakeIntegrationSrc,
		"file": map[string]interface{}{
			"size":      size,
			"ChunkSize": file.MaxChunkSize,
			"Status":    file.StatusProgress,
		},
		"upload_id":    "some-id",
		"upload_start": time.Now().UnixMilli(),
	})
	fakeBulk.On("Search",
		mock.MatchedBy(func(_ context.Context) bool { return true }), // match context.Context
		".fleet-fileds-fromhost-meta-*",                              // *metadata* index (NOT CHUNK/DATA!)
		mock.Anything, // query bytes
		mock.Anything, // bulk opts
	).Return(&es.ResultT{
		HitsT: es.HitsT{
			Hits: []es.HitT{{
				ID:     "_sampledocID",
				Source: mockMeta,
			}},
		},
	}, nil).Maybe() // not under test, calling is not required
	fakeBulk.On("Client").Return(mockClient).Maybe() // inject our mock client, if used

	// The mocked ES transport records whether the chunk-data index _refresh
	// API was hit; the chunk-search mock below asserts (via Run) that this
	// flag was already set, i.e. refresh happened BEFORE the search.
	mockTX.RoundTripFn = func(req *http.Request) (*http.Response, error) {
		if strings.Contains(req.URL.Path, fmt.Sprintf(UploadDataIndexPattern, fakeIntegrationSrc)+"/_refresh") {
			refreshCalled = true
		}
		respHeaders := make(http.Header)
		respHeaders.Set("X-Elastic-Product", "Elasticsearch")
		return &http.Response{
			StatusCode: 200,
			Body:       io.NopCloser(strings.NewReader(`{}`)),
			Header:     respHeaders,
		}, nil
	}

	fakeBulk.On("Search",
		mock.MatchedBy(func(_ context.Context) bool { return true }), // match context.Context
		".fleet-fileds-fromhost-data-*",                              // *DATA* (chunk) search
		mock.Anything, // query bytes
		mock.Anything, // bulk opts

	).Run(func(args mock.Arguments) {
		// runs during execution, before return
		assert.True(t, refreshCalled, "Chunk finalization search occurred without refresh")
	}).Return(&es.ResultT{
		HitsT: es.HitsT{
			Hits: []es.HitT{{
				ID: "actionID.agentID.0",
				Fields: map[string]any{
					file.FieldBaseID: []any{"actionID.agentID"},
					file.FieldSHA2:   []any{nullHash},
					"size":           []any{size},
					file.FieldLast:   []any{true},
				},
			}},
		},
	}, nil)

	c, err := cache.New(config.Cache{NumCounters: 100, MaxCost: 100000})
	require.NoError(t, err) // panic-exit if prereq fails, not intentional testing
	// size_ptr is a sibling test helper not visible in this view —
	// presumably converts the int size to the pointer type New expects; confirm.
	u := New(nil, fakeBulk, c, size_ptr(size), time.Hour)

	/**
	 * Begin actual execution & assertions
	 **/
	_, err = u.Complete(t.Context(), "actionID", transitHashNull)
	assert.NoError(t, err)

	assert.True(t, refreshCalled, "_refresh API was not called during file finalization")
}