From 2bf7d2d8fbd39e1e3181d18e586090e402ff472a Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 4 Jan 2026 22:55:12 -0800 Subject: [PATCH 01/11] Add object pool interface and refactor related code --- modules/git/blob_nogogit.go | 31 ++---- modules/git/catfile/batch.go | 57 ---------- modules/git/catfile/object_pool.go | 30 ++++++ modules/git/catfile/object_pool_cmd.go | 102 ++++++++++++++++++ .../languagestats/language_stats_nogogit.go | 32 +++--- modules/git/pipeline/lfs_nogogit.go | 49 +++------ modules/git/repo_base_nogogit.go | 16 +-- modules/git/repo_branch_nogogit.go | 20 ++-- modules/git/repo_commit_nogogit.go | 56 ++++------ modules/git/repo_tag_nogogit.go | 26 ++--- modules/git/repo_tree_nogogit.go | 26 ++--- modules/git/tree_entry_nogogit.go | 11 +- modules/git/tree_nogogit.go | 23 ++-- modules/gitrepo/cat_file.go | 4 +- modules/indexer/code/bleve/bleve.go | 20 ++-- .../code/elasticsearch/elasticsearch.go | 22 ++-- 16 files changed, 260 insertions(+), 265 deletions(-) create mode 100644 modules/git/catfile/object_pool.go create mode 100644 modules/git/catfile/object_pool_cmd.go diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index 88e2be792bce9..ccede82545084 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -26,27 +26,23 @@ type Blob struct { // DataAsync gets a ReadCloser for the contents of a blob without reading it all. // Calling the Close function on the result will discard all unread output. func (b *Blob) DataAsync() (io.ReadCloser, error) { - batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) + objectPool, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) if err != nil { return nil, err } - rd := batch.Reader() - _, err = batch.Writer().Write([]byte(b.ID.String() + "\n")) - if err != nil { - cancel() - return nil, err - } - _, _, size, err := ReadBatchLine(rd) + object, err := objectPool.Object(b.repo.Ctx, b.ID.String()) if err != nil { cancel() return nil, err } + + rd := object.Reader b.gotSize = true - b.size = size + b.size = object.Size - if size < 4096 { - bs, err := io.ReadAll(io.LimitReader(rd, size)) + if b.size < 4096 { + bs, err := io.ReadAll(io.LimitReader(rd, b.size)) defer cancel() if err != nil { return nil, err @@ -57,7 +53,7 @@ func (b *Blob) DataAsync() (io.ReadCloser, error) { return &blobReader{ rd: rd, - n: size, + n: b.size, cancel: cancel, }, nil } @@ -68,25 +64,20 @@ func (b *Blob) Size() int64 { return b.size } - batch, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx) + objInfoPool, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } defer cancel() - _, err = batch.Writer().Write([]byte(b.ID.String() + "\n")) - if err != nil { - log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) - return 0 - } - _, _, b.size, err = ReadBatchLine(batch.Reader()) + objInfo, err := objInfoPool.ObjectInfo(b.repo.Ctx, b.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } + b.size = objInfo.Size b.gotSize = true - return b.size } diff --git a/modules/git/catfile/batch.go b/modules/git/catfile/batch.go index 1facb8946eb06..fd99f3e7af742 100644 --- a/modules/git/catfile/batch.go +++ b/modules/git/catfile/batch.go @@ -21,63 +21,6 @@ type WriteCloserError interface { CloseWithError(err error) error } -type Batch interface { - Writer() WriteCloserError - Reader() *bufio.Reader - Close() -} - -// batch represents an active `git cat-file --batch` or `--batch-check` invocation -// paired with the pipes that feed/read from it. Call Close to release resources. -type batch struct { - cancel context.CancelFunc - reader *bufio.Reader - writer WriteCloserError -} - -// NewBatch creates a new cat-file --batch process for the provided repository path. -// The returned Batch must be closed once the caller has finished with it. -func NewBatch(ctx context.Context, repoPath string) (Batch, error) { - if err := EnsureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - var batch batch - batch.writer, batch.reader, batch.cancel = catFileBatch(ctx, repoPath) - return &batch, nil -} - -// NewBatchCheck creates a cat-file --batch-check process for the provided repository path. -// The returned Batch must be closed once the caller has finished with it. -func NewBatchCheck(ctx context.Context, repoPath string) (Batch, error) { - if err := EnsureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - var check batch - check.writer, check.reader, check.cancel = catFileBatchCheck(ctx, repoPath) - return &check, nil -} - -func (b *batch) Writer() WriteCloserError { - return b.writer -} - -func (b *batch) Reader() *bufio.Reader { - return b.reader -} - -// Close stops the underlying git cat-file process and releases held resources. -func (b *batch) Close() { - if b == nil || b.cancel == nil { - return - } - b.cancel() - b.reader = nil - b.writer = nil - b.cancel = nil -} - // EnsureValidGitRepository runs `git rev-parse` in the repository path to make sure // the directory is a valid git repository. This avoids git cat-file hanging indefinitely // when invoked in invalid paths. diff --git a/modules/git/catfile/object_pool.go b/modules/git/catfile/object_pool.go new file mode 100644 index 0000000000000..b48e27768a959 --- /dev/null +++ b/modules/git/catfile/object_pool.go @@ -0,0 +1,30 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package catfile + +import ( + "bufio" + "context" +) + +type ObjectInfo struct { + ID string + Type string + Size int64 +} + +type ObjectInfoPool interface { + ObjectInfo(ctx context.Context, sha string) (*ObjectInfo, error) + Close() +} + +type Object struct { + ObjectInfo + Reader *bufio.Reader +} + +type ObjectPool interface { + Object(ctx context.Context, sha string) (*Object, error) + Close() +} diff --git a/modules/git/catfile/object_pool_cmd.go b/modules/git/catfile/object_pool_cmd.go new file mode 100644 index 0000000000000..9e9750465b983 --- /dev/null +++ b/modules/git/catfile/object_pool_cmd.go @@ -0,0 +1,102 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package catfile + +import ( + "bufio" + "context" +) + +// batchCheck represents an active `git cat-file --batch-check` invocation +// paired with the pipes that feed/read from it. Call Close to release resources. +type batchCheck struct { + cancel context.CancelFunc + reader *bufio.Reader + writer WriteCloserError +} + +// NewBatchCheck creates a cat-file --batch-check process for the provided repository path. +// The returned Batch must be closed once the caller has finished with it. +func NewObjectInfoPool(ctx context.Context, repoPath string) (ObjectInfoPool, error) { + if err := EnsureValidGitRepository(ctx, repoPath); err != nil { + return nil, err + } + + var check batchCheck + check.writer, check.reader, check.cancel = catFileBatchCheck(ctx, repoPath) + return &check, nil +} + +func (b *batchCheck) ObjectInfo(ctx context.Context, refName string) (*ObjectInfo, error) { + _, err := b.writer.Write([]byte(refName + "\n")) + if err != nil { + return nil, err + } + + var objInfo ObjectInfo + var oid []byte + oid, objInfo.Type, objInfo.Size, err = ReadBatchLine(b.reader) + if err != nil { + return nil, err + } + objInfo.ID = string(oid) + return &objInfo, nil +} + +// Close stops the underlying git cat-file process and releases held resources. +func (b *batchCheck) Close() { + if b.cancel != nil { + b.cancel() + } + if b.writer != nil { + _ = b.writer.Close() + } +} + +// batch represents an active `git cat-file --batch` invocation +// paired with the pipes that feed/read from it. Call Close to release resources. +type batch struct { + cancel context.CancelFunc + reader *bufio.Reader + writer WriteCloserError +} + +// NewBatch creates a new cat-file --batch process for the provided repository path. +// The returned Batch must be closed once the caller has finished with it. +func NewObjectPool(ctx context.Context, repoPath string) (ObjectPool, error) { + if err := EnsureValidGitRepository(ctx, repoPath); err != nil { + return nil, err + } + + var batch batch + batch.writer, batch.reader, batch.cancel = catFileBatch(ctx, repoPath) + return &batch, nil +} + +func (b *batch) Object(ctx context.Context, refName string) (*Object, error) { + _, err := b.writer.Write([]byte(refName + "\n")) + if err != nil { + return nil, err + } + + var obj Object + var oid []byte + oid, obj.Type, obj.Size, err = ReadBatchLine(b.reader) + if err != nil { + return nil, err + } + obj.ID = string(oid) + obj.Reader = b.reader + + return &obj, nil +} + +func (b *batch) Close() { + if b.cancel != nil { + b.cancel() + } + if b.writer != nil { + _ = b.writer.Close() + } +} diff --git a/modules/git/languagestats/language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go index da291ae8481d7..3597a0960d1ca 100644 --- a/modules/git/languagestats/language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -22,34 +22,30 @@ import ( func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, error) { // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. // so let's create a batch stdin and stdout - batch, cancel, err := repo.CatFileBatch(repo.Ctx) + objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - writeID := func(id string) error { - _, err := batch.Writer().Write([]byte(id + "\n")) - return err - } - - if err := writeID(commitID); err != nil { + object, err := objectPool.Object(repo.Ctx, commitID) + if err != nil { return nil, err } - batchReader := batch.Reader() - shaBytes, typ, size, err := git.ReadBatchLine(batchReader) - if typ != "commit" { + if object.Type != "commit" { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) return nil, git.ErrNotExist{ID: commitID} } - sha, err := git.NewIDFromString(string(shaBytes)) + batchReader := object.Reader + + sha, err := git.NewIDFromString(object.ID) if err != nil { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) return nil, git.ErrNotExist{ID: commitID} } - commit, err := git.CommitFromReader(repo, sha, io.LimitReader(batchReader, size)) + commit, err := git.CommitFromReader(repo, sha, io.LimitReader(batchReader, object.Size)) if err != nil { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) return nil, err @@ -145,20 +141,18 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, // If content can not be read or file is too big just do detection by filename if f.Size() <= bigFileSize { - if err := writeID(f.ID.String()); err != nil { - return nil, err - } - _, _, size, err := git.ReadBatchLine(batchReader) + object, err := objectPool.Object(repo.Ctx, f.ID.String()) if err != nil { log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err) return nil, err } + batchReader := object.Reader - sizeToRead := size + sizeToRead := object.Size discard := int64(1) - if size > fileSizeLimit { + if object.Size > fileSizeLimit { sizeToRead = fileSizeLimit - discard = size - fileSizeLimit + 1 + discard = object.Size - fileSizeLimit + 1 } _, err = contentBuf.ReadFrom(io.LimitReader(batchReader, sizeToRead)) diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index 6f1a860c1dba7..51f0cf26407d2 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -47,15 +47,12 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout - batch, cancel, err := repo.CatFileBatch(repo.Ctx) + objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - batchStdinWriter := batch.Writer() - batchReader := batch.Reader() - // We'll use a scanner for the revList because it's simpler than a bufio.Reader scan := bufio.NewScanner(revListReader) trees := [][]byte{} @@ -67,43 +64,32 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err for scan.Scan() { // Get the next commit ID - commitID := scan.Bytes() - - // push the commit to the cat-file --batch process - _, err := batchStdinWriter.Write(commitID) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte{'\n'}) - if err != nil { - return nil, err - } + commitID := scan.Text() var curCommit *git.Commit curPath := "" commitReadingLoop: for { - _, typ, size, err := git.ReadBatchLine(batchReader) + object, err := objectPool.Object(repo.Ctx, commitID) if err != nil { return nil, err } - switch typ { + batchReader := object.Reader + + switch object.Type { case "tag": // This shouldn't happen but if it does well just get the commit and try again - id, err := git.ReadTagObjectID(batchReader, size) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte(id + "\n")) + id, err := git.ReadTagObjectID(batchReader, object.Size) if err != nil { return nil, err } + commitID = id continue case "commit": // Read in the commit to get its tree and in case this is one of the last used commits - curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, size)) + curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(commitID), io.LimitReader(batchReader, object.Size)) if err != nil { return nil, err } @@ -111,13 +97,11 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } - if _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil { - return nil, err - } + commitID = curCommit.Tree.ID.String() curPath = "" case "tree": var n int64 - for n < size { + for n < object.Size { mode, fname, binObjectID, count, err := git.ParseCatFileTreeLine(objectID.Type(), batchReader, modeBuf, fnameBuf, workingShaBuf) if err != nil { return nil, err @@ -143,14 +127,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } if len(trees) > 0 { - _, err := batchStdinWriter.Write(trees[len(trees)-1]) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte("\n")) - if err != nil { - return nil, err - } + commitID = string(trees[len(trees)-1]) curPath = paths[len(paths)-1] trees = trees[:len(trees)-1] paths = paths[:len(paths)-1] @@ -158,7 +135,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err break commitReadingLoop } default: - if err := git.DiscardFull(batchReader, size+1); err != nil { + if err := git.DiscardFull(batchReader, object.Size+1); err != nil { return nil, err } } diff --git a/modules/git/repo_base_nogogit.go b/modules/git/repo_base_nogogit.go index 97a43b90fd3a2..fc06b7e7afa00 100644 --- a/modules/git/repo_base_nogogit.go +++ b/modules/git/repo_base_nogogit.go @@ -24,10 +24,10 @@ type Repository struct { tagCache *ObjectCache[*Tag] batchInUse bool - batch catfile.Batch + batch catfile.ObjectPool checkInUse bool - check catfile.Batch + check catfile.ObjectInfoPool Ctx context.Context LastCommitCache *LastCommitCache @@ -57,10 +57,10 @@ func OpenRepository(ctx context.Context, repoPath string) (*Repository, error) { } // CatFileBatch obtains a CatFileBatch for this repository -func (repo *Repository) CatFileBatch(ctx context.Context) (catfile.Batch, func(), error) { +func (repo *Repository) CatFileBatch(ctx context.Context) (catfile.ObjectPool, func(), error) { if repo.batch == nil { var err error - repo.batch, err = catfile.NewBatch(ctx, repo.Path) + repo.batch, err = catfile.NewObjectPool(ctx, repo.Path) if err != nil { return nil, nil, err } @@ -74,7 +74,7 @@ func (repo *Repository) CatFileBatch(ctx context.Context) (catfile.Batch, func() } log.Debug("Opening temporary cat file batch for: %s", repo.Path) - tempBatch, err := catfile.NewBatch(ctx, repo.Path) + tempBatch, err := catfile.NewObjectPool(ctx, repo.Path) if err != nil { return nil, nil, err } @@ -82,10 +82,10 @@ func (repo *Repository) CatFileBatch(ctx context.Context) (catfile.Batch, func() } // CatFileBatchCheck obtains a CatFileBatchCheck for this repository -func (repo *Repository) CatFileBatchCheck(ctx context.Context) (catfile.Batch, func(), error) { +func (repo *Repository) CatFileBatchCheck(ctx context.Context) (catfile.ObjectInfoPool, func(), error) { if repo.check == nil { var err error - repo.check, err = catfile.NewBatchCheck(ctx, repo.Path) + repo.check, err = catfile.NewObjectInfoPool(ctx, repo.Path) if err != nil { return nil, nil, err } @@ -99,7 +99,7 @@ func (repo *Repository) CatFileBatchCheck(ctx context.Context) (catfile.Batch, f } log.Debug("Opening temporary cat file batch-check for: %s", repo.Path) - tempBatchCheck, err := catfile.NewBatchCheck(ctx, repo.Path) + tempBatchCheck, err := catfile.NewObjectInfoPool(ctx, repo.Path) if err != nil { return nil, nil, err } diff --git a/modules/git/repo_branch_nogogit.go b/modules/git/repo_branch_nogogit.go index 09873fb2c626d..1305edd60a1e4 100644 --- a/modules/git/repo_branch_nogogit.go +++ b/modules/git/repo_branch_nogogit.go @@ -8,7 +8,6 @@ package git import ( "bufio" - "bytes" "context" "io" "strings" @@ -23,19 +22,18 @@ func (repo *Repository) IsObjectExist(name string) bool { return false } - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } defer cancel() - _, err = batch.Writer().Write([]byte(name + "\n")) + objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, name) if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) + log.Debug("Error writing to ObjectInfo %v", err) return false } - sha, _, _, err := ReadBatchLine(batch.Reader()) - return err == nil && bytes.HasPrefix(sha, []byte(strings.TrimSpace(name))) + return strings.HasPrefix(objInfo.ID, strings.TrimSpace(name)) } // IsReferenceExist returns true if given reference exists in the repository. @@ -44,18 +42,14 @@ func (repo *Repository) IsReferenceExist(name string) bool { return false } - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } defer cancel() - _, err = batch.Writer().Write([]byte(name + "\n")) - if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) - return false - } - _, _, _, err = ReadBatchLine(batch.Reader()) + + _, err = objInfoPool.ObjectInfo(repo.Ctx, name) return err == nil } diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index a3d728eb6d05b..02f35a9895e11 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -37,21 +37,20 @@ func (repo *Repository) ResolveReference(name string) (string, error) { // GetRefCommitID returns the last commit ID string of given reference (branch or tag). func (repo *Repository) GetRefCommitID(name string) (string, error) { - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { return "", err } defer cancel() - _, err = batch.Writer().Write([]byte(name + "\n")) + + objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, name) if err != nil { + if IsErrNotExist(err) { + return "", ErrNotExist{name, ""} + } return "", err } - shaBs, _, _, err := ReadBatchLine(batch.Reader()) - if IsErrNotExist(err) { - return "", ErrNotExist{name, ""} - } - - return string(shaBs), nil + return objInfo.ID, nil } // IsCommitExist returns true if given commit exists in current repository. @@ -68,20 +67,17 @@ func (repo *Repository) IsCommitExist(name string) bool { } func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { - batch, cancel, err := repo.CatFileBatch(repo.Ctx) + objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, _ = batch.Writer().Write([]byte(id.String() + "\n")) - - return repo.getCommitFromBatchReader(batch, id) + return repo.getCommitFromBatchReader(objectPool, id) } -func (repo *Repository) getCommitFromBatchReader(batch catfile.Batch, id ObjectID) (*Commit, error) { - rd := batch.Reader() - _, typ, size, err := ReadBatchLine(rd) +func (repo *Repository) getCommitFromBatchReader(objectPool catfile.ObjectPool, id ObjectID) (*Commit, error) { + object, err := objectPool.Object(repo.Ctx, id.String()) if err != nil { if errors.Is(err, io.EOF) || IsErrNotExist(err) { return nil, ErrNotExist{ID: id.String()} @@ -89,13 +85,15 @@ func (repo *Repository) getCommitFromBatchReader(batch catfile.Batch, id ObjectI return nil, err } - switch typ { + rd := object.Reader + + switch object.Type { case "missing": return nil, ErrNotExist{ID: id.String()} case "tag": // then we need to parse the tag // and load the commit - data, err := io.ReadAll(io.LimitReader(rd, size)) + data, err := io.ReadAll(io.LimitReader(rd, object.Size)) if err != nil { return nil, err } @@ -108,18 +106,14 @@ func (repo *Repository) getCommitFromBatchReader(batch catfile.Batch, id ObjectI return nil, err } - if _, err := batch.Writer().Write([]byte(tag.Object.String() + "\n")); err != nil { - return nil, err - } - - commit, err := repo.getCommitFromBatchReader(batch, tag.Object) + commit, err := repo.getCommitFromBatchReader(objectPool, tag.Object) if err != nil { return nil, err } return commit, nil case "commit": - commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size)) + commit, err := CommitFromReader(repo, id, io.LimitReader(rd, object.Size)) if err != nil { return nil, err } @@ -130,8 +124,8 @@ func (repo *Repository) getCommitFromBatchReader(batch catfile.Batch, id ObjectI return commit, nil default: - log.Debug("Unknown typ: %s", typ) - if err := DiscardFull(rd, size+1); err != nil { + log.Debug("Unknown typ: %s", object.Type) + if err := DiscardFull(rd, object.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ @@ -153,22 +147,18 @@ func (repo *Repository) ConvertToGitID(commitID string) (ObjectID, error) { } } - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, err = batch.Writer().Write([]byte(commitID + "\n")) - if err != nil { - return nil, err - } - sha, _, _, err := ReadBatchLine(batch.Reader()) + + objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, commitID) if err != nil { if IsErrNotExist(err) { return nil, ErrNotExist{commitID, ""} } return nil, err } - - return MustIDFromString(string(sha)), nil + return MustIDFromString(objInfo.ID), nil } diff --git a/modules/git/repo_tag_nogogit.go b/modules/git/repo_tag_nogogit.go index 88d9edcbd88b5..6d7ce5c385045 100644 --- a/modules/git/repo_tag_nogogit.go +++ b/modules/git/repo_tag_nogogit.go @@ -24,23 +24,19 @@ func (repo *Repository) IsTagExist(name string) bool { // GetTagType gets the type of the tag, either commit (simple) or tag (annotated) func (repo *Repository) GetTagType(id ObjectID) (string, error) { - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { return "", err } defer cancel() - _, err = batch.Writer().Write([]byte(id.String() + "\n")) - if err != nil { - return "", err - } - _, typ, _, err := ReadBatchLine(batch.Reader()) + objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, id.String()) if err != nil { if IsErrNotExist(err) { return "", ErrNotExist{ID: id.String()} } return "", err } - return typ, nil + return objInfo.Type, nil } func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { @@ -88,25 +84,23 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { } // The tag is an annotated tag with a message. - batch, cancel, err := repo.CatFileBatch(repo.Ctx) + objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - rd := batch.Reader() - if _, err := batch.Writer().Write([]byte(tagID.String() + "\n")); err != nil { - return nil, err - } - _, typ, size, err := ReadBatchLine(rd) + object, err := objectPool.Object(repo.Ctx, tagID.String()) if err != nil { if errors.Is(err, io.EOF) || IsErrNotExist(err) { return nil, ErrNotExist{ID: tagID.String()} } return nil, err } - if typ != "tag" { - if err := DiscardFull(rd, size+1); err != nil { + + rd := object.Reader + if object.Type != "tag" { + if err := DiscardFull(rd, object.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ID: tagID.String()} @@ -114,7 +108,7 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { // then we need to parse the tag // and load the commit - data, err := io.ReadAll(io.LimitReader(rd, size)) + data, err := io.ReadAll(io.LimitReader(rd, object.Size)) if err != nil { return nil, err } diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index e6e2ee9fa0655..5a0b93bf7cb27 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -10,26 +10,23 @@ import ( ) func (repo *Repository) getTree(id ObjectID) (*Tree, error) { - batch, cancel, err := repo.CatFileBatch(repo.Ctx) + objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - wr := batch.Writer() - rd := batch.Reader() - _, _ = wr.Write([]byte(id.String() + "\n")) - - // ignore the SHA - _, typ, size, err := ReadBatchLine(rd) + object, err := objectPool.Object(repo.Ctx, id.String()) if err != nil { return nil, err } - switch typ { + rd := object.Reader + + switch object.Type { case "tag": resolvedID := id - data, err := io.ReadAll(io.LimitReader(rd, size)) + data, err := io.ReadAll(io.LimitReader(rd, object.Size)) if err != nil { return nil, err } @@ -38,17 +35,14 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { return nil, err } - if _, err := wr.Write([]byte(tag.Object.String() + "\n")); err != nil { - return nil, err - } - commit, err := repo.getCommitFromBatchReader(batch, tag.Object) + commit, err := repo.getCommitFromBatchReader(objectPool, tag.Object) if err != nil { return nil, err } commit.Tree.ResolvedID = resolvedID return &commit.Tree, nil case "commit": - commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size)) + commit, err := CommitFromReader(repo, id, io.LimitReader(rd, object.Size)) if err != nil { return nil, err } @@ -64,14 +58,14 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { if err != nil { return nil, err } - tree.entries, err = catBatchParseTreeEntries(objectFormat, tree, rd, size) + tree.entries, err = catBatchParseTreeEntries(objectFormat, tree, rd, object.Size) if err != nil { return nil, err } tree.entriesParsed = true return tree, nil default: - if err := DiscardFull(rd, size+1); err != nil { + if err := DiscardFull(rd, object.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ diff --git a/modules/git/tree_entry_nogogit.go b/modules/git/tree_entry_nogogit.go index 0ea7aeed9d44c..90fea1dbd9c55 100644 --- a/modules/git/tree_entry_nogogit.go +++ b/modules/git/tree_entry_nogogit.go @@ -15,23 +15,18 @@ func (te *TreeEntry) Size() int64 { return te.size } - batch, cancel, err := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) + objInfoPool, cancel, err := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 } defer cancel() - _, err = batch.Writer().Write([]byte(te.ID.String() + "\n")) + objInfo, err := objInfoPool.ObjectInfo(te.ptree.repo.Ctx, te.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 } - _, _, te.size, err = ReadBatchLine(batch.Reader()) - if err != nil { - log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) - return 0 - } - + te.size = objInfo.Size te.sized = true return te.size } diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index b8561dd3523e5..94e6d0357a275 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -27,32 +27,31 @@ func (t *Tree) ListEntries() (Entries, error) { } if t.repo != nil { - batch, cancel, err := t.repo.CatFileBatch(t.repo.Ctx) + objectPool, cancel, err := t.repo.CatFileBatch(t.repo.Ctx) if err != nil { return nil, err } defer cancel() - wr := batch.Writer() - rd := batch.Reader() - _, _ = wr.Write([]byte(t.ID.String() + "\n")) - _, typ, sz, err := ReadBatchLine(rd) + object, err := objectPool.Object(t.repo.Ctx, t.ID.String()) if err != nil { return nil, err } - if typ == "commit" { - treeID, err := ReadTreeID(rd, sz) + + rd := object.Reader + + if object.Type == "commit" { + treeID, err := ReadTreeID(rd, object.Size) if err != nil && err != io.EOF { return nil, err } - _, _ = wr.Write([]byte(treeID + "\n")) - _, typ, sz, err = ReadBatchLine(rd) + object, err = objectPool.Object(t.repo.Ctx, treeID) if err != nil { return nil, err } } - if typ == "tree" { - t.entries, err = catBatchParseTreeEntries(t.ID.Type(), t, rd, sz) + if object.Type == "tree" { + t.entries, err = catBatchParseTreeEntries(t.ID.Type(), t, rd, object.Size) if err != nil { return nil, err } @@ -61,7 +60,7 @@ func (t *Tree) ListEntries() (Entries, error) { } // Not a tree just use ls-tree instead - if err := DiscardFull(rd, sz+1); err != nil { + if err := DiscardFull(rd, object.Size+1); err != nil { return nil, err } } diff --git a/modules/gitrepo/cat_file.go b/modules/gitrepo/cat_file.go index 0e5fc9951c3ba..d02421d9b780a 100644 --- a/modules/gitrepo/cat_file.go +++ b/modules/gitrepo/cat_file.go @@ -9,6 +9,6 @@ import ( "code.gitea.io/gitea/modules/git/catfile" ) -func NewBatch(ctx context.Context, repo Repository) (catfile.Batch, error) { - return catfile.NewBatch(ctx, repoPath(repo)) +func NewObjectPool(ctx context.Context, repo Repository) (catfile.ObjectPool, error) { + return catfile.NewObjectPool(ctx, repoPath(repo)) } diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index a3727bd0cbd99..48b7c632db6bd 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -14,7 +14,6 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" - "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" "code.gitea.io/gitea/modules/gitrepo" @@ -151,7 +150,7 @@ func NewIndexer(indexDir string) *Indexer { } } -func (b *Indexer) addUpdate(ctx context.Context, catfileBatch catfile.Batch, commitSha string, +func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, commitSha string, update internal.FileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch, ) error { // Ignore vendored files in code search @@ -177,16 +176,13 @@ func (b *Indexer) addUpdate(ctx context.Context, catfileBatch catfile.Batch, com return b.addDelete(update.Filename, repo, batch) } - if _, err := catfileBatch.Writer().Write([]byte(update.BlobSha + "\n")); err != nil { - return err - } - - batchReader := catfileBatch.Reader() - _, _, size, err = git.ReadBatchLine(batchReader) + object, err := objectPool.Object(ctx, update.BlobSha) if err != nil { return err } + batchReader := object.Reader + fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) if err != nil { return err @@ -219,18 +215,18 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) if len(changes.Updates) > 0 { - catfileBatch, err := gitrepo.NewBatch(ctx, repo) + objectPool, err := gitrepo.NewObjectPool(ctx, repo) if err != nil { return err } - defer catfileBatch.Close() + defer objectPool.Close() for _, update := range changes.Updates { - if err := b.addUpdate(ctx, catfileBatch, sha, update, repo, batch); err != nil { + if err := b.addUpdate(ctx, objectPool, sha, update, repo, batch); err != nil { return err } } - catfileBatch.Close() + objectPool.Close() } for _, filename := range changes.RemovedFilenames { if err := b.addDelete(filename, repo, batch); err != nil { diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 653df0bd1102d..2a48d4f6568cc 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -13,7 +13,6 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" - "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" "code.gitea.io/gitea/modules/gitrepo" @@ -139,7 +138,7 @@ const ( }` ) -func (b *Indexer) addUpdate(ctx context.Context, batch catfile.Batch, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { +func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { return nil, nil @@ -162,15 +161,13 @@ func (b *Indexer) addUpdate(ctx context.Context, batch catfile.Batch, sha string return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil } - if _, err := batch.Writer().Write([]byte(update.BlobSha + "\n")); err != nil { - return nil, err - } - - batchReader := batch.Reader() - _, _, size, err = git.ReadBatchLine(batchReader) + object, err := objectPool.Object(ctx, update.BlobSha) if err != nil { return nil, err } + size = object.Size + + batchReader := object.Reader fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) if err != nil { @@ -211,14 +208,13 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { reqs := make([]elastic.BulkableRequest, 0) if len(changes.Updates) > 0 { - batch, err := gitrepo.NewBatch(ctx, repo) + objectPool, err := gitrepo.NewObjectPool(ctx, repo) if err != nil { return err } - defer batch.Close() - + defer objectPool.Close() for _, update := range changes.Updates { - updateReqs, err := b.addUpdate(ctx, batch, sha, update, repo) + updateReqs, err := b.addUpdate(ctx, objectPool, sha, update, repo) if err != nil { return err } @@ -226,7 +222,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st reqs = append(reqs, updateReqs...) } } - batch.Close() + objectPool.Close() } for _, filename := range changes.RemovedFilenames { From dac0492b8eab3540b6e2872a4bab76619839293f Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Mon, 5 Jan 2026 14:22:29 -0800 Subject: [PATCH 02/11] some improvements --- modules/git/blob_nogogit.go | 9 ++++++--- modules/git/catfile/object_pool.go | 10 ++-------- modules/git/catfile/object_pool_cmd.go | 13 ++++++------- .../git/languagestats/language_stats_nogogit.go | 14 +++++++++----- modules/git/pipeline/lfs_nogogit.go | 8 +++++--- modules/git/repo_branch_nogogit.go | 4 ++-- modules/git/repo_commit_nogogit.go | 14 ++++++-------- modules/git/repo_tag_nogogit.go | 10 +++++----- modules/git/repo_tree_nogogit.go | 11 ++++++++--- modules/git/tree_entry_nogogit.go | 2 +- modules/git/tree_nogogit.go | 17 +++++++++++++---- modules/indexer/code/bleve/bleve.go | 8 ++++++-- .../indexer/code/elasticsearch/elasticsearch.go | 8 +++++--- 13 files changed, 74 insertions(+), 54 deletions(-) diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index ccede82545084..5f83740f6bf0c 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -10,6 +10,7 @@ import ( "bytes" "io" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/log" ) @@ -31,13 +32,15 @@ func (b *Blob) DataAsync() (io.ReadCloser, error) { return nil, err } - object, err := objectPool.Object(b.repo.Ctx, b.ID.String()) + object, rd, err := objectPool.Object(b.ID.String()) if err != nil { cancel() + if catfile.IsErrObjectNotFound(err) { + return nil, ErrNotExist{ID: b.ID.String()} + } return nil, err } - rd := object.Reader b.gotSize = true b.size = object.Size @@ -70,7 +73,7 @@ func (b *Blob) Size() int64 { return 0 } defer cancel() - objInfo, err := objInfoPool.ObjectInfo(b.repo.Ctx, b.ID.String()) + objInfo, err := objInfoPool.ObjectInfo(b.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 diff --git a/modules/git/catfile/object_pool.go b/modules/git/catfile/object_pool.go index b48e27768a959..9000e1f639b20 100644 --- a/modules/git/catfile/object_pool.go +++ b/modules/git/catfile/object_pool.go @@ -5,7 +5,6 @@ package catfile import ( "bufio" - "context" ) type ObjectInfo struct { @@ -15,16 +14,11 @@ type ObjectInfo struct { } type ObjectInfoPool interface { - ObjectInfo(ctx context.Context, sha string) (*ObjectInfo, error) + ObjectInfo(refName string) (*ObjectInfo, error) Close() } -type Object struct { - ObjectInfo - Reader *bufio.Reader -} - type ObjectPool interface { - Object(ctx context.Context, sha string) (*Object, error) + Object(refName string) (*ObjectInfo, *bufio.Reader, error) Close() } diff --git a/modules/git/catfile/object_pool_cmd.go b/modules/git/catfile/object_pool_cmd.go index 9e9750465b983..5495599a7eeb8 100644 --- a/modules/git/catfile/object_pool_cmd.go +++ b/modules/git/catfile/object_pool_cmd.go @@ -28,7 +28,7 @@ func NewObjectInfoPool(ctx context.Context, repoPath string) (ObjectInfoPool, er return &check, nil } -func (b *batchCheck) ObjectInfo(ctx context.Context, refName string) (*ObjectInfo, error) { +func (b *batchCheck) ObjectInfo(refName string) (*ObjectInfo, error) { _, err := b.writer.Write([]byte(refName + "\n")) if err != nil { return nil, err @@ -74,22 +74,21 @@ func NewObjectPool(ctx context.Context, repoPath string) (ObjectPool, error) { return &batch, nil } -func (b *batch) Object(ctx context.Context, refName string) (*Object, error) { +func (b *batch) Object(refName string) (*ObjectInfo, *bufio.Reader, error) { _, err := b.writer.Write([]byte(refName + "\n")) if err != nil { - return nil, err + return nil, nil, err } - var obj Object + var obj ObjectInfo var oid []byte oid, obj.Type, obj.Size, err = ReadBatchLine(b.reader) if err != nil { - return nil, err + return nil, nil, err } obj.ID = string(oid) - obj.Reader = b.reader - return &obj, nil + return &obj, b.reader, nil } func (b *batch) Close() { diff --git a/modules/git/languagestats/language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go index 3597a0960d1ca..4bbf272be1820 100644 --- a/modules/git/languagestats/language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -12,6 +12,7 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/attribute" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/optional" @@ -28,8 +29,11 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, } defer cancel() - object, err := objectPool.Object(repo.Ctx, commitID) + object, batchReader, err := objectPool.Object(commitID) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, git.ErrNotExist{ID: commitID} + } return nil, err } if object.Type != "commit" { @@ -37,8 +41,6 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, return nil, git.ErrNotExist{ID: commitID} } - batchReader := object.Reader - sha, err := git.NewIDFromString(object.ID) if err != nil { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) @@ -141,12 +143,14 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, // If content can not be read or file is too big just do detection by filename if f.Size() <= bigFileSize { - object, err := objectPool.Object(repo.Ctx, f.ID.String()) + object, batchReader, err := objectPool.Object(f.ID.String()) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, git.ErrNotExist{ID: f.ID.String()} + } log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err) return nil, err } - batchReader := object.Reader sizeToRead := object.Size discard := int64(1) diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index 51f0cf26407d2..cc91b74b54a03 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -14,6 +14,7 @@ import ( "sync" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" ) @@ -71,13 +72,14 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err commitReadingLoop: for { - object, err := objectPool.Object(repo.Ctx, commitID) + object, batchReader, err := objectPool.Object(commitID) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, git.ErrNotExist{ID: commitID} + } return nil, err } - batchReader := object.Reader - switch object.Type { case "tag": // This shouldn't happen but if it does well just get the commit and try again diff --git a/modules/git/repo_branch_nogogit.go b/modules/git/repo_branch_nogogit.go index 1305edd60a1e4..975bce9455cf4 100644 --- a/modules/git/repo_branch_nogogit.go +++ b/modules/git/repo_branch_nogogit.go @@ -28,7 +28,7 @@ func (repo *Repository) IsObjectExist(name string) bool { return false } defer cancel() - objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, name) + objInfo, err := objInfoPool.ObjectInfo(name) if err != nil { log.Debug("Error writing to ObjectInfo %v", err) return false @@ -49,7 +49,7 @@ func (repo *Repository) IsReferenceExist(name string) bool { } defer cancel() - _, err = objInfoPool.ObjectInfo(repo.Ctx, name) + _, err = objInfoPool.ObjectInfo(name) return err == nil } diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index 02f35a9895e11..572384563801d 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -43,9 +43,9 @@ func (repo *Repository) GetRefCommitID(name string) (string, error) { } defer cancel() - objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, name) + objInfo, err := objInfoPool.ObjectInfo(name) if err != nil { - if IsErrNotExist(err) { + if catfile.IsErrObjectNotFound(err) { return "", ErrNotExist{name, ""} } return "", err @@ -77,16 +77,14 @@ func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { } func (repo *Repository) getCommitFromBatchReader(objectPool catfile.ObjectPool, id ObjectID) (*Commit, error) { - object, err := objectPool.Object(repo.Ctx, id.String()) + object, rd, err := objectPool.Object(id.String()) if err != nil { - if errors.Is(err, io.EOF) || IsErrNotExist(err) { + if errors.Is(err, io.EOF) || catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ID: id.String()} } return nil, err } - rd := object.Reader - switch object.Type { case "missing": return nil, ErrNotExist{ID: id.String()} @@ -153,9 +151,9 @@ func (repo *Repository) ConvertToGitID(commitID string) (ObjectID, error) { } defer cancel() - objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, commitID) + objInfo, err := objInfoPool.ObjectInfo(commitID) if err != nil { - if IsErrNotExist(err) { + if catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{commitID, ""} } return nil, err diff --git a/modules/git/repo_tag_nogogit.go b/modules/git/repo_tag_nogogit.go index 6d7ce5c385045..e089850212844 100644 --- a/modules/git/repo_tag_nogogit.go +++ b/modules/git/repo_tag_nogogit.go @@ -10,6 +10,7 @@ import ( "errors" "io" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/log" ) @@ -29,9 +30,9 @@ func (repo *Repository) GetTagType(id ObjectID) (string, error) { return "", err } defer cancel() - objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, id.String()) + objInfo, err := objInfoPool.ObjectInfo(id.String()) if err != nil { - if IsErrNotExist(err) { + if catfile.IsErrObjectNotFound(err) { return "", ErrNotExist{ID: id.String()} } return "", err @@ -90,15 +91,14 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { } defer cancel() - object, err := objectPool.Object(repo.Ctx, tagID.String()) + object, rd, err := objectPool.Object(tagID.String()) if err != nil { - if errors.Is(err, io.EOF) || IsErrNotExist(err) { + if errors.Is(err, io.EOF) || catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ID: tagID.String()} } return nil, err } - rd := object.Reader if object.Type != "tag" { if err := DiscardFull(rd, object.Size+1); err != nil { return nil, err diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index 5a0b93bf7cb27..13981d532d37e 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -7,6 +7,8 @@ package git import ( "io" + + "code.gitea.io/gitea/modules/git/catfile" ) func (repo *Repository) getTree(id ObjectID) (*Tree, error) { @@ -16,13 +18,16 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { } defer cancel() - object, err := objectPool.Object(repo.Ctx, id.String()) + object, rd, err := objectPool.Object(id.String()) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, ErrNotExist{ + ID: id.String(), + } + } return nil, err } - rd := object.Reader - switch object.Type { case "tag": resolvedID := id diff --git a/modules/git/tree_entry_nogogit.go b/modules/git/tree_entry_nogogit.go index 90fea1dbd9c55..73b345868d17b 100644 --- a/modules/git/tree_entry_nogogit.go +++ b/modules/git/tree_entry_nogogit.go @@ -21,7 +21,7 @@ func (te *TreeEntry) Size() int64 { return 0 } defer cancel() - objInfo, err := objInfoPool.ObjectInfo(te.ptree.repo.Ctx, te.ID.String()) + objInfo, err := objInfoPool.ObjectInfo(te.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index 94e6d0357a275..b552a758926b7 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -9,6 +9,7 @@ import ( "io" "strings" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" ) @@ -33,20 +34,28 @@ func (t *Tree) ListEntries() (Entries, error) { } defer cancel() - object, err := objectPool.Object(t.repo.Ctx, t.ID.String()) + object, rd, err := objectPool.Object(t.ID.String()) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, ErrNotExist{ + ID: t.ID.String(), + } + } return nil, err } - rd := object.Reader - if object.Type == "commit" { treeID, err := ReadTreeID(rd, object.Size) if err != nil && err != io.EOF { return nil, err } - object, err = objectPool.Object(t.repo.Ctx, treeID) + object, rd, err = objectPool.Object(treeID) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, ErrNotExist{ + ID: treeID, + } + } return nil, err } } diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 48b7c632db6bd..4b1278774f22f 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -14,6 +14,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" + "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" "code.gitea.io/gitea/modules/gitrepo" @@ -176,12 +177,15 @@ func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, return b.addDelete(update.Filename, repo, batch) } - object, err := objectPool.Object(ctx, update.BlobSha) + object, batchReader, err := objectPool.Object(update.BlobSha) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return git.ErrNotExist{ID: update.BlobSha} + } return err } - batchReader := object.Reader + size = object.Size fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) if err != nil { diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 2a48d4f6568cc..abb5495655d41 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -13,6 +13,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" + "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" "code.gitea.io/gitea/modules/gitrepo" @@ -161,14 +162,15 @@ func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil } - object, err := objectPool.Object(ctx, update.BlobSha) + object, batchReader, err := objectPool.Object(update.BlobSha) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, git.ErrNotExist{ID: update.BlobSha} + } return nil, err } size = object.Size - batchReader := object.Reader - fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) if err != nil { return nil, err From 67847fbbea6a33b7541b57eb7f81577ab8a40528 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Mon, 5 Jan 2026 14:31:42 -0800 Subject: [PATCH 03/11] remove unnecessary functions --- modules/git/batch_reader.go | 21 --------------------- modules/git/pipeline/lfs_nogogit.go | 4 ++-- modules/git/tree_nogogit.go | 2 +- 3 files changed, 3 insertions(+), 24 deletions(-) diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go index 3d612f5549e30..ae4f49dabe6f2 100644 --- a/modules/git/batch_reader.go +++ b/modules/git/batch_reader.go @@ -10,27 +10,6 @@ import ( "code.gitea.io/gitea/modules/git/catfile" ) -// ReadBatchLine reads the header line from cat-file --batch while preserving the traditional return signature. -func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) { - sha, typ, size, err = catfile.ReadBatchLine(rd) - return sha, typ, size, convertCatfileError(err, sha) -} - -// ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream. -func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) { - return catfile.ReadTagObjectID(rd, size) -} - -// ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream. -func ReadTreeID(rd *bufio.Reader, size int64) (string, error) { - return catfile.ReadTreeID(rd, size) -} - -// BinToHex converts a binary hash into a hex encoded one. -func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte { - return catfile.BinToHex(objectFormat, sha, out) -} - // ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream. func ParseCatFileTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { mode, fname, sha, n, err = catfile.ParseCatFileTreeLine(objectFormat, rd, modeBuf, fnameBuf, shaBuf) diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index cc91b74b54a03..b28a816d0df13 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -83,7 +83,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err switch object.Type { case "tag": // This shouldn't happen but if it does well just get the commit and try again - id, err := git.ReadTagObjectID(batchReader, object.Size) + id, err := catfile.ReadTagObjectID(batchReader, object.Size) if err != nil { return nil, err } @@ -120,7 +120,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result } else if string(mode) == git.EntryModeTree.String() { hexObjectID := make([]byte, objectID.Type().FullLength()) - git.BinToHex(objectID.Type(), binObjectID, hexObjectID) + catfile.BinToHex(objectID.Type(), binObjectID, hexObjectID) trees = append(trees, hexObjectID) paths = append(paths, curPath+string(fname)+"/") } diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index b552a758926b7..f975b23153de6 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -45,7 +45,7 @@ func (t *Tree) ListEntries() (Entries, error) { } if object.Type == "commit" { - treeID, err := ReadTreeID(rd, object.Size) + treeID, err := catfile.ReadTreeID(rd, object.Size) if err != nil && err != io.EOF { return nil, err } From b76d71f83ed646da80522a3a021f94af0215417a Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 6 Jan 2026 11:55:03 -0800 Subject: [PATCH 04/11] Use a new interface for object pool so that it's possible to introduce batch command --- modules/git/batch_reader.go | 8 +- modules/git/blob_nogogit.go | 34 +--- modules/git/catfile/object_pool.go | 19 +- modules/git/catfile/object_pool_batch.go | 163 ++++++++++++++++++ modules/git/catfile/object_pool_cmd.go | 101 ----------- modules/git/catfile/reader.go | 8 +- .../languagestats/language_stats_nogogit.go | 15 +- modules/git/parse_treeentry.go | 4 +- modules/git/pipeline/lfs_nogogit.go | 17 +- modules/git/repo_base_nogogit.go | 80 ++------- modules/git/repo_branch_nogogit.go | 17 +- modules/git/repo_commit_nogogit.go | 33 +--- modules/git/repo_tag_nogogit.go | 19 +- modules/git/repo_tree_nogogit.go | 14 +- modules/git/tree_entry_nogogit.go | 8 +- modules/git/tree_nogogit.go | 15 +- modules/gitrepo/cat_file.go | 2 +- 17 files changed, 245 insertions(+), 312 deletions(-) create mode 100644 modules/git/catfile/object_pool_batch.go delete mode 100644 modules/git/catfile/object_pool_cmd.go diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go index ae4f49dabe6f2..325f9a0a2e8c6 100644 --- a/modules/git/batch_reader.go +++ b/modules/git/batch_reader.go @@ -4,23 +4,17 @@ package git import ( - "bufio" "errors" "code.gitea.io/gitea/modules/git/catfile" ) // ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream. -func ParseCatFileTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { +func ParseCatFileTreeLine(objectFormat ObjectFormat, rd catfile.ReadCloseDiscarder, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { mode, fname, sha, n, err = catfile.ParseCatFileTreeLine(objectFormat, rd, modeBuf, fnameBuf, shaBuf) return mode, fname, sha, n, convertCatfileError(err, nil) } -// DiscardFull discards the requested number of bytes from the buffered reader. -func DiscardFull(rd *bufio.Reader, discard int64) error { - return catfile.DiscardFull(rd, discard) -} - func convertCatfileError(err error, defaultID []byte) error { if err == nil { return nil diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index 5f83740f6bf0c..0b6141efcf9f7 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -6,7 +6,6 @@ package git import ( - "bufio" "bytes" "io" @@ -27,14 +26,8 @@ type Blob struct { // DataAsync gets a ReadCloser for the contents of a blob without reading it all. // Calling the Close function on the result will discard all unread output. func (b *Blob) DataAsync() (io.ReadCloser, error) { - objectPool, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) + object, rd, err := b.repo.objectPool.Object(b.ID.String()) if err != nil { - return nil, err - } - - object, rd, err := objectPool.Object(b.ID.String()) - if err != nil { - cancel() if catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ID: b.ID.String()} } @@ -46,7 +39,6 @@ func (b *Blob) DataAsync() (io.ReadCloser, error) { if b.size < 4096 { bs, err := io.ReadAll(io.LimitReader(rd, b.size)) - defer cancel() if err != nil { return nil, err } @@ -55,9 +47,8 @@ func (b *Blob) DataAsync() (io.ReadCloser, error) { } return &blobReader{ - rd: rd, - n: b.size, - cancel: cancel, + rd: rd, + n: b.size, }, nil } @@ -67,13 +58,7 @@ func (b *Blob) Size() int64 { return b.size } - objInfoPool, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx) - if err != nil { - log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) - return 0 - } - defer cancel() - objInfo, err := objInfoPool.ObjectInfo(b.ID.String()) + objInfo, err := b.repo.objectPool.ObjectInfo(b.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 @@ -85,9 +70,8 @@ func (b *Blob) Size() int64 { } type blobReader struct { - rd *bufio.Reader - n int64 - cancel func() + rd catfile.ReadCloseDiscarder + n int64 } func (b *blobReader) Read(p []byte) (n int, err error) { @@ -107,13 +91,11 @@ func (b *blobReader) Close() error { if b.rd == nil { return nil } - - defer b.cancel() - - if err := DiscardFull(b.rd, b.n+1); err != nil { + if err := catfile.DiscardFull(b.rd, b.n+1); err != nil { return err } + b.rd.Close() b.rd = nil return nil diff --git a/modules/git/catfile/object_pool.go b/modules/git/catfile/object_pool.go index 9000e1f639b20..651e4d15b856c 100644 --- a/modules/git/catfile/object_pool.go +++ b/modules/git/catfile/object_pool.go @@ -1,10 +1,10 @@ -// Copyright 2024 The Gitea Authors. All rights reserved. +// Copyright 2026 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT package catfile import ( - "bufio" + "io" ) type ObjectInfo struct { @@ -13,12 +13,19 @@ type ObjectInfo struct { Size int64 } -type ObjectInfoPool interface { - ObjectInfo(refName string) (*ObjectInfo, error) - Close() +type Discarder interface { + Discard(n int) (int, error) +} + +type ReadCloseDiscarder interface { + io.ReadCloser + Discarder + ReadBytes(delim byte) ([]byte, error) + ReadSlice(delim byte) (line []byte, err error) } type ObjectPool interface { - Object(refName string) (*ObjectInfo, *bufio.Reader, error) + ObjectInfo(refName string) (*ObjectInfo, error) + Object(refName string) (*ObjectInfo, ReadCloseDiscarder, error) Close() } diff --git a/modules/git/catfile/object_pool_batch.go b/modules/git/catfile/object_pool_batch.go new file mode 100644 index 0000000000000..345f7c34226be --- /dev/null +++ b/modules/git/catfile/object_pool_batch.go @@ -0,0 +1,163 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package catfile + +import ( + "bufio" + "context" + + "code.gitea.io/gitea/modules/log" +) + +// batch represents an active `git cat-file --batch` invocation +// paired with the pipes that feed/read from it. Call Close to release resources. +type batch struct { + cancel context.CancelFunc + reader *bufio.Reader + writer WriteCloserError + inUse bool +} + +func (b *batch) Close() { + if b.cancel != nil { + b.cancel() + } + if b.writer != nil { + _ = b.writer.Close() + } +} + +type batchObjectPool struct { + ctx context.Context + repoPath string + batches []*batch + batchChecks []*batch +} + +// NewBatch creates a new cat-file --batch process for the provided repository path. +// The returned Batch must be closed once the caller has finished with it. +func NewBatchObjectPool(ctx context.Context, repoPath string) (ObjectPool, error) { + if err := EnsureValidGitRepository(ctx, repoPath); err != nil { + return nil, err + } + + return &batchObjectPool{ + ctx: ctx, + repoPath: repoPath, + }, nil +} + +func (b *batchObjectPool) getBatch() *batch { + for _, batch := range b.batches { + if !batch.inUse { + batch.inUse = true + return batch + } + } + if len(b.batches) > 1 { + log.Debug("Opening temporary cat file batch for: %s", b.repoPath) + } + newBatch := b.newBatch() + newBatch.inUse = true + b.batches = append(b.batches, newBatch) + return newBatch +} + +func (b *batchObjectPool) getBatchCheck() *batch { + for _, batch := range b.batchChecks { + if !batch.inUse { + batch.inUse = true + return batch + } + } + if len(b.batchChecks) > 1 { + log.Debug("Opening temporary cat file batch-check for: %s", b.repoPath) + } + newBatch := b.newBatchCheck() + newBatch.inUse = true + b.batchChecks = append(b.batchChecks, newBatch) + return newBatch +} + +func (b *batchObjectPool) newBatch() *batch { + var batch batch + batch.writer, batch.reader, batch.cancel = catFileBatch(b.ctx, b.repoPath) + return &batch +} + +func (b *batchObjectPool) newBatchCheck() *batch { + var check batch + check.writer, check.reader, check.cancel = catFileBatchCheck(b.ctx, b.repoPath) + return &check +} + +func (b *batchObjectPool) closeBatchCheck(batch *batch) { + if batch != nil { + batch.inUse = false + } +} + +func (b *batchObjectPool) closeBatch(batch *batch) { + if batch != nil { + batch.inUse = false + } +} + +func (b *batchObjectPool) ObjectInfo(refName string) (*ObjectInfo, error) { + batch := b.getBatchCheck() + defer b.closeBatchCheck(batch) + + _, err := batch.writer.Write([]byte(refName + "\n")) + if err != nil { + return nil, err + } + + var objInfo ObjectInfo + var oid []byte + oid, objInfo.Type, objInfo.Size, err = ReadBatchLine(batch.reader) + if err != nil { + return nil, err + } + objInfo.ID = string(oid) + return &objInfo, nil +} + +type readCloser struct { + *bufio.Reader + batch *batch +} + +func (rc *readCloser) Close() error { + rc.batch.inUse = false + return nil +} + +func (b *batchObjectPool) Object(refName string) (*ObjectInfo, ReadCloseDiscarder, error) { + batch := b.getBatch() + defer b.closeBatch(batch) + + _, err := batch.writer.Write([]byte(refName + "\n")) + if err != nil { + return nil, nil, err + } + + var obj ObjectInfo + var oid []byte + oid, obj.Type, obj.Size, err = ReadBatchLine(batch.reader) + if err != nil { + return nil, nil, err + } + obj.ID = string(oid) + + return &obj, &readCloser{Reader: batch.reader, batch: batch}, nil +} + +func (b *batchObjectPool) Close() { + for _, batch := range b.batches { + batch.Close() + } + for _, batchCheck := range b.batchChecks { + batchCheck.Close() + } +} diff --git a/modules/git/catfile/object_pool_cmd.go b/modules/git/catfile/object_pool_cmd.go deleted file mode 100644 index 5495599a7eeb8..0000000000000 --- a/modules/git/catfile/object_pool_cmd.go +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2025 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package catfile - -import ( - "bufio" - "context" -) - -// batchCheck represents an active `git cat-file --batch-check` invocation -// paired with the pipes that feed/read from it. Call Close to release resources. -type batchCheck struct { - cancel context.CancelFunc - reader *bufio.Reader - writer WriteCloserError -} - -// NewBatchCheck creates a cat-file --batch-check process for the provided repository path. -// The returned Batch must be closed once the caller has finished with it. -func NewObjectInfoPool(ctx context.Context, repoPath string) (ObjectInfoPool, error) { - if err := EnsureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - var check batchCheck - check.writer, check.reader, check.cancel = catFileBatchCheck(ctx, repoPath) - return &check, nil -} - -func (b *batchCheck) ObjectInfo(refName string) (*ObjectInfo, error) { - _, err := b.writer.Write([]byte(refName + "\n")) - if err != nil { - return nil, err - } - - var objInfo ObjectInfo - var oid []byte - oid, objInfo.Type, objInfo.Size, err = ReadBatchLine(b.reader) - if err != nil { - return nil, err - } - objInfo.ID = string(oid) - return &objInfo, nil -} - -// Close stops the underlying git cat-file process and releases held resources. -func (b *batchCheck) Close() { - if b.cancel != nil { - b.cancel() - } - if b.writer != nil { - _ = b.writer.Close() - } -} - -// batch represents an active `git cat-file --batch` invocation -// paired with the pipes that feed/read from it. Call Close to release resources. -type batch struct { - cancel context.CancelFunc - reader *bufio.Reader - writer WriteCloserError -} - -// NewBatch creates a new cat-file --batch process for the provided repository path. -// The returned Batch must be closed once the caller has finished with it. -func NewObjectPool(ctx context.Context, repoPath string) (ObjectPool, error) { - if err := EnsureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - var batch batch - batch.writer, batch.reader, batch.cancel = catFileBatch(ctx, repoPath) - return &batch, nil -} - -func (b *batch) Object(refName string) (*ObjectInfo, *bufio.Reader, error) { - _, err := b.writer.Write([]byte(refName + "\n")) - if err != nil { - return nil, nil, err - } - - var obj ObjectInfo - var oid []byte - oid, obj.Type, obj.Size, err = ReadBatchLine(b.reader) - if err != nil { - return nil, nil, err - } - obj.ID = string(oid) - - return &obj, b.reader, nil -} - -func (b *batch) Close() { - if b.cancel != nil { - b.cancel() - } - if b.writer != nil { - _ = b.writer.Close() - } -} diff --git a/modules/git/catfile/reader.go b/modules/git/catfile/reader.go index 1785cc4cc0342..b09af97547441 100644 --- a/modules/git/catfile/reader.go +++ b/modules/git/catfile/reader.go @@ -69,7 +69,7 @@ func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err er } // ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest. -func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) { +func ReadTagObjectID(rd ReadCloseDiscarder, size int64) (string, error) { var id string var n int64 headerLoop: @@ -94,7 +94,7 @@ headerLoop: } // ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the commit content. -func ReadTreeID(rd *bufio.Reader, size int64) (string, error) { +func ReadTreeID(rd ReadCloseDiscarder, size int64) (string, error) { var id string var n int64 headerLoop: @@ -136,7 +136,7 @@ func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte { // ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream and avoids allocations // where possible. Each line is composed of: // SP NUL -func ParseCatFileTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { +func ParseCatFileTreeLine(objectFormat ObjectFormat, rd ReadCloseDiscarder, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { var readBytes []byte readBytes, err = rd.ReadSlice('\x00') @@ -192,7 +192,7 @@ func ParseCatFileTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, } // DiscardFull discards the requested amount of bytes from the buffered reader regardless of its internal limit. -func DiscardFull(rd *bufio.Reader, discard int64) error { +func DiscardFull(rd ReadCloseDiscarder, discard int64) error { if discard > math.MaxInt32 { n, err := rd.Discard(math.MaxInt32) discard -= int64(n) diff --git a/modules/git/languagestats/language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go index 4bbf272be1820..6e919c7eed5a5 100644 --- a/modules/git/languagestats/language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -23,19 +23,15 @@ import ( func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, error) { // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. // so let's create a batch stdin and stdout - objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) - if err != nil { - return nil, err - } - defer cancel() - - object, batchReader, err := objectPool.Object(commitID) + object, batchReader, err := repo.ObjectPool().Object(commitID) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, git.ErrNotExist{ID: commitID} } return nil, err } + defer batchReader.Close() + if object.Type != "commit" { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) return nil, git.ErrNotExist{ID: commitID} @@ -143,7 +139,7 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, // If content can not be read or file is too big just do detection by filename if f.Size() <= bigFileSize { - object, batchReader, err := objectPool.Object(f.ID.String()) + object, batchReader, err := repo.ObjectPool().Object(f.ID.String()) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, git.ErrNotExist{ID: f.ID.String()} @@ -151,6 +147,7 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err) return nil, err } + defer batchReader.Close() sizeToRead := object.Size discard := int64(1) @@ -164,7 +161,7 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, return nil, err } content = contentBuf.Bytes() - if err := git.DiscardFull(batchReader, discard); err != nil { + if err := catfile.DiscardFull(batchReader, discard); err != nil { return nil, err } } diff --git a/modules/git/parse_treeentry.go b/modules/git/parse_treeentry.go index e14d9f17b5dcd..b7bb8ac2f09ad 100644 --- a/modules/git/parse_treeentry.go +++ b/modules/git/parse_treeentry.go @@ -4,11 +4,11 @@ package git import ( - "bufio" "bytes" "fmt" "io" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/log" ) @@ -47,7 +47,7 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { return entries, nil } -func catBatchParseTreeEntries(objectFormat ObjectFormat, ptree *Tree, rd *bufio.Reader, sz int64) ([]*TreeEntry, error) { +func catBatchParseTreeEntries(objectFormat ObjectFormat, ptree *Tree, rd catfile.ReadCloseDiscarder, sz int64) ([]*TreeEntry, error) { fnameBuf := make([]byte, 4096) modeBuf := make([]byte, 40) shaBuf := make([]byte, objectFormat.FullLength()) diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index b28a816d0df13..8c21ca1bf2c5a 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -48,12 +48,6 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout - objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) - if err != nil { - return nil, err - } - defer cancel() - // We'll use a scanner for the revList because it's simpler than a bufio.Reader scan := bufio.NewScanner(revListReader) trees := [][]byte{} @@ -72,7 +66,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err commitReadingLoop: for { - object, batchReader, err := objectPool.Object(commitID) + object, batchReader, err := repo.ObjectPool().Object(commitID) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, git.ErrNotExist{ID: commitID} @@ -85,6 +79,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err // This shouldn't happen but if it does well just get the commit and try again id, err := catfile.ReadTagObjectID(batchReader, object.Size) if err != nil { + batchReader.Close() return nil, err } commitID = id @@ -93,9 +88,11 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err // Read in the commit to get its tree and in case this is one of the last used commits curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(commitID), io.LimitReader(batchReader, object.Size)) if err != nil { + batchReader.Close() return nil, err } if _, err := batchReader.Discard(1); err != nil { + batchReader.Close() return nil, err } @@ -106,6 +103,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err for n < object.Size { mode, fname, binObjectID, count, err := git.ParseCatFileTreeLine(objectID.Type(), batchReader, modeBuf, fnameBuf, workingShaBuf) if err != nil { + batchReader.Close() return nil, err } n += int64(count) @@ -126,6 +124,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err } } if _, err := batchReader.Discard(1); err != nil { + batchReader.Close() return nil, err } if len(trees) > 0 { @@ -137,10 +136,12 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err break commitReadingLoop } default: - if err := git.DiscardFull(batchReader, object.Size+1); err != nil { + if err := catfile.DiscardFull(batchReader, object.Size+1); err != nil { + batchReader.Close() return nil, err } } + batchReader.Close() } } diff --git a/modules/git/repo_base_nogogit.go b/modules/git/repo_base_nogogit.go index fc06b7e7afa00..567a0c1d5cdfc 100644 --- a/modules/git/repo_base_nogogit.go +++ b/modules/git/repo_base_nogogit.go @@ -11,7 +11,6 @@ import ( "path/filepath" "code.gitea.io/gitea/modules/git/catfile" - "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/util" ) @@ -23,11 +22,7 @@ type Repository struct { tagCache *ObjectCache[*Tag] - batchInUse bool - batch catfile.ObjectPool - - checkInUse bool - check catfile.ObjectInfoPool + objectPool catfile.ObjectPool Ctx context.Context LastCommitCache *LastCommitCache @@ -49,76 +44,29 @@ func OpenRepository(ctx context.Context, repoPath string) (*Repository, error) { return nil, util.NewNotExistErrorf("no such file or directory") } - return &Repository{ - Path: repoPath, - tagCache: newObjectCache[*Tag](), - Ctx: ctx, - }, nil -} - -// CatFileBatch obtains a CatFileBatch for this repository -func (repo *Repository) CatFileBatch(ctx context.Context) (catfile.ObjectPool, func(), error) { - if repo.batch == nil { - var err error - repo.batch, err = catfile.NewObjectPool(ctx, repo.Path) - if err != nil { - return nil, nil, err - } - } - - if !repo.batchInUse { - repo.batchInUse = true - return repo.batch, func() { - repo.batchInUse = false - }, nil - } - - log.Debug("Opening temporary cat file batch for: %s", repo.Path) - tempBatch, err := catfile.NewObjectPool(ctx, repo.Path) + objectPool, err := catfile.NewBatchObjectPool(ctx, repoPath) if err != nil { - return nil, nil, err - } - return tempBatch, tempBatch.Close, nil -} - -// CatFileBatchCheck obtains a CatFileBatchCheck for this repository -func (repo *Repository) CatFileBatchCheck(ctx context.Context) (catfile.ObjectInfoPool, func(), error) { - if repo.check == nil { - var err error - repo.check, err = catfile.NewObjectInfoPool(ctx, repo.Path) - if err != nil { - return nil, nil, err - } + return nil, err } - if !repo.checkInUse { - repo.checkInUse = true - return repo.check, func() { - repo.checkInUse = false - }, nil - } + return &Repository{ + Path: repoPath, + tagCache: newObjectCache[*Tag](), + objectPool: objectPool, + Ctx: ctx, + }, nil +} - log.Debug("Opening temporary cat file batch-check for: %s", repo.Path) - tempBatchCheck, err := catfile.NewObjectInfoPool(ctx, repo.Path) - if err != nil { - return nil, nil, err - } - return tempBatchCheck, tempBatchCheck.Close, nil +func (repo *Repository) ObjectPool() catfile.ObjectPool { + return repo.objectPool } func (repo *Repository) Close() error { if repo == nil { return nil } - if repo.batch != nil { - repo.batch.Close() - repo.batch = nil - repo.batchInUse = false - } - if repo.check != nil { - repo.check.Close() - repo.check = nil - repo.checkInUse = false + if repo.objectPool != nil { + repo.objectPool.Close() } repo.LastCommitCache = nil repo.tagCache = nil diff --git a/modules/git/repo_branch_nogogit.go b/modules/git/repo_branch_nogogit.go index 975bce9455cf4..e5716588d9562 100644 --- a/modules/git/repo_branch_nogogit.go +++ b/modules/git/repo_branch_nogogit.go @@ -22,13 +22,7 @@ func (repo *Repository) IsObjectExist(name string) bool { return false } - objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) - if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) - return false - } - defer cancel() - objInfo, err := objInfoPool.ObjectInfo(name) + objInfo, err := repo.objectPool.ObjectInfo(name) if err != nil { log.Debug("Error writing to ObjectInfo %v", err) return false @@ -42,14 +36,7 @@ func (repo *Repository) IsReferenceExist(name string) bool { return false } - objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) - if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) - return false - } - defer cancel() - - _, err = objInfoPool.ObjectInfo(name) + _, err := repo.objectPool.ObjectInfo(name) return err == nil } diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index 572384563801d..efe19b182cc28 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -37,13 +37,7 @@ func (repo *Repository) ResolveReference(name string) (string, error) { // GetRefCommitID returns the last commit ID string of given reference (branch or tag). func (repo *Repository) GetRefCommitID(name string) (string, error) { - objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) - if err != nil { - return "", err - } - defer cancel() - - objInfo, err := objInfoPool.ObjectInfo(name) + objInfo, err := repo.objectPool.ObjectInfo(name) if err != nil { if catfile.IsErrObjectNotFound(err) { return "", ErrNotExist{name, ""} @@ -67,23 +61,14 @@ func (repo *Repository) IsCommitExist(name string) bool { } func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { - objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) - if err != nil { - return nil, err - } - defer cancel() - - return repo.getCommitFromBatchReader(objectPool, id) -} - -func (repo *Repository) getCommitFromBatchReader(objectPool catfile.ObjectPool, id ObjectID) (*Commit, error) { - object, rd, err := objectPool.Object(id.String()) + object, rd, err := repo.objectPool.Object(id.String()) if err != nil { if errors.Is(err, io.EOF) || catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ID: id.String()} } return nil, err } + defer rd.Close() switch object.Type { case "missing": @@ -104,7 +89,7 @@ func (repo *Repository) getCommitFromBatchReader(objectPool catfile.ObjectPool, return nil, err } - commit, err := repo.getCommitFromBatchReader(objectPool, tag.Object) + commit, err := repo.getCommit(tag.Object) if err != nil { return nil, err } @@ -123,7 +108,7 @@ func (repo *Repository) getCommitFromBatchReader(objectPool catfile.ObjectPool, return commit, nil default: log.Debug("Unknown typ: %s", object.Type) - if err := DiscardFull(rd, object.Size+1); err != nil { + if err := catfile.DiscardFull(rd, object.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ @@ -145,13 +130,7 @@ func (repo *Repository) ConvertToGitID(commitID string) (ObjectID, error) { } } - objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) - if err != nil { - return nil, err - } - defer cancel() - - objInfo, err := objInfoPool.ObjectInfo(commitID) + objInfo, err := repo.objectPool.ObjectInfo(commitID) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{commitID, ""} diff --git a/modules/git/repo_tag_nogogit.go b/modules/git/repo_tag_nogogit.go index e089850212844..bde5f50608606 100644 --- a/modules/git/repo_tag_nogogit.go +++ b/modules/git/repo_tag_nogogit.go @@ -25,12 +25,7 @@ func (repo *Repository) IsTagExist(name string) bool { // GetTagType gets the type of the tag, either commit (simple) or tag (annotated) func (repo *Repository) GetTagType(id ObjectID) (string, error) { - objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) - if err != nil { - return "", err - } - defer cancel() - objInfo, err := objInfoPool.ObjectInfo(id.String()) + objInfo, err := repo.objectPool.ObjectInfo(id.String()) if err != nil { if catfile.IsErrObjectNotFound(err) { return "", ErrNotExist{ID: id.String()} @@ -84,23 +79,17 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { return tag, nil } - // The tag is an annotated tag with a message. - objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) - if err != nil { - return nil, err - } - defer cancel() - - object, rd, err := objectPool.Object(tagID.String()) + object, rd, err := repo.objectPool.Object(tagID.String()) if err != nil { if errors.Is(err, io.EOF) || catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ID: tagID.String()} } return nil, err } + defer rd.Close() if object.Type != "tag" { - if err := DiscardFull(rd, object.Size+1); err != nil { + if err := catfile.DiscardFull(rd, object.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ID: tagID.String()} diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index 13981d532d37e..2fd2a2cd44727 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -12,13 +12,7 @@ import ( ) func (repo *Repository) getTree(id ObjectID) (*Tree, error) { - objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) - if err != nil { - return nil, err - } - defer cancel() - - object, rd, err := objectPool.Object(id.String()) + object, rd, err := repo.objectPool.Object(id.String()) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ @@ -27,6 +21,7 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { } return nil, err } + defer rd.Close() switch object.Type { case "tag": @@ -39,8 +34,9 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { if err != nil { return nil, err } + rd.Close() // close reader to avoid leaks - commit, err := repo.getCommitFromBatchReader(objectPool, tag.Object) + commit, err := repo.getCommit(tag.Object) if err != nil { return nil, err } @@ -70,7 +66,7 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { tree.entriesParsed = true return tree, nil default: - if err := DiscardFull(rd, object.Size+1); err != nil { + if err := catfile.DiscardFull(rd, object.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ diff --git a/modules/git/tree_entry_nogogit.go b/modules/git/tree_entry_nogogit.go index 73b345868d17b..970b3664415ba 100644 --- a/modules/git/tree_entry_nogogit.go +++ b/modules/git/tree_entry_nogogit.go @@ -15,13 +15,7 @@ func (te *TreeEntry) Size() int64 { return te.size } - objInfoPool, cancel, err := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) - if err != nil { - log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) - return 0 - } - defer cancel() - objInfo, err := objInfoPool.ObjectInfo(te.ID.String()) + objInfo, err := te.ptree.repo.objectPool.ObjectInfo(te.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index f975b23153de6..935ceec0caf76 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -28,13 +28,7 @@ func (t *Tree) ListEntries() (Entries, error) { } if t.repo != nil { - objectPool, cancel, err := t.repo.CatFileBatch(t.repo.Ctx) - if err != nil { - return nil, err - } - defer cancel() - - object, rd, err := objectPool.Object(t.ID.String()) + object, rd, err := t.repo.objectPool.Object(t.ID.String()) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ @@ -43,13 +37,15 @@ func (t *Tree) ListEntries() (Entries, error) { } return nil, err } + defer rd.Close() if object.Type == "commit" { treeID, err := catfile.ReadTreeID(rd, object.Size) if err != nil && err != io.EOF { return nil, err } - object, rd, err = objectPool.Object(treeID) + rd.Close() + object, rd, err = t.repo.objectPool.Object(treeID) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ @@ -58,6 +54,7 @@ func (t *Tree) ListEntries() (Entries, error) { } return nil, err } + defer rd.Close() } if object.Type == "tree" { t.entries, err = catBatchParseTreeEntries(t.ID.Type(), t, rd, object.Size) @@ -69,7 +66,7 @@ func (t *Tree) ListEntries() (Entries, error) { } // Not a tree just use ls-tree instead - if err := DiscardFull(rd, object.Size+1); err != nil { + if err := catfile.DiscardFull(rd, object.Size+1); err != nil { return nil, err } } diff --git a/modules/gitrepo/cat_file.go b/modules/gitrepo/cat_file.go index d02421d9b780a..91e78b8021cb1 100644 --- a/modules/gitrepo/cat_file.go +++ b/modules/gitrepo/cat_file.go @@ -10,5 +10,5 @@ import ( ) func NewObjectPool(ctx context.Context, repo Repository) (catfile.ObjectPool, error) { - return catfile.NewObjectPool(ctx, repoPath(repo)) + return catfile.NewBatchObjectPool(ctx, repoPath(repo)) } From 788d78c94a5b3fad4c60bcf651a50a16170001b0 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 6 Jan 2026 12:25:15 -0800 Subject: [PATCH 05/11] Fix reader close --- modules/git/blob_nogogit.go | 1 + modules/git/catfile/object_pool_batch.go | 32 +++++++++---------- .../languagestats/language_stats_nogogit.go | 28 +--------------- modules/git/pipeline/lfs_nogogit.go | 2 +- modules/git/repo_commit_nogogit.go | 8 ++++- modules/git/repo_tree_nogogit.go | 6 +++- modules/git/tree_nogogit.go | 3 +- modules/indexer/code/bleve/bleve.go | 1 + .../code/elasticsearch/elasticsearch.go | 2 ++ 9 files changed, 35 insertions(+), 48 deletions(-) diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index 0b6141efcf9f7..fc345d227dc66 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -38,6 +38,7 @@ func (b *Blob) DataAsync() (io.ReadCloser, error) { b.size = object.Size if b.size < 4096 { + defer rd.Close() bs, err := io.ReadAll(io.LimitReader(rd, b.size)) if err != nil { return nil, err diff --git a/modules/git/catfile/object_pool_batch.go b/modules/git/catfile/object_pool_batch.go index 345f7c34226be..b7b9821d73772 100644 --- a/modules/git/catfile/object_pool_batch.go +++ b/modules/git/catfile/object_pool_batch.go @@ -55,8 +55,8 @@ func (b *batchObjectPool) getBatch() *batch { return batch } } - if len(b.batches) > 1 { - log.Debug("Opening temporary cat file batch for: %s", b.repoPath) + if len(b.batches) >= 1 { + log.Warn("Opening temporary cat file batch for: %s", b.repoPath) } newBatch := b.newBatch() newBatch.inUse = true @@ -64,6 +64,18 @@ func (b *batchObjectPool) getBatch() *batch { return newBatch } +func (b *batchObjectPool) newBatch() *batch { + var batch batch + batch.writer, batch.reader, batch.cancel = catFileBatch(b.ctx, b.repoPath) + return &batch +} + +func (b *batchObjectPool) closeBatch(batch *batch) { + if batch != nil { + batch.inUse = false + } +} + func (b *batchObjectPool) getBatchCheck() *batch { for _, batch := range b.batchChecks { if !batch.inUse { @@ -71,8 +83,8 @@ func (b *batchObjectPool) getBatchCheck() *batch { return batch } } - if len(b.batchChecks) > 1 { - log.Debug("Opening temporary cat file batch-check for: %s", b.repoPath) + if len(b.batchChecks) >= 1 { + log.Warn("Opening temporary cat file batch-check for: %s", b.repoPath) } newBatch := b.newBatchCheck() newBatch.inUse = true @@ -80,12 +92,6 @@ func (b *batchObjectPool) getBatchCheck() *batch { return newBatch } -func (b *batchObjectPool) newBatch() *batch { - var batch batch - batch.writer, batch.reader, batch.cancel = catFileBatch(b.ctx, b.repoPath) - return &batch -} - func (b *batchObjectPool) newBatchCheck() *batch { var check batch check.writer, check.reader, check.cancel = catFileBatchCheck(b.ctx, b.repoPath) @@ -98,12 +104,6 @@ func (b *batchObjectPool) closeBatchCheck(batch *batch) { } } -func (b *batchObjectPool) closeBatch(batch *batch) { - if batch != nil { - batch.inUse = false - } -} - func (b *batchObjectPool) ObjectInfo(refName string) (*ObjectInfo, error) { batch := b.getBatchCheck() defer b.closeBatchCheck(batch) diff --git a/modules/git/languagestats/language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go index 6e919c7eed5a5..d4fe84b342380 100644 --- a/modules/git/languagestats/language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -21,36 +21,11 @@ import ( // GetLanguageStats calculates language stats for git repository at specified commit func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, error) { - // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. - // so let's create a batch stdin and stdout - object, batchReader, err := repo.ObjectPool().Object(commitID) - if err != nil { - if catfile.IsErrObjectNotFound(err) { - return nil, git.ErrNotExist{ID: commitID} - } - return nil, err - } - defer batchReader.Close() - - if object.Type != "commit" { - log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) - return nil, git.ErrNotExist{ID: commitID} - } - - sha, err := git.NewIDFromString(object.ID) - if err != nil { - log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) - return nil, git.ErrNotExist{ID: commitID} - } - - commit, err := git.CommitFromReader(repo, sha, io.LimitReader(batchReader, object.Size)) + commit, err := repo.GetCommit(commitID) if err != nil { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) return nil, err } - if _, err = batchReader.Discard(1); err != nil { - return nil, err - } tree := commit.Tree @@ -137,7 +112,6 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, } // If content can not be read or file is too big just do detection by filename - if f.Size() <= bigFileSize { object, batchReader, err := repo.ObjectPool().Object(f.ID.String()) if err != nil { diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index 8c21ca1bf2c5a..7b8f4093a4605 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -83,7 +83,6 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } commitID = id - continue case "commit": // Read in the commit to get its tree and in case this is one of the last used commits curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(commitID), io.LimitReader(batchReader, object.Size)) @@ -133,6 +132,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err trees = trees[:len(trees)-1] paths = paths[:len(paths)-1] } else { + batchReader.Close() break commitReadingLoop } default: diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index efe19b182cc28..01a9958f9ab91 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -68,22 +68,26 @@ func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { } return nil, err } - defer rd.Close() switch object.Type { case "missing": + rd.Close() return nil, ErrNotExist{ID: id.String()} case "tag": // then we need to parse the tag // and load the commit data, err := io.ReadAll(io.LimitReader(rd, object.Size)) if err != nil { + rd.Close() return nil, err } _, err = rd.Discard(1) if err != nil { + rd.Close() return nil, err } + rd.Close() + tag, err := parseTagData(id.Type(), data) if err != nil { return nil, err @@ -96,6 +100,7 @@ func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { return commit, nil case "commit": + defer rd.Close() commit, err := CommitFromReader(repo, id, io.LimitReader(rd, object.Size)) if err != nil { return nil, err @@ -107,6 +112,7 @@ func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { return commit, nil default: + defer rd.Close() log.Debug("Unknown typ: %s", object.Type) if err := catfile.DiscardFull(rd, object.Size+1); err != nil { return nil, err diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index 2fd2a2cd44727..547741ad739f2 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -21,17 +21,18 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { } return nil, err } - defer rd.Close() switch object.Type { case "tag": resolvedID := id data, err := io.ReadAll(io.LimitReader(rd, object.Size)) if err != nil { + rd.Close() return nil, err } tag, err := parseTagData(id.Type(), data) if err != nil { + rd.Close() return nil, err } rd.Close() // close reader to avoid leaks @@ -43,6 +44,7 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { commit.Tree.ResolvedID = resolvedID return &commit.Tree, nil case "commit": + defer rd.Close() commit, err := CommitFromReader(repo, id, io.LimitReader(rd, object.Size)) if err != nil { return nil, err @@ -53,6 +55,7 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { commit.Tree.ResolvedID = commit.ID return &commit.Tree, nil case "tree": + defer rd.Close() tree := NewTree(repo, id) tree.ResolvedID = id objectFormat, err := repo.GetObjectFormat() @@ -66,6 +69,7 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { tree.entriesParsed = true return tree, nil default: + defer rd.Close() if err := catfile.DiscardFull(rd, object.Size+1); err != nil { return nil, err } diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index 935ceec0caf76..3e2a38fb2aa53 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -37,14 +37,13 @@ func (t *Tree) ListEntries() (Entries, error) { } return nil, err } - defer rd.Close() if object.Type == "commit" { treeID, err := catfile.ReadTreeID(rd, object.Size) + rd.Close() if err != nil && err != io.EOF { return nil, err } - rd.Close() object, rd, err = t.repo.objectPool.Object(treeID) if err != nil { if catfile.IsErrObjectNotFound(err) { diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 4b1278774f22f..b760d5c33a34b 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -184,6 +184,7 @@ func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, } return err } + defer batchReader.Close() size = object.Size diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index abb5495655d41..eeeacf9417f14 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -169,6 +169,8 @@ func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, } return nil, err } + defer batchReader.Close() + size = object.Size fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) From 44f1aa380cc2ada8130f94bdb18295eab80e6416 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 6 Jan 2026 14:24:56 -0800 Subject: [PATCH 06/11] Fix test and some renames --- modules/git/blob_nogogit.go | 12 +++---- .../languagestats/language_stats_nogogit.go | 14 ++++---- modules/git/pipeline/lfs_nogogit.go | 34 +++++++++---------- modules/git/repo_commit_nogogit.go | 28 +++++++-------- modules/git/repo_tag_nogogit.go | 12 +++---- modules/git/repo_tree_nogogit.go | 26 +++++++------- modules/git/tree_entry.go | 6 ++-- modules/git/tree_nogogit.go | 18 +++++----- modules/indexer/code/bleve/bleve.go | 10 +++--- .../code/elasticsearch/elasticsearch.go | 8 ++--- routers/api/v1/repo/hook_test.go | 3 +- routers/api/v1/utils/hook_test.go | 12 ++++--- routers/web/repo/view_readme.go | 6 ++-- services/contexttest/context_tests.go | 10 ++++-- services/repository/files/content_test.go | 3 +- services/repository/files/diff_test.go | 6 ++-- services/repository/files/tree.go | 12 +++++-- services/repository/files/tree_test.go | 6 ++-- tests/integration/repofiles_change_test.go | 21 ++++++++---- 19 files changed, 139 insertions(+), 108 deletions(-) diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index fc345d227dc66..c90b6623dfd7c 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -26,7 +26,7 @@ type Blob struct { // DataAsync gets a ReadCloser for the contents of a blob without reading it all. // Calling the Close function on the result will discard all unread output. func (b *Blob) DataAsync() (io.ReadCloser, error) { - object, rd, err := b.repo.objectPool.Object(b.ID.String()) + objectInfo, contentReader, err := b.repo.objectPool.Object(b.ID.String()) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ID: b.ID.String()} @@ -35,20 +35,20 @@ func (b *Blob) DataAsync() (io.ReadCloser, error) { } b.gotSize = true - b.size = object.Size + b.size = objectInfo.Size if b.size < 4096 { - defer rd.Close() - bs, err := io.ReadAll(io.LimitReader(rd, b.size)) + defer contentReader.Close() + bs, err := io.ReadAll(io.LimitReader(contentReader, b.size)) if err != nil { return nil, err } - _, err = rd.Discard(1) + _, err = contentReader.Discard(1) return io.NopCloser(bytes.NewReader(bs)), err } return &blobReader{ - rd: rd, + rd: contentReader, n: b.size, }, nil } diff --git a/modules/git/languagestats/language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go index d4fe84b342380..e7cd0dee4939b 100644 --- a/modules/git/languagestats/language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -113,7 +113,7 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, // If content can not be read or file is too big just do detection by filename if f.Size() <= bigFileSize { - object, batchReader, err := repo.ObjectPool().Object(f.ID.String()) + objectInfo, contentReader, err := repo.ObjectPool().Object(f.ID.String()) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, git.ErrNotExist{ID: f.ID.String()} @@ -121,21 +121,21 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err) return nil, err } - defer batchReader.Close() + defer contentReader.Close() - sizeToRead := object.Size + sizeToRead := objectInfo.Size discard := int64(1) - if object.Size > fileSizeLimit { + if objectInfo.Size > fileSizeLimit { sizeToRead = fileSizeLimit - discard = object.Size - fileSizeLimit + 1 + discard = objectInfo.Size - fileSizeLimit + 1 } - _, err = contentBuf.ReadFrom(io.LimitReader(batchReader, sizeToRead)) + _, err = contentBuf.ReadFrom(io.LimitReader(contentReader, sizeToRead)) if err != nil { return nil, err } content = contentBuf.Bytes() - if err := catfile.DiscardFull(batchReader, discard); err != nil { + if err := catfile.DiscardFull(contentReader, discard); err != nil { return nil, err } } diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index 7b8f4093a4605..a976a14ad4800 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -66,7 +66,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err commitReadingLoop: for { - object, batchReader, err := repo.ObjectPool().Object(commitID) + objectInfo, contentReader, err := repo.ObjectPool().Object(commitID) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, git.ErrNotExist{ID: commitID} @@ -74,24 +74,24 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } - switch object.Type { + switch objectInfo.Type { case "tag": // This shouldn't happen but if it does well just get the commit and try again - id, err := catfile.ReadTagObjectID(batchReader, object.Size) + id, err := catfile.ReadTagObjectID(contentReader, objectInfo.Size) if err != nil { - batchReader.Close() + contentReader.Close() return nil, err } commitID = id case "commit": // Read in the commit to get its tree and in case this is one of the last used commits - curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(commitID), io.LimitReader(batchReader, object.Size)) + curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(commitID), io.LimitReader(contentReader, objectInfo.Size)) if err != nil { - batchReader.Close() + contentReader.Close() return nil, err } - if _, err := batchReader.Discard(1); err != nil { - batchReader.Close() + if _, err := contentReader.Discard(1); err != nil { + contentReader.Close() return nil, err } @@ -99,10 +99,10 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err curPath = "" case "tree": var n int64 - for n < object.Size { - mode, fname, binObjectID, count, err := git.ParseCatFileTreeLine(objectID.Type(), batchReader, modeBuf, fnameBuf, workingShaBuf) + for n < objectInfo.Size { + mode, fname, binObjectID, count, err := git.ParseCatFileTreeLine(objectID.Type(), contentReader, modeBuf, fnameBuf, workingShaBuf) if err != nil { - batchReader.Close() + contentReader.Close() return nil, err } n += int64(count) @@ -122,8 +122,8 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err paths = append(paths, curPath+string(fname)+"/") } } - if _, err := batchReader.Discard(1); err != nil { - batchReader.Close() + if _, err := contentReader.Discard(1); err != nil { + contentReader.Close() return nil, err } if len(trees) > 0 { @@ -132,16 +132,16 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err trees = trees[:len(trees)-1] paths = paths[:len(paths)-1] } else { - batchReader.Close() + contentReader.Close() break commitReadingLoop } default: - if err := catfile.DiscardFull(batchReader, object.Size+1); err != nil { - batchReader.Close() + if err := catfile.DiscardFull(contentReader, objectInfo.Size+1); err != nil { + contentReader.Close() return nil, err } } - batchReader.Close() + contentReader.Close() } } diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index 01a9958f9ab91..0ab0b4a036277 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -61,7 +61,7 @@ func (repo *Repository) IsCommitExist(name string) bool { } func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { - object, rd, err := repo.objectPool.Object(id.String()) + objectInfo, contentReader, err := repo.objectPool.Object(id.String()) if err != nil { if errors.Is(err, io.EOF) || catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ID: id.String()} @@ -69,24 +69,24 @@ func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { return nil, err } - switch object.Type { + switch objectInfo.Type { case "missing": - rd.Close() + contentReader.Close() return nil, ErrNotExist{ID: id.String()} case "tag": // then we need to parse the tag // and load the commit - data, err := io.ReadAll(io.LimitReader(rd, object.Size)) + data, err := io.ReadAll(io.LimitReader(contentReader, objectInfo.Size)) if err != nil { - rd.Close() + contentReader.Close() return nil, err } - _, err = rd.Discard(1) + _, err = contentReader.Discard(1) if err != nil { - rd.Close() + contentReader.Close() return nil, err } - rd.Close() + contentReader.Close() tag, err := parseTagData(id.Type(), data) if err != nil { @@ -100,21 +100,21 @@ func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { return commit, nil case "commit": - defer rd.Close() - commit, err := CommitFromReader(repo, id, io.LimitReader(rd, object.Size)) + defer contentReader.Close() + commit, err := CommitFromReader(repo, id, io.LimitReader(contentReader, objectInfo.Size)) if err != nil { return nil, err } - _, err = rd.Discard(1) + _, err = contentReader.Discard(1) if err != nil { return nil, err } return commit, nil default: - defer rd.Close() - log.Debug("Unknown typ: %s", object.Type) - if err := catfile.DiscardFull(rd, object.Size+1); err != nil { + defer contentReader.Close() + log.Debug("Unknown typ: %s", objectInfo.Type) + if err := catfile.DiscardFull(contentReader, objectInfo.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ diff --git a/modules/git/repo_tag_nogogit.go b/modules/git/repo_tag_nogogit.go index bde5f50608606..114f2854a7015 100644 --- a/modules/git/repo_tag_nogogit.go +++ b/modules/git/repo_tag_nogogit.go @@ -79,17 +79,17 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { return tag, nil } - object, rd, err := repo.objectPool.Object(tagID.String()) + objectInfo, contentReader, err := repo.objectPool.Object(tagID.String()) if err != nil { if errors.Is(err, io.EOF) || catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ID: tagID.String()} } return nil, err } - defer rd.Close() + defer contentReader.Close() - if object.Type != "tag" { - if err := catfile.DiscardFull(rd, object.Size+1); err != nil { + if objectInfo.Type != "tag" { + if err := catfile.DiscardFull(contentReader, objectInfo.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ID: tagID.String()} @@ -97,11 +97,11 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { // then we need to parse the tag // and load the commit - data, err := io.ReadAll(io.LimitReader(rd, object.Size)) + data, err := io.ReadAll(io.LimitReader(contentReader, objectInfo.Size)) if err != nil { return nil, err } - _, err = rd.Discard(1) + _, err = contentReader.Discard(1) if err != nil { return nil, err } diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index 547741ad739f2..5954eef1d0c4d 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -12,7 +12,7 @@ import ( ) func (repo *Repository) getTree(id ObjectID) (*Tree, error) { - object, rd, err := repo.objectPool.Object(id.String()) + objectInfo, contentReader, err := repo.objectPool.Object(id.String()) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ @@ -22,20 +22,20 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { return nil, err } - switch object.Type { + switch objectInfo.Type { case "tag": resolvedID := id - data, err := io.ReadAll(io.LimitReader(rd, object.Size)) + data, err := io.ReadAll(io.LimitReader(contentReader, objectInfo.Size)) if err != nil { - rd.Close() + contentReader.Close() return nil, err } tag, err := parseTagData(id.Type(), data) if err != nil { - rd.Close() + contentReader.Close() return nil, err } - rd.Close() // close reader to avoid leaks + contentReader.Close() // close reader to avoid leaks commit, err := repo.getCommit(tag.Object) if err != nil { @@ -44,33 +44,33 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { commit.Tree.ResolvedID = resolvedID return &commit.Tree, nil case "commit": - defer rd.Close() - commit, err := CommitFromReader(repo, id, io.LimitReader(rd, object.Size)) + defer contentReader.Close() + commit, err := CommitFromReader(repo, id, io.LimitReader(contentReader, objectInfo.Size)) if err != nil { return nil, err } - if _, err := rd.Discard(1); err != nil { + if _, err := contentReader.Discard(1); err != nil { return nil, err } commit.Tree.ResolvedID = commit.ID return &commit.Tree, nil case "tree": - defer rd.Close() + defer contentReader.Close() tree := NewTree(repo, id) tree.ResolvedID = id objectFormat, err := repo.GetObjectFormat() if err != nil { return nil, err } - tree.entries, err = catBatchParseTreeEntries(objectFormat, tree, rd, object.Size) + tree.entries, err = catBatchParseTreeEntries(objectFormat, tree, contentReader, objectInfo.Size) if err != nil { return nil, err } tree.entriesParsed = true return tree, nil default: - defer rd.Close() - if err := catfile.DiscardFull(rd, object.Size+1); err != nil { + defer contentReader.Close() + if err := catfile.DiscardFull(contentReader, objectInfo.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ diff --git a/modules/git/tree_entry.go b/modules/git/tree_entry.go index e7e4ea2d5bdb2..b50a0f0d52727 100644 --- a/modules/git/tree_entry.go +++ b/modules/git/tree_entry.go @@ -126,13 +126,13 @@ func EntryFollowLinks(commit *Commit, firstFullPath string, firstTreeEntry *Tree } // returns the Tree pointed to by this TreeEntry, or nil if this is not a tree -func (te *TreeEntry) Tree() *Tree { +func (te *TreeEntry) Tree() (*Tree, error) { t, err := te.ptree.repo.getTree(te.ID) if err != nil { - return nil + return nil, err } t.ptree = te.ptree - return t + return t, nil } // GetSubJumpablePathName return the full path of subdirectory jumpable ( contains only one directory ) diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index 3e2a38fb2aa53..6338f0456335b 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -28,7 +28,7 @@ func (t *Tree) ListEntries() (Entries, error) { } if t.repo != nil { - object, rd, err := t.repo.objectPool.Object(t.ID.String()) + objectInfo, contentReader, err := t.repo.objectPool.Object(t.ID.String()) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ @@ -38,13 +38,13 @@ func (t *Tree) ListEntries() (Entries, error) { return nil, err } - if object.Type == "commit" { - treeID, err := catfile.ReadTreeID(rd, object.Size) - rd.Close() + if objectInfo.Type == "commit" { + treeID, err := catfile.ReadTreeID(contentReader, objectInfo.Size) + contentReader.Close() if err != nil && err != io.EOF { return nil, err } - object, rd, err = t.repo.objectPool.Object(treeID) + objectInfo, contentReader, err = t.repo.objectPool.Object(treeID) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ @@ -53,10 +53,10 @@ func (t *Tree) ListEntries() (Entries, error) { } return nil, err } - defer rd.Close() + defer contentReader.Close() } - if object.Type == "tree" { - t.entries, err = catBatchParseTreeEntries(t.ID.Type(), t, rd, object.Size) + if objectInfo.Type == "tree" { + t.entries, err = catBatchParseTreeEntries(t.ID.Type(), t, contentReader, objectInfo.Size) if err != nil { return nil, err } @@ -65,7 +65,7 @@ func (t *Tree) ListEntries() (Entries, error) { } // Not a tree just use ls-tree instead - if err := catfile.DiscardFull(rd, object.Size+1); err != nil { + if err := catfile.DiscardFull(contentReader, objectInfo.Size+1); err != nil { return nil, err } } diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index b760d5c33a34b..a448ca15921e7 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -177,18 +177,18 @@ func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, return b.addDelete(update.Filename, repo, batch) } - object, batchReader, err := objectPool.Object(update.BlobSha) + objectInfo, contentReader, err := objectPool.Object(update.BlobSha) if err != nil { if catfile.IsErrObjectNotFound(err) { return git.ErrNotExist{ID: update.BlobSha} } return err } - defer batchReader.Close() + defer contentReader.Close() - size = object.Size + size = objectInfo.Size - fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) + fileContents, err := io.ReadAll(io.LimitReader(contentReader, size)) if err != nil { return err } else if !typesniffer.DetectContentType(fileContents).IsText() { @@ -197,7 +197,7 @@ func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, fileContents = nil } - if _, err = batchReader.Discard(1); err != nil { + if _, err = contentReader.Discard(1); err != nil { return err } id := internal.FilenameIndexerID(repo.ID, update.Filename) diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index eeeacf9417f14..c52eb1f5331f5 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -162,18 +162,18 @@ func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil } - object, batchReader, err := objectPool.Object(update.BlobSha) + object, contentReader, err := objectPool.Object(update.BlobSha) if err != nil { if catfile.IsErrObjectNotFound(err) { return nil, git.ErrNotExist{ID: update.BlobSha} } return nil, err } - defer batchReader.Close() + defer contentReader.Close() size = object.Size - fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) + fileContents, err := io.ReadAll(io.LimitReader(contentReader, size)) if err != nil { return nil, err } else if !typesniffer.DetectContentType(fileContents).IsText() { @@ -181,7 +181,7 @@ func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, return nil, nil } - if _, err = batchReader.Discard(1); err != nil { + if _, err = contentReader.Discard(1); err != nil { return nil, err } id := internal.FilenameIndexerID(repo.ID, update.Filename) diff --git a/routers/api/v1/repo/hook_test.go b/routers/api/v1/repo/hook_test.go index f8d61ccf00067..451a57effeeb1 100644 --- a/routers/api/v1/repo/hook_test.go +++ b/routers/api/v1/repo/hook_test.go @@ -20,7 +20,8 @@ func TestTestHook(t *testing.T) { ctx, _ := contexttest.MockAPIContext(t, "user2/repo1/wiki/_pages") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) TestHook(ctx) assert.Equal(t, http.StatusNoContent, ctx.Resp.WrittenStatus()) diff --git a/routers/api/v1/utils/hook_test.go b/routers/api/v1/utils/hook_test.go index e5e8ce07cecfe..99eaa90c5c5e9 100644 --- a/routers/api/v1/utils/hook_test.go +++ b/routers/api/v1/utils/hook_test.go @@ -20,7 +20,8 @@ func TestTestHookValidation(t *testing.T) { t.Run("Test Validation", func(t *testing.T) { ctx, _ := contexttest.MockAPIContext(t, "user2/repo1/hooks") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) checkCreateHookOption(ctx, &structs.CreateHookOption{ @@ -36,7 +37,8 @@ func TestTestHookValidation(t *testing.T) { t.Run("Test Validation with invalid URL", func(t *testing.T) { ctx, _ := contexttest.MockAPIContext(t, "user2/repo1/hooks") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) checkCreateHookOption(ctx, &structs.CreateHookOption{ @@ -52,7 +54,8 @@ func TestTestHookValidation(t *testing.T) { t.Run("Test Validation with invalid webhook type", func(t *testing.T) { ctx, _ := contexttest.MockAPIContext(t, "user2/repo1/hooks") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) checkCreateHookOption(ctx, &structs.CreateHookOption{ @@ -68,7 +71,8 @@ func TestTestHookValidation(t *testing.T) { t.Run("Test Validation with empty content type", func(t *testing.T) { ctx, _ := contexttest.MockAPIContext(t, "user2/repo1/hooks") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) checkCreateHookOption(ctx, &structs.CreateHookOption{ diff --git a/routers/web/repo/view_readme.go b/routers/web/repo/view_readme.go index f1fa5732f0a79..11461568fe680 100644 --- a/routers/web/repo/view_readme.go +++ b/routers/web/repo/view_readme.go @@ -93,9 +93,9 @@ func findReadmeFileInEntries(ctx *context.Context, parentDir string, entries []* if subTreeEntry == nil { continue } - subTree := subTreeEntry.Tree() - if subTree == nil { - // this should be impossible; if subTreeEntry exists so should this. + subTree, err := subTreeEntry.Tree() + if err != nil { // this should be impossible; if subTreeEntry exists so should this. + log.Error("Unable to get tree for %s: %v", subTreeEntry.Name(), err) continue } childEntries, err := subTree.ListEntries() diff --git a/services/contexttest/context_tests.go b/services/contexttest/context_tests.go index 44d9f4a70f0ec..acac28d815ac9 100644 --- a/services/contexttest/context_tests.go +++ b/services/contexttest/context_tests.go @@ -130,7 +130,7 @@ func LoadRepo(t *testing.T, ctx gocontext.Context, repoID int64) { } // LoadRepoCommit loads a repo's commit into a test context. -func LoadRepoCommit(t *testing.T, ctx gocontext.Context) { +func LoadRepoCommit(t *testing.T, ctx gocontext.Context) func() { var repo *context.Repository switch ctx := ctx.(type) { case *context.Context: @@ -143,7 +143,12 @@ func LoadRepoCommit(t *testing.T, ctx gocontext.Context) { gitRepo, err := gitrepo.OpenRepository(ctx, repo.Repository) require.NoError(t, err) - defer gitRepo.Close() + // we can't close gitrepo here because it will be stored in Commit and be used by other places + cancel := func() { + if gitRepo != nil { + gitRepo.Close() + } + } if repo.RefFullName == "" { repo.RefFullName = git_module.RefNameFromBranch(repo.Repository.DefaultBranch) @@ -153,6 +158,7 @@ func LoadRepoCommit(t *testing.T, ctx gocontext.Context) { } repo.Commit, err = gitRepo.GetCommit(repo.RefFullName.String()) require.NoError(t, err) + return cancel } // LoadUser load a user into a test context diff --git a/services/repository/files/content_test.go b/services/repository/files/content_test.go index d72f918074d16..f551cfdcec1ff 100644 --- a/services/repository/files/content_test.go +++ b/services/repository/files/content_test.go @@ -25,7 +25,8 @@ func TestGetContents(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) diff --git a/services/repository/files/diff_test.go b/services/repository/files/diff_test.go index ae702e4189804..6b492b383a70f 100644 --- a/services/repository/files/diff_test.go +++ b/services/repository/files/diff_test.go @@ -20,7 +20,8 @@ func TestGetDiffPreview(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -137,7 +138,8 @@ func TestGetDiffPreviewErrors(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() diff --git a/services/repository/files/tree.go b/services/repository/files/tree.go index e481a3e7d2672..3f164ee7c29b9 100644 --- a/services/repository/files/tree.go +++ b/services/repository/files/tree.go @@ -207,7 +207,11 @@ func listTreeNodes(ctx context.Context, repoLink string, renderedIconPool *filei if subTreePath[0] == '/' { subTreePath = subTreePath[1:] } - subNodes, err := listTreeNodes(ctx, repoLink, renderedIconPool, commit, entry.Tree(), subTreePath, subPathRemaining) + tree, err := entry.Tree() + if err != nil { + return nil, err + } + subNodes, err := listTreeNodes(ctx, repoLink, renderedIconPool, commit, tree, subTreePath, subPathRemaining) if err != nil { log.Error("listTreeNodes: %v", err) } else { @@ -224,5 +228,9 @@ func GetTreeViewNodes(ctx context.Context, repoLink string, renderedIconPool *fi if err != nil { return nil, err } - return listTreeNodes(ctx, repoLink, renderedIconPool, commit, entry.Tree(), treePath, subPath) + tree, err := entry.Tree() + if err != nil { + return nil, err + } + return listTreeNodes(ctx, repoLink, renderedIconPool, commit, tree, treePath, subPath) } diff --git a/services/repository/files/tree_test.go b/services/repository/files/tree_test.go index e7511b3eed446..a3a2d864e9d0d 100644 --- a/services/repository/files/tree_test.go +++ b/services/repository/files/tree_test.go @@ -20,7 +20,8 @@ func TestGetTreeBySHA(t *testing.T) { unittest.PrepareTestEnv(t) ctx, _ := contexttest.MockContext(t, "user2/repo1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -59,7 +60,8 @@ func TestGetTreeViewNodes(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.Repo.RefFullName = git.RefNameFromBranch("sub-home-md-img-check") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() diff --git a/tests/integration/repofiles_change_test.go b/tests/integration/repofiles_change_test.go index 6fd42401c5259..3498a4c35b6fc 100644 --- a/tests/integration/repofiles_change_test.go +++ b/tests/integration/repofiles_change_test.go @@ -349,7 +349,8 @@ func TestChangeRepoFilesForCreate(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -386,7 +387,8 @@ func TestChangeRepoFilesForUpdate(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -420,7 +422,8 @@ func TestChangeRepoFilesForUpdateWithFileMove(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -469,7 +472,8 @@ func TestChangeRepoFilesForUpdateWithFileRename(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/lfs") ctx.SetPathParam("id", "54") contexttest.LoadRepo(t, ctx, 54) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -503,7 +507,8 @@ func TestChangeRepoFilesWithoutBranchNames(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -534,7 +539,8 @@ func TestChangeRepoFilesForDelete(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -621,7 +627,8 @@ func TestChangeRepoFilesErrors(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() From 69fd41ddd557a78dc8f300f49e29b88968301fd1 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 6 Jan 2026 14:29:47 -0800 Subject: [PATCH 07/11] Make duplicated close safe --- modules/git/catfile/object_pool_batch.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/git/catfile/object_pool_batch.go b/modules/git/catfile/object_pool_batch.go index b7b9821d73772..4a3962a90d60b 100644 --- a/modules/git/catfile/object_pool_batch.go +++ b/modules/git/catfile/object_pool_batch.go @@ -22,9 +22,11 @@ type batch struct { func (b *batch) Close() { if b.cancel != nil { b.cancel() + b.cancel = nil } if b.writer != nil { _ = b.writer.Close() + b.writer = nil } } @@ -157,7 +159,9 @@ func (b *batchObjectPool) Close() { for _, batch := range b.batches { batch.Close() } + b.batches = nil for _, batchCheck := range b.batchChecks { batchCheck.Close() } + b.batchChecks = nil } From 7f59b96680ee191c5f640be00cf6504eb0884ce2 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 6 Jan 2026 14:33:50 -0800 Subject: [PATCH 08/11] improvement --- modules/git/blob_nogogit.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index c90b6623dfd7c..abaea95335b96 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -92,12 +92,9 @@ func (b *blobReader) Close() error { if b.rd == nil { return nil } - if err := catfile.DiscardFull(b.rd, b.n+1); err != nil { - return err - } + err := catfile.DiscardFull(b.rd, b.n+1) b.rd.Close() b.rd = nil - - return nil + return err } From 32f6fe30472c1c0741356e10372e902019e2dbeb Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 6 Jan 2026 14:45:06 -0800 Subject: [PATCH 09/11] Fix bug --- modules/git/catfile/object_pool_batch.go | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/modules/git/catfile/object_pool_batch.go b/modules/git/catfile/object_pool_batch.go index 4a3962a90d60b..29810c599dffc 100644 --- a/modules/git/catfile/object_pool_batch.go +++ b/modules/git/catfile/object_pool_batch.go @@ -37,8 +37,8 @@ type batchObjectPool struct { batchChecks []*batch } -// NewBatch creates a new cat-file --batch process for the provided repository path. -// The returned Batch must be closed once the caller has finished with it. +// NewBatchObjectPool creates a new ObjectPool that uses git cat-file --batch and --batch-check +// to read objects from the repository at repoPath. func NewBatchObjectPool(ctx context.Context, repoPath string) (ObjectPool, error) { if err := EnsureValidGitRepository(ctx, repoPath); err != nil { return nil, err @@ -66,18 +66,14 @@ func (b *batchObjectPool) getBatch() *batch { return newBatch } +// newBatch creates a new cat-file --batch process for the provided repository path. +// The returned Batch must be closed when objectPool closed. func (b *batchObjectPool) newBatch() *batch { var batch batch batch.writer, batch.reader, batch.cancel = catFileBatch(b.ctx, b.repoPath) return &batch } -func (b *batchObjectPool) closeBatch(batch *batch) { - if batch != nil { - batch.inUse = false - } -} - func (b *batchObjectPool) getBatchCheck() *batch { for _, batch := range b.batchChecks { if !batch.inUse { @@ -94,13 +90,15 @@ func (b *batchObjectPool) getBatchCheck() *batch { return newBatch } +// newBatchCheck creates a new cat-file --batch-check process for the provided repository path. +// The returned Batch must be closed when objectPool closed. func (b *batchObjectPool) newBatchCheck() *batch { var check batch check.writer, check.reader, check.cancel = catFileBatchCheck(b.ctx, b.repoPath) return &check } -func (b *batchObjectPool) closeBatchCheck(batch *batch) { +func releaseBatchCheck(batch *batch) { if batch != nil { batch.inUse = false } @@ -108,7 +106,7 @@ func (b *batchObjectPool) closeBatchCheck(batch *batch) { func (b *batchObjectPool) ObjectInfo(refName string) (*ObjectInfo, error) { batch := b.getBatchCheck() - defer b.closeBatchCheck(batch) + defer releaseBatchCheck(batch) _, err := batch.writer.Write([]byte(refName + "\n")) if err != nil { @@ -137,7 +135,6 @@ func (rc *readCloser) Close() error { func (b *batchObjectPool) Object(refName string) (*ObjectInfo, ReadCloseDiscarder, error) { batch := b.getBatch() - defer b.closeBatch(batch) _, err := batch.writer.Write([]byte(refName + "\n")) if err != nil { From bd34c795032ad1cccfe72646cbcdc4a98c15dd8a Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 6 Jan 2026 20:07:28 -0800 Subject: [PATCH 10/11] Change some comments and fix test --- modules/git/catfile/object_pool_batch.go | 59 +++++++++++++++--------- modules/git/repo_tree_nogogit.go | 2 +- modules/git/tree_nogogit.go | 2 +- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/modules/git/catfile/object_pool_batch.go b/modules/git/catfile/object_pool_batch.go index 29810c599dffc..92c3cc9b5ffd1 100644 --- a/modules/git/catfile/object_pool_batch.go +++ b/modules/git/catfile/object_pool_batch.go @@ -40,62 +40,71 @@ type batchObjectPool struct { // NewBatchObjectPool creates a new ObjectPool that uses git cat-file --batch and --batch-check // to read objects from the repository at repoPath. func NewBatchObjectPool(ctx context.Context, repoPath string) (ObjectPool, error) { - if err := EnsureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - return &batchObjectPool{ ctx: ctx, repoPath: repoPath, }, nil } -func (b *batchObjectPool) getBatch() *batch { +func (b *batchObjectPool) getBatch() (*batch, error) { for _, batch := range b.batches { if !batch.inUse { batch.inUse = true - return batch + return batch, nil } } if len(b.batches) >= 1 { - log.Warn("Opening temporary cat file batch for: %s", b.repoPath) + log.Warn("Opening more than one cat file batch in the same goroutine for: %s", b.repoPath) + } + newBatch, err := b.newBatch() + if err != nil { + return nil, err } - newBatch := b.newBatch() newBatch.inUse = true b.batches = append(b.batches, newBatch) - return newBatch + return newBatch, nil } // newBatch creates a new cat-file --batch process for the provided repository path. // The returned Batch must be closed when objectPool closed. -func (b *batchObjectPool) newBatch() *batch { +func (b *batchObjectPool) newBatch() (*batch, error) { + if err := EnsureValidGitRepository(b.ctx, b.repoPath); err != nil { + return nil, err + } var batch batch batch.writer, batch.reader, batch.cancel = catFileBatch(b.ctx, b.repoPath) - return &batch + return &batch, nil } -func (b *batchObjectPool) getBatchCheck() *batch { +func (b *batchObjectPool) getBatchCheck() (*batch, error) { for _, batch := range b.batchChecks { if !batch.inUse { batch.inUse = true - return batch + return batch, nil } } if len(b.batchChecks) >= 1 { - log.Warn("Opening temporary cat file batch-check for: %s", b.repoPath) + log.Warn("Opening more than one cat file batch-check in the same goroutine for: %s", b.repoPath) + } + newBatch, err := b.newBatchCheck() + if err != nil { + return nil, err } - newBatch := b.newBatchCheck() newBatch.inUse = true b.batchChecks = append(b.batchChecks, newBatch) - return newBatch + return newBatch, nil } // newBatchCheck creates a new cat-file --batch-check process for the provided repository path. // The returned Batch must be closed when objectPool closed. -func (b *batchObjectPool) newBatchCheck() *batch { +func (b *batchObjectPool) newBatchCheck() (*batch, error) { + if err := EnsureValidGitRepository(b.ctx, b.repoPath); err != nil { + return nil, err + } + var check batch check.writer, check.reader, check.cancel = catFileBatchCheck(b.ctx, b.repoPath) - return &check + return &check, nil } func releaseBatchCheck(batch *batch) { @@ -105,10 +114,13 @@ func releaseBatchCheck(batch *batch) { } func (b *batchObjectPool) ObjectInfo(refName string) (*ObjectInfo, error) { - batch := b.getBatchCheck() + batch, err := b.getBatchCheck() + if err != nil { + return nil, err + } defer releaseBatchCheck(batch) - _, err := batch.writer.Write([]byte(refName + "\n")) + _, err = batch.writer.Write([]byte(refName + "\n")) if err != nil { return nil, err } @@ -134,9 +146,12 @@ func (rc *readCloser) Close() error { } func (b *batchObjectPool) Object(refName string) (*ObjectInfo, ReadCloseDiscarder, error) { - batch := b.getBatch() + batch, err := b.getBatch() + if err != nil { + return nil, nil, err + } - _, err := batch.writer.Write([]byte(refName + "\n")) + _, err = batch.writer.Write([]byte(refName + "\n")) if err != nil { return nil, nil, err } diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index 5954eef1d0c4d..46c63f1480604 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -35,7 +35,7 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { contentReader.Close() return nil, err } - contentReader.Close() // close reader to avoid leaks + contentReader.Close() // close reader to avoid open a new process in the same goroutine commit, err := repo.getCommit(tag.Object) if err != nil { diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index 6338f0456335b..ed2521152086f 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -40,7 +40,7 @@ func (t *Tree) ListEntries() (Entries, error) { if objectInfo.Type == "commit" { treeID, err := catfile.ReadTreeID(contentReader, objectInfo.Size) - contentReader.Close() + contentReader.Close() // close reader to avoid open a new process in the same goroutine if err != nil && err != io.EOF { return nil, err } From 0322178d13a9b614657cef763f7a7d1f0b062e56 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 6 Jan 2026 22:09:09 -0800 Subject: [PATCH 11/11] Fix bug --- modules/git/catfile/object_pool_batch.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/git/catfile/object_pool_batch.go b/modules/git/catfile/object_pool_batch.go index 92c3cc9b5ffd1..b7955fc487eea 100644 --- a/modules/git/catfile/object_pool_batch.go +++ b/modules/git/catfile/object_pool_batch.go @@ -153,6 +153,7 @@ func (b *batchObjectPool) Object(refName string) (*ObjectInfo, ReadCloseDiscarde _, err = batch.writer.Write([]byte(refName + "\n")) if err != nil { + batch.inUse = false return nil, nil, err } @@ -160,6 +161,7 @@ func (b *batchObjectPool) Object(refName string) (*ObjectInfo, ReadCloseDiscarde var oid []byte oid, obj.Type, obj.Size, err = ReadBatchLine(batch.reader) if err != nil { + batch.inUse = false return nil, nil, err } obj.ID = string(oid)