Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 0 additions & 35 deletions modules/git/attribute.go

This file was deleted.

107 changes: 107 additions & 0 deletions modules/git/attribute/attribute.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package attribute

import (
"strings"

"code.gitea.io/gitea/modules/optional"
)

// Attribute is the raw string value of a single attribute.
// The values "" and "unspecified" are treated as "no value" (see IsUnspecified),
// and "set"/"true"/"unset"/"false" are the values ToBool understands.
type Attribute string

// Attribute names referenced by this package.
// They are untyped string constants, so they can be used directly as keys of Attributes.
const (
LinguistVendored = "linguist-vendored"
LinguistGenerated = "linguist-generated"
LinguistDocumentation = "linguist-documentation"
LinguistDetectable = "linguist-detectable"
LinguistLanguage = "linguist-language"
GitlabLanguage = "gitlab-language"
Lockable = "lockable"
)

// LinguistAttributes lists the linguist-* attribute names together with gitlab-language.
// Lockable is intentionally not part of this list.
var LinguistAttributes = []string{
LinguistVendored,
LinguistGenerated,
LinguistDocumentation,
LinguistDetectable,
LinguistLanguage,
GitlabLanguage,
}

// IsUnspecified reports whether the attribute carries no value, i.e. it is
// either the empty string or the literal "unspecified".
func (a Attribute) IsUnspecified() bool {
	switch a {
	case "", "unspecified":
		return true
	default:
		return false
	}
}

// ToString returns the attribute value as an optional string:
// none when the attribute is unspecified, otherwise the raw value.
func (a Attribute) ToString() optional.Option[string] {
	if a.IsUnspecified() {
		return optional.None[string]()
	}
	return optional.Some(string(a))
}

// ToBool interprets the attribute as a boolean:
// true for "set"/"true", false for "unset"/"false", none for anything else.
func (a Attribute) ToBool() optional.Option[bool] {
	if a == "set" || a == "true" {
		return optional.Some(true)
	}
	if a == "unset" || a == "false" {
		return optional.Some(false)
	}
	return optional.None[bool]()
}

// Attributes maps an attribute name to its value.
type Attributes map[string]Attribute

// Get returns the value stored under name, or the empty Attribute
// when the name is not present in the map.
func (attrs Attributes) Get(name string) Attribute {
	// Indexing a map (even a nil one) with a missing key yields the zero value "".
	return attrs[name]
}

// GetVendored returns the boolean reading of the "linguist-vendored" attribute,
// or none when it is missing or not a recognized boolean value.
func (attrs Attributes) GetVendored() optional.Option[bool] {
	vendored := attrs.Get(LinguistVendored)
	return vendored.ToBool()
}

// GetGenerated returns the boolean reading of the "linguist-generated" attribute,
// or none when it is missing or not a recognized boolean value.
func (attrs Attributes) GetGenerated() optional.Option[bool] {
	generated := attrs.Get(LinguistGenerated)
	return generated.ToBool()
}

// GetDocumentation returns the boolean reading of the "linguist-documentation" attribute,
// or none when it is missing or not a recognized boolean value.
func (attrs Attributes) GetDocumentation() optional.Option[bool] {
	documentation := attrs.Get(LinguistDocumentation)
	return documentation.ToBool()
}

// GetDetectable returns the boolean reading of the "linguist-detectable" attribute,
// or none when it is missing or not a recognized boolean value.
func (attrs Attributes) GetDetectable() optional.Option[bool] {
	detectable := attrs.Get(LinguistDetectable)
	return detectable.ToBool()
}

// GetLinguistLanguage returns the value of the "linguist-language" attribute,
// or none when it is missing or unspecified.
func (attrs Attributes) GetLinguistLanguage() optional.Option[string] {
	language := attrs.Get(LinguistLanguage)
	return language.ToString()
}

// GetGitlabLanguage returns the language named by the "gitlab-language" attribute,
// or none when the attribute is missing or unspecified.
//
// gitlab-language may carry additional parameters after a '?'; these are dropped
// and only the main language (everything before the first '?') is returned.
// https://docs.gitlab.com/ee/user/project/highlighting.html#override-syntax-highlighting-for-a-file-type
func (attrs Attributes) GetGitlabLanguage() optional.Option[string] {
	language := attrs.Get(GitlabLanguage).ToString()
	if !language.Has() {
		return language
	}
	if name, _, hasParams := strings.Cut(language.Value(), "?"); hasParams {
		return optional.Some(name)
	}
	return language
}

// GetLanguage resolves the language override for a file.
// linguist-language wins when it has a non-empty value; otherwise the result of
// GetGitlabLanguage is returned as-is (which is none when that is not set either).
func (attrs Attributes) GetLanguage() optional.Option[string] {
	if linguist := attrs.GetLinguistLanguage(); linguist.Value() != "" {
		return linguist
	}
	return attrs.GetGitlabLanguage()
}
35 changes: 35 additions & 0 deletions modules/git/attribute/attribute_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package attribute

import (
"testing"

"github.com/stretchr/testify/assert"
)

func Test_Attribute(t *testing.T) {
assert.Empty(t, Attribute("").ToString().Value())
assert.Empty(t, Attribute("unspecified").ToString().Value())
assert.Equal(t, "python", Attribute("python").ToString().Value())
assert.Equal(t, "Java", Attribute("Java").ToString().Value())

attributes := Attributes{
LinguistGenerated: "true",
LinguistDocumentation: "false",
LinguistDetectable: "set",
LinguistLanguage: "Python",
GitlabLanguage: "Java",
"filter": "unspecified",
"test": "",
}

assert.Empty(t, attributes.Get("test").ToString().Value())
assert.Empty(t, attributes.Get("filter").ToString().Value())
assert.Equal(t, "Python", attributes.Get(LinguistLanguage).ToString().Value())
assert.Equal(t, "Java", attributes.Get(GitlabLanguage).ToString().Value())
assert.True(t, attributes.Get(LinguistGenerated).ToBool().Value())
assert.False(t, attributes.Get(LinguistDocumentation).ToBool().Value())
assert.True(t, attributes.Get(LinguistDetectable).ToBool().Value())
}
216 changes: 216 additions & 0 deletions modules/git/attribute/batch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package attribute

import (
"bytes"
"context"
"fmt"
"os"
"path/filepath"
"time"

"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
)

// BatchChecker provides a reader for check-attribute content that can be long running.
// Paths are fed to the running command through stdinWriter and the parsed results are
// read back from stdOut (see CheckPath).
type BatchChecker struct {
// attributesNum is the number of requested attributes; CheckPath reads this many results per path
attributesNum int
// repo is the repository the command runs in
repo *git.Repository
// stdinWriter is the write end of the pipe connected to the command's stdin
stdinWriter *os.File
// stdOut receives the command's stdout and splits it into attribute triples
stdOut *nulSeparatedAttributeWriter
// ctx is derived from repo.Ctx and is canceled by cancel
ctx context.Context
// cancel cancels ctx and runs the cleanup function returned by checkAttrCommand
cancel context.CancelFunc
// cmd is the command being run; it is used for diagnostics when CheckPath times out
cmd *git.Command
}

// NewBatchChecker creates a check attribute reader for the current repository and provided commit ID.
// If treeish is empty, then it will use current working directory, otherwise it will use the provided treeish on the bare repo.
//
// The command built by checkAttrCommand gets "--stdin" appended and is run in a
// background goroutine inside repo.Path. Its stdin is the read end of an os.Pipe
// (the write end is kept on the checker for CheckPath) and its stdout is parsed by
// a nulSeparatedAttributeWriter. When the command exits, the goroutine cancels the
// checker and closes the stdin reader and the output writer.
//
// On error the derived context is canceled and, once the command has been built,
// its cleanup function is run before returning. The caller must call Close on the
// returned checker when done.
func NewBatchChecker(repo *git.Repository, treeish string, attributes []string) (checker *BatchChecker, returnedErr error) {
	ctx, cancel := context.WithCancel(repo.Ctx)
	defer func() {
		if returnedErr != nil {
			cancel()
		}
	}()

	cmd, envs, cleanup, err := checkAttrCommand(repo, treeish, nil, attributes)
	if err != nil {
		return nil, err
	}
	defer func() {
		if returnedErr != nil {
			cleanup()
		}
	}()

	cmd.AddArguments("--stdin")

	checker = &BatchChecker{
		attributesNum: len(attributes),
		repo:          repo,
		ctx:           ctx,
		cmd:           cmd,
		cancel: func() {
			cancel()
			cleanup()
		},
	}

	stdinReader, stdinWriter, err := os.Pipe()
	if err != nil {
		return nil, err
	}
	checker.stdinWriter = stdinWriter

	// The channel is sized to hold the complete result set of one path.
	lw := new(nulSeparatedAttributeWriter)
	lw.attributes = make(chan attributeTriple, len(attributes))
	lw.closed = make(chan struct{})
	checker.stdOut = lw

	go func() {
		defer func() {
			_ = stdinReader.Close()
			_ = lw.Close()
		}()
		stdErr := new(bytes.Buffer)
		err := cmd.Run(ctx, &git.RunOpts{
			Env:    envs,
			Dir:    repo.Path,
			Stdin:  stdinReader,
			Stdout: lw,
			Stderr: stdErr,
		})

		if err != nil && !git.IsErrCanceledOrKilled(err) {
			// Include the captured stderr, otherwise the reason for the failure is lost.
			log.Error("Attribute checker for commit %s exits with error: %v, stderr: %s", treeish, err, stdErr.String())
		}
		// Cancel before the deferred lw.Close so that readers observing the closed
		// channel already see a non-nil ctx.Err().
		checker.cancel()
	}()

	return checker, nil
}

// CheckPath checks the attributes of the given path.
//
// The path is written NUL-terminated to the command's stdin, then one result per
// requested attribute is read back and collected into the returned map.
// It returns the context error if the checker has been canceled, the write error
// (after closing the checker) if the path cannot be sent, and a timeout error
// (after closing the checker) if a single result does not arrive within 5 seconds.
// Errors other than the context error are also logged.
func (c *BatchChecker) CheckPath(path string) (rs Attributes, err error) {
	defer func() {
		if err == nil || err == c.ctx.Err() {
			return
		}
		log.Error("Unexpected error when checking path %s in %s, error: %v", path, filepath.Base(c.repo.Path), err)
	}()

	// Refuse to work on a checker that is already canceled.
	if ctxErr := c.ctx.Err(); ctxErr != nil {
		return nil, ctxErr
	}

	if _, err = c.stdinWriter.Write([]byte(path + "\x00")); err != nil {
		_ = c.Close()
		return nil, err
	}

	// timeoutError gathers the parser and process state for diagnostics,
	// closes the checker and builds the error to return.
	timeoutError := func() error {
		outputClosed := false
		select {
		case <-c.stdOut.closed:
			outputClosed = true
		default:
		}
		details := fmt.Sprintf("check path %q in repo %q", path, filepath.Base(c.repo.Path)) +
			fmt.Sprintf(", stdOut: tmp=%q, pos=%d, closed=%v", string(c.stdOut.tmp), c.stdOut.pos, outputClosed)
		if c.cmd != nil {
			details += fmt.Sprintf(", process state: %q", c.cmd.ProcessState())
		}
		_ = c.Close()
		return fmt.Errorf("CheckPath timeout: %s", details)
	}

	rs = Attributes{}
	for received := 0; received < c.attributesNum; received++ {
		select {
		case <-time.After(5 * time.Second):
			// there is no "hang" problem now. This code is just used to catch other potential problems.
			return nil, timeoutError()
		case triple, open := <-c.stdOut.ReadAttribute():
			if !open {
				return nil, c.ctx.Err()
			}
			rs[triple.Attribute] = Attribute(triple.Value)
		case <-c.ctx.Done():
			return nil, c.ctx.Err()
		}
	}
	return rs, nil
}

// Close cancels the checker and closes the write end of the stdin pipe,
// returning the error from closing the pipe.
func (c *BatchChecker) Close() error {
	c.cancel()
	return c.stdinWriter.Close()
}

// attributeTriple is one parsed result record: the file name, the attribute
// name and the attribute value, in the order they appear in the output stream.
type attributeTriple struct {
	Filename  string
	Attribute string
	Value     string
}

// nulSeparatedAttributeWriter consumes a stream of NUL-terminated fields and
// groups every three consecutive fields into an attributeTriple that is sent
// on the attributes channel.
type nulSeparatedAttributeWriter struct {
	tmp        []byte               // bytes of the field currently being assembled
	attributes chan attributeTriple // completed triples, closed by Close
	closed     chan struct{}        // closed by Close to mark the writer as closed
	working    attributeTriple      // triple currently being filled
	pos        int                  // index (0..2) of the field being assembled
}

// Write feeds p into the parser. Fields may be split across calls: bytes after
// the last NUL are buffered until a later call completes the field. Sending a
// completed triple blocks when the attributes channel is full.
// It always reports len(p) bytes written and a nil error.
func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
	total := len(p)
	for {
		end := bytes.IndexByte(p, '\x00')
		if end < 0 {
			break
		}

		// Complete the current field with the bytes up to the separator.
		wr.tmp = append(wr.tmp, p[:end]...)
		field := string(wr.tmp)
		wr.tmp = wr.tmp[:0]

		switch wr.pos {
		case 0:
			// A new file name starts a fresh triple.
			wr.working = attributeTriple{Filename: field}
		case 1:
			wr.working.Attribute = field
		case 2:
			wr.working.Value = field
		}

		wr.pos++
		if wr.pos > 2 {
			wr.attributes <- wr.working
			wr.pos = 0
		}

		p = p[end+1:]
	}
	// Keep the unterminated remainder for the next call.
	wr.tmp = append(wr.tmp, p...)
	return total, nil
}

// ReadAttribute returns the receive side of the channel of parsed triples.
func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
	return wr.attributes
}

// Close closes the attributes channel and marks the writer as closed.
// Calling it again after it has completed is a no-op. It always returns nil.
func (wr *nulSeparatedAttributeWriter) Close() error {
	select {
	case <-wr.closed:
		// already closed
	default:
		close(wr.attributes)
		close(wr.closed)
	}
	return nil
}
Loading