Skip to content

Commit

Permalink
cmd/go: check that expected Origin fields are present to reuse module…
Browse files Browse the repository at this point in the history
… info

When 'go list' or 'go mod download' uses a proxy to resolve a version
query like "@latest", it may have origin metadata about the resolved
version but not about the inputs that would be needed to resolve the
same query without using the proxy.

We shouldn't just redact the incomplete information, because it might
be useful independent of the -reuse flag. Instead, we examine the
query to decide which origin information it ought to need, and avoid
reusing it if that information isn't included.

Fixes #61423.

Change-Id: Ibeaa46ebba284beee285cbb1898e271e5a5b257b
Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest,gotip-windows-amd64-longtest
Reviewed-on: https://go-review.googlesource.com/c/go/+/543155
Reviewed-by: Michael Matloob <[email protected]>
Commit-Queue: Bryan Mills <[email protected]>
Auto-Submit: Bryan Mills <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
  • Loading branch information
Bryan C. Mills authored and gopherbot committed Nov 17, 2023
1 parent 3ff5632 commit bb8a96f
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 19 deletions.
7 changes: 4 additions & 3 deletions src/cmd/go/internal/modfetch/codehost/codehost.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ type Origin struct {
URL string `json:",omitempty"` // URL of repository
Subdir string `json:",omitempty"` // subdirectory in repo

Hash string `json:",omitempty"` // commit hash or ID

// If TagSum is non-empty, then the resolution of this module version
// depends on the set of tags present in the repo, specifically the tags
// of the form TagPrefix + a valid semver version.
Expand All @@ -111,8 +113,7 @@ type Origin struct {
// and the Hash is the Git object hash the ref maps to.
// Other VCS might choose differently, but the idea is that Ref is the name
// with a mutable meaning while Hash is a name with an immutable meaning.
Ref string `json:",omitempty"`
Hash string `json:",omitempty"`
Ref string `json:",omitempty"`

// If RepoSum is non-empty, then the resolution of this module version
// failed due to the repo being available but the version not being present.
Expand All @@ -124,7 +125,7 @@ type Origin struct {
// Checkable reports whether the Origin contains anything that can be checked.
// If not, the Origin is purely informational and should fail a CheckReuse call.
func (o *Origin) Checkable() bool {
return o.TagSum != "" || o.Ref != "" || o.Hash != "" || o.RepoSum != ""
return o != nil && (o.TagSum != "" || o.Ref != "" || o.Hash != "" || o.RepoSum != "")
}

// ClearCheckable clears the Origin enough to make Checkable return false.
Expand Down
25 changes: 15 additions & 10 deletions src/cmd/go/internal/modload/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ func mergeOrigin(m1, m2 *codehost.Origin) *codehost.Origin {
// Excluded versions will be omitted. If listRetracted is false, retracted
// versions will also be omitted.
func addVersions(ctx context.Context, m *modinfo.ModulePublic, listRetracted bool) {
// TODO(bcmills): Would it make sense to check for reuse here too?
// Perhaps that doesn't buy us much, though: we would always have to fetch
// all of the version tags to list the available versions anyway.

allowed := CheckAllowed
if listRetracted {
allowed = CheckExclusions
Expand Down Expand Up @@ -319,29 +323,30 @@ func moduleInfo(ctx context.Context, rs *Requirements, m module.Version, mode Li
return
}

if old := reuse[module.Version{Path: m.Path, Version: m.Version}]; old != nil {
if err := checkReuse(ctx, m.Path, old.Origin); err == nil {
*m = *old
m.Query = ""
m.Dir = ""
return
}
}

checksumOk := func(suffix string) bool {
return rs == nil || m.Version == "" || !mustHaveSums() ||
modfetch.HaveSum(module.Version{Path: m.Path, Version: m.Version + suffix})
}

mod := module.Version{Path: m.Path, Version: m.Version}

if m.Version != "" {
if old := reuse[mod]; old != nil && old.Origin.Checkable() {
if err := checkReuse(ctx, mod, old.Origin); err == nil {
*m = *old
m.Query = ""
m.Dir = ""
return
}
}

if q, err := Query(ctx, m.Path, m.Version, "", nil); err != nil {
m.Error = &modinfo.ModuleError{Err: err.Error()}
} else {
m.Version = q.Version
m.Time = &q.Time
}
}
mod := module.Version{Path: m.Path, Version: m.Version}

if m.GoVersion == "" && checksumOk("/go.mod") {
// Load the go.mod file to determine the Go version, since it hasn't
Expand Down
74 changes: 68 additions & 6 deletions src/cmd/go/internal/modload/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,18 +98,80 @@ func queryReuse(ctx context.Context, path, query, current string, allowed Allowe
return info, err
}

// checkReuse checks whether a revision of a given module or a version list
// checkReuse checks whether a revision of a given module
// for a given module may be reused, according to the information in origin.
func checkReuse(ctx context.Context, path string, old *codehost.Origin) error {
func checkReuse(ctx context.Context, m module.Version, old *codehost.Origin) error {
return modfetch.TryProxies(func(proxy string) error {
repo, err := lookupRepo(ctx, proxy, path)
repo, err := lookupRepo(ctx, proxy, m.Path)
if err != nil {
return err
}
return repo.CheckReuse(ctx, old)
return checkReuseRepo(ctx, repo, m.Path, m.Version, old)
})
}

func checkReuseRepo(ctx context.Context, repo versionRepo, path, query string, origin *codehost.Origin) error {
if !origin.Checkable() {
return errors.New("Origin is not checkable")
}

// Ensure that the Origin actually includes enough fields to resolve the query.
// If we got the previous Origin data from a proxy, it may be missing something
// that we would have needed to resolve the query directly from the repo.
switch {
case origin.RepoSum != "":
// A RepoSum is always acceptable, since it incorporates everything
// (and is often associated with an error result).

case query == module.CanonicalVersion(query):
// This query refers to a specific version, and Go module versions
// are supposed to be cacheable and immutable (confirmed with checksums).
// If the version exists at all, we shouldn't need any extra information
// to identify which commit it resolves to.
//
// It may be associated with a Ref for a semantic-version tag, but if so
// we don't expect that tag to change in the future. We also don't need a
// TagSum: if a tag is removed from some ancestor commit, the version may
// change from valid to invalid, but we're ok with keeping stale versions
// as long as they were valid at some point in the past.
//
// If the version did not successfully resolve, the origin may indicate
// a TagSum and/or RepoSum instead of a Hash, in which case we still need
// to check those to ensure that the error is still applicable.

case IsRevisionQuery(path, query):
// This query may refer to a branch, non-version tag, or commit ID.
//
// If it is a commit ID, we expect to see a Hash in the Origin data. On
// the other hand, if it is not a commit ID, we expect to see either a Ref
// (for a positive result) or a RepoSum (for a negative result), since
// we don't expect refs in general to remain stable over time.
if origin.Hash == "" && origin.Ref == "" {
return fmt.Errorf("query %q requires a Hash or Ref", query)
}
// Once we resolve the query to a particular commit, we will need to
// also identify the most appropriate version to assign to that commit.
// (It may correspond to more than one valid version.)
//
// The most appropriate version depends on the tags associated with
// both the commit itself (if the commit is a tagged version)
// and its ancestors (if we need to produce a pseudo-version for it).
if origin.TagSum == "" {
return fmt.Errorf("query %q requires a TagSum", query)
}

default:
// The query may be "latest" or a version inequality or prefix.
// Its result depends on the absence of higher tags matching the query,
// not just the state of an individual ref or tag.
if origin.TagSum == "" {
return fmt.Errorf("query %q requires a TagSum", query)
}
}

return repo.CheckReuse(ctx, origin)
}

// AllowedFunc is used by Query and other functions to filter out unsuitable
// versions, for example, those listed in exclude directives in the main
// module's go.mod file.
Expand Down Expand Up @@ -163,8 +225,8 @@ func queryProxy(ctx context.Context, proxy, path, query, current string, allowed
return nil, err
}

if old := reuse[module.Version{Path: path, Version: query}]; old != nil {
if err := repo.CheckReuse(ctx, old.Origin); err == nil {
if old := reuse[module.Version{Path: path, Version: query}]; old != nil && old.Origin.Checkable() {
if err := checkReuseRepo(ctx, repo, path, query, old.Origin); err == nil {
info := &modfetch.RevInfo{
Version: old.Version,
Origin: old.Origin,
Expand Down
89 changes: 89 additions & 0 deletions src/cmd/go/testdata/script/mod_list_issue61423.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
[short] skip 'generates a vcstest git repo'
[!git] skip

mkdir $WORK/mod1
mkdir $WORK/mod2
env GONOSUMDB=vcs-test.golang.org

env GOPROXY=direct
env GOMODCACHE=$WORK/mod1


# If we query a module version from a git repo, we expect its
# Origin data to be reusable.

go list -m -json vcs-test.golang.org/git/issue61415.git@latest
cp stdout git-latest.json
stdout '"Version": "v0.0.0-20231114180001-f213069baa68"'
stdout '"Origin":'
stdout '"VCS": "git"'
stdout '"Hash": "f213069baa68ec26412fb373c7cf6669db1f8e69"'
stdout '"Ref": "HEAD"'
stdout '"TagSum": "t1:47DEQpj8HBSa\+/TImW\+5JCeuQeRkm5NMpJWZG3hSuFU="'

go list -reuse=git-latest.json -m -json vcs-test.golang.org/git/issue61415.git@latest
stdout '"Version": "v0.0.0-20231114180001-f213069baa68"'
stdout '"Origin":'
stdout '"VCS": "git"'
stdout '"Hash": "f213069baa68ec26412fb373c7cf6669db1f8e69"'
stdout '"Ref": "HEAD"'
stdout '"TagSum": "t1:47DEQpj8HBSa\+/TImW\+5JCeuQeRkm5NMpJWZG3hSuFU="'
stdout '"Reuse": true'


# Now we construct a filesystem-based module proxy that
# contains only an older commit.

go clean -modcache

go mod download -json vcs-test.golang.org/git/issue61415.git@08a4fa6bb9c04ffba03b26ae427b0d6335d90a2a
stdout '"Version": "v0.0.0-20231114180000-08a4fa6bb9c0"'
stdout '"Origin":'
stdout '"VCS": "git"'
stdout '"Hash": "08a4fa6bb9c04ffba03b26ae427b0d6335d90a2a"'

[GOOS:windows] env GOPROXY=file:///$WORK/mod1/cache/download
[!GOOS:windows] env GOPROXY=file://$WORK/mod1/cache/download
env GOMODCACHE=$WORK/modcache2


# If we resolve the "latest" version query using a proxy,
# it is only going to have Git origin information about the one
# commit — not the other tags that would go into resolving
# the underlying version list.

go list -m -json vcs-test.golang.org/git/issue61415.git@latest
cp stdout proxy-latest.json
stdout '"Version": "v0.0.0-20231114180000-08a4fa6bb9c0"'
stdout '"Origin":'
stdout '"VCS": "git"'
stdout '"Hash": "08a4fa6bb9c04ffba03b26ae427b0d6335d90a2a"'
! stdout '"Ref":'
! stdout '"TagSum":'

# The -reuse flag has no effect with a proxy, since the proxy can serve
# metadata about a given module version cheaply anyway.
go list -reuse=proxy-latest.json -m -json vcs-test.golang.org/git/issue61415.git@latest
stdout '"Version": "v0.0.0-20231114180000-08a4fa6bb9c0"'
stdout '"Origin":'
stdout '"VCS": "git"'
stdout '"Hash": "08a4fa6bb9c04ffba03b26ae427b0d6335d90a2a"'
! stdout '"Ref":'
! stdout '"TagSum":'
! stdout '"Reuse":'


# With GOPROXY=direct, the -reuse flag has an effect, but
# the Origin data from the proxy should not be sufficient
# for the proxy response to be reused.

env GOPROXY=direct

go list -reuse=proxy-latest.json -m -json vcs-test.golang.org/git/issue61415.git@latest
stdout '"Version": "v0.0.0-20231114180001-f213069baa68"'
stdout '"Origin":'
stdout '"VCS": "git"'
stdout '"Hash": "f213069baa68ec26412fb373c7cf6669db1f8e69"'
stdout '"Ref": "HEAD"'
stdout '"TagSum": "t1:47DEQpj8HBSa\+/TImW\+5JCeuQeRkm5NMpJWZG3hSuFU="'
! stdout '"Reuse":'

0 comments on commit bb8a96f

Please sign in to comment.