Skip to content

Commit 6444502

Browse files
dmitshur authored and gopherbot committed
cmd/coordinator: triple writeSnapshot timeout for reverse builders
This timeout is meant to be an upper bound, and some reverse builders have been observed to need a bit over 5 minutes to finish the upload. Give them more time and update the comment to describe the 2022 state. Also log how many bytes they've managed to copy before failing.

Updates golang/go#52235.
Updates golang/go#49149.

Change-Id: I20f850620f0aa8126968862f2ad9a096fa32ce03
Reviewed-on: https://go-review.googlesource.com/c/build/+/398697
Trust: Carlos Amedee <[email protected]>
Reviewed-by: Carlos Amedee <[email protected]>
Trust: Dmitri Shuralyov <[email protected]>
Run-TryBot: Dmitri Shuralyov <[email protected]>
Auto-Submit: Dmitri Shuralyov <[email protected]>
Reviewed-by: Heschi Kreinick <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
1 parent 58bb2c5 commit 6444502

File tree

1 file changed

+12
-7
lines changed

1 file changed

+12
-7
lines changed

Diff for: cmd/coordinator/buildstatus.go

+12-7
Original file line number | Diff line number | Diff line change
@@ -864,11 +864,16 @@ func (st *buildStatus) cleanForSnapshot(bc buildlet.Client) error {
864864
func (st *buildStatus) writeSnapshot(bc buildlet.Client) (err error) {
865865
sp := st.CreateSpan("write_snapshot_to_gcs")
866866
defer func() { sp.Done(err) }()
867-
// This should happen in 15 seconds or so, but I saw timeouts
868-
// a couple times at 1 minute. Some buildlets might be far
869-
// away on the network, so be more lenient. The timeout mostly
870-
// is here to prevent infinite hangs.
871-
ctx, cancel := context.WithTimeout(st.ctx, 5*time.Minute)
867+
// A typical Go snapshot tarball in April 2022 is around 150 MB in size.
868+
// Builders with a fast uplink speed can upload the tar within seconds or minutes.
869+
// Reverse builders might be far away on the network, so be more lenient for them.
870+
// (Fast builds require a sufficiently fast uplink speed or turning off snapshots,
871+
// so the timeout here is mostly an upper bound to prevent infinite hangs.)
872+
timeout := 5 * time.Minute
873+
if st.conf.IsReverse() {
874+
timeout *= 3
875+
}
876+
ctx, cancel := context.WithTimeout(st.ctx, timeout)
872877
defer cancel()
873878

874879
tsp := st.CreateSpan("fetch_snapshot_reader_from_buildlet")
@@ -890,8 +895,8 @@ func (st *buildStatus) writeSnapshot(bc buildlet.Client) (err error) {
890895
wr := sc.Bucket(bucket).Object(st.SnapshotObjectName()).NewWriter(ctx)
891896
wr.ContentType = "application/octet-stream"
892897
wr.ACL = append(wr.ACL, storage.ACLRule{Entity: storage.AllUsers, Role: storage.RoleReader})
893-
if _, err := io.Copy(wr, tgz); err != nil {
894-
st.logf("failed to write snapshot to GCS: %v", err)
898+
if n, err := io.Copy(wr, tgz); err != nil {
899+
st.logf("failed to write snapshot to GCS after copying %d bytes: %v", n, err)
895900
return err
896901
}
897902

0 commit comments

Comments
 (0)