Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 34 additions & 9 deletions .github/workflows/usage-metrics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:
# Query all versions in a single command
go run collect-metrics.go $VERSION_FLAGS -csv "../docs/usage-metrics.csv"

- name: Create Pull Request
- name: Create or update Pull Request
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
Expand All @@ -75,16 +75,41 @@ jobs:
exit 0
fi

# Create a new branch for the PR
DATE=$(date +%Y-%m-%d)
BRANCH_NAME="chore/update-usage-metrics-$DATE"
git checkout -b "$BRANCH_NAME"
MONTH=$(date +%Y-%m)
BRANCH_NAME="chore/update-usage-metrics-$MONTH"

# Check if the branch already exists on the remote
if git ls-remote --heads origin "$BRANCH_NAME" | grep -q "$BRANCH_NAME"; then
echo "Branch $BRANCH_NAME already exists, pushing new commit to it"
# Preserve the newly collected metrics before switching branches
cp docs/usage-metrics.csv /tmp/usage-metrics.csv
git fetch origin "$BRANCH_NAME"
git checkout "$BRANCH_NAME"
# Restore the newly collected metrics on top of the existing branch
cp /tmp/usage-metrics.csv docs/usage-metrics.csv
git add docs/usage-metrics.csv
# Check again after checkout — the diff might be empty if the
# branch already has identical data
if git diff --staged --quiet; then
echo "No changes to commit after checking out existing branch"
exit 0
fi
else
echo "Creating new branch $BRANCH_NAME"
git checkout -b "$BRANCH_NAME"
fi
Comment thread
coderabbitai[bot] marked this conversation as resolved.

          git commit -m "chore(metrics): update usage metrics ($MONTH)"
git push -u origin "$BRANCH_NAME"

# Create PR using gh CLI
gh pr create \
--title "chore: update usage metrics ($DATE)" \
--body "Automated update of usage metrics data. This PR updates the usage metrics CSV file with the latest GitHub usage data for testcontainers-go versions." \
--base main
# Create a PR only if one doesn't already exist for this branch
EXISTING_PR=$(gh pr list --head "$BRANCH_NAME" --state open --json number --jq '.[0].number')
if [ -n "$EXISTING_PR" ]; then
echo "PR #$EXISTING_PR already exists for branch $BRANCH_NAME, updated with new commit"
else
gh pr create \
--title "chore: update usage metrics ($MONTH)" \
--body "Automated update of usage metrics data. This PR updates the usage metrics CSV file with the latest GitHub usage data for testcontainers-go versions." \
--base main
fi
186 changes: 124 additions & 62 deletions usage-metrics/collect-metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,43 +55,80 @@ func main() {

func collectMetrics(versions []string, csvPath string) error {
date := time.Now().Format("2006-01-02")
metrics := make([]usageMetric, 0, len(versions))
metrics := make(map[string]usageMetric)

// Query all versions sequentially
// Build a unique, non-empty list of versions to query
pending := make([]string, 0, len(versions))
seen := make(map[string]struct{}, len(versions))
for _, version := range versions {
version = strings.TrimSpace(version)
if version == "" {
continue
}

// Add delay BEFORE querying to avoid rate limiting
if len(metrics) > 0 {
log.Printf("Waiting 7 seconds before querying next version...")
time.Sleep(7 * time.Second) // 10 requests per 60 seconds = 6 seconds minimum
if _, ok := seen[version]; ok {
continue
}
seen[version] = struct{}{}
pending = append(pending, version)
}
if len(pending) == 0 {
return errors.New("at least one non-empty version is required")
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

count, err := queryGitHubUsageWithRetry(version)
if err != nil {
log.Printf("Warning: Failed to query version %s after retries: %v", version, err)
continue
const (
maxPasses = 5
interRequestWait = 7 * time.Second // 10 requests per 60 seconds = 6 seconds minimum
passCooldown = 120 * time.Second // wait for rate limit window to fully reset between passes
)

for pass := 0; pass < maxPasses && len(pending) > 0; pass++ {
if pass > 0 {
log.Printf("Pass %d: waiting %v for rate limit window to reset before retrying %d failed version(s)...",
pass+1, passCooldown, len(pending))
time.Sleep(passCooldown)
} else {
log.Printf("Pass 1: querying %d version(s)...", len(pending))
}

metric := usageMetric{
Date: date,
Version: version,
Count: count,
var failed []string
queriesMade := 0
for _, version := range pending {
// Add delay before querying to avoid rate limiting
if queriesMade > 0 {
log.Printf("Waiting %v before querying next version...", interRequestWait)
time.Sleep(interRequestWait)
}

count, err := queryGitHubUsage(version)
queriesMade++
if err != nil {
log.Printf("Pass %d: failed to query version %s: %v", pass+1, version, err)
if isRetryableError(err) {
failed = append(failed, version)
continue
}
return fmt.Errorf("query %s: %w", version, err)
}

metrics[version] = usageMetric{
Date: date,
Version: version,
Count: count,
}
fmt.Printf("Successfully queried: %s has %d usages on %s\n", version, count, date)
}

metrics = append(metrics, metric)
fmt.Printf("Successfully queried: %s has %d usages on %s\n", version, count, metric.Date)
pending = failed
if len(pending) == 0 {
log.Printf("All versions queried successfully after %d pass(es).", pass+1)
}
}

// Sort metrics by version
sort.Slice(metrics, func(i, j int) bool {
return metrics[i].Version < metrics[j].Version
})
if len(pending) > 0 {
log.Printf("Warning: %d version(s) still failed after %d passes: %s", len(pending), maxPasses, strings.Join(pending, ", "))
}

// Write all metrics to CSV
// Append new metrics to CSV
for _, metric := range metrics {
if err := appendToCSV(csvPath, metric); err != nil {
log.Printf("Warning: Failed to write metric for %s: %v", metric.Version, err)
Expand All @@ -100,49 +137,25 @@ func collectMetrics(versions []string, csvPath string) error {
fmt.Printf("Successfully recorded: %s has %d usages on %s\n", metric.Version, metric.Count, metric.Date)
}

return nil
}

func queryGitHubUsageWithRetry(version string) (int, error) {
var lastErr error
// Backoff intervals: wait longer for rate limit to reset (rolling window)
backoffIntervals := []time.Duration{
60 * time.Second, // Wait for rolling window
60 * time.Second,
60 * time.Second,
// Sort the entire CSV so rows are ordered by (date, version) regardless
// of the order they were appended across multiple runs.
if err := sortCSV(csvPath); err != nil {
return fmt.Errorf("sort csv: %w", err)
}

// maxRetries includes the initial attempt plus one retry per backoff interval
maxRetries := len(backoffIntervals) + 1

for attempt := 0; attempt < maxRetries; attempt++ {
if attempt > 0 {
// Use predefined backoff intervals
waitTime := backoffIntervals[attempt-1]
log.Printf("Retrying version %s in %v (attempt %d/%d)", version, waitTime, attempt+1, maxRetries)
time.Sleep(waitTime)
}

count, err := queryGitHubUsage(version)
if err == nil {
return count, nil
}

lastErr = err

// Check if it's a rate limit error
if strings.Contains(err.Error(), "rate limit") ||
strings.Contains(err.Error(), "403") ||
strings.Contains(err.Error(), "429") {
log.Printf("Rate limit hit for version %s, will retry with backoff", version)
continue
}

// For non-rate-limit errors, retry but with shorter backoff
log.Printf("Error querying version %s: %v", version, err)
}
return nil
}

return 0, fmt.Errorf("max retries reached: %w", lastErr)
// isRetryableError returns true for rate-limit and transient HTTP errors
// that are worth retrying in a subsequent pass.
func isRetryableError(err error) bool {
msg := err.Error()
return strings.Contains(msg, "rate limit") ||
strings.Contains(msg, "403") ||
strings.Contains(msg, "429") ||
strings.Contains(msg, "500") ||
strings.Contains(msg, "502") ||
strings.Contains(msg, "503")
}

func queryGitHubUsage(version string) (int, error) {
Expand Down Expand Up @@ -173,6 +186,55 @@ func queryGitHubUsage(version string) (int, error) {
return resp.TotalCount, nil
}

func sortCSV(csvPath string) error {
absPath, err := filepath.Abs(csvPath)
if err != nil {
return fmt.Errorf("resolve path: %w", err)
}

file, err := os.Open(absPath)
if err != nil {
return fmt.Errorf("open file: %w", err)
}

reader := csv.NewReader(file)
records, err := reader.ReadAll()
file.Close()
if err != nil {
return fmt.Errorf("read csv: %w", err)
}

if len(records) <= 1 {
return nil // nothing to sort (header only or empty)
}

header := records[0]
data := records[1:]

sort.SliceStable(data, func(i, j int) bool {
if data[i][0] != data[j][0] {
return data[i][0] < data[j][0] // date ascending
}
return data[i][1] < data[j][1] // version ascending
})
Comment thread
coderabbitai[bot] marked this conversation as resolved.

out, err := os.Create(absPath)
if err != nil {
return fmt.Errorf("create file: %w", err)
}
defer out.Close()

writer := csv.NewWriter(out)
if err := writer.Write(header); err != nil {
return fmt.Errorf("write header: %w", err)
}
if err := writer.WriteAll(data); err != nil {
return fmt.Errorf("write records: %w", err)
}

return nil
}

func appendToCSV(csvPath string, metric usageMetric) error {
absPath, err := filepath.Abs(csvPath)
if err != nil {
Expand Down
Loading