diff --git a/pkg/cache/cache_test.go b/pkg/cache/cache_test.go index 56d3beb5..996ccc2b 100644 --- a/pkg/cache/cache_test.go +++ b/pkg/cache/cache_test.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "database/sql" - "errors" "fmt" "io" "net/http" @@ -1248,107 +1247,6 @@ func testDeleteNar(factory cacheFactory) func(*testing.T) { } } -// TestDeadlock_NarInfo_Triggers_Nar_Refetch reproduces a deadlock where pulling a NarInfo -// triggers a Nar fetch (because compression is none), and both waiting on each other -// if they share the same lock/job key. -func testDeadlockNarInfoTriggersNarRefetch(factory cacheFactory) func(*testing.T) { - return func(t *testing.T) { - t.Parallel() - - c, _, _, _, _, cleanup := factory(t) - t.Cleanup(cleanup) - - // 1. Setup a test server - ts := testdata.NewTestServer(t, 1) - t.Cleanup(ts.Close) - - // CRITICAL: We must ensure NarInfoHash == NarHash to cause the collision in upstreamJobs map. - // The deadlock happens because pullNarInfo starts a job with key=hash, and then prePullNar - // tries to start a job with key=hash (derived from URL). - - // NarInfoHash is 32 chars. - // narURL.Hash comes from URL. - // We want narURL.Hash == NarInfoHash. - collisionHash := "11111111111111111111111111111111" - - entry := testdata.Entry{ - NarInfoHash: collisionHash, - NarHash: collisionHash, - NarCompression: "none", - NarInfoText: `StorePath: /nix/store/11111111111111111111111111111111-test-1.0 -URL: nar/11111111111111111111111111111111.nar -Compression: none -FileHash: sha256:1111111111111111111111111111111111111111111111111111 -FileSize: 123 -NarHash: sha256:1111111111111111111111111111111111111111111111111111 -NarSize: 123 -References: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-dummy -Deriver: dddddddddddddddddddddddddddddddd-test-1.0.drv -Sig: cache.nixos.org-1:MadTCU1OSFCGUw4aqCKpLCZJpqBc7AbLvO7wgdlls0eq1DwaSnF/82SZE+wJGEiwlHbnZR+14daSaec0W3XoBQ== -`, - NarText: "content-of-the-nar", - } - ts.AddEntry(entry) - - // Add debug handler to see what's being requested and serve content manually - ts.AddMaybeHandler(func(w http.ResponseWriter, r *http.Request) bool { - if r.URL.Path == "/"+collisionHash+".narinfo" { - _, _ = w.Write([]byte(entry.NarInfoText)) - - return true - } - - if r.URL.Path == "/nar/"+collisionHash+".nar" { - _, _ = w.Write([]byte(entry.NarText)) - - return true - } - - return false // Let the real handler process other things - }) - - uc, err := upstream.New(newContext(), testhelper.MustParseURL(t, ts.URL), nil) - require.NoError(t, err) - - c.AddUpstreamCaches(newContext(), uc) - - // Wait for health check - select { - case <-c.GetHealthChecker().Trigger(): - case <-time.After(5 * time.Second): - t.Fatal("timeout waiting for upstream health check") - } - - // 2. Trigger the download - // We use a timeout to detect the deadlock - ctx, cancel := context.WithTimeout(newContext(), 5*time.Second) - defer cancel() - - done := make(chan struct{}) - - var narInfo *narinfo.NarInfo - - go func() { - defer close(done) - - narInfo, err = c.GetNarInfo(ctx, entry.NarInfoHash) - }() - - select { - case <-done: - case <-ctx.Done(): - if errors.Is(ctx.Err(), context.DeadlineExceeded) { - t.Fatal("Deadlock detected! GetNarInfo timed out.") - } - case <-time.After(10 * time.Second): - t.Fatal("Timeout waiting for GetNarInfo to complete") - } - - require.NoError(t, err) - assert.NotNil(t, narInfo) - } -} - // TestDeadlock_ContextCancellation_DuringDownload reproduces a deadlock that occurs when // a context is canceled during a download, causing cleanup code to attempt closing channels // that may have already been closed. This test specifically targets the issue fixed in #433 @@ -2809,7 +2707,6 @@ func runCacheTestSuite(t *testing.T, factory cacheFactory) { t.Run("GetNarInfoMigratesInvalidURL", testGetNarInfoMigratesInvalidURL(factory)) t.Run("GetNarInfoConcurrentMigrationAttempts", testGetNarInfoConcurrentMigrationAttempts(factory)) t.Run("DeleteNar", testDeleteNar(factory)) - t.Run("DeadlockNarInfoTriggersNarRefetch", testDeadlockNarInfoTriggersNarRefetch(factory)) t.Run("DeadlockContextCancellationDuringDownload", testDeadlockContextCancellationDuringDownload(factory)) t.Run("BackgroundDownloadCompletionAfterCancellation", testBackgroundDownloadCompletionAfterCancellation(factory)) diff --git a/pkg/cache/cdc_test.go b/pkg/cache/cdc_test.go index 52ba009f..54591808 100644 --- a/pkg/cache/cdc_test.go +++ b/pkg/cache/cdc_test.go @@ -152,14 +152,14 @@ func testCDCMixedMode(factory cacheFactory) func(*testing.T) { require.NoError(t, c.SetCDCConfiguration(false, 0, 0, 0)) blobContent := "traditional blob content" - nuBlob := nar.URL{Hash: "blob1", Compression: nar.CompressionTypeNone} + nuBlob := nar.URL{Hash: "1s8p1kgdms8rmxkq24q51wc7zpn0aqcwgzvc473v9cii7z2qyxq0", Compression: nar.CompressionTypeNone} require.NoError(t, c.PutNar(ctx, nuBlob, io.NopCloser(strings.NewReader(blobContent)))) // 2. Store chunks with CDC enabled require.NoError(t, c.SetCDCConfiguration(true, 1024, 4096, 8192)) chunkContent := "chunked content" - nuChunk := nar.URL{Hash: "chunk1", Compression: nar.CompressionTypeNone} + nuChunk := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: nar.CompressionTypeNone} require.NoError(t, c.PutNar(ctx, nuChunk, io.NopCloser(strings.NewReader(chunkContent)))) // 3. Retrieve both @@ -203,7 +203,7 @@ func testCDCGetNarInfo(factory cacheFactory) func(*testing.T) { // Create and store a NAR with CDC enabled content := "this is test content for GetNarInfo with CDC enabled" - nu := nar.URL{Hash: "testnarinfo1", Compression: nar.CompressionTypeNone} + nu := nar.URL{Hash: "1s8p1kgdms8rmxkq24q51wc7zpn0aqcwgzvc473v9cii7z2qyxq0", Compression: nar.CompressionTypeNone} err = c.PutNar(ctx, nu, io.NopCloser(strings.NewReader(content))) require.NoError(t, err) @@ -214,24 +214,24 @@ func testCDCGetNarInfo(factory cacheFactory) func(*testing.T) { assert.Positive(t, count, "chunks should exist in database") // Store a narinfo that references this NAR - niText := `StorePath: /nix/store/test-path -URL: nar/testnarinfo1.nar + niText := `StorePath: /nix/store/0amzzlz5w7ihknr59cn0q56pvp17bqqz-test-path +URL: nar/1s8p1kgdms8rmxkq24q51wc7zpn0aqcwgzvc473v9cii7z2qyxq0.nar Compression: none -FileHash: sha256:0000000000000000000000000000000000000000000000000000000000000000 +FileHash: sha256:1s8p1kgdms8rmxkq24q51wc7zpn0aqcwgzvc473v9cii7z2qyxq0 FileSize: 52 -NarHash: sha256:0000000000000000000000000000000000000000000000000000000000000000 +NarHash: sha256:1s8p1kgdms8rmxkq24q51wc7zpn0aqcwgzvc473v9cii7z2qyxq0 NarSize: 52 ` - err = c.PutNarInfo(ctx, "testnarinfohash", io.NopCloser(strings.NewReader(niText))) + err = c.PutNarInfo(ctx, "0amzzlz5w7ihknr59cn0q56pvp17bqqz", io.NopCloser(strings.NewReader(niText))) require.NoError(t, err) // Now call GetNarInfo. Since the NAR is stored as chunks and NOT as a whole file, // the old version of getNarInfoFromDatabase would fail to find it and purge the narinfo. - _, err = c.GetNarInfo(ctx, "testnarinfohash") + _, err = c.GetNarInfo(ctx, "0amzzlz5w7ihknr59cn0q56pvp17bqqz") require.NoError(t, err, "GetNarInfo should succeed even if NAR is only in chunks") // Verify that the narinfo still exists in the database - _, err = c.GetNarInfo(ctx, "testnarinfohash") + _, err = c.GetNarInfo(ctx, "0amzzlz5w7ihknr59cn0q56pvp17bqqz") require.NoError(t, err, "GetNarInfo should still succeed (not purged)") } } diff --git a/pkg/nar/filepath_test.go b/pkg/nar/filepath_test.go index c5721769..232e8912 100644 --- a/pkg/nar/filepath_test.go +++ b/pkg/nar/filepath_test.go @@ -19,8 +19,16 @@ func TestNarFilePath(t *testing.T) { compression string path string }{ - {hash: "abc123", compression: "", path: filepath.Join("a", "ab", "abc123.nar")}, - {hash: "def456", compression: "xz", path: filepath.Join("d", "de", "def456.nar.xz")}, + { + hash: "1mb5fxh7nzbx1b2q40bgzwjnjh8xqfap9mfnfqxlvvgvdyv8xwps", + compression: "", + path: filepath.Join("1", "1m", "1mb5fxh7nzbx1b2q40bgzwjnjh8xqfap9mfnfqxlvvgvdyv8xwps.nar"), + }, + { + hash: "1mb5fxh7nzbx1b2q40bgzwjnjh8xqfap9mfnfqxlvvgvdyv8xwps", + compression: "xz", + path: filepath.Join("1", "1m", "1mb5fxh7nzbx1b2q40bgzwjnjh8xqfap9mfnfqxlvvgvdyv8xwps.nar.xz"), + }, } for _, test := range []string{"", "a", "ab"} { diff --git a/pkg/nar/hash.go b/pkg/nar/hash.go index 38fe8a2e..8f9c7564 100644 --- a/pkg/nar/hash.go +++ b/pkg/nar/hash.go @@ -3,17 +3,23 @@ package nar import ( "errors" "regexp" + + "github.com/kalbasit/ncps/pkg/narinfo" ) +// NormalizedHashPattern defines the valid characters for a Nix32 encoded hash. +// Nix32 uses a 32-character alphabet excluding 'e', 'o', 'u', and 't'. +// Valid characters: 0-9, a-d, f-n, p-s, v-z +// Hashes must be exactly 52 characters long. +const NormalizedHashPattern = `[0-9a-df-np-sv-z]{52}` + +const HashPattern = `(` + narinfo.HashPattern + `-)?` + NormalizedHashPattern + var ( // ErrInvalidHash is returned if the hash is not valid. ErrInvalidHash = errors.New("invalid nar hash") - // narHashPattern defines the valid characters for a nar hash. - //nolint:gochecknoglobals // This is used in other regexes to ensure they validate the same thing. - narHashPattern = `[a-z0-9_-]+` - - narHashRegexp = regexp.MustCompile(`^(` + narHashPattern + `)$`) + narHashRegexp = regexp.MustCompile(`^(` + HashPattern + `)$`) ) func ValidateHash(hash string) error { diff --git a/pkg/nar/url.go b/pkg/nar/url.go index 57cb2b2a..5f6619d5 100644 --- a/pkg/nar/url.go +++ b/pkg/nar/url.go @@ -15,8 +15,8 @@ var ( // ErrInvalidURL is returned if the regexp did not match the given URL. ErrInvalidURL = errors.New("invalid nar URL") - // https://regex101.com/r/yPwxpw/4 - narRegexp = regexp.MustCompile(`^nar/(` + narHashPattern + `)\.nar(\.([a-z0-9]+))?(\?([a-z0-9=&]*))?$`) + // hashValidationRegexp validates that a string matches the HashPattern. + hashValidationRegexp = regexp.MustCompile(`^(` + HashPattern + `)$`) ) // URL represents a nar URL. @@ -27,29 +27,65 @@ type URL struct { } // ParseURL parses a nar URL (as present in narinfo) and returns its components. +// It accepts URLs in the format: [path/].nar[.][?query] +// The hash must match HashPattern. This implementation is flexible about the +// directory structure - only the filename matters, not the "nar/" prefix. func ParseURL(u string) (URL, error) { - if u == "" || !strings.HasPrefix(u, "nar/") { + if u == "" { return URL{}, ErrInvalidURL } - sm := narRegexp.FindStringSubmatch(u) - if len(sm) != 6 { + // Separate the query string from the path + pathPart, rawQuery, _ := strings.Cut(u, "?") + + // Get the filename (last component of the path) + filename := filepath.Base(pathPart) + if filename == "" || filename == "." { + return URL{}, ErrInvalidURL + } + + // The filename must contain ".nar" followed by optional compression extension + // Format: hash.nar[.compression] + // Everything before .nar is the hash, everything after is optional compression + hash, afterNar, found := strings.Cut(filename, ".nar") + if !found || hash == "" { return URL{}, ErrInvalidURL } - nu := URL{Hash: sm[1]} + // Validate that the hash matches HashPattern before processing further + if !hashValidationRegexp.MatchString(hash) { + return URL{}, ErrInvalidURL + } - var err error + // Extract compression extension (e.g., ".bz2" -> "bz2", "" -> "") + var compression string - if nu.Compression, err = CompressionTypeFromExtension(sm[3]); err != nil { + if afterNar != "" { + // afterNar should start with a dot + if !strings.HasPrefix(afterNar, ".") { + return URL{}, ErrInvalidURL + } + + compression = afterNar[1:] // remove leading dot + } + + // Determine compression type + ct, err := CompressionTypeFromExtension(compression) + if err != nil { return URL{}, fmt.Errorf("error computing the compression type: %w", err) } - if nu.Query, err = url.ParseQuery(sm[5]); err != nil { + // Parse the query string if present + query, err := url.ParseQuery(rawQuery) + if err != nil { return URL{}, fmt.Errorf("error parsing the RawQuery as url.Values: %w", err) } - return nu, nil + return URL{ + Hash: hash, + Compression: ct, + Query: query, + }, nil } // NewLogger returns a new logger with the right fields. diff --git a/pkg/nar/url_test.go b/pkg/nar/url_test.go index bb79ea06..6bfde518 100644 --- a/pkg/nar/url_test.go +++ b/pkg/nar/url_test.go @@ -125,7 +125,7 @@ func TestParseURL(t *testing.T) { narURL, err := nar.ParseURL(test.url) - if assert.ErrorIs(t, test.err, err) { + if assert.ErrorIs(t, err, test.err) { assert.Equal(t, test.narURL, narURL) } }) diff --git a/pkg/server/server_test.go b/pkg/server/server_test.go index 2044007b..374b7da6 100644 --- a/pkg/server/server_test.go +++ b/pkg/server/server_test.go @@ -918,7 +918,7 @@ func TestGetNar_ZstdCompression(t *testing.T) { // 1. Put an uncompressed Nar into the cache. narData := strings.Repeat("uncompressed nar data ", 1000) - narHash := "00000000000000000000000000000001" // dummy 32-char hash + narHash := "0000000000000000000000000000000000000000000000000001" // dummy 52-char hash narURL := ts.URL + "/nar/" + narHash + ".nar" req, err := http.NewRequestWithContext( @@ -991,7 +991,7 @@ func TestGetNar_NoZstdCompression(t *testing.T) { // 1. Put an uncompressed Nar into the cache. narData := uncompressedNarData - narHash := "00000000000000000000000000000002" // dummy 32-char hash + narHash := "0000000000000000000000000000000000000000000000000002" // dummy 52-char hash narURL := ts.URL + "/nar/" + narHash + ".nar" req, err := http.NewRequestWithContext( @@ -1057,7 +1057,7 @@ func TestGetNar_ZstdCompression_Head(t *testing.T) { // 1. Put an uncompressed Nar into the cache. narData := uncompressedNarData - narHash := "00000000000000000000000000000003" // dummy 32-char hash + narHash := "0000000000000000000000000000000000000000000000000003" // dummy 52-char hash narURL := ts.URL + "/nar/" + narHash + ".nar" req, err := http.NewRequestWithContext( @@ -1145,7 +1145,7 @@ func TestGetNar_HeaderSettingSequence(t *testing.T) { // Put an uncompressed Nar into the cache. narData := uncompressedNarData - narHash := "00000000000000000000000000000004" + narHash := "0000000000000000000000000000000000000000000000000004" narURL := "/nar/" + narHash + ".nar" req := httptest.NewRequest(http.MethodPut, narURL, strings.NewReader(narData)) diff --git a/pkg/storage/s3/s3_test.go b/pkg/storage/s3/s3_test.go index 928bde5a..ff34d677 100644 --- a/pkg/storage/s3/s3_test.go +++ b/pkg/storage/s3/s3_test.go @@ -526,7 +526,7 @@ func TestHasNarInfo_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - exists := store.HasNarInfo(ctx, "hash") + exists := store.HasNarInfo(ctx, "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27") assert.False(t, exists) }) @@ -783,7 +783,7 @@ func TestHasNar_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - narURL := nar.URL{Hash: "hash", Compression: "none"} + narURL := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: "none"} exists := store.HasNar(ctx, narURL) assert.False(t, exists) }) @@ -841,7 +841,7 @@ func TestPutNar_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - narURL := nar.URL{Hash: "hash", Compression: "none"} + narURL := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: "none"} _, err = store.PutNar(ctx, narURL, strings.NewReader("content")) require.Error(t, err) assert.Contains(t, err.Error(), "error checking if nar exists") @@ -873,7 +873,7 @@ func TestPutNar_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - narURL := nar.URL{Hash: "hash", Compression: "none"} + narURL := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: "none"} _, err = store.PutNar(ctx, narURL, strings.NewReader("content")) require.Error(t, err) assert.Contains(t, err.Error(), "error putting nar to S3") @@ -935,7 +935,7 @@ func TestNar_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - narURL := nar.URL{Hash: "hash", Compression: "none"} + narURL := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: "none"} _, _, err = store.GetNar(ctx, narURL) require.Error(t, err) assert.Contains(t, err.Error(), "get failed") @@ -962,7 +962,7 @@ func TestNar_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - narURL := nar.URL{Hash: "hash", Compression: "none"} + narURL := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: "none"} err = store.DeleteNar(ctx, narURL) require.Error(t, err) assert.Contains(t, err.Error(), "error checking if nar exists") @@ -993,7 +993,7 @@ func TestNar_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - narURL := nar.URL{Hash: "hash", Compression: "none"} + narURL := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: "none"} err = store.DeleteNar(ctx, narURL) require.Error(t, err) assert.Contains(t, err.Error(), "delete failed")