Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(dnslink-gw): breadcrumbs and CID column when dir listing #7699

Merged
merged 4 commits into from
Nov 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions assets/bindata.go

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion assets/bindata_version_hash.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
package assets

const (
BindataVersionHash = "514e5ae28d8adb84955801b56ef47aca44bf9cc8"
BindataVersionHash = "605b5945438e1fe2eaf8a6571cca7ecda12d5599"
)
2 changes: 1 addition & 1 deletion assets/dir-index-html
10 changes: 7 additions & 3 deletions core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -379,8 +379,9 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request

hash := resolvedPath.Cid().String()

// Storage for gateway URL to be used when linking to other rootIDs. This
// will be blank unless subdomain resolution is being used for this request.
// Gateway root URL to be used when linking to other rootIDs.
// This will be blank unless subdomain or DNSLink resolution is being used
// for this request.
var gwURL string

// Get gateway hostname and build gateway URL.
Expand All @@ -390,13 +391,16 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
gwURL = ""
}

dnslink := hasDNSLinkOrigin(gwURL, urlPath)

// See comment above where originalUrlPath is declared.
tplData := listingTemplateData{
GatewayURL: gwURL,
DNSLink: dnslink,
Listing: dirListing,
Size: size,
Path: urlPath,
Breadcrumbs: breadcrumbs(urlPath),
Breadcrumbs: breadcrumbs(urlPath, dnslink),
BackLink: backLink,
Hash: hash,
}
Expand Down
30 changes: 28 additions & 2 deletions core/corehttp/gateway_indexPage.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
// structs for directory listing
type listingTemplateData struct {
GatewayURL string
DNSLink bool
Listing []directoryItem
Size string
Path string
Expand All @@ -34,16 +35,16 @@ type breadcrumb struct {
Path string
}

func breadcrumbs(urlPath string) []breadcrumb {
func breadcrumbs(urlPath string, dnslinkOrigin bool) []breadcrumb {
var ret []breadcrumb

p, err := ipfspath.ParsePath(urlPath)
if err != nil {
// No breadcrumbs, fallback to bare Path in template
return ret
}

segs := p.Segments()
contentRoot := segs[1]
for i, seg := range segs {
if i == 0 {
ret = append(ret, breadcrumb{Name: seg})
Expand All @@ -55,13 +56,38 @@ func breadcrumbs(urlPath string) []breadcrumb {
}
}

// Drop the /ipns/<fqdn> prefix from breadcrumb Paths when directory
// listing on a DNSLink website (loaded due to Host header in HTTP
// request). Necessary because the hostname most likely won't have a
// public gateway mounted.
if dnslinkOrigin {
prefix := "/ipns/" + contentRoot
for i, crumb := range ret {
if strings.HasPrefix(crumb.Path, prefix) {
ret[i].Path = strings.Replace(crumb.Path, prefix, "", 1)
}
}
// Make contentRoot breadcrumb link to the website root
ret[1].Path = "/"
}

return ret
}

func shortHash(hash string) string {
return (hash[0:4] + "\u2026" + hash[len(hash)-4:])
}

// helper to detect DNSLink website context
// (when hostname from gwURL is matching /ipns/<fqdn> in path)
func hasDNSLinkOrigin(gwURL string, path string) bool {
if gwURL != "" {
fqdn := stripPort(strings.TrimPrefix(gwURL, "//"))
return strings.HasPrefix(path, "/ipns/"+fqdn)
}
return false
}

var listingTemplate *template.Template

func init() {
Expand Down
20 changes: 16 additions & 4 deletions core/corehttp/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,10 @@ func TestIPNSHostnameRedirect(t *testing.T) {
}
}

// Test directory listing on DNSLink website
// (scenario when Host header is the same as URL hostname)
// This is basic regression test: additional end-to-end tests
// can be found in test/sharness/t0115-gateway-dir-listing.sh
func TestIPNSHostnameBacklinks(t *testing.T) {
ns := mockNamesys{}
ts, api, ctx := newTestServerAndNode(t, ns)
Expand Down Expand Up @@ -437,15 +441,15 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
t.Fatal(err)
}

// expect correct backlinks
// expect correct links
body, err := ioutil.ReadAll(res.Body)
if err != nil {
t.Fatalf("error reading response: %s", err)
}
s := string(body)
t.Logf("body: %s\n", string(body))

if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"/ipns/example.net\">example.net</a>/<a href=\"/ipns/example.net/foo%3F%20%23%3C%27\">foo? #&lt;&#39;</a>") {
if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"//example.net/\">example.net</a>/<a href=\"//example.net/foo%3F%20%23%3C%27\">foo? #&lt;&#39;</a>") {
t.Fatalf("expected a path in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/./..\">") {
Expand All @@ -454,6 +458,10 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/file.txt\">") {
t.Fatalf("expected file in directory listing")
}
if !strings.Contains(s, "<a class=\"ipfs-hash\" href=\"https://cid.ipfs.io/#") {
// https://github.com/ipfs/dir-index-html/issues/42
t.Fatalf("expected links to cid.ipfs.io in CID column when on DNSLink website")
}
if !strings.Contains(s, k2.Cid().String()) {
t.Fatalf("expected hash in directory listing")
}
Expand Down Expand Up @@ -487,6 +495,10 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
if !strings.Contains(s, "<a href=\"/file.txt\">") {
t.Fatalf("expected file in directory listing")
}
if !strings.Contains(s, "<a class=\"ipfs-hash\" href=\"https://cid.ipfs.io/#") {
// https://github.com/ipfs/dir-index-html/issues/42
t.Fatalf("expected links to cid.ipfs.io in CID column when on DNSLink website")
}
if !strings.Contains(s, k.Cid().String()) {
t.Fatalf("expected hash in directory listing")
}
Expand All @@ -511,7 +523,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
s = string(body)
t.Logf("body: %s\n", string(body))

if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"/ipns/example.net\">example.net</a>/<a href=\"/ipns/example.net/foo%3F%20%23%3C%27\">foo? #&lt;&#39;</a>/<a href=\"/ipns/example.net/foo%3F%20%23%3C%27/bar\">bar</a>") {
if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"//example.net/\">example.net</a>/<a href=\"//example.net/foo%3F%20%23%3C%27\">foo? #&lt;&#39;</a>/<a href=\"//example.net/foo%3F%20%23%3C%27/bar\">bar</a>") {
t.Fatalf("expected a path in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/bar/./..\">") {
Expand Down Expand Up @@ -545,7 +557,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
s = string(body)
t.Logf("body: %s\n", string(body))

if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"/ipns/example.net\">example.net</a>") {
if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"//example.net/\">example.net</a>") {
t.Fatalf("expected a path in directory listing")
}
if !strings.Contains(s, "<a href=\"/good-prefix/\">") {
Expand Down
21 changes: 14 additions & 7 deletions core/corehttp/hostname.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func HostnameOption() ServeOption {
if !gw.NoDNSLink && isDNSLinkRequest(r.Context(), coreAPI, host) {
// rewrite path and handle as DNSLink
r.URL.Path = "/ipns/" + stripPort(host) + r.URL.Path
childMux.ServeHTTP(w, r)
childMux.ServeHTTP(w, withHostnameContext(r, host))
return
}

Expand All @@ -143,10 +143,6 @@ func HostnameOption() ServeOption {
if gw, hostname, ns, rootID, ok := knownSubdomainDetails(host, knownGateways); ok {
// Looks like we're using a known gateway in subdomain mode.

// Add gateway hostname context for linking to other root ids.
// Example: localhost/ipfs/{cid}
ctx := context.WithValue(r.Context(), "gw-hostname", hostname)

// Assemble original path prefix.
pathPrefix := "/" + ns + "/" + rootID

Expand Down Expand Up @@ -201,7 +197,7 @@ func HostnameOption() ServeOption {
r.URL.Path = pathPrefix + r.URL.Path

// Serve path request
childMux.ServeHTTP(w, r.WithContext(ctx))
childMux.ServeHTTP(w, withHostnameContext(r, hostname))
return
}
// We don't have a known gateway. Fallback on DNSLink lookup
Expand All @@ -213,7 +209,7 @@ func HostnameOption() ServeOption {
if !cfg.Gateway.NoDNSLink && isDNSLinkRequest(r.Context(), coreAPI, host) {
// rewrite path and handle as DNSLink
r.URL.Path = "/ipns/" + stripPort(host) + r.URL.Path
childMux.ServeHTTP(w, r)
childMux.ServeHTTP(w, withHostnameContext(r, host))
return
}

Expand All @@ -234,6 +230,17 @@ type wildcardHost struct {
spec *config.GatewaySpec
}

// Extends request context to include hostname of a canonical gateway root
// (subdomain root or dnslink fqdn)
func withHostnameContext(r *http.Request, hostname string) *http.Request {
// This is required for links on directory listing pages to work correctly
// on subdomain and dnslink gateways. While DNSlink could read value from
// Host header, subdomain gateways have more comples rules (knownSubdomainDetails)
// More: https://github.com/ipfs/dir-index-html/issues/42
ctx := context.WithValue(r.Context(), "gw-hostname", hostname)
return r.WithContext(ctx)
}

func prepareKnownGateways(publicGateways map[string]*config.GatewaySpec) gatewayHosts {
var hosts gatewayHosts

Expand Down
3 changes: 2 additions & 1 deletion test/sharness/t0114-gateway-subdomains.sh
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,8 @@ test_expect_success "valid parent directory path in directory listing at {cid}.i
test_should_contain "<a href=\"/ipfs/ipns/bar\">bar</a>" list_response
'

# Note we test for sneaky subdir names {cid}.ipfs.example.com/ipfs/ipns/ :^)
# Note 1: we test for sneaky subdir names {cid}.ipfs.example.com/ipfs/ipns/ :^)
# Note 2: example.com/ipfs/.. present in HTML will be redirected to subdomain, so this is expected behavior
test_expect_success "valid breadcrumb links in the header of directory listing at {cid}.ipfs.example.com/sub/dir" '
curl -s -H "Host: $DIR_FQDN" http://127.0.0.1:$GWAY_PORT/ipfs/ipns/ > list_response &&
test_should_contain "Index of" list_response &&
Expand Down
146 changes: 146 additions & 0 deletions test/sharness/t0115-gateway-dir-listing.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#!/usr/bin/env bash
#
# Copyright (c) Protocol Labs

test_description="Test directory listing (dir-index-html) on the HTTP gateway"
Copy link
Member Author

@lidel lidel Sep 29, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💭

This file adds proper end-to-end tests for directory listing on Gateway port that protects us against regressions such as ipfs/dir-index-html#42 by testing each gateway type:

  • path gateway
  • subdomain gateway
  • dnslink website gateway

We check behavior of:

  • etag/unicode support
  • breadcrumbs
  • file name column
  • hash column

This level of testing is necessary, because URIs will be different on each gw type, and we literally had no tests for this apart from subdomain ones. Now we test all gw types and I can sleep better ;)



. lib/test-lib.sh

## ============================================================================
## Start IPFS Node and prepare test CIDs
## ============================================================================

test_expect_success "ipfs init" '
export IPFS_PATH="$(pwd)/.ipfs" &&
ipfs init --profile=test > /dev/null
'

test_launch_ipfs_daemon --offline

test_expect_success "Add the test directory" '
mkdir -p rootDir/ipfs &&
mkdir -p rootDir/ipns &&
mkdir -p rootDir/api &&
mkdir -p rootDir/ą/ę &&
echo "I am a txt file on path with utf8" > rootDir/ą/ę/file-źł.txt &&
echo "I am a txt file in confusing /api dir" > rootDir/api/file.txt &&
echo "I am a txt file in confusing /ipfs dir" > rootDir/ipfs/file.txt &&
echo "I am a txt file in confusing /ipns dir" > rootDir/ipns/file.txt &&
DIR_CID=$(ipfs add -Qr --cid-version 1 rootDir) &&
FILE_CID=$(ipfs files stat /ipfs/$DIR_CID/ą/ę/file-źł.txt | head -1)
'

## ============================================================================
## Test dir listing on path gateway (eg. 127.0.0.1:8080/ipfs/)
## ============================================================================

test_expect_success "path gw: backlink on root CID should point at self" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "<a href=\"/ipfs/$DIR_CID/\">..</a>" list_response
'

test_expect_success "path gw: Etag should be present" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'

test_expect_success "path gw: breadcrumbs should point at /ipfs namespace mounted at Origin root" '
test_should_contain "/ipfs/<a href=\"/ipfs/$DIR_CID\">$DIR_CID</a>/<a href=\"/ipfs/$DIR_CID/%C4%85\">ą</a>/<a href=\"/ipfs/$DIR_CID/%C4%85/%C4%99\">ę</a>" list_response
'

test_expect_success "path gw: backlink should point at parent directory" '
test_should_contain "<a href=\"/ipfs/$DIR_CID/%C4%85/%C4%99/..\">..</a>" list_response
'

test_expect_success "path gw: name column should be a link to its content path" '
test_should_contain "<a href=\"/ipfs/$DIR_CID/%C4%85/%C4%99/file-%C5%BA%C5%82.txt\">file-źł.txt</a>" list_response
'

test_expect_success "path gw: hash column should be a CID link with filename param" '
test_should_contain "<a class=\"ipfs-hash\" href=\"/ipfs/$FILE_CID?filename=file-%25C5%25BA%25C5%2582.txt\">" list_response
'

## ============================================================================
## Test dir listing on subdomain gateway (eg. <cid>.ipfs.localhost:8080)
## ============================================================================

DIR_HOSTNAME="${DIR_CID}.ipfs.localhost"
# note: we skip DNS lookup by running curl with --resolve $DIR_HOSTNAME:127.0.0.1

test_expect_success "path gw: backlink on root CID should point origin root" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "<a href=\"/\">..</a>" list_response
'

test_expect_success "path gw: Etag should be present" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ą/ę > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'

test_expect_success "subdomain gw: breadcrumbs should leverage path-based router mounted on the parent domain" '
test_should_contain "/ipfs/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID\">$DIR_CID</a>/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID/%C4%85\">ą</a>/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID/%C4%85/%C4%99\">ę</a>" list_response
'

test_expect_success "path gw: name column should be a link to content root mounted at subdomain origin" '
test_should_contain "<a href=\"/%C4%85/%C4%99/file-%C5%BA%C5%82.txt\">file-źł.txt</a>" list_response
'

test_expect_success "path gw: hash column should be a CID link to path router with filename param" '
test_should_contain "<a class=\"ipfs-hash\" href=\"//localhost:$GWAY_PORT/ipfs/$FILE_CID?filename=file-%25C5%25BA%25C5%2582.txt\">" list_response
'

## ============================================================================
## Test dir listing on DNSLink gateway (eg. example.com)
## ============================================================================

# DNSLink test requires a daemon in online mode with precached /ipns/ mapping
test_kill_ipfs_daemon
DNSLINK_HOSTNAME="website.example.com"
export IPFS_NS_MAP="$DNSLINK_HOSTNAME:/ipfs/$DIR_CID"
test_launch_ipfs_daemon

# Note that:
# - this type of gateway is also tested in gateway_test.go#TestIPNSHostnameBacklinks
# (go tests and sharness tests should be kept in sync)
# - we skip DNS lookup by running curl with --resolve $DNSLINK_HOSTNAME:127.0.0.1

test_expect_success "dnslink gw: backlink on root CID should point origin root" '
curl -v -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "<a href=\"/\">..</a>" list_response
'

test_expect_success "dnslink gw: Etag should be present" '
curl -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ą/ę > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'

test_expect_success "dnslink gw: breadcrumbs should point at content root mounted at dnslink origin" '
test_should_contain "/ipns/<a href=\"//$DNSLINK_HOSTNAME:$GWAY_PORT/\">website.example.com</a>/<a href=\"//$DNSLINK_HOSTNAME:$GWAY_PORT/%C4%85\">ą</a>/<a href=\"//$DNSLINK_HOSTNAME:$GWAY_PORT/%C4%85/%C4%99\">ę</a>" list_response
'

test_expect_success "dnslink gw: name column should be a link to content root mounted at dnslink origin" '
test_should_contain "<a href=\"/%C4%85/%C4%99/file-%C5%BA%C5%82.txt\">file-źł.txt</a>" list_response
'

# DNSLink websites don't have public gateway mounted by default
# See: https://github.com/ipfs/dir-index-html/issues/42
test_expect_success "dnslink gw: hash column should be a CID link to cid.ipfs.io" '
test_should_contain "<a class=\"ipfs-hash\" href=\"https://cid.ipfs.io/#$FILE_CID\" target=\"_blank\" rel=\"noreferrer noopener\">" list_response
'

## ============================================================================
## End of tests, cleanup
## ============================================================================

test_kill_ipfs_daemon
test_expect_success "clean up ipfs dir" '
rm -rf "$IPFS_PATH"
'
test_done