Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gw): Cache-Control: only-if-cached #9082

Merged
merged 1 commit into from
Jul 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion core/corehttp/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,16 @@ func GatewayOption(writable bool, paths ...string) ServeOption {
"X-Ipfs-Roots",
}, headers[ACEHeadersName]...))

var gateway http.Handler = newGatewayHandler(GatewayConfig{
var gateway http.Handler
gateway, err = newGatewayHandler(GatewayConfig{
Headers: headers,
Writable: writable,
PathPrefixes: cfg.Gateway.PathPrefixes,
FastDirIndexThreshold: int(cfg.Gateway.FastDirIndexThreshold.WithDefault(100)),
}, api)
if err != nil {
return nil, err
}

gateway = otelhttp.NewHandler(gateway, "Gateway.Request")

Expand Down
46 changes: 40 additions & 6 deletions core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
path "github.com/ipfs/go-path"
"github.com/ipfs/go-path/resolver"
coreiface "github.com/ipfs/interface-go-ipfs-core"
options "github.com/ipfs/interface-go-ipfs-core/options"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
routing "github.com/libp2p/go-libp2p-core/routing"
prometheus "github.com/prometheus/client_golang/prometheus"
Expand Down Expand Up @@ -66,8 +67,9 @@ type redirectTemplateData struct {
// gatewayHandler is a HTTP handler that serves IPFS objects (accessible by default at /ipfs/<path>)
// (it serves requests like GET /ipfs/QmVRzPKPzNtSrEzBFm2UZfxmPAgnaLke4DMcerbsGGSaFe/link)
type gatewayHandler struct {
config GatewayConfig
api coreiface.CoreAPI
config GatewayConfig
api coreiface.CoreAPI
offlineApi coreiface.CoreAPI

// generic metrics
firstContentBlockGetMetric *prometheus.HistogramVec
Expand Down Expand Up @@ -211,10 +213,15 @@ func newGatewayHistogramMetric(name string, help string) *prometheus.HistogramVe
return histogramMetric
}

func newGatewayHandler(c GatewayConfig, api coreiface.CoreAPI) *gatewayHandler {
func newGatewayHandler(c GatewayConfig, api coreiface.CoreAPI) (*gatewayHandler, error) {
offlineApi, err := api.WithOptions(options.Api.Offline(true))
if err != nil {
return nil, err
}
i := &gatewayHandler{
config: c,
api: api,
config: c,
api: api,
offlineApi: offlineApi,
// Improved Metrics
// ----------------------------
// Time till the first content block (bar in /ipfs/cid/foo/bar)
Expand Down Expand Up @@ -255,7 +262,7 @@ func newGatewayHandler(c GatewayConfig, api coreiface.CoreAPI) *gatewayHandler {
"The time to receive the first UnixFS node on a GET from the gateway.",
),
}
return i
return i, nil
}

func parseIpfsPath(p string) (cid.Cid, string, error) {
Expand Down Expand Up @@ -360,6 +367,11 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
}

contentPath := ipath.New(r.URL.Path)

if requestHandled := i.handleOnlyIfCached(w, r, contentPath, logger); requestHandled {
return
}

if requestHandled := handleSuperfluousNamespace(w, r, contentPath); requestHandled {
return
}
Expand Down Expand Up @@ -956,6 +968,28 @@ func debugStr(path string) string {
return q
}

// Detect 'Cache-Control: only-if-cached' in request and return data if it is already in the local datastore.
// https:/ipfs/specs/blob/main/http-gateways/PATH_GATEWAY.md#cache-control-request-header
func (i *gatewayHandler) handleOnlyIfCached(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, logger *zap.SugaredLogger) (requestHandled bool) {
if r.Header.Get("Cache-Control") == "only-if-cached" {
_, err := i.offlineApi.Block().Stat(r.Context(), contentPath)
if err != nil {
if r.Method == http.MethodHead {
w.WriteHeader(http.StatusPreconditionFailed)
return true
}
errMsg := fmt.Sprintf("%q not in local datastore", contentPath.String())
http.Error(w, errMsg, http.StatusPreconditionFailed)
return true
}
if r.Method == http.MethodHead {
w.WriteHeader(http.StatusOK)
return true
}
}
return false
Copy link
Contributor

@Jorropo Jorropo Jul 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You have no happy path for a GET request so it will exit by that point.

In other words.
Let's assume I only store the root block of a file.
You first stat the root block, no error, request is not HEAD and so you return false, and the request continue as usual.
However when the following code tries to actually send the unixfs file, they will hit the network to fetch the childs.

It's not complient with my interpretation of the gateway spec:

if the gateway already has the data

Well we don't know at that point, we know we have the root but more ? Who knows ?

I think there is two solutions here:

  1. For now, the spec is just making things up until it matches whatever our code do.
    You can add a SHOULD (RFC2119) to the spec sentence accepting that cheap checks might be better.
  2. At that point handleOnlyIfCached return a dagstore object that the rest of the code uses to fetch the data.
    So the rest of the code tries to serve the request as usual but using an offline dagstore.

Sadly I think we cannot do the second option (even tho I like it more) because we cannot send an HTTP error after starting to send the payload (why are we using HTTP again ? browsers .... 😞).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Jorropo thanks for noting this.

I've made a conscious decision to only check root block and use block stat --offline for everything (even DAG requests) instead of dag stat --offline as the latter could act as inexpensive DoS vecor (dag stat on Wikipedia DAG does not sound fun, even if all blocks are in the datastore).

The motivation for only-if-cached is to identify gateways which don't have any part of a DAG cached, and prioritize ones that do.

In that spirit, I agree that the spec should provide guidance to implementers, who will have to make a similar decision as I did (or implement custom heuristic/index)

Opened ipfs/specs#297 to clarify spec, but let's merge this PR as-is.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lidel I think that the performance gain block stat --offline can be worth it. I'm ok with this just wanted the spec to be in accordance.

I didn't thought you would dag stat --offline
But rather do dagService = offlineBlockservice

So the directory index code (or file one if that a file, ...) would run in offline mode. It wouldn't be a DOS vector since it's really the same thing as usual but without networking.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened ipfs/specs#297 to clarify spec, but let's merge this PR as-is.

🥳

}

func handleUnsupportedHeaders(r *http.Request) (err *requestError) {
// X-Ipfs-Gateway-Prefix was removed (https:/ipfs/go-ipfs/issues/7702)
// TODO: remove this after go-ipfs 0.13 ships
Expand Down
22 changes: 22 additions & 0 deletions test/sharness/t0116-gateway-cache.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,28 @@ test_expect_success "Prepare IPNS unixfs content path for testing" '
cat curl_ipns_file_output
'

# Cache-Control: only-if-cached
test_expect_success "HEAD for /ipfs/ with only-if-cached succeeds when in local datastore" '
curl -sv -I -H "Cache-Control: only-if-cached" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" > curl_onlyifcached_postitive_head 2>&1 &&
cat curl_onlyifcached_postitive_head &&
grep "< HTTP/1.1 200 OK" curl_onlyifcached_postitive_head
'
test_expect_success "HEAD for /ipfs/ with only-if-cached fails when not in local datastore" '
curl -sv -I -H "Cache-Control: only-if-cached" "http://127.0.0.1:$GWAY_PORT/ipfs/$(date | ipfs add --only-hash -Q)" > curl_onlyifcached_negative_head 2>&1 &&
cat curl_onlyifcached_negative_head &&
grep "< HTTP/1.1 412 Precondition Failed" curl_onlyifcached_negative_head
'
test_expect_success "GET for /ipfs/ with only-if-cached succeeds when in local datastore" '
curl -svX GET -H "Cache-Control: only-if-cached" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_onlyifcached_postitive_out &&
cat curl_onlyifcached_postitive_out &&
grep "< HTTP/1.1 200 OK" curl_onlyifcached_postitive_out
'
test_expect_success "GET for /ipfs/ with only-if-cached fails when not in local datastore" '
curl -svX GET -H "Cache-Control: only-if-cached" "http://127.0.0.1:$GWAY_PORT/ipfs/$(date | ipfs add --only-hash -Q)" >/dev/null 2>curl_onlyifcached_negative_out &&
cat curl_onlyifcached_negative_out &&
grep "< HTTP/1.1 412 Precondition Failed" curl_onlyifcached_negative_out
'

# X-Ipfs-Path

## dir generated listing
Expand Down