diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f28ab1a..875baa54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,7 +58,7 @@ When a release tag is made, this block of bullet points will just slide down to - The codecs do not reject other orderings when parsing serial data. The `ipld.Node` trees resulting from deserialization will still preserve the serialized order. However, it has now become impossible to re-encode data in that same preserved order. - - If doing your own encoding, there are customization options in `dagcbor.MarshalOptions.MapSortMode` and `dagjson.MarshalOptions.SortMapKeys`. + - If doing your own encoding, there are customization options in `dagcbor.EncodeOptions.MapSortMode` and `dagjson.EncodeOptions.MapSortMode`. (However, note that these options are not available to you while using any systems that only operate in terms of multicodec codes.) - _Be cautious of this change._ It is now extremely easy to write code which puts data into an `ipld.Node` in memory in one order, then save and load that data using these codecs, and end up with different data as a result because the sorting changes the order of data. diff --git a/codec/README.md b/codec/README.md new file mode 100644 index 00000000..8cc753ac --- /dev/null +++ b/codec/README.md @@ -0,0 +1,77 @@ +Codecs +====== + +The `go-ipld-prime/codec` package is a grouping package. +The subpackages contain some codecs which reside in this repo. + +The codecs included here are our "batteries included" codecs, +but they are not otherwise special. + +It is not necessary for a codec to be a subpackage here to be a valid codec to use with go-ipld-prime; +anything that implements the `ipld.Encoder` and `ipld.Decoder` interfaces is fine. + + +Terminology +----------- + +We generally refer to "codecs" as having an "encode" function and "decode" function. 
+ +We consider "encoding" to be the process of going from {Data Model} to {serial data}, +and "decoding" to be the process of going from {serial data} to {Data Model}. + +### Codec vs Multicodec + +A "codec" is _any_ function that goes from {Data Model} to {serial data}, or vice versa. + +A "multicodec" is a function which does that and is _also_ specifically recognized and described in +the tables in https://github.com/multiformats/multicodec/ . + +Multicodecs generally leave no further room for customization and configuration, +because their entire behavior is supposed to be specified by a multicodec indicator code number. + +Our codecs, in the child packages of this one, usually offer configuration options. +They also usually offer exactly one function, which does *not* allow configuration, +and which supplies multicodec-compatible behavior. +You'll see this marked in the docs on those functions. + +### Marshal vs Encode + +It's common to see the terms "marshal" and "unmarshal" used in golang. + +Those terms are usually describing when structured data is transformed into linearized, tokenized data +(and then, perhaps, all the way to serially encoded data), or vice versa. + +We would use the words the same way... except we don't end up using them, +because that feature doesn't really come up in our codec layer. + +In IPLD, we would describe mapping some typed data into Data Model as "marshalling". +(It's one step shy of tokenizing, but barely: Data Model does already have defined ordering for every element of data.) +And we do have systems that do this: +`bindnode` and our codegen systems both do this, implicitly, when they give you an `ipld.Node` of the representation of some data. + +We just don't end up talking about it as "marshalling" because of how it's done implicitly by those systems. +As a result, all of our features relating to codecs only end up speaking about "encoding" and "decoding". 
+ +### Legacy code + +There are some appearances of the words "marshal" and "unmarshal" in some of our subpackages here. + +That verbiage is generally on the way out. +For functions and structures with those names, you'll notice their docs marking them as deprecated. + + +Why have "batteries-included" codecs? +------------------------------------- + +These codecs live in this repo because they're commonly used, highly supported, +and general-purpose codecs that we recommend for widespread usage in new developments. + +Also, it's just plain nice to have something in-repo for development purposes. +It makes sure that if we try to make any API changes, we immediately see if they'd make codecs harder to implement. +We also use the batteries-included codecs for debugging, for test fixtures, and for benchmarking. + +Further yet, the batteries-included codecs let us offer getting-started APIs. +For example, we offer some helper APIs which use codecs such as JSON to give consumers of the libraries +one-step helper methods that "do the right thing" with zero config... so long as they happen to use that codec. +Even for consumers who don't use those codecs, such functions then serve as natural documentation +and examples for what to do to put their codec of choice to work. 
diff --git a/codec/api.go b/codec/api.go index 3e41f651..b61c4e07 100644 --- a/codec/api.go +++ b/codec/api.go @@ -42,3 +42,11 @@ type ErrBudgetExhausted struct{} func (e ErrBudgetExhausted) Error() string { return "decoder resource budget exhausted (message too long or too complex)" } + +type MapSortMode uint8 + +const ( + MapSortMode_None MapSortMode = iota + MapSortMode_Lexical + MapSortMode_RFC7049 +) diff --git a/codec/cbor/multicodec.go b/codec/cbor/multicodec.go index da086434..71335de8 100644 --- a/codec/cbor/multicodec.go +++ b/codec/cbor/multicodec.go @@ -3,8 +3,6 @@ package cbor import ( "io" - "github.com/polydawn/refmt/cbor" - "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/codec/dagcbor" "github.com/ipld/go-ipld-prime/multicodec" @@ -20,12 +18,22 @@ func init() { multicodec.RegisterDecoder(0x51, Decode) } +// Decode deserializes data from the given io.Reader and feeds it into the given ipld.NodeAssembler. +// Decode fits the ipld.Decoder function interface. +// +// This is the function that will be registered in the default multicodec registry during package init time. func Decode(na ipld.NodeAssembler, r io.Reader) error { - return dagcbor.Unmarshal(na, cbor.NewDecoder(cbor.DecodeOptions{}, r), - dagcbor.UnmarshalOptions{AllowLinks: false}) + return dagcbor.DecodeOptions{ + AllowLinks: false, + }.Decode(na, r) } +// Encode walks the given ipld.Node and serializes it to the given io.Writer. +// Encode fits the ipld.Encoder function interface. +// +// This is the function that will be registered in the default multicodec registry during package init time. 
func Encode(n ipld.Node, w io.Writer) error { - return dagcbor.Marshal(n, cbor.NewEncoder(w), - dagcbor.MarshalOptions{AllowLinks: false}) + return dagcbor.EncodeOptions{ + AllowLinks: false, + }.Encode(n, w) } diff --git a/codec/dagcbor/marshal.go b/codec/dagcbor/marshal.go index e008b683..e8ce035c 100644 --- a/codec/dagcbor/marshal.go +++ b/codec/dagcbor/marshal.go @@ -2,12 +2,15 @@ package dagcbor import ( "fmt" + "io" "sort" + "github.com/polydawn/refmt/cbor" "github.com/polydawn/refmt/shared" "github.com/polydawn/refmt/tok" ipld "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/codec" cidlink "github.com/ipld/go-ipld-prime/linking/cid" ) @@ -15,28 +18,46 @@ import ( // except for the `case ipld.Kind_Link` block, // which is dag-cbor's special sauce for schemafree links. -const ( - MapSortMode_none = iota - MapSortMode_RFC7049 -) - -type MarshalOptions struct { - // If true, allow encoding of Link nodes as CBOR tag(42), otherwise reject - // them as unencodable +// EncodeOptions can be used to customize the behavior of an encoding function. +// The Encode method on this struct fits the ipld.Encoder function interface. +type EncodeOptions struct { + // If true, allow encoding of Link nodes as CBOR tag(42); + // otherwise, reject them as unencodable. AllowLinks bool - // Control the sorting of map keys, MapSortMode_none for no sorting or - // MapSortMode_RFC7049 for length-first bytewise sorting as per RFC7049 and - // DAG-CBOR - MapSortMode int + // Control the sorting of map keys, using one of the `codec.MapSortMode_*` constants. + MapSortMode codec.MapSortMode } -func Marshal(n ipld.Node, sink shared.TokenSink, options MarshalOptions) error { +// Encode walks the given ipld.Node and serializes it to the given io.Writer. +// Encode fits the ipld.Encoder function interface. +// +// The behavior of the encoder can be customized by setting fields in the EncodeOptions struct before calling this method. 
+func (cfg EncodeOptions) Encode(n ipld.Node, w io.Writer) error { + // Probe for a builtin fast path. Shortcut to that if possible. + type detectFastPath interface { + EncodeDagCbor(io.Writer) error + } + if n2, ok := n.(detectFastPath); ok { + return n2.EncodeDagCbor(w) + } + // Okay, generic inspection path. + return Marshal(n, cbor.NewEncoder(w), cfg) +} + +// Future work: we would like to remove the Marshal function, +// and in particular, stop seeing types from refmt (like shared.TokenSink) be visible. +// Right now, some kinds of configuration (e.g. for whitespace and prettyprint) are only available through interacting with the refmt types; +// we should improve our API so that this can be done with only our own types in this package. + +// Marshal is a deprecated function. +// Please consider switching to EncodeOptions.Encode instead. +func Marshal(n ipld.Node, sink shared.TokenSink, options EncodeOptions) error { var tk tok.Token return marshal(n, &tk, sink, options) } -func marshal(n ipld.Node, tk *tok.Token, sink shared.TokenSink, options MarshalOptions) error { +func marshal(n ipld.Node, tk *tok.Token, sink shared.TokenSink, options EncodeOptions) error { switch n.Kind() { case ipld.Kind_Invalid: return fmt.Errorf("cannot traverse a node that is absent") @@ -138,14 +159,14 @@ func marshal(n ipld.Node, tk *tok.Token, sink shared.TokenSink, options MarshalO } } -func marshalMap(n ipld.Node, tk *tok.Token, sink shared.TokenSink, options MarshalOptions) error { +func marshalMap(n ipld.Node, tk *tok.Token, sink shared.TokenSink, options EncodeOptions) error { // Emit start of map. 
tk.Type = tok.TMapOpen tk.Length = int(n.Length()) // TODO: overflow check if _, err := sink.Step(tk); err != nil { return err } - if options.MapSortMode == MapSortMode_RFC7049 { + if options.MapSortMode != codec.MapSortMode_None { // Collect map entries, then sort by key type entry struct { key string @@ -163,14 +184,22 @@ func marshalMap(n ipld.Node, tk *tok.Token, sink shared.TokenSink, options Marsh } entries = append(entries, entry{keyStr, v}) } - // RFC7049 style sort as per DAG-CBOR spec - sort.Slice(entries, func(i, j int) bool { - li, lj := len(entries[i].key), len(entries[j].key) - if li == lj { + // Apply the desired sort function. + switch options.MapSortMode { + case codec.MapSortMode_Lexical: + sort.Slice(entries, func(i, j int) bool { return entries[i].key < entries[j].key - } - return li < lj - }) + }) + case codec.MapSortMode_RFC7049: + sort.Slice(entries, func(i, j int) bool { + // RFC7049 style sort as per DAG-CBOR spec + li, lj := len(entries[i].key), len(entries[j].key) + if li == lj { + return entries[i].key < entries[j].key + } + return li < lj + }) + } // Emit map contents (and recurse). for _, e := range entries { tk.Type = tok.TString diff --git a/codec/dagcbor/multicodec.go b/codec/dagcbor/multicodec.go index 05b7c982..c09daf66 100644 --- a/codec/dagcbor/multicodec.go +++ b/codec/dagcbor/multicodec.go @@ -3,9 +3,8 @@ package dagcbor import ( "io" - "github.com/polydawn/refmt/cbor" - "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/codec" "github.com/ipld/go-ipld-prime/multicodec" ) @@ -19,28 +18,31 @@ func init() { multicodec.RegisterDecoder(0x71, Decode) } +// Decode deserializes data from the given io.Reader and feeds it into the given ipld.NodeAssembler. +// Decode fits the ipld.Decoder function interface. +// +// A similar function is available on DecodeOptions type if you would like to customize any of the decoding details. 
+// This function uses the defaults for the dag-cbor codec +// (meaning: links (indicated by tag 42) are decoded). +// +// This is the function that will be registered in the default multicodec registry during package init time. func Decode(na ipld.NodeAssembler, r io.Reader) error { - // Probe for a builtin fast path. Shortcut to that if possible. - type detectFastPath interface { - DecodeDagCbor(io.Reader) error - } - if na2, ok := na.(detectFastPath); ok { - return na2.DecodeDagCbor(r) - } - // Okay, generic builder path. - return Unmarshal(na, cbor.NewDecoder(cbor.DecodeOptions{}, r), - UnmarshalOptions{AllowLinks: true}) + return DecodeOptions{ + AllowLinks: true, + }.Decode(na, r) } +// Encode walks the given ipld.Node and serializes it to the given io.Writer. +// Encode fits the ipld.Encoder function interface. +// +// A similar function is available on EncodeOptions type if you would like to customize any of the encoding details. +// This function uses the defaults for the dag-cbor codec +// (meaning: links are encoded, and map keys are sorted (with RFC7049 ordering!) during encode). +// +// This is the function that will be registered in the default multicodec registry during package init time. func Encode(n ipld.Node, w io.Writer) error { - // Probe for a builtin fast path. Shortcut to that if possible. - type detectFastPath interface { - EncodeDagCbor(io.Writer) error - } - if n2, ok := n.(detectFastPath); ok { - return n2.EncodeDagCbor(w) - } - // Okay, generic inspection path. 
- return Marshal(n, cbor.NewEncoder(w), - MarshalOptions{AllowLinks: true, MapSortMode: MapSortMode_RFC7049}) + return EncodeOptions{ + AllowLinks: true, + MapSortMode: codec.MapSortMode_RFC7049, + }.Encode(n, w) } diff --git a/codec/dagcbor/unmarshal.go b/codec/dagcbor/unmarshal.go index cf17cd7f..b4bef240 100644 --- a/codec/dagcbor/unmarshal.go +++ b/codec/dagcbor/unmarshal.go @@ -3,9 +3,11 @@ package dagcbor import ( "errors" "fmt" + "io" "math" cid "github.com/ipfs/go-cid" + "github.com/polydawn/refmt/cbor" "github.com/polydawn/refmt/shared" "github.com/polydawn/refmt/tok" @@ -27,12 +29,37 @@ const ( // except for the `case tok.TBytes` block, // which has dag-cbor's special sauce for detecting schemafree links. -type UnmarshalOptions struct { +// DecodeOptions can be used to customize the behavior of a decoding function. +// The Decode method on this struct fits the ipld.Decoder function interface. +type DecodeOptions struct { // If true, parse DAG-CBOR tag(42) as Link nodes, otherwise reject them AllowLinks bool } -func Unmarshal(na ipld.NodeAssembler, tokSrc shared.TokenSource, options UnmarshalOptions) error { +// Decode deserializes data from the given io.Reader and feeds it into the given ipld.NodeAssembler. +// Decode fits the ipld.Decoder function interface. +// +// The behavior of the decoder can be customized by setting fields in the DecodeOptions struct before calling this method. +func (cfg DecodeOptions) Decode(na ipld.NodeAssembler, r io.Reader) error { + // Probe for a builtin fast path. Shortcut to that if possible. + type detectFastPath interface { + DecodeDagCbor(io.Reader) error + } + if na2, ok := na.(detectFastPath); ok { + return na2.DecodeDagCbor(r) + } + // Okay, generic builder path. + return Unmarshal(na, cbor.NewDecoder(cbor.DecodeOptions{}, r), cfg) +} + +// Future work: we would like to remove the Unmarshal function, +// and in particular, stop seeing types from refmt (like shared.TokenSource) be visible. 
+// Right now, some kinds of configuration (e.g. for whitespace and prettyprint) are only available through interacting with the refmt types; +// we should improve our API so that this can be done with only our own types in this package. + +// Unmarshal is a deprecated function. +// Please consider switching to DecodeOptions.Decode instead. +func Unmarshal(na ipld.NodeAssembler, tokSrc shared.TokenSource, options DecodeOptions) error { // Have a gas budget, which will be decremented as we allocate memory, and an error returned when execeeded (or about to be exceeded). // This is a DoS defense mechanism. // It's *roughly* in units of bytes (but only very, VERY roughly) -- it also treats words as 1 in many cases. @@ -41,7 +68,7 @@ func Unmarshal(na ipld.NodeAssembler, tokSrc shared.TokenSource, options Unmarsh return unmarshal1(na, tokSrc, &gas, options) } -func unmarshal1(na ipld.NodeAssembler, tokSrc shared.TokenSource, gas *int, options UnmarshalOptions) error { +func unmarshal1(na ipld.NodeAssembler, tokSrc shared.TokenSource, gas *int, options DecodeOptions) error { var tk tok.Token done, err := tokSrc.Step(&tk) if err != nil { @@ -55,7 +82,7 @@ func unmarshal1(na ipld.NodeAssembler, tokSrc shared.TokenSource, gas *int, opti // starts with the first token already primed. Necessary to get recursion // to flow right without a peek+unpeek system. -func unmarshal2(na ipld.NodeAssembler, tokSrc shared.TokenSource, tk *tok.Token, gas *int, options UnmarshalOptions) error { +func unmarshal2(na ipld.NodeAssembler, tokSrc shared.TokenSource, tk *tok.Token, gas *int, options DecodeOptions) error { // FUTURE: check for schema.TypedNodeBuilder that's going to parse a Link (they can slurp any token kind they want). 
switch tk.Type { case tok.TMapOpen: diff --git a/codec/dagjson/marshal.go b/codec/dagjson/marshal.go index 644d38cb..371e8e1f 100644 --- a/codec/dagjson/marshal.go +++ b/codec/dagjson/marshal.go @@ -3,12 +3,15 @@ package dagjson import ( "encoding/base64" "fmt" + "io" "sort" + "github.com/polydawn/refmt/json" "github.com/polydawn/refmt/shared" "github.com/polydawn/refmt/tok" ipld "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/codec" cidlink "github.com/ipld/go-ipld-prime/linking/cid" ) @@ -16,7 +19,9 @@ import ( // except for the `case ipld.Kind_Link` block, // which is dag-json's special sauce for schemafree links. -type MarshalOptions struct { +// EncodeOptions can be used to customize the behavior of an encoding function. +// The Encode method on this struct fits the ipld.Encoder function interface. +type EncodeOptions struct { // If true, will encode nodes with a Link kind using the DAG-JSON // `{"/":"cid string"}` form. EncodeLinks bool @@ -25,12 +30,26 @@ type MarshalOptions struct { // `{"/":{"bytes":"base64 bytes..."}}` form. EncodeBytes bool - // If true, will sort map keys prior to encoding using plain bytewise - // comparison. - SortMapKeys bool + // Control the sorting of map keys, using one of the `codec.MapSortMode_*` constants. + MapSortMode codec.MapSortMode } -func Marshal(n ipld.Node, sink shared.TokenSink, options MarshalOptions) error { +// Encode walks the given ipld.Node and serializes it to the given io.Writer. +// Encode fits the ipld.Encoder function interface. +// +// The behavior of the encoder can be customized by setting fields in the EncodeOptions struct before calling this method. +func (cfg EncodeOptions) Encode(n ipld.Node, w io.Writer) error { + return Marshal(n, json.NewEncoder(w, json.EncodeOptions{}), cfg) +} + +// Future work: we would like to remove the Marshal function, +// and in particular, stop seeing types from refmt (like shared.TokenSink) be visible. +// Right now, some kinds of configuration (e.g. 
for whitespace and prettyprint) are only available through interacting with the refmt types; +// we should improve our API so that this can be done with only our own types in this package. + +// Marshal is a deprecated function. +// Please consider switching to EncodeOptions.Encode instead. +func Marshal(n ipld.Node, sink shared.TokenSink, options EncodeOptions) error { var tk tok.Token switch n.Kind() { case ipld.Kind_Invalid: @@ -46,7 +65,7 @@ func Marshal(n ipld.Node, sink shared.TokenSink, options MarshalOptions) error { if _, err := sink.Step(&tk); err != nil { return err } - if options.SortMapKeys { + if options.MapSortMode != codec.MapSortMode_None { // Collect map entries, then sort by key type entry struct { key string @@ -64,7 +83,22 @@ func Marshal(n ipld.Node, sink shared.TokenSink, options MarshalOptions) error { } entries = append(entries, entry{keyStr, v}) } - sort.Slice(entries, func(i, j int) bool { return entries[i].key < entries[j].key }) + // Apply the desired sort function. + switch options.MapSortMode { + case codec.MapSortMode_Lexical: + sort.Slice(entries, func(i, j int) bool { + return entries[i].key < entries[j].key + }) + case codec.MapSortMode_RFC7049: + sort.Slice(entries, func(i, j int) bool { + // RFC7049 style sort as per DAG-CBOR spec + li, lj := len(entries[i].key), len(entries[j].key) + if li == lj { + return entries[i].key < entries[j].key + } + return li < lj + }) + } // Emit map contents (and recurse). 
for _, e := range entries { tk.Type = tok.TString diff --git a/codec/dagjson/multicodec.go b/codec/dagjson/multicodec.go index 232d5fe9..d2931eb0 100644 --- a/codec/dagjson/multicodec.go +++ b/codec/dagjson/multicodec.go @@ -1,12 +1,10 @@ package dagjson import ( - "fmt" "io" - "github.com/polydawn/refmt/json" - "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/codec" "github.com/ipld/go-ipld-prime/multicodec" ) @@ -20,46 +18,33 @@ func init() { multicodec.RegisterDecoder(0x0129, Decode) } +// Decode deserializes data from the given io.Reader and feeds it into the given ipld.NodeAssembler. +// Decode fits the ipld.Decoder function interface. +// +// A similar function is available on DecodeOptions type if you would like to customize any of the decoding details. +// This function uses the defaults for the dag-json codec +// (meaning: links are decoded, and bytes are decoded). +// +// This is the function that will be registered in the default multicodec registry during package init time. func Decode(na ipld.NodeAssembler, r io.Reader) error { - err := Unmarshal(na, json.NewDecoder(r), UnmarshalOptions{ + return DecodeOptions{ ParseLinks: true, ParseBytes: true, - }) - if err != nil { - return err - } - // Slurp any remaining whitespace. - // (This is relevant if our reader is tee'ing bytes to a hasher, and - // the json contained any trailing whitespace.) - // (We can't actually support multiple objects per reader from here; - // we can't unpeek if we find a non-whitespace token, so our only - // option is to error if this reader seems to contain more content.) 
- var buf [1]byte - for { - _, err := r.Read(buf[:]) - switch buf[0] { - case ' ', 0x0, '\t', '\r', '\n': // continue - default: - return fmt.Errorf("unexpected content after end of json object") - } - if err == nil { - continue - } else if err == io.EOF { - return nil - } else { - return err - } - } + }.Decode(na, r) } +// Encode walks the given ipld.Node and serializes it to the given io.Writer. +// Encode fits the ipld.Encoder function interface. +// +// A similar function is available on EncodeOptions type if you would like to customize any of the encoding details. +// This function uses the defaults for the dag-json codec +// (meaning: links are encoded, bytes are encoded, and map keys are sorted during encode). +// +// This is the function that will be registered in the default multicodec registry during package init time. func Encode(n ipld.Node, w io.Writer) error { - // Shell out directly to generic inspection path. - // (There's not really any fastpaths of note for json.) - // Write another function if you need to tune encoding options about whitespace. - return Marshal(n, json.NewEncoder(w, json.EncodeOptions{}), - MarshalOptions{ - EncodeLinks: true, - EncodeBytes: true, - SortMapKeys: true, - }) + return EncodeOptions{ + EncodeLinks: true, + EncodeBytes: true, + MapSortMode: codec.MapSortMode_Lexical, + }.Encode(n, w) } diff --git a/codec/dagjson/unmarshal.go b/codec/dagjson/unmarshal.go index 93cab58b..2dcdeff4 100644 --- a/codec/dagjson/unmarshal.go +++ b/codec/dagjson/unmarshal.go @@ -3,8 +3,10 @@ package dagjson import ( "encoding/base64" "fmt" + "io" cid "github.com/ipfs/go-cid" + "github.com/polydawn/refmt/json" "github.com/polydawn/refmt/shared" "github.com/polydawn/refmt/tok" @@ -20,7 +22,9 @@ import ( // several steps of handling maps, because it necessitates peeking several // tokens before deciding what kind of value to create). -type UnmarshalOptions struct { +// DecodeOptions can be used to customize the behavior of a decoding function. 
+// The Decode method on this struct fits the ipld.Decoder function interface. +type DecodeOptions struct { // If true, parse DAG-JSON `{"/":"cid string"}` as a Link kind node rather // than a plain map ParseLinks bool @@ -30,7 +34,48 @@ type UnmarshalOptions struct { ParseBytes bool } -func Unmarshal(na ipld.NodeAssembler, tokSrc shared.TokenSource, options UnmarshalOptions) error { +// Decode deserializes data from the given io.Reader and feeds it into the given ipld.NodeAssembler. +// Decode fits the ipld.Decoder function interface. +// +// The behavior of the decoder can be customized by setting fields in the DecodeOptions struct before calling this method. +func (cfg DecodeOptions) Decode(na ipld.NodeAssembler, r io.Reader) error { + err := Unmarshal(na, json.NewDecoder(r), cfg) + if err != nil { + return err + } + // Slurp any remaining whitespace. + // This behavior may be due for review. + // (This is relevant if our reader is tee'ing bytes to a hasher, and + // the json contained any trailing whitespace.) + // (We can't actually support multiple objects per reader from here; + // we can't unpeek if we find a non-whitespace token, so our only + // option is to error if this reader seems to contain more content.) + var buf [1]byte + for { + _, err := r.Read(buf[:]) + switch buf[0] { + case ' ', 0x0, '\t', '\r', '\n': // continue + default: + return fmt.Errorf("unexpected content after end of json object") + } + if err == nil { + continue + } else if err == io.EOF { + return nil + } else { + return err + } + } +} + +// Future work: we would like to remove the Unmarshal function, +// and in particular, stop seeing types from refmt (like shared.TokenSource) be visible. +// Right now, some kinds of configuration (e.g. for whitespace and prettyprint) are only available through interacting with the refmt types; +// we should improve our API so that this can be done with only our own types in this package. + +// Unmarshal is a deprecated function. 
+// Please consider switching to DecodeOptions.Decode instead. +func Unmarshal(na ipld.NodeAssembler, tokSrc shared.TokenSource, options DecodeOptions) error { var st unmarshalState st.options = options done, err := tokSrc.Step(&st.tk[0]) @@ -46,7 +91,7 @@ func Unmarshal(na ipld.NodeAssembler, tokSrc shared.TokenSource, options Unmarsh type unmarshalState struct { tk [7]tok.Token // mostly, only 0'th is used... but [1:7] are used during lookahead for links. shift int // how many times to slide something out of tk[1:7] instead of getting a new token. - options UnmarshalOptions + options DecodeOptions } // step leaves a "new" token in tk[0], diff --git a/codec/json/multicodec.go b/codec/json/multicodec.go index 9f2f8c7c..fb58578f 100644 --- a/codec/json/multicodec.go +++ b/codec/json/multicodec.go @@ -1,12 +1,12 @@ package json import ( - "fmt" "io" rfmtjson "github.com/polydawn/refmt/json" "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/codec" "github.com/ipld/go-ipld-prime/codec/dagjson" "github.com/ipld/go-ipld-prime/multicodec" ) @@ -21,40 +21,21 @@ func init() { multicodec.RegisterDecoder(0x0200, Decode) } +// Decode deserializes data from the given io.Reader and feeds it into the given ipld.NodeAssembler. +// Decode fits the ipld.Decoder function interface. +// +// This is the function that will be registered in the default multicodec registry during package init time. func Decode(na ipld.NodeAssembler, r io.Reader) error { - // Shell out directly to generic builder path. - // (There's not really any fastpaths of note for json.) - err := dagjson.Unmarshal(na, rfmtjson.NewDecoder(r), dagjson.UnmarshalOptions{ + return dagjson.DecodeOptions{ ParseLinks: false, ParseBytes: false, - }) - if err != nil { - return err - } - // Slurp any remaining whitespace. - // (This is relevant if our reader is tee'ing bytes to a hasher, and - // the json contained any trailing whitespace.) 
- // (We can't actually support multiple objects per reader from here; - // we can't unpeek if we find a non-whitespace token, so our only - // option is to error if this reader seems to contain more content.) - var buf [1]byte - for { - _, err := r.Read(buf[:]) - switch buf[0] { - case ' ', 0x0, '\t', '\r', '\n': // continue - default: - return fmt.Errorf("unexpected content after end of json object") - } - if err == nil { - continue - } else if err == io.EOF { - return nil - } else { - return err - } - } + }.Decode(na, r) } +// Encode walks the given ipld.Node and serializes it to the given io.Writer. +// Encode fits the ipld.Encoder function interface. +// +// This is the function that will be registered in the default multicodec registry during package init time. func Encode(n ipld.Node, w io.Writer) error { // Shell out directly to generic inspection path. // (There's not really any fastpaths of note for json.) @@ -62,9 +43,9 @@ func Encode(n ipld.Node, w io.Writer) error { return dagjson.Marshal(n, rfmtjson.NewEncoder(w, rfmtjson.EncodeOptions{ Line: []byte{'\n'}, Indent: []byte{'\t'}, - }), dagjson.MarshalOptions{ + }), dagjson.EncodeOptions{ EncodeLinks: false, EncodeBytes: false, - SortMapKeys: false, + MapSortMode: codec.MapSortMode_None, }) } diff --git a/codec/marshal.go b/codec/marshal.go index 2b2eb5c6..4dc9f846 100644 --- a/codec/marshal.go +++ b/codec/marshal.go @@ -9,6 +9,12 @@ import ( ipld "github.com/ipld/go-ipld-prime" ) +// Future work: we would like to remove the Marshal function, +// and in particular, stop seeing types from refmt (like shared.TokenSink) be visible. + +// Marshal is a deprecated function. +// Please consider switching to one of the Encode functions of one of the subpackages instead. +// // Marshal provides a very general node-to-tokens marshalling feature. // It can handle either cbor or json by being combined with a refmt TokenSink. 
// diff --git a/codec/unmarshal.go b/codec/unmarshal.go index 51f7ac80..99d4cc3f 100644 --- a/codec/unmarshal.go +++ b/codec/unmarshal.go @@ -10,6 +10,9 @@ import ( ipld "github.com/ipld/go-ipld-prime" ) +// Future work: we would like to remove the Unmarshal function, +// and in particular, stop seeing types from refmt (like shared.TokenSource) be visible. + // wishlist: if we could reconstruct the ipld.Path of an error while // *unwinding* from that error... that'd be nice. // (trying to build it proactively would waste tons of allocs on the happy path.) @@ -19,6 +22,9 @@ import ( // They're effectively doing double duty: testing the builders, too. // (Is that sensible? Should it be refactored? Not sure; maybe!) +// Unmarshal is a deprecated function. +// Please consider switching to one of the Decode functions of one of the subpackages instead. +// // Unmarshal provides a very general tokens-to-node unmarshalling feature. // It can handle either cbor or json by being combined with a refmt TokenSink. // diff --git a/node/tests/testcase.go b/node/tests/testcase.go index 63648f87..5ccf1cba 100644 --- a/node/tests/testcase.go +++ b/node/tests/testcase.go @@ -11,6 +11,7 @@ import ( . "github.com/warpfork/go-wish" "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/codec" "github.com/ipld/go-ipld-prime/codec/dagjson" "github.com/ipld/go-ipld-prime/schema" "github.com/ipld/go-ipld-prime/traversal" @@ -210,10 +211,10 @@ func testMarshal(t *testing.T, n ipld.Node, data string) { // We'll marshal with "pretty" linebreaks and indents (and re-format the fixture to the same) for better diffing. 
prettyprint := json.EncodeOptions{Line: []byte{'\n'}, Indent: []byte{'\t'}} var buf bytes.Buffer - err := dagjson.Marshal(n, json.NewEncoder(&buf, prettyprint), dagjson.MarshalOptions{ + err := dagjson.Marshal(n, json.NewEncoder(&buf, prettyprint), dagjson.EncodeOptions{ EncodeLinks: true, EncodeBytes: true, - SortMapKeys: true, + MapSortMode: codec.MapSortMode_Lexical, }) if err != nil { t.Errorf("marshal failed: %s", err)