-
Notifications
You must be signed in to change notification settings - Fork 483
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactored Serialise/Flat-Via. Fixes #6083 #6144
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
### Changed | ||
|
||
- Renamed decodeViaFlat to decodeViaFlatWith | ||
- Renamed AsSerialize to FlatViaSerialise |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
module Codec.Extras.FlatViaSerialise | ||
( FlatViaSerialise (..) | ||
) where | ||
|
||
import Codec.Serialise (Serialise, deserialiseOrFail, serialise) | ||
import Data.ByteString.Lazy qualified as BSL (toStrict) | ||
import Flat | ||
|
||
{- Note [Flat serialisation for strict and lazy bytestrings] | ||
The `flat` serialisation of a bytestring consists of a sequence of chunks, with each chunk preceded | ||
by a single byte saying how long it is. The end of a serialised bytestring is marked by a | ||
zero-length chunk. In the Plutus Core specification we recommend that all bytestrings should be | ||
serialised in a canonical way as a sequence of zero or more 255-byte chunks followed by an optional | ||
final chunk of length less than 255 followed by a zero-length chunk (ie, a 0x00 byte). We do allow | ||
the decoder to accept non-canonical encodings. The `flat` library always encodes strict Haskell | ||
bytestrings in this way, but lazy bytestrings, which are essentially lists of strict bytestrings, | ||
may be encoded non-canonically since it's more efficient just to emit a short chunk as is. The | ||
Plutus Core `bytestring` type is strict so bytestring values are always encoded canonically. | ||
However, we serialise `Data` objects (and perhaps objects of other types as well) by encoding them | ||
to CBOR and then flat-serialising the resulting bytestring; but the `serialise` method from | ||
`Codec.Serialise` produces lazy bytestrings and if we were to serialise them directly then we could | ||
end up with non-canonical encodings, which would mean that identical `Data` objects might be | ||
serialised into different bytestrings. To avoid this we convert the output of `serialise` into a | ||
strict bytestring before flat-encoding it. This may lead to a small loss of efficiency during | ||
encoding, but this doesn't matter because we only ever do flat serialisation off the chain. We can | ||
convert `Data` objects to bytestrings on the chain using the `serialiseData` builtin, but this | ||
performs CBOR serialisation and the result is always in a canonical form. -} | ||
|
||
-- | For deriving 'Flat' instances via 'Serialize'. | ||
newtype FlatViaSerialise a = FlatViaSerialise { unFlatViaSerialise :: a } | ||
|
||
instance Serialise a => Flat (FlatViaSerialise a) where | ||
-- See Note [Flat serialisation for strict and lazy bytestrings] | ||
bezirg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
encode = encode . BSL.toStrict . serialise . unFlatViaSerialise | ||
decode = do | ||
errOrX <- deserialiseOrFail <$> decode | ||
case errOrX of | ||
Left err -> fail $ show err -- Here we embed a 'Serialise' error into a 'Flat' one. | ||
Right x -> pure $ FlatViaSerialise x | ||
size = size . BSL.toStrict . serialise . unFlatViaSerialise |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,12 @@ | ||
{-# LANGUAGE LambdaCase #-} | ||
{-# LANGUAGE OverloadedStrings #-} | ||
|
||
module Codec.CBOR.Extras ( | ||
SerialiseViaFlat (..), | ||
decodeViaFlat, | ||
DeserialiseFailureInfo (..), | ||
DeserialiseFailureReason (..), | ||
readDeserialiseFailureInfo, | ||
) where | ||
module Codec.Extras.SerialiseViaFlat | ||
( SerialiseViaFlat (..) | ||
, decodeViaFlatWith | ||
, DeserialiseFailureInfo (..) | ||
, DeserialiseFailureReason (..) | ||
, readDeserialiseFailureInfo | ||
) where | ||
|
||
import Codec.CBOR.Decoding qualified as CBOR | ||
import Codec.CBOR.Read qualified as CBOR | ||
|
@@ -20,14 +19,14 @@ import Prettyprinter (Pretty (pretty), (<+>)) | |
{- | Newtype to provide 'Serialise' instances for types with a 'Flat' instance | ||
that just encodes the flat-serialized value as a CBOR bytestring | ||
-} | ||
newtype SerialiseViaFlat a = SerialiseViaFlat a | ||
newtype SerialiseViaFlat a = SerialiseViaFlat { unSerialiseViaFlat :: a } | ||
|
||
instance (Flat.Flat a) => Serialise (SerialiseViaFlat a) where | ||
encode (SerialiseViaFlat a) = encode $ Flat.flat a | ||
decode = SerialiseViaFlat <$> decodeViaFlat Flat.decode | ||
encode = encode . Flat.flat . unSerialiseViaFlat | ||
decode = SerialiseViaFlat <$> decodeViaFlatWith Flat.decode | ||
|
||
decodeViaFlat :: Flat.Get a -> CBOR.Decoder s a | ||
decodeViaFlat decoder = do | ||
decodeViaFlatWith :: Flat.Get a -> CBOR.Decoder s a | ||
decodeViaFlatWith decoder = do | ||
bs <- CBOR.decodeBytes | ||
-- lift any flat's failures to be cborg failures (MonadFail) | ||
fromRightM (fail . show) $ Flat.unflatWith decoder bs | ||
|
@@ -45,16 +44,16 @@ readDeserialiseFailureInfo (CBOR.DeserialiseFailure byteOffset reason) = | |
DeserialiseFailureInfo byteOffset $ interpretReason reason | ||
where | ||
-- Note that this is subject to change if `cborg` dependency changes. | ||
-- Currently: cborg-0.2.9.0 | ||
-- Currently: cborg-0.2.10.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think I'd looked at this code before. It'll probably be sheer luck if we notice that the |
||
interpretReason :: String -> DeserialiseFailureReason | ||
interpretReason = \case | ||
-- Relevant Sources: | ||
-- <https://github.com/well-typed/cborg/blob/cborg-0.2.9.0/cborg/src/Codec/CBOR/Read.hs#L226> | ||
-- <https://github.com/well-typed/cborg/blob/cborg-0.2.9.0/cborg/src/Codec/CBOR/Read.hs#L1424> | ||
-- <https://github.com/well-typed/cborg/blob/cborg-0.2.9.0/cborg/src/Codec/CBOR/Read.hs#L1441> | ||
-- <https://github.com/well-typed/cborg/blob/cborg-0.2.10.0/cborg/src/Codec/CBOR/Read.hs#L226> | ||
-- <https://github.com/well-typed/cborg/blob/cborg-0.2.10.0/cborg/src/Codec/CBOR/Read.hs#L1424> | ||
-- <https://github.com/well-typed/cborg/blob/cborg-0.2.10.0/cborg/src/Codec/CBOR/Read.hs#L1441> | ||
"end of input" -> EndOfInput | ||
-- Relevant Sources: | ||
-- <https://github.com/well-typed/cborg/blob/cborg-0.2.9.0/cborg/src/Codec/CBOR/Read.hs#L1051> | ||
-- <https://github.com/well-typed/cborg/blob/cborg-0.2.10.0/cborg/src/Codec/CBOR/Read.hs#L1051> | ||
"expected bytes" -> ExpectedBytes | ||
msg -> OtherReason msg | ||
|
||
|
@@ -80,8 +79,8 @@ data DeserialiseFailureReason | |
EndOfInput | ||
| -- | The bytes inside the input are malformed. | ||
ExpectedBytes | ||
| -- | A failure reason we (plutus) are not aware of, use whatever | ||
-- message that `cborg` returns. | ||
| -- | This is either a cbor failure that we (plutus) are not aware of, | ||
-- or an underlying flat failure. We use whatever message `cborg` or flat returns. | ||
OtherReason String | ||
deriving stock (Eq, Show) | ||
|
||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -12,17 +12,15 @@ | |||||
-- encoding of TPLC] and Note [Stable encoding of UPLC] before touching anything | ||||||
-- in this file. | ||||||
module PlutusCore.Flat | ||||||
( AsSerialize (..) | ||||||
, safeEncodeBits | ||||||
( safeEncodeBits | ||||||
) where | ||||||
|
||||||
import Codec.Extras.FlatViaSerialise | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
import PlutusCore.Core | ||||||
import PlutusCore.Data (Data) | ||||||
import PlutusCore.DeBruijn | ||||||
import PlutusCore.Name.Unique | ||||||
|
||||||
import Codec.Serialise (Serialise, deserialiseOrFail, serialise) | ||||||
import Data.ByteString.Lazy qualified as BSL (toStrict) | ||||||
import Data.Proxy | ||||||
import Flat | ||||||
import Flat.Decoder | ||||||
|
@@ -105,41 +103,6 @@ This phase-1 validation is in place both for normal (locked scripts) and for inl | |||||
so the nodes' behavior does not change. | ||||||
-} | ||||||
|
||||||
{- Note [Flat serialisation for strict and lazy bytestrings] | ||||||
The `flat` serialisation of a bytestring consists of a sequence of chunks, with each chunk preceded | ||||||
by a single byte saying how long it is. The end of a serialised bytestring is marked by a | ||||||
zero-length chunk. In the Plutus Core specification we recommend that all bytestrings should be | ||||||
serialised in a canonical way as a sequence of zero or more 255-byte chunks followed by an optional | ||||||
final chunk of length less than 255 followed by a zero-length chunk (ie, a 0x00 byte). We do allow | ||||||
the decoder to accept non-canonical encodings. The `flat` library always encodes strict Haskell | ||||||
bytestrings in this way, but lazy bytestrings, which are essentially lists of strict bytestrings, | ||||||
may be encoded non-canonically since it's more efficient just to emit a short chunk as is. The | ||||||
Plutus Core `bytestring` type is strict so bytestring values are always encoded canonically. | ||||||
However, we serialise `Data` objects (and perhaps objects of other types as well) by encoding them | ||||||
to CBOR and then flat-serialising the resulting bytestring; but the `serialise` method from | ||||||
`Codec.Serialise` produces lazy bytestrings and if we were to serialise them directly then we could | ||||||
end up with non-canonical encodings, which would mean that identical `Data` objects might be | ||||||
serialised into different bytestrings. To avoid this we convert the output of `serialise` into a | ||||||
strict bytestring before flat-encoding it. This may lead to a small loss of efficiency during | ||||||
encoding, but this doesn't matter because we only ever do flat serialisation off the chain. We can | ||||||
convert `Data` objects to bytestrings on the chain using the `serialiseData` builtin, but this | ||||||
performs CBOR serialisation and the result is always in a canonical form. -} | ||||||
|
||||||
-- | For deriving 'Flat' instances via 'Serialize'. | ||||||
newtype AsSerialize a = AsSerialize | ||||||
{ unAsSerialize :: a | ||||||
} deriving newtype (Serialise) | ||||||
|
||||||
instance Serialise a => Flat (AsSerialize a) where | ||||||
-- See Note [Flat serialisation for strict and lazy bytestrings] | ||||||
encode = encode . BSL.toStrict . serialise | ||||||
decode = do | ||||||
errOrX <- deserialiseOrFail <$> decode | ||||||
case errOrX of | ||||||
Left err -> fail $ show err -- Here we embed a 'Serialise' error into a 'Flat' one. | ||||||
Right x -> pure x | ||||||
size = size . BSL.toStrict . serialise | ||||||
|
||||||
safeEncodeBits :: NumBits -> Word8 -> Encoding | ||||||
safeEncodeBits maxBits v = | ||||||
if 2 ^ maxBits <= v | ||||||
|
@@ -156,7 +119,7 @@ encodeConstant = safeEncodeBits constantWidth | |||||
decodeConstant :: Get Word8 | ||||||
decodeConstant = dBEBits8 constantWidth | ||||||
|
||||||
deriving via AsSerialize Data instance Flat Data | ||||||
deriving via FlatViaSerialise Data instance Flat Data | ||||||
|
||||||
decodeKindedUniFlat :: Closed uni => Get (SomeTypeIn (Kinded uni)) | ||||||
decodeKindedUniFlat = | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ import Data.Vector qualified as V | |
import Flat | ||
import Flat.Decoder | ||
import Flat.Encoder | ||
import Flat.Encoder.Strict (sizeListWith) | ||
import Universe | ||
|
||
{- | ||
|
@@ -91,17 +92,6 @@ encoding of bytestrings is a sequence of 255-byte chunks. This is okay, since us | |
be broken up by the chunk metadata. | ||
-} | ||
|
||
-- TODO: This is present upstream in newer versions of flat, remove once we get there. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍🏼 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well spotted! |
||
-- | Compute the size needed for a list using the given size function for the elements. | ||
-- Goes with 'encodeListWith'. | ||
sizeListWith :: (a -> NumBits -> NumBits) -> [a] -> NumBits -> NumBits | ||
sizeListWith sizer = go | ||
where | ||
-- Single bit to say stop | ||
go [] sz = sz + 1 | ||
-- Size for the rest plus size for the element, plus one for a tag to say keep going | ||
go (x:xs) sz = go xs $ sizer x $ sz + 1 | ||
|
||
-- | Using 4 bits to encode term tags. | ||
termTagWidth :: NumBits | ||
termTagWidth = 4 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.