Skip to content

bugfix: fixed incorrect bytestring encoding PlutusData #269

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Oct 13, 2023
17 changes: 10 additions & 7 deletions pycardano/plutus.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
CBORSerializable,
DictCBORSerializable,
IndefiniteList,
MetadataIndefiniteList,
Primitive,
RawCBOR,
default_encoder,
Expand Down Expand Up @@ -489,7 +490,7 @@ def CONSTR_ID(cls):
return getattr(cls, k)

def __post_init__(self):
valid_types = (PlutusData, dict, IndefiniteList, int, bytes)
valid_types = (PlutusData, dict, MetadataIndefiniteList, int, bytes)
for f in fields(self):
if inspect.isclass(f.type) and not issubclass(f.type, valid_types):
raise TypeError(
Expand All @@ -499,7 +500,7 @@ def __post_init__(self):
def to_shallow_primitive(self) -> CBORTag:
primitives: Primitive = super().to_shallow_primitive()
if primitives:
primitives = IndefiniteList(primitives)
primitives = MetadataIndefiniteList(primitives)
tag = get_tag(self.CONSTR_ID)
if tag:
return CBORTag(tag, primitives)
Expand Down Expand Up @@ -553,7 +554,7 @@ def _dfs(obj):
return {"int": obj}
elif isinstance(obj, bytes):
return {"bytes": obj.hex()}
elif isinstance(obj, IndefiniteList) or isinstance(obj, list):
elif isinstance(obj, MetadataIndefiniteList) or isinstance(obj, list):
return {"list": [_dfs(item) for item in obj]}
elif isinstance(obj, dict):
return {"map": [{"v": _dfs(v), "k": _dfs(k)} for k, v in obj.items()]}
Expand Down Expand Up @@ -669,7 +670,7 @@ def _dfs(obj):
elif "bytes" in obj:
return bytes.fromhex(obj["bytes"])
elif "list" in obj:
return IndefiniteList([_dfs(item) for item in obj["list"]])
return MetadataIndefiniteList([_dfs(item) for item in obj["list"]])
else:
raise DeserializeException(f"Unexpected data structure: {obj}")
else:
Expand Down Expand Up @@ -701,12 +702,12 @@ class RawPlutusData(CBORSerializable):
def to_primitive(self) -> CBORTag:
def _dfs(obj):
if isinstance(obj, list) and obj:
return IndefiniteList([_dfs(item) for item in obj])
return MetadataIndefiniteList([_dfs(item) for item in obj])
elif isinstance(obj, dict):
return {_dfs(k): _dfs(v) for k, v in obj.items()}
elif isinstance(obj, CBORTag) and isinstance(obj.value, list) and obj.value:
if obj.tag != 102:
value = IndefiniteList([_dfs(item) for item in obj.value])
value = MetadataIndefiniteList([_dfs(item) for item in obj.value])
else:
value = [_dfs(item) for item in obj.value]
return CBORTag(tag=obj.tag, value=value)
Expand All @@ -723,7 +724,9 @@ def __deepcopy__(self, memo):
return self.__class__.from_cbor(self.to_cbor_hex())


Datum = Union[PlutusData, dict, int, bytes, IndefiniteList, RawCBOR, RawPlutusData]
Datum = Union[
PlutusData, dict, int, bytes, MetadataIndefiniteList, RawCBOR, RawPlutusData
]
"""Plutus Datum type. A Union type that contains all valid datum types."""


Expand Down
32 changes: 30 additions & 2 deletions pycardano/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ def __init__(self, li: Primitive): # type: ignore
super().__init__(li) # type: ignore


class MetadataIndefiniteList(UserList):
    """Marker list type used when encoding PlutusData.

    The class adds no behavior of its own on top of ``UserList``. Its only
    purpose is to be recognisable by type, so that the CBOR encoder can apply
    the PlutusData-specific rules (indefinite-length list framing and
    splitting of ``bytes`` items longer than 64 bytes) to its contents.
    """

    def __init__(self, li: Primitive):  # type: ignore
        # UserList copies the given items into ``self.data``.
        super().__init__(li)  # type: ignore


class IndefiniteFrozenList(FrozenList, IndefiniteList):  # type: ignore
    """Combine ``FrozenList`` and ``IndefiniteList``; adds nothing of its own."""

Expand Down Expand Up @@ -166,17 +173,31 @@ def default_encoder(
FrozenList,
IndefiniteFrozenList,
frozendict,
MetadataIndefiniteList,
),
), (
f"Type of input value is not CBORSerializable, " f"got {type(value)} instead."
)
if isinstance(value, (IndefiniteList, IndefiniteFrozenList)):
if isinstance(
value, (IndefiniteList, IndefiniteFrozenList, MetadataIndefiniteList)
):
# Currently, cbor2 doesn't support indefinite list, therefore we need special
# handling here to explicitly write header (b'\x9f'), each body item, and footer (b'\xff') to
# the output bytestring.
encoder.write(b"\x9f")
for item in value:
encoder.encode(item)
if (
isinstance(value, MetadataIndefiniteList)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see why the encoding as MetadataIndefiniteList is necessary. Can we not simply encode all bytes longer than 64 bytes as a list?

Copy link
Contributor Author

@theeldermillenial theeldermillenial Sep 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good question. I'm still new to Cardano. I guess these were my thoughts and how I would push back on encoding all bytes the same way.

  1. If bytes length only has a restriction for metadata, will encoding them the same way in other parts of the message cause an issue?
  2. Why impose the same criteria on other parts of the message if it's not strictly required?
  3. Won't chunking bytes data cause nominally larger message lengths, and by extension, marginally higher tx fees?

If you feel these are non-issues, I think it's easy enough to remove the dummy class and encode everything the same way.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in retrospect your comments on the issue I raised make more sense now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC the Cardano ledger generally specifies that, in the CBOR encoding of bytestrings, each piece should be at most 64 bytes long. This would prevent OOM attacks when reading very long bytestrings. However, the ledger does not enforce this, so different implementations abound on chain.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But isn't an OOM attack prevented by the maximum transaction size anyway? Again, just playing devil's advocate here. I'm still relatively new to Cardano.

I can revert changes back to the original without the dummy class for PlutusData. Just give me a yay or nay. With these changes I was able to successfully submit to smart contracts, so I know these changes work correctly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just wanted to bump this so I can finish it off :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer this without the dummy class - maybe you can revert to that and see if both the test cases and your submission pass?

then Jerry or I can create a test case for exactly this datum submission

and isinstance(item, bytes)
and len(item) > 64
):
encoder.write(b"\x5f")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only activated when an item is inside an indefinite list. Do we need to break byte strings that are not part of an indefinite list?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK we need to break all bytes that are longer than 64 bytes

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I may have misunderstood, but it seemed to me that this was the best place to put it since all PlutusData are cast to IndefiniteList.

If I pull it out of the IndefiniteList block, will it be handled properly? I guess it should.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess you (correctly) noticed that all PlutusData fields are part of an indefinite list. However, Plutus data can also contain bytes that are not fields of a PlutusData object (i.e. pure bytes or bytes that are keys in dictionaries).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So is the final answer to pull it outside of the IndefiniteList block?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this documentation it seems that yes, we need dummy classes. But not for lists, for bytes! :)

I am also wondering if there are cases where integers are incorrectly encoded (when they exceed 64 bytes in size), since I implemented a special case for this here: https://github.com/OpShin/uplc/blob/448f634cc1225de6dd7390b670b01396d2e71156/uplc/ast.py#L430

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess I am seeing more and more the intuition behind all the custom classes in OpShin.

I realize it's a bigger lift, but is there any reason why we wouldn't just take OpShin's implementation and pull it over to here? Then, just rely on pycardano rather than duplicating efforts across repos?

I apologize if I'm speaking out of ignorance and there are things I'm not considering, but this seems like it might be the more lasting implementation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No worries at all. The code I wrote for OpShin/UPLC was created after pycardano was written, hence there might be a point in copying it over. Then again, the UPLC implementation is really only catered towards PlutusData, while PyCardano also handles serialization of all other kinds of things - not sure if anything will break.

Long story short: The only reason that there are two different implementations is that no one yet tried to unify them.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I would like to have this done sooner rather than later. Can I just create a dummy class for bytes to patch this and open a more general issue about syncing datum handling between OpShin and pycardano?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes sounds good to me! Would also prefer to get this resolved over any big open stale PR :)

for i in range(0, len(item), 64):
imax = min(i + 64, len(item))
encoder.encode(item[i:imax])
encoder.write(b"\xff")
else:
encoder.encode(item)
encoder.write(b"\xff")
elif isinstance(value, RawCBOR):
encoder.write(value.cbor)
Expand Down Expand Up @@ -511,6 +532,13 @@ def _restore_typed_primitive(
return IndefiniteList(v)
except TypeError:
raise DeserializeException(f"Can not initialize IndefiniteList from {v}")
elif isclass(t) and issubclass(t, MetadataIndefiniteList):
try:
return MetadataIndefiniteList(v)
except TypeError:
raise DeserializeException(
f"Can not initialize MetadataIndefiniteList from {v}"
)
elif hasattr(t, "__origin__") and (t.__origin__ is dict):
t_args = t.__args__
if len(t_args) != 2:
Expand Down
42 changes: 34 additions & 8 deletions test/pycardano/test_plutus.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
RedeemerTag,
plutus_script_hash,
)
from pycardano.serialization import IndefiniteList
from pycardano.serialization import IndefiniteList, MetadataIndefiniteList


@dataclass
Expand All @@ -28,7 +28,7 @@ class MyTest(PlutusData):

a: int
b: bytes
c: IndefiniteList
c: MetadataIndefiniteList
d: dict


Expand Down Expand Up @@ -76,7 +76,9 @@ def test_plutus_data():
"""Ground truth of this test is generated by test/resources/haskell/PlutusData. See its README for more details."""
key_hash = bytes.fromhex("c2ff616e11299d9094ce0a7eb5b7284b705147a822f4ffbd471f971a")
deadline = 1643235300000
testa = BigTest(MyTest(123, b"1234", IndefiniteList([4, 5, 6]), {1: b"1", 2: b"2"}))
testa = BigTest(
MyTest(123, b"1234", MetadataIndefiniteList([4, 5, 6]), {1: b"1", 2: b"2"})
)
testb = LargestTest()

my_vesting = VestingParam(
Expand All @@ -93,7 +95,9 @@ def test_plutus_data():
def test_plutus_data_json():
key_hash = bytes.fromhex("c2ff616e11299d9094ce0a7eb5b7284b705147a822f4ffbd471f971a")
deadline = 1643235300000
testa = BigTest(MyTest(123, b"1234", IndefiniteList([4, 5, 6]), {1: b"1", 2: b"2"}))
testa = BigTest(
MyTest(123, b"1234", MetadataIndefiniteList([4, 5, 6]), {1: b"1", 2: b"2"})
)
testb = LargestTest()

my_vesting = VestingParam(
Expand Down Expand Up @@ -159,7 +163,7 @@ def test_plutus_data_cbor_dict():


def test_plutus_data_to_json_wrong_type():
test = MyTest(123, b"1234", IndefiniteList([4, 5, 6]), {1: b"1", 2: b"2"})
test = MyTest(123, b"1234", MetadataIndefiniteList([4, 5, 6]), {1: b"1", 2: b"2"})
test.a = "123"
with pytest.raises(TypeError):
test.to_json()
Expand Down Expand Up @@ -223,7 +227,7 @@ def test_execution_units_bool():


def test_redeemer():
data = MyTest(123, b"234", IndefiniteList([4, 5, 6]), {1: b"1", 2: b"2"})
data = MyTest(123, b"234", MetadataIndefiniteList([4, 5, 6]), {1: b"1", 2: b"2"})
redeemer = MyRedeemer(data, ExecutionUnits(1000000, 1000000))
redeemer.tag = RedeemerTag.SPEND
assert (
Expand All @@ -234,7 +238,7 @@ def test_redeemer():


def test_redeemer_empty_datum():
data = MyTest(123, b"234", IndefiniteList([]), {1: b"1", 2: b"2"})
data = MyTest(123, b"234", MetadataIndefiniteList([]), {1: b"1", 2: b"2"})
redeemer = MyRedeemer(data, ExecutionUnits(1000000, 1000000))
redeemer.tag = RedeemerTag.SPEND
assert (
Expand Down Expand Up @@ -306,7 +310,7 @@ def test_clone_raw_plutus_data():
def test_clone_plutus_data():
key_hash = bytes.fromhex("c2ff616e11299d9094ce0a7eb5b7284b705147a822f4ffbd471f971a")
deadline = 1643235300000
testa = BigTest(MyTest(123, b"1234", IndefiniteList([4, 5, 6]), {1: b"1", 2: b"2"}))
testa = BigTest(MyTest(123, b"1234", MetadataIndefiniteList([4, 5, 6]), {1: b"1", 2: b"2"}))
testb = LargestTest()
my_vesting = VestingParam(
beneficiary=key_hash, deadline=deadline, testa=testa, testb=testb
Expand Down Expand Up @@ -396,3 +400,25 @@ class A(PlutusData):
assert (
res == res2
), "Same class has different default constructor id in two consecutive runs"


def test_plutus_data_long_bytes():
    """Check the CBOR produced for a PlutusData bytes field longer than 64 bytes."""

    @dataclass
    class A(PlutusData):
        a: bytes

    long_text = (
        "The line separating good and evil passes ... right through every human heart."
    )

    # The 77-byte payload is expected as an indefinite-length bytestring:
    # 0x5f, a 64-byte chunk (0x5840 ...), a 13-byte chunk (0x4d ...), 0xff.
    expected_hex = (
        "d866821a51e835649f5f5840546865206c696e652073657061726174696e6720676f6f6420616e"
        "64206576696c20706173736573202e2e2e207269676874207468726f7567682065766572794d"
        "2068756d616e2068656172742effff"
    )

    datum = A(long_text.encode())
    actual_hex = datum.to_cbor_hex()

    assert (
        actual_hex == expected_hex
    ), "Long metadata bytestring is encoded incorrectly."