Test v3.14 roundtrip #34

Merged
merged 4 commits on Dec 3, 2024
Changes from all commits
3 changes: 3 additions & 0 deletions .github/workflows/release.yml
@@ -110,3 +110,6 @@ jobs:
uses: pypa/gh-action-pypi-publish@release/v1
with:
repository-url: https://test.pypi.org/legacy/
# Don't fail when this is another push to the same version. If we put
# the git hash in the version string, this could be removed.
skip-existing: true
2 changes: 2 additions & 0 deletions README.md
@@ -11,7 +11,9 @@ To convert rm files to other formats, you can use [rmc](https://github.com/rickl
### Unreleased

Fixes:

- Fix AssertionError when some ids are missing in a `CrdtSequence` ([#36](https://github.com/ricklupton/rmscene/pull/36))
- Store any unparsed data in blocks as raw bytes to allow for round-trip saving of files written in a newer format than the parsing code knows about.

### v0.6.0

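The changelog entry above describes the behaviour this PR tests: blocks read from a file written by newer reMarkable software keep any bytes the parser did not understand, so writing them back reproduces the original file. A minimal round-trip sketch, assuming `read_blocks` and `write_blocks` are importable from the package root (they live in `rmscene.scene_stream`; the exact `write_blocks` signature is an assumption here):

```python
from io import BytesIO

from rmscene import read_blocks, write_blocks  # package-level re-exports assumed

# "Color_and_tool_v3.14.4.rm" is the new test file added by this PR; any
# v6-format .rm file works the same way.
with open("Color_and_tool_v3.14.4.rm", "rb") as f:
    original = f.read()

# Unknown block types and trailing bytes are kept as raw data rather than
# being dropped or raising an error.
blocks = list(read_blocks(BytesIO(original)))

# Writing the same blocks back should reproduce the input byte-for-byte.
output = BytesIO()
write_blocks(output, blocks)
assert output.getvalue() == original
```
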
72 changes: 45 additions & 27 deletions src/rmscene/scene_stream.py
@@ -62,11 +62,44 @@ def lookup(cls, block_type: int) -> tp.Optional[tp.Type[Block]]:
return match
return None

@classmethod
def read(cls, reader: TaggedBlockReader) -> Optional[Block]:
"""
Maybe parse a block from the reader stream.
"""
with reader.read_block() as block_info:
if block_info is None:
return

block_type = Block.lookup(block_info.block_type)
if block_type:
try:
block = block_type.from_stream(reader)
except Exception as e:
_logger.warning("Error reading block: %s", e)
reader.data.data.seek(block_info.offset)
data = reader.data.read_bytes(block_info.size)
block = UnreadableBlock(str(e), data, block_info)
else:
msg = (
f"Unknown block type {block_info.block_type}. "
f"Skipping {block_info.size} bytes."
)
_logger.warning(msg)
data = reader.data.read_bytes(block_info.size)
block = UnreadableBlock(msg, data, block_info)

# Keep any unparsed extra data
block.extra_data = block_info.extra_data
return block

def write(self, writer: TaggedBlockWriter):
"""Write the block header and content to the stream."""
min_version, current_version = self.version_info(writer)
with writer.write_block(self.get_block_type(), min_version, current_version):
self.to_stream(writer)
# Write any leftover extra data that wasn't parsed
writer.data.write_bytes(self.extra_data)

@classmethod
@abstractmethod
@@ -421,6 +454,7 @@ def line_to_stream(line: si.Line, writer: TaggedBlockWriter, version: int = 2):
class SceneItemBlock(Block):
parent_id: CrdtId
item: CrdtSequenceItem
extra_value_data: bytes = b""

ITEM_TYPE: tp.ClassVar[int] = 0

@@ -457,16 +491,16 @@ def from_stream(cls, stream: TaggedBlockReader) -> SceneItemBlock:
item_type = stream.data.read_uint8()
assert item_type == subclass.ITEM_TYPE
value = subclass.value_from_stream(stream)
# Keep known extra data
extra_data = block_info.extra_data
# Keep known extra data from within the value subblock
extra_value_data = block_info.extra_data
else:
value = None
extra_data = b""
extra_value_data = b""

return subclass(
parent_id,
CrdtSequenceItem(item_id, left_id, right_id, deleted_length, value),
extra_data=extra_data,
extra_value_data=extra_value_data,
)

def to_stream(self, writer: TaggedBlockWriter):
@@ -482,7 +516,7 @@ def to_stream(self, writer: TaggedBlockWriter):
writer.data.write_uint8(self.ITEM_TYPE)
self.value_to_stream(writer, self.item.value)

writer.data.write_bytes(self.extra_data)
writer.data.write_bytes(self.extra_value_data)

@classmethod
@abstractmethod
@@ -795,28 +829,12 @@ def _read_blocks(stream: TaggedBlockReader) -> Iterator[Block]:
Parse blocks from reMarkable v6 file.
"""
while True:
with stream.read_block() as block_info:
if block_info is None:
# no more blocks
return

block_type = Block.lookup(block_info.block_type)
if block_type:
try:
yield block_type.from_stream(stream)
except Exception as e:
_logger.warning("Error reading block: %s", e)
stream.data.data.seek(block_info.offset)
data = stream.data.read_bytes(block_info.size)
yield UnreadableBlock(str(e), data, block_info)
else:
msg = (
f"Unknown block type {block_info.block_type}. "
f"Skipping {block_info.size} bytes."
)
_logger.warning(msg)
data = stream.data.read_bytes(block_info.size)
yield UnreadableBlock(msg, data, block_info)
maybe_block = Block.read(stream)
if maybe_block:
yield maybe_block
else:
# no more blocks
return


def read_blocks(data: tp.BinaryIO) -> Iterator[Block]:
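The scene_stream.py changes above move block-level error handling out of `_read_blocks` into the new `Block.read` classmethod: unknown block types and parse failures come back as `UnreadableBlock` instances holding the raw bytes, and any trailing bytes a known block did not consume are stored on `block.extra_data`. A short sketch of how a caller might inspect this after reading a newer-format file (the file name is hypothetical, and the `error` attribute name on `UnreadableBlock` is assumed from the constructor call in the diff):

```python
from rmscene.scene_stream import UnreadableBlock, read_blocks

with open("page_from_newer_firmware.rm", "rb") as f:  # hypothetical file name
    for block in read_blocks(f):
        if isinstance(block, UnreadableBlock):
            # Could not be parsed, but the raw bytes were kept, so the block
            # can still be written back out unchanged.
            print("unreadable block:", block.error)  # "error" attribute assumed
        elif block.extra_data:
            # Parsed successfully, but the file contained trailing bytes this
            # version of the parser does not yet understand.
            print(type(block).__name__, "kept", len(block.extra_data), "extra bytes")
```
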
98 changes: 68 additions & 30 deletions tests/test_scene_stream.py
@@ -33,21 +33,22 @@ def _hex_lines(b, n=32):
]


@pytest.mark.parametrize(
"test_file,version",
[
("Normal_AB.rm", "3.0"),
("Normal_A_stroke_2_layers.rm", "3.0"),
("Normal_A_stroke_2_layers_v3.2.2.rm", "3.2.2"),
("Normal_A_stroke_2_layers_v3.3.2.rm", "3.3.2"),
("Bold_Heading_Bullet_Normal.rm", "3.0"),
("Lines_v2.rm", "3.1"),
("Lines_v2_updated.rm", "3.2"), # extra 7fXXXX part of Line data was added
("Wikipedia_highlighted_p1.rm", "3.1"),
("Wikipedia_highlighted_p2.rm", "3.1"),
("With_SceneInfo_Block.rm", "3.4"), # XXX version?
],
)
TEST_FILES_AND_VERSIONS = [
("Normal_AB.rm", "3.0"),
("Normal_A_stroke_2_layers.rm", "3.0"),
("Normal_A_stroke_2_layers_v3.2.2.rm", "3.2.2"),
("Normal_A_stroke_2_layers_v3.3.2.rm", "3.3.2"),
("Bold_Heading_Bullet_Normal.rm", "3.0"),
("Lines_v2.rm", "3.1"),
("Lines_v2_updated.rm", "3.2"), # extra 7fXXXX part of Line data was added
("Wikipedia_highlighted_p1.rm", "3.1"),
("Wikipedia_highlighted_p2.rm", "3.1"),
("With_SceneInfo_Block.rm", "3.4"), # XXX version?
("Color_and_tool_v3.14.4.rm", "3.14"),
]


@pytest.mark.parametrize("test_file,version", TEST_FILES_AND_VERSIONS)
def test_full_roundtrip(test_file, version):
with open(DATA_PATH / test_file, "rb") as f:
data = f.read()
@@ -67,6 +68,28 @@ def test_full_roundtrip(test_file, version):
assert _hex_lines(input_buf.getvalue()) == _hex_lines(output_buf.getvalue())


# FIXME: remove xfail when parsing updated

TEST_FILES_FOR_FULL_PARSING = [
pytest.param(
filename,
marks=pytest.mark.xfail if filename == "Color_and_tool_v3.14.4.rm" else [],
)
for filename, _ in TEST_FILES_AND_VERSIONS
]


@pytest.mark.parametrize("test_file", TEST_FILES_FOR_FULL_PARSING)
def test_files_fully_parsed(test_file):
with open(DATA_PATH / test_file, "rb") as f:
result = list(read_blocks(f))

# Check that none of the blocks are unreadable and that none have extra data
for block in result:
assert not isinstance(block, UnreadableBlock)
assert not block.extra_data


def test_normal_ab():
with open(DATA_PATH / "Normal_AB.rm", "rb") as f:
result = list(read_blocks(f))
@@ -234,16 +257,13 @@ def test_blocks_roundtrip(block):
writer = TaggedBlockWriter(buf)
reader = TaggedBlockReader(buf)

# Use 4 as a fallback -- it only matters for the SceneItem blocks
block_type = getattr(block, "BLOCK_TYPE", 4)
with writer.write_block(block_type, 1, 1):
block.to_stream(writer)

block.write(writer)
buf.seek(0)
logger.info("After writing block %s", type(block))
logger.info("Buffer: %s", buf.getvalue().hex())
with reader.read_block():
block2 = block.from_stream(reader)

block2 = Block.read(reader)

assert block2 == block


@@ -259,7 +279,28 @@ def test_write_blocks():
assert buf.getvalue()[43:].hex() == "05000000000101001f01012101"


def test_blocks_keep_unknown_data():
def test_blocks_keep_unknown_data_in_main_block():
# The "E1 FF" represents new, unknown data -- note that this might need
# to be changed in future if the next id starts to actually be used in a
# future update!
data_hex = """
21000000 0000010D
1C 06000000
1F 0000
2F 0000
2C 05000000
1F 0000 21 01
3C 05000000
1F 0000 21 01
E1 FF
"""
buf = BytesIO(HEADER_V6 + bytes.fromhex(data_hex))
block = next(read_blocks(buf))
assert isinstance(block, SceneInfo)
assert block.extra_data == bytes.fromhex("E1 FF")


def test_blocks_keep_unknown_data_in_value_subblock():
# The "8f 010f" represents new, unknown data -- note that this might need
# to be changed in future if the next id starts to actually be used in a
# future update!
@@ -286,7 +327,7 @@
buf = BytesIO(HEADER_V6 + bytes.fromhex(data_hex))
block = next(read_blocks(buf))
assert isinstance(block, SceneLineItemBlock)
assert block.extra_data == bytes.fromhex("8f 0101")
assert block.extra_value_data == bytes.fromhex("8f 0101")


def test_error_in_block_contained():
@@ -339,6 +380,7 @@ def test_error_in_block_contained():
author_ids_block_strategy = st.builds(
AuthorIdsBlock,
st.dictionaries(st.integers(min_value=0, max_value=65535), st.uuids()),
extra_data=st.binary(),
)

block_strategy = st.one_of(
@@ -355,15 +397,11 @@ def test_blocks_roundtrip_2(block):
writer = TaggedBlockWriter(buf)
reader = TaggedBlockReader(buf)

# Mock header
with writer.write_block(4, 1, 1):
block.to_stream(writer)

block.write(writer)
buf.seek(0)
logger.info("After writing block %s", type(block))
logger.info("Buffer: %s", buf.getvalue().hex())
with reader.read_block():
block2 = block.from_stream(reader)
block2 = Block.read(reader)
assert block2 == block

