From c784502b4373c28a17965807e4ad7709f4fea36a Mon Sep 17 00:00:00 2001 From: Serial <69764315+Serial-ATA@users.noreply.github.com> Date: Sun, 15 Sep 2024 07:24:36 -0400 Subject: [PATCH] EBML: Improve `ElementReader` tree navigation --- lofty/src/ebml/element_reader.rs | 198 ++++++++++++--------- lofty/src/ebml/read.rs | 4 + lofty/src/ebml/read/segment.rs | 2 +- lofty/src/ebml/read/segment_attachments.rs | 7 +- lofty/src/ebml/read/segment_chapters.rs | 2 +- lofty/src/ebml/read/segment_cluster.rs | 2 +- lofty/src/ebml/read/segment_tags.rs | 16 +- lofty/src/ebml/read/segment_tracks.rs | 2 +- 8 files changed, 131 insertions(+), 102 deletions(-) diff --git a/lofty/src/ebml/element_reader.rs b/lofty/src/ebml/element_reader.rs index 4334563c..9b0143d3 100644 --- a/lofty/src/ebml/element_reader.rs +++ b/lofty/src/ebml/element_reader.rs @@ -2,7 +2,7 @@ use crate::ebml::vint::VInt; use crate::error::Result; use crate::macros::{decode_err, try_vec}; -use std::io::Read; +use std::io::{self, Read}; use std::ops::{Deref, DerefMut}; use byteorder::{BigEndian, ReadBytesExt}; @@ -241,15 +241,21 @@ ebml_master_elements! { }, } -#[derive(Debug)] +const MAX_DEPTH: u8 = 16; +const ROOT_DEPTH: u8 = 1; + +#[derive(Copy, Clone, Debug)] +struct Depth { + level: u8, + length: VInt, +} + +#[derive(Copy, Clone, Debug)] struct MasterElementContext { element: MasterElement, - remaining_length: VInt, + depth: Depth, } -const MAX_DEPTH: u8 = 16; -const ROOT_DEPTH: u8 = 1; - #[derive(Debug)] struct ElementReaderContext { depth: u8, @@ -269,8 +275,7 @@ struct ElementReaderContext { /// to know which depth to lock the reader at again (if any). /// /// This will **always** be sorted, so the current lock will be at the end. - lock_depths: Vec, - lock_len: VInt, + lock_depths: Vec, } impl Default for ElementReaderContext { @@ -284,11 +289,41 @@ impl Default for ElementReaderContext { max_size_length: 8, locked: false, lock_depths: Vec::with_capacity(MAX_DEPTH as usize), - lock_len: VInt::ZERO, } } } +impl ElementReaderContext { + fn current_master(&self) -> Option { + if self.depth == 0 { + return None; + } + + self.masters.get((self.depth - 1) as usize).copied() + } + + fn current_master_length(&self) -> VInt { + assert!(self.depth > 0); + self.current_master() + .expect("should have current master element") + .depth + .length + } + + fn propagate_length_change(&mut self, length: u64) { + for master in &mut self.masters { + master.depth.length = master.depth.length.saturating_sub(length); + } + } + + fn remaining_lock_length(&self) -> VInt { + assert!(self.locked && !self.lock_depths.is_empty()); + + let lock_depth = *self.lock_depths.last().unwrap(); + self.masters[lock_depth - 1].depth.length + } +} + #[derive(Debug)] pub(crate) enum ElementReaderYield { Master((ElementIdent, VInt)), @@ -321,30 +356,37 @@ impl ElementReaderYield { /// An EBML element reader. pub struct ElementReader { reader: R, - ctx: ElementReaderContext, + pub(self) ctx: ElementReaderContext, } impl Read for ElementReader where R: Read, { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + fn read(&mut self, buf: &mut [u8]) -> io::Result { if self.ctx.locked { - let lock_len = self.ctx.lock_len.value(); + let lock_len = self.ctx.remaining_lock_length().value(); if buf.len() > lock_len as usize { - return Err(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, "Cannot read past the end of the current master element", )); } } let ret = self.reader.read(buf)?; - let len = self.current_master_length(); - self.set_current_master_length(len.saturating_sub(ret as u64)); + if self.ctx.current_master().is_none() { + return Ok(ret); + } - if self.ctx.locked { - self.ctx.lock_len = self.ctx.lock_len.saturating_sub(ret as u64); + self.ctx.propagate_length_change(ret as u64); + + let current_master = self + .ctx + .current_master() + .expect("should have current master element"); + if current_master.depth.length == 0 { + self.goto_previous_master()?; } Ok(ret) @@ -370,34 +412,9 @@ where self.ctx.max_size_length = len } - fn current_master(&self) -> Option { - if self.ctx.depth == 0 { - assert!(self.ctx.masters.is_empty()); - return None; - } - - Some(self.ctx.masters[(self.ctx.depth - 1) as usize].element) - } - - fn current_master_length(&self) -> VInt { - if self.ctx.depth == 0 { - assert!(self.ctx.masters.is_empty()); - return VInt::ZERO; - } - - self.ctx.masters[(self.ctx.depth - 1) as usize].remaining_length - } - - fn set_current_master_length(&mut self, length: VInt) { - if self.ctx.depth == 0 { - assert!(self.ctx.masters.is_empty()); - return; - } - - self.ctx.masters[(self.ctx.depth - 1) as usize].remaining_length = length; - } - fn push_new_master(&mut self, master: MasterElement, size: VInt) -> Result<()> { + log::debug!("New master element: {:?}", master.id); + if self.ctx.depth == MAX_DEPTH { decode_err!(@BAIL Ebml, "Maximum depth reached"); } @@ -405,7 +422,7 @@ where // If we are at the root level, we do not increment the depth // since we are not actually inside a master element. // For example, we are moving from \EBML to \Segment. - let at_root_level = self.ctx.depth == ROOT_DEPTH && self.current_master_length() == 0; + let at_root_level = self.ctx.depth == ROOT_DEPTH && self.ctx.current_master_length() == 0; if at_root_level { assert_eq!(self.ctx.masters.len(), 1); self.ctx.masters.clear(); @@ -415,20 +432,47 @@ where self.ctx.masters.push(MasterElementContext { element: master, - remaining_length: size, + depth: Depth { + level: self.ctx.depth, + length: size, + }, }); Ok(()) } + fn goto_previous_master(&mut self) -> io::Result<()> { + let lock_depth = self + .ctx + .lock_depths + .last() + .copied() + .unwrap_or(ROOT_DEPTH as usize); + if lock_depth == self.ctx.depth as usize || self.ctx.depth == 0 { + return Ok(()); + } + + if self.ctx.depth == ROOT_DEPTH { + return Err(io::Error::new( + io::ErrorKind::Other, + "Cannot go to previous master element, already at root", + )); + } + + while self.ctx.current_master_length() == 0 + && (self.ctx.depth as usize != lock_depth && self.ctx.depth != ROOT_DEPTH) + { + self.ctx.depth -= 1; + let _ = self.ctx.masters.pop(); + } + + Ok(()) + } + fn goto_next_master(&mut self) -> Result { self.exhaust_current_master()?; - let header = ElementHeader::read( - &mut self.reader, - self.ctx.max_id_length, - self.ctx.max_size_length, - )?; + let header = ElementHeader::read(self, self.ctx.max_id_length, self.ctx.max_size_length)?; let Some(master) = master_elements().get(&header.id) else { // We encountered an unknown master element return Ok(ElementReaderYield::Unknown(header)); @@ -439,35 +483,23 @@ where Ok(ElementReaderYield::Master((master.id, header.size))) } - fn goto_previous_master(&mut self) -> Result<()> { - if self.ctx.depth == ROOT_DEPTH || self.ctx.lock_depths.last() == Some(&self.ctx.depth) { - decode_err!(@BAIL Ebml, "Cannot go to previous master element, already at root") - } - - self.exhaust_current_master()?; - - self.ctx.depth -= 1; - let _ = self.ctx.masters.pop(); - - Ok(()) - } - pub(crate) fn next(&mut self) -> Result { - let Some(current_master) = self.current_master() else { + let Some(current_master) = self.ctx.current_master() else { return self.goto_next_master(); }; - if self.ctx.locked && self.ctx.lock_len == 0 { + if self.ctx.locked && self.ctx.remaining_lock_length() == 0 { return Ok(ElementReaderYield::Eof); } - if self.current_master_length() == 0 { + if current_master.depth.length == 0 { return self.goto_next_master(); } let header = ElementHeader::read(self, self.ctx.max_id_length, self.ctx.max_size_length)?; let Some((_, child)) = current_master + .element .children .iter() .find(|(id, _)| *id == header.id) @@ -476,12 +508,11 @@ where }; if child.data_type == ElementDataType::Master { - self.push_new_master( - *master_elements() - .get(&header.id) - .expect("Nested master elements should be defined at this level."), - header.size, - )?; + let master = *master_elements() + .get(&header.id) + .expect("Nested master elements should be defined at this level."); + + self.push_new_master(master, header.size)?; // We encountered a nested master element return Ok(ElementReaderYield::Master((child.ident, header.size))); @@ -491,12 +522,11 @@ where } pub(crate) fn exhaust_current_master(&mut self) -> Result<()> { - let master_length = self.current_master_length().value(); - if master_length == 0 { + let Some(current_master) = self.ctx.current_master() else { return Ok(()); - } + }; - self.skip(master_length)?; + self.skip(current_master.depth.length.value())?; Ok(()) } @@ -504,8 +534,7 @@ where log::trace!("New lock at depth: {}", self.ctx.depth); self.ctx.locked = true; - self.ctx.lock_len = self.current_master_length(); - self.ctx.lock_depths.push(self.ctx.depth); + self.ctx.lock_depths.push(self.ctx.depth as usize); } pub(crate) fn unlock(&mut self) { @@ -520,7 +549,6 @@ where }; log::trace!("Moving lock to depth: {}", last); - self.ctx.lock_len = self.ctx.masters[(*last - 1) as usize].remaining_length; } pub(crate) fn children(&mut self) -> ElementChildIterator<'_, R> { @@ -531,12 +559,12 @@ where pub(crate) fn skip(&mut self, length: u64) -> Result<()> { log::trace!("Skipping {} bytes", length); - let current_master_length = self.current_master_length(); + let current_master_length = self.ctx.current_master_length(); if length > current_master_length.value() { decode_err!(@BAIL Ebml, "Cannot skip past the end of the current master element") } - std::io::copy(&mut self.by_ref().take(length), &mut std::io::sink())?; + std::io::copy(&mut self.by_ref().take(length), &mut io::sink())?; Ok(()) } @@ -688,9 +716,9 @@ where .lock_depths .last() .expect("a child iterator should always have a lock depth"); - assert!(lock_depth <= self.reader.ctx.depth); + assert!(lock_depth <= self.reader.ctx.depth as usize); - self.reader.ctx.masters[(lock_depth - 1) as usize].remaining_length == 0 + self.reader.ctx.remaining_lock_length() == 0 } } diff --git a/lofty/src/ebml/read.rs b/lofty/src/ebml/read.rs index a0104632..0a544b72 100644 --- a/lofty/src/ebml/read.rs +++ b/lofty/src/ebml/read.rs @@ -33,6 +33,8 @@ where // First we need to go through the elements in the EBML master element read_ebml_header(&mut element_reader, parse_options, &mut properties)?; + log::debug!("File verified to be EBML"); + loop { let res = element_reader.next()?; match res { @@ -70,6 +72,8 @@ fn read_ebml_header( where R: Read + Seek, { + log::trace!("Reading EBML header"); + match element_reader.next() { Ok(ElementReaderYield::Master((ElementIdent::EBML, _))) => {}, Ok(_) => decode_err!(@BAIL Ebml, "File does not start with an EBML master element"), diff --git a/lofty/src/ebml/read/segment.rs b/lofty/src/ebml/read/segment.rs index 6ee5bbf5..524e1db1 100644 --- a/lofty/src/ebml/read/segment.rs +++ b/lofty/src/ebml/read/segment.rs @@ -79,7 +79,7 @@ where } }, ElementReaderYield::Eof => break, - _ => unreachable!("Unhandled child element in \\Ebml\\Segment: {child:?}"), + _ => unreachable!("Unhandled child element in \\Segment: {child:?}"), } } diff --git a/lofty/src/ebml/read/segment_attachments.rs b/lofty/src/ebml/read/segment_attachments.rs index 5dcc9f2f..81007ea6 100644 --- a/lofty/src/ebml/read/segment_attachments.rs +++ b/lofty/src/ebml/read/segment_attachments.rs @@ -24,7 +24,7 @@ where tag.attached_files.push(attached_file); }, ElementReaderYield::Eof => break, - _ => unreachable!("Unhandled child element in \\Ebml\\Segment\\Attachments: {child:?}"), + _ => unreachable!("Unhandled child element in \\Segment\\Attachments: {child:?}"), } } @@ -50,8 +50,7 @@ where match child { ElementReaderYield::Eof => break, _ => unreachable!( - "Unhandled child element in \\Ebml\\Segment\\Attachments\\AttachedFile: \ - {child:?}" + "Unhandled child element in \\Segment\\Attachments\\AttachedFile: {child:?}" ), } }; @@ -84,7 +83,7 @@ where used_end_time = Some(children_reader.read_unsigned_int(size)?); }, _ => unreachable!( - "Unhandled child element in \\Ebml\\Segment\\Attachments\\AttachedFile: {child:?}" + "Unhandled child element in \\Segment\\Attachments\\AttachedFile: {child:?}" ), } } diff --git a/lofty/src/ebml/read/segment_chapters.rs b/lofty/src/ebml/read/segment_chapters.rs index fb028b1f..965b3ce2 100644 --- a/lofty/src/ebml/read/segment_chapters.rs +++ b/lofty/src/ebml/read/segment_chapters.rs @@ -14,5 +14,5 @@ pub(super) fn read_from( where R: Read + Seek, { - unimplemented!("\\Ebml\\Segment\\Chapters") + unimplemented!("\\Segment\\Chapters") } diff --git a/lofty/src/ebml/read/segment_cluster.rs b/lofty/src/ebml/read/segment_cluster.rs index 2863b3f9..a08d9bdc 100644 --- a/lofty/src/ebml/read/segment_cluster.rs +++ b/lofty/src/ebml/read/segment_cluster.rs @@ -13,5 +13,5 @@ pub(super) fn read_from( where R: Read + Seek, { - unimplemented!("\\Ebml\\Segment\\Cluster") + unimplemented!("\\Segment\\Cluster") } diff --git a/lofty/src/ebml/read/segment_tags.rs b/lofty/src/ebml/read/segment_tags.rs index cefcc13a..01f26a7d 100644 --- a/lofty/src/ebml/read/segment_tags.rs +++ b/lofty/src/ebml/read/segment_tags.rs @@ -20,7 +20,7 @@ where read_tag(&mut children_reader.children(), tag)? }, ElementReaderYield::Eof => break, - _ => unimplemented!("Unhandled child element in \\Ebml\\Segment\\Tags: {child:?}"), + _ => unimplemented!("Unhandled child element in \\Segment\\Tags: {child:?}"), } } @@ -36,7 +36,7 @@ where match child { ElementReaderYield::Eof => break, _ => { - unreachable!("Unhandled child element in \\Ebml\\Segment\\Tags\\Tag: {child:?}") + unreachable!("Unhandled child element in \\Segment\\Tags\\Tag: {child:?}") }, } }; @@ -49,7 +49,7 @@ where let _ = read_simple_tag(&mut children_reader.children())?; }, _ => { - unimplemented!("Unhandled child element in \\Ebml\\Segment\\Tags\\Tag: {master:?}"); + unimplemented!("Unhandled child element in \\Segment\\Tags\\Tag: {master:?}"); }, } } @@ -82,7 +82,7 @@ where match child { ElementReaderYield::Eof => break, _ => unreachable!( - "Unhandled child element in \\Ebml\\Segment\\Tags\\Tag\\Targets: {child:?}" + "Unhandled child element in \\Segment\\Tags\\Tag\\Targets: {child:?}" ), } }; @@ -107,9 +107,7 @@ where attachment_uid.push(children_reader.read_unsigned_int(size.value())?); }, _ => { - unreachable!( - "Unhandled child element in \\Ebml\\Segment\\Tags\\Tag\\Targets: {child:?}" - ) + unreachable!("Unhandled child element in \\Segment\\Tags\\Tag\\Targets: {child:?}") }, } } @@ -147,7 +145,7 @@ where match child { ElementReaderYield::Eof => break, _ => unreachable!( - "Unhandled child element in \\Ebml\\Segment\\Tags\\Tag\\SimpleTag: {child:?}" + "Unhandled child element in \\Segment\\Tags\\Tag\\SimpleTag: {child:?}" ), } }; @@ -199,7 +197,7 @@ where }, _ => { unreachable!( - "Unhandled child element in \\Ebml\\Segment\\Tags\\Tag\\SimpleTag: {child:?}" + "Unhandled child element in \\Segment\\Tags\\Tag\\SimpleTag: {child:?}" ); }, } diff --git a/lofty/src/ebml/read/segment_tracks.rs b/lofty/src/ebml/read/segment_tracks.rs index 016132c3..25da6ba9 100644 --- a/lofty/src/ebml/read/segment_tracks.rs +++ b/lofty/src/ebml/read/segment_tracks.rs @@ -24,7 +24,7 @@ where }, ElementReaderYield::Eof => break, _ => { - unimplemented!("Unhandled child element in \\Ebml\\Segment\\Tracks: {child:?}"); + unimplemented!("Unhandled child element in \\Segment\\Tracks: {child:?}"); }, } }