From 19ededc9eb5b84985d11c0403dbfba29442f0cea Mon Sep 17 00:00:00 2001 From: Wonwoo Choi Date: Fri, 20 Sep 2024 00:30:37 +0900 Subject: [PATCH 1/7] Bufferless container parser --- jxl/src/container/box_header.rs | 6 +- jxl/src/container/mod.rs | 274 +++++++++++++++++++++----------- jxl/src/headers/frame_header.rs | 5 +- jxl/src/util.rs | 2 + jxl_cli/src/main.rs | 30 +++- 5 files changed, 212 insertions(+), 105 deletions(-) diff --git a/jxl/src/container/box_header.rs b/jxl/src/container/box_header.rs index f64a219..1c9237d 100644 --- a/jxl/src/container/box_header.rs +++ b/jxl/src/container/box_header.rs @@ -5,7 +5,7 @@ // // Originally written for jxl-oxide. -use crate::{error::Error, util::ConcatSlice}; +use crate::error::Error; /// Box header used in JPEG XL containers. #[derive(Debug, Clone)] @@ -24,9 +24,7 @@ pub enum HeaderParseResult { } impl ContainerBoxHeader { - pub(super) fn parse(reader: &ConcatSlice<'_, '_>) -> Result { - let mut buf = [0u8; 16]; - let buf = reader.peek(&mut buf); + pub(super) fn parse(buf: &[u8]) -> Result { let (tbox, box_size, header_size) = match *buf { [0, 0, 0, 1, t0, t1, t2, t3, s0, s1, s2, s3, s4, s5, s6, s7, ..] => { let xlbox = u64::from_be_bytes([s0, s1, s2, s3, s4, s5, s6, s7]); diff --git a/jxl/src/container/mod.rs b/jxl/src/container/mod.rs index 862746b..6ced0e9 100644 --- a/jxl/src/container/mod.rs +++ b/jxl/src/container/mod.rs @@ -9,25 +9,14 @@ pub mod box_header; use box_header::*; -use crate::{error::Error, util::ConcatSlice}; +use crate::error::{Error, Result}; /// Container format parser. 
-#[derive(Default)] +#[derive(Debug, Default)] pub struct ContainerParser { state: DetectState, - buf: Vec, - codestream: Vec, - aux_boxes: Vec<(ContainerBoxType, Vec)>, jxlp_index_state: JxlpIndexState, -} - -impl std::fmt::Debug for ContainerParser { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ContainerParser") - .field("state", &self.state) - .field("jxlp_index_state", &self.jxlp_index_state) - .finish_non_exhaustive() - } + previous_consumed_bytes: usize, } #[derive(Debug, Default)] @@ -37,8 +26,8 @@ enum DetectState { WaitingBoxHeader, WaitingJxlpIndex(ContainerBoxHeader), InAuxBox { + #[allow(unused)] header: ContainerBoxHeader, - data: Vec, bytes_left: Option, }, InCodestream { @@ -70,43 +59,53 @@ enum JxlpIndexState { JxlpFinished, } -impl ContainerParser { +/// Iterator that reads over a buffer and emits parser events. +pub struct ParseEvents<'inner, 'buf> { + inner: &'inner mut ContainerParser, + remaining_input: &'buf [u8], + finished: bool, +} + +impl<'inner, 'buf> ParseEvents<'inner, 'buf> { const CODESTREAM_SIG: [u8; 2] = [0xff, 0x0a]; const CONTAINER_SIG: [u8; 12] = [0, 0, 0, 0xc, b'J', b'X', b'L', b' ', 0xd, 0xa, 0x87, 0xa]; - pub fn new() -> Self { - Self::default() - } - - pub fn kind(&self) -> BitstreamKind { - match self.state { - DetectState::WaitingSignature => BitstreamKind::Unknown, - DetectState::WaitingBoxHeader - | DetectState::WaitingJxlpIndex(..) - | DetectState::InAuxBox { .. } => BitstreamKind::Container, - DetectState::InCodestream { kind, .. 
} | DetectState::Done(kind) => kind, + fn new(parser: &'inner mut ContainerParser, input: &'buf [u8]) -> Self { + parser.previous_consumed_bytes = 0; + Self { + inner: parser, + remaining_input: input, + finished: false, } } - pub fn feed_bytes(&mut self, input: &[u8]) -> Result<(), Error> { - let state = &mut self.state; - let mut reader = ConcatSlice::new(&self.buf, input); + fn emit_single(&mut self) -> Result>> { + let state = &mut self.inner.state; + let jxlp_index_state = &mut self.inner.jxlp_index_state; + let buf = &mut self.remaining_input; loop { + if buf.is_empty() { + self.finished = true; + return Ok(None); + } + match state { DetectState::WaitingSignature => { - let mut signature_buf = [0u8; 12]; - let buf = reader.peek(&mut signature_buf); if buf.starts_with(&Self::CODESTREAM_SIG) { tracing::trace!("Codestream signature found"); *state = DetectState::InCodestream { kind: BitstreamKind::BareCodestream, bytes_left: None, }; + return Ok(Some(ParseEvent::BitstreamKind( + BitstreamKind::BareCodestream, + ))); } else if buf.starts_with(&Self::CONTAINER_SIG) { tracing::trace!("Container signature found"); *state = DetectState::WaitingBoxHeader; - reader.advance(Self::CONTAINER_SIG.len()); + *buf = &buf[Self::CONTAINER_SIG.len()..]; + return Ok(Some(ParseEvent::BitstreamKind(BitstreamKind::Container))); } else if !Self::CODESTREAM_SIG.starts_with(buf) && !Self::CONTAINER_SIG.starts_with(buf) { @@ -115,21 +114,22 @@ impl ContainerParser { kind: BitstreamKind::Invalid, bytes_left: None, }; + return Ok(Some(ParseEvent::BitstreamKind(BitstreamKind::Invalid))); } else { - break; + return Ok(None); } } - DetectState::WaitingBoxHeader => match ContainerBoxHeader::parse(&reader)? { + DetectState::WaitingBoxHeader => match ContainerBoxHeader::parse(buf)? 
{ HeaderParseResult::Done { header, header_size, } => { - reader.advance(header_size); + *buf = &buf[header_size..]; let tbox = header.box_type(); if tbox == ContainerBoxType::CODESTREAM { - match self.jxlp_index_state { + match jxlp_index_state { JxlpIndexState::Initial => { - self.jxlp_index_state = JxlpIndexState::SingleJxlc; + *jxlp_index_state = JxlpIndexState::SingleJxlc; } JxlpIndexState::SingleJxlc => { tracing::debug!("Duplicate jxlc box found"); @@ -152,9 +152,9 @@ impl ContainerParser { } } - match &mut self.jxlp_index_state { + match jxlp_index_state { JxlpIndexState::Initial => { - self.jxlp_index_state = JxlpIndexState::Jxlp(0); + *jxlp_index_state = JxlpIndexState::Jxlp(0); } JxlpIndexState::Jxlp(index) => { *index += 1; @@ -172,31 +172,25 @@ impl ContainerParser { *state = DetectState::WaitingJxlpIndex(header); } else { let bytes_left = header.box_size().map(|x| x as usize); - *state = DetectState::InAuxBox { - header, - data: Vec::new(), - bytes_left, - }; + *state = DetectState::InAuxBox { header, bytes_left }; } } - HeaderParseResult::NeedMoreData => break, + HeaderParseResult::NeedMoreData => return Ok(None), }, DetectState::WaitingJxlpIndex(header) => { - let mut buf = [0u8; 4]; - reader.peek(&mut buf); - if buf.len() < 4 { - break; - } + let &[b0, b1, b2, b3, ..] = &**buf else { + return Ok(None); + }; - let index = u32::from_be_bytes(buf); - reader.advance(4); + let index = u32::from_be_bytes([b0, b1, b2, b3]); + *buf = &buf[4..]; let is_last = index & 0x80000000 != 0; let index = index & 0x7fffffff; - match self.jxlp_index_state { + match *jxlp_index_state { JxlpIndexState::Jxlp(expected_index) if expected_index == index => { if is_last { - self.jxlp_index_state = JxlpIndexState::JxlpFinished; + *jxlp_index_state = JxlpIndexState::JxlpFinished; } } JxlpIndexState::Jxlp(expected_index) => { @@ -221,69 +215,167 @@ impl ContainerParser { DetectState::InCodestream { bytes_left: None, .. 
} => { - reader.fill_vec(None, &mut self.codestream)?; - break; + let payload = *buf; + *buf = &[]; + return Ok(Some(ParseEvent::Codestream(payload))); } DetectState::InCodestream { bytes_left: Some(bytes_left), .. } => { - let bytes_written = reader.fill_vec(Some(*bytes_left), &mut self.codestream)?; - *bytes_left -= bytes_written; - if *bytes_left == 0 { + let payload = if buf.len() >= *bytes_left { + let (payload, remaining) = buf.split_at(*bytes_left); *state = DetectState::WaitingBoxHeader; + *buf = remaining; + payload } else { - break; - } + let payload = *buf; + *bytes_left -= buf.len(); + *buf = &[]; + payload + }; + return Ok(Some(ParseEvent::Codestream(payload))); } DetectState::InAuxBox { - data, + header: _, bytes_left: None, - .. } => { - reader.fill_vec(None, data)?; - break; + let _payload = *buf; + *buf = &[]; + // FIXME: emit auxiliary box event } DetectState::InAuxBox { - header, - data, + header: _, bytes_left: Some(bytes_left), } => { - let bytes_written = reader.fill_vec(Some(*bytes_left), data)?; - *bytes_left -= bytes_written; - if *bytes_left == 0 { - self.aux_boxes - .push((header.box_type(), std::mem::take(data))); + let _payload = if buf.len() >= *bytes_left { + let (payload, remaining) = buf.split_at(*bytes_left); *state = DetectState::WaitingBoxHeader; + *buf = remaining; + payload } else { - break; - } + let payload = *buf; + *bytes_left -= buf.len(); + *buf = &[]; + payload + }; + // FIXME: emit auxiliary box event } - DetectState::Done(_) => break, + DetectState::Done(_) => return Ok(None), } } + } +} - let (buf_slice, input_slice) = reader.remaining_slices(); - if buf_slice.is_empty() { - self.buf.clear(); - } else { - let remaining_buf_from = self.buf.len() - buf_slice.len(); - self.buf.drain(..remaining_buf_from); +impl std::fmt::Debug for ParseEvents<'_, '_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ParseEvents") + .field("inner", &self.inner) + .field( + "remaining_input", + 
&format_args!("({} byte(s))", self.remaining_input.len()), + ) + .field("finished", &self.finished) + .finish() + } +} + +impl<'inner, 'buf> Iterator for ParseEvents<'inner, 'buf> { + type Item = Result>; + + fn next(&mut self) -> Option { + if self.finished { + return None; + } + + let initial_buf = self.remaining_input; + let event = self.emit_single(); + + if event.is_err() { + self.finished = true; + } + + self.inner.previous_consumed_bytes += initial_buf.len() - self.remaining_input.len(); + event.transpose() + } +} + +/// Parser event emitted by [`ParseEvents`]. +pub enum ParseEvent<'buf> { + /// Bitstream structure is detected. + BitstreamKind(BitstreamKind), + /// Codestream data is read. + Codestream(&'buf [u8]), +} + +impl std::fmt::Debug for ParseEvent<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::BitstreamKind(kind) => f.debug_tuple("BitstreamKind").field(kind).finish(), + Self::Codestream(buf) => f + .debug_tuple("Codestream") + .field(&format_args!("{} byte(s)", buf.len())) + .finish(), } - self.buf.try_reserve(input_slice.len())?; - self.buf.extend_from_slice(input_slice); - Ok(()) } +} - pub fn take_bytes(&mut self) -> Vec { - std::mem::take(&mut self.codestream) +impl ContainerParser { + pub fn new() -> Self { + Self::default() } - pub fn finish(&mut self) { - if let DetectState::InAuxBox { header, data, .. } = &mut self.state { - self.aux_boxes - .push((header.box_type(), std::mem::take(data))); + pub fn kind(&self) -> BitstreamKind { + match self.state { + DetectState::WaitingSignature => BitstreamKind::Unknown, + DetectState::WaitingBoxHeader + | DetectState::WaitingJxlpIndex(..) + | DetectState::InAuxBox { .. } => BitstreamKind::Container, + DetectState::InCodestream { kind, .. } | DetectState::Done(kind) => kind, } + } + + /// Parses input buffer and generates parser events. + /// + /// The parser might not fully consume the buffer. 
Use [`previous_consumed_bytes`] to get how + /// many bytes are consumed. Bytes not consumed by the parser should be processed again. + /// + /// [`previous_consumed_bytes`]: ContainerDetectingReader::previous_consumed_bytes + pub fn process_bytes<'inner, 'buf>( + &'inner mut self, + input: &'buf [u8], + ) -> ParseEvents<'inner, 'buf> { + ParseEvents::new(self, input) + } + + /// Get how much bytes are consumed by the previous call of [`process_bytes`]. + /// + /// Bytes not consumed by the parser should be fed into the parser again. + /// + /// [`process_bytes`]: ContainerDetectingReader::process_bytes + pub fn previous_consumed_bytes(&self) -> usize { + self.previous_consumed_bytes + } + + pub fn finish(&mut self) { + // FIXME: validate state self.state = DetectState::Done(self.kind()); } } + +#[cfg(test)] +impl ContainerParser { + pub(crate) fn collect_codestream(input: &[u8]) -> Result> { + let mut parser = Self::new(); + let mut codestream = Vec::new(); + for event in parser.process_bytes(input) { + match event? 
{ + ParseEvent::BitstreamKind(_) => {} + ParseEvent::Codestream(buf) => { + codestream.extend_from_slice(buf); + } + } + } + Ok(codestream) + } +} diff --git a/jxl/src/headers/frame_header.rs b/jxl/src/headers/frame_header.rs index d70d124..1e39504 100644 --- a/jxl/src/headers/frame_header.rs +++ b/jxl/src/headers/frame_header.rs @@ -416,10 +416,7 @@ mod test_frame_header { }; fn test_frame_header(image: &[u8], correct_frame_header: FrameHeader) { - let mut parser = ContainerParser::new(); - parser.feed_bytes(image).unwrap(); - let codestream = parser.take_bytes(); - + let codestream = ContainerParser::collect_codestream(image).unwrap(); let mut br = BitReader::new(&codestream); let fh = FileHeaders::read(&mut br).unwrap(); diff --git a/jxl/src/util.rs b/jxl/src/util.rs index bd3d5ee..201d0dd 100644 --- a/jxl/src/util.rs +++ b/jxl/src/util.rs @@ -3,8 +3,10 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +#[allow(unused)] mod concat_slice; mod log2; +#[allow(unused)] pub use concat_slice::*; pub use log2::*; diff --git a/jxl_cli/src/main.rs b/jxl_cli/src/main.rs index b43b211..cd61edf 100644 --- a/jxl_cli/src/main.rs +++ b/jxl_cli/src/main.rs @@ -4,7 +4,7 @@ // license that can be found in the LICENSE file. 
use jxl::bit_reader::BitReader; -use jxl::container::ContainerParser; +use jxl::container::{ContainerParser, ParseEvent}; use jxl::headers::{ encodings::UnconditionalCoder, frame_header::{FrameHeader, FrameHeaderNonserialized}, @@ -58,19 +58,37 @@ fn main() { let mut parser = ContainerParser::new(); let mut buf = vec![0u8; 4096]; + let mut buf_valid = 0usize; + let mut codestream = Vec::new(); loop { - let count = file.read(&mut buf).expect("cannot read data from file"); + let count = file + .read(&mut buf[buf_valid..]) + .expect("cannot read data from file"); if count == 0 { break; } + buf_valid += count; - if let Err(err) = parser.feed_bytes(&buf[..count]) { - println!("Error parsing JXL codestream: {err}"); - return; + for event in parser.process_bytes(&buf[..buf_valid]) { + match event { + Ok(ParseEvent::BitstreamKind(kind)) => { + println!("Bitstream kind: {kind:?}"); + } + Ok(ParseEvent::Codestream(buf)) => { + codestream.extend_from_slice(buf); + } + Err(err) => { + println!("Error parsing JXL codestream: {err}"); + return; + } + } } + + let consumed = parser.previous_consumed_bytes(); + buf.copy_within(consumed..buf_valid, 0); + buf_valid -= consumed; } - let codestream = parser.take_bytes(); let res = parse_jxl_codestream(&codestream); if let Err(err) = res { println!("Error parsing JXL codestream: {}", err) From 7f171c066141979abb20d811bf894186d18a05e2 Mon Sep 17 00:00:00 2001 From: Wonwoo Choi Date: Fri, 20 Sep 2024 00:35:24 +0900 Subject: [PATCH 2/7] Move container parser states to another module --- jxl/src/container/mod.rs | 268 +----------------------------------- jxl/src/container/parse.rs | 270 +++++++++++++++++++++++++++++++++++++ 2 files changed, 274 insertions(+), 264 deletions(-) create mode 100644 jxl/src/container/parse.rs diff --git a/jxl/src/container/mod.rs b/jxl/src/container/mod.rs index 6ced0e9..c21d85c 100644 --- a/jxl/src/container/mod.rs +++ b/jxl/src/container/mod.rs @@ -6,10 +6,11 @@ // Originally written for jxl-oxide. 
pub mod box_header; +pub mod parse; use box_header::*; - -use crate::error::{Error, Result}; +use parse::*; +pub use parse::ParseEvent; /// Container format parser. #[derive(Debug, Default)] @@ -59,267 +60,6 @@ enum JxlpIndexState { JxlpFinished, } -/// Iterator that reads over a buffer and emits parser events. -pub struct ParseEvents<'inner, 'buf> { - inner: &'inner mut ContainerParser, - remaining_input: &'buf [u8], - finished: bool, -} - -impl<'inner, 'buf> ParseEvents<'inner, 'buf> { - const CODESTREAM_SIG: [u8; 2] = [0xff, 0x0a]; - const CONTAINER_SIG: [u8; 12] = [0, 0, 0, 0xc, b'J', b'X', b'L', b' ', 0xd, 0xa, 0x87, 0xa]; - - fn new(parser: &'inner mut ContainerParser, input: &'buf [u8]) -> Self { - parser.previous_consumed_bytes = 0; - Self { - inner: parser, - remaining_input: input, - finished: false, - } - } - - fn emit_single(&mut self) -> Result>> { - let state = &mut self.inner.state; - let jxlp_index_state = &mut self.inner.jxlp_index_state; - let buf = &mut self.remaining_input; - - loop { - if buf.is_empty() { - self.finished = true; - return Ok(None); - } - - match state { - DetectState::WaitingSignature => { - if buf.starts_with(&Self::CODESTREAM_SIG) { - tracing::trace!("Codestream signature found"); - *state = DetectState::InCodestream { - kind: BitstreamKind::BareCodestream, - bytes_left: None, - }; - return Ok(Some(ParseEvent::BitstreamKind( - BitstreamKind::BareCodestream, - ))); - } else if buf.starts_with(&Self::CONTAINER_SIG) { - tracing::trace!("Container signature found"); - *state = DetectState::WaitingBoxHeader; - *buf = &buf[Self::CONTAINER_SIG.len()..]; - return Ok(Some(ParseEvent::BitstreamKind(BitstreamKind::Container))); - } else if !Self::CODESTREAM_SIG.starts_with(buf) - && !Self::CONTAINER_SIG.starts_with(buf) - { - tracing::debug!(?buf, "Invalid signature"); - *state = DetectState::InCodestream { - kind: BitstreamKind::Invalid, - bytes_left: None, - }; - return Ok(Some(ParseEvent::BitstreamKind(BitstreamKind::Invalid))); - } 
else { - return Ok(None); - } - } - DetectState::WaitingBoxHeader => match ContainerBoxHeader::parse(buf)? { - HeaderParseResult::Done { - header, - header_size, - } => { - *buf = &buf[header_size..]; - let tbox = header.box_type(); - if tbox == ContainerBoxType::CODESTREAM { - match jxlp_index_state { - JxlpIndexState::Initial => { - *jxlp_index_state = JxlpIndexState::SingleJxlc; - } - JxlpIndexState::SingleJxlc => { - tracing::debug!("Duplicate jxlc box found"); - return Err(Error::InvalidBox); - } - JxlpIndexState::Jxlp(_) | JxlpIndexState::JxlpFinished => { - tracing::debug!("Found jxlc box instead of jxlp box"); - return Err(Error::InvalidBox); - } - } - - *state = DetectState::InCodestream { - kind: BitstreamKind::Container, - bytes_left: header.box_size().map(|x| x as usize), - }; - } else if tbox == ContainerBoxType::PARTIAL_CODESTREAM { - if let Some(box_size) = header.box_size() { - if box_size < 4 { - return Err(Error::InvalidBox); - } - } - - match jxlp_index_state { - JxlpIndexState::Initial => { - *jxlp_index_state = JxlpIndexState::Jxlp(0); - } - JxlpIndexState::Jxlp(index) => { - *index += 1; - } - JxlpIndexState::SingleJxlc => { - tracing::debug!("jxlp box found after jxlc box"); - return Err(Error::InvalidBox); - } - JxlpIndexState::JxlpFinished => { - tracing::debug!("found another jxlp box after the final one"); - return Err(Error::InvalidBox); - } - } - - *state = DetectState::WaitingJxlpIndex(header); - } else { - let bytes_left = header.box_size().map(|x| x as usize); - *state = DetectState::InAuxBox { header, bytes_left }; - } - } - HeaderParseResult::NeedMoreData => return Ok(None), - }, - DetectState::WaitingJxlpIndex(header) => { - let &[b0, b1, b2, b3, ..] 
= &**buf else { - return Ok(None); - }; - - let index = u32::from_be_bytes([b0, b1, b2, b3]); - *buf = &buf[4..]; - let is_last = index & 0x80000000 != 0; - let index = index & 0x7fffffff; - - match *jxlp_index_state { - JxlpIndexState::Jxlp(expected_index) if expected_index == index => { - if is_last { - *jxlp_index_state = JxlpIndexState::JxlpFinished; - } - } - JxlpIndexState::Jxlp(expected_index) => { - tracing::debug!( - expected_index, - actual_index = index, - "Out-of-order jxlp box found", - ); - return Err(Error::InvalidBox); - } - state => { - tracing::debug!(?state, "invalid jxlp index state in WaitingJxlpIndex"); - unreachable!("invalid jxlp index state in WaitingJxlpIndex"); - } - } - - *state = DetectState::InCodestream { - kind: BitstreamKind::Container, - bytes_left: header.box_size().map(|x| x as usize - 4), - }; - } - DetectState::InCodestream { - bytes_left: None, .. - } => { - let payload = *buf; - *buf = &[]; - return Ok(Some(ParseEvent::Codestream(payload))); - } - DetectState::InCodestream { - bytes_left: Some(bytes_left), - .. 
- } => { - let payload = if buf.len() >= *bytes_left { - let (payload, remaining) = buf.split_at(*bytes_left); - *state = DetectState::WaitingBoxHeader; - *buf = remaining; - payload - } else { - let payload = *buf; - *bytes_left -= buf.len(); - *buf = &[]; - payload - }; - return Ok(Some(ParseEvent::Codestream(payload))); - } - DetectState::InAuxBox { - header: _, - bytes_left: None, - } => { - let _payload = *buf; - *buf = &[]; - // FIXME: emit auxiliary box event - } - DetectState::InAuxBox { - header: _, - bytes_left: Some(bytes_left), - } => { - let _payload = if buf.len() >= *bytes_left { - let (payload, remaining) = buf.split_at(*bytes_left); - *state = DetectState::WaitingBoxHeader; - *buf = remaining; - payload - } else { - let payload = *buf; - *bytes_left -= buf.len(); - *buf = &[]; - payload - }; - // FIXME: emit auxiliary box event - } - DetectState::Done(_) => return Ok(None), - } - } - } -} - -impl std::fmt::Debug for ParseEvents<'_, '_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ParseEvents") - .field("inner", &self.inner) - .field( - "remaining_input", - &format_args!("({} byte(s))", self.remaining_input.len()), - ) - .field("finished", &self.finished) - .finish() - } -} - -impl<'inner, 'buf> Iterator for ParseEvents<'inner, 'buf> { - type Item = Result>; - - fn next(&mut self) -> Option { - if self.finished { - return None; - } - - let initial_buf = self.remaining_input; - let event = self.emit_single(); - - if event.is_err() { - self.finished = true; - } - - self.inner.previous_consumed_bytes += initial_buf.len() - self.remaining_input.len(); - event.transpose() - } -} - -/// Parser event emitted by [`ParseEvents`]. -pub enum ParseEvent<'buf> { - /// Bitstream structure is detected. - BitstreamKind(BitstreamKind), - /// Codestream data is read. 
- Codestream(&'buf [u8]), -} - -impl std::fmt::Debug for ParseEvent<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::BitstreamKind(kind) => f.debug_tuple("BitstreamKind").field(kind).finish(), - Self::Codestream(buf) => f - .debug_tuple("Codestream") - .field(&format_args!("{} byte(s)", buf.len())) - .finish(), - } - } -} - impl ContainerParser { pub fn new() -> Self { Self::default() @@ -365,7 +105,7 @@ impl ContainerParser { #[cfg(test)] impl ContainerParser { - pub(crate) fn collect_codestream(input: &[u8]) -> Result> { + pub(crate) fn collect_codestream(input: &[u8]) -> crate::error::Result> { let mut parser = Self::new(); let mut codestream = Vec::new(); for event in parser.process_bytes(input) { diff --git a/jxl/src/container/parse.rs b/jxl/src/container/parse.rs new file mode 100644 index 0000000..b386e94 --- /dev/null +++ b/jxl/src/container/parse.rs @@ -0,0 +1,270 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// Originally written for jxl-oxide. + +use super::{box_header::*, DetectState, JxlpIndexState, BitstreamKind, ContainerParser}; +use crate::error::{Error, Result}; + +/// Iterator that reads over a buffer and emits parser events. 
+pub struct ParseEvents<'inner, 'buf> { + inner: &'inner mut ContainerParser, + remaining_input: &'buf [u8], + finished: bool, +} + +impl<'inner, 'buf> ParseEvents<'inner, 'buf> { + const CODESTREAM_SIG: [u8; 2] = [0xff, 0x0a]; + const CONTAINER_SIG: [u8; 12] = [0, 0, 0, 0xc, b'J', b'X', b'L', b' ', 0xd, 0xa, 0x87, 0xa]; + + pub(super) fn new(parser: &'inner mut ContainerParser, input: &'buf [u8]) -> Self { + parser.previous_consumed_bytes = 0; + Self { + inner: parser, + remaining_input: input, + finished: false, + } + } + + fn emit_single(&mut self) -> Result>> { + let state = &mut self.inner.state; + let jxlp_index_state = &mut self.inner.jxlp_index_state; + let buf = &mut self.remaining_input; + + loop { + if buf.is_empty() { + self.finished = true; + return Ok(None); + } + + match state { + DetectState::WaitingSignature => { + if buf.starts_with(&Self::CODESTREAM_SIG) { + tracing::trace!("Codestream signature found"); + *state = DetectState::InCodestream { + kind: BitstreamKind::BareCodestream, + bytes_left: None, + }; + return Ok(Some(ParseEvent::BitstreamKind( + BitstreamKind::BareCodestream, + ))); + } else if buf.starts_with(&Self::CONTAINER_SIG) { + tracing::trace!("Container signature found"); + *state = DetectState::WaitingBoxHeader; + *buf = &buf[Self::CONTAINER_SIG.len()..]; + return Ok(Some(ParseEvent::BitstreamKind(BitstreamKind::Container))); + } else if !Self::CODESTREAM_SIG.starts_with(buf) + && !Self::CONTAINER_SIG.starts_with(buf) + { + tracing::debug!(?buf, "Invalid signature"); + *state = DetectState::InCodestream { + kind: BitstreamKind::Invalid, + bytes_left: None, + }; + return Ok(Some(ParseEvent::BitstreamKind(BitstreamKind::Invalid))); + } else { + return Ok(None); + } + } + DetectState::WaitingBoxHeader => match ContainerBoxHeader::parse(buf)? 
{ + HeaderParseResult::Done { + header, + header_size, + } => { + *buf = &buf[header_size..]; + let tbox = header.box_type(); + if tbox == ContainerBoxType::CODESTREAM { + match jxlp_index_state { + JxlpIndexState::Initial => { + *jxlp_index_state = JxlpIndexState::SingleJxlc; + } + JxlpIndexState::SingleJxlc => { + tracing::debug!("Duplicate jxlc box found"); + return Err(Error::InvalidBox); + } + JxlpIndexState::Jxlp(_) | JxlpIndexState::JxlpFinished => { + tracing::debug!("Found jxlc box instead of jxlp box"); + return Err(Error::InvalidBox); + } + } + + *state = DetectState::InCodestream { + kind: BitstreamKind::Container, + bytes_left: header.box_size().map(|x| x as usize), + }; + } else if tbox == ContainerBoxType::PARTIAL_CODESTREAM { + if let Some(box_size) = header.box_size() { + if box_size < 4 { + return Err(Error::InvalidBox); + } + } + + match jxlp_index_state { + JxlpIndexState::Initial => { + *jxlp_index_state = JxlpIndexState::Jxlp(0); + } + JxlpIndexState::Jxlp(index) => { + *index += 1; + } + JxlpIndexState::SingleJxlc => { + tracing::debug!("jxlp box found after jxlc box"); + return Err(Error::InvalidBox); + } + JxlpIndexState::JxlpFinished => { + tracing::debug!("found another jxlp box after the final one"); + return Err(Error::InvalidBox); + } + } + + *state = DetectState::WaitingJxlpIndex(header); + } else { + let bytes_left = header.box_size().map(|x| x as usize); + *state = DetectState::InAuxBox { header, bytes_left }; + } + } + HeaderParseResult::NeedMoreData => return Ok(None), + }, + DetectState::WaitingJxlpIndex(header) => { + let &[b0, b1, b2, b3, ..] 
= &**buf else { + return Ok(None); + }; + + let index = u32::from_be_bytes([b0, b1, b2, b3]); + *buf = &buf[4..]; + let is_last = index & 0x80000000 != 0; + let index = index & 0x7fffffff; + + match *jxlp_index_state { + JxlpIndexState::Jxlp(expected_index) if expected_index == index => { + if is_last { + *jxlp_index_state = JxlpIndexState::JxlpFinished; + } + } + JxlpIndexState::Jxlp(expected_index) => { + tracing::debug!( + expected_index, + actual_index = index, + "Out-of-order jxlp box found", + ); + return Err(Error::InvalidBox); + } + state => { + tracing::debug!(?state, "invalid jxlp index state in WaitingJxlpIndex"); + unreachable!("invalid jxlp index state in WaitingJxlpIndex"); + } + } + + *state = DetectState::InCodestream { + kind: BitstreamKind::Container, + bytes_left: header.box_size().map(|x| x as usize - 4), + }; + } + DetectState::InCodestream { + bytes_left: None, .. + } => { + let payload = *buf; + *buf = &[]; + return Ok(Some(ParseEvent::Codestream(payload))); + } + DetectState::InCodestream { + bytes_left: Some(bytes_left), + .. 
+ } => { + let payload = if buf.len() >= *bytes_left { + let (payload, remaining) = buf.split_at(*bytes_left); + *state = DetectState::WaitingBoxHeader; + *buf = remaining; + payload + } else { + let payload = *buf; + *bytes_left -= buf.len(); + *buf = &[]; + payload + }; + return Ok(Some(ParseEvent::Codestream(payload))); + } + DetectState::InAuxBox { + header: _, + bytes_left: None, + } => { + let _payload = *buf; + *buf = &[]; + // FIXME: emit auxiliary box event + } + DetectState::InAuxBox { + header: _, + bytes_left: Some(bytes_left), + } => { + let _payload = if buf.len() >= *bytes_left { + let (payload, remaining) = buf.split_at(*bytes_left); + *state = DetectState::WaitingBoxHeader; + *buf = remaining; + payload + } else { + let payload = *buf; + *bytes_left -= buf.len(); + *buf = &[]; + payload + }; + // FIXME: emit auxiliary box event + } + DetectState::Done(_) => return Ok(None), + } + } + } +} + +impl std::fmt::Debug for ParseEvents<'_, '_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ParseEvents") + .field("inner", &self.inner) + .field( + "remaining_input", + &format_args!("({} byte(s))", self.remaining_input.len()), + ) + .field("finished", &self.finished) + .finish() + } +} + +impl<'inner, 'buf> Iterator for ParseEvents<'inner, 'buf> { + type Item = Result>; + + fn next(&mut self) -> Option { + if self.finished { + return None; + } + + let initial_buf = self.remaining_input; + let event = self.emit_single(); + + if event.is_err() { + self.finished = true; + } + + self.inner.previous_consumed_bytes += initial_buf.len() - self.remaining_input.len(); + event.transpose() + } +} + +/// Parser event emitted by [`ParseEvents`]. +pub enum ParseEvent<'buf> { + /// Bitstream structure is detected. + BitstreamKind(BitstreamKind), + /// Codestream data is read. 
+ Codestream(&'buf [u8]), +} + +impl std::fmt::Debug for ParseEvent<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::BitstreamKind(kind) => f.debug_tuple("BitstreamKind").field(kind).finish(), + Self::Codestream(buf) => f + .debug_tuple("Codestream") + .field(&format_args!("{} byte(s)", buf.len())) + .finish(), + } + } +} From 1ad2179ebfff4bee4f5ce88a9185863040746cc3 Mon Sep 17 00:00:00 2001 From: Wonwoo Choi Date: Fri, 20 Sep 2024 01:03:01 +0900 Subject: [PATCH 3/7] Remove unused method `finish` --- jxl/src/container/mod.rs | 10 ++-------- jxl/src/container/parse.rs | 3 +-- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/jxl/src/container/mod.rs b/jxl/src/container/mod.rs index c21d85c..7542438 100644 --- a/jxl/src/container/mod.rs +++ b/jxl/src/container/mod.rs @@ -9,8 +9,8 @@ pub mod box_header; pub mod parse; use box_header::*; -use parse::*; pub use parse::ParseEvent; +use parse::*; /// Container format parser. #[derive(Debug, Default)] @@ -35,7 +35,6 @@ enum DetectState { kind: BitstreamKind, bytes_left: Option, }, - Done(BitstreamKind), } /// Structure of the decoded bitstream. @@ -71,7 +70,7 @@ impl ContainerParser { DetectState::WaitingBoxHeader | DetectState::WaitingJxlpIndex(..) | DetectState::InAuxBox { .. } => BitstreamKind::Container, - DetectState::InCodestream { kind, .. } | DetectState::Done(kind) => kind, + DetectState::InCodestream { kind, .. } => kind, } } @@ -96,11 +95,6 @@ impl ContainerParser { pub fn previous_consumed_bytes(&self) -> usize { self.previous_consumed_bytes } - - pub fn finish(&mut self) { - // FIXME: validate state - self.state = DetectState::Done(self.kind()); - } } #[cfg(test)] diff --git a/jxl/src/container/parse.rs b/jxl/src/container/parse.rs index b386e94..7092136 100644 --- a/jxl/src/container/parse.rs +++ b/jxl/src/container/parse.rs @@ -5,7 +5,7 @@ // // Originally written for jxl-oxide. 
-use super::{box_header::*, DetectState, JxlpIndexState, BitstreamKind, ContainerParser}; +use super::{box_header::*, BitstreamKind, ContainerParser, DetectState, JxlpIndexState}; use crate::error::{Error, Result}; /// Iterator that reads over a buffer and emits parser events. @@ -210,7 +210,6 @@ impl<'inner, 'buf> ParseEvents<'inner, 'buf> { }; // FIXME: emit auxiliary box event } - DetectState::Done(_) => return Ok(None), } } } From 9377d66f12758de43049020fb8cb58a3bd2ba4d3 Mon Sep 17 00:00:00 2001 From: Wonwoo Choi Date: Fri, 20 Sep 2024 01:14:14 +0900 Subject: [PATCH 4/7] Fix links in docs Mistaken while copying code from jxl-oxide :sweat_smile: --- jxl/src/container/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jxl/src/container/mod.rs b/jxl/src/container/mod.rs index 7542438..fe89664 100644 --- a/jxl/src/container/mod.rs +++ b/jxl/src/container/mod.rs @@ -79,7 +79,7 @@ impl ContainerParser { /// The parser might not fully consume the buffer. Use [`previous_consumed_bytes`] to get how /// many bytes are consumed. Bytes not consumed by the parser should be processed again. /// - /// [`previous_consumed_bytes`]: ContainerDetectingReader::previous_consumed_bytes + /// [`previous_consumed_bytes`]: ContainerParser::previous_consumed_bytes pub fn process_bytes<'inner, 'buf>( &'inner mut self, input: &'buf [u8], @@ -91,7 +91,7 @@ impl ContainerParser { /// /// Bytes not consumed by the parser should be fed into the parser again. 
/// - /// [`process_bytes`]: ContainerDetectingReader::process_bytes + /// [`process_bytes`]: ContainerParser::process_bytes pub fn previous_consumed_bytes(&self) -> usize { self.previous_consumed_bytes } From 6f87fa645a48d4d719e8490c8cdb5cfd99380d38 Mon Sep 17 00:00:00 2001 From: Wonwoo Choi Date: Sun, 22 Sep 2024 03:44:30 +0900 Subject: [PATCH 5/7] Update docs --- jxl/src/container/mod.rs | 4 ++-- jxl/src/container/parse.rs | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/jxl/src/container/mod.rs b/jxl/src/container/mod.rs index fe89664..d6c6489 100644 --- a/jxl/src/container/mod.rs +++ b/jxl/src/container/mod.rs @@ -87,9 +87,9 @@ impl ContainerParser { ParseEvents::new(self, input) } - /// Get how much bytes are consumed by the previous call of [`process_bytes`]. + /// Get how many bytes are consumed by the previous call to [`process_bytes`]. /// - /// Bytes not consumed by the parser should be fed into the parser again. + /// Bytes not consumed by the parser should be processed again. /// /// [`process_bytes`]: ContainerParser::process_bytes pub fn previous_consumed_bytes(&self) -> usize { diff --git a/jxl/src/container/parse.rs b/jxl/src/container/parse.rs index 7092136..d46839e 100644 --- a/jxl/src/container/parse.rs +++ b/jxl/src/container/parse.rs @@ -253,6 +253,9 @@ pub enum ParseEvent<'buf> { /// Bitstream structure is detected. BitstreamKind(BitstreamKind), /// Codestream data is read. + /// + /// Returned data may be partial. Complete codestream can be obtained by concatenating all data + /// of `Codestream` events. 
Codestream(&'buf [u8]), } From 46a1f249d6cbb433eec990dd80f0c6754291fcba Mon Sep 17 00:00:00 2001 From: Wonwoo Choi Date: Sun, 22 Sep 2024 03:44:34 +0900 Subject: [PATCH 6/7] Remove redundant debug log --- jxl/src/container/parse.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/jxl/src/container/parse.rs b/jxl/src/container/parse.rs index d46839e..27d1f06 100644 --- a/jxl/src/container/parse.rs +++ b/jxl/src/container/parse.rs @@ -151,8 +151,7 @@ impl<'inner, 'buf> ParseEvents<'inner, 'buf> { return Err(Error::InvalidBox); } state => { - tracing::debug!(?state, "invalid jxlp index state in WaitingJxlpIndex"); - unreachable!("invalid jxlp index state in WaitingJxlpIndex"); + unreachable!("invalid jxlp index state in WaitingJxlpIndex: {state:?}"); } } From 6bc1cce79b65cda94e8dcbf57d0ae5c709887c11 Mon Sep 17 00:00:00 2001 From: Wonwoo Choi Date: Sun, 22 Sep 2024 04:42:53 +0900 Subject: [PATCH 7/7] Add a test of streaming bitstream parser --- Cargo.lock | 16 +++++++++ jxl/Cargo.toml | 3 ++ jxl/src/container/mod.rs | 71 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index fb79721..e0bec4d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,21 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "arbitrary" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" + +[[package]] +name = "arbtest" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23909d5fb517fac2a8a4c887e847dbe41dd22ec46914586f5727980d0a193fdc" +dependencies = [ + "arbitrary", +] + [[package]] name = "array-init" version = "2.1.0" @@ -30,6 +45,7 @@ checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" name = "jxl" version = "0.1.0" dependencies = [ + "arbtest", "array-init", "byteorder", "half", diff --git a/jxl/Cargo.toml b/jxl/Cargo.toml index ca26ffa..fa65d41 100644 --- a/jxl/Cargo.toml +++ b/jxl/Cargo.toml @@ -16,6 +16,9 @@ half = "1.7.1" tracing = "0.1.40" jxl_headers_derive = { path = "../jxl_headers_derive" } +[dev-dependencies] +arbtest = "0.3.1" + [features] debug_tools = [] diff --git a/jxl/src/container/mod.rs b/jxl/src/container/mod.rs index d6c6489..af96f17 100644 --- a/jxl/src/container/mod.rs +++ b/jxl/src/container/mod.rs @@ -113,3 +113,74 @@ impl ContainerParser { Ok(codestream) } } + +#[cfg(test)] +mod test { + use super::*; + + #[rustfmt::skip] + const HEADER: &[u8] = &[ + 0x00, 0x00, 0x00, 0x0c, b'J', b'X', b'L', b' ', 0x0d, 0x0a, 0x87, 0x0a, 0x00, 0x00, 0x00, 0x14, + b'f', b't', b'y', b'p', b'j', b'x', b'l', b' ', 0x00, 0x00, 0x00, 0x00, b'j', b'x', b'l', b' ', + ]; + + #[test] + fn parse_partial() { + arbtest::arbtest(|u| { + // Prepare arbitrary container format data with two jxlp boxes. 
+ let total_len = u.arbitrary_len::<u8>()?; + let mut codestream0 = vec![0u8; total_len / 2]; + u.fill_buffer(&mut codestream0)?; + let mut codestream1 = vec![0u8; total_len - codestream0.len()]; + u.fill_buffer(&mut codestream1)?; + + let mut container = HEADER.to_vec(); + container.extend_from_slice(&(12 + codestream0.len() as u32).to_be_bytes()); + container.extend_from_slice(b"jxlp\x00\x00\x00\x00"); + container.extend_from_slice(&codestream0); + + container.extend_from_slice(&(12 + codestream1.len() as u32).to_be_bytes()); + container.extend_from_slice(b"jxlp\x80\x00\x00\x01"); + container.extend_from_slice(&codestream1); + + let mut expected = codestream0; + expected.extend(codestream1); + + // Create a list of arbitrary splits. + let mut tests = Vec::new(); + u.arbitrary_loop(Some(1), Some(10), |u| { + let split_at_idx = u.choose_index(container.len())?; + tests.push(container.split_at(split_at_idx)); + Ok(std::ops::ControlFlow::Continue(())) + })?; + + // Test if split index doesn't affect final codestream. + for (first, second) in tests { + let mut codestream = Vec::new(); + let mut parser = ContainerParser::new(); + + for event in parser.process_bytes(first) { + let event = event.unwrap(); + if let ParseEvent::Codestream(data) = event { + codestream.extend_from_slice(data); + } + } + + let consumed = parser.previous_consumed_bytes(); + let mut second_chunk = first[consumed..].to_vec(); + second_chunk.extend_from_slice(second); + + for event in parser.process_bytes(&second_chunk) { + let event = event.unwrap(); + if let ParseEvent::Codestream(data) = event { + codestream.extend_from_slice(data); + } + } + + assert_eq!(codestream, expected); + } + + Ok(()) + }); + } +}