Skip to content

Commit

Permalink
Add parser for Chemstation 179 file type.
Browse files Browse the repository at this point in the history
I don't have any 181 files to test with, but it should be easy to
adapt the new code to them as well. This commit also rewrites how
metadata is extracted from these files.

Closes #42.
  • Loading branch information
bovee committed Jul 11, 2024
1 parent 98c49d5 commit bc967b1
Show file tree
Hide file tree
Showing 12 changed files with 457 additions and 300 deletions.
2 changes: 1 addition & 1 deletion entab-cli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ mod tests {
run(
["entab", "--metadata"],
&b">test\nACGT"[..],
io::Cursor::new(&mut out)
io::Cursor::new(&mut out),
)?;
assert_eq!(&out[..], b"key\tvalue\n");
Ok(())
Expand Down
8 changes: 2 additions & 6 deletions entab-js/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,8 @@ impl Reader {
#[wasm_bindgen]
pub fn next(&mut self) -> Result<JsValue, JsValue> {
if let Some(value) = self.reader.next_record().map_err(to_js)? {
let obj: BTreeMap<&str, Value> = self
.headers
.iter()
.map(AsRef::as_ref)
.zip(value)
.collect();
let obj: BTreeMap<&str, Value> =
self.headers.iter().map(AsRef::as_ref).zip(value).collect();
serde_wasm_bindgen::to_value(&NextRecord {
value: Some(obj),
done: false,
Expand Down
24 changes: 11 additions & 13 deletions entab/fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 25 additions & 7 deletions entab/src/filetype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ pub enum FileType {
// chemoinformatics
/// Agilent format used for MS-MS trace data
AgilentMsMsScan, // bin 0x01, 0x01
/// Agilent format used for flame ionization data (array-based)
AgilentChemstationArray,
/// Agilent format used for UV-visible array data
AgilentChemstationDad,
/// Agilent format used for flame ionization trace data
Expand Down Expand Up @@ -123,9 +125,9 @@ impl FileType {
b"\x02\x33\x31\x00" => return FileType::AgilentChemstationDad,
b"\x02\x38\x31\x00" => return FileType::AgilentChemstationFid,
b"\x03\x02\x00\x00" => return FileType::AgilentMasshunterDad,
b"\x03\x31\x33\x30" => return FileType::AgilentChemstationUv,
b"\x03\x31\x33\x30" => return FileType::AgilentChemstationMwd,
b"\x03\x31\x33\x31" => return FileType::AgilentChemstationUv,
b"\x03\x31\x37\x39" => return FileType::AgilentChemstationUv,
b"\x03\x31\x37\x39" => return FileType::AgilentChemstationArray,
b"\x28\xB5\x2F\xFD" => return FileType::Zstd,
b"\x4F\x62\x6A\x01" => return FileType::ApacheAvro,
b"\xFF\xD8\xFF\xDB" | b"\xFF\xD8\xFF\xE0" | b"\xFF\xD8\xFF\xE1"
Expand All @@ -140,7 +142,14 @@ impl FileType {
}
}
if magic.len() < 2 {
return FileType::Unknown(Some(magic.iter().take(8).map(|x| format!("{:x}", x)).collect::<Vec<String>>().join("")));
return FileType::Unknown(Some(
magic
.iter()
.take(8)
.map(|x| format!("{:x}", x))
.collect::<Vec<String>>()
.join(""),
));
}
match &magic[..2] {
[0x0F | 0x1F, 0x8B] => return FileType::Gzip,
Expand All @@ -154,7 +163,12 @@ impl FileType {
b">" => FileType::Fasta,
b"@" => FileType::Fastq,
_ => FileType::Unknown(Some(
magic.iter().take(8).map(|x| format!("{:x}", x)).collect::<Vec<String>>().join("")
magic
.iter()
.take(8)
.map(|x| format!("{:x}", x))
.collect::<Vec<String>>()
.join(""),
)),
}
}
Expand All @@ -172,6 +186,7 @@ impl FileType {
"cdf" => &[FileType::NetCdf],
"cf" => &[FileType::ThermoCf],
"ch" => &[
FileType::AgilentChemstationArray,
FileType::AgilentChemstationFid,
FileType::AgilentChemstationMwd,
],
Expand Down Expand Up @@ -213,6 +228,7 @@ impl FileType {
/// If a file is unsupported, an error will be returned.
pub fn to_parser_name<'a>(&self, hint: Option<&'a str>) -> Result<&'a str, EtError> {
Ok(match (self, hint) {
(FileType::AgilentChemstationArray, None) => "chemstation_array",
(FileType::AgilentChemstationDad, None) => "chemstation_dad",
(FileType::AgilentChemstationFid, None) => "chemstation_fid",
(FileType::AgilentChemstationMs, None) => "chemstation_ms",
Expand Down Expand Up @@ -246,6 +262,7 @@ mod tests {
#[test]
fn test_parser_names() {
let filetypes = [
(FileType::AgilentChemstationArray, "chemstation_array"),
(FileType::AgilentChemstationFid, "chemstation_fid"),
(FileType::AgilentChemstationMs, "chemstation_ms"),
(FileType::AgilentChemstationMwd, "chemstation_mwd"),
Expand Down Expand Up @@ -273,8 +290,9 @@ mod tests {
let unknown_type = FileType::from_magic(b"\x00\x00\x00\x00");
assert_eq!(unknown_type, FileType::Unknown(Some("0000".to_string())));

assert_eq!(unknown_type.to_parser_name(None).unwrap_err().msg, "File starting with #0000# has no parser");


assert_eq!(
unknown_type.to_parser_name(None).unwrap_err().msg,
"File starting with #0000# has no parser"
);
}
}
Loading

0 comments on commit bc967b1

Please sign in to comment.