Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
371 changes: 312 additions & 59 deletions Cargo.lock

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions crates/sage-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ license = "MIT"
name = "sage"
path = "src/main.rs"


[features]

default = []

mzmlb = ["sage-cloudpath/mzdata"]


[dependencies]
sage-core = { path = "../sage" }
sage-cloudpath = { path = "../sage-cloudpath", features = ["parquet"] }
Expand Down
12 changes: 11 additions & 1 deletion crates/sage-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,12 +200,22 @@ impl Runner {

let path_lower = path.to_lowercase();
let res = if path_lower.ends_with(".mgf.gz") || path_lower.ends_with(".mgf") {
sage_cloudpath::util::read_mgf(path_lower, file_id)
sage_cloudpath::util::read_mgf(path, file_id)
} else if bruker_extensions
.iter()
.any(|ext| path_lower.ends_with(ext))
{
sage_cloudpath::util::read_tdf(path, file_id)
} else if path_lower.ends_with(".mzmlb") {
#[cfg(feature = "mzmlb")]
{
sage_cloudpath::util::read_mzmlb(path, file_id, sn)
}
#[cfg(not(feature = "mzmlb"))]
{
// Fall back to prior behavior
sage_cloudpath::util::read_mzml(path, file_id, sn)
}
} else {
sage_cloudpath::util::read_mzml(path, file_id, sn)
};
Expand Down
5 changes: 5 additions & 0 deletions crates/sage-cloudpath/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ license = "MIT"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
default = []

[dependencies]
thiserror = "1.0"
async-compression = { version = "0.3", features = ["tokio", "gzip", "zlib"] }
Expand All @@ -33,3 +36,5 @@ serde_json = "1.0"

sage-core = { path = "../sage" }
parquet = { version = "44.0.0", optional = true, default-features = false, features = ["zstd"] }

mzdata = {version = "0.11.0", default-features = false, features = ["mzmlb", "hdf5_static", "zlib"], optional = true}
2 changes: 2 additions & 0 deletions crates/sage-cloudpath/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use tokio::io::{AsyncBufRead, AsyncRead, AsyncWriteExt, BufReader};

pub mod mgf;
pub mod mzml;
#[cfg(feature = "mzdata")]
pub mod mzmlb;
pub mod tdf;
pub mod util;

Expand Down
131 changes: 131 additions & 0 deletions crates/sage-cloudpath/src/mzmlb.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#![cfg(feature = "mzdata")]

use std::io;
use std::path::Path;

use mzdata::io::MzMLbReader as MzMLbReaderImpl;
use mzdata::prelude::*;
use mzdata::RawSpectrum as RawSpectrumImpl;
use sage_core::mass::Tolerance;
use sage_core::spectrum::{Precursor, RawSpectrum};

/// Configuration for reading spectra out of an mzMLb file.
///
/// A reader is built with [`MzMLbReader::with_file_id`] or
/// [`MzMLbReader::with_file_id_and_level_filter`] and then pointed at a file
/// with [`MzMLbReader::parse`].
#[derive(Debug, Clone)]
pub struct MzMLbReader {
    /// When `Some(level)`, `parse` keeps only spectra whose MS level is equal
    /// to `level`; when `None`, every spectrum is returned.
    ms_level: Option<u8>,
    /// MS level at which intensities are meant to be divided by the noise
    /// values stored in the file to obtain S/N.
    ///
    /// The value is stored by `set_signal_to_noise`, but `parse` in this
    /// module does not read it yet, so it currently has no effect.
    signal_to_noise: Option<u8>,

    /// Identifier copied into the `file_id` of every spectrum produced.
    file_id: usize,
}

impl MzMLbReader {
/// Create a new [`MzMLbReader`] with a minimum MS level filter
///
/// # Example
///
/// A minimum level of 2 will not parse or return MS1 scans
pub fn with_file_id_and_level_filter(file_id: usize, ms_level: u8) -> Self {
Self {
ms_level: Some(ms_level),
file_id,
signal_to_noise: None,
}
}

pub fn with_file_id(file_id: usize) -> Self {
Self {
ms_level: None,
signal_to_noise: None,
file_id,
}
}

pub fn set_file_id(&mut self, file_id: usize) -> &mut Self {
self.file_id = file_id;
self
}

pub fn set_signal_to_noise(&mut self, sn: Option<u8>) -> &mut Self {
self.signal_to_noise = sn;
self
}

pub fn parse<B>(&self, b: B) -> Result<Vec<RawSpectrum>, io::Error>
where
B: AsRef<Path>,
{
let reader = MzMLbReaderImpl::new(&b)?;

let spectra = reader
.into_iter()
.filter(|scan| {
if let Some(ms_level) = self.ms_level {
return scan.ms_level() == ms_level;
} else {
return true;
}
})
.map(|scan| {
let scan: RawSpectrumImpl = scan.into();
let mut precusors = Vec::new();
match scan.precursor() {
Some(p) => {
let p = Precursor {
mz: p.mz() as f32,
intensity: Some(p.ion.intensity),
charge: p.ion.charge.and_then(|v| Some(v as u8)),
spectrum_ref: p.precursor_id.clone(),
isolation_window: Some(Tolerance::Da(
p.isolation_window.lower_bound as f32,
p.isolation_window.upper_bound as f32,
)),
};
precusors.push(p)
}
None => {}
}
RawSpectrum {
file_id: self.file_id,
ms_level: scan.ms_level(),
id: scan.description.id.clone(),
precursors: precusors,
ion_injection_time: scan.acquisition().first_scan().unwrap().injection_time,
representation: match scan.description.signal_continuity {
mzdata::spectrum::SignalContinuity::Unknown => {
sage_core::spectrum::Representation::Profile
}
mzdata::spectrum::SignalContinuity::Centroid => {
sage_core::spectrum::Representation::Centroid
}
mzdata::spectrum::SignalContinuity::Profile => {
sage_core::spectrum::Representation::Profile
}
},
scan_start_time: scan.start_time() as f32,
total_ion_current: scan.peaks().tic(),
mz: scan.mzs().iter().map(|mz| (*mz) as f32).collect(),
intensity: scan.intensities().to_vec(),
}
})
.collect();
Ok(spectra)
}
}


#[cfg(test)]
mod test {
    use super::*;

    /// Parses the bundled single-spectrum mzMLb fixture and checks the
    /// spectrum id and the number of m/z and intensity values.
    #[test]
    fn test_read_mzmlb() -> io::Result<()> {
        let reader = MzMLbReader::with_file_id(0);
        let parsed = reader.parse("../../tests/LQSRPAAPPAPGPGQLTLR.mzMLb")?;

        // The fixture holds exactly one spectrum.
        assert_eq!(parsed.len(), 1);

        let spectrum = &parsed[0];
        assert_eq!(
            spectrum.id,
            "controllerType=0 controllerNumber=1 scan=30069"
        );
        assert_eq!(spectrum.mz.len(), 299);
        assert_eq!(spectrum.intensity.len(), 299);
        Ok(())
    }
}
15 changes: 15 additions & 0 deletions crates/sage-cloudpath/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,21 @@ pub fn read_tdf<S: AsRef<str>>(s: S, file_id: usize) -> Result<Vec<RawSpectrum>,
}
}

/// Read the spectra of the mzMLb file at path `s`.
///
/// A [`crate::mzmlb::MzMLbReader`] is created for `file_id`, given the
/// `signal_to_noise` setting, and asked to parse the file.
///
/// # Errors
///
/// Any I/O error reported by the reader is wrapped in [`Error::IO`].
#[cfg(feature = "mzdata")]
pub fn read_mzmlb<S: AsRef<str>>(
    s: S,
    file_id: usize,
    signal_to_noise: Option<u8>,
) -> Result<Vec<RawSpectrum>, Error> {
    let path: &str = s.as_ref();

    let mut reader = crate::mzmlb::MzMLbReader::with_file_id(file_id);
    reader.set_signal_to_noise(signal_to_noise);

    reader.parse(path).map_err(Error::IO)
}

pub fn read_mgf<S: AsRef<str>>(path: S, file_id: usize) -> Result<Vec<RawSpectrum>, Error> {
read_and_execute(path, |mut bf| async move {
let mut contents = String::new();
Expand Down
Binary file added tests/LQSRPAAPPAPGPGQLTLR.mzMLb
Binary file not shown.