Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ data/
*.ms2
*.fasta
*.fas
**/*.DS_Store
**/*.DS_Store
*.parquet
33 changes: 26 additions & 7 deletions crates/sage-cli/src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ pub struct Search {
pub min_matched_peaks: u16,
pub report_psms: usize,
pub predict_rt: bool,
pub predict_im: bool,
pub align_rt: bool,
pub mzml_paths: Vec<String>,
pub output_paths: Vec<String>,
pub bruker_config: BrukerProcessingConfig,
Expand Down Expand Up @@ -65,6 +67,8 @@ pub struct Input {
pub deisotope: Option<bool>,
pub quant: Option<QuantOptions>,
pub predict_rt: Option<bool>,
pub predict_im: Option<bool>,
pub align_rt: Option<bool>,
pub output_directory: Option<String>,
pub mzml_paths: Option<Vec<String>>,
pub bruker_config: Option<BrukerProcessingConfig>,
Expand All @@ -82,6 +86,7 @@ pub struct LfqOptions {
pub ppm_tolerance: Option<f32>,
pub mobility_pct_tolerance: Option<f32>,
pub combine_charge_states: Option<bool>,
pub mbr: Option<bool>,
}

impl From<LfqOptions> for LfqSettings {
Expand All @@ -98,6 +103,7 @@ impl From<LfqOptions> for LfqSettings {
combine_charge_states: value
.combine_charge_states
.unwrap_or(default.combine_charge_states),
mbr: value.mbr.unwrap_or(true),
};
if settings.ppm_tolerance > 20.0 {
log::warn!("lfq_settings.ppm_tolerance is higher than expected");
Expand Down Expand Up @@ -285,13 +291,24 @@ impl Input {
}
}

if !self.predict_rt.unwrap_or(true)
&& self.quant.as_ref().and_then(|q| q.lfq).unwrap_or(false)
{
log::warn!(
"`predict_rt: false` and `lfq: true` are incompatible. Setting `predict_rt: true`"
);
self.predict_rt = Some(true);
if !self.align_rt.unwrap_or(true) {
if self.predict_rt.unwrap_or(true) {
log::warn!(
"`predict_rt: true` and `align_rt: false` are incompatible. Setting `align_rt: true`"
);
self.align_rt = Some(true);
} else if let Some(quant) = &self.quant {
if quant.lfq.unwrap_or(false)
&& quant
.lfq_options
.as_ref()
.and_then(|f| f.mbr)
.unwrap_or(true)
{
log::warn!("`mbr: true` and `align_rt: false` are incompatible. Setting `align_rt: true`");
self.align_rt = Some(true);
}
}
}

let mzml_paths = self.mzml_paths.expect("'mzml_paths' must be provided!");
Expand Down Expand Up @@ -330,6 +347,8 @@ impl Input {
chimera: self.chimera.unwrap_or(false),
wide_window: self.wide_window.unwrap_or(false),
predict_rt: self.predict_rt.unwrap_or(true),
predict_im: self.predict_im.unwrap_or(true),
align_rt: self.align_rt.unwrap_or(true),
output_paths: Vec::new(),
write_pin: self.write_pin.unwrap_or(false),
bruker_config: self.bruker_config.unwrap_or_default(),
Expand Down
109 changes: 71 additions & 38 deletions crates/sage-cli/src/runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use super::output::SageResults;
use super::telemetry;
use anyhow::Context;
use csv::ByteRecord;
use fnv::FnvHashMap;
use log::info;
use rayon::prelude::*;
use sage_cloudpath::{CloudPath, FileFormat};
Expand All @@ -11,6 +12,7 @@ use sage_core::fasta::Fasta;
use sage_core::ion_series::Kind;
use sage_core::lfq::{Peak, PrecursorId};
use sage_core::mass::Tolerance;
use sage_core::ml::retention_alignment::Alignment;
use sage_core::peptide::Peptide;
use sage_core::scoring::Fragments;
use sage_core::scoring::{Feature, Scorer};
Expand Down Expand Up @@ -436,7 +438,7 @@ impl Runner {
let spectra = spectra
.ms1
.into_iter()
.map(|x| sp.process_with_mobility(x))
.map(|x| SpectrumProcessor::process_with_mobility(x))
.collect();
MS1Spectra::WithMobility(spectra)
} else {
Expand Down Expand Up @@ -482,26 +484,18 @@ impl Runner {
//Collect all results into a single container
let mut outputs = self.batch_files(&scorer, parallel);

let alignments = if self.parameters.predict_rt {
// Poisson probability is usually the best single feature for refining FDR.
// Take our set of 1% FDR filtered PSMs, and use them to train a linear
// regression model for predicting retention time
outputs
.features
.par_sort_unstable_by(|a, b| a.poisson.total_cmp(&b.poisson));
sage_core::ml::qvalue::spectrum_q_value(&mut outputs.features);

let alignments = sage_core::ml::retention_alignment::global_alignment(
&mut outputs.features,
self.parameters.mzml_paths.len(),
);
let _ = sage_core::ml::retention_model::predict(&self.database, &mut outputs.features);
let _ = sage_core::ml::mobility_model::predict(&self.database, &mut outputs.features);
Some(alignments)
} else {
None
};
// Poisson probability is usually the best single feature for refining FDR.
// Take our set of 1% FDR filtered PSMs, and use them to train a linear
// regression model for predicting retention time
outputs
.features
.par_sort_unstable_by(|a, b| a.poisson.total_cmp(&b.poisson));
sage_core::ml::qvalue::spectrum_q_value(&mut outputs.features);

let alignments = Self::align(&self.parameters, &mut outputs.features);
Self::predict(&self.parameters, &self.database, &mut outputs.features);

log::info!("Calculating FDR");
let q_spectrum = self.spectrum_fdr(&mut outputs.features);
let q_peptide = sage_core::fdr::picked_peptide(&self.database, &mut outputs.features);
let q_protein = sage_core::fdr::picked_protein(&self.database, &mut outputs.features);
Expand All @@ -518,24 +512,13 @@ impl Runner {
})
.collect::<Vec<_>>();

let areas = alignments.and_then(|alignments| {
if self.parameters.quant.lfq {
log::trace!("performing LFQ");
let mut areas = sage_core::lfq::build_feature_map(
self.parameters.quant.lfq_settings,
self.parameters.precursor_charge,
&outputs.features,
)
.quantify(&self.database, &outputs.ms1, &alignments);

let q_precursor = sage_core::fdr::picked_precursor(&mut areas);

log::info!("discovered {} target MS1 peaks at 5% FDR", q_precursor);
Some(areas)
} else {
None
}
});
let areas = Self::quantify(
&self.parameters,
&self.database,
&outputs.features,
&outputs.ms1,
alignments,
);

log::info!(
"discovered {} target peptide-spectrum matches at 1% FDR",
Expand Down Expand Up @@ -627,6 +610,56 @@ impl Runner {

Ok(telemetry)
}

pub fn align(parameters: &Search, features: &mut [Feature]) -> Vec<Alignment> {
let alignments = if parameters.align_rt {
log::info!("aligning retention times");
sage_core::ml::retention_alignment::global_alignment(
features,
parameters.mzml_paths.len(),
)
} else {
sage_core::ml::retention_alignment::no_alignment(features, parameters.mzml_paths.len())
};
alignments
}

pub fn predict(parameters: &Search, database: &IndexedDatabase, features: &mut [Feature]) {
if parameters.predict_rt {
log::info!("predicting retention times ");
let _ = sage_core::ml::retention_model::predict(database, features);
};
if parameters.predict_im {
log::info!("predicting mobility values");
let _ = sage_core::ml::mobility_model::predict(database, features);
};
}

pub fn quantify(
parameters: &Search,
database: &IndexedDatabase,
features: &[Feature],
ms1_spectra: &MS1Spectra,
alignments: Vec<Alignment>,
) -> Option<FnvHashMap<(PrecursorId, bool), (Peak, Vec<f64>)>> {
let areas = if parameters.quant.lfq {
let mut areas = sage_core::lfq::quantify(
&parameters.quant.lfq_settings,
parameters.precursor_charge,
database,
features,
ms1_spectra,
alignments,
);
let q_precursor = sage_core::fdr::picked_precursor(&mut areas);
log::info!("discovered {} target MS1 peaks at 5% FDR", q_precursor);
Some(areas)
} else {
None
};
areas
}

pub fn serialize_feature(&self, feature: &Feature, filenames: &[String]) -> csv::ByteRecord {
let mut record = csv::ByteRecord::new();

Expand Down
29 changes: 12 additions & 17 deletions crates/sage-cloudpath/src/tdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ use timsrust::readers::SpectrumReaderConfig as TimsrustSpectrumConfig;
pub struct TdfReader;

#[derive(Deserialize, Serialize, Debug, Clone, Copy)]
pub struct BrukerMS1CentoidingConfig {
pub struct BrukerMS1CentroidingConfig {
pub mz_ppm: f32,
pub ims_pct: f32,
}

impl Default for BrukerMS1CentoidingConfig {
impl Default for BrukerMS1CentroidingConfig {
fn default() -> Self {
BrukerMS1CentoidingConfig {
BrukerMS1CentroidingConfig {
mz_ppm: 5.0,
ims_pct: 3.0,
}
Expand All @@ -28,7 +28,7 @@ impl Default for BrukerMS1CentoidingConfig {
#[derive(Default, Deserialize, Serialize, Debug, Clone, Copy)]
pub struct BrukerProcessingConfig {
pub ms2: TimsrustSpectrumConfig,
pub ms1: BrukerMS1CentoidingConfig,
pub ms1: BrukerMS1CentroidingConfig,
}

impl TdfReader {
Expand All @@ -43,20 +43,19 @@ impl TdfReader {
.with_path(path_name.as_ref())
.with_config(config.ms2.clone())
.finalize()?;
let mut spectra = self.read_msn_spectra(file_id, &spectrum_reader)?;
let mut spectra = Self::read_msn_spectra(file_id, &spectrum_reader)?;
if requires_ms1 {
let ms1s = self.read_ms1_spectra(&path_name, file_id, config.ms1)?;
let ms1s = Self::read_ms1_spectra(&path_name, file_id, config.ms1)?;
spectra.extend(ms1s);
}

Ok(spectra)
}

fn read_ms1_spectra(
&self,
pub fn read_ms1_spectra(
path_name: impl AsRef<str>,
file_id: usize,
config: BrukerMS1CentoidingConfig,
config: BrukerMS1CentroidingConfig,
) -> Result<Vec<RawSpectrum>, timsrust::TimsRustError> {
let start = std::time::Instant::now();
let frame_reader = timsrust::readers::FrameReader::new(path_name.as_ref())?;
Expand Down Expand Up @@ -117,8 +116,7 @@ impl TdfReader {
Ok(ms1_spectra)
}

fn read_msn_spectra(
&self,
pub fn read_msn_spectra(
file_id: usize,
spectrum_reader: &SpectrumReader,
) -> Result<Vec<RawSpectrum>, timsrust::TimsRustError> {
Expand Down Expand Up @@ -221,7 +219,7 @@ impl PeakBuffer {

// The "order" is sorted by intensity
// This will be used later during the centroiding (for details check that implementation)
self.order.extend((0..self.len()));
self.order.extend(0..self.len());
self.order.sort_unstable_by(|&a, &b| {
self.peaks[b]
.intensity
Expand Down Expand Up @@ -380,21 +378,18 @@ impl PeakBuffer {
}
}

self
.agg_buff
self.agg_buff
.sort_unstable_by(|a, b| a.mz.partial_cmp(&b.mz).unwrap());
// println!("Centroiding: Start len: {}; end len: {};", arr_len, result.len());
// Ultra data is usually start: 40k end 10k,
// HT2 data is usually start 400k end 40k, limiting to 10k
// rarely leaves peaks with intensity > 200 ... ive never seen
// it happen. -JSP 2025-Jan

self
.agg_buff
self.agg_buff
.drain(..)
.into_iter()
.map(|x| (x.mz, (x.intensity, x.im)))
.unzip()
}
}

2 changes: 1 addition & 1 deletion crates/sage/src/fdr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ pub fn picked_precursor(
.map(|score| ((score.ix, score.decoy), score.q))
.collect::<FnvHashMap<_, _>>();

peaks.par_iter_mut().for_each(|((ix), (peak, _))| {
peaks.par_iter_mut().for_each(|(ix, (peak, _))| {
peak.q_value = scores[ix];
});
passing
Expand Down
Loading