diff --git a/Cargo.lock b/Cargo.lock index 437a2d67..72caf850 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -160,6 +160,12 @@ dependencies = [ "num", ] +[[package]] +name = "ascii" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" + [[package]] name = "assert-json-diff" version = "1.1.0" @@ -585,9 +591,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "base64" -version = "0.21.0" +version = "0.21.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" [[package]] name = "base64-simd" @@ -620,6 +626,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "blosc-src" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c3372aaf42f24a767cfe596161cbfac42c60bd9e034e50d4f63cfebe13f68f4" +dependencies = [ + "cmake", +] + [[package]] name = "brotli" version = "3.4.0" @@ -739,6 +754,15 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "cmake" +version = "0.1.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +dependencies = [ + "cc", +] + [[package]] name = "const-random" version = "0.1.15" @@ -804,16 +828,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crossbeam-channel" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - [[package]] name = "crossbeam-deque" version = "0.8.2" @@ -892,7 +906,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" dependencies = [ "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -905,7 +919,7 @@ dependencies = [ "hashbrown 0.12.3", "lock_api", "once_cell", - "parking_lot_core", + "parking_lot_core 0.9.7", "rayon", ] @@ -934,7 +948,7 @@ checksum = "adc2ab4d5a16117f9029e9a6b5e4e79f4c67f6519bc134210d4d4a04ba31f41b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -965,6 +979,33 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "extend" version = "0.1.2" @@ -974,7 +1015,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1015,6 +1056,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" dependencies = [ "crc32fast", + "libz-sys", "miniz_oxide", ] @@ -1089,7 +1131,7 @@ checksum = "bdfb8ce053d86b91919aad980c220b1fb8401a9394410e1c289ed7e66b61835d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1155,7 +1197,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap", + "indexmap 1.9.1", "slab", "tokio", "tokio-util", @@ -1198,6 +1240,77 @@ dependencies = [ "hashbrown 0.14.0", ] 
+[[package]] +name = "hdf5" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdcd9b131fd67bb827b386d0dc63d3e74196a14616ef800acf87ca5fef741a10" +dependencies = [ + "bitflags 1.3.2", + "blosc-src", + "cfg-if", + "errno", + "hdf5-derive", + "hdf5-sys", + "hdf5-types", + "lazy_static", + "libc", + "lzf-sys", + "ndarray", + "parking_lot", + "paste", +] + +[[package]] +name = "hdf5-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5a77ac6a41e6880594d506118c0b8bc665ec959fe4636e0c84809756d224820" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "hdf5-src" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01493db39ddc0519cf2a83d620d2c037fee60f4fed724cb72dc23763f1727a8" +dependencies = [ + "cmake", + "libz-sys", +] + +[[package]] +name = "hdf5-sys" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4842d5980dc311a7c8933c7b45534fdae84df5ae7939a0ae8e449a56d4beb3d2" +dependencies = [ + "hdf5-src", + "libc", + "libloading", + "libz-sys", + "pkg-config", + "regex", + "serde", + "serde_derive", + "winreg", +] + +[[package]] +name = "hdf5-types" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b47268c0dfb499b1ffe5638b6e7694e7a87fe49fb92eca998a4346e5483e428f" +dependencies = [ + "ascii", + "cfg-if", + "hdf5-sys", + "libc", +] + [[package]] name = "hermit-abi" version = "0.1.19" @@ -1358,6 +1471,17 @@ dependencies = [ "hashbrown 0.12.3", ] +[[package]] +name = "indexmap" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad227c3af19d4914570ad36d30409928b75967c298feb9ea1969db3a610bb14e" +dependencies = [ + "equivalent", + "hashbrown 0.14.0", + "serde", +] + [[package]] name = "instant" version = "0.1.12" @@ -1494,6 +1618,16 @@ version = "0.2.147" source 
= "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + [[package]] name = "libm" version = "0.2.8" @@ -1511,6 +1645,17 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "libz-sys" +version = "1.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linreg" version = "0.2.0" @@ -1533,12 +1678,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.17" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "lz4" @@ -1560,6 +1702,25 @@ dependencies = [ "libc", ] +[[package]] +name = "lzf-sys" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0798d023ce0905e2c77ed96de92aab929ff9db2036cbef4edfee0daf33582aec" +dependencies = [ + "cc", +] + +[[package]] +name = "matrixmultiply" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2" +dependencies = [ + "autocfg", + "rawpointer", +] + [[package]] name = "md-5" version = "0.10.5" @@ -1569,6 +1730,12 @@ dependencies = [ "digest", ] +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + [[package]] name 
= "memchr" version = "2.5.0" @@ -1611,6 +1778,51 @@ dependencies = [ "windows-sys 0.42.0", ] +[[package]] +name = "mzdata" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf6f27be7fb811470287cb72fcad6fd3c1212eb33918798bfccc2ab25f988f6" +dependencies = [ + "base64-simd", + "bytemuck", + "flate2", + "hdf5", + "hdf5-sys", + "indexmap 2.0.1", + "lazy_static", + "libz-sys", + "log", + "md5", + "mzpeaks", + "ndarray", + "num-traits", + "quick-xml", + "regex", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "mzpeaks" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d519a4d34d369ad9869553909c659de71e682119d58f00861d11be2978a25e67" + +[[package]] +name = "ndarray" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", +] + [[package]] name = "ntapi" version = "0.4.1" @@ -1758,6 +1970,31 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core 0.8.6", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + [[package]] name = "parking_lot_core" version = "0.9.7" @@ -1785,7 +2022,7 @@ dependencies = [ "arrow-ipc", "arrow-schema", 
"arrow-select", - "base64 0.21.0", + "base64 0.21.5", "brotli", "bytes", "chrono", @@ -1850,7 +2087,7 @@ checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1892,7 +2129,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "version_check", ] @@ -1915,9 +2152,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.47" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" dependencies = [ "unicode-ident", ] @@ -1929,6 +2166,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" dependencies = [ "memchr", + "serde", "tokio", ] @@ -1951,14 +2189,14 @@ checksum = "b22a693222d716a9587786f37ac3f6b4faedb5b80c23914e7303ff5a1d8016e9" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] @@ -1981,28 +2219,30 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + [[package]] name = "rayon" -version = "1.5.3" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" dependencies = [ - "autocfg", - "crossbeam-deque", "either", "rayon-core", ] [[package]] name = "rayon-core" -version = "1.9.3" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" dependencies = [ - "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "num_cpus", ] [[package]] @@ -2043,7 +2283,7 @@ version = "0.11.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" dependencies = [ - "base64 0.21.0", + "base64 0.21.5", "bytes", "encoding_rs", "futures-core", @@ -2179,7 +2419,7 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b" dependencies = [ - "base64 0.21.0", + "base64 0.21.5", ] [[package]] @@ -2232,6 +2472,7 @@ dependencies = [ "futures", "http", "log", + "mzdata", "once_cell", "parquet 44.0.0", "quick-xml", @@ -2324,29 +2565,29 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.147" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.147" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" +checksum = 
"43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.39", ] [[package]] name = "serde_json" -version = "1.0.87" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce777b7b150d76b9cf60d28b55f5847135a003f7d7350c6be7a773508ce7d45" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ "itoa 1.0.4", "ryu", @@ -2450,9 +2691,20 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" -version = "1.0.103" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" dependencies = [ "proc-macro2", "quote", @@ -2485,22 +2737,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.39" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5ab016db510546d856297882807df8da66a16fb8c4101cb8b30054b0d5b2d9c" +checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.39" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5420d42e90af0c38c3290abcca25b9b3bdf379fc9f55c528f53a269d9c9a267e" +checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.39", ] [[package]] @@ -2607,7 +2859,7 @@ checksum = 
"d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -2705,7 +2957,7 @@ checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -2859,7 +3111,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-shared", ] @@ -2893,7 +3145,7 @@ checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3169,6 +3421,7 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" dependencies = [ + "serde", "winapi", ] diff --git a/crates/sage-cli/Cargo.toml b/crates/sage-cli/Cargo.toml index 842a7d1e..da5b71d8 100644 --- a/crates/sage-cli/Cargo.toml +++ b/crates/sage-cli/Cargo.toml @@ -14,6 +14,14 @@ license = "MIT" name = "sage" path = "src/main.rs" + +[features] + +default = [] + +mzmlb = ["sage-cloudpath/mzdata"] + + [dependencies] sage-core = { path = "../sage" } sage-cloudpath = { path = "../sage-cloudpath", features = ["parquet"] } diff --git a/crates/sage-cli/src/main.rs b/crates/sage-cli/src/main.rs index 16d414c8..87b9fd23 100644 --- a/crates/sage-cli/src/main.rs +++ b/crates/sage-cli/src/main.rs @@ -200,12 +200,22 @@ impl Runner { let path_lower = path.to_lowercase(); let res = if path_lower.ends_with(".mgf.gz") || path_lower.ends_with(".mgf") { - sage_cloudpath::util::read_mgf(path_lower, file_id) + sage_cloudpath::util::read_mgf(path, file_id) } else if bruker_extensions .iter() .any(|ext| path_lower.ends_with(ext)) { sage_cloudpath::util::read_tdf(path, file_id) + } else if path_lower.ends_with(".mzmlb") { + #[cfg(feature = "mzmlb")] + { + sage_cloudpath::util::read_mzmlb(path, file_id, 
sn)
+                }
+                #[cfg(not(feature = "mzmlb"))]
+                {
+                    // Fall back to prior behavior
+                    sage_cloudpath::util::read_mzml(path, file_id, sn)
+                }
                 } else {
                     sage_cloudpath::util::read_mzml(path, file_id, sn)
                 };
diff --git a/crates/sage-cloudpath/Cargo.toml b/crates/sage-cloudpath/Cargo.toml
index 70c53c1c..e0461a7f 100644
--- a/crates/sage-cloudpath/Cargo.toml
+++ b/crates/sage-cloudpath/Cargo.toml
@@ -10,6 +10,9 @@ license = "MIT"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
+[features]
+default = []
+
 [dependencies]
 thiserror = "1.0"
 async-compression = { version = "0.3", features = ["tokio", "gzip", "zlib"] }
@@ -33,3 +36,5 @@ serde_json = "1.0"
 sage-core = { path = "../sage" }
 
 parquet = { version = "44.0.0", optional = true, default-features = false, features = ["zstd"] }
+
+mzdata = {version = "0.11.0", default-features = false, features = ["mzmlb", "hdf5_static", "zlib"], optional = true}
diff --git a/crates/sage-cloudpath/src/lib.rs b/crates/sage-cloudpath/src/lib.rs
index 930be015..17c3bf05 100644
--- a/crates/sage-cloudpath/src/lib.rs
+++ b/crates/sage-cloudpath/src/lib.rs
@@ -8,6 +8,8 @@ use tokio::io::{AsyncBufRead, AsyncRead, AsyncWriteExt, BufReader};
 
 pub mod mgf;
 pub mod mzml;
+#[cfg(feature = "mzdata")]
+pub mod mzmlb;
 pub mod tdf;
 pub mod util;
 
diff --git a/crates/sage-cloudpath/src/mzmlb.rs b/crates/sage-cloudpath/src/mzmlb.rs
new file mode 100644
index 00000000..26f26e0e
--- /dev/null
+++ b/crates/sage-cloudpath/src/mzmlb.rs
@@ -0,0 +1,149 @@
+#![cfg(feature = "mzdata")]
+//! Reader for mzMLb files, implemented on top of the `mzdata` crate.
+
+use std::io;
+use std::path::Path;
+
+use mzdata::io::MzMLbReader as MzMLbReaderImpl;
+use mzdata::prelude::*;
+use mzdata::RawSpectrum as RawSpectrumImpl;
+use sage_core::mass::Tolerance;
+use sage_core::spectrum::{Precursor, RawSpectrum};
+
+/// Reads an mzMLb file and converts its spectra into sage [`RawSpectrum`]s.
+pub struct MzMLbReader {
+    /// When set, only spectra at exactly this MS level are returned.
+    ms_level: Option<u8>,
+    /// MS level at which intensities should be divided by noise to calculate S/N.
+    ///
+    /// NOTE: this value is only stored for now; [`MzMLbReader::parse`] never reads it.
+    signal_to_noise: Option<u8>,
+
+    /// Identifier copied into every spectrum produced by this reader.
+    file_id: usize,
+}
+
+impl MzMLbReader {
+    /// Create a new [`MzMLbReader`] that only returns spectra at `ms_level`.
+    ///
+    /// # Example
+    ///
+    /// A level of 2 returns MS2 scans only; MS1 scans are dropped.
+    pub fn with_file_id_and_level_filter(file_id: usize, ms_level: u8) -> Self {
+        Self {
+            ms_level: Some(ms_level),
+            signal_to_noise: None,
+            file_id,
+        }
+    }
+
+    /// Create a new [`MzMLbReader`] without an MS level filter.
+    pub fn with_file_id(file_id: usize) -> Self {
+        Self {
+            ms_level: None,
+            signal_to_noise: None,
+            file_id,
+        }
+    }
+
+    /// Change the file identifier assigned to parsed spectra.
+    pub fn set_file_id(&mut self, file_id: usize) -> &mut Self {
+        self.file_id = file_id;
+        self
+    }
+
+    /// Set the MS level intended for signal-to-noise conversion.
+    pub fn set_signal_to_noise(&mut self, sn: Option<u8>) -> &mut Self {
+        self.signal_to_noise = sn;
+        self
+    }
+
+    /// Read the spectra stored in the mzMLb file at `b`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an [`io::Error`] if the underlying `mzdata` reader cannot be created.
+    pub fn parse<B>(&self, b: B) -> Result<Vec<RawSpectrum>, io::Error>
+    where
+        B: AsRef<Path>,
+    {
+        let reader = MzMLbReaderImpl::new(&b)?;
+
+        let spectra = reader
+            .into_iter()
+            .filter(|scan| {
+                self.ms_level
+                    .map_or(true, |level| scan.ms_level() == level)
+            })
+            .map(|scan| self.convert(scan.into()))
+            .collect();
+        Ok(spectra)
+    }
+
+    /// Convert one `mzdata` spectrum into a sage [`RawSpectrum`].
+    fn convert(&self, scan: RawSpectrumImpl) -> RawSpectrum {
+        let precursors: Vec<Precursor> = scan
+            .precursor()
+            .map(|p| Precursor {
+                mz: p.mz() as f32,
+                intensity: Some(p.ion.intensity),
+                charge: p.ion.charge.map(|z| z as u8),
+                spectrum_ref: p.precursor_id.clone(),
+                // NOTE(review): the window bounds are passed through unchanged; confirm
+                // whether `Tolerance::Da` expects offsets relative to the target m/z.
+                isolation_window: Some(Tolerance::Da(
+                    p.isolation_window.lower_bound as f32,
+                    p.isolation_window.upper_bound as f32,
+                )),
+            })
+            .into_iter()
+            .collect();
+
+        let representation = match scan.description.signal_continuity {
+            mzdata::spectrum::SignalContinuity::Centroid => {
+                sage_core::spectrum::Representation::Centroid
+            }
+            // Spectra with unknown continuity are handled like profile data.
+            mzdata::spectrum::SignalContinuity::Unknown
+            | mzdata::spectrum::SignalContinuity::Profile => {
+                sage_core::spectrum::Representation::Profile
+            }
+        };
+
+        RawSpectrum {
+            file_id: self.file_id,
+            ms_level: scan.ms_level(),
+            id: scan.description.id.clone(),
+            precursors,
+            // A spectrum without a recorded scan event falls back to the default
+            // injection time instead of panicking.
+            ion_injection_time: scan
+                .acquisition()
+                .first_scan()
+                .map(|event| event.injection_time)
+                .unwrap_or_default(),
+            representation,
+            scan_start_time: scan.start_time() as f32,
+            total_ion_current: scan.peaks().tic(),
+            mz: scan.mzs().iter().map(|mz| *mz as f32).collect(),
+            intensity: scan.intensities().to_vec(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_read_mzmlb() -> io::Result<()> {
+        let spectra =
+            MzMLbReader::with_file_id(0).parse("../../tests/LQSRPAAPPAPGPGQLTLR.mzMLb")?;
+        assert_eq!(spectra.len(), 1);
+        let s = spectra.first().unwrap();
+        assert_eq!(s.id, "controllerType=0 controllerNumber=1 scan=30069");
+        assert_eq!(s.mz.len(), 299);
+        assert_eq!(s.intensity.len(), 299);
+        Ok(())
+    }
+}
diff --git a/crates/sage-cloudpath/src/util.rs b/crates/sage-cloudpath/src/util.rs
index badb95b6..e6770818 100644
--- a/crates/sage-cloudpath/src/util.rs
+++ b/crates/sage-cloudpath/src/util.rs
@@ -24,6 +24,21 @@ pub fn read_tdf<S: AsRef<str>>(s: S, file_id: usize) -> Result<Vec<RawSpectrum>, 
     }
 }
 
+/// Read all spectra from the mzMLb file at `s`.
+///
+/// Only compiled when the `mzdata` feature is enabled.
+#[cfg(feature = "mzdata")]
+pub fn read_mzmlb<S: AsRef<str>>(
+    s: S,
+    file_id: usize,
+    signal_to_noise: Option<u8>,
+) -> Result<Vec<RawSpectrum>, Error> {
+    crate::mzmlb::MzMLbReader::with_file_id(file_id)
+        .set_signal_to_noise(signal_to_noise)
+        .parse(s.as_ref())
+        .map_err(Error::IO)
+}
+
 pub fn read_mgf<S: AsRef<str>>(path: S, file_id: usize) -> Result<Vec<RawSpectrum>, Error> {
     read_and_execute(path, |mut bf| async move {
         let mut contents = String::new();
diff --git a/tests/LQSRPAAPPAPGPGQLTLR.mzMLb b/tests/LQSRPAAPPAPGPGQLTLR.mzMLb
new file mode 100644
index 00000000..814f8402
Binary files /dev/null and b/tests/LQSRPAAPPAPGPGQLTLR.mzMLb differ