libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
spomsspectrum.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/specpeptidoms/spomsspectrum.cpp
3 * \date 24/03/2025
4 * \author Aurélien Berthier
5 * \brief SpecPeptidOMS Spectrum
6 *
7 * C++ implementation of the SpecPeptidOMS algorithm described in :
8 * (1) Benoist, É.; Jean, G.; Rogniaux, H.; Fertin, G.; Tessier, D. SpecPeptidOMS Directly and
9 * Rapidly Aligns Mass Spectra on Whole Proteomes and Identifies Peptides That Are Not Necessarily
10 * Tryptic: Implications for Peptidomics. J. Proteome Res. 2025.
11 * https://doi.org/10.1021/acs.jproteome.4c00870.
12 */
13
14/*
15 * Copyright (c) 2025 Aurélien Berthier
16 * <aurelien.berthier@ls2n.fr>
17 *
18 * This program is free software: you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation, either version 3 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program. If not, see <http://www.gnu.org/licenses/>.
30 */
31
32#include <algorithm>
33#include <unordered_set>
34#include "spomsspectrum.h"
39
40namespace pappso
41{
42namespace specpeptidoms
43{
// Constructor taking a qualified mass spectrum, a precision and an amino acid code table.
// The base std::vector of data points is filled from a specglob::ExperimentalSpectrum built with
// qmass_spectrum and precision_ptr; the precursor mass error is initialised to 0.
// NOTE(review): the listing numbering jumps 44->46, 54->56 and 69->72, so the first line of the
// signature and a few statements of the body are not visible here — check the real source file.
44// SpOMSSpectrum::SpOMSSpectrum(const specglob::ExperimentalSpectrum &exp_spectrum)
46 pappso::PrecisionPtr precision_ptr,
47 const pappso::AaCode &aaCode)
48 : std::vector<pappso::specglob::ExperimentalSpectrumDataPoint>(
49 specglob::ExperimentalSpectrum(qmass_spectrum, precision_ptr)),
50 m_qualifiedMassSpectrum(qmass_spectrum),
51 m_precision_ptr(precision_ptr),
52 m_aaCode(aaCode),
53 m_precursor_mass_error(0)
54{
// One (initially empty) AaPosition list per entry counted by m_aaCode.getSize().
56 for(std::size_t iter = 0; iter < m_aaCode.getSize(); iter++)
57 {
58 m_aapositions.push_back(std::make_shared<std::vector<AaPosition>>());
// Capacity hint only. size() - 1 wraps around on an empty spectrum — presumably never the case,
// TODO confirm.
59 m_aapositions.back()->reserve(this->size() - 1);
60 }
// One (initially empty) list of amino acid codes per peak. Peak 0 gets the reindexed position 0,
// every other peak starts at -1 (the value preprocessing tests to detect unsupported peaks).
61 m_supported_peaks.reserve(this->size());
62 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
63 m_reindexed_peaks.push_back(0);
64 for(std::size_t iter = 1; iter < this->size(); iter++)
65 {
66 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
67 m_reindexed_peaks.push_back(-1);
68 }
// Overwrites the m/z of the first data point with MHPLUS + 2 * MPROTIUM + MASSOXYGEN
// (at() throws if the spectrum is empty).
69 this->at(0).peak_mz = pappso::MHPLUS + 2 * pappso::MPROTIUM + pappso::MASSOXYGEN;
72}
73
// Copy-style constructor from `other` (empty body, everything is done in the initializer list).
// NOTE(review): the signature line (listing line 74) is not visible in this listing.
// The data points are NOT copied from `other`: they are rebuilt from other's qualified mass
// spectrum and precision, without passing other's precursor mass error.
// NOTE(review): if `other` was already preprocessed, the rebuilt peak list may no longer match the
// copied index tables — confirm this is intended.
75 : std::vector<pappso::specglob::ExperimentalSpectrumDataPoint>(
76 pappso::specglob::ExperimentalSpectrum(other.m_qualifiedMassSpectrum, other.m_precision_ptr)),
77 m_qualifiedMassSpectrum(other.m_qualifiedMassSpectrum),
// m_aapositions and m_supported_peaks hold shared pointers: copying them shares the pointed-to
// lists with `other` instead of duplicating them.
78 m_aapositions(other.m_aapositions),
79 m_precision_ptr(other.m_precision_ptr),
80 m_supported_peaks(other.m_supported_peaks),
81 m_reindexed_peaks(other.m_reindexed_peaks),
82 m_aaCode(other.m_aaCode),
83 m_complementary_peak_indexes(other.m_complementary_peak_indexes),
84 m_precursor_mass_error(other.m_precursor_mass_error)
85{
86}
87
// Constructor building a fresh, not yet preprocessed spectrum from `other` with a given precursor
// mass error: the data points are rebuilt from other's qualified mass spectrum and precision,
// with precursor_mass_error forwarded to specglob::ExperimentalSpectrum.
// NOTE(review): the listing numbering jumps 87->89, 97->99 and 113->116, so the first line of the
// signature, one statement and the right-hand side of the last assignment are not visible here —
// check the real source file.
89 double precursor_mass_error)
90 : std::vector<pappso::specglob::ExperimentalSpectrumDataPoint>(
91 pappso::specglob::ExperimentalSpectrum(
92 other.m_qualifiedMassSpectrum, other.m_precision_ptr, precursor_mass_error)),
93 m_qualifiedMassSpectrum(other.m_qualifiedMassSpectrum),
94 m_precision_ptr(other.m_precision_ptr),
95 m_aaCode(other.m_aaCode),
96 m_precursor_mass_error(precursor_mass_error)
97{
// One (initially empty) AaPosition list per entry counted by m_aaCode.getSize().
99 for(std::size_t iter = 0; iter < m_aaCode.getSize(); iter++)
100 {
101 m_aapositions.push_back(std::make_shared<std::vector<AaPosition>>());
// Capacity hint only. size() - 1 wraps around on an empty spectrum — presumably never the case,
// TODO confirm.
102 m_aapositions.back()->reserve(this->size() - 1);
103 }
// One (initially empty) list of amino acid codes per peak. Peak 0 gets the reindexed position 0,
// every other peak starts at -1.
104 m_supported_peaks.reserve(this->size());
105 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
106 m_reindexed_peaks.push_back(0);
107 for(std::size_t iter = 1; iter < this->size(); iter++)
108 {
109 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
110 m_reindexed_peaks.push_back(-1);
111 }
// Overwrites the m/z of the first data point with MHPLUS + 2 * MPROTIUM + MASSOXYGEN.
112 this->at(0).peak_mz = pappso::MHPLUS + 2 * pappso::MPROTIUM + pappso::MASSOXYGEN;
// The m/z of the last data point is overwritten too; the assigned expression is on listing lines
// that are missing here.
113 this->back().peak_mz =
116}
117
121
122// Add comments !!
123void
125{
126 bool found;
127 uint8_t aa;
128 std::vector<double>::iterator iter1, iter2;
129 std::size_t peak1, peak2, next_l_peak;
130 std::vector<double> mass_list = getMassList();
131
132 peak1 = -1;
133 for(iter1 = mass_list.begin(); iter1 != mass_list.end(); iter1++)
134 {
135 peak1++;
136 peak2 = peak1;
137 for(iter2 = iter1 + 1; iter2 != mass_list.end(); iter2++)
138 {
139 peak2++;
140 aa = m_aaCode.getAaCodeByMass(*(iter2) - *(iter1), m_precision_ptr);
141 if(aa != 0)
142 {
143 next_l_peak = 0;
144 for(std::size_t iter = 1; iter < peak1;
145 iter++) // Search of the closer supported left peak.
146 // Possible optimization => search from the right
147 {
148 if(m_reindexed_peaks.at(iter) >= 0)
149 {
150 next_l_peak = iter;
151 }
152 }
153 if(m_reindexed_peaks.at(peak2) == -1)
154 {
155 addSupportedPeak(peak2);
156 m_supported_peaks.at(peak2)->push_back(aa);
157 }
158 if(m_reindexed_peaks.at(peak1) >= 0)
159 {
160 addAaPosition(aa, peak2, peak1, next_l_peak, true);
161 }
162 else
163 {
164 addAaPosition(aa, peak2, next_l_peak, next_l_peak, false);
165 }
166 }
167 }
168 }
169
170 removeUnsupportedMasses();
171 correctPeakIndexes();
172
173 // std::size_t i = 0;
174 // for(auto &data_point : *this)
175 // {
176 // data_point.indice = i;
177 // i++;
178 // }
179
180 fillComplementaryPeakIndexes();
181}
182
183// pappso::Aa const *
184// SpOMSSpectrum::findAAMass(double mass, bool *found) const
185// {
186// bool ok;
187// // auto charge = m_qualifiedMassSpectrum.getPrecursorCharge(&ok);
188
189// if(!ok)
190// {
191// throw pappso::PappsoException(
192// QObject::tr("precursor charge is not defined in spectrum %1")
193// .arg(m_qualifiedMassSpectrum.getMassSpectrumId().getNativeId()));
194// }
195// pappso::MzRange mz_range(mass / m_qualifiedMassSpectrum.getPrecursorCharge(),
196// m_precision_ptr);
197
198// for(std::unordered_map<const Aa, double>::const_iterator aa = aaMasses.begin();
199// aa != aaMasses.end();
200// aa++)
201// {
202// if(mz_range.contains(aa->second))
203// {
204// if(found != nullptr)
205// {
206// *found = true;
207// }
208// return &(aa->first);
209// }
210// }
211// if(found != nullptr)
212// {
213// *found = false;
214// }
215// return nullptr;
216// }
217
218// Not sure if optimal
219void
221{
222 std::vector<specglob::ExperimentalSpectrumDataPoint> kept_peaks;
223 for(std::vector<specglob::ExperimentalSpectrumDataPoint>::iterator iter = this->begin();
224 iter != this->end();
225 iter++)
226 {
227 if(m_reindexed_peaks.at(iter->indice) >= 0)
228 {
229 kept_peaks.push_back(*iter);
230 }
231 }
232 this->clear();
233 this->assign(kept_peaks.begin(), kept_peaks.end());
234}
235
236void
238 const std::size_t r_peak,
239 const std::size_t l_peak,
240 const std::size_t next_l_peak,
241 bool l_support)
242{
243 // aa=0 corresponds to no amino acid identified, thus aa is always >=1. We substract 1 to aa to
244 // avoid keeping an empty, useless vector.
245 if(l_support)
246 {
247 m_aapositions.at(aa - 1)->push_back(
248 {r_peak, l_peak, next_l_peak, computeCondition(l_peak, l_support), l_support});
249 }
250 else
251 {
252 m_aapositions.at(aa - 1)->push_back(
253 {r_peak, next_l_peak, next_l_peak, computeCondition(l_peak, l_support), l_support});
254 }
255}
256
257uint32_t
259 bool l_support) const
260{
261 uint32_t condition;
262 if(l_peak == 0)
263 {
264 condition = 2;
265 }
266 else if(!l_support)
267 {
268 condition = 1;
269 }
270 else
271 {
272 condition = 0;
273 for(std::vector<uint8_t>::iterator aa = m_supported_peaks.at(l_peak)->begin();
274 aa != m_supported_peaks.at(l_peak)->end();
275 aa++)
276 {
277 condition += 2 << *(aa);
278 }
279 }
280 return condition;
281}
282
283
284const std::vector<pappso::specpeptidoms::AaPosition> &
286{
287
288 return *m_aapositions.at(aa_code - 1);
289}
290
291std::vector<pappso::specpeptidoms::AaPosition>
293 std::uint8_t aa_code, std::vector<std::size_t> &peaks_to_remove) const
294{
295 std::vector<AaPosition> aa_positions;
296 for(auto aap : *m_aapositions.at(aa_code - 1))
297 {
298 if(std::find(peaks_to_remove.begin(), peaks_to_remove.end(), aap.r_peak) ==
299 peaks_to_remove.end())
300 {
301 aa_positions.push_back(aap);
302 }
303 }
304 return aa_positions;
305}
306
307std::vector<double>
309{
310 std::vector<double> mass_list;
311 for(const specglob::ExperimentalSpectrumDataPoint &n : *this)
312 {
313 mass_list.push_back(n.peak_mz);
314 }
315 return mass_list;
316}
317
320{
321 return this->at(indice).type;
322}
323
324uint
326{
327 return m_qualifiedMassSpectrum.getPrecursorCharge();
328}
// Returns the value of m_qualifiedMassSpectrum.getPrecursorMass().
// NOTE(review): listing line 330 (the qualified function name and its parameter list) is not
// visible in this listing.
329double
331{
332 return m_qualifiedMassSpectrum.getPrecursorMass();
333}
334
335
// Returns the mz difference between two peaks: peak_mz of r_peak minus peak_mz of l_peak.
// NOTE(review): listing line 341 (presumably the opening of the throw expression that receives
// the message below) is not visible in this listing.
336double
337pappso::specpeptidoms::SpOMSSpectrum::getMZShift(std::size_t l_peak, std::size_t r_peak) const
338{
// NOTE(review): the guard uses `>`, so an index equal to size() passes it and is only rejected by
// at() on the return line — `>=` looks intended, confirm.
339 if(std::max(r_peak, l_peak) > size())
340 {
342 QObject::tr("getMZShift : l_peak %1 or r_peak %2 greater than size %3")
343 .arg(l_peak)
344 .arg(r_peak)
345 .arg(size()));
346 }
347 return this->at(r_peak).peak_mz - this->at(l_peak).peak_mz;
348}
349
// Returns the missing mass between a peak and the precursor's mass (shift at the end), computed
// as: precursor mass - m_precursor_mass_error - peak_mz of the peak + MHPLUS.
// NOTE(review): listing lines 351 (qualified function name) and 355 (presumably the opening of
// the throw expression that receives the message below) are not visible in this listing.
350double
352{
// NOTE(review): the guard uses `>`, so peak == size() passes it and is only rejected by at() on
// the return statement — `>=` looks intended, confirm.
353 if(peak > size())
354 {
356 QObject::tr("getMissingMass : peak %1 greater than size %2").arg(peak).arg(size()));
357 }
358 return this->m_qualifiedMassSpectrum.getPrecursorMass() - m_precursor_mass_error -
359 this->at(peak).peak_mz + MHPLUS;
360}
361
362void
364{
365 std::size_t counter = 0;
366 for(std::size_t iter = 0; iter < peak; iter++)
367 {
368 if(m_reindexed_peaks.at(iter) >= 0)
369 {
370 counter++;
371 }
372 }
373 m_reindexed_peaks.at(peak) = counter;
374 for(std::size_t iter = peak + 1; iter < m_reindexed_peaks.size(); iter++)
375 {
376 if(m_reindexed_peaks.at(iter) >= 0)
377 {
378 m_reindexed_peaks.at(iter)++;
379 }
380 }
381}
382
383void
385{
386 for(auto aa = m_aapositions.begin(); aa != m_aapositions.end(); aa++)
387 {
388 for(auto aap = aa->get()->begin(); aap != aa->get()->end(); aap++)
389 {
390 aap->l_peak = m_reindexed_peaks.at(aap->l_peak);
391 aap->r_peak = m_reindexed_peaks.at(aap->r_peak);
392 aap->next_l_peak = m_reindexed_peaks.at(aap->next_l_peak);
393 }
394 }
395}
396
397void
399{
400 std::size_t left_index, right_index;
401
402 m_complementary_peak_indexes.reserve(this->size());
403 while(m_complementary_peak_indexes.size() < this->size())
404 {
405 m_complementary_peak_indexes.push_back(0);
406 }
407 left_index = 0;
408 right_index = this->size() - 1;
409 double comp_mass = m_qualifiedMassSpectrum.getPrecursorMass() + 2 * MHPLUS;
410
411 while(left_index < right_index)
412 {
413 pappso::MzRange mz_range(comp_mass - this->at(left_index).peak_mz, m_precision_ptr);
414 if(mz_range.contains(this->at(right_index).peak_mz))
415 {
416 m_complementary_peak_indexes.at(left_index) = right_index;
417 m_complementary_peak_indexes.at(right_index) = left_index;
418 qDebug() << left_index << right_index;
419 }
420 if(comp_mass - this->at(left_index).peak_mz - this->at(right_index).peak_mz >= 0)
421 {
422 left_index++;
423 }
424 else
425 {
426 right_index--;
427 }
428 }
429}
430
431std::size_t
433{
434 return m_complementary_peak_indexes.at(peak);
435}
436} // namespace specpeptidoms
437} // namespace pappso
collection of integer codes for each amino acid: 0 => null, 1 to 20 => amino acids sorted by their mass (...
Definition aacode.h:44
std::size_t getSize() const
Definition aacode.cpp:74
bool contains(pappso_double) const
Definition mzrange.cpp:120
Class representing a fully specified mass spectrum.
double getPrecursorMass(bool *ok_p=nullptr) const
get precursor mass given the charge stats and precursor mz
void preprocessSpectrum()
Preprocess the spectrum.
double getMZShift(std::size_t l_peak, std::size_t r_peak) const
Returns the mz difference between two peaks.
uint getPrecursorCharge() const
Returns the spectrum's precursor's charge.
SpOMSSpectrum(pappso::QualifiedMassSpectrum &qmass_spectrum, pappso::PrecisionPtr precision_ptr, const pappso::AaCode &aaCode)
double getMissingMass(std::size_t peak) const
Returns the missing mass between a peak and the precursor's mass (shift at the end).
std::vector< std::shared_ptr< std::vector< uint8_t > > > m_supported_peaks
uint32_t computeCondition(const std::size_t l_peak, bool l_support) const
Computes the "condition" integer, used to apply the three peaks rule.
void addAaPosition(uint8_t aa, const std::size_t r_peak, const std::size_t l_peak, const std::size_t next_l_peak, bool l_support)
Adds an amino acid position to the data structure.
void removeUnsupportedMasses()
Removes the unsupported peaks (without an amino acid to the left) from the spectrum.
pappso::QualifiedMassSpectrum m_qualifiedMassSpectrum
std::vector< std::shared_ptr< std::vector< AaPosition > > > m_aapositions
void correctPeakIndexes()
Reindexes the peaks after removal of the unsupported peaks.
void addSupportedPeak(std::size_t peak)
Add a peak to the supported peaks list.
void fillComplementaryPeakIndexes()
For each point of the spectrum, indicate the index of its complementary peak.
std::size_t getComplementaryPeak(std::size_t peak) const
const std::vector< AaPosition > & getAaPositions(std::uint8_t aa_code) const
Returns the list of aa_positions for a given amino acid code.
specglob::ExperimentalSpectrumDataPointType peakType(std::size_t indice) const
Returns the type of one of the spectrum's peaks.
std::vector< double > getMassList() const
Returns the spectrum's list of masses.
ExperimentalSpectrumDataPointType
Definition types.h:78
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
const pappso_double MHPLUS(1.007276466879)
const pappso_double MPROTIUM(1.007825032241)
unsigned int uint
Definition types.h:68
const pappso_double MASSOXYGEN(15.99491461956)