libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
semiglobalalignment.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/specpeptidoms/semiglobalalignment.cpp
3 * \date 24/03/2025
4 * \author Aurélien Berthier
5 * \brief protein to spectrum alignment
6 *
7 * C++ implementation of the SpecPeptidOMS algorithm described in :
8 * (1) Benoist, É.; Jean, G.; Rogniaux, H.; Fertin, G.; Tessier, D. SpecPeptidOMS Directly and
9 * Rapidly Aligns Mass Spectra on Whole Proteomes and Identifies Peptides That Are Not Necessarily
10 * Tryptic: Implications for Peptidomics. J. Proteome Res. 2025.
11 * https://doi.org/10.1021/acs.jproteome.4c00870.
12 */
13
14/*
15 * Copyright (c) 2025 Aurélien Berthier
16 * <aurelien.berthier@ls2n.fr>
17 *
18 *
19 * This program is free software: you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation, either version 3 of the License, or
22 * (at your option) any later version.
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program. If not, see <http://www.gnu.org/licenses/>.
31 */
32
33#include <QString>
34#include <QStringRef>
35#include <algorithm>
36#include "semiglobalalignment.h"
40#include "correctiontree.h"
43
44void
46{
47 peaks.clear();
49 score = 0;
50 begin_shift = 0.0;
51 end_shift = 0.0;
52 shifts.clear();
53 SPC = 0;
54 beginning = 0;
55 end = 0;
56}
57
58
59QString
60pappso::specpeptidoms::Alignment::getPeptideString(const QString &protein_sequence) const
61{
62 return protein_sequence.mid(beginning, end - beginning);
63}
64
65double
67{
68 double sum_of_elems = std::accumulate(shifts.begin(), shifts.end(), 0);
69 return begin_shift + sum_of_elems + end_shift;
70}
71
72
73std::size_t
75{
76 return beginning;
77}
78
79
81 const ScoreValues &score_values,
82 const pappso::PrecisionPtr precision_ptr,
83 const pappso::AaCode &aaCode)
84 : m_scorevalues(score_values), m_aaCode(aaCode)
85{
86 m_precision_ptr = precision_ptr;
87
88 KeyCell key_cell_init_first;
89 key_cell_init_first.n_row = 0;
90 key_cell_init_first.score = 0;
91 key_cell_init_first.beginning = 0;
92 key_cell_init_first.tree_id = 0;
93 m_interest_cells.push_back(key_cell_init_first);
94}
95
96const std::vector<pappso::specpeptidoms::KeyCell> &
98{
99 return m_interest_cells;
100}
101
102void
104 const SpOMSProtein *protein_ptr)
105{
106 // m_scenario.clear();
107 // TODO don't forget to reset any important variable
108 // m_best_alignment.reset();
109 // m_best_corrected_alignment.reset();
110 // m_best_post_processed_alignment.reset();
111
112 std::size_t sequence_length = protein_ptr->size();
113
114 KeyCell key_cell_init;
115 key_cell_init.n_row = 0;
116 key_cell_init.score = m_scorevalues.get(ScoreType::init);
117 key_cell_init.beginning = 0;
118 key_cell_init.tree_id = 0;
119
120 m_interest_cells.resize(spectrum.size());
121 std::fill(m_interest_cells.begin(), m_interest_cells.end(), key_cell_init);
122
123 m_interest_cells.at(0).score = 0;
124
125 // m_location_saver.resetLocationSaver();
126 m_updated_cells.clear();
127 for(std::size_t row_number = 1; row_number <= sequence_length; row_number++)
128 {
129 updateAlignmentMatrix(*protein_ptr,
130 row_number,
131 spectrum.getAaPositions(protein_ptr->at(row_number - 1).code),
132 spectrum,
133 true,
134 protein_ptr);
135 }
136}
137
138void
140 const SpOMSProtein *protein_ptr,
141 const std::size_t beginning,
142 const std::size_t length)
143{
144 try
145 {
146 qDebug();
147 const QString &protein_seq = protein_ptr->getSequence();
148 std::size_t length2;
149 if((qsizetype)(beginning + length) <= protein_seq.size())
150 {
151 length2 = length;
152 }
153 else
154 {
155 length2 = protein_seq.size() - beginning;
156 }
157
158 qDebug();
159 QString sequence_str = protein_seq.sliced(protein_seq.size() - beginning - length2, length2);
160
161 SpOMSProtein sequence("sub_sequence", sequence_str, m_aaCode);
162
163 // std::reverse(sequence.begin(), sequence.end());
164 std::vector<AaPosition> aa_positions;
165 CorrectionTree correction_tree;
166
167 qDebug();
168 m_scenario.reserve(length2 + 1, spectrum.size());
169 m_interest_cells.reserve(spectrum.size());
170 m_interest_cells.at(0).n_row = 0;
171 m_interest_cells.at(0).score = 0;
172 m_interest_cells.at(0).beginning = 0;
173 m_interest_cells.at(0).tree_id = 0;
174 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
175 {
176 m_interest_cells.at(i).n_row = 0;
177 m_interest_cells.at(i).score = m_scorevalues.get(ScoreType::init);
178 m_interest_cells.at(i).beginning = 0;
179 m_interest_cells.at(i).tree_id = 0;
180 }
181 qDebug();
182 for(std::size_t iter = m_interest_cells.size(); iter < spectrum.size(); iter++)
183 {
184 m_interest_cells.push_back({0, m_scorevalues.get(ScoreType::init), 0, 0});
185 }
186 qDebug();
187 m_scenario.resetScenario();
188 qDebug();
189 for(std::size_t row_number = 1; row_number <= length2; row_number++)
190 {
191
192 qDebug() << "row_number - 1=" << row_number - 1 << " sequence.size()=" << sequence.size();
193 // aa = Aa(sequence[row_number - 1].unicode());
194 updateAlignmentMatrix(sequence,
195 row_number,
196 spectrum.getAaPositions(sequence[row_number - 1].code),
197 spectrum,
198 false,
199 protein_ptr);
200 }
201 qDebug();
202 saveBestAlignment(sequence, spectrum, protein_seq.size() - beginning);
203
204 qDebug() << m_scenario.getBestScore() << " " << MIN_ALIGNMENT_SCORE;
205 // Correction : if complementary peaks are used, corrected spectra without one of the two
206 // peaks are generated and aligned. The best alignment is kept.
207 if(m_scenario.getBestScore() >
208 MIN_ALIGNMENT_SCORE) // We only correct alignments with acceptable scores
209 {
210 // we only correct alignment if the sequence has a minimum amino acid diversity
211 if(checkSequenceDiversity(sequence.getSequence(), 5, 2))
212 {
213
214 qDebug();
215 m_best_corrected_alignment.score = 0;
216 for(std::size_t iter : m_best_alignment.peaks)
217 {
218 if(iter > spectrum.getComplementaryPeak(iter))
219 {
220 break;
221 }
222 else if(std::find(m_best_alignment.peaks.begin(),
223 m_best_alignment.peaks.end(),
224 spectrum.getComplementaryPeak(iter)) !=
225 m_best_alignment.peaks.end())
226 {
227 correction_tree.addPeaks(iter, spectrum.getComplementaryPeak(iter));
228 }
229 }
230 qDebug();
231 std::vector<std::vector<std::size_t>> corrections = correction_tree.getPeaks();
232 if(corrections.size() > 0)
233 {
234 m_best_alignment.score =
235 0; // Reset the best alignment score (we dont want to keep
236 // the original alignment if corrections are needed)
237 qDebug();
238 for(auto peaks_to_remove : corrections)
239 {
240 qDebug();
241 correctAlign(sequence,
242 protein_ptr,
243 spectrum,
244 peaks_to_remove,
245 protein_seq.size() - beginning);
246 qDebug();
247 }
248 qDebug();
249 m_best_alignment = m_best_corrected_alignment;
250 }
251 }
252 }
253 else
254 {
255 // this sequence has too much redundancy
256 // we have to lower the score
257 m_best_alignment.score = 0;
258 }
259 qDebug();
260 }
261 catch(const pappso::PappsoException &err)
262 {
264 QObject::tr("SemiGlobalAlignment::preciseAlign failed :\n%1").arg(err.qwhat()));
265 }
266}
267
268void
270 const SpOMSProtein *protein_ptr,
271 const SpOMSSpectrum &spectrum,
272 std::vector<std::size_t> &peaks_to_remove,
273 std::size_t offset)
274{
275 std::vector<AaPosition> aa_positions;
276 CorrectionTree correction_tree;
277 std::vector<std::size_t> final_peaks_to_remove;
278
279 KeyCell key_cell_init;
280 key_cell_init.beginning = 0;
281 key_cell_init.n_row = 0;
282 key_cell_init.score = m_scorevalues.get(ScoreType::init);
283 key_cell_init.tree_id = 0;
284
285 std::fill(m_interest_cells.begin(), m_interest_cells.end(), key_cell_init);
286
287 m_interest_cells.at(0).score = 0;
288
289 m_scenario.resetScenario();
290 qDebug();
291 for(qsizetype row_number = 1; row_number <= sequence.size(); row_number++)
292 {
293 qDebug() << row_number - 1 << " " << sequence.size();
294 qDebug() << "sequence[row_number - 1].aa" << (char)sequence[row_number - 1].aa;
295 qDebug();
296 aa_positions = spectrum.getAaPositions(sequence[row_number - 1].code, peaks_to_remove);
297 qDebug();
298 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum, false, protein_ptr);
299 qDebug();
300 }
301
302 qDebug();
303 // Correction : if complementary peaks are used, corrected spectra without one of the two peaks
304 // are generated and aligned. The best alignment is kept.
305 qDebug() << m_scenario.getBestScore();
306 if(m_scenario.getBestScore() >
307 MIN_ALIGNMENT_SCORE) // We only correct alignments with acceptable scores
308 {
309 qDebug();
310 qDebug() << sequence.getSequence();
311 qDebug() << offset;
312 qDebug() << spectrum.getPrecursorCharge();
313 saveBestAlignment(sequence, spectrum, offset);
314 qDebug();
315 for(std::size_t iter : m_best_alignment.peaks)
316 {
317 qDebug() << "iter:" << iter << "comp:" << spectrum.getComplementaryPeak(iter);
318 if(iter == spectrum.getComplementaryPeak(iter))
319 {
320 continue;
321 }
322 else if(iter > spectrum.getComplementaryPeak(iter))
323 {
324 break;
325 }
326 else if(std::find(m_best_alignment.peaks.begin(),
327 m_best_alignment.peaks.end(),
328 spectrum.getComplementaryPeak(iter)) != m_best_alignment.peaks.end())
329 {
330 correction_tree.addPeaks(iter, spectrum.getComplementaryPeak(iter));
331 }
332 }
333 std::vector<std::vector<std::size_t>> corrections = correction_tree.getPeaks();
334 if(corrections.size() > 0)
335 {
336 for(auto new_peaks_to_remove : corrections)
337 {
338 final_peaks_to_remove = std::vector<std::size_t>(new_peaks_to_remove);
339 final_peaks_to_remove.insert(
340 final_peaks_to_remove.end(), peaks_to_remove.begin(), peaks_to_remove.end());
341 correctAlign(sequence, protein_ptr, spectrum, final_peaks_to_remove, offset);
342 }
343 }
344 else if(m_scenario.getBestScore() > m_best_corrected_alignment.score)
345 {
346 m_best_corrected_alignment = m_best_alignment;
347 }
348 }
349 qDebug();
350}
351
352void
354 const SpOMSProtein *protein_ptr,
355 std::size_t beginning,
356 std::size_t length,
357 const std::vector<double> &shifts)
358{
359 std::size_t current_SPC = m_best_alignment.SPC;
360 int current_best_score = m_best_alignment.score;
361 m_best_post_processed_alignment.score = 0;
362 for(double precursor_mass_error : shifts)
363 {
364 SpOMSSpectrum corrected_spectrum(spectrum, precursor_mass_error);
365 preciseAlign(corrected_spectrum, protein_ptr, beginning, length);
366 if(m_best_alignment.score >= m_best_post_processed_alignment.score)
367 {
368 m_best_post_processed_alignment = m_best_alignment;
369 }
370 }
371 if(m_best_post_processed_alignment.SPC > current_SPC &&
372 m_best_post_processed_alignment.score >= current_best_score)
373 {
374 m_best_alignment = m_best_post_processed_alignment;
375 }
376}
377
378void
381 const std::size_t row_number,
382 const std::vector<AaPosition> &aa_positions,
383 const SpOMSSpectrum &spectrum,
384 const bool fast_align,
385 const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
386{
387 int where = 0;
388 try
389 {
390 int score_found, score_shift, best_score, alt_score, tree_id;
391 uint32_t condition; // FIXME : may be used uninitialised
392 std::size_t best_column, shift, beginning, missing_aas, length, perfect_shift_origin;
393 KeyCell *current_cell_ptr, *tested_cell_ptr;
394 AlignType alignment_type, temp_align_type;
395
396 double smallest_aa_mass = m_aaCode.getMass((std::uint8_t)1);
397
398 m_updated_cells.reserve(aa_positions.size());
399 where = 1;
400 // Computation of the threePeaks condition, see spomsspectrum.h for more details.
401 if(fast_align)
402 {
403 condition = 3;
404 if(row_number > 1)
405 {
406 qDebug() << (char)sequence.at(row_number - 2).aa;
407 qDebug() << "condition" << condition;
408 condition += 2 << sequence.at(row_number - 2).code;
409 qDebug();
410 qDebug() << "condition" << condition;
411 }
412 }
413 where = 2;
414 for(std::vector<AaPosition>::const_iterator aa_position = aa_positions.begin();
415 aa_position != aa_positions.end();
416 aa_position++)
417 {
418
419 where = 3;
420 if(((condition & aa_position->condition) != 0) ||
421 !fast_align) // Verification of the threePeaks condition (only during first alignment).
422 {
423 current_cell_ptr = &m_interest_cells.at(aa_position->r_peak);
424 if(spectrum.peakType(aa_position->r_peak) ==
426 {
427 score_found = m_scorevalues.get(ScoreType::foundDouble);
428 score_shift = m_scorevalues.get(ScoreType::foundShiftDouble);
429 }
430 else
431 {
432 score_found = m_scorevalues.get(ScoreType::found);
433 score_shift = m_scorevalues.get(ScoreType::foundShift);
434 }
435
436 // not found case (always computed)
437 best_column = aa_position->r_peak;
438 best_score = current_cell_ptr->score + (row_number - current_cell_ptr->n_row) *
439 m_scorevalues.get(ScoreType::notFound);
440 beginning = current_cell_ptr->beginning;
441 tree_id = current_cell_ptr->tree_id;
442 alignment_type = AlignType::notFound;
443
444 // found case (Can only happen if the left peak is supported)
445 if(aa_position->l_support)
446 {
447 tested_cell_ptr = &m_interest_cells.at(aa_position->l_peak);
448 if(aa_position->l_peak == 0)
449 {
450 alt_score = tested_cell_ptr->score + score_found;
451 }
452 else
453 {
454 if(tested_cell_ptr->n_row == row_number - 1)
455 {
456 alt_score = tested_cell_ptr->score +
457 (row_number - tested_cell_ptr->n_row - 1) *
458 m_scorevalues.get(ScoreType::notFound) +
459 score_found;
460 }
461 else
462 {
463 alt_score = tested_cell_ptr->score +
464 (row_number - tested_cell_ptr->n_row - 1) *
465 m_scorevalues.get(ScoreType::notFound) +
466 score_shift;
467 }
468 }
469 if(alt_score >= best_score)
470 {
471 alignment_type = AlignType::found;
472 best_score = alt_score;
473 best_column = aa_position->l_peak;
474 if(best_column == 0)
475 {
476 if(row_number < ALIGNMENT_SURPLUS)
477 {
478 beginning = 0;
479 }
480 else
481 {
482 beginning = std::max((std::size_t)(row_number - ALIGNMENT_SURPLUS),
483 (std::size_t)0);
484 }
485 if(fast_align)
486 {
487 tree_id = m_location_saver.getNextTree();
488 }
489 }
490 else
491 {
492 beginning = tested_cell_ptr->beginning;
493 tree_id = tested_cell_ptr->tree_id;
494 }
495 }
496 }
497
498 where = 4;
499 // generic shift case (all shifts are tested)
500 shift = 0;
501 while(shift < aa_position->next_l_peak)
502 {
503 tested_cell_ptr = &m_interest_cells.at(aa_position->next_l_peak - shift);
504 // verification saut parfait
505 if(perfectShiftPossible(sequence,
506 spectrum,
507 tested_cell_ptr->n_row,
508 row_number,
509 aa_position->next_l_peak - shift,
510 aa_position->r_peak))
511 {
512 alt_score = tested_cell_ptr->score +
513 (row_number - tested_cell_ptr->n_row - 1) *
514 m_scorevalues.get(ScoreType::notFound) +
515 score_found;
516 temp_align_type = AlignType::perfectShift;
517 }
518 else
519 {
520 alt_score = tested_cell_ptr->score +
521 (row_number - tested_cell_ptr->n_row - 1) *
522 m_scorevalues.get(ScoreType::notFound) +
523 score_shift;
524 temp_align_type = AlignType::shift;
525 }
526 if(alt_score > best_score)
527 {
528 alignment_type = temp_align_type;
529 best_score = alt_score;
530 best_column = aa_position->next_l_peak - shift;
531 beginning = tested_cell_ptr->beginning;
532 tree_id = tested_cell_ptr->tree_id;
533 }
534 shift++;
535 }
536
537 where = 5;
538 // case shift from column 0 (no penalties if all precedent amino acids are missed)
539 tested_cell_ptr = &m_interest_cells.at(0);
540 // verification saut parfait
541 perfect_shift_origin =
542 perfectShiftPossibleFrom0(sequence, spectrum, row_number, aa_position->r_peak);
543 if(perfect_shift_origin != row_number)
544 {
545 alt_score = tested_cell_ptr->score + score_found;
546 temp_align_type = AlignType::perfectShift;
547 }
548 else
549 {
550 alt_score = tested_cell_ptr->score + score_shift;
551 temp_align_type = AlignType::shift;
552 }
553
554 where = 6;
555 if(alt_score > best_score)
556 {
557 alignment_type = temp_align_type;
558 best_score = alt_score;
559 best_column = 0;
560 missing_aas =
561 std::floor(spectrum.getMZShift(0, aa_position->l_peak) / smallest_aa_mass);
562 if(row_number < ALIGNMENT_SURPLUS + missing_aas)
563 {
564 beginning = 0;
565 }
566 else
567 {
568 beginning =
569 std::max((std::size_t)(row_number - missing_aas - ALIGNMENT_SURPLUS),
570 (std::size_t)0);
571 }
572 where = 7;
573 if(fast_align)
574 {
575 tree_id = m_location_saver.getNextTree();
576 }
577 }
578
579 where = 8;
580 if(best_column != aa_position->r_peak)
581 {
582 m_updated_cells.push_back(
583 {aa_position->r_peak, {row_number, best_score, beginning, tree_id}});
584 }
585
586 where = 9;
587 if(best_score > m_location_saver.getMinScore(tree_id) && fast_align)
588 {
589 length =
590 row_number - beginning + 1 +
591 std::ceil(spectrum.getMissingMass(aa_position->r_peak) / smallest_aa_mass) +
593 where = 10;
594 m_location_saver.addLocation(beginning, length, tree_id, best_score, protein_ptr);
595 }
596 else if(!fast_align)
597 {
598
599 where = 11;
600 if(alignment_type == AlignType::perfectShift && best_column == 0)
601 {
602 m_scenario.saveOrigin(row_number,
603 aa_position->r_peak,
604 perfect_shift_origin,
605 0,
606 best_score,
608 }
609 else
610 {
611 m_scenario.saveOrigin(row_number,
612 aa_position->r_peak,
613 m_interest_cells.at(best_column).n_row,
614 best_column,
615 best_score,
616 alignment_type);
617 }
618 }
619 }
620 }
621
622 where = 30;
623 // Update row number in column 0
624 m_updated_cells.push_back({0, {row_number, 0, 0, 0}});
625
626 // Save updated key cells in the matrix
627 while(m_updated_cells.size() > 0)
628 {
629 qDebug() << m_interest_cells.size() << " " << m_updated_cells.back().first;
630 m_interest_cells.at(m_updated_cells.back().first) = m_updated_cells.back().second;
631 m_updated_cells.pop_back();
632 }
633 where++;
634 }
635 catch(const std::exception &stderr)
636 {
638 QObject::tr("updateAlignmentMatrix failed std::exception :\n%1 %2")
639 .arg(where)
640 .arg(stderr.what()));
641 }
642 catch(const pappso::PappsoException &err)
643 {
645 QObject::tr("updateAlignmentMatrix failed :\n%1").arg(err.qwhat()));
646 }
647}
648
649bool
652 const SpOMSSpectrum &spectrum,
653 const std::size_t origin_row,
654 const std::size_t current_row,
655 const std::size_t l_peak,
656 const std::size_t r_peak) const
657{
658 try
659 {
660 double missing_mass = 0;
661 auto it_end = sequence.begin() + current_row;
662 for(auto iter = sequence.begin() + origin_row; (iter != it_end) && (iter != sequence.end());
663 iter++)
664 {
665 missing_mass += iter->mass; // Aa(iter->unicode()).getMass();
666 }
667 if(missing_mass > 0)
668 {
669 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
670 return mz_range.contains(spectrum.getMZShift(l_peak, r_peak));
671 }
672 else
673 {
674 return false;
675 }
676 }
677 catch(const std::exception &stderr)
678 {
680 QObject::tr("perfectShiftPossible failed std exception:\n%1").arg(stderr.what()));
681 }
682 catch(const pappso::PappsoException &err)
683 {
685 QObject::tr("perfectShiftPossible failed :\n%1").arg(err.qwhat()));
686 }
687}
688
689std::size_t
692 const SpOMSSpectrum &spectrum,
693 const std::size_t current_row,
694 const std::size_t r_peak) const
695{
696 std::size_t perfect_shift_origin = current_row;
697 double missing_mass = spectrum.getMZShift(0, r_peak);
698 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
699 double aa_mass = 0;
700 while(aa_mass < missing_mass && perfect_shift_origin > 0 && !mz_range.contains(aa_mass))
701 {
702 aa_mass += sequence.at(perfect_shift_origin - 1)
703 .mass; // Aa(sequence.at(perfect_shift_origin - 1).unicode()).getMass();
704 perfect_shift_origin--;
705 }
706 if(mz_range.contains(aa_mass))
707 {
708 return perfect_shift_origin;
709 }
710 else
711 {
712 return current_row;
713 }
714}
715
716std::size_t
719 const SpOMSSpectrum &spectrum,
720 std::size_t end_row,
721 std::size_t end_peak) const
722{
723 try
724 {
725 std::size_t perfect_shift_end = end_row + 1;
726 double missing_mass = spectrum.getMissingMass(end_peak);
727 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
728 double aa_mass = 0;
729 while(aa_mass < missing_mass && perfect_shift_end < (std::size_t)sequence.size() &&
730 !mz_range.contains(aa_mass))
731 {
732 aa_mass += sequence.at(perfect_shift_end - 1)
733 .mass; // Aa(sequence.at(perfect_shift_end - 1).unicode()).getMass();
734 perfect_shift_end++;
735 }
736 if(mz_range.contains(aa_mass))
737 {
738 return perfect_shift_end - 1;
739 }
740 else
741 {
742 return end_row;
743 }
744 }
745 catch(const pappso::PappsoException &err)
746 {
748 QObject::tr("perfectShiftPossibleEnd failed :\n%1").arg(err.qwhat()));
749 }
750}
751
755
758{
759 return m_location_saver;
760}
761
764{
765 return m_scenario;
766}
767
768void
771 const SpOMSSpectrum &spectrum,
772 std::size_t offset)
773{
774 qDebug();
775 m_best_alignment.m_peptideModel.reset();
776 m_best_alignment.peaks.clear();
777 m_best_alignment.shifts.clear();
778 std::size_t previous_row; // FIXME : may be used uninitialised
779 std::size_t previous_column = 0;
780 std::size_t perfect_shift_end;
781 std::pair<std::vector<ScenarioCell>, int> best_alignment = m_scenario.getBestAlignment();
782 m_best_alignment.score = best_alignment.second;
783 std::vector<SpOMSAa> skipped_aa;
784 double skipped_mass;
785 // Retrieving beginning and end
786 if(best_alignment.first.front().previous_row > offset)
787 {
789 QString("best_alignment.first.front().previous_row > offset %1 %2")
790 .arg(offset)
791 .arg(best_alignment.first.front().previous_row));
792 }
793 if(best_alignment.first.back().previous_row > offset)
794 {
796 QString("best_alignment.first.back().previous_row > offset %1 %2")
797 .arg(offset)
798 .arg(best_alignment.first.back().previous_row));
799 }
800 m_best_alignment.beginning = offset - best_alignment.first.front().previous_row;
801 m_best_alignment.end = offset - best_alignment.first.back().previous_row;
802
803 qDebug();
804 AminoAcidModel aa_model;
805 aa_model.m_massDifference = 0;
806 // Filling temp_interpretation and peaks vectors
807 for(auto cell : best_alignment.first)
808 {
809 switch(cell.alignment_type)
810 {
811 case AlignType::found:
812 aa_model.m_aminoAcid = sequence.at(previous_row - 1).aa;
813 aa_model.m_massDifference = 0;
814 aa_model.m_skipped = false;
815 m_best_alignment.m_peptideModel.push_back(aa_model);
816 if(previous_row > cell.previous_row + 1)
817 {
818 skipped_mass = sequence.at(previous_row - 1)
819 .mass; // Aa(sequence.at(previous_row - 1).unicode()).getMass();
820 skipped_aa =
821 sequence.sliced(cell.previous_row, previous_row - cell.previous_row - 1);
822 aa_model.m_massDifference = 0;
823 aa_model.m_skipped = true;
824 for(auto aa : skipped_aa)
825 {
826 aa_model.m_aminoAcid = aa.aa;
827 m_best_alignment.m_peptideModel.push_back(aa_model);
828 skipped_mass += aa.mass; // Aa(aa.unicode()).getMass();
829 }
830 m_best_alignment.m_peptideModel.back().m_massDifference =
831 spectrum.getMZShift(cell.previous_column, previous_column) - skipped_mass;
832 }
833 m_best_alignment.peaks.push_back(cell.previous_column);
834 break;
836 aa_model.m_aminoAcid = sequence.at(previous_row - 1).aa;
837 aa_model.m_massDifference = 0;
838 aa_model.m_skipped = true;
839 m_best_alignment.m_peptideModel.push_back(aa_model);
840 break;
841 case AlignType::shift:
842
843 aa_model.m_aminoAcid = sequence.at(previous_row - 1).aa;
844 aa_model.m_massDifference = spectrum.getMZShift(cell.previous_column, previous_column) -
845 aa_model.m_aminoAcid.getMass();
846 aa_model.m_skipped = false;
847 m_best_alignment.m_peptideModel.push_back(aa_model);
848 m_best_alignment.peaks.push_back(cell.previous_column);
849 m_best_alignment.shifts.push_back(
850 spectrum.getMZShift(cell.previous_column, previous_column) -
851 sequence.at(previous_row - 1).mass);
852 break;
854 m_best_alignment.peaks.push_back(cell.previous_column);
855 skipped_aa = sequence.sliced(cell.previous_row, previous_row - cell.previous_row);
856 std::reverse(skipped_aa.begin(), skipped_aa.end());
857 aa_model.m_massDifference = 0;
858 aa_model.m_skipped = false;
859 for(auto aa : skipped_aa)
860 {
861 aa_model.m_aminoAcid = aa.aa;
862 m_best_alignment.m_peptideModel.push_back(aa_model);
863 }
864 break;
865 case AlignType::init:
866 previous_row = cell.previous_row;
867 previous_column = cell.previous_column;
868 m_best_alignment.peaks.push_back(cell.previous_column);
869 break;
870 }
871 previous_row = cell.previous_row;
872 previous_column = cell.previous_column;
873 }
874 std::reverse(m_best_alignment.peaks.begin(), m_best_alignment.peaks.end());
875 std::reverse(m_best_alignment.m_peptideModel.begin(), m_best_alignment.m_peptideModel.end());
876
877 qDebug();
878 // Compute begin_shift and end_shift
879 MzRange zero(0, m_precision_ptr);
880 m_best_alignment.begin_shift = spectrum.getMZShift(0, m_best_alignment.peaks.front());
881 m_best_alignment.end_shift = spectrum.getMissingMass(m_best_alignment.peaks.back());
882 if(zero.contains(m_best_alignment.end_shift))
883 {
884 m_best_alignment.end_shift = 0;
885 }
886
887 qDebug();
888 // Computing SPC
889 m_best_alignment.SPC = 0;
890 for(auto peak : m_best_alignment.peaks)
891 {
892 switch(spectrum.at(peak).type)
893 {
895 qDebug() << peak << "native";
896 m_best_alignment.SPC += 1;
897 break;
899 qDebug() << peak << "both";
900 m_best_alignment.SPC += 2;
901 break;
903 qDebug() << peak << "synthetic";
904 break;
906 qDebug() << peak << "symmetric";
907 m_best_alignment.SPC += 1;
908 break;
909 }
910 }
911
912 qDebug();
913 // Final check of the end shift
914 if(m_best_alignment.end_shift > 0)
915 {
916 perfect_shift_end = perfectShiftPossibleEnd(sequence,
917 spectrum,
918 best_alignment.first.front().previous_row,
919 m_best_alignment.peaks.back());
920 if(perfect_shift_end != best_alignment.first.front().previous_row)
921 {
922 skipped_aa =
923 sequence.sliced(best_alignment.first.front().previous_row,
924 perfect_shift_end - best_alignment.first.front().previous_row);
925 aa_model.m_massDifference = 0;
926 aa_model.m_skipped = true;
927 for(auto aa = skipped_aa.begin(); aa != skipped_aa.end(); aa++)
928 {
929 aa_model.m_aminoAcid = aa->aa;
930 m_best_alignment.m_peptideModel.push_back(aa_model);
931 }
932 m_best_alignment.beginning = offset - perfect_shift_end;
933 m_best_alignment.end_shift = 0;
934 }
935 else
936 {
937 m_best_alignment.score += m_scorevalues.get(ScoreType::foundShift);
938 }
939 }
940
941 qDebug();
942 // Writing final interpretation
943 if(m_best_alignment.end_shift > 0)
944 {
945 m_best_alignment.m_peptideModel.setNterShift(m_best_alignment.end_shift);
946 }
947
948 std::reverse(m_best_alignment.m_peptideModel.begin(), m_best_alignment.m_peptideModel.end());
949 if(m_best_alignment.begin_shift > 0)
950 {
951 m_best_alignment.m_peptideModel.setCterShift(m_best_alignment.begin_shift);
952 }
953
954 m_best_alignment.m_peptideModel.setPrecursorMass(spectrum.getPrecursorMass());
955 qDebug();
956}
957
960{
961 return m_best_alignment;
962}
963
964std::vector<double>
966 const Alignment &alignment,
967 const QString &protein_seq)
968{
969 // qDebug() << protein_seq;
970 if(alignment.end > (std::size_t)protein_seq.size())
971 {
972 throw pappso::ExceptionOutOfRange(QString("alignment.end > protein_seq.size() %1 %2")
973 .arg(alignment.end)
974 .arg(protein_seq.size()));
975 }
976 std::vector<double> potential_mass_errors(alignment.shifts);
977 double shift = alignment.end_shift;
978 std::size_t index;
979 if(alignment.beginning > 0)
980 { // -1 on unsigned int makes it wrong
981 index = alignment.beginning - 1;
982 while(shift > 0 && index > 0)
983 {
984 potential_mass_errors.push_back(shift);
985 // qDebug() << " shift=" << shift << " index=" << index
986 // << " letter=" << protein_seq.at(index).toLatin1();
987 shift -= aa_code.getMass(
988 protein_seq.at(index).toLatin1()); // Aa(protein_seq.at(index).unicode()).getMass();
989 index--;
990 }
991 }
992
993 // qDebug() << "second";
994 shift = alignment.begin_shift;
995 index = alignment.end + 1;
996 while(shift > 0 && index < (std::size_t)protein_seq.size())
997 {
998 potential_mass_errors.push_back(shift);
999 qDebug() << " shift=" << shift << " index=" << index
1000 << " letter=" << protein_seq.at(index).toLatin1();
1001 shift -= aa_code.getMass(
1002 protein_seq.at(index).toLatin1()); // Aa(protein_seq.at(index).unicode()).getMass();
1003 index++;
1004 }
1005 // qDebug();
1006 return potential_mass_errors;
1007}
1008
1009bool
1011 std::size_t window,
1012 std::size_t minimum_aa_diversity)
1013{
1014 qDebug() << "sequence=" << sequence << " window=" << window
1015 << " minimum_aa_diversity=" << minimum_aa_diversity;
1016 if(sequence.size() < window)
1017 return false;
1018 auto it_begin = sequence.begin();
1019 auto it_end = sequence.begin() + window;
1020 QString window_copy(sequence.mid(0, window));
1021 while(it_end != sequence.end())
1022 {
1023 std::partial_sort_copy(it_begin, it_end, window_copy.begin(), window_copy.end());
1024
1025 qDebug() << window_copy;
1026 std::size_t uniqueCount =
1027 std::unique(window_copy.begin(), window_copy.end()) - window_copy.begin();
1028
1029 qDebug() << uniqueCount;
1030 if(uniqueCount < minimum_aa_diversity)
1031 return false;
1032 it_begin++;
1033 it_end++;
1034 }
1035 return true;
1036}
collection of integer code for each amino acid 0 => null 1 to 20 => amino acid sorted by there mass (...
Definition aacode.h:44
double getMass(uint8_t aa_code) const
get the mass of the amino acid given its integer code the amino acid can bear some modification (if a...
Definition aacode.cpp:223
pappso_double getMass() const override
Definition aa.cpp:90
bool contains(pappso_double) const
Definition mzrange.cpp:120
virtual const QString & qwhat() const
std::vector< std::vector< std::size_t > > getPeaks() const
void addPeaks(std::size_t peak1, std::size_t peak2)
const Alignment & getBestAlignment() const
Returns a const ref to m_best_alignment.
Scenario getScenario() const
Returns a copy of m_scenario.
std::size_t perfectShiftPossibleEnd(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t end_row, std::size_t end_peak) const
indicates if a perfect shift is possible between the provided positions
void updateAlignmentMatrix(const pappso::specpeptidoms::SpOMSProtein &sequence, const std::size_t row_number, const std::vector< AaPosition > &aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenar...
void postProcessingAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, std::size_t beginning, std::size_t length, const std::vector< double > &shifts)
performs the post-processing : generates corrected spectra and align them
void preciseAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, const std::size_t beginning, const std::size_t length)
performs the second alignment search between a protein subsequence and a spectrum.
void correctAlign(const SpOMSProtein &protein_subseq, const SpOMSProtein *protein_ptr, const SpOMSSpectrum &spectrum, std::vector< std::size_t > &peaks_to_remove, std::size_t offset)
Recursively performs the correction of the alignment.
const std::vector< KeyCell > & getInterestCells() const
convenient function for degub purpose
bool perfectShiftPossible(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
indicates if a perfect shift is possible between the provided positions
void fastAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr)
perform the first alignment search between a protein sequence and a spectrum. The member location hea...
static bool checkSequenceDiversity(const QString &sequence, std::size_t window, std::size_t minimum_aa_diversity)
check that the sequence has a minimum of amino acid checkSequenceDiversity
std::size_t perfectShiftPossibleFrom0(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t current_row, const std::size_t r_peak) const
indicates if a perfect shift is possible from the spectrum beginning to the provided peak.
static std::vector< double > getPotentialMassErrors(const pappso::AaCode &aa_code, const Alignment &alignment, const QString &protein_seq)
Returns a list of the potential mass errors corresponding to the provided alignment in the provided p...
void saveBestAlignment(const SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t offset)
Stores the best alignment from m_scenario in m_best_alignment.
SemiGlobalAlignment(const ScoreValues &score_values, const pappso::PrecisionPtr precision_ptr, const AaCode &aaCode)
LocationSaver getLocationSaver() const
Returns a copy of m_location_saver.
const QString & getSequence() const
std::vector< SpOMSAa > sliced(std::size_t position, std::size_t length) const
double getMZShift(std::size_t l_peak, std::size_t r_peak) const
Returns the mz difference between two peaks.
uint getPrecursorCharge() const
Returns the spectrum's precursor's charge.
double getMissingMass(std::size_t peak) const
Returns the missing mass between a peak and the precursor's mass (shift at the end).
std::size_t getComplementaryPeak(std::size_t peak) const
const std::vector< AaPosition > & getAaPositions(std::uint8_t aa_code) const
Returns the list of aa_positions for a given amino acid code.
specglob::ExperimentalSpectrumDataPointType peakType(std::size_t indice) const
Returns the type of one of the spectrum's peaks.
@ synthetic
does not correspond to existing peak, for computational purpose
@ both
both, the ion and the complement exists in the original spectrum
@ symmetric
new peak : computed symmetric mass from a corresponding native peak
const uint ALIGNMENT_SURPLUS(5)
const int MIN_ALIGNMENT_SCORE(15)
void reset()
reinitialize to default score_values
QString getPeptideString(const QString &protein_sequence) const
convenient function to get peptide sequence from location
double getNonAlignedMass() const
convenient function to get the remaining non explained mass shift
std::vector< std::size_t > peaks
std::size_t getPositionStart() const
get position of start on the protein sequence