libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
psmfeaturesscan.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/cbor/psm/evalscan/psmfeaturesscan.cpp
3 * \date 15/07/2025
4 * \author Olivier Langella
5 * \brief compute features on scan's PSM
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "psmfeaturesscan.h"
29#include <QCborArray>
30#include <QCborMap>
31#include "../../../../peptide/peptideproformaparser.h"
32#include "../../../../psm/xtandem/xtandemhyperscore.h"
33
34
35namespace pappso
36{
37namespace cbor
38{
39namespace psm
40{
41
43 pappso::XtandemSpectrumProcess &tandem_spectrum_process,
44 std::list<Enums::PeptideIon> &ion_list,
45 pappso::PsmFeatures &psm_features,
46 pappso::PrecisionPtr fragment_tolerance)
47 : CborScanMapBase(psm_file_scan_process),
48 m_tandemSpectrumProcess(tandem_spectrum_process),
49 m_ionList(ion_list),
50 m_psmFeatures(psm_features)
51{
52 m_fragmentTolerance = fragment_tolerance;
53}
54
58
59double
60PsmFeaturesScan::checkInf(double input) const
61{
62 if(input < 0)
63 return 0;
64 return input;
65}
66
67void
69{
70
71 if(keys().contains("psm_list"))
72 {
74
75 pappso::MassSpectrum spectrum =
76 m_tandemSpectrumProcess.process(*qualified_mass_spectrum.get()->getMassSpectrumSPtr().get(),
77 qualified_mass_spectrum.get()->getPrecursorMz(),
78 qualified_mass_spectrum.get()->getPrecursorCharge());
79
80 // qWarning() << qualified_mass_spectrum.get()->getMassSpectrumId().getSpectrumIndex();
81 QCborArray new_psm_arr;
82 for(QCborValue cbor_psm : value("psm_list").toArray())
83 {
84 QCborMap cbor_psm_map = cbor_psm.toMap();
85 QCborMap cbor_psm_features;
86 pappso::PeptideSp peptide_sp =
87 pappso::PeptideProFormaParser::parseString(cbor_psm_map.value("proforma").toString());
88
89 if(peptide_sp.get()->size() < 2)
90 {
91 throw pappso::PappsoException(QObject::tr("peptide in psm too small %1")
92 .arg(cbor_psm_map.value("proforma").toString()));
93 }
94 // qWarning() << cbor_psm_map.value("proforma").toString() << "end " <<
95 // qualified_mass_spectrum.get()->getMassSpectrumId().getSpectrumIndex();
96
97 std::size_t peptide_size = peptide_sp.get()->size();
98 cbor_psm_features.insert(QString("peptide_size"), (unsigned int)peptide_size);
99
100 pappso::XtandemHyperscore hyperscore(spectrum,
101 peptide_sp,
102 qualified_mass_spectrum.get()->getPrecursorCharge(),
104 m_ionList,
105 true);
106 cbor_psm_features.insert(QString("hyperscore"), QCborValue(hyperscore.getHyperscore()));
107 // qWarning() << cbor_psm_map.value("proforma").toString() << "end2 " <<
108 // qualified_mass_spectrum.get()->getMassSpectrumId().getSpectrumIndex();
109
111 peptide_sp,
112 qualified_mass_spectrum.get()->getMassSpectrumSPtr().get(),
113 qualified_mass_spectrum.get()->getPrecursorCharge(),
114 2);
115 // TIC
116 cbor_psm_features.insert(QString("total_intensity"),
117 std::log(m_psmFeatures.getTotalIntensity()));
118 // MaxIntALL
119 cbor_psm_features.insert(QString("max_intensity"),
120 checkInf(std::log(qualified_mass_spectrum.get()
121 ->getMassSpectrumSPtr()
122 .get()
123 ->maxIntensityDataPoint()
124 .y)));
125
126 // qWarning() << cbor_psm_map.value("proforma").toString() << "end3 " <<
127 // qualified_mass_spectrum.get()->getMassSpectrumId().getSpectrumIndex();
128 // MaxYionInt
129 cbor_psm_features.insert(
130 QString("MaxYionInt"),
132
133 // MaxBionInt
134 cbor_psm_features.insert(
135 QString("MaxBionInt"),
137
138 // SumYmatchInt
139 cbor_psm_features.insert(
140 QString("SumYmatchInt"),
142
143 // SumBmatchInt
144 cbor_psm_features.insert(
145 QString("SumBmatchInt"),
147
148 // FracYmatchInt
149 cbor_psm_features.insert(
150 QString("FracYmatchInt"),
153 // FracBmatchInt
154 cbor_psm_features.insert(
155 QString("FracBmatchInt"),
158
159 // SeqCoverYion
160 cbor_psm_features.insert(
161 QString("SeqCoverYion"),
163 (double)peptide_size);
164 // SeqCoverBion
165 cbor_psm_features.insert(
166 QString("SeqCoverBion"),
168 (double)peptide_size);
169
170
171 // ConsecutiveYion
172 cbor_psm_features.insert(
173 QString("ConsecutiveYion"),
175 // ConsecutiveBion
176 cbor_psm_features.insert(
177 QString("ConsecutiveBion"),
179
180 // MassErrMean
181 cbor_psm_features.insert(QString("MassErrMean"), m_psmFeatures.getMatchedMzDiffMean());
182
183 // MassErrSD
184 cbor_psm_features.insert(QString("MassErrSD"), m_psmFeatures.getMatchedMzDiffSd());
185
186 // NumofAnnoPeaks
187 cbor_psm_features.insert(QString("NumofAnnoPeaks"),
188 (unsigned int)m_psmFeatures.getNumberOfMatchedIons());
189
190 // qWarning() << cbor_psm_map.value("proforma").toString() << "end2 " <<
191 // qualified_mass_spectrum.get()->getMassSpectrumId().getSpectrumIndex();
192 // NumofComplementPeaks
193 std::size_t num_of_pairs = m_psmFeatures.countMatchedIonComplementPairs();
194 cbor_psm_features.insert(QString("NumofComplementPeaks"), (unsigned int)num_of_pairs);
195 if(num_of_pairs > 0)
196 {
197 // SumComplementPeaksInt
198 cbor_psm_features.insert(
199 QString("SumComplementPeaksInt"),
201
202 // FracComplementPeaksInt
203 cbor_psm_features.insert(
204 QString("FracComplementPeaksInt"),
207 // SeqCoverComplementPeaks
208 cbor_psm_features.insert(
209 QString("SeqCoverComplementPeaks"),
211 (double)peptide_size);
212 }
214 cbor_psm_features.insert(QString("lrSize"), (unsigned int)lr.getSize());
215
216
217 double coeff_of_determination = lr.getCoefficientOfDetermination();
218 if(std::isnan(coeff_of_determination))
219 {
220 }
221 else
222 {
223 cbor_psm_features.insert(QString("lrCoeffDet"), coeff_of_determination);
224 }
225
226
227 QCborMap psm_eval = cbor_psm_map.value("eval").toMap();
228 psm_eval.remove(QString("features"));
229 psm_eval.insert(QString("features"), cbor_psm_features);
230 cbor_psm_map.remove(QString("eval"));
231 cbor_psm_map.insert(QString("eval"), psm_eval);
232
233 new_psm_arr.push_back(cbor_psm_map);
234 }
235
236 insert(QString("psm_list"), new_psm_arr);
237 // qWarning() << "end " <<
238 // qualified_mass_spectrum.get()->getMassSpectrumId().getSpectrumIndex();
239 }
240}
241
242} // namespace psm
243} // namespace cbor
244} // namespace pappso
std::size_t getSize() const
get data size
double getCoefficientOfDetermination() const
get Coefficient of determination (R2)
Class to represent a mass spectrum.
static PeptideSp parseString(const QString &pepstr)
double getMaxIntensityPeakIonMatch(Enums::PeptideIon ion_type) const
double getIntensityOfMatchedIon(Enums::PeptideIon ion_type)
get the sum of intensity of a specific ion
std::size_t getNumberOfMatchedIons() const
number of matched ions (peaks)
std::size_t getAaSequenceCoverage(Enums::PeptideIon ion_type)
number of amino acids covered by matched ions
double getTotalIntensity() const
sum of all peak intensities (matched or not)
double getMatchedMzDiffMean() const
get mean deviation of matched peak mass delta
double getTotalIntensityOfMatchedIonComplementPairs() const
intensity of matched ion complement
std::size_t countMatchedIonComplementPairs() const
count the number of matched ion complement pairs
std::size_t getComplementPairsAaSequenceCoverage()
number of amino acids covered by matched complement pairs of ions
std::size_t getMaxConsecutiveIon(Enums::PeptideIon ion_type)
get the maximum consecutive fragments of one ion type
LinearRegression getIonIsotopeLinearRegression() const
void setPeptideSpectrumCharge(const pappso::PeptideSp peptideSp, const MassSpectrum *p_spectrum, unsigned int parent_charge, unsigned int max_isotope_number)
double getMatchedMzDiffSd() const
get standard deviation of matched peak mass delta
std::map< pappso_double, pappso_double > toMap() const
Definition trace.cpp:691
pappso_double getHyperscore() const
MassSpectrum process(const MassSpectrum &spectrum, pappso_double parent_ion_mass, unsigned int parent_charge) const
process raw spectrum to prepare hyperscore computation
pappso::QualifiedMassSpectrumSPtr getCurrentQualifiedMassSpectrumSPtr() const
PsmFeaturesScan(const PsmFileScanProcess &psm_file_scan_process, pappso::XtandemSpectrumProcess &tandem_spectrum_process, std::list< pappso::Enums::PeptideIon > &ion_list, pappso::PsmFeatures &psm_features, pappso::PrecisionPtr fragment_tolerance)
pappso::XtandemSpectrumProcess & m_tandemSpectrumProcess
std::list< pappso::Enums::PeptideIon > & m_ionList
double checkInf(double input) const
pappso::PrecisionPtr m_fragmentTolerance
Basic PSM file reader to process scan (parallelized scan processing)
@ y
Cter amino ions.
@ b
Nter acylium ions.
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< QualifiedMassSpectrum > QualifiedMassSpectrumSPtr
std::shared_ptr< const Peptide > PeptideSp