libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
mzidentmlreader.h
Go to the documentation of this file.
1/**
2 * \file src/input/mzidentml/mzidentmlreader.h
3 * \date 24/11/2022
4 * \author Olivier Langella
5 * \brief new method to read mzIdentML XML files
6 */
7
8
9/*******************************************************************************
10 * Copyright (c) 2022 Olivier Langella
11 *<Olivier.Langella@universite-paris-saclay.fr>.
12 *
13 * This file is part of i2MassChroQ.
14 *
15 * i2MassChroQ is free software: you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation, either version 3 of the License, or
18 * (at your option) any later version.
19 *
20 * i2MassChroQ is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with i2MassChroQ. If not, see <http://www.gnu.org/licenses/>.
27 *
28 ******************************************************************************/
29#pragma once
30
36
37namespace pappso
38{
39namespace cbor
40{
41namespace psm
42{
43
44
45/**
46 * @todo write docs
47 */
49{
50 public:
51 /**
52 * Constructor (not a default constructor: it takes the mzIdentML file to read)
53 */
56 const QFileInfo &mzident_file);
57 /**
58 * Destructor
59 */
60 virtual ~MzIdentMlReader();
61
62
63 protected:
64 virtual void readStream() override;
65
66
67 private:
72 void readDBSequence();
73 void readPeptide();
76 void readInputs();
77 void readAnalysisData();
78 bool readSearchDatabase();
79 void readSpectraData();
82
83 void finalDebrief();
84
85
86 /** @brief identification engine (search software) used to analyze the spectra
87 *
88 */
89 enum class IdentificationEngine : std::int8_t
90 {
91 unknown = 0, ///< unknown identification engine
92 XTandem = 1, ///< MS:1001476 X!Tandem was used to analyze the spectra.
93 mascot = 2, ///< MS:1001207 The name of the Mascot search engine.
94 peptider = 3, ///< peptider
95 OMSSA = 4, ///< MS:1001475 Open Mass Spectrometry Search Algorithm was used to
96 ///< analyze the spectra.
97 SEQUEST = 5, ///< MS:1001208 The name of the SEQUEST search engine.
98 Comet = 6, ///< MS:1002251 Comet open-source sequence search engine developed
99 ///< at the University of Washington. PMID:23148064
100 Morpheus = 7, ///< MS:1002661 "Morpheus search engine." [PMID:23323968]
101 MSGFplus = 8, ///< MS:1002048 "MS-GF+ software used to analyze the spectra." [PSI:PI]
102 SpecOMS = 9, ///< SpecOMS C++ implementation
103 sage = 10, ///< sage
104 PEAKS_Studio = 11, ///< PEAKS Studio
105 };
106
107
108 struct CvParam // one controlled-vocabulary parameter; presumably mirrors an mzIdentML <cvParam> element (TODO confirm)
109 {
110 QString cvRef; ///< presumably the reference to the controlled vocabulary (TODO confirm)
111 QString accession; ///< presumably the term accession, e.g. of the "MS:1001476" form (TODO confirm)
112 QString name; ///< presumably the human readable term name (TODO confirm)
113 QString value; ///< presumably the value attached to the term, kept as text (TODO confirm)
115 QString unitName; ///< presumably the name of the unit of the value (TODO confirm)
116 QString unitCvRef; ///< presumably the controlled vocabulary the unit comes from (TODO confirm)
117
118 QString toString() const; ///< returns a QString built from this parameter; const, so the object is not modified
119 };
120
122 {
124 std::size_t location;
126 };
127
129 {
130 QString accession;
132 QString sequence;
133 QString description;
134 std::shared_ptr<Protein> protein_sp;
136 std::vector<CvParam> cvParamList;
137 };
138
140 {
141 QString file;
142 };
143
145 {
146 QString file;
147 QString name;
148 };
150 {
153 std::size_t start;
154 std::size_t end;
156 };
157
159 {
160 QString name;
161 QString value;
162 QString toString() const;
163 };
164
166 {
167 unsigned int chargeState;
171 std::vector<MzidPeptideEvidence> mzidPeptideEvidenceList;
172
173 std::vector<CvParam> cvParamList;
174 std::vector<UserParam> userParamList;
175 };
176
178 {
179 QString id;
180 QString spectrumID;
181 // IdentificationMzIdentMlFileSp mzident_source_sp;
182 // IdentificationGroup *identification_group_p;
183 std::size_t scanNum;
184 std::size_t spectrumIndex;
185 bool isSpectrumIndex = false;
187 std::vector<SpectrumIdentificationItem> spectrumIdentificationItemList;
188
189 std::vector<CvParam> cvParamList;
190 std::vector<UserParam> userParamList;
191 };
192
194
196
197
198 void readSpectrumIdentificationItem(SpectrumIdentificationResult &spectrum_identification_result);
199
200 // void
201 // processSpectrumIdentificationItem(SpectrumIdentificationResult
202 // &spectrum_identification_result,
203 // const SpectrumIdentificationItem
204 // &spectrumIdentificationItem);
205
207 const SpectrumIdentificationResult &spectrum_identificatio_result);
208
209
210 bool writeTandemEval(const std::vector<CvParam> &cv_param_list);
211
212 void
213 writeSpectrumIdentificationItem(const SpectrumIdentificationItem &spectrum_identification_item);
214
215 private:
217 // Project *mp_project;
222
223
224 /** @brief store association between xml ID and an identification engine
225 */
226 std::map<QString, IdentificationEngine> m_IdentificationEngineMap;
227
228
229 /** @brief store association between xml ID and fasta files
230 */
231 std::map<QString, MzidSearchDatabase> m_mzidSearchDatabaseIdMap;
232
233
234 /** @brief store association between xml ID and peptide sequence
235 */
236 std::map<QString, PeptideSp> m_PeptideIdMap;
237
238
239 /** @brief store association between xml ID and peptide evidence
240 */
241 std::map<QString, MzidPeptideEvidence> m_MzidPeptideEvidenceIdMap;
242
243
244 /** @brief store association between xml ID and SpectraData
245 */
246 std::map<QString, MzidSpectraData> m_mzidSpectraDataIdMap;
247
248 /** @brief store association between xml ID and DBSequence
249 */
250 std::map<QString, MzidDBSequence> m_MzidDBSequenceIdMap;
251
252 /** @brief associates database ref id to protein shared pointer
253 * Because the search database id is not described before the protein (silly
254 * idea IMHO), we keep the association of protein to database in this map until
255 * the real search database definition appears. We then have to reprocess each
256 * protein to set the right fasta file pointer.
257 */
258 std::map<QString, std::vector<ProteinSp>> m_searchDatabase_ref2proteinList;
259
260
261 /** @brief store all identification results by spectra xml id
262 */
263 std::map<QString, std::vector<SpectrumIdentificationResult>>
265
266 QFileInfo m_mzidentFile;
268};
269} // namespace psm
270} // namespace cbor
271} // namespace pappso
pappso::cbor::CborStreamWriter * mp_cborWriter
std::map< QString, PeptideSp > m_PeptideIdMap
store association between xml ID and peptide sequence
IdentificationEngine m_identificationEngine
@ MSGFplus
MS:1002048 "MS-GF+ software used to analyze the spectra." [PSI:PI].
@ SEQUEST
MS:1001208 The name of the SEQUEST search engine.
@ XTandem
MS:1001476 X!Tandem was used to analyze the spectra.
@ Morpheus
MS:1002661 "Morpheus search engine." [PMID:23323968].
@ mascot
MS:1001207 The name of the Mascot search engine.
bool writeTandemEval(const std::vector< CvParam > &cv_param_list)
std::map< QString, std::vector< ProteinSp > > m_searchDatabase_ref2proteinList
associates database ref id to protein shared pointer because the search database id is not described ...
void writeSpectrumIdentificationItem(const SpectrumIdentificationItem &spectrum_identification_item)
std::map< QString, MzidPeptideEvidence > m_MzidPeptideEvidenceIdMap
store association between xml ID and peptide evidence
pappso::UiMonitorInterface * mp_monitor
void writeSpectrumIdentificationResult(const SpectrumIdentificationResult &spectrum_identificatio_result)
std::map< QString, IdentificationEngine > m_IdentificationEngineMap
store association between xml ID and an identification engine
std::map< QString, MzidSpectraData > m_mzidSpectraDataIdMap
store association between xml ID and SpectraData
std::map< QString, MzidSearchDatabase > m_mzidSearchDatabaseIdMap
store association between xml ID and fasta files
std::map< QString, std::vector< SpectrumIdentificationResult > > m_spectrumIdentificationResultBySpectraIdMap
store all identification results by spectra xml id
void readSpectrumIdentificationItem(SpectrumIdentificationResult &spectrum_identification_result)
std::map< QString, MzidDBSequence > m_MzidDBSequenceIdMap
store association between xml ID and DBSequence
store PsmProtein in a map with accession as key
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< const Peptide > PeptideSp
std::shared_ptr< const Protein > ProteinSp
shared pointer on a Protein object
Definition protein.h:47
std::vector< SpectrumIdentificationItem > spectrumIdentificationItemList