libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
psmfilereaderbase.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/cbor/psm/psmfilereaderbase.cpp
3 * \date 05/07/2025
4 * \author Olivier Langella
5 * \brief Base class to read CBOR PSM file
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "psmfilereaderbase.h"
30#include <QDebug>
31#include "../../../peptide/peptideproformaparser.h"
32#include <QCborArray>
33
34
35namespace pappso
36{
37namespace cbor
38{
39namespace psm
40{
41
45
51
52void
54{
55 qDebug();
56 initCborReader(cborp);
57
58 qDebug();
59 if(mpa_cborReader->isMap())
60 {
61 readRoot(monitor);
62 }
63 qDebug();
64}
65
66void
70
71bool
73{
74 for(auto &it : m_currentPsmProteinRefList)
75 {
76 if(!m_proteinMap.getByAccession(it.accession).isTarget)
77 return true;
78 }
79 return false;
80}
81
82bool
84{
85 for(auto &it : m_currentPsmProteinRefList)
86 {
87 if(m_proteinMap.getByAccession(it.accession).isTarget)
88 return true;
89 }
90 return false;
91}
92
93
94void
96{
97 qDebug();
98 mpa_cborReader->enterContainer();
99
101 if(m_expectedString == "informations")
102 {
103 readInformations(monitor);
105
106 if(m_expectedString == "log")
107 {
108 readLog(monitor);
110 }
111
112 logReady(monitor);
113 }
114 else
115 {
116 throw pappso::PappsoException("ERROR: expecting informations element");
117 }
118
119
120 if(m_expectedString == "parameter_map")
121 {
122 readParameterMap(monitor);
123 }
124 else
125 {
126 throw pappso::PappsoException("ERROR: expecting parameter_map element");
127 }
128
129
131 m_targetFastaFiles.clear();
132 m_decoyFastaFiles.clear();
133 if(m_expectedString == "target_fasta_files")
134 {
137 }
138
139 if(m_expectedString == "decoy_fasta_files")
140 {
143 }
144 fastaFilesReady(monitor);
145
146 if(m_expectedString == "protein_map")
147 {
148 readProteinMap(monitor);
150 }
151
152 if(m_expectedString == "sample_list")
153 {
154 sampleListStarted(monitor);
155 mpa_cborReader->enterContainer(); // array
156 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
157 {
158 readSample(monitor);
159 }
160 mpa_cborReader->leaveContainer(); // array
161 sampleListFinished(monitor);
162 }
163 else
164 {
166 QObject::tr("ERROR: expecting sample_list element not %1").arg(m_expectedString));
167 }
168 mpa_cborReader->leaveContainer(); // whole file
169 if(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
170 {
171 readRoot(monitor);
172 }
173}
174
175void
177{
178 bool is_ok;
179 // m_cborInformations.clear();
181
182 if(!is_ok)
183 {
184 throw pappso::PappsoException("ERROR: PSM cbor header informations not well formed");
185 }
186 qDebug() << m_cborInformations.keys();
187 if(m_cborInformations.value("type").toString() != "psm")
188 {
189 QStringList all_keys;
190 for(auto it_k : m_cborInformations.keys())
191 {
192 all_keys << it_k.toString();
193 }
195 QObject::tr("ERROR: this file does not contain PSM data but %1 and %2")
196 .arg(m_cborInformations.value("type").toString())
197 .arg(all_keys.join(" ")));
198 }
199 informationsReady(monitor);
200}
201
202void
204{
205 bool is_ok;
206 // m_cborInformations.clear();
208
209 if(!is_ok)
210 {
211 throw pappso::PappsoException("ERROR: PSM cbor header log not well formed");
212 }
213}
214
215
216void
218{
219 bool is_ok;
220 m_cborParameterMap.clear();
222
223 if(!is_ok)
224 {
225 throw pappso::PappsoException("ERROR: PSM cbor parameter_map not well formed");
226 }
227 parameterMapReady(monitor);
228}
229
230void
236
237
240{
241 PsmProteinRef protein_ref;
242 protein_ref.accession = "";
243 protein_ref.positions.clear();
244 mpa_cborReader->enterContainer();
246 qDebug() << m_expectedString;
247 if(m_expectedString == "accession")
248 {
249 is_ok = mpa_cborReader->decodeString(protein_ref.accession);
250 if(!is_ok)
251 {
252 throw pappso::PappsoException("ERROR: protein accession is not a string");
253 }
254 }
255 else
256 {
257 throw pappso::PappsoException("ERROR: expecting accession element in PSM protein_list");
258 }
259
261 qDebug() << m_expectedString;
262 if(m_expectedString == "positions")
263 {
264 mpa_cborReader->readArray(protein_ref.positions);
265
266 // mpa_cborReader->next();
267 }
268 else
269 {
271 QString("ERROR: expecting positions element in PSM protein_list not %1")
272 .arg(m_expectedString));
273 }
274 mpa_cborReader->leaveContainer();
275
276 qDebug() << "end";
277 return protein_ref;
278}
279
280
283{
284 PsmFile file;
285 mpa_cborReader->enterContainer();
287 if(m_expectedString == "name")
288 {
290 {
291 throw pappso::PappsoException("file name is not a string");
292 }
293 }
294 else
295 {
296 throw pappso::PappsoException("ERROR: expecting name element in file");
297 }
298 mpa_cborReader->leaveContainer();
299 return file;
300}
301
302
303void
305{
306 writer.startMap();
307 writer.append("name");
308 writer.append(psm_file.name);
309 writer.endMap();
310}
311
312void
314 const std::vector<PsmFile> &file_list)
315{
316 writer.startArray();
317 for(auto &psm_file : file_list)
318 {
319 writePsmFile(writer, psm_file);
320 }
321 writer.endArray();
322}
323
324
325void
327{
328 //"name": "tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1",
329 qDebug();
330 mpa_cborReader->enterContainer();
332
333 qDebug() << m_expectedString;
334 if(m_expectedString == "name")
335 {
337 {
338 throw pappso::PappsoException("sample name is not a string");
339 }
340 }
341 else
342 {
343 throw pappso::PappsoException("ERROR: expecting name element in file");
344 }
345 //"identification_file_list": [{ "name":
346 //"/home/langella/data1/tandem/tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1.xml",
347 //}],
348
350
351 qDebug() << m_expectedString;
353 if(m_expectedString == "identification_file_list")
354 {
355 bool is_ok;
356 mpa_cborReader->enterContainer();
357
358 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
359 {
361 }
362 mpa_cborReader->leaveContainer();
363
365 }
366 //"peaklist_file": {"name": "tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1.mzml"
367 //},
368
369 if(m_expectedString == "peaklist_file")
370 {
371 bool is_ok;
373 }
374 else
375 {
376 throw pappso::PappsoException("ERROR: expecting peaklist_file element in sample");
377 }
378 //"scan_list": [
379 sampleStarted(monitor);
381 if(m_expectedString == "scan_list")
382 {
383 mpa_cborReader->enterContainer();
384
385 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
386 {
387 readScan(monitor);
388 }
389 mpa_cborReader->leaveContainer();
390 }
391 else
392 {
393 throw pappso::PappsoException("ERROR: expecting scan_list element in sample");
394 }
395 mpa_cborReader->leaveContainer();
396
397 sampleFinished(monitor);
398}
399
400void
402{
403 qDebug();
404 m_cborScanId.clear();
405 mpa_cborReader->enterContainer();
406 //"id": {
407 //"index": 1976
408 //},
409 qDebug() << "scan begin";
410
412 qDebug() << m_expectedString;
413 if(m_expectedString == "id")
414 {
416 {
417 throw pappso::PappsoException(QObject::tr("id element in scan is not a cbor map"));
418 }
419 }
420 else
421 {
423 QObject::tr("ERROR: expecting id element in scan not %1").arg(m_expectedString));
424 }
425 //"precursor": {
426 //"z": 2,
427 //"mz": 1120.529471
428 //},
429
431 m_cborScanPrecursor.clear();
432 qDebug() << m_expectedString;
433 if(m_expectedString == "precursor")
434 {
436 {
437 throw pappso::PappsoException(QObject::tr("precursor element in scan is not a cbor map"));
438 }
439 }
440 //"ms2": {PSM CBOR format documentation
441 //"rt": 12648.87,
442 //"mz" :[1,2,3,4],
443 //"intensity" : [1,2,3,4]
444 //},
445
447 qDebug() << m_expectedString;
448 m_cborScanMs2.clear();
449 if(m_expectedString == "ms2")
450 {
452 {
454 QObject::tr("ms2 element in scan is not a cbor map %1 %2:\n%3")
456 .arg(m_cborScanId.value("index").toInteger())
457 .arg(mpa_cborReader->lastError().toString()));
458 }
459 }
460 //"psm_list": [
461 scanStarted(monitor);
462
464 qDebug() << m_expectedString;
465 if(m_expectedString == "psm_list")
466 {
467 mpa_cborReader->enterContainer();
468 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
469 {
470 readPsm(monitor);
471 }
472 mpa_cborReader->leaveContainer();
473 }
474
475 mpa_cborReader->leaveContainer();
476 qDebug() << "scan end";
477 scanFinished(monitor);
478 qDebug();
479}
480
481void
483{
484 bool is_ok;
485 mpa_cborReader->enterContainer();
487 // "proforma": "AQEEM[+15.99491]AQVAK",
488 if(m_expectedString == "proforma")
489 {
491 {
492 throw pappso::PappsoException("ERROR: proforma element in psm-scan is not a string");
493 }
494 }
495 else
496 {
497 throw pappso::PappsoException("ERROR: expecting proforma element in psm-scan");
498 }
499 //"protein_list" : [
500 //{
501 //"accession": "GRMZM2G083841_P01",
502 //"positions": [15,236]
503 //}
504 //],
505
508 qDebug() << m_expectedString;
509
510 if(m_expectedString == "protein_list")
511 {
512 mpa_cborReader->enterContainer(); // array
513 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
514 {
516 if(!is_ok)
517 {
518 qDebug();
520 QObject::tr("ERROR: reading protein_list element in psm-scan"));
521 }
522 }
523 // qDebug() << mpa_cborReader->type();
524 mpa_cborReader->leaveContainer(); // array
525 }
526 else
527 {
528 throw pappso::PappsoException("ERROR: expecting protein_list element in psm-scan");
529 }
530 // props: {
531 m_cborScanPsmProps.clear();
532
533 //"eval": {
534 qDebug();
535 m_cborScanPsmEval.clear();
537 qDebug() << m_expectedString;
538
539 if(m_expectedString == "props")
540 {
542 if(!is_ok)
543 {
544 throw pappso::PappsoException("ERROR: props element in psm-scan is not well formed");
545 }
546 if(!getExpectedString())
547 {
549 QObject::tr("ERROR: expecting eval element in psm-scan %1").arg(m_currentPsmProforma));
550 }
551 }
552 if(m_expectedString == "eval")
553 {
555 if(!is_ok)
556 {
557 throw pappso::PappsoException("ERROR: eval element in psm-scan is not well formed");
558 }
559 }
560 else
561
562 {
564 QObject::tr("ERROR: expecting eval element in psm-scan %1 not %2 in %3 %4 %5")
566 .arg(m_expectedString)
567 .arg(__FILE__)
568 .arg(__FUNCTION__)
569 .arg(__LINE__));
570 }
571
572
573 qDebug() << m_expectedString;
574
575
576 mpa_cborReader->leaveContainer();
577 qDebug();
578 psmReady(monitor);
579}
580
581void
583{
584 // PSM is ready, do what you want :)
585}
586
587void
591
592void
596
597void
601
602
603void
607
608void
612
613void
617
618void
622
623void
627
628void
632
635{
636 pappso::PeptideSp peptide_sp;
637 if(m_currentPsmProforma.isEmpty())
638 {
639 throw pappso::PappsoException(QObject::tr("ERROR: m_currentPsmProforma is empty"));
640 }
641 else
642 {
644 }
645 return peptide_sp;
646}
647
650{
651 if(m_currentPeaklistFile.name.isEmpty())
652 {
653 throw pappso::PappsoException(QObject::tr("ERROR: m_currentPeaklistFile is empty"));
654 }
655 if(m_cborScanId.isEmpty())
656 {
657 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanId is empty"));
658 }
659 if(m_cborScanPrecursor.isEmpty())
660 {
661 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanPrecursor is empty"));
662 }
663 if(m_cborScanMs2.isEmpty())
664 {
665 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanMs2 is empty"));
666 }
667
668 if(!m_cborScanId.keys().contains("index"))
669 {
670 throw pappso::PappsoException("There is no scan index");
671 }
672 if(!m_cborScanMs2.keys().contains("mz"))
673 {
674 throw pappso::PappsoException("There is no ms2 mz values");
675 }
676 if(!m_cborScanMs2.keys().contains("intensity"))
677 {
678 throw pappso::PappsoException("There is no ms2 intensity values");
679 }
682 pappso::MsRunIdCstSPtr msrun_id_sp = std::make_shared<const pappso::MsRunId>(msrun_id);
683 pappso::MassSpectrumId ms_id(msrun_id_sp);
684 ms_id.setSpectrumIndex(m_cborScanId.value("index").toInteger());
685
686 // native_id
687 if(m_cborScanId.keys().contains("native_id"))
688 {
689 ms_id.setNativeId(m_cborScanId.value("native_id").toString());
690 }
691
692 std::vector<DataPoint> data_point_vector;
693 std::size_t i = 0;
694 for(auto cbor_mz_value : m_cborScanMs2.value("mz").toArray())
695 {
696 data_point_vector.push_back(
697 {cbor_mz_value.toDouble(), m_cborScanMs2.value("intensity").toArray().at(i).toDouble()});
698 i++;
699 }
700
701
702 MassSpectrum mass_spectrum(data_point_vector);
703 pappso::PrecursorIonData precursor_ion_data;
704
705 pappso::QualifiedMassSpectrum qualified_mass_spectrum(ms_id);
706 qualified_mass_spectrum.setMassSpectrumSPtr(mass_spectrum.makeMassSpectrumSPtr());
707 qualified_mass_spectrum.setMsLevel(2);
708
709 if(m_cborScanPrecursor.keys().contains("z"))
710 {
711 precursor_ion_data.charge = m_cborScanPrecursor.value("z").toInteger();
712 }
713 if(m_cborScanPrecursor.keys().contains("mz"))
714 {
715 precursor_ion_data.mz = m_cborScanPrecursor.value("mz").toDouble();
716 }
717 if(m_cborScanPrecursor.keys().contains("intensity"))
718 {
719 precursor_ion_data.intensity = m_cborScanPrecursor.value("intensity").toDouble();
720 }
721 qualified_mass_spectrum.appendPrecursorIonData(precursor_ion_data);
722 if(m_cborScanMs2.keys().contains("rt"))
723 {
724 qualified_mass_spectrum.setRtInSeconds(m_cborScanMs2.value("rt").toDouble());
725 }
726
727
728 return qualified_mass_spectrum.makeQualifiedMassSpectrumSPtr();
729}
730
731void
735
736void
740
741
742double
743PsmFileReaderBase::getPrecursorMass(double mz_prec, uint charge) const
744{
745 // compute precursor mass given the charge state
746 mz_prec = mz_prec * (double)charge;
747 mz_prec -= (MHPLUS * (double)charge);
748 return mz_prec;
749}
750
751
752} // namespace psm
753} // namespace cbor
754} // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
MassSpectrumSPtr makeMassSpectrumSPtr() const
MS run identity MsRunId identifies an MS run with a unique ID (XmlId) and contains eventually informa...
Definition msrunid.h:54
void setSampleName(const QString &name)
set a sample name for this MsRunId
Definition msrunid.cpp:77
static PeptideSp parseString(const QString &pepstr)
Class representing a fully specified mass spectrum.
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
QualifiedMassSpectrumSPtr makeQualifiedMassSpectrumSPtr() const
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
bool readCborMap(QCborMap &cbor_map)
bool readCborArray(QCborArray &cbor_array)
bool readArray(std::vector< std::size_t > &int_list)
bool decodeString(QString &the_str)
decode the current cbor value as a string, then point to the next value; the current value is decoded as...
std::vector< PsmProteinRef > m_currentPsmProteinRefList
pappso::QualifiedMassSpectrumSPtr getCurrentQualifiedMassSpectrumSPtr() const
pappso::PeptideSp getCurrentPsmPeptideSp() const
virtual void sampleListStarted(pappso::UiMonitorInterface &monitor)
void writePsmFileList(CborStreamWriter &writer, const std::vector< PsmFile > &file_list)
double getPrecursorMass(double mz_prec, uint charge) const
convenient function to compute precursor ion mass
virtual void logReady(pappso::UiMonitorInterface &monitor)
virtual void scanStarted(pappso::UiMonitorInterface &monitor)
virtual void readPsm(pappso::UiMonitorInterface &monitor)
virtual void readLog(pappso::UiMonitorInterface &monitor)
virtual void proteinMapReady(pappso::UiMonitorInterface &monitor)
virtual void sampleStarted(pappso::UiMonitorInterface &monitor)
virtual void readParameterMap(pappso::UiMonitorInterface &monitor)
void readCbor(QFile *cborp, pappso::UiMonitorInterface &monitor)
virtual void readScan(pappso::UiMonitorInterface &monitor)
virtual void readInformations(pappso::UiMonitorInterface &monitor)
virtual void scanFinished(pappso::UiMonitorInterface &monitor)
virtual void sampleListFinished(pappso::UiMonitorInterface &monitor)
virtual void psmReady(pappso::UiMonitorInterface &monitor)
virtual void informationsReady(pappso::UiMonitorInterface &monitor)
void readRoot(pappso::UiMonitorInterface &monitor)
std::vector< PsmFile > m_currentIdentificationFileList
void writePsmFile(CborStreamWriter &writer, const PsmFile &psm_file)
virtual void fastaFilesReady(pappso::UiMonitorInterface &monitor)
virtual void parameterMapReady(pappso::UiMonitorInterface &monitor)
virtual void readProteinMap(pappso::UiMonitorInterface &monitor)
virtual void readSample(pappso::UiMonitorInterface &monitor)
PsmProteinRef readPsmProteinRef(bool &is_ok)
virtual void sampleFinished(pappso::UiMonitorInterface &monitor)
const pappso::cbor::psm::PsmProtein & getByAccession(const QString &accession) const
retrieve a PsmProtein with its accession
void readMap(CborStreamReader &reader)
reads the protein map from a cbor input stream
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< QualifiedMassSpectrum > QualifiedMassSpectrumSPtr
std::shared_ptr< const Peptide > PeptideSp
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
const pappso_double MHPLUS(1.007276466879)
unsigned int uint
Definition types.h:68