libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
msfileaccessor.cpp
Go to the documentation of this file.
1// #include <proteowizard/pwiz/data/msdata/DefaultReaderList.hpp>
2
3#include <QDebug>
4#include <QFile>
5#include <QFileInfo>
6
7
8#include "msfileaccessor.h"
9#include "pwizmsfilereader.h"
14
21
28
29namespace pappso
30{
31
32
33MsFileAccessor::MsFileAccessor(const QString &file_name,
34 const QString &xml_prefix)
35 : m_fileName(file_name), m_xmlPrefix(xml_prefix)
36{
37 QFile file(file_name);
38 if(!file.exists())
39 throw(ExceptionNotFound(QObject::tr("File %1 not found.")
40 .arg(QFileInfo(file_name).absoluteFilePath())));
41
42
43 m_oboPsiModTermNativeIDFormat.setAccession("MS:1000824");
44 m_oboPsiModTermNativeIDFormat.m_name = "no nativeID format";
46 "No nativeID format indicates that the file tagged with this term does not "
47 "contain spectra that can have a nativeID format.";
48}
49
58
62
63const QString &
65{
66 return m_fileName;
67}
68
74
75const OboPsiModTerm
77{
78 OboPsiModTerm term;
79
80 // is_a: MS:1000560 ! mass spectrometer file format
81 switch(m_fileFormat)
82 {
84 term.setAccession("MS:1001560");
85 term.m_name = "SCIEX TOF/TOF T2D format";
86 term.m_definition =
87 "Applied Biosystems/MDS Analytical Technologies TOF/TOF instrument "
88 "export format.";
89 break;
91 term.setAccession("MS:1000562");
92 term.m_name = "ABI WIFF format";
93 term.m_definition = "Applied Biosystems WIFF file format.";
94 break;
96 term.setAccession("MS:1001509");
97 term.m_name = "Agilent MassHunter format";
98 term.m_definition =
99 "A data file format found in an Agilent MassHunter directory which "
100 "contains raw data acquired by an Agilent mass spectrometer.";
101 break;
103 break;
105 term.setAccession("MS:1000825");
106 term.m_name = "Bruker FID format";
107 term.m_definition = "Bruker FID file format.";
108 break;
110 term.setAccession("MS:1002817");
111 term.m_name = "Bruker TDF format";
112 term.m_definition = "Bruker TDF raw file format.";
113 break;
115 term.setAccession("MS:1000567");
116 term.m_name = "Bruker/Agilent YEP format";
117 term.m_definition = "Bruker/Agilent YEP file format.";
118 break;
120 term.setAccession("MS:1001062");
121 term.m_name = "Mascot MGF format";
122 term.m_definition = "Mascot MGF file format.";
123 break;
125 break;
127 term.setAccession("MS:1001881");
128 term.m_name = "mz5 format";
129 term.m_definition = "mz5 file format, modelled after mzML.";
130 break;
132 term.setAccession("MS:1000584");
133 term.m_name = "mzML format";
134 term.m_definition =
135 "Proteomics Standards Inititative mzML file format.";
136 break;
138 // mzCBOR is a direct translation of mzML
139 // waiting for a true OBO term, we choose the mzML definition
140 term.setAccession("MS:1000584");
141 term.m_name = "mzML format";
142 term.m_definition =
143 "Proteomics Standards Inititative mzML file format.";
144 break;
146 term.setAccession("MS:1000566");
147 term.m_name = "ISB mzXML format";
148 term.m_definition = "Institute of Systems Biology mzXML file format.";
149 break;
151 break;
153
154 term.setAccession("MS:1000563");
155 term.m_name = "Thermo RAW format";
156 term.m_definition = "Thermo Scientific RAW file format.";
157 break;
159 break;
161 term.setAccession("MS:1000526");
162 term.m_name = "Waters raw format";
163 term.m_definition =
164 "Waters data file format found in a Waters RAW directory, generated "
165 "from an MS acquisition.";
166 break;
168 term.setAccession("MS:1001369");
169 term.m_name = "BafAscii text format";
170 term.m_definition =
171 "Simple text file format obtained by exporting Bruker Baf to ascii "
172 "using Bruker software";
173 break;
175 term.setAccession("MS:1001369");
176 term.m_name = "text format";
177 term.m_definition =
178 "Simple text file format of \"m/z<separator>intensity\" value pairs "
179 "for a single mass spectrum, a PMF (or single MS2) search.";
180 break;
181 default:
182 break;
183 }
184
185 return term;
186}
187
188const OboPsiModTerm &
195
196std::vector<MsRunIdCstSPtr>
198{
199 qDebug();
200 // if (mzcbor_ms_file_reader.accept(m_fileName)) {
201 // }
202 std::vector<MsRunIdCstSPtr> ms_run_ids;
203
204
205 // try the mzcbor file reader
206 MzcborMsFileReader mzcbor_ms_file_reader(m_fileName);
207 if(mzcbor_ms_file_reader.getFileFormat() ==
209 {
210 qDebug() << "sure, this is mzcbor";
211
212 ms_run_ids = mzcbor_ms_file_reader.getMsRunIds(m_xmlPrefix);
214 return ms_run_ids;
215 }
216
217 // Try the PwizMsFileReader
218
219 PwizMsFileReader pwiz_ms_file_reader(m_fileName);
220
221 ms_run_ids = pwiz_ms_file_reader.getMsRunIds(m_xmlPrefix);
222 if(ms_run_ids.size())
223 {
224 qDebug() << "Might well be handled using the Pwiz code.";
225
226 m_fileFormat = pwiz_ms_file_reader.getFileFormat();
228
229 // But the user might have configured one preferred reader type.
230
232 if(pref != m_preferredFileReaderTypeMap.end())
233 {
234 m_fileReaderType = pref->second;
235 }
236
237 return ms_run_ids;
238 }
239
240 // qDebug() << "The Pwiz reader did not work.";
241
242 // Try the TimsData reader
243
244 try
245 {
246 QString tims_dir = m_fileName;
247 if(!QFileInfo(tims_dir).isDir())
248 {
249 tims_dir = QFileInfo(m_fileName).absolutePath();
250 }
251
252 TimsMsFileReader tims_file_reader(tims_dir);
253
254 ms_run_ids = tims_file_reader.getMsRunIds(m_xmlPrefix);
255
256 if(ms_run_ids.size())
257 {
258 // qDebug() << "Might well be handled using the Bruker code";
259
260 m_fileName = tims_dir;
261 m_fileFormat = tims_file_reader.getFileFormat();
263
265 if(pref != m_preferredFileReaderTypeMap.end())
266 {
267 m_fileReaderType = pref->second;
268 }
269
270 // qDebug() << "Returning Bruker::tims ms run(s)."
271 // << "with preferred reader type:"
272 // << Utils::fileReaderTypeAsString(m_fileReaderType);
273
274 return ms_run_ids;
275 }
276 }
277 catch(const pappso::ExceptionNotRecognized &error)
278 {
279 // qDebug() << "The Tims reader did not work.";
280 }
281
282
283 // Try the Baf->ascii export format from Bruker Compass
284
285 try
286 {
287 ms_run_ids.clear();
288 BafAsciiFileReader baf_ascii_ms_file_reader(m_fileName);
289
290 ms_run_ids = baf_ascii_ms_file_reader.getMsRunIds(m_xmlPrefix);
291
292 if(ms_run_ids.size())
293 {
294 // qDebug() << "Might well be handled using the BafAscii code";
295
297
298 m_fileFormat = baf_ascii_ms_file_reader.getFileFormat();
299
301 {
302 ms_run_ids.clear();
303 }
304 else
305 {
306 return ms_run_ids;
307 }
308 }
309 }
310 catch(const pappso::PappsoException &error)
311 {
312 // qDebug() << "This is not a BafAscii code file" << error.qwhat();
313 }
314
315
316 // qDebug() << "The BafAscii reader did not work.";
317
318 // At this point try the XyMsFileReader
319
320 XyMsFileReader xy_ms_file_reader(m_fileName);
321
322 ms_run_ids = xy_ms_file_reader.getMsRunIds(m_xmlPrefix);
323
324 if(ms_run_ids.size())
325 {
326 // qDebug() << "Might well be handled using the XY code";
327
329
330 m_fileFormat = xy_ms_file_reader.getFileFormat();
331
332 return ms_run_ids;
333 }
334
335 // qDebug() << "The XY reader did not work.";
336
337
338 return ms_run_ids;
339}
340
341void
343 Enums::FileReaderType reader_type)
344{
345 // qDebug();
346
347 auto ret = m_preferredFileReaderTypeMap.insert(
348 std::pair<Enums::MsDataFormat, Enums::FileReaderType>(format, reader_type));
349
350 if(!ret.second)
351 {
352 // replace
353 ret.first->second = reader_type;
354 }
355}
356
359{
360 // qDebug();
361
362 auto ret = m_preferredFileReaderTypeMap.find(format);
363
364 if(ret != m_preferredFileReaderTypeMap.end())
365 {
366 return ret->second;
367 }
368
369 return m_fileReaderType;
370}
371
377
378void
380{
381 mcsp_selectedMsRunId = ms_run_id_csp;
382}
383
389
392{
393 // try TimsData reader
394 QString tims_dir = m_fileName;
395 if(!QFileInfo(tims_dir).isDir())
396 {
397 tims_dir = QFileInfo(m_fileName).absolutePath();
398 }
399 TimsMsFileReader tims_file_reader(tims_dir);
400
401 std::vector<MsRunIdCstSPtr> ms_run_ids =
402 tims_file_reader.getMsRunIds(m_xmlPrefix);
403
404 if(ms_run_ids.size())
405 {
406 // qDebug() << "Might well be handled using the Bruker code";
408 m_fileFormat = tims_file_reader.getFileFormat();
409 m_fileName = tims_dir;
410
411 return std::make_shared<TimsMsRunReaderMs2>(ms_run_ids.front());
412 }
413 else
414 {
416 QObject::tr("Unable to read mz data directory %1 with TimsTOF reader.")
417 .arg(tims_dir)));
418 }
419}
420
423{
424 qDebug();
425
426 // We want to return a MsRunReader that accounts for the configuration that
427 // the user might have set.
428
429 if(m_fileName != ms_run_id->getFileName())
431 QObject::tr("The MsRunId instance must have the name file name as the "
432 "MsFileAccessor. %1 != %2")
433 .arg(m_fileName)
434 .arg(ms_run_id->getFileName())));
435
437 {
438 qDebug() << "Returning a MzcborMsRunReader.";
439
440 return std::make_shared<MzcborMsRunReader>(ms_run_id);
441 }
444 {
445 qDebug() << "Returning a PwizMsRunReader.";
446 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
448 pwiz_reader->getOboPsiModTermNativeIDFormat();
449 return pwiz_reader;
450 }
452 {
453 // qDebug() << "Returning a XyMsRunReader.";
454
455 return std::make_shared<XyMsRunReader>(ms_run_id);
456 }
459 {
460 qDebug() << "Returning a TimsMsRunReader Enums::FileReaderType::tims";
461 return std::make_shared<TimsMsRunReader>(ms_run_id);
462 }
465 {
466 qDebug() << "Returning a TimsFramesMsRunReader "
467 "Enums::FileReaderType::tims_frames.";
468
469 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
470 }
473 {
474 qDebug()
475 << "Returning a TimsMsRunReaderMs2 Enums::FileReaderType::tims_ms2";
476
477 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
478 }
481 {
482 qDebug() << "Returning a Enums::FileReaderType::tims_dia";
483
484 // qInfo() << "std::make_shared<TimsMsRunReaderDia>(ms_run_id);";
485 return std::make_shared<TimsMsRunReaderDia>(ms_run_id);
486 }
489 {
490 // qDebug() << "Returning a BafAsciiMsRunReader.";
491
492 return std::make_shared<BafAsciiMsRunReader>(ms_run_id);
493 }
495 {
496 if(ms_run_id.get()->getMsDataFormat() == Enums::MsDataFormat::xy)
497 {
498 return std::make_shared<XyMsRunReader>(ms_run_id);
499 }
500 else
501 {
502 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
504 pwiz_reader->getOboPsiModTermNativeIDFormat();
505 return pwiz_reader;
506 }
507 }
508 else
509 {
510 throw PappsoException(QObject::tr("No file format was found."));
511 }
512
513 return nullptr;
514}
515
517MsFileAccessor::msRunReaderSPtr(std::size_t ms_run_id_index)
518{
519 std::vector<MsRunIdCstSPtr> ms_run_ids = getMsRunIds();
520 if(ms_run_id_index >= ms_run_ids.size())
521 throw PappsoException(QObject::tr("MsRunId request out-of-bound error."));
522
523 return msRunReaderSPtr(ms_run_ids.at(ms_run_id_index));
524}
525
533
539
542 MsRunIdCstSPtr ms_run_id, Enums::FileReaderType preferred_file_reader_type)
543{
544 QFile file(ms_run_id.get()->getFileName());
545 if(!file.exists())
546 throw(ExceptionNotFound(
547 QObject::tr("unable to build a reader : file %1 not found.")
548 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
549
550 Enums::MsDataFormat file_format = ms_run_id.get()->getMsDataFormat();
551
552 if(file_format == Enums::MsDataFormat::xy)
553 {
554 // qDebug() << "Returning a XyMsRunReader.";
555
556 return std::make_shared<XyMsRunReader>(ms_run_id);
557 }
558 else if(file_format == Enums::MsDataFormat::brukerBafAscii)
559 {
560 // qDebug() << "Returning a XyMsRunReader.";
561
562 return std::make_shared<BafAsciiMsRunReader>(ms_run_id);
563 }
564 else if(file_format == Enums::MsDataFormat::unknown)
565 {
566 throw(PappsoException(
567 QObject::tr("unable to build a reader for %1 : unknown file format")
568 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
569 }
570
571 else if(file_format == Enums::MsDataFormat::brukerTims)
572 {
573 if(preferred_file_reader_type == Enums::FileReaderType::tims)
574 {
575 return std::make_shared<TimsMsRunReader>(ms_run_id);
576 }
577 else if(preferred_file_reader_type == Enums::FileReaderType::tims_ms2)
578 {
579 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
580 }
581 else if(preferred_file_reader_type == Enums::FileReaderType::tims_frames)
582 {
583 qDebug()
584 << "returning std::make_shared<TimsFramesMsRunReader>(ms_run_id).";
585 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
586 }
587 // qDebug() << "by default, build a TimsMsRunReader.";
588 return std::make_shared<TimsMsRunReader>(ms_run_id);
589 }
590 else if(file_format == Enums::MsDataFormat::mzcbor)
591 {
592 return std::make_shared<MzcborMsRunReader>(ms_run_id);
593 }
594 else
595 {
596 // qDebug() << "Returning a PwizMsRunReader .";
597 return std::make_shared<PwizMsRunReader>(ms_run_id);
598 }
599}
600
603 const QString &xml_id)
604{
605 std::vector<MsRunIdCstSPtr> run_list = getMsRunIds();
606 MsRunReaderSPtr reader_sp;
607 for(MsRunIdCstSPtr &original_run_id : run_list)
608 {
609 if(original_run_id.get()->getRunId() == run_id)
610 {
611 MsRunId new_run_id(*original_run_id.get());
612 new_run_id.setXmlId(xml_id);
613
614 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
615 }
616 }
617
618 if((run_id.isEmpty()) && (run_list.size() == 1))
619 {
620 MsRunId new_run_id(*run_list[0].get());
621 new_run_id.setXmlId(xml_id);
622
623 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
624 }
625
626
627 if(reader_sp == nullptr)
628 {
629 throw(
630 ExceptionNotFound(QObject::tr("run id %1 not found in file %2")
631 .arg(run_id)
632 .arg(QFileInfo(m_fileName).absoluteFilePath())));
633 }
634 return reader_sp;
635}
636
637
638} // namespace pappso
virtual Enums::MsDataFormat getFileFormat() override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
exception to use when an item type is not recognized
const OboPsiModTerm & getOboPsiModTermNativeIDFormat() const
get OboPsiModTerm corresponding to the nativeID format of mz data
MsRunIdCstSPtr getSelectedMsRunId() const
MsRunReaderSPtr msRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
Enums::FileReaderType m_fileReaderType
Enums::MsDataFormat m_fileFormat
void setPreferredFileReaderType(Enums::MsDataFormat format, Enums::FileReaderType reader_type)
given an mz format, explicitly set the preferred reader
Enums::MsDataFormat getFileFormat() const
get the raw format of mz data
Enums::FileReaderType getFileReaderType() const
get the file reader type
MsRunReaderSPtr msRunReaderSPtrForSelectedMsRunId()
MsRunIdCstSPtr mcsp_selectedMsRunId
std::vector< MsRunIdCstSPtr > getMsRunIds()
OboPsiModTerm m_oboPsiModTermNativeIDFormat
void setSelectedMsRunId(MsRunIdCstSPtr ms_run_id_csp)
const OboPsiModTerm getOboPsiModTermFileFormat() const
get OboPsiModTerm corresponding to the raw format of mz data
MsRunReaderSPtr getMsRunReaderSPtrByRunId(const QString &run_id, const QString &xml_id)
get an MS run reader by finding the run_id in the file
Enums::FileReaderType getpreferredFileReaderType(Enums::MsDataFormat format)
MsFileAccessor(const QString &file_name, const QString &xml_prefix)
static MsRunReaderSPtr buildMsRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
get an MsRunReader directly from a valid MsRun ID
std::map< Enums::MsDataFormat, Enums::FileReaderType > m_preferredFileReaderTypeMap
TimsMsRunReaderMs2SPtr buildTimsMsRunReaderMs2SPtr()
if possible, directly builds a dedicated Tims TOF tdf file reader
const QString & getFileName() const
MS run identity MsRunId identifies an MS run with a unique ID (XmlId) and contains eventually informa...
Definition msrunid.h:54
void setXmlId(const QString &xml_id)
set an XML unique identifier for this MsRunId
Definition msrunid.cpp:147
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual Enums::MsDataFormat getFileFormat() override
void setAccession(const QString &accession)
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual Enums::MsDataFormat getFileFormat() override
virtual Enums::MsDataFormat getFileFormat() override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual Enums::MsDataFormat getFileFormat() override
MSrun file reader for mzcbor.
@ unknown
unknown format
Definition types.h:149
@ SQLite3
SQLite3 format.
Definition types.h:153
@ MGF
Mascot format.
Definition types.h:152
@ pwiz
using libpwizlite
Definition types.h:177
@ tims
TimsMsRunReader : each scan is returned as a mass spectrum.
Definition types.h:181
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< MsRunReader > MsRunReaderSPtr
Definition msrunreader.h:57
std::shared_ptr< TimsMsRunReaderMs2 > TimsMsRunReaderMs2SPtr
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46