libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pappso::specpeptidoms::SemiGlobalAlignment Class Reference

#include <semiglobalalignment.h>

Public Member Functions

 SemiGlobalAlignment (const ScoreValues &score_values, const pappso::PrecisionPtr precision_ptr, const AaCode &aaCode)
 ~SemiGlobalAlignment ()
void fastAlign (const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr)
 perform the first alignment search between a protein sequence and a spectrum. The member location heap is filled with the candidates locations.
void preciseAlign (const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, const std::size_t beginning, const std::size_t length)
 performs the second alignment search between a protein subsequence and a spectrum.
void postProcessingAlign (const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, std::size_t beginning, std::size_t length, const std::vector< double > &shifts)
 performs the post-processing : generates corrected spectra and align them
LocationSaver getLocationSaver () const
 Returns a copy of m_location_saver.
Scenario getScenario () const
 Returns a copy of m_scenario.
const AlignmentgetBestAlignment () const
 Returns a const ref to m_best_alignment.
const std::vector< KeyCell > & getInterestCells () const
 convenient function for degub purpose
const std::vector< KeyCell > & oneAlignStep (const pappso::specpeptidoms::SpOMSProtein &sequence, const std::size_t row_number, const std::vector< AaPosition > &aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
 function made for testing the fastAlign process, process one line and return the alignment matrix
void initFastAlign (const SpOMSSpectrum &spectrum)
 function made for testing the fastAlign process, initiate the variables for alignment
void initpreciseAlign (const SpOMSSpectrum &spectrum, std::size_t length2)
 function made for testing the preciseAlign process, initiate the variables for alignment

Static Public Member Functions

static std::vector< double > getPotentialMassErrors (const pappso::AaCode &aa_code, const Alignment &alignment, const QString &protein_seq)
 Returns a list of the potential mass errors corresponding to the provided alignment in the provided protein sequence.
static bool checkSequenceDiversity (const QString &sequence, std::size_t window, std::size_t minimum_aa_diversity)
 check that the sequence has a minimum of amino acid checkSequenceDiversity

Private Member Functions

void saveBestAlignment (const SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t offset)
 Stores the best alignment from m_scenario in m_best_alignment.
void correctAlign (const SpOMSProtein &protein_subseq, const SpOMSProtein *protein_ptr, const SpOMSSpectrum &spectrum, std::vector< std::size_t > &peaks_to_remove, std::size_t offset)
 Recursively performs the correction of the alignment.
void updateAlignmentMatrix (const pappso::specpeptidoms::SpOMSProtein &sequence, const std::size_t row_number, const std::vector< AaPosition > &aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
 updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenario.
bool perfectShiftPossible (const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
 indicates if a perfect shift is possible between the provided positions
std::size_t perfectShiftPossibleFrom0 (const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t current_row, const std::size_t r_peak) const
 indicates if a perfect shift is possible from the spectrum beginning to the provided peak. Returns the perfect shift origin if the shift is possible, otherwise returns the current row.
std::size_t perfectShiftPossibleEnd (const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t end_row, std::size_t end_peak) const
 indicates if a perfect shift is possible between the provided positions

Private Attributes

std::vector< KeyCellm_interest_cells
std::vector< std::pair< std::size_t, KeyCell > > m_updated_cells
const ScoreValuesm_scorevalues
const int min_score = 15
pappso::PrecisionPtr m_precision_ptr
const AaCodem_aaCode
LocationSaver m_location_saver
Scenario m_scenario
Alignment m_best_alignment
Alignment m_best_corrected_alignment
Alignment m_best_post_processed_alignment

Detailed Description

Definition at line 91 of file semiglobalalignment.h.

Constructor & Destructor Documentation

◆ SemiGlobalAlignment()

pappso::specpeptidoms::SemiGlobalAlignment::SemiGlobalAlignment ( const ScoreValues & score_values,
const pappso::PrecisionPtr precision_ptr,
const AaCode & aaCode )

Default constructor

Definition at line 79 of file semiglobalalignment.cpp.

83 : m_scorevalues(score_values), m_aaCode(aaCode)
84{
85 m_precision_ptr = precision_ptr;
86
87 KeyCell key_cell_init_first;
88 key_cell_init_first.n_row = 0;
89 key_cell_init_first.score = 0;
90 key_cell_init_first.beginning = 0;
91 key_cell_init_first.tree_id = 0;
92 m_interest_cells.push_back(key_cell_init_first);
93}

References pappso::specpeptidoms::KeyCell::beginning, m_aaCode, m_interest_cells, m_precision_ptr, m_scorevalues, pappso::specpeptidoms::KeyCell::n_row, pappso::specpeptidoms::KeyCell::score, and pappso::specpeptidoms::KeyCell::tree_id.

◆ ~SemiGlobalAlignment()

pappso::specpeptidoms::SemiGlobalAlignment::~SemiGlobalAlignment ( )

Destructor

Definition at line 803 of file semiglobalalignment.cpp.

804{
805}

Member Function Documentation

◆ checkSequenceDiversity()

bool pappso::specpeptidoms::SemiGlobalAlignment::checkSequenceDiversity ( const QString & sequence,
std::size_t window,
std::size_t minimum_aa_diversity )
static

check that the sequence has a minimum of amino acid checkSequenceDiversity

Parameters
sequenceprotein sequence
windowthe size of substring to check
minimum_aa_diversityminimum number of different amino acid in this window

Definition at line 1061 of file semiglobalalignment.cpp.

1064{
1065 qDebug() << "sequence=" << sequence << " window=" << window
1066 << " minimum_aa_diversity=" << minimum_aa_diversity;
1067 if(sequence.size() < window)
1068 return false;
1069 auto it_begin = sequence.begin();
1070 auto it_end = sequence.begin() + window;
1071 QString window_copy(sequence.mid(0, window));
1072 while(it_end != sequence.end())
1073 {
1074 std::partial_sort_copy(it_begin, it_end, window_copy.begin(), window_copy.end());
1075
1076 qDebug() << window_copy;
1077 std::size_t uniqueCount =
1078 std::unique(window_copy.begin(), window_copy.end()) - window_copy.begin();
1079
1080 qDebug() << uniqueCount;
1081 if(uniqueCount < minimum_aa_diversity)
1082 return false;
1083 it_begin++;
1084 it_end++;
1085 }
1086 return true;
1087}

Referenced by preciseAlign(), pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment(), and pappso::cbor::psm::PsmSpecPeptidOmsScan::storeAlignment().

◆ correctAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::correctAlign ( const SpOMSProtein & protein_subseq,
const SpOMSProtein * protein_ptr,
const SpOMSSpectrum & spectrum,
std::vector< std::size_t > & peaks_to_remove,
std::size_t offset )
private

Recursively performs the correction of the alignment.

Parameters
protein_seqProtein reversed sequence to align.
protein_ptrProtein pointer on the sequence to align.
spectrumSpectrum to align.
peaks_to_removePeaks to remove from the spectrum.
offsetSize of the protein sequence minus beginning of the alignment. Used to compute the position of the alignment in the protein sequence.

Definition at line 279 of file semiglobalalignment.cpp.

284{
285 std::vector<AaPosition> aa_positions;
286 CorrectionTree correction_tree;
287 std::vector<std::size_t> final_peaks_to_remove;
288
289 KeyCell key_cell_init;
290 key_cell_init.beginning = 0;
291 key_cell_init.n_row = 0;
292 key_cell_init.score = m_scorevalues.get(ScoreType::init);
293 key_cell_init.tree_id = 0;
294
295 std::fill(m_interest_cells.begin(), m_interest_cells.end(), key_cell_init);
296
297 m_interest_cells.at(0).score = 0;
298
299 m_scenario.resetScenario();
300 qDebug();
301 for(qsizetype row_number = 1; row_number <= sequence.size(); row_number++)
302 {
303 qDebug() << row_number - 1 << " " << sequence.size();
304 qDebug() << "sequence[row_number - 1].aa" << (char)sequence[row_number - 1].aa;
305 qDebug();
306 aa_positions = spectrum.getAaPositions(sequence[row_number - 1].code, peaks_to_remove);
307 qDebug();
308 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum, false, protein_ptr);
309 qDebug();
310 }
311
312 qDebug();
313 // Correction : if complementary peaks are used, corrected spectra without one of the two peaks
314 // are generated and aligned. The best alignment is kept.
315 qDebug() << m_scenario.getBestScore();
316 if(m_scenario.getBestScore() >
317 MIN_ALIGNMENT_SCORE) // We only correct alignments with acceptable scores
318 {
319 qDebug();
320 qDebug() << sequence.getSequence();
321 qDebug() << offset;
322 qDebug() << spectrum.getPrecursorCharge();
323 saveBestAlignment(sequence, spectrum, offset);
324 qDebug();
325 for(std::size_t iter : m_best_alignment.peaks)
326 {
327 qDebug() << "iter:" << iter << "comp:" << spectrum.getComplementaryPeak(iter);
328 if(iter == spectrum.getComplementaryPeak(iter))
329 {
330 continue;
331 }
332 else if(iter > spectrum.getComplementaryPeak(iter))
333 {
334 break;
335 }
336 else if(std::find(m_best_alignment.peaks.begin(),
337 m_best_alignment.peaks.end(),
338 spectrum.getComplementaryPeak(iter)) != m_best_alignment.peaks.end())
339 {
340 correction_tree.addPeaks(iter, spectrum.getComplementaryPeak(iter));
341 }
342 }
343 std::vector<std::vector<std::size_t>> corrections = correction_tree.getPeaks();
344 if(corrections.size() > 0)
345 {
346 for(auto new_peaks_to_remove : corrections)
347 {
348 final_peaks_to_remove = std::vector<std::size_t>(new_peaks_to_remove);
349 final_peaks_to_remove.insert(
350 final_peaks_to_remove.end(), peaks_to_remove.begin(), peaks_to_remove.end());
351 correctAlign(sequence, protein_ptr, spectrum, final_peaks_to_remove, offset);
352 }
353 }
354 else if(m_scenario.getBestScore() > m_best_corrected_alignment.score)
355 {
357 }
358 }
359 qDebug();
360}
void updateAlignmentMatrix(const pappso::specpeptidoms::SpOMSProtein &sequence, const std::size_t row_number, const std::vector< AaPosition > &aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenar...
void correctAlign(const SpOMSProtein &protein_subseq, const SpOMSProtein *protein_ptr, const SpOMSSpectrum &spectrum, std::vector< std::size_t > &peaks_to_remove, std::size_t offset)
Recursively performs the correction of the alignment.
void saveBestAlignment(const SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t offset)
Stores the best alignment from m_scenario in m_best_alignment.
const int MIN_ALIGNMENT_SCORE(15)

References pappso::specpeptidoms::CorrectionTree::addPeaks(), pappso::specpeptidoms::KeyCell::beginning, correctAlign(), pappso::specpeptidoms::SpOMSSpectrum::getAaPositions(), pappso::specpeptidoms::SpOMSSpectrum::getComplementaryPeak(), pappso::specpeptidoms::CorrectionTree::getPeaks(), pappso::specpeptidoms::SpOMSSpectrum::getPrecursorCharge(), pappso::specpeptidoms::SpOMSProtein::getSequence(), pappso::specpeptidoms::init, m_best_alignment, m_best_corrected_alignment, m_interest_cells, m_scenario, m_scorevalues, pappso::specpeptidoms::MIN_ALIGNMENT_SCORE(), pappso::specpeptidoms::KeyCell::n_row, saveBestAlignment(), pappso::specpeptidoms::KeyCell::score, pappso::specpeptidoms::KeyCell::tree_id, and updateAlignmentMatrix().

Referenced by correctAlign(), and preciseAlign().

◆ fastAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::fastAlign ( const SpOMSSpectrum & spectrum,
const SpOMSProtein * protein_ptr )

perform the first alignment search between a protein sequence and a spectrum. The member location heap is filled with the candidates locations.

Parameters
spectrumSpectrum to align
protein_ptrProtein pointer on the sequence to align.

Definition at line 102 of file semiglobalalignment.cpp.

104{
105 std::size_t sequence_length = protein_ptr->size();
106
107 initFastAlign(spectrum);
108
109 for(std::size_t row_number = 1; row_number <= sequence_length; row_number++)
110 {
111 updateAlignmentMatrix(*protein_ptr,
112 row_number,
113 spectrum.getAaPositions(protein_ptr->at(row_number - 1).code),
114 spectrum,
115 true,
116 protein_ptr);
117 }
118}
void initFastAlign(const SpOMSSpectrum &spectrum)
function made for testing the fastAlign process, initiate the variables for alignment

References pappso::specpeptidoms::SpOMSSpectrum::getAaPositions(), initFastAlign(), and updateAlignmentMatrix().

Referenced by pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment().

◆ getBestAlignment()

const pappso::specpeptidoms::Alignment & pappso::specpeptidoms::SemiGlobalAlignment::getBestAlignment ( ) const

Returns a const ref to m_best_alignment.

Definition at line 1010 of file semiglobalalignment.cpp.

1011{
1012 return m_best_alignment;
1013}

References m_best_alignment.

Referenced by pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment().

◆ getInterestCells()

const std::vector< pappso::specpeptidoms::KeyCell > & pappso::specpeptidoms::SemiGlobalAlignment::getInterestCells ( ) const

convenient function for degub purpose

Definition at line 96 of file semiglobalalignment.cpp.

97{
98 return m_interest_cells;
99}

References m_interest_cells.

Referenced by oneAlignStep().

◆ getLocationSaver()

pappso::specpeptidoms::LocationSaver pappso::specpeptidoms::SemiGlobalAlignment::getLocationSaver ( ) const

Returns a copy of m_location_saver.

Definition at line 808 of file semiglobalalignment.cpp.

References m_location_saver.

Referenced by pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment().

◆ getPotentialMassErrors()

std::vector< double > pappso::specpeptidoms::SemiGlobalAlignment::getPotentialMassErrors ( const pappso::AaCode & aa_code,
const Alignment & alignment,
const QString & protein_seq )
static

Returns a list of the potential mass errors corresponding to the provided alignment in the provided protein sequence.

Parameters
aa_codethe amino acid code of reference to get aminon acid masses
alignmentAlignment for which to get the potential mass errors.
protein_seqProtein sequence corresponding to the provided alignment.

Definition at line 1016 of file semiglobalalignment.cpp.

1019{
1020 // qDebug() << protein_seq;
1021 if(alignment.end > (std::size_t)protein_seq.size())
1022 {
1023 throw pappso::ExceptionOutOfRange(QString("alignment.end > protein_seq.size() %1 %2")
1024 .arg(alignment.end)
1025 .arg(protein_seq.size()));
1026 }
1027 std::vector<double> potential_mass_errors(alignment.shifts);
1028 double shift = alignment.end_shift;
1029 std::size_t index;
1030 if(alignment.beginning > 0)
1031 { // -1 on unsigned int makes it wrong
1032 index = alignment.beginning - 1;
1033 while(shift > 0 && index > 0)
1034 {
1035 potential_mass_errors.push_back(shift);
1036 // qDebug() << " shift=" << shift << " index=" << index
1037 // << " letter=" << protein_seq.at(index).toLatin1();
1038 shift -= aa_code.getMass(
1039 protein_seq.at(index).toLatin1()); // Aa(protein_seq.at(index).unicode()).getMass();
1040 index--;
1041 }
1042 }
1043
1044 // qDebug() << "second";
1045 shift = alignment.begin_shift;
1046 index = alignment.end + 1;
1047 while(shift > 0 && index < (std::size_t)protein_seq.size())
1048 {
1049 potential_mass_errors.push_back(shift);
1050 qDebug() << " shift=" << shift << " index=" << index
1051 << " letter=" << protein_seq.at(index).toLatin1();
1052 shift -= aa_code.getMass(
1053 protein_seq.at(index).toLatin1()); // Aa(protein_seq.at(index).unicode()).getMass();
1054 index++;
1055 }
1056 // qDebug();
1057 return potential_mass_errors;
1058}
double getMass(uint8_t aa_code) const
get the mass of the amino acid given its integer code the amino acid can bear some modification (if a...
Definition aacode.cpp:224

References pappso::specpeptidoms::Alignment::begin_shift, pappso::specpeptidoms::Alignment::beginning, pappso::specpeptidoms::Alignment::end, pappso::specpeptidoms::Alignment::end_shift, pappso::AaCode::getMass(), pappso::specpeptidoms::shift, and pappso::specpeptidoms::Alignment::shifts.

Referenced by pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment().

◆ getScenario()

pappso::specpeptidoms::Scenario pappso::specpeptidoms::SemiGlobalAlignment::getScenario ( ) const

Returns a copy of m_scenario.

Definition at line 814 of file semiglobalalignment.cpp.

815{
816 return m_scenario;
817}

References m_scenario.

◆ initFastAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::initFastAlign ( const SpOMSSpectrum & spectrum)

function made for testing the fastAlign process, initiate the variables for alignment

Definition at line 121 of file semiglobalalignment.cpp.

122{
123 // m_scenario.clear();
124 // TODO don't forget to reset any important variable
125 // m_best_alignment.reset();
126 // m_best_corrected_alignment.reset();
127 // m_best_post_processed_alignment.reset();
128
129 KeyCell key_cell_init;
130 key_cell_init.n_row = 0;
131 key_cell_init.score = m_scorevalues.get(ScoreType::init);
132 key_cell_init.beginning = 0;
133 key_cell_init.tree_id = 0;
134
135 m_interest_cells.resize(spectrum.size());
136 std::fill(m_interest_cells.begin(), m_interest_cells.end(), key_cell_init);
137
138 m_interest_cells.at(0).score = 0;
139
140 // m_location_saver.resetLocationSaver();
141 m_updated_cells.clear();
142}
std::vector< std::pair< std::size_t, KeyCell > > m_updated_cells

References pappso::specpeptidoms::KeyCell::beginning, pappso::specpeptidoms::init, m_interest_cells, m_scorevalues, m_updated_cells, pappso::specpeptidoms::KeyCell::n_row, pappso::specpeptidoms::KeyCell::score, and pappso::specpeptidoms::KeyCell::tree_id.

Referenced by fastAlign().

◆ initpreciseAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::initpreciseAlign ( const SpOMSSpectrum & spectrum,
std::size_t length2 )

function made for testing the preciseAlign process, initiate the variables for alignment

Definition at line 145 of file semiglobalalignment.cpp.

147{
148 m_scenario.reserve(length2 + 1, spectrum.size());
149 m_interest_cells.reserve(spectrum.size());
150 m_interest_cells.at(0).n_row = 0;
151 m_interest_cells.at(0).score = 0;
152 m_interest_cells.at(0).beginning = 0;
153 m_interest_cells.at(0).tree_id = 0;
154 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
155 {
156 m_interest_cells.at(i).n_row = 0;
158 m_interest_cells.at(i).beginning = 0;
159 m_interest_cells.at(i).tree_id = 0;
160 }
161 for(std::size_t iter = m_interest_cells.size(); iter < spectrum.size(); iter++)
162 {
163 m_interest_cells.push_back({0, m_scorevalues.get(ScoreType::init), 0, 0});
164 }
165 m_scenario.resetScenario();
166}

References pappso::specpeptidoms::init, m_interest_cells, m_scenario, and m_scorevalues.

Referenced by preciseAlign().

◆ oneAlignStep()

const std::vector< pappso::specpeptidoms::KeyCell > & pappso::specpeptidoms::SemiGlobalAlignment::oneAlignStep ( const pappso::specpeptidoms::SpOMSProtein & sequence,
const std::size_t row_number,
const std::vector< AaPosition > & aa_positions,
const SpOMSSpectrum & spectrum,
const bool fast_align,
const pappso::specpeptidoms::SpOMSProtein * protein_ptr )

function made for testing the fastAlign process, process one line and return the alignment matrix

Definition at line 1090 of file semiglobalalignment.cpp.

1097{
1098 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum, fast_align, protein_ptr);
1099 return getInterestCells();
1100}
const std::vector< KeyCell > & getInterestCells() const
convenient function for degub purpose

References getInterestCells(), and updateAlignmentMatrix().

◆ perfectShiftPossible()

bool pappso::specpeptidoms::SemiGlobalAlignment::perfectShiftPossible ( const pappso::specpeptidoms::SpOMSProtein & sequence,
const SpOMSSpectrum & spectrum,
const std::size_t origin_row,
const std::size_t current_row,
const std::size_t l_peak,
const std::size_t r_peak ) const
private

indicates if a perfect shift is possible between the provided positions

Parameters
sequenceReversed sequence of the protein being aligned
spectrumSpectrum being aligned
origin_rowbeginning row of the aa gap to verify (== index of the first missing aa in sequence)
current_rowrow being processed (== index of the current AaPosition in sequence)
l_peakleft peak index of the mz gap to verify
r_peakright peak index of the mz gap to verify

Definition at line 701 of file semiglobalalignment.cpp.

708{
709 try
710 {
711 double missing_mass = 0;
712 auto it_end = sequence.begin() + current_row;
713 for(auto iter = sequence.begin() + origin_row; (iter != it_end) && (iter != sequence.end());
714 iter++)
715 {
716 missing_mass += iter->mass; // Aa(iter->unicode()).getMass();
717 }
718 if(missing_mass > 0)
719 {
720 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
721 return mz_range.contains(spectrum.getMZShift(l_peak, r_peak));
722 }
723 else
724 {
725 return false;
726 }
727 }
728 catch(const std::exception &error)
729 {
730 throw pappso::PappsoException(
731 QObject::tr("perfectShiftPossible failed std exception:\n%1").arg(error.what()));
732 }
733 catch(const pappso::PappsoException &err)
734 {
735 throw pappso::PappsoException(
736 QObject::tr("perfectShiftPossible failed :\n%1").arg(err.qwhat()));
737 }
738}
virtual const QString & qwhat() const

References pappso::MzRange::contains(), pappso::specpeptidoms::SpOMSSpectrum::getMZShift(), m_precision_ptr, and pappso::PappsoException::qwhat().

Referenced by updateAlignmentMatrix().

◆ perfectShiftPossibleEnd()

std::size_t pappso::specpeptidoms::SemiGlobalAlignment::perfectShiftPossibleEnd ( const pappso::specpeptidoms::SpOMSProtein & sequence,
const SpOMSSpectrum & spectrum,
std::size_t end_row,
std::size_t end_peak ) const
private

indicates if a perfect shift is possible between the provided positions

Parameters
sequenceReversed sequence of the protein being aligned
spectrumSpectrum being aligned
end_rowIndex of the last aligned row.
end_peakIndex of the last aligned peak.

Definition at line 768 of file semiglobalalignment.cpp.

773{
774 try
775 {
776 std::size_t perfect_shift_end = end_row + 1;
777 double missing_mass = spectrum.getMissingMass(end_peak);
778 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
779 double aa_mass = 0;
780 while(aa_mass < missing_mass && perfect_shift_end < (std::size_t)sequence.size() &&
781 !mz_range.contains(aa_mass))
782 {
783 aa_mass += sequence.at(perfect_shift_end - 1)
784 .mass; // Aa(sequence.at(perfect_shift_end - 1).unicode()).getMass();
785 perfect_shift_end++;
786 }
787 if(mz_range.contains(aa_mass))
788 {
789 return perfect_shift_end - 1;
790 }
791 else
792 {
793 return end_row;
794 }
795 }
796 catch(const pappso::PappsoException &err)
797 {
798 throw pappso::PappsoException(
799 QObject::tr("perfectShiftPossibleEnd failed :\n%1").arg(err.qwhat()));
800 }
801}

References pappso::MzRange::contains(), pappso::specpeptidoms::SpOMSSpectrum::getMissingMass(), m_precision_ptr, and pappso::PappsoException::qwhat().

Referenced by saveBestAlignment().

◆ perfectShiftPossibleFrom0()

std::size_t pappso::specpeptidoms::SemiGlobalAlignment::perfectShiftPossibleFrom0 ( const pappso::specpeptidoms::SpOMSProtein & sequence,
const SpOMSSpectrum & spectrum,
const std::size_t current_row,
const std::size_t r_peak ) const
private

indicates if a perfect shift is possible from the spectrum beginning to the provided peak. Returns the perfect shift origin if the shift is possible, otherwise returns the current row.

Parameters
sequenceReversed sequence of the protein being aligned
spectrumSpectrum being aligned
current_rowrow being processed (== index of the current AaPosition in sequence)
r_peakright peak index of the mz gap to verify

Definition at line 741 of file semiglobalalignment.cpp.

746{
747 std::size_t perfect_shift_origin = current_row;
748 double missing_mass = spectrum.getMZShift(0, r_peak);
749 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
750 double aa_mass = 0;
751 while(aa_mass < missing_mass && perfect_shift_origin > 0 && !mz_range.contains(aa_mass))
752 {
753 aa_mass += sequence.at(perfect_shift_origin - 1)
754 .mass; // Aa(sequence.at(perfect_shift_origin - 1).unicode()).getMass();
755 perfect_shift_origin--;
756 }
757 if(mz_range.contains(aa_mass))
758 {
759 return perfect_shift_origin;
760 }
761 else
762 {
763 return current_row;
764 }
765}

References pappso::MzRange::contains(), pappso::specpeptidoms::SpOMSSpectrum::getMZShift(), and m_precision_ptr.

Referenced by updateAlignmentMatrix().

◆ postProcessingAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::postProcessingAlign ( const SpOMSSpectrum & spectrum,
const SpOMSProtein * protein_ptr,
std::size_t beginning,
std::size_t length,
const std::vector< double > & shifts )

performs the post-processing : generates corrected spectra and align them

Parameters
spectrumSpectrum to align
protein_ptrProtein pointer on the sequence to align.
beginningIndex of the beginning of the subsequence to align.
lengthLength of the subsequence to align.
shiftsList of potential precursor mass errors to test.

Definition at line 363 of file semiglobalalignment.cpp.

368{
369 std::size_t current_SPC = m_best_alignment.SPC;
370 int current_best_score = m_best_alignment.score;
372 for(double precursor_mass_error : shifts)
373 {
374 SpOMSSpectrum corrected_spectrum(spectrum, precursor_mass_error);
375 preciseAlign(corrected_spectrum, protein_ptr, beginning, length);
377 {
379 }
380 }
381 if(m_best_post_processed_alignment.SPC > current_SPC &&
382 m_best_post_processed_alignment.score >= current_best_score)
383 {
385 }
386}
void preciseAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, const std::size_t beginning, const std::size_t length)
performs the second alignment search between a protein subsequence and a spectrum.

References m_best_alignment, m_best_post_processed_alignment, and preciseAlign().

Referenced by pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment().

◆ preciseAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::preciseAlign ( const SpOMSSpectrum & spectrum,
const SpOMSProtein * protein_ptr,
const std::size_t beginning,
const std::size_t length )

performs the second alignment search between a protein subsequence and a spectrum.

Parameters
spectrumSpectrum to align
protein_ptrProtein pointer on the sequence to align.
beginningIndex of the beginning of the subsequence to align.
lengthLength of the subsequence to align.

Definition at line 169 of file semiglobalalignment.cpp.

173{
174 try
175 {
176 qDebug();
177 const QString &protein_seq = protein_ptr->getSequence();
178 std::size_t length2;
179 if((qsizetype)(beginning + length) <= protein_seq.size())
180 {
181 length2 = length;
182 }
183 else
184 {
185 length2 = protein_seq.size() - beginning;
186 }
187
188 qDebug();
189 QString sequence_str = protein_seq.sliced(protein_seq.size() - beginning - length2, length2);
190
191 SpOMSProtein sequence("sub_sequence", sequence_str, m_aaCode);
192
193 // std::reverse(sequence.begin(), sequence.end());
194 std::vector<AaPosition> aa_positions;
195 CorrectionTree correction_tree;
196
197 initpreciseAlign(spectrum, length2);
198
199 for(std::size_t row_number = 1; row_number <= length2; row_number++)
200 {
201
202 qDebug() << "row_number - 1=" << row_number - 1 << " sequence.size()=" << sequence.size();
203 // aa = Aa(sequence[row_number - 1].unicode());
204 updateAlignmentMatrix(sequence,
205 row_number,
206 spectrum.getAaPositions(sequence[row_number - 1].code),
207 spectrum,
208 false,
209 protein_ptr);
210 }
211 qDebug();
212 saveBestAlignment(sequence, spectrum, protein_seq.size() - beginning);
213
214 qDebug() << m_scenario.getBestScore() << " " << MIN_ALIGNMENT_SCORE;
215 // Correction : if complementary peaks are used, corrected spectra without one of the two
216 // peaks are generated and aligned. The best alignment is kept.
217 if(m_scenario.getBestScore() >
218 MIN_ALIGNMENT_SCORE) // We only correct alignments with acceptable scores
219 {
220 // we only correct alignment if the sequence has a minimum amino acid diversity
221 if(checkSequenceDiversity(sequence.getSequence(), 5, 2))
222 {
223
224 qDebug();
226 for(std::size_t iter : m_best_alignment.peaks)
227 {
228 if(iter > spectrum.getComplementaryPeak(iter))
229 {
230 break;
231 }
232 else if(std::find(m_best_alignment.peaks.begin(),
233 m_best_alignment.peaks.end(),
234 spectrum.getComplementaryPeak(iter)) !=
235 m_best_alignment.peaks.end())
236 {
237 correction_tree.addPeaks(iter, spectrum.getComplementaryPeak(iter));
238 }
239 }
240 qDebug();
241 std::vector<std::vector<std::size_t>> corrections = correction_tree.getPeaks();
242 if(corrections.size() > 0)
243 {
244 m_best_alignment.score =
245 0; // Reset the best alignment score (we dont want to keep
246 // the original alignment if corrections are needed)
247 qDebug();
248 for(auto peaks_to_remove : corrections)
249 {
250 qDebug();
251 correctAlign(sequence,
252 protein_ptr,
253 spectrum,
254 peaks_to_remove,
255 protein_seq.size() - beginning);
256 qDebug();
257 }
258 qDebug();
260 }
261 }
262 }
263 else
264 {
265 // this sequence has too much redundancy
266 // we have to lower the score
267 m_best_alignment.score = 0;
268 }
269 qDebug();
270 }
271 catch(const pappso::PappsoException &err)
272 {
273 throw pappso::PappsoException(
274 QObject::tr("SemiGlobalAlignment::preciseAlign failed :\n%1").arg(err.qwhat()));
275 }
276}
void initpreciseAlign(const SpOMSSpectrum &spectrum, std::size_t length2)
function made for testing the preciseAlign process, initiate the variables for alignment
static bool checkSequenceDiversity(const QString &sequence, std::size_t window, std::size_t minimum_aa_diversity)
check that the sequence has a minimum of amino acid checkSequenceDiversity

References pappso::specpeptidoms::CorrectionTree::addPeaks(), checkSequenceDiversity(), correctAlign(), pappso::specpeptidoms::SpOMSSpectrum::getAaPositions(), pappso::specpeptidoms::SpOMSSpectrum::getComplementaryPeak(), pappso::specpeptidoms::CorrectionTree::getPeaks(), pappso::specpeptidoms::SpOMSProtein::getSequence(), initpreciseAlign(), m_aaCode, m_best_alignment, m_best_corrected_alignment, m_scenario, pappso::specpeptidoms::MIN_ALIGNMENT_SCORE(), pappso::PappsoException::qwhat(), saveBestAlignment(), and updateAlignmentMatrix().

Referenced by postProcessingAlign(), and pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment().

◆ saveBestAlignment()

void pappso::specpeptidoms::SemiGlobalAlignment::saveBestAlignment ( const SpOMSProtein & sequence,
const SpOMSSpectrum & spectrum,
std::size_t offset )
private

Stores the best alignment from m_scenario in m_best_alignment.

Parameters
sequencereversed sequence of the current alignment.
spectrumSpectrum currently being aligned.
offsetSize of the protein sequence minus beginning of the alignment. Used to compute the position of the alignment in the protein sequence.

Definition at line 820 of file semiglobalalignment.cpp.

824{
825 qDebug();
826 m_best_alignment.m_peptideModel.reset();
827 m_best_alignment.peaks.clear();
828 m_best_alignment.shifts.clear();
829 std::size_t previous_row; // FIXME : may be used uninitialised
830 std::size_t previous_column = 0;
831 std::size_t perfect_shift_end;
832 std::pair<std::vector<ScenarioCell>, int> best_alignment = m_scenario.getBestAlignment();
833 m_best_alignment.score = best_alignment.second;
834 std::vector<SpOMSAa> skipped_aa;
835 double skipped_mass;
836 // Retrieving beginning and end
837 if(best_alignment.first.front().previous_row > offset)
838 {
839 throw pappso::PappsoException(
840 QString("best_alignment.first.front().previous_row > offset %1 %2")
841 .arg(offset)
842 .arg(best_alignment.first.front().previous_row));
843 }
844 if(best_alignment.first.back().previous_row > offset)
845 {
846 throw pappso::PappsoException(
847 QString("best_alignment.first.back().previous_row > offset %1 %2")
848 .arg(offset)
849 .arg(best_alignment.first.back().previous_row));
850 }
851 m_best_alignment.beginning = offset - best_alignment.first.front().previous_row;
852 m_best_alignment.end = offset - best_alignment.first.back().previous_row - 1;
853
854 qDebug();
855 AminoAcidModel aa_model;
856 aa_model.m_massDifference = 0;
857 // Filling temp_interpretation and peaks vectors
858 for(auto cell : best_alignment.first)
859 {
860 switch(cell.alignment_type)
861 {
862 case AlignType::found:
863 aa_model.m_aminoAcid = sequence.at(previous_row - 1).aa;
864 aa_model.m_massDifference = 0;
865 aa_model.m_skipped = false;
866 m_best_alignment.m_peptideModel.push_back(aa_model);
867 if(previous_row > cell.previous_row + 1)
868 {
869 skipped_mass = sequence.at(previous_row - 1)
870 .mass; // Aa(sequence.at(previous_row - 1).unicode()).getMass();
871 skipped_aa =
872 sequence.sliced(cell.previous_row, previous_row - cell.previous_row - 1);
873 aa_model.m_massDifference = 0;
874 aa_model.m_skipped = true;
875 for(auto aa : skipped_aa)
876 {
877 aa_model.m_aminoAcid = aa.aa;
878 m_best_alignment.m_peptideModel.push_back(aa_model);
879 skipped_mass += aa.mass; // Aa(aa.unicode()).getMass();
880 }
881 m_best_alignment.m_peptideModel.back().m_massDifference =
882 spectrum.getMZShift(cell.previous_column, previous_column) - skipped_mass;
883 }
884 m_best_alignment.peaks.push_back(cell.previous_column);
885 break;
887 aa_model.m_aminoAcid = sequence.at(previous_row - 1).aa;
888 aa_model.m_massDifference = 0;
889 aa_model.m_skipped = true;
890 m_best_alignment.m_peptideModel.push_back(aa_model);
891 break;
892 case AlignType::shift:
893
894 aa_model.m_aminoAcid = sequence.at(previous_row - 1).aa;
895 aa_model.m_massDifference = spectrum.getMZShift(cell.previous_column, previous_column) -
896 aa_model.m_aminoAcid.getMass();
897 aa_model.m_skipped = false;
898 m_best_alignment.m_peptideModel.push_back(aa_model);
899 m_best_alignment.peaks.push_back(cell.previous_column);
900 m_best_alignment.shifts.push_back(
901 spectrum.getMZShift(cell.previous_column, previous_column) -
902 sequence.at(previous_row - 1).mass);
903 break;
905 m_best_alignment.peaks.push_back(cell.previous_column);
906 skipped_aa = sequence.sliced(cell.previous_row, previous_row - cell.previous_row);
907 std::reverse(skipped_aa.begin(), skipped_aa.end());
908 aa_model.m_massDifference = 0;
909 aa_model.m_skipped = false;
910 for(auto aa : skipped_aa)
911 {
912 aa_model.m_aminoAcid = aa.aa;
913 m_best_alignment.m_peptideModel.push_back(aa_model);
914 }
915 break;
916 case AlignType::init:
917 previous_row = cell.previous_row;
918 previous_column = cell.previous_column;
919 m_best_alignment.peaks.push_back(cell.previous_column);
920 break;
921 }
922 previous_row = cell.previous_row;
923 previous_column = cell.previous_column;
924 }
925 std::reverse(m_best_alignment.peaks.begin(), m_best_alignment.peaks.end());
926 std::reverse(m_best_alignment.m_peptideModel.begin(), m_best_alignment.m_peptideModel.end());
927
928 qDebug();
929 // Compute begin_shift and end_shift
930 MzRange zero(0, m_precision_ptr);
931 m_best_alignment.begin_shift = spectrum.getMZShift(0, m_best_alignment.peaks.front());
932 m_best_alignment.end_shift = spectrum.getMissingMass(m_best_alignment.peaks.back());
933 if(zero.contains(m_best_alignment.end_shift))
934 {
935 m_best_alignment.end_shift = 0;
936 }
937
938 qDebug();
939 // Computing SPC
940 m_best_alignment.SPC = 0;
941 for(auto peak : m_best_alignment.peaks)
942 {
943 switch(spectrum.at(peak).type)
944 {
946 qDebug() << peak << "native";
947 m_best_alignment.SPC += 1;
948 break;
950 qDebug() << peak << "both";
951 m_best_alignment.SPC += 2;
952 break;
954 qDebug() << peak << "synthetic";
955 break;
957 qDebug() << peak << "symmetric";
958 m_best_alignment.SPC += 1;
959 break;
960 }
961 }
962
963 qDebug();
964 // Final check of the end shift
965 if(m_best_alignment.end_shift > 0)
966 {
967 perfect_shift_end = perfectShiftPossibleEnd(sequence,
968 spectrum,
969 best_alignment.first.front().previous_row,
970 m_best_alignment.peaks.back());
971 if(perfect_shift_end != best_alignment.first.front().previous_row)
972 {
973 skipped_aa =
974 sequence.sliced(best_alignment.first.front().previous_row,
975 perfect_shift_end - best_alignment.first.front().previous_row);
976 aa_model.m_massDifference = 0;
977 aa_model.m_skipped = true;
978 for(auto aa = skipped_aa.begin(); aa != skipped_aa.end(); aa++)
979 {
980 aa_model.m_aminoAcid = aa->aa;
981 m_best_alignment.m_peptideModel.push_back(aa_model);
982 }
983 m_best_alignment.beginning = offset - perfect_shift_end;
984 m_best_alignment.end_shift = 0;
985 }
986 else
987 {
989 }
990 }
991
992 qDebug();
993 // Writing final interpretation
994 if(m_best_alignment.end_shift > 0)
995 {
996 m_best_alignment.m_peptideModel.setNterShift(m_best_alignment.end_shift);
997 }
998
999 std::reverse(m_best_alignment.m_peptideModel.begin(), m_best_alignment.m_peptideModel.end());
1000 if(m_best_alignment.begin_shift > 0)
1001 {
1002 m_best_alignment.m_peptideModel.setCterShift(m_best_alignment.begin_shift);
1003 }
1004
1005 m_best_alignment.m_peptideModel.setPrecursorMass(spectrum.getPrecursorMass());
1006 qDebug();
1007}
std::size_t perfectShiftPossibleEnd(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t end_row, std::size_t end_peak) const
indicates if a perfect shift is possible between the provided positions
@ aa
best possible : more than one direct MS2 fragmentation in same MSRUN
Definition types.h:45
@ synthetic
does not correspond to existing peak, for computational purpose
Definition types.h:85
@ both
both, the ion and the complement exists in the original spectrum
Definition types.h:83
@ symmetric
new peak : computed symmetric mass from a corresponding native peak
Definition types.h:81

References pappso::specglob::both, pappso::MzRange::contains(), pappso::specpeptidoms::found, pappso::specpeptidoms::foundShift, pappso::Aa::getMass(), pappso::specpeptidoms::SpOMSSpectrum::getMissingMass(), pappso::specpeptidoms::SpOMSSpectrum::getMZShift(), pappso::specpeptidoms::SpOMSSpectrum::getPrecursorMass(), pappso::specpeptidoms::init, pappso::specpeptidoms::AminoAcidModel::m_aminoAcid, m_best_alignment, pappso::specpeptidoms::AminoAcidModel::m_massDifference, m_precision_ptr, m_scenario, m_scorevalues, pappso::specpeptidoms::AminoAcidModel::m_skipped, pappso::specglob::native, pappso::specpeptidoms::notFound, pappso::specpeptidoms::perfectShift, perfectShiftPossibleEnd(), pappso::specpeptidoms::shift, pappso::specpeptidoms::SpOMSProtein::sliced(), pappso::specglob::symmetric, and pappso::specglob::synthetic.

Referenced by correctAlign(), and preciseAlign().

◆ updateAlignmentMatrix()

void pappso::specpeptidoms::SemiGlobalAlignment::updateAlignmentMatrix ( const pappso::specpeptidoms::SpOMSProtein & sequence,
const std::size_t row_number,
const std::vector< AaPosition > & aa_positions,
const SpOMSSpectrum & spectrum,
const bool fast_align,
const pappso::specpeptidoms::SpOMSProtein * protein_ptr )
private

updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenario.

Parameters
sequenceReversed sequence of the protein being aligned /!\ In case of a subsequence alignment, the provided protein must only contain the sequence to align.
row_numbernumber of the row to update (== index in sequence of the amino acid being aligned)
aa_positionslist of the AaPositions of the current amino acid
spectrumSpectrum being aligned
fast_alignWhether to use the fast version of the algorithm (for 1st alignemnt step)
protein_ptrProtein pointer on the sequence to align. /!\ In case of a subsequence alignment, the provided protein must be the protein of origin (i.e. complete sequence).

Definition at line 389 of file semiglobalalignment.cpp.

396{
397 int where = 0;
398 try
399 {
400 int score_found, score_shift, best_score, alt_score, tree_id;
401 uint32_t condition; // FIXME : may be used uninitialised
402 std::size_t best_column, shift, beginning, missing_aas, length, perfect_shift_origin;
403 KeyCell *current_cell_ptr, *tested_cell_ptr;
404 AlignType alignment_type, temp_align_type;
405
406 double smallest_aa_mass = m_aaCode.getMass((std::uint8_t)1);
407
408 m_updated_cells.reserve(aa_positions.size());
409 where = 1;
410 // Computation of the threePeaks condition, see spomsspectrum.h for more details.
411 if(fast_align)
412 {
413 condition = 3;
414 if(row_number > 1)
415 {
416 qDebug() << (char)sequence.at(row_number - 2).aa;
417 qDebug() << "condition" << condition << "aa" << (char)sequence.at(row_number - 2).aa
418 << sequence.at(row_number - 2).code;
419 condition += 2 << sequence.at(row_number - 2).code;
420 qDebug();
421 qDebug() << "condition" << condition;
422 }
423 }
424 where = 2;
425 for(std::vector<AaPosition>::const_iterator aa_position = aa_positions.begin();
426 aa_position != aa_positions.end();
427 aa_position++)
428 {
429 qDebug() << "l_peak" << aa_position->l_peak << "r_peak" << aa_position->r_peak << "l_mass"
430 << aa_position->l_mass << "l_support" << aa_position->l_support << "condition"
431 << aa_position->condition;
432
433 where = 3;
434 if(((condition & aa_position->condition) != 0) ||
435 !fast_align) // Verification of the threePeaks condition (only during first alignment).
436 {
437 if(fast_align)
438 {
439 qDebug() << "threePeaks condition verified";
440 }
441
442 current_cell_ptr = &m_interest_cells.at(aa_position->r_peak);
443 if(spectrum.peakType(aa_position->r_peak) ==
444 specglob::ExperimentalSpectrumDataPointType::both) // The score used are different
445 // depending on the right peak
446 // type (simple or double).
447 {
448 qDebug() << "double peak";
449 score_found = m_scorevalues.get(ScoreType::foundDouble);
451 }
452 else
453 {
454 qDebug() << "single peak";
455 score_found = m_scorevalues.get(ScoreType::found);
456 score_shift = m_scorevalues.get(ScoreType::foundShift);
457 }
458
459 // not found case (always computed)
460 best_column = aa_position->r_peak;
461 best_score = current_cell_ptr->score + (row_number - current_cell_ptr->n_row) *
463 beginning = current_cell_ptr->beginning;
464 tree_id = current_cell_ptr->tree_id;
465 alignment_type = AlignType::notFound;
466 qDebug() << "not found" << best_score;
467
468 // found case (Can only happen if the left peak is supported)
469 if(aa_position->l_support)
470 {
471 tested_cell_ptr = &m_interest_cells.at(aa_position->l_peak);
472 if(aa_position->l_peak == 0)
473 {
474 alt_score = tested_cell_ptr->score + score_found;
475 }
476 else
477 {
478 if(tested_cell_ptr->n_row == row_number - 1)
479 {
480 alt_score = tested_cell_ptr->score +
481 (row_number - tested_cell_ptr->n_row - 1) *
483 score_found;
484 }
485 else
486 {
487 alt_score = tested_cell_ptr->score +
488 (row_number - tested_cell_ptr->n_row - 1) *
490 score_shift;
491 }
492 }
493 if(alt_score >= best_score) // If the found case improves the score, the new value
494 // of the key cell is computed.
495 {
496 alignment_type = AlignType::found;
497 best_score = alt_score;
498 best_column = aa_position->l_peak;
499 if(best_column == 0)
500 {
501 if(row_number < ALIGNMENT_SURPLUS)
502 {
503 beginning = 0;
504 }
505 else
506 {
507 beginning = std::max((std::size_t)(row_number - ALIGNMENT_SURPLUS),
508 (std::size_t)0);
509 }
510 if(fast_align)
511 {
512 tree_id = m_location_saver.getNextTree();
513 }
514 }
515 else
516 {
517 beginning = tested_cell_ptr->beginning;
518 tree_id = tested_cell_ptr->tree_id;
519 }
520 qDebug() << "found" << best_score << "from" << best_column << beginning
521 << tree_id;
522 }
523 }
524
525 where = 4;
526 // generic shift case (all shifts are tested)
527 if(aa_position->l_support)
528 {
529 shift = 1;
530 }
531 else
532 {
533 shift = 0;
534 }
535 while(shift < aa_position->l_peak)
536 {
537 tested_cell_ptr = &m_interest_cells.at(aa_position->l_peak - shift);
538 // verification saut parfait
539 if(perfectShiftPossible(sequence,
540 spectrum,
541 tested_cell_ptr->n_row,
542 row_number,
543 aa_position->l_peak - shift,
544 aa_position->r_peak) &&
546 {
547 alt_score = tested_cell_ptr->score +
548 (row_number - tested_cell_ptr->n_row - 1) *
550 score_found;
551 temp_align_type = AlignType::perfectShift;
552 }
553 else
554 {
555 alt_score = tested_cell_ptr->score +
556 (row_number - tested_cell_ptr->n_row - 1) *
558 score_shift;
559 temp_align_type = AlignType::shift;
560 }
561 if(alt_score > best_score)
562 {
563 alignment_type = temp_align_type;
564 best_score = alt_score;
565 best_column = aa_position->l_peak - shift;
566 beginning = tested_cell_ptr->beginning;
567 tree_id = tested_cell_ptr->tree_id;
568 qDebug() << "shift" << best_score << "from" << best_column;
569 }
570 shift++;
571 }
572
573 where = 5;
574 // case shift from column 0 (no penalties if all precedent amino acids are missed)
575 tested_cell_ptr = &m_interest_cells.at(0);
576 // verification saut parfait
577 if(aa_position->r_peak <= TOL_PEAKS_MISSING_FIRST_COLUMN)
578 {
579 perfect_shift_origin =
580 perfectShiftPossibleFrom0(sequence, spectrum, row_number, aa_position->r_peak);
581 }
582 else
583 {
584 perfect_shift_origin = row_number;
585 }
586
587 if(perfect_shift_origin != row_number)
588 {
589 alt_score = tested_cell_ptr->score + score_found;
590 temp_align_type = AlignType::perfectShift;
591 }
592 else
593 {
594 alt_score = tested_cell_ptr->score + score_shift;
595 temp_align_type = AlignType::shift;
596 }
597
598 where = 6;
599 if(alt_score > best_score)
600 {
601 qDebug() << "shift" << alt_score << "from 0";
602 alignment_type = temp_align_type;
603 best_score = alt_score;
604 best_column = 0;
605 missing_aas =
606 std::floor((aa_position->l_mass -
608 smallest_aa_mass);
609 qDebug() << "missing aas" << missing_aas;
610 if(row_number < ALIGNMENT_SURPLUS + missing_aas)
611 {
612 beginning = 0;
613 }
614 else
615 {
616 beginning =
617 std::max((std::size_t)(row_number - missing_aas - ALIGNMENT_SURPLUS),
618 (std::size_t)0);
619 }
620 where = 7;
621 if(fast_align)
622 {
623 tree_id = m_location_saver.getNextTree();
624 }
625 }
626
627 where = 8;
628 if(best_column != aa_position->r_peak)
629 {
630 m_updated_cells.push_back(
631 {aa_position->r_peak, {row_number, best_score, beginning, tree_id}});
632 }
633
634 where = 9;
635 if(best_score > m_location_saver.getMinScore(tree_id) && fast_align)
636 {
637 length =
638 row_number - beginning + 1 +
639 std::ceil(spectrum.getMissingMass(aa_position->r_peak) / smallest_aa_mass) +
641 where = 10;
642 m_location_saver.addLocation(beginning, length, tree_id, best_score, protein_ptr);
643 }
644 else if(!fast_align)
645 {
646
647 where = 11;
648 if(alignment_type == AlignType::perfectShift && best_column == 0)
649 {
650 m_scenario.saveOrigin(row_number,
651 aa_position->r_peak,
652 perfect_shift_origin,
653 0,
654 best_score,
656 }
657 else
658 {
659 m_scenario.saveOrigin(row_number,
660 aa_position->r_peak,
661 m_interest_cells.at(best_column).n_row,
662 best_column,
663 best_score,
664 alignment_type);
665 }
666 }
667 }
668 }
669
670 where = 30;
671 // Update row number in column 0
672 m_updated_cells.push_back({0, {row_number, 0, 0, 0}});
673
674 // Save updated key cells in the matrix
675 while(m_updated_cells.size() > 0)
676 {
677 qDebug() << m_interest_cells.size() << " " << m_updated_cells.back().first
678 << m_updated_cells.back().second.n_row << m_updated_cells.back().second.score
679 << m_updated_cells.back().second.beginning
680 << m_updated_cells.back().second.tree_id;
681 m_interest_cells.at(m_updated_cells.back().first) = m_updated_cells.back().second;
682 m_updated_cells.pop_back();
683 }
684 where++;
685 }
686 catch(const std::exception &error)
687 {
688 throw pappso::PappsoException(
689 QObject::tr("updateAlignmentMatrix failed std::exception :\n%1 %2")
690 .arg(where)
691 .arg(error.what()));
692 }
693 catch(const pappso::PappsoException &err)
694 {
695 throw pappso::PappsoException(
696 QObject::tr("updateAlignmentMatrix failed :\n%1").arg(err.qwhat()));
697 }
698}
bool perfectShiftPossible(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
indicates if a perfect shift is possible between the provided positions
std::size_t perfectShiftPossibleFrom0(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t current_row, const std::size_t r_peak) const
indicates if a perfect shift is possible from the spectrum beginning to the provided peak....
const uint ALIGNMENT_SURPLUS(5)
const uint TOL_PEAKS_MISSING(4)
const uint TOL_PEAKS_MISSING_FIRST_COLUMN(5)
const pappso_double MHPLUS(1.007276466879)
const pappso_double MPROTIUM(1.007825032241)
const pappso_double MASSOXYGEN(15.99491461956)

References pappso::specpeptidoms::ALIGNMENT_SURPLUS(), pappso::specpeptidoms::KeyCell::beginning, pappso::specglob::both, pappso::specpeptidoms::found, pappso::specpeptidoms::foundDouble, pappso::specpeptidoms::foundShift, pappso::specpeptidoms::foundShiftDouble, pappso::specpeptidoms::SpOMSSpectrum::getMissingMass(), m_aaCode, m_interest_cells, m_location_saver, m_scenario, m_scorevalues, m_updated_cells, pappso::MASSOXYGEN(), pappso::MHPLUS(), pappso::MPROTIUM(), pappso::specpeptidoms::KeyCell::n_row, pappso::specpeptidoms::notFound, pappso::specpeptidoms::SpOMSSpectrum::peakType(), pappso::specpeptidoms::perfectShift, perfectShiftPossible(), perfectShiftPossibleFrom0(), pappso::PappsoException::qwhat(), pappso::specpeptidoms::KeyCell::score, pappso::specpeptidoms::shift, pappso::specpeptidoms::TOL_PEAKS_MISSING(), pappso::specpeptidoms::TOL_PEAKS_MISSING_FIRST_COLUMN(), and pappso::specpeptidoms::KeyCell::tree_id.

Referenced by correctAlign(), fastAlign(), oneAlignStep(), and preciseAlign().

Member Data Documentation

◆ m_aaCode

const AaCode& pappso::specpeptidoms::SemiGlobalAlignment::m_aaCode
private

Definition at line 300 of file semiglobalalignment.h.

Referenced by SemiGlobalAlignment(), preciseAlign(), and updateAlignmentMatrix().

◆ m_best_alignment

Alignment pappso::specpeptidoms::SemiGlobalAlignment::m_best_alignment
private

◆ m_best_corrected_alignment

Alignment pappso::specpeptidoms::SemiGlobalAlignment::m_best_corrected_alignment
private

Definition at line 304 of file semiglobalalignment.h.

Referenced by correctAlign(), and preciseAlign().

◆ m_best_post_processed_alignment

Alignment pappso::specpeptidoms::SemiGlobalAlignment::m_best_post_processed_alignment
private

Definition at line 305 of file semiglobalalignment.h.

Referenced by postProcessingAlign().

◆ m_interest_cells

std::vector<KeyCell> pappso::specpeptidoms::SemiGlobalAlignment::m_interest_cells
private

◆ m_location_saver

LocationSaver pappso::specpeptidoms::SemiGlobalAlignment::m_location_saver
private

Definition at line 301 of file semiglobalalignment.h.

Referenced by getLocationSaver(), and updateAlignmentMatrix().

◆ m_precision_ptr

pappso::PrecisionPtr pappso::specpeptidoms::SemiGlobalAlignment::m_precision_ptr
private

◆ m_scenario

Scenario pappso::specpeptidoms::SemiGlobalAlignment::m_scenario
private

◆ m_scorevalues

const ScoreValues& pappso::specpeptidoms::SemiGlobalAlignment::m_scorevalues
private

◆ m_updated_cells

std::vector<std::pair<std::size_t, KeyCell> > pappso::specpeptidoms::SemiGlobalAlignment::m_updated_cells
private

Definition at line 296 of file semiglobalalignment.h.

Referenced by initFastAlign(), and updateAlignmentMatrix().

◆ min_score

const int pappso::specpeptidoms::SemiGlobalAlignment::min_score = 15
private

Definition at line 298 of file semiglobalalignment.h.


The documentation for this class was generated from the following files: