libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
spectree.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/spectree/spectree.cpp
3 * \date 11/12/2023
4 * \author Olivier Langella
5 * \brief Matthieu David's SpecTree structure
6 *
7 * C++ implementation of algorithm already described in :
8 * 1. David, M., Fertin, G., Rogniaux, H. & Tessier, D. SpecOMS: A Full Open
9 * Modification Search Method Performing All-to-All Spectra Comparisons within
10 * Minutes. J. Proteome Res. 16, 3030–3038 (2017).
11 *
12 * https://www.theses.fr/2019NANT4092
13 */
14
15
16/*
17 * SpecTree
18 * Copyright (C) 2023 Olivier Langella
19 * <olivier.langella@universite-paris-saclay.fr>
20 *
21 * This program is free software: you can redistribute ipetide to spectrum
22 * alignmentt and/or modify it under the terms of the GNU General Public License
23 * as published by the Free Software Foundation, either version 3 of the
24 * License, or (at your option) any later version.
25 *
26 * This program is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with this program. If not, see <http://www.gnu.org/licenses/>.
33 *
34 */
35
36#include "spectree.h"
37#include <QDebug>
38#include <QObject>
41
42
43namespace pappso
44{
45namespace spectree
46{
47
48SpecTree::SpecTree(const BucketClustering &bucket_clustering)
49{
50
51
52 std::vector<Bucket> bins = bucket_clustering.asSortedList();
53 SpecTreeNode node;
54 node.value = bins[0].getCartList().at(0);
55 node.count = 1;
56 qDebug() << node.value;
57 // Specific processing for the first bucket : the first node and all its
58 // children are added in SpecTrees and the transversal accessor is updated
59 // accordingly
60
61
62 std::vector<std::size_t> spectrumLastNodeIndex;
63 spectrumLastNodeIndex.resize(bucket_clustering.getItemCartCount(), index_not_defined);
65
66 m_spectrumFirstNodeIndex[node.value] = m_nodeList.size() - 1;
67
68
69 m_nodeList.push_back(node);
70 manageSideAccess(spectrumLastNodeIndex);
71 for(std::size_t cpt = 1; cpt < bins[0].size(); ++cpt)
72 {
73 node.parentIndex = m_nodeList.size() - 1;
75 node.value = bins[0].getCartList().at(cpt);
76 node.count = 1;
77 m_nodeList.push_back(node);
78 manageSideAccess(spectrumLastNodeIndex);
79 }
80
81
82 qDebug();
83 // For the rest of the buckets, we increment the node counter in prefix
84 // common parts without creating new nodes. Once out of the prefix common
85 // part, new nodes are created similarily as they were for the first bucket.
86 for(std::size_t bins_index = 1; bins_index < bins.size(); ++bins_index)
87 {
88 qDebug() << "bins_index=" << bins_index;
89 std::size_t pos = 0;
90 bool common = false;
91 std::size_t parentNode = index_not_defined;
92
93 std::size_t previous_bin_size = bins[bins_index - 1].size();
94
95 // detection of a common first element
96 if(pos < previous_bin_size)
97 {
98 if(bins[bins_index].getCartList().at(pos) == bins[bins_index - 1].getCartList().at(pos))
99 {
100 common = true;
101 }
102 }
103 // insertion procedure for a common prefix part insertion : we go
104 // through existing nodes and increment their counter value by one
105 while(common)
106 {
107 qDebug() << "common pos=" << pos;
108 std::size_t spectrum_index = bins[bins_index].getCartList().at(pos);
109 qDebug() << "common spectrum_index=" << spectrum_index;
110 std::size_t tempNodeIndex = spectrumLastNodeIndex[spectrum_index];
111 qDebug() << "common tempNodeIndex=" << tempNodeIndex;
112 m_nodeList[tempNodeIndex].count++;
113 // checking on the prefix common part persistence conditions ; if
114 // not valid, we are not in a common prefix part anymore for the
115 // currently inserted bucket
116 if((previous_bin_size - 1 == pos) || (bins[bins_index].size() - 1 == pos))
117 {
118 qDebug() << "last one";
119 common = false;
120 }
121 else
122 {
123 qDebug() << "bins[bins_index - 1].getSpectrumIndex(pos + 1)="
124 << bins[bins_index - 1].getCartList().at(pos + 1);
125 qDebug() << "bins[bins_index].getSpectrumIndex(pos + 1)="
126 << bins[bins_index].getCartList().at(pos + 1);
127 if(bins[bins_index - 1].getCartList().at(pos + 1) !=
128 bins[bins_index].getCartList().at(pos + 1))
129 {
130 common = false;
131 }
132 }
133 parentNode = tempNodeIndex;
134 ++pos;
135
136 qDebug() << "common=" << common;
137 }
138 qDebug();
139 // insertion procedure for all the remaining nodes not in the prefix
140 // common part : we create new nodes with a counter value of 1 and
141 // update the transversal accessor and last inserted node list
142 while(pos < bins[bins_index].size())
143 {
144 std::size_t spectrum_index = bins[bins_index].getCartList().at(pos);
145
146 node.parentIndex = parentNode;
148 node.value = spectrum_index;
149 node.count = 1;
150 m_nodeList.push_back(node);
151 manageSideAccess(spectrumLastNodeIndex);
152 parentNode = m_nodeList.size() - 1;
153
154 ++pos;
155 }
156 }
157}
158
162
163void
165{
166 m_nodeList.push_back(node);
167}
168
169void
170SpecTree::manageSideAccess(std::vector<std::size_t> &spectrumLastNodeIndex)
171{
172 auto spectrum_index = m_nodeList.back().value;
173 auto node_position = m_nodeList.size() - 1;
174 if(m_spectrumFirstNodeIndex[spectrum_index] == index_not_defined)
175 {
176 // first occurence of this spectrum
177 m_spectrumFirstNodeIndex[spectrum_index] = node_position;
178 spectrumLastNodeIndex[spectrum_index] = node_position;
179 }
180 else
181 {
182 if(spectrumLastNodeIndex[spectrum_index] == node_position)
183 {
184 }
185 else
186 {
187 m_nodeList[spectrumLastNodeIndex[spectrum_index]].nextIndex = node_position;
188 spectrumLastNodeIndex[spectrum_index] = node_position;
189 }
190 }
191}
192
193QString
195{
196 QString node_str("node|spectrum|parent|next|count|\n");
197 std::size_t i = 0;
198 for(auto node : m_nodeList)
199 {
200 int parent = -1;
201 if(node.parentIndex < index_not_defined)
202 parent = node.parentIndex;
203 int next = -1;
204 if(node.nextIndex < index_not_defined)
205 next = node.nextIndex;
206 node_str.append(QString("node_%1|%2|%3|%4|%5end\n")
207 .arg(i)
208 .arg(node.value)
209 .arg(parent)
210 .arg(next)
211 .arg(node.count));
212
213 i++;
214 }
215 return node_str;
216}
217
218const std::vector<std::size_t> &
223
224void
226 SpecXtractInterface &spec_xtract,
227 std::size_t minimum_count,
228 std::size_t cart_id_range_max,
229 std::size_t cart_id_range_min,
230 std::size_t target_cart_id_max,
231 std::size_t target_cart_id_min) const
232{
233
234 if(cart_id_range_max < cart_id_range_min)
235 {
236 std::swap(cart_id_range_max, cart_id_range_min);
237 }
238
239 if(cart_id_range_max < m_spectrumFirstNodeIndex.size())
240 {
241
242 monitor.setStatus(QObject::tr("starting SpecXtract operation"));
243
244 MapSimilarityCount map_count;
245 map_count.map_id_count.resize(cart_id_range_max);
246
247 // initialisation of the structure to store the compared pairs
248 // int[][] results = new int[m_count][3];
249 // int count = 0;
250
251 // iteration on the transversal accessor starting from the deepest spetrum
252 // and climbing up to limit
253 std::size_t spectra1 = cart_id_range_max;
254 monitor.setTotalSteps(cart_id_range_max - cart_id_range_min);
255 while(true)
256 {
257
258 if(monitor.shouldIstop())
259 {
260 throw pappso::ExceptionInterrupted(QObject::tr("SpecXtract job stopped"));
261 }
262 qDebug() << "spectra1=" << spectra1;
263 spec_xtract.beginItemCartExtraction(spectra1);
264 monitor.count();
265 // System.out.println(start);
266
267 map_count.keys.clear();
268 map_count.aboveThreshold.clear();
269
271 map_count, minimum_count, spectra1, target_cart_id_max, target_cart_id_min);
272
273 qDebug() << "spectra1=" << spectra1
274 << " map_count.aboveThreshold.size()=" << map_count.aboveThreshold.size()
275 << " map_count.keys.size()=" << map_count.keys.size();
276
277 for(std::size_t spectra2 : map_count.aboveThreshold)
278 {
279
280 // std::size_t spectra2 = *it;
281 qDebug() << "spectra2=" << spectra2;
282
283 // std::size_t total_extracted_count =
284 // map_count.map_id_count.at(spectra2);
285 spec_xtract.reportSimilarity(
286 spectra1, spectra2, map_count.map_id_count.at(spectra2).count);
287 }
288
289 spec_xtract.endItemCartExtraction(spectra1);
290 // monitor.count();
291 if(spectra1 == cart_id_range_min)
292 break;
293 spectra1--;
294 }
295 }
296 else
297 {
299 QObject::tr("item cart index %1 out of range (item cart id max size=%2)")
300 .arg(cart_id_range_max)
301 .arg(m_spectrumFirstNodeIndex.size()));
302 }
303 qDebug();
304}
305
306void
308 MapSimilarityCount &map_count,
309 std::size_t minimum_count,
310 std::size_t target_cart_id_max,
311 std::size_t target_cart_id_min) const
312{
313
314 std::size_t parent_id = start_node.parentIndex;
315
316 qDebug() << " start node spectrum=" << start_node.value << " parent_id=" << parent_id
317 << " init_count=" << start_node.count << " minimum_count=" << minimum_count
318 << " target_cart_id_min=" << target_cart_id_min;
319
320 while(parent_id != index_not_defined)
321 {
322 // next_node :
323 const SpecTree::SpecTreeNode &current_node = m_nodeList[parent_id];
324
325 if(current_node.value < target_cart_id_min)
326 break;
327 if(current_node.value <= target_cart_id_max)
328 {
329 auto &map_id_count = map_count.map_id_count[current_node.value];
330 if(map_id_count.lastWitness == start_node.value)
331 {
332 map_id_count.count += start_node.count;
333 // if at one time, a similarity exceeds the threshold value for
334 // the first time, it is written into the proper stack
335 if((map_id_count.count >= minimum_count) && (map_id_count.aboveThreshold == false))
336 {
337 map_id_count.aboveThreshold = true;
338 map_count.aboveThreshold.push_back(current_node.value);
339 }
340 }
341 else
342 {
343 map_id_count.lastWitness = start_node.value;
344 map_id_count.count = start_node.count;
345 map_id_count.aboveThreshold = false;
346 // if at one time, a similarity exceeds the threshold value for
347 // the first time, it is written into the proper stack
348 if(map_id_count.count >= minimum_count)
349 {
350 map_id_count.aboveThreshold = true;
351 map_count.aboveThreshold.push_back(current_node.value);
352 }
353 map_count.keys.push_back(current_node.value);
354 }
355 }
356 parent_id = current_node.parentIndex;
357 }
358}
359
360std::size_t
362 std::size_t spectrum_index_target) const
363{
364 std::size_t parent_id = start_node.parentIndex;
365
366 while(parent_id != index_not_defined)
367 {
368 // next_node :
369 const SpecTree::SpecTreeNode &current_node = m_nodeList[parent_id];
370
371 if(current_node.value < spectrum_index_target)
372 return 0;
373 if(current_node.value == spectrum_index_target)
374 return start_node.count;
375
376 parent_id = current_node.parentIndex;
377 }
378 return 0;
379}
380
381
382/**
383 * Extraction of the similarities above a given threshold between a given
384 * spectrum and all other spectra higher located in SpecTrees.
385 * @param tree_indices SpecTrees the data has to be extracted from
386 * @param spectrum_index The spectrum with ated in SpecTrees.
387 * @param tree_indices SpecTrees the data has to be extracted from
388 * @param spectrum_index The spectrum with which we want to extract similarities
389 * @param minimum_count The threshold to use for the extraction
390 * @since 0.1
391 */
392void
394 std::size_t minimum_count,
395 std::size_t spectrum_index,
396 std::size_t target_cart_id_max,
397 std::size_t target_cart_id_min) const
398{
399
400 qDebug() << " spectrum_index=" << spectrum_index;
401 // sOne;
402 // we position the current node at the first occurence of the spectrum in
403 // the transversal accessor and initialise variablethresholds accordingly
404 std::size_t node_index = m_spectrumFirstNodeIndex[spectrum_index];
405 while(node_index != index_not_defined)
406 {
407 qDebug() << "node_index=" << node_index;
408 const SpecTreeNode &current_node = m_nodeList[node_index];
409 // go back in the tree branch from the current_node
411 current_node, map_count, minimum_count, target_cart_id_max, target_cart_id_min);
412 node_index = current_node.nextIndex;
413 }
414 qDebug();
415}
416
417std::size_t
419 std::size_t spectrum_b_index) const
420{
421 if(spectrum_a_index > spectrum_b_index)
422 std::swap(spectrum_a_index, spectrum_b_index);
423
424 std::size_t count_target = 0;
425 if(spectrum_b_index < m_spectrumFirstNodeIndex.size())
426 {
427 std::size_t node_index = m_spectrumFirstNodeIndex[spectrum_b_index];
428 while(node_index != index_not_defined)
429 {
430 qDebug() << "node_index=" << node_index;
431 const SpecTreeNode &current_node = m_nodeList[node_index];
432 // go back in the tree branch from the current_node
433 count_target += walkBackInBranchFromNodeToTarget(current_node, spectrum_a_index);
434 node_index = current_node.nextIndex;
435 }
436 }
437 else
438 {
440 QObject::tr("spectrum_index %1 out of range (spectrum_index max size=%2)")
441 .arg(spectrum_b_index)
442 .arg(m_spectrumFirstNodeIndex.size()));
443 }
444 return count_target;
445}
446} // namespace spectree
447} // namespace pappso
virtual void setStatus(const QString &status)=0
current status of the process
virtual void setTotalSteps(std::size_t total_number_of_steps)
use it if the number of steps is known in an algorithm the total number of steps is usefull to report...
virtual bool shouldIstop()=0
should the procces be stopped ? If true, then cancel process Use this function at strategic point of ...
virtual void count()=0
count steps report when a step is computed in an algorithm
std::vector< Bucket > asSortedList() const
std::vector< SpecTreeNode > m_nodeList
Definition spectree.h:169
std::size_t walkBackInBranchFromNodeToTarget(const SpecTree::SpecTreeNode &start_node, std::size_t spectrum_index_target) const
Definition spectree.cpp:361
const std::vector< std::size_t > & getSpectrumFirstNodeIndex() const
get the adress map of sepctrum index and their first node index
Definition spectree.cpp:219
static constexpr std::size_t index_not_defined
Definition spectree.h:117
void walkBackInBranchFromNode(const SpecTree::SpecTreeNode &start_node, MapSimilarityCount &map_count, std::size_t minimum_count, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
Definition spectree.cpp:307
std::vector< std::size_t > m_spectrumFirstNodeIndex
Definition spectree.h:170
QString toString() const
Definition spectree.cpp:194
std::size_t extractSpectrumPairSimilarityCount(std::size_t spectrum_a_index, std::size_t spectrum_b_index) const
get the number of common component for a pair of spectrum
Definition spectree.cpp:418
void manageSideAccess(std::vector< std::size_t > &spectrumLastNodeIndex)
Definition spectree.cpp:170
void addNewNode(const SpecTreeNode &node)
Definition spectree.cpp:164
void extractSpectrumSimilarityCount(MapSimilarityCount &map_count, std::size_t minimum_count, std::size_t spectrum_index, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
get a map of similarities for a given spectrum index
Definition spectree.cpp:393
SpecTree(const BucketClustering &bucket_clustering)
Definition spectree.cpp:48
void xtract(UiMonitorInterface &monitor, SpecXtractInterface &spec_xtract, std::size_t minimum_count, std::size_t cart_id_range_max, std::size_t cart_id_range_min, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
Definition spectree.cpp:225
yield similarities between pairs of ItemCart
virtual void beginItemCartExtraction(std::size_t cart_id_a)
virtual void reportSimilarity(std::size_t cart_id_a, std::size_t cart_id_b, std::size_t similarity)=0
virtual void endItemCartExtraction(std::size_t cart_id_a)
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::vector< MapSimilarityCountElement > map_id_count
Definition spectree.h:136
std::vector< std::size_t > aboveThreshold
Definition spectree.h:135