IsoSpec 2.2.1
Loading...
Searching...
No Matches
isoSpec++.cpp
1/*
2 * Copyright (C) 2015-2020 Mateusz Łącki and Michał Startek.
3 *
4 * This file is part of IsoSpec.
5 *
6 * IsoSpec is free software: you can redistribute it and/or modify
7 * it under the terms of the Simplified ("2-clause") BSD licence.
8 *
9 * IsoSpec is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 * You should have received a copy of the Simplified BSD Licence
14 * along with IsoSpec. If not, see <https://opensource.org/licenses/BSD-2-Clause>.
15 */
16
17
18#include "isoSpec++.h"
19#include <cmath>
20#include <algorithm>
21#include <vector>
22#include <cstdlib>
23#include <cstring>
24#include <unordered_map>
25#include <queue>
26#include <utility>
27#include <iostream>
28#include <iomanip>
29#include <stdexcept>
30#include <string>
31#include <limits>
32#include <memory>
33#include <cassert>
34#include <cctype>
35#include "platform.h"
36#include "conf.h"
37#include "dirtyAllocator.h"
38#include "operators.h"
39#include "summator.h"
40#include "marginalTrek++.h"
41#include "misc.h"
42#include "element_tables.h"
43#include "fasta.h"
44
45
46
47namespace IsoSpec
48{
49
50Iso::Iso() :
51disowned(false),
52dimNumber(0),
53isotopeNumbers(new int[0]),
54atomCounts(new int[0]),
55confSize(0),
56allDim(0),
57marginals(new Marginal*[0])
58{}
59
60
61Iso::Iso(
62 int _dimNumber,
63 const int* _isotopeNumbers,
64 const int* _atomCounts,
65 const double* const * _isotopeMasses,
66 const double* const * _isotopeProbabilities
67) :
68disowned(false),
69dimNumber(_dimNumber),
70isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
71atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
72confSize(_dimNumber * sizeof(int)),
73allDim(0),
74marginals(nullptr)
75{
76 for(int ii = 0; ii < dimNumber; ++ii)
77 allDim += isotopeNumbers[ii];
78
79 std::unique_ptr<double[]> masses(new double[allDim]);
80 std::unique_ptr<double[]> probs(new double[allDim]);
81 size_t idx = 0;
82
83 for(int ii = 0; ii < dimNumber; ++ii)
84 for(int jj = 0; jj < isotopeNumbers[ii]; ++jj)
85 {
86 masses[idx] = _isotopeMasses[ii][jj];
87 probs[idx] = _isotopeProbabilities[ii][jj];
88 ++idx;
89 }
90
91 allDim = 0; // setupMarginals will recalculate it, assuming it's set to 0
92
93 try{
94 setupMarginals(masses.get(), probs.get());
95 }
96 catch(...)
97 {
98 delete[] isotopeNumbers;
99 delete[] atomCounts;
100 // Since we're throwing in a constructor, the destructor won't run, and we don't need to NULL these.
101 // However, this is not the fast code path and we can afford two unneeded instructions to keep
102 // some static analysis tools happy.
103 isotopeNumbers = nullptr;
104 atomCounts = nullptr;
105 throw;
106 }
107}
108
109Iso::Iso(
110 int _dimNumber,
111 const int* _isotopeNumbers,
112 const int* _atomCounts,
113 const double* _isotopeMasses,
114 const double* _isotopeProbabilities
115) :
116disowned(false),
117dimNumber(_dimNumber),
118isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
119atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
120confSize(_dimNumber * sizeof(int)),
121allDim(0),
122marginals(nullptr)
123{
124 try{
125 setupMarginals(_isotopeMasses, _isotopeProbabilities);
126 }
127 catch(...)
128 {
129 delete[] isotopeNumbers;
130 delete[] atomCounts;
131 // Since we're throwing in a constructor, the destructor won't run, and we don't need to NULL these.
132 // However, this is not the fast code path and we can afford two unneeded instructions to keep
133 // some static analysis tools happy.
134 isotopeNumbers = nullptr;
135 atomCounts = nullptr;
136 throw;
137 }
138}
139
140Iso::Iso(Iso&& other) :
141disowned(other.disowned),
142dimNumber(other.dimNumber),
143isotopeNumbers(other.isotopeNumbers),
144atomCounts(other.atomCounts),
145confSize(other.confSize),
146allDim(other.allDim),
147marginals(other.marginals)
148{
149 other.disowned = true;
150}
151
152
153Iso::Iso(const Iso& other, bool fullcopy) :
154disowned(!fullcopy),
155dimNumber(other.dimNumber),
156isotopeNumbers(fullcopy ? array_copy<int>(other.isotopeNumbers, dimNumber) : other.isotopeNumbers),
157atomCounts(fullcopy ? array_copy<int>(other.atomCounts, dimNumber) : other.atomCounts),
158confSize(other.confSize),
159allDim(other.allDim),
160marginals(fullcopy ? new Marginal*[dimNumber] : other.marginals)
161{
162 if(fullcopy)
163 {
164 for(int ii = 0; ii < dimNumber; ii++)
165 marginals[ii] = new Marginal(*other.marginals[ii]);
166 }
167}
168
169Iso Iso::FromFASTA(const char* fasta, bool use_nominal_masses, bool add_water)
170{
171 int atomCounts[6];
172
173 parse_fasta(fasta, atomCounts);
174
175 if(add_water)
176 {
177 atomCounts[1] += 2;
178 atomCounts[3] += 1;
179 }
180
181 const int dimNr = atomCounts[5] > 0 ? 6 : 5;
182
183 return Iso(dimNr, aa_isotope_numbers, atomCounts, use_nominal_masses ? aa_elem_nominal_masses : aa_elem_masses, aa_elem_probabilities);
184}
185
186inline void Iso::setupMarginals(const double* _isotopeMasses, const double* _isotopeProbabilities)
187{
188 if (marginals == nullptr)
189 {
190 int ii = 0;
192 try
193 {
194 while(ii < dimNumber)
195 {
196 marginals[ii] = new Marginal(
197 &_isotopeMasses[allDim],
198 &_isotopeProbabilities[allDim],
199 isotopeNumbers[ii],
200 atomCounts[ii]
201 );
202 allDim += isotopeNumbers[ii];
203 ii++;
204 }
205 }
206 catch(...)
207 {
208 ii--;
209 while(ii >= 0)
210 {
211 delete marginals[ii];
212 ii--;
213 }
214 delete[] marginals;
215 marginals = nullptr;
216 throw;
217 }
218 }
219}
220
222{
223 if(!disowned)
224 {
225 if (marginals != nullptr)
226 dealloc_table(marginals, dimNumber);
227 delete[] isotopeNumbers;
228 delete[] atomCounts;
229 }
230}
231
232bool Iso::doMarginalsNeedSorting() const
233{
234 int nontrivial_marginals = 0;
235 for(int ii = 0; ii < dimNumber; ii++)
236 {
237 if(marginals[ii]->get_isotopeNo() > 1)
238 nontrivial_marginals++;
239 if(nontrivial_marginals > 1)
240 return true;
241 }
242 return false;
243}
244
246{
247 double mass = 0.0;
248 for (int ii = 0; ii < dimNumber; ii++)
249 mass += marginals[ii]->getLightestConfMass();
250 return mass;
251}
252
254{
255 double mass = 0.0;
256 for (int ii = 0; ii < dimNumber; ii++)
257 mass += marginals[ii]->getHeaviestConfMass();
258 return mass;
259}
260
262{
263 double mass = 0.0;
264 for (int ii = 0; ii < dimNumber; ii++)
265 mass += marginals[ii]->getMonoisotopicConfMass();
266 return mass;
267}
268
270{
271 double lprob = 0.0;
272 for (int ii = 0; ii < dimNumber; ii++)
273 lprob += marginals[ii]->getSmallestLProb();
274 return lprob;
275}
276
277double Iso::getModeMass() const
278{
279 double mass = 0.0;
280 for (int ii = 0; ii < dimNumber; ii++)
281 mass += marginals[ii]->getModeMass();
282 return mass;
283}
284
285double Iso::getModeLProb() const
286{
287 double ret = 0.0;
288 for (int ii = 0; ii < dimNumber; ii++)
289 ret += marginals[ii]->getModeLProb();
290 return ret;
291}
292
294{
295 double mass = 0.0;
296 for (int ii = 0; ii < dimNumber; ii++)
298 return mass;
299}
300
301double Iso::variance() const
302{
303 double ret = 0.0;
304 for(int ii = 0; ii < dimNumber; ii++)
305 ret += marginals[ii]->variance();
306 return ret;
307}
308
309
310Iso::Iso(const char* formula, bool use_nominal_masses) :
311disowned(false),
312allDim(0),
313marginals(nullptr)
314{
315 std::vector<double> isotope_masses;
316 std::vector<double> isotope_probabilities;
317
318 dimNumber = parse_formula(formula, isotope_masses, isotope_probabilities, &isotopeNumbers, &atomCounts, &confSize, use_nominal_masses);
319
320 setupMarginals(isotope_masses.data(), isotope_probabilities.data());
321}
322
323
324void Iso::addElement(int atomCount, int noIsotopes, const double* isotopeMasses, const double* isotopeProbabilities)
325{
326 Marginal* m = new Marginal(isotopeMasses, isotopeProbabilities, noIsotopes, atomCount);
327 realloc_append<int>(&isotopeNumbers, noIsotopes, dimNumber);
328 realloc_append<int>(&atomCounts, atomCount, dimNumber);
329 realloc_append<Marginal*>(&marginals, m, dimNumber);
330 dimNumber++;
331 confSize += sizeof(int);
332 allDim += noIsotopes;
333}
334
335void Iso::saveMarginalLogSizeEstimates(double* priorities, double target_total_prob) const
336{
337 /*
338 * We shall now use Gaussian approximations of the marginal multinomial distributions to estimate
339 * how many configurations we shall need to visit from each marginal. This should be approximately
340 * proportional to the volume of the optimal P-ellipsoid of the marginal, which, in turn is defined
341 * by the quantile function of the chi-square distribution plus some modifications.
342 *
343 * We're dropping the constant factor and the (monotonic) exp() transform - these will be used as keys
344 * for sorting, so only the ordering is important.
345 */
346
347 double K = allDim - dimNumber;
348
349 double log_R2 = log(InverseChiSquareCDF2(K, target_total_prob));
350
351 for(int ii = 0; ii < dimNumber; ii++)
352 priorities[ii] = marginals[ii]->getLogSizeEstimate(log_R2);
353}
354
355unsigned int parse_formula(const char* formula, std::vector<double>& isotope_masses, std::vector<double>& isotope_probabilities, int** isotopeNumbers, int** atomCounts, unsigned int* confSize, bool use_nominal_masses)
356{
357 // This function is NOT guaranteed to be secure against malicious input. It should be used only for debugging.
358 size_t slen = strlen(formula);
359 // Yes, it would be more elegant to use std::string here, but it's the only promiment place where it would be used in IsoSpec, and avoiding it here
360 // means we can run the whole thing through Clang's memory sanitizer without the need for instrumented libc++/libstdc++. That's worth messing with char pointers a
361 // little bit.
362 std::vector<std::pair<const char*, size_t> > elements;
363 std::vector<int> numbers;
364
365 if(slen == 0)
366 throw std::invalid_argument("Invalid formula: can't be empty");
367
368 if(!isdigit(formula[slen-1]))
369 throw std::invalid_argument("Invalid formula: every element must be followed by a number - write H2O1 and not H2O for water");
370
371 for(size_t ii = 0; ii < slen; ii++)
372 if(!isdigit(formula[ii]) && !isalpha(formula[ii]))
373 throw std::invalid_argument("Invalid formula: contains invalid (non-digit, non-alpha) character");
374
375 size_t position = 0;
376
377 while(position < slen)
378 {
379 size_t elem_end = position;
380 while(isalpha(formula[elem_end]))
381 elem_end++;
382 size_t digit_end = elem_end;
383 while(isdigit(formula[digit_end]))
384 digit_end++;
385 elements.emplace_back(&formula[position], elem_end-position);
386 numbers.push_back(std::stoi(&formula[elem_end]));
387 position = digit_end;
388 }
389
390 std::vector<int> element_indexes;
391
392 for (unsigned int i = 0; i < elements.size(); i++)
393 {
394 int idx = -1;
395 for(int j = 0; j < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES; j++)
396 {
397 if ((strlen(elem_table_symbol[j]) == elements[i].second) && (strncmp(elements[i].first, elem_table_symbol[j], elements[i].second) == 0))
398 {
399 idx = j;
400 break;
401 }
402 }
403 if(idx < 0)
404 throw std::invalid_argument("Invalid formula");
405 element_indexes.push_back(idx);
406 }
407
408 std::vector<int> _isotope_numbers;
409 const double* masses = use_nominal_masses ? elem_table_massNo : elem_table_mass;
410
411 for(std::vector<int>::iterator it = element_indexes.begin(); it != element_indexes.end(); ++it)
412 {
413 int num = 0;
414 int at_idx = *it;
415 int elem_ID = elem_table_ID[at_idx];
416 while(at_idx < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES && elem_table_ID[at_idx] == elem_ID)
417 {
418 isotope_masses.push_back(masses[at_idx]);
419 isotope_probabilities.push_back(elem_table_probability[at_idx]);
420 at_idx++;
421 num++;
422 }
423 _isotope_numbers.push_back(num);
424 }
425
426 const unsigned int dimNumber = elements.size();
427
428 *isotopeNumbers = array_copy<int>(_isotope_numbers.data(), dimNumber);
429 *atomCounts = array_copy<int>(numbers.data(), dimNumber);
430 *confSize = dimNumber * sizeof(int);
431
432 return dimNumber;
433}
434
435
436/*
437 * ----------------------------------------------------------------------------------------------------------
438 */
439
440
441
442IsoGenerator::IsoGenerator(Iso&& iso, bool alloc_partials) :
443 Iso(std::move(iso)),
444 mode_lprob(getModeLProb()),
445 partialLProbs(alloc_partials ? new double[dimNumber+1] : nullptr),
446 partialMasses(alloc_partials ? new double[dimNumber+1] : nullptr),
447 partialProbs(alloc_partials ? new double[dimNumber+1] : nullptr)
448{
449 for(int ii = 0; ii < dimNumber; ++ii)
450 marginals[ii]->ensureModeConf();
451 if(alloc_partials)
452 {
455 partialProbs[dimNumber] = 1.0;
456 }
457}
458
459
461{
462 if(partialLProbs != nullptr)
463 delete[] partialLProbs;
464 if(partialMasses != nullptr)
465 delete[] partialMasses;
466 if(partialProbs != nullptr)
467 delete[] partialProbs;
468}
469
470
471
472/*
473 * ----------------------------------------------------------------------------------------------------------
474 */
475
476static const double minsqrt = -1.3407796239501852e+154; // == constexpr(-sqrt(std::numeric_limits<double>::max()));
477
478IsoThresholdGenerator::IsoThresholdGenerator(Iso&& iso, double _threshold, bool _absolute, int tabSize, int hashSize, bool reorder_marginals)
479: IsoGenerator(std::move(iso)),
480Lcutoff(_threshold <= 0.0 ? minsqrt : (_absolute ? log(_threshold) : log(_threshold) + mode_lprob))
481{
482 counter = new int[dimNumber];
483 maxConfsLPSum = new double[dimNumber-1];
484 marginalResultsUnsorted = new PrecalculatedMarginal*[dimNumber];
485
486 empty = false;
487
488 const bool marginalsNeedSorting = doMarginalsNeedSorting();
489
490 for(int ii = 0; ii < dimNumber; ii++)
491 {
492 counter[ii] = 0;
493 marginalResultsUnsorted[ii] = new PrecalculatedMarginal(std::move(*(marginals[ii])),
494 Lcutoff - mode_lprob + marginals[ii]->fastGetModeLProb(),
495 marginalsNeedSorting,
496 tabSize,
497 hashSize);
498
499 if(!marginalResultsUnsorted[ii]->inRange(0))
500 empty = true;
501 }
502
503 if(reorder_marginals && dimNumber > 1)
504 {
505 OrderMarginalsBySizeDecresing<PrecalculatedMarginal> comparator(marginalResultsUnsorted);
506 int* tmpMarginalOrder = new int[dimNumber];
507
508 for(int ii = 0; ii < dimNumber; ii++)
509 tmpMarginalOrder[ii] = ii;
510
511 std::sort(tmpMarginalOrder, tmpMarginalOrder + dimNumber, comparator);
512 marginalResults = new PrecalculatedMarginal*[dimNumber];
513
514 for(int ii = 0; ii < dimNumber; ii++)
515 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
516
517 marginalOrder = new int[dimNumber];
518 for(int ii = 0; ii < dimNumber; ii++)
519 marginalOrder[tmpMarginalOrder[ii]] = ii;
520
521 delete[] tmpMarginalOrder;
522 }
523 else
524 {
525 marginalResults = marginalResultsUnsorted;
526 marginalOrder = nullptr;
527 }
528
529 lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr();
530
531 if(dimNumber > 1)
532 maxConfsLPSum[0] = marginalResults[0]->fastGetModeLProb();
533
534 for(int ii = 1; ii < dimNumber-1; ii++)
535 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
536
537 lProbs_ptr = lProbs_ptr_start;
538
539 partialLProbs_second = partialLProbs;
540 partialLProbs_second++;
541
542 if(!empty)
543 {
544 recalc(dimNumber-1);
545 counter[0]--;
546 lProbs_ptr--;
547 }
548 else
549 {
551 lcfmsv = std::numeric_limits<double>::infinity();
552 }
553}
554
556{
557 for(int ii = 0; ii < dimNumber; ii++)
558 {
559 counter[ii] = marginalResults[ii]->get_no_confs()-1;
560 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
561 }
562 partialLProbs[dimNumber] = -std::numeric_limits<double>::infinity();
563 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->get_no_confs()-1;
564}
565
567{
568 if(empty)
569 return 0;
570
571 if(dimNumber == 1)
572 return marginalResults[0]->get_no_confs();
573
574 const double* lProbs_ptr_l = marginalResults[0]->get_lProbs_ptr() + marginalResults[0]->get_no_confs();
575
576 std::unique_ptr<const double* []> lProbs_restarts(new const double*[dimNumber]);
577
578 for(int ii = 0; ii < dimNumber; ii++)
579 lProbs_restarts[ii] = lProbs_ptr_l;
580
581 size_t count = 0;
582
583 while(*lProbs_ptr_l < lcfmsv)
584 lProbs_ptr_l--;
585
586 while(true)
587 {
588 count += lProbs_ptr_l - lProbs_ptr_start + 1;
589
590 int idx = 0;
591 int * cntr_ptr = counter;
592
593 while(idx < dimNumber - 1)
594 {
595 *cntr_ptr = 0;
596 idx++;
597 cntr_ptr++;
598 (*cntr_ptr)++;
599
600 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
601 if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= Lcutoff)
602 {
603 short_recalc(idx-1);
604 lProbs_ptr_l = lProbs_restarts[idx];
605 while(*lProbs_ptr_l < lcfmsv)
606 lProbs_ptr_l--;
607 for(idx--; idx > 0; idx--)
608 lProbs_restarts[idx] = lProbs_ptr_l;
609 break;
610 }
611 }
612 if(idx == dimNumber - 1)
613 {
614 reset();
615 return count;
616 }
617 }
618}
619
621{
622 if(empty)
623 {
625 return;
626 }
627
629
630 memset(counter, 0, sizeof(int)*dimNumber);
631 recalc(dimNumber-1);
632 counter[0]--;
633
634 lProbs_ptr = lProbs_ptr_start - 1;
635}
636
637IsoThresholdGenerator::~IsoThresholdGenerator()
638{
639 delete[] counter;
640 delete[] maxConfsLPSum;
641 if (marginalResultsUnsorted != marginalResults)
642 delete[] marginalResultsUnsorted;
643 dealloc_table(marginalResults, dimNumber);
644 if(marginalOrder != nullptr)
645 delete[] marginalOrder;
646}
647
648
649/*
650 * ------------------------------------------------------------------------------------------------------------------------
651 */
652
653
654IsoLayeredGenerator::IsoLayeredGenerator(Iso&& iso, int tabSize, int hashSize, bool reorder_marginals, double t_prob_hint)
655: IsoGenerator(std::move(iso))
656{
657 counter = new int[dimNumber];
658 maxConfsLPSum = new double[dimNumber-1];
659 currentLThreshold = nextafter(mode_lprob, -std::numeric_limits<double>::infinity());
660 lastLThreshold = (std::numeric_limits<double>::min)();
661 marginalResultsUnsorted = new LayeredMarginal*[dimNumber];
662 resetPositions = new const double*[dimNumber];
663 marginalsNeedSorting = doMarginalsNeedSorting();
664
665 memset(counter, 0, sizeof(int)*dimNumber);
666
667 for(int ii = 0; ii < dimNumber; ii++)
668 marginalResultsUnsorted[ii] = new LayeredMarginal(std::move(*(marginals[ii])), tabSize, hashSize);
669
670 if(reorder_marginals && dimNumber > 1)
671 {
672 double* marginal_priorities = new double[dimNumber];
673
674 saveMarginalLogSizeEstimates(marginal_priorities, t_prob_hint);
675
676 int* tmpMarginalOrder = new int[dimNumber];
677
678 for(int ii = 0; ii < dimNumber; ii++)
679 tmpMarginalOrder[ii] = ii;
680
681 TableOrder<double> TO(marginal_priorities);
682
683 std::sort(tmpMarginalOrder, tmpMarginalOrder + dimNumber, TO);
684 marginalResults = new LayeredMarginal*[dimNumber];
685
686 for(int ii = 0; ii < dimNumber; ii++)
687 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
688
689 marginalOrder = new int[dimNumber];
690 for(int ii = 0; ii < dimNumber; ii++)
691 marginalOrder[tmpMarginalOrder[ii]] = ii;
692
693 delete[] tmpMarginalOrder;
694 delete[] marginal_priorities;
695 }
696 else
697 {
698 marginalResults = marginalResultsUnsorted;
699 marginalOrder = nullptr;
700 }
701
702 lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr();
703
704 if(dimNumber > 1)
705 maxConfsLPSum[0] = marginalResults[0]->fastGetModeLProb();
706
707 for(int ii = 1; ii < dimNumber-1; ii++)
708 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
709
710 lProbs_ptr = lProbs_ptr_start;
711
712 partialLProbs_second = partialLProbs;
713 partialLProbs_second++;
714
715 counter[0]--;
716 lProbs_ptr--;
717 lastLThreshold = 10.0;
718 IsoLayeredGenerator::nextLayer(-0.00001);
719}
720
721bool IsoLayeredGenerator::nextLayer(double offset)
722{
723 size_t first_mrg_size = marginalResults[0]->get_no_confs();
724
725 if(lastLThreshold < getUnlikeliestPeakLProb())
726 return false;
727
728 lastLThreshold = currentLThreshold;
729 currentLThreshold += offset;
730
731 for(int ii = 0; ii < dimNumber; ii++)
732 {
733 marginalResults[ii]->extend(currentLThreshold - mode_lprob + marginalResults[ii]->fastGetModeLProb(), marginalsNeedSorting);
734 counter[ii] = 0;
735 }
736
737 lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr(); // vector relocation might have happened
738
739 lProbs_ptr = lProbs_ptr_start + first_mrg_size - 1;
740
741 for(int ii = 0; ii < dimNumber; ii++)
742 resetPositions[ii] = lProbs_ptr;
743
744 recalc(dimNumber-1);
745
746 return true;
747}
748
749bool IsoLayeredGenerator::carry()
750{
751 // If we reached this point, a carry is needed
752
753 int idx = 0;
754
755 int * cntr_ptr = counter;
756
757 while(idx < dimNumber-1)
758 {
759 *cntr_ptr = 0;
760 idx++;
761 cntr_ptr++;
762 (*cntr_ptr)++;
763 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
764 if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= currentLThreshold)
765 {
766 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
767 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
768 recalc(idx-1);
769 lProbs_ptr = resetPositions[idx];
770
771 while(*lProbs_ptr <= last_lcfmsv)
772 lProbs_ptr--;
773
774 for(int ii = 0; ii < idx; ii++)
775 resetPositions[ii] = lProbs_ptr;
776
777 return true;
778 }
779 }
780
781 return false;
782}
783
784
786{
787 for(int ii = 0; ii < dimNumber; ii++)
788 {
789 counter[ii] = marginalResults[ii]->get_no_confs()-1;
790 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
791 }
792 partialLProbs[dimNumber] = -std::numeric_limits<double>::infinity();
793 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->get_no_confs()-1;
794}
795
796IsoLayeredGenerator::~IsoLayeredGenerator()
797{
798 delete[] counter;
799 delete[] maxConfsLPSum;
800 delete[] resetPositions;
801 if (marginalResultsUnsorted != marginalResults)
802 delete[] marginalResultsUnsorted;
803 dealloc_table(marginalResults, dimNumber);
804 if(marginalOrder != nullptr)
805 delete[] marginalOrder;
806}
807
808
809/*
810 * ------------------------------------------------------------------------------------------------------------------------
811 */
812
813
814IsoOrderedGenerator::IsoOrderedGenerator(Iso&& iso, int _tabSize, int _hashSize) :
815IsoGenerator(std::move(iso), false), allocator(dimNumber, _tabSize)
816{
817 partialLProbs = &currentLProb;
818 partialMasses = &currentMass;
819 partialProbs = &currentProb;
820
821 marginalResults = new MarginalTrek*[dimNumber];
822
823 for(int i = 0; i < dimNumber; i++)
824 marginalResults[i] = new MarginalTrek(std::move(*(marginals[i])), _tabSize, _hashSize);
825
826 logProbs = new const pod_vector<double>*[dimNumber];
827 masses = new const pod_vector<double>*[dimNumber];
828 marginalConfs = new const pod_vector<int*>*[dimNumber];
829
830 for(int i = 0; i < dimNumber; i++)
831 {
832 masses[i] = &marginalResults[i]->conf_masses();
833 logProbs[i] = &marginalResults[i]->conf_lprobs();
834 marginalConfs[i] = &marginalResults[i]->confs();
835 }
836
837 topConf = allocator.newConf();
838 memset(
839 reinterpret_cast<char*>(topConf) + sizeof(double),
840 0,
841 sizeof(int)*dimNumber
842 );
843
844 *(reinterpret_cast<double*>(topConf)) =
845 combinedSum(
846 getConf(topConf),
847 logProbs,
849 );
850
851 pq.push(topConf);
852}
853
854
856{
857 dealloc_table<MarginalTrek*>(marginalResults, dimNumber);
858 delete[] logProbs;
859 delete[] masses;
860 delete[] marginalConfs;
861 partialLProbs = nullptr;
862 partialMasses = nullptr;
863 partialProbs = nullptr;
864}
865
866
868{
869 if(pq.size() < 1)
870 return false;
871
872
873 topConf = pq.top();
874 pq.pop();
875
876 int* topConfIsoCounts = getConf(topConf);
877
878 currentLProb = *(reinterpret_cast<double*>(topConf));
879 currentMass = combinedSum( topConfIsoCounts, masses, dimNumber );
880 currentProb = exp(currentLProb);
881
882 ccount = -1;
883 for(int j = 0; j < dimNumber; ++j)
884 {
885 if(marginalResults[j]->probeConfigurationIdx(topConfIsoCounts[j] + 1))
886 {
887 if(ccount == -1)
888 {
889 topConfIsoCounts[j]++;
890 *(reinterpret_cast<double*>(topConf)) = combinedSum(topConfIsoCounts, logProbs, dimNumber);
891 pq.push(topConf);
892 topConfIsoCounts[j]--;
893 ccount = j;
894 }
895 else
896 {
897 void* acceptedCandidate = allocator.newConf();
898 int* acceptedCandidateIsoCounts = getConf(acceptedCandidate);
899 memcpy(acceptedCandidateIsoCounts, topConfIsoCounts, confSize);
900
901 acceptedCandidateIsoCounts[j]++;
902
903 *(reinterpret_cast<double*>(acceptedCandidate)) = combinedSum(acceptedCandidateIsoCounts, logProbs, dimNumber);
904
905 pq.push(acceptedCandidate);
906 }
907 }
908 if(topConfIsoCounts[j] > 0)
909 break;
910 }
911 if(ccount >=0)
912 topConfIsoCounts[ccount]++;
913
914 return true;
915}
916
917
918/*
919 * ---------------------------------------------------------------------------------------------------
920 */
921
922
923IsoStochasticGenerator::IsoStochasticGenerator(Iso&& iso, size_t no_molecules, double _precision, double _beta_bias) :
924IsoGenerator(std::move(iso)),
925ILG(std::move(*this)),
926to_sample_left(no_molecules),
927precision(_precision),
928beta_bias(_beta_bias),
929confs_prob(0.0),
930chasing_prob(0.0)
931{}
932
933/*
934 * ---------------------------------------------------------------------------------------------------
935 */
936
937
938
939
940
941} // namespace IsoSpec
942
The generator of isotopologues.
Definition isoSpec++.h:184
virtual ~IsoGenerator()
Destructor.
IsoGenerator(Iso &&iso, bool alloc_partials=true)
Move constructor.
The Iso class for the calculation of the isotopic distribution.
Definition isoSpec++.h:49
void saveMarginalLogSizeEstimates(double *priorities, double target_total_prob) const
Save estimates of logarithms of target sizes of marginals using Gaussian approximation into argument ...
double getHeaviestPeakMass() const
Get the mass of the heaviest peak in the isotopic distribution.
double getTheoreticalAverageMass() const
Get the theoretical average mass of the molecule.
double getUnlikeliestPeakLProb() const
Get the logprobability of the least probable subisotopologue.
void addElement(int atomCount, int noIsotopes, const double *isotopeMasses, const double *isotopeProbabilities)
Add an element to the molecule. Note: this method can only be used BEFORE Iso is used to construct an...
double getModeMass() const
Get the mass of the mode-configuration (if there are many modes, it is undefined which one will be se...
int * isotopeNumbers
Definition isoSpec++.h:64
static Iso FromFASTA(const char *fasta, bool use_nominal_masses=false, bool add_water=true)
Constructor (named) from aminoacid FASTA sequence as C string.
double getLightestPeakMass() const
Get the mass of the lightest peak in the isotopic distribution.
double variance() const
Get the theoretical variance of the distribution.
double getMonoisotopicPeakMass() const
unsigned int confSize
Definition isoSpec++.h:66
virtual ~Iso()
Destructor.
double getModeLProb() const
Get the log-probability of the mode-configuration (if there are many modes, they share this value).
int * atomCounts
Definition isoSpec++.h:65
Marginal ** marginals
Definition isoSpec++.h:68
void terminate_search()
Block the subsequent search of isotopologues.
ISOSPEC_FORCE_INLINE void recalc(int idx)
Recalculate the current partial log-probabilities, masses, and probabilities.
Definition isoSpec++.h:520
virtual ~IsoOrderedGenerator()
Destructor.
bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
void terminate_search()
Block the subsequent search of isotopologues.
double get_mass(int idx) const
get the mass of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_mass.
double get_prob(int idx) const
get the probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_eProb.
bool extend(double new_threshold, bool do_sort=true)
Extend the set of computed subisotopologues to those above the new threshold.
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues, see details in PrecalculatedMarginal::get_no_confs.
const double * get_lProbs_ptr() const
get the pointer to lProbs array. Accessing index -1 is legal and returns a guardian of -inf....
double get_lProb(int idx) const
get the log-probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_lPro...
The marginal distribution class (a subisotopologue).
double fastGetModeLProb()
Get the log-probability of the mode subisotopologue. Results undefined if ensureModeConf() wasn't cal...
The marginal distribution class (a subisotopologue).
Precalculated Marginal class.
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues.
const double * get_lProbs_ptr() const
Get the table of the log-probabilities of subisotopologues.
const double & get_lProb(int idx) const
Get the log-probability of the idx-th subisotopologue.