libpappsomspp
Library for mass spectrometry
mzintegrationparams.cpp
Go to the documentation of this file.
1 /* BEGIN software license
2  *
3  * msXpertSuite - mass spectrometry software suite
4  * -----------------------------------------------
5  * Copyright(C) 2009,...,2018 Filippo Rusconi
6  *
7  * http://www.msxpertsuite.org
8  *
9  * This file is part of the msXpertSuite project.
10  *
11  * The msXpertSuite project is the successor of the massXpert project. This
12  * project now includes various independent modules:
13  *
14  * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15  * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16  *
17  * This program is free software: you can redistribute it and/or modify
18  * it under the terms of the GNU General Public License as published by
19  * the Free Software Foundation, either version 3 of the License, or
20  * (at your option) any later version.
21  *
22  * This program is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25  * GNU General Public License for more details.
26  *
27  * You should have received a copy of the GNU General Public License
28  * along with this program. If not, see <http://www.gnu.org/licenses/>.
29  *
30  * END software license
31  */
32 
33 
34 /////////////////////// StdLib includes
35 #include <map>
36 #include <cmath>
37 
38 
39 /////////////////////// Qt includes
40 #include <QDebug>
41 #include <QString>
42 #include <QFile>
43 #include <QDateTime>
44 
45 
46 /////////////////////// pappsomspp includes
47 #include "../../utils.h"
48 #include "../../massspectrum/massspectrum.h"
49 
50 
51 /////////////////////// Local includes
52 #include "mzintegrationparams.h"
53 
54 
55 namespace pappso
56 {
57 
58 
59 //! Map relating each BinningType enumerator (NONE, DATA_BASED, ARBITRARY) to
//! its textual representation. MzIntegrationParams::toString() looks the
//! current binning type up in this map to print its name.
60 std::map<BinningType, QString> binningTypeMap{
61  {BinningType::NONE, "NONE"},
62  {BinningType::DATA_BASED, "DATA_BASED"},
63  {BinningType::ARBITRARY, "ARBITRARY"}};
64 
65 
67 {
70 }
71 
72 
75  BinningType binningType,
76  int decimalPlaces,
77  pappso::PrecisionPtr precisionPtr,
78  bool applyMzShift,
79  pappso::pappso_double mzShift,
80  bool removeZeroValDataPoints)
81  : m_smallestMz(minMz),
82  m_greatestMz(maxMz),
83  m_binningType(binningType),
84  m_decimalPlaces(decimalPlaces),
85  mp_precision(precisionPtr),
86  m_applyMzShift(applyMzShift),
87  m_mzShift(mzShift),
88  m_removeZeroValDataPoints(removeZeroValDataPoints)
89 {
90  if(mp_precision == nullptr)
92 }
93 
94 
96  : m_smallestMz(other.m_smallestMz),
97  m_greatestMz(other.m_greatestMz),
98  m_binningType(other.m_binningType),
99  m_decimalPlaces(other.m_decimalPlaces),
100  mp_precision(other.mp_precision),
101  m_applyMzShift(other.m_applyMzShift),
102  m_mzShift(other.m_mzShift),
103  m_removeZeroValDataPoints(other.m_removeZeroValDataPoints)
104 {
105  if(mp_precision == nullptr)
107 }
108 
109 
111 {
112 }
113 
114 
117 {
118  if(this == &other)
119  return *this;
120 
121  m_smallestMz = other.m_smallestMz;
122  m_greatestMz = other.m_greatestMz;
124 
126 
127  mp_precision = other.mp_precision;
128  if(mp_precision == nullptr)
130 
132  m_mzShift = other.m_mzShift;
134 
135  return *this;
136 }
137 
138 
139 void
141 {
142  m_smallestMz = value;
143 }
144 
145 
146 void
148 {
149  m_smallestMz = m_smallestMz > value ? value : m_smallestMz;
150 }
151 
152 
155 {
156  return m_smallestMz;
157 }
158 
159 
160 void
162 {
163  m_greatestMz = value;
164 }
165 
166 
167 void
169 {
170  m_greatestMz = m_greatestMz < value ? value : m_greatestMz;
171 }
172 
173 
176 {
177  return m_greatestMz;
178 }
179 
180 void
182 {
183  m_binningType = binningType;
184 }
185 
188 {
189  return m_binningType;
190 }
191 
192 void
194 {
195  m_decimalPlaces = decimal_places;
196 }
197 
198 
199 int
201 {
202  return m_decimalPlaces;
203 }
204 
205 void
207 {
208  mp_precision = precisionPtr;
209 
210  if(mp_precision == nullptr)
212 }
213 
216 {
217  return mp_precision;
218 }
219 
220 
221 void
223 {
224  m_applyMzShift = applyMzShift;
225 }
226 
227 
228 bool
230 {
231  return m_applyMzShift;
232 }
233 
234 
235 void
237 {
238  m_removeZeroValDataPoints = removeOrNot;
239 }
240 
241 
242 bool
244 {
246 }
247 
248 
249 void
251 {
252  m_mzShift = value;
253 }
254 
255 
256 double
258 {
259  return m_mzShift;
260 }
261 
262 
263 //! Reset the instance to default values.
264 void
266 {
267  m_smallestMz = std::numeric_limits<double>::min();
268  m_greatestMz = std::numeric_limits<double>::min();
270 
271  // Special case for this member datum
273 
274  m_applyMzShift = false;
275  m_mzShift = 0;
277 }
278 
279 
280 bool
282 {
283  int errors = 0;
284 
286  {
287  // qDebug() << "m_smallestMz:" << m_smallestMz;
288  // qDebug() << "smallest is max:" << (m_smallestMz ==
289  // std::numeric_limits<double>::max());
290 
291  errors += (m_smallestMz == std::numeric_limits<double>::max() ? 1 : 0);
292 
293  // qDebug() << "m_greatestMz:" << m_greatestMz;
294  // qDebug() << "greatest is min:" << (m_greatestMz ==
295  // std::numeric_limits<double>::min());
296  errors += (m_greatestMz == std::numeric_limits<double>::min() ? 1 : 0);
297 
298  // if(mp_precision != nullptr)
299  // qDebug() << mp_precision->toString();
300 
301  errors += (mp_precision == nullptr ? 1 : 0);
302  }
303 
304  if(errors)
305  {
306  qDebug()
307  << "The m/z integration parameters are not valid or do not apply...";
308  }
309 
310  return !errors;
311 }
312 
313 
314 bool
316 {
317  return (m_smallestMz != std::numeric_limits<double>::max()) &&
318  (m_greatestMz != std::numeric_limits<double>::min());
319 }
320 
321 
322 std::vector<double>
324 {
325 
326  // qDebug();
327 
328  std::vector<double> bins;
329 
331  {
332  // If no binning is to be performed, fine.
333  return bins;
334  }
336  {
337  // Use only data in the MzIntegrationParams member data.
338  return createArbitraryBins();
339  }
341  {
342  // qDebug();
343 
344  qFatal("Programming error.");
345  }
346 
347  return bins;
348 }
349 
350 
351 std::vector<double>
353 {
354 
355  // qDebug();
356 
357  std::vector<double> bins;
358 
360  {
361  // If no binning is to be performed, fine.
362  return bins;
363  }
365  {
366  // Use only data in the MzIntegrationParams member data.
367  return createArbitraryBins();
368  }
370  {
371  // qDebug();
372 
373  // Use the first spectrum to perform the data-based bins
374 
375  return createDataBasedBins(mass_spectrum_csp);
376  }
377 
378  return bins;
379 }
380 
381 
382 std::vector<double>
384 {
385 
386  //qDebug();
387 
388  // Now starts the tricky stuff. Depending on how the binning has been
389  // configured, we need to take diverse actions.
390 
391  //qDebug() << "Bin specification:" << mp_precision->toString();
392 
395 
396  //qDebug() << QString::asprintf("min_mz: %.6f\n", min_mz)
397  //<< QString::asprintf("max_mz: %.6f\n", max_mz);
398 
399  pappso::pappso_double binSize = mp_precision->delta(min_mz);
400 
401  //qDebug() << QString::asprintf(
402  //"binSize is the precision delta for min_mz: %.6f\n", binSize);
403 
404  // Only compute the decimal places if they were not configured already.
405  if(m_decimalPlaces == -1)
406  {
407  // qDebug() << "Now checking how many decimal places are needed.";
408 
409  // We want as many decimal places as there are 0s between the integral
410  // part of the double and the first non-0 cipher. For example, if
411  // binSize is 0.004, zero decimals is 2 and m_decimalPlaces is set to 3,
412  // because we want decimals up to 4 included.
413 
415 
416  // qDebug() << "With binSize" << binSize
417  //<< " m_decimalPlaces was computed to be:" << m_decimalPlaces;
418  }
419 
420  // Now that we have defined the value of m_decimalPlaces, let's use that
421  // value.
422 
423  double first_mz = ceil((min_mz * std::pow(10, m_decimalPlaces)) - 0.49) /
424  pow(10, m_decimalPlaces);
425  double last_mz =
426  ceil((max_mz * pow(10, m_decimalPlaces)) - 0.49) / pow(10, m_decimalPlaces);
427 
428  // qDebug() << "After having accounted for the decimals, new min/max values:"
429  //<< QString::asprintf("Very first data point: %.6f\n", first_mz)
430  //<< QString::asprintf("Very last data point to reach: %.6f\n",
431  // last_mz);
432 
433  // Instantiate the vector of mz doubles that we'll feed with the bins.
434 
435  std::vector<pappso::pappso_double> bins;
436 
437  // Store that very first value for later use in the loop.
438  // The bins are nothing more than:
439  //
440  // 1. The first mz (that is, the smallest mz value found in all the spectra);
441  // 2. A sequence of mz values corresponding to that first mz value
442  // incremented by the bin size.
443 
444  // Seed the root of the bin vector with the first mz value rounded above as
445  // requested.
446  pappso::pappso_double previous_mz_bin = first_mz;
447 
448  bins.push_back(previous_mz_bin);
449 
450  // Now continue adding mz values until we have reached the end of the
451  // spectrum, that is the max_mz value, as converted using the decimals to
452  // last_mz.
453 
454  // debugCount value used below for debugging purposes.
455  // int debugCount = 0;
456 
457  while(previous_mz_bin <= last_mz)
458  {
459 
460  // qDebug() << "Now starting the bin creation loop.";
461 
462  // Calculate dynamically the precision delta according to the current mz
463  // value.
464 
465  //double precision_delta = mp_precision->delta(previous_mz_bin);
466  // qDebug() << "precision_delta: " << precision_delta;
467 
468  double current_mz = previous_mz_bin + mp_precision->delta(previous_mz_bin);
469 
470  // qDebug() << QString::asprintf(
471  //"previous_mzBin: %.6f and current_mz: %.6f\n",
472  // previous_mz_bin,
473  // current_mz);
474 
475  // Now apply on the obtained mz value the decimals that were either set
476  // or computed earlier.
477 
478  double current_rounded_mz =
479  ceil((current_mz * pow(10, m_decimalPlaces)) - 0.49) /
480  pow(10, m_decimalPlaces);
481 
482  // qDebug() << QString::asprintf(
483  //"current_mz: %.6f and current_rounded_mz: %.6f and previous_mzBin "
484  //": % .6f\n ",
485  // current_mz,
486  // current_rounded_mz,
487  // previous_mz_bin);
488 
489  // If rounding makes the new value identical to the previous one, then
490  // that means that we need to decrease roughness.
491 
492  if(current_rounded_mz == previous_mz_bin)
493  {
494  ++m_decimalPlaces;
495 
496  current_rounded_mz =
497  ceil((current_mz * pow(10, m_decimalPlaces)) - 0.49) /
498  pow(10, m_decimalPlaces);
499 
500  // qDebug().noquote()
501  //<< "Had to increment decimal places by one while creating the bins "
502  //"in BinningType::ARBITRARY mode..";
503  }
504 
505  bins.push_back(current_rounded_mz);
506 
507  // Use the current_rounded_mz value for the storage of the previous mz bin.
508  previous_mz_bin = current_rounded_mz;
509  }
510 
511 
512 #if 0
513 
514  QString fileName = "/tmp/massSpecArbitraryBins.txt-at-" +
515  QDateTime::currentDateTime().toString("yyyyMMdd-HH-mm-ss");
516 
517  qDebug() << "Writing the list of bins setup in the "
518  "mass spectrum in file "
519  << fileName;
520 
521  QFile file(fileName);
522  file.open(QIODevice::WriteOnly);
523 
524  QTextStream fileStream(&file);
525 
526  for(auto &&bin : bins)
527  fileStream << QString("%1\n").arg(bin, 0, 'f', 10);
528 
529  fileStream.flush();
530  file.close();
531 
532 #endif
533 
534  // qDebug() << "Prepared bins with " << bins.size() << "elements."
535  //<< "starting with mz" << bins.front() << "ending with mz"
536  //<< bins.back();
537 
538  return bins;
539 }
540 
541 
542 std::vector<double>
544  pappso::MassSpectrumCstSPtr mass_spectrum_csp)
545 {
546  // qDebug();
547 
548  // The bins in *this mass spectrum must be calculated starting from the
549  // data in the mass_spectrum_csp parameter.
550 
551  // Instantiate the vector of mz doubles that we'll feed with the bins.
552 
553  std::vector<pappso::pappso_double> bins;
554 
555  if(mass_spectrum_csp->size() < 2)
556  return bins;
557 
558  // Make sure the spectrum is sorted, as this function takes for granted
559  // that the DataPoint instances are sorted in ascending x (== mz) value
560  // order.
561  pappso::MassSpectrum local_mass_spectrum = *mass_spectrum_csp;
562  local_mass_spectrum.sortMz();
563 
565 
566  // qDebug() << "The min_mz:" << min_mz;
567 
568  if(m_decimalPlaces != -1)
569  min_mz = ceil((min_mz * pow(10, m_decimalPlaces)) - 0.49) /
570  pow(10, m_decimalPlaces);
571 
572 
573  // Two values for the definition of a MassSpectrumBin.
574 
575  // The first value of the mz range that defines the bin. This value is part
576  // of the bin.
577  pappso::pappso_double start_mz_in = min_mz;
578 
579  // The second value of the mz range that defines the bin. This value is
580  // *not* part of the bin.
581  pappso::pappso_double end_mz_out;
582 
583  std::vector<pappso::DataPoint>::const_iterator it =
584  local_mass_spectrum.begin();
585 
586  pappso::pappso_double prev_mz = it->x;
587 
588  if(m_decimalPlaces != -1)
589  prev_mz = ceil((prev_mz * pow(10, m_decimalPlaces)) - 0.49) /
590  pow(10, m_decimalPlaces);
591 
592  ++it;
593 
594  while(it != local_mass_spectrum.end())
595  {
596  pappso::pappso_double next_mz = it->x;
597 
598  if(m_decimalPlaces != -1)
599  next_mz = ceil((next_mz * pow(10, m_decimalPlaces)) - 0.49) /
600  pow(10, m_decimalPlaces);
601 
602  pappso::pappso_double step = next_mz - prev_mz;
603  end_mz_out = start_mz_in + step;
604 
605  if(m_decimalPlaces != -1)
606  end_mz_out = ceil((end_mz_out * pow(10, m_decimalPlaces)) - 0.49) /
607  pow(10, m_decimalPlaces);
608 
609  // The data point that is crafted has a 0 y-value. The binning must
610  // indeed not create artificial intensity data.
611 
612  // qDebug() << "Pushing back bin:" << start_mz_in << end_mz_out;
613 
614  bins.push_back(start_mz_in);
615 
616  // Prepare next bin
617  start_mz_in = end_mz_out;
618 
619  // Update prev_mz to be the current one for next iteration.
620  prev_mz = next_mz;
621 
622  // Now go to the next DataPoint instance.
623  ++it;
624  }
625 
626 #if 0
627 
628  QString fileName = "/tmp/massSpecDataBasedBins.txt";
629 
630  qDebug() << "Writing the list of bins setup in the "
631  "mass spectrum in file "
632  << fileName;
633 
634  QFile file(fileName);
635  file.open(QIODevice::WriteOnly);
636 
637  QTextStream fileStream(&file);
638 
639  for(auto &&bin : m_bins)
640  fileStream << QString("[%1-%2]\n")
641  .arg(bin.startMzIn, 0, 'f', 10)
642  .arg(bin.endMzOut, 0, 'f', 10);
643 
644  fileStream.flush();
645  file.close();
646 
647  qDebug() << "elements."
648  << "starting with mz" << m_bins.front().startMzIn << "ending with mz"
649  << m_bins.back().endMzOut;
650 
651 #endif
652 
653  return bins;
654 }
655 
656 
//! Format the m/z integration parameters as a multi-line, human-readable text.
//!
//! \param offset number of times \p spacer is repeated to build the base
//!        indentation ("lead") put in front of every entry.
//! \param spacer string used as one indentation unit; entries are indented by
//!        lead + spacer, bin details and the m/z shift by lead + 2 * spacer.
//! \return the assembled text.
//!
//! Calls qFatal() if m_binningType has no entry in binningTypeMap.
//! mp_precision is dereferenced without a null check in this function.
//!
//! NOTE(review): this listing skips source line 695, so the statement guarding
//! the "Bin nominal size" / "Bin size" block is not visible here. Judging from
//! the comment right above it, it presumably tests m_binningType against
//! BinningType::ARBITRARY -- confirm against the real source file.
657 QString
658 MzIntegrationParams::toString(int offset, const QString &spacer) const
659 {
660  QString lead;
661 
 // Build the base indentation: spacer repeated offset times.
662  for(int iter = 0; iter < offset; ++iter)
663  lead += spacer;
664 
665  QString text = lead;
666  text += "m/z integration parameters:\n";
667 
 // NOTE(review): the indentation is appended unconditionally, but the value
 // line only when m_smallestMz differs from numeric_limits<double>::max().
 // When it is equal, the indentation stays dangling in front of the next
 // entry.
668  text += lead;
669  text += spacer;
670  if(m_smallestMz != std::numeric_limits<double>::max())
671  text.append(
672  QString::asprintf("Smallest (first) m/z: %.6f\n", m_smallestMz));
673 
 // Same pattern (and same dangling indentation) for the greatest m/z.
 // NOTE(review): numeric_limits<double>::min() is the smallest positive
 // normalized double, not the most negative one; here it appears to serve
 // only as a "not set" marker -- confirm.
674  text += lead;
675  text += spacer;
676  if(m_greatestMz != std::numeric_limits<double>::min())
677  text.append(QString::asprintf("Greatest (last) m/z: %.6f\n", m_greatestMz));
678 
679  text += lead;
680  text += spacer;
681  text.append(QString("Decimal places: %1\n").arg(m_decimalPlaces));
682 
 // Translate the binning type into its textual name.
683  std::map<BinningType, QString>::iterator it;
684  it = binningTypeMap.find(m_binningType);
685 
 // A binning type absent from the map is treated as a programming error.
686  if(it == binningTypeMap.end())
687  qFatal("Programming error.");
688 
689  text += lead;
690  text += spacer;
691  text.append(QString("Binning type: %1\n").arg(it->second.toLatin1().data()));
692 
693  // Only provide the details relative to the ARBITRARY binning type.
 // NOTE(review): the guarding statement (listing line 695) is missing from
 // this listing; as shown, the block below is a plain scope.
694 
696  {
697  text += lead;
698  text += spacer;
699  text += spacer;
700  text.append(QString("Bin nominal size: %1\n")
701  .arg(mp_precision->getNominal(), 0, 'f', 6));
702 
703  text += lead;
704  text += spacer;
705  text += spacer;
 // NOTE(review): "%2" is the only place marker in this string. QString::arg()
 // replaces the lowest-numbered marker, so the value should still be
 // substituted, but "%1" would be the conventional spelling -- confirm.
706  text.append(QString("Bin size: %2\n")
707  .arg(mp_precision->toString().toLatin1().data()));
708  }
709 
710  // Now other data that are independent of the bin settings.
711 
712  text += lead;
713  text += spacer;
714  text +=
715  QString("Apply m/z shift: %1\n").arg(m_applyMzShift ? "true" : "false");
716 
717  if(m_applyMzShift)
718  {
719  text += lead;
720  text += spacer;
721  text += spacer;
 // NOTE(review): unlike every other entry, this string has no trailing
 // "\n", so the indentation and text of the next entry continue on the
 // same output line.
722  text += QString("m/z shift: %1").arg(m_mzShift, 0, 'f', 6);
723  }
724 
725  text += lead;
726  text += spacer;
727  text += QString("Remove 0-val data points: %1\n")
728  .arg(m_removeZeroValDataPoints ? "true" : "false");
729 
730  return text;
731 }
732 
733 
734 } // namespace pappso
735 
Class to represent a mass spectrum.
Definition: massspectrum.h:71
void sortMz()
Sort the DataPoint instances of this spectrum.
The MzIntegrationParams class provides the parameters defining how m/z !
pappso::pappso_double getSmallestMz() const
pappso::pappso_double m_smallestMz
MzIntegrationParams & operator=(const MzIntegrationParams &other)
pappso::pappso_double getGreatestMz() const
pappso::pappso_double m_greatestMz
pappso::PrecisionPtr getPrecision() const
std::vector< double > createArbitraryBins()
void setPrecision(pappso::PrecisionPtr precisionPtr)
void updateSmallestMz(pappso::pappso_double value)
void updateGreatestMz(pappso::pappso_double value)
QString toString(int offset=0, const QString &spacer=QString()) const
pappso::PrecisionPtr mp_precision
void setSmallestMz(pappso::pappso_double value)
void setBinningType(BinningType binningType)
void reset()
Reset the instance to default values.
std::vector< double > createDataBasedBins(pappso::MassSpectrumCstSPtr massSpectrum)
void setApplyMzShift(bool applyMzShift)
void setDecimalPlaces(int decimal_places)
std::vector< pappso::pappso_double > createBins()
void setRemoveZeroValDataPoints(bool removeOrNot=true)
void setGreatestMz(pappso::pappso_double value)
pappso::pappso_double m_mzShift
virtual QString toString() const =0
virtual pappso_double getNominal() const final
Definition: precision.cpp:65
virtual pappso_double delta(pappso_double value) const =0
static PrecisionPtr getPpmInstance(pappso_double value)
get a ppm precision pointer
Definition: precision.cpp:150
static PrecisionPtr getDaltonInstance(pappso_double value)
get a Dalton precision pointer
Definition: precision.cpp:130
static int zeroDecimalsInValue(pappso_double value)
0.11 would return 0 (no empty decimal); 2.001 would return 2; 1000.0001254 would return 3.
Definition: utils.cpp:81
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
BinningType
Type of binning when performing integrations to a mass spectrum.
@ DATA_BASED
binning based on mass spectral data
@ ARBITRARY
binning based on arbitrary bin size value
@ NONE
no binning
double pappso_double
A type definition for doubles.
Definition: types.h:48
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
std::map< BinningType, QString > binningTypeMap
Map relating the BinningType to a textual representation.