 |
OpenMS
2.4.0
|
Go to the documentation of this file.
95 bool isRTColumnOn()
const;
98 void wrapSVM(std::vector<AASequence>& peptide_sequences, std::vector<double>& predicted_retention_times);
107 void setDefaultParams_();
162 void updateMembers_()
override;
SimTypes::SimCoordinateType total_gradient_time_
Total gradient time.
Definition: RTSimulation.h:135
the nu parameter for nu-SVR
Definition: SVMWrapper.h:96
the kernel type
Definition: SVMWrapper.h:93
Base class for TOPP applications.
Definition: TOPPBase.h:150
SimTypes::SimCoordinateType rt_sampling_rate_
bin size in rt dimension
Definition: RTSimulation.h:145
the C parameter of the svm
Definition: SVMWrapper.h:95
const AASequence & getSequence() const
returns the peptide sequence without trailing or following spaces
std::vector< std::vector< std::pair< Int, double > > > sequences
Definition: SVMWrapper.h:56
the epsilon parameter for epsilon-SVR
Definition: SVMWrapper.h:97
Simulates/Predicts retention times for peptides or peptide separation.
Definition: RTSimulation.h:54
This class serves for reading in and writing FASTA files.
Definition: FASTAFile.h:64
static FileTypes::Type getTypeByFileName(const String &filename)
Determines the file type from a file name.
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
A more convenient string class.
Definition: String.h:57
void setRT(double rt)
sets the RT of the MS2 spectrum where the identification occurred
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
with N-terminus and C-terminus
Definition: Residue.h:152
void getSignificanceBorders(svm_problem *data, std::pair< double, double > &borders, double confidence=0.95, Size number_of_runs=5, Size number_of_partitions=5, double step_size=0.01, Size max_iterations=1000000)
calculates the significance borders of the error model and stores them in 'sigmas'
#define LOG_INFO
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:454
The file pendant of the Param class used to load and store the param datastructure as paramXML.
Definition: ParamXMLFile.h:49
void saveModel(std::string modelFilename) const
saves the svm model
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
boost::shared_ptr< SimRandomNumberGenerator > MutableSimRandomNumberGeneratorPtr
Definition: SimTypes.h:174
double egh_tau_scale_
EGH tau scale parameter of the lorentzian variation.
Definition: RTSimulation.h:150
String toUnmodifiedString() const
returns the peptide as string without any modifications
double egh_variance_location_
EGH sigma value.
Definition: RTSimulation.h:153
Int getIntParameter(SVM_parameter_type type)
You can get the actual int parameters of the svm.
Serves for encoding sequences into feature vectors.
Definition: LibSVMEncoder.h:54
void setParameter(SVM_parameter_type type, Int value)
You can set the parameters of the svm:
Representation of a protein identification run.
Definition: ProteinIdentification.h:61
void setHits(const std::vector< PeptideHit > &hits)
Sets the peptide hits.
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
bool store(const String &filename) const
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
any text format, which has only loose definition of what it actually contains – thus it is usually ha...
Definition: FileTypes.h:95
double egh_variance_scale_
EGH sigma scale parameter of the lorentzian variation.
Definition: RTSimulation.h:155
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
bool toBool() const
Conversion to bool.
the svm type can be NU_SVR or EPSILON_SVR
Definition: SVMWrapper.h:92
svm_problem * encodeLibSVMProblemWithCompositionAndLengthVectors(const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters, UInt maximum_sequence_length)
creates composition vectors with additional length information for 'sequences' and stores them in Lib...
bool exists(const String &key) const
Tests if a parameter is set (expecting its fully qualified name, e.g., TextExporter:1:proteins_only)
SimTypes::MutableSimRandomNumberGeneratorPtr rnd_gen_
Random number generator.
Definition: RTSimulation.h:159
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
Definition: SVMWrapper.h:100
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the mean square error for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:383
std::vector< double > labels
Definition: SVMWrapper.h:57
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
static void destroyProblem(svm_problem *problem)
frees all the memory of the svm_problem instance
double egh_tau_location_
EGH tau value.
Definition: RTSimulation.h:148
Data structure used in SVMWrapper.
Definition: SVMWrapper.h:54
double getPValue(double sigma1, double sigma2, std::pair< double, double > point)
calculates a p-value for a given data point using the model parameters
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
static const DataValue EMPTY
Empty data value for comparisons.
Definition: DataValue.h:62
Int train(struct svm_problem *problem)
trains the svm
void loadModel(std::string modelFilename)
loads the model
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
Serves as a wrapper for the libsvm.
Definition: SVMWrapper.h:79
void store(String filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
A container for features.
Definition: FeatureMap.h:93
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
static void load(const String &filename, std::vector< FASTAEntry > &data)
loads a FASTA file given by 'filename' and stores the information in 'data'
OpenMS::String rt_model_file_
Definition: RTSimulation.h:132
void predict(struct svm_problem *problem, std::vector< double > &predicted_labels)
predicts the labels using the trained model
SimTypes::SimCoordinateType gradient_max_
Maximal observed gradient time.
Definition: RTSimulation.h:142
double getDoubleParameter(SVM_parameter_type type)
You can get the actual double- parameters of the svm.
bool load(const String &filename)
Definition: SVMWrapper.h:101
void encodeProblemWithOligoBorderVectors(const std::vector< AASequence > &sequences, UInt k_mer_length, const String &allowed_characters, UInt border_length, std::vector< std::vector< std::pair< Int, double > > > &vectors)
creates oligo border vectors for 'sequences' and stores them in 'vectors'
the degree for the polynomial kernel
Definition: SVMWrapper.h:94
Management and storage of parameters / INI files.
Definition: Param.h:74
void store(const String &filename, const Param &param) const
Write XML file.
double performCrossValidation(svm_problem *problem_ul, const SVMData &problem_l, const bool is_labeled, const std::map< SVM_parameter_type, double > &start_values_map, const std::map< SVM_parameter_type, double > &step_sizes_map, const std::map< SVM_parameter_type, double > &end_values_map, Size number_of_partitions, Size number_of_runs, std::map< SVM_parameter_type, double > &best_parameters, bool additive_step_sizes=true, bool output=false, String performances_file_name="performances.txt", bool mcc_as_performance_measure=false)
Performs a CV for the data given by 'problem'.
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:50
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:694
SimTypes::SimCoordinateType gradient_min_
gradient ranges
Definition: RTSimulation.h:140
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:791
void setTrainingSample(svm_problem *training_sample)
This is used for being able to perform predictions with non libsvm standard kernels.
void load(const String &filename, Param &param)
Read XML file.
void setMZ(double mz)
sets the MZ of the MS2 spectrum
std::vector< String >::const_iterator ConstIterator
Non-mutable iterator.
Definition: TextFile.h:56
String toString(T i)
toString functions (single argument)
Definition: StringUtils.h:68
Definition: SVMWrapper.h:107
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
Used to load and store idXML files.
Definition: IdXMLFile.h:63
static double pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b,...
Definition: StatisticFunctions.h:509
Representation of a peptide hit.
Definition: PeptideHit.h:54