casacore
Loading...
Searching...
No Matches
StringDistance.h
Go to the documentation of this file.
1//# StringDistance.h: Class to deal with Levensthein distance of strings
2//# Copyright (C) 2009
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: aips2-request@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25//#
26//# $Id$
27
28#ifndef CASA_STRINGDISTANCE_H
29#define CASA_STRINGDISTANCE_H
30
31//# Includes
32#include <casacore/casa/aips.h>
33#include <casacore/casa/BasicSL/String.h>
34#include <casacore/casa/Arrays/Matrix.h>
35
36namespace casacore {
37
38// <summary>
39// Class to deal with Levensthein distance of strings.
40// </summary>
41
42// <synopsis>
43// The Levenshtein Distance is a metric telling how similar strings are.
44// It is also known as the Edit Distance.
45//
46// The distance tells how many operations (i.e., character substitutions,
47// insertions, and deletions are needed to transform one string into another.
48// <br>There are several extensions to the basic definition:
49// <ul>
50// <li> Treat a swap of adjacent characters as one operation.
51// <li> Give a weight to operations (e.g., insertions and deletions have
52// half the weight of the other operations).
53// <li> Take locality into account. For example, successive substitutions
54// weigh more than non-successive.
55// <li> Extend to wildcarded strings.
56// </ul>
57// This class optionally uses the swap extension. Furthermore one can
58// optionally ignore blanks. By default both options are used.
59//
60// The code is based on code written by Anders Sewerin Johansen.
61// Calculating the distance is an expensive O(N^2) operation, thus
62// should be used with care.
63//
64// The class is constructed with the source string to compare against.
65// Thereafter its <code>match</code> or <code>distance</code>
66// function can be used for each target string.
67// </synopsis>
68
70{
71public:
72 // Default constructor sets maxDistance to 0.
74
75 // Construct from the source string and maximum distance.
76 // If the maximum distance is negative, it defaults to 1+strlength/3.
77 // Note that maximum distance 0 means that the strings must match exactly.
79 Bool countSwaps=True, Bool ignoreBlanks=True,
80 Bool caseInsensitive=False);
81
82 // Get data members.
83 // <group>
84 const string& source() const
85 { return itsSource; }
87 { return itsMaxDistance; }
88 const Matrix<Int>& matrix() const
89 { return itsMatrix; }
90 // </group>
91
92 // Test if the given target string is within the maximum distance.
93 Bool match (const String& target) const;
94
95 // Calculate the distance from the string to the string given in the constructor.
96 // If the length of target exceeds source length + maxDistance,
97 // the difference in lengths is returned.
98 Int distance (const String& target) const;
99
100 // Calculate the distance between the two strings.
101 // This is slower than the <src>distance</src> member function, because
102 // it has to allocate the underlying Matrix for each invocation.
103 static Int distance (const String& source, const String& target,
104 Bool countSwaps=True);
105
106 // Remove blanks from the given string.
108
109private:
110 // Calculate the distance.
111 static Int doDistance (const String& source, const String& target,
112 Bool countSwaps, Matrix<Int>& matrix);
113
114
115private:
122};
123
124} //# end namespace
125
126#endif
StringDistance(const String &source, Int maxDistance=-1, Bool countSwaps=True, Bool ignoreBlanks=True, Bool caseInsensitive=False)
Construct from the source string and maximum distance.
static Int doDistance(const String &source, const String &target, Bool countSwaps, Matrix< Int > &matrix)
Calculate the distance.
const Matrix< Int > & matrix() const
static Int distance(const String &source, const String &target, Bool countSwaps=True)
Calculate the distance between the two strings.
static String removeBlanks(const String &source)
Remove blanks from the given string.
Int distance(const String &target) const
Calculate the distance from the string to the string given in the constructor.
StringDistance()
Default constructor sets maxDistance to 0.
Bool match(const String &target) const
Test if the given target string is within the maximum distance.
const string & source() const
Get data members.
String: the storage and methods of handling collections of characters.
Definition String.h:225
this file contains all the compiler specific defines
Definition mainpage.dox:28
const Bool False
Definition aipstype.h:44
int Int
Definition aipstype.h:50
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:42
const Bool True
Definition aipstype.h:43