ParamKit
A small library that helps parse command-line parameters (for Windows).
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
strings_util.cpp
Go to the documentation of this file.
1 #include "strings_util.h"
2 
3 #include <algorithm>
4 #include <cstring>
5 
// Evaluates to the smaller of x and y.
// NOTE: this is a function-like macro, so the selected argument is evaluated twice;
// do not pass expressions with side effects (e.g. MIN(i++, j)).
#define MIN(x,y) ((x) < (y) ? (x) : (y))
7 
// Returns a copy of `str` with every character converted to lowercase
// (according to the current C locale, as done by tolower()).
//
// str : the input string; taken by value, so the caller's string is not modified.
std::string paramkit::util::to_lowercase(std::string str)
{
    for (size_t i = 0; i < str.size(); ++i) {
        // tolower() requires a value representable as unsigned char (or EOF);
        // passing a negative plain char (bytes >= 0x80 on signed-char platforms)
        // is undefined behavior, so convert through unsigned char first.
        const unsigned char ch = static_cast<unsigned char>(str[i]);
        str[i] = static_cast<char>(tolower(ch));
    }
    return str;
}
13 
// Compares two C strings over at most `max_len` characters.
//
// a, b       : the strings to compare; either may be null.
// max_len    : the maximal number of characters to examine.
// ignoreCase : if true, characters are compared after conversion with tolower().
//
// Returns true if both pointers are identical (including both null), or if the
// strings match on every examined character up to and including the terminator
// (or up to max_len characters, whichever comes first). Returns false if exactly
// one of the pointers is null or if any examined character differs.
bool paramkit::util::is_cstr_equal(char const *a, char const *b, const size_t max_len, bool ignoreCase)
{
    if (a == b) return true;
    if (!a || !b) return false;

    for (size_t i = 0; i < max_len; ++i) {
        // Work on unsigned char values: tolower() is undefined for negative
        // arguments, which plain char yields for bytes >= 0x80 on many platforms.
        const unsigned char ca = static_cast<unsigned char>(a[i]);
        const unsigned char cb = static_cast<unsigned char>(b[i]);

        const bool same = ignoreCase ? (tolower(ca) == tolower(cb)) : (ca == cb);
        if (!same) return false;

        // Both characters are equal here, so reaching the terminator of `a`
        // means `b` ended at the same position as well.
        if (ca == '\0') break;
    }
    return true;
}
33 
// Checks whether two strings have the same length and the same content.
//
// a, b       : the strings to compare.
// ignoreCase : if true, characters are compared after conversion with tolower();
//              otherwise the comparison is exact.
//
// Returns true if the strings are equal under the selected comparison mode.
bool paramkit::util::strequals(const std::string& a, const std::string& b, bool ignoreCase)
{
    const size_t len = a.size();
    if (b.size() != len) return false;

    if (!ignoreCase) {
        return a == b;
    }
    for (size_t i = 0; i < len; ++i) {
        // Convert through unsigned char: calling tolower() with a negative
        // plain char value (bytes >= 0x80) is undefined behavior.
        const unsigned char ca = static_cast<unsigned char>(a[i]);
        const unsigned char cb = static_cast<unsigned char>(b[i]);
        if (tolower(ca) != tolower(cb)) return false;
    }
    return true;
}
49 
// Calculates the Levenshtein (edit) distance between two C strings: the minimal
// number of single-character insertions, deletions and substitutions needed to
// turn s1 into s2. The comparison is case-sensitive.
//
// s1, s2 : null-terminated strings, each shorter than 100 characters.
//
// Returns the distance, or (size_t)(-1) if either string is null or has
// 100 characters or more.
size_t paramkit::util::levenshtein_distance(const char s1[], const char s2[])
{
    const size_t MAX_LEN = 100;
    const size_t INVALID_DIST = static_cast<size_t>(-1);

    if (!s1 || !s2) return INVALID_DIST;

    const size_t len1 = strlen(s1);
    const size_t len2 = strlen(s2);
    if (len1 >= MAX_LEN || len2 >= MAX_LEN) return INVALID_DIST;

    // Only two rows of the distance matrix are needed at any time:
    // prev[j] is the distance between the first (i - 1) characters of s1 and
    // the first j characters of s2; curr[j] is the same for the first i characters.
    size_t prev[MAX_LEN];
    size_t curr[MAX_LEN];

    // Distance from the empty prefix of s1: j insertions.
    for (size_t j = 0; j <= len2; ++j) {
        prev[j] = j;
    }
    for (size_t i = 1; i <= len1; ++i) {
        // Distance to the empty prefix of s2: i deletions.
        curr[0] = i;
        for (size_t j = 1; j <= len2; ++j) {
            // i always indexes s1 and j always indexes s2, so no read goes
            // beyond the end of either string.
            const size_t subst_cost = (s1[i - 1] == s2[j - 1]) ? 0 : 1;
            const size_t substitution = prev[j - 1] + subst_cost;
            const size_t indel = std::min(prev[j], curr[j - 1]) + 1;
            curr[j] = std::min(substitution, indel);
        }
        std::copy(curr, curr + len2 + 1, prev);
    }
    return prev[len2];
}
79 
// Builds a case-insensitive histogram of the characters of a C string.
//
// s1    : null-terminated input string; if null, the histogram is left all-zero.
// hist1 : output array of 255 counters, indexed by the (lowercased) character
//         value; it is fully reset before counting.
//
// The byte value 255 has no slot in the 255-element array and is skipped.
inline void calc_histogram(const char s1[], size_t hist1[255])
{
    const size_t HIST_SIZE = 255;
    if (!hist1) return;

    // memset() takes a size in bytes, so scale by the element size to clear
    // the whole array rather than just its first 255 bytes.
    memset(hist1, 0, HIST_SIZE * sizeof(hist1[0]));
    if (!s1) return;

    for (size_t i = 0; s1[i] != '\0'; ++i) {
        // Go through unsigned char so that bytes >= 0x80 yield a valid,
        // non-negative index (and a defined tolower() call).
        const int c = tolower(static_cast<unsigned char>(s1[i]));
        if (c < 0 || static_cast<size_t>(c) >= HIST_SIZE) continue;
        hist1[c]++;
    }
}
89 
// Counts how many of the 255 histogram slots are non-zero, i.e. how many
// distinct character values were recorded in the histogram.
//
// hist1 : array of 255 counters.
inline size_t calc_unique_chars(size_t hist1[255])
{
    size_t used_slots = 0;
    const size_t *const end = hist1 + 255;
    for (const size_t *slot = hist1; slot != end; ++slot) {
        if (*slot == 0) continue;
        ++used_slots;
    }
    return used_slots;
}
98 
99 bool paramkit::util::has_similar_histogram(const char s1[], const char s2[])
100 {
101  const size_t MAX_LEN = 255;
102  size_t hist1[MAX_LEN] = { 0 };
103  size_t hist2[MAX_LEN] = { 0 };
104 
105  calc_histogram(s1, hist1);
106  calc_histogram(s2, hist2);
107 
108  size_t sim = 0;
109  for (size_t i = 0; i < MAX_LEN; i++) {
110  if (hist1[i] != 0 && hist2[i] != 0 ) sim++;
111  }
112  const size_t uniq1 = calc_unique_chars(hist1);
113  const size_t uniq2 = calc_unique_chars(hist2);
114  if (sim == uniq1 && sim == uniq2) {
115  return true;
116  }
117  //
118  return false;
119 }
120 
121 paramkit::util::stringsim_type paramkit::util::has_keyword( std::string param, std::string filter)
122 {
123  if (param.empty() || filter.empty()) {
124  return SIM_NONE;
125  }
126  param = to_lowercase(param);
127  filter = to_lowercase(filter);
128  const bool sim_found = (param.find(filter) != std::string::npos) || (filter.find(param) != std::string::npos);
129  if (sim_found) return SIM_SUBSTR;
130  return SIM_NONE;
131 }
132 
133 paramkit::util::stringsim_type paramkit::util::is_string_similar(const std::string &param, const std::string &filter)
134 {
135  if (param.empty() || filter.empty()) {
136  return SIM_NONE;
137  }
138  bool sim_found = false;
139  if (has_keyword(param, filter) != SIM_NONE) {
140  return SIM_SUBSTR;
141  }
142  size_t dist = util::levenshtein_distance(filter.c_str(), param.c_str());
143  if (dist == 1 || dist <= (param.length() / 2)) {
144  sim_found = true;
145  }
146  if (dist >= param.length() || dist >= filter.length()) {
147  sim_found = false;
148  }
149  if (sim_found) return SIM_LAV_DIST;
150 
151  sim_found = util::has_similar_histogram(filter.c_str(), param.c_str());
152  if (sim_found) return SIM_HIST;
153 
154  return SIM_NONE;
155 }
bool has_similar_histogram(const char s1[], const char s2[])
size_t levenshtein_distance(const char s1[], const char s2[])
stringsim_type is_string_similar(const std::string &param, const std::string &filter)
stringsim_type has_keyword(const std::string param, const std::string filter)
std::string to_lowercase(std::string)
Definition: strings_util.cpp:8
bool strequals(const std::string &a, const std::string &b, bool ignoreCase=true)
bool is_cstr_equal(char const *a, char const *b, const size_t max_len, bool ignoreCase=true)
void calc_histogram(const char s1[], size_t hist1[255])
#define MIN(x, y)
Definition: strings_util.cpp:6
size_t calc_unique_chars(size_t hist1[255])
The set of utility functions related to string processing and finding similarity between strings.